// Evaluates the wrapped vector function so that every processor of the
// sub-communicator takes part in the call to compute().
//
// Two regimes are handled:
//
//  * Broadcast regime -- entered when there are fewer sub-environments than
//    processors in the full communicator AND both auxiliary vectors are
//    stored on a single processor.  Sub-rank 0 broadcasts (1) six flags
//    telling which of its arguments are non-NULL, (2) the contents of
//    'vecValues' and (3) the contents of 'vecDirection'.  The other ranks
//    rebuild local copies from the broadcast data, allocate a local image
//    vector, and then all ranks call compute() together.  Sub-rank 0 leaves
//    after one evaluation; any other rank keeps looping for as long as it was
//    called with vecValues == NULL and sub-rank 0 keeps sending values.
//
//  * Direct regime -- otherwise, compute() is called directly with the
//    caller's arguments, after a barrier on the sub-communicator.
//
// Parameters:
//   vecValues       point at which to evaluate (may be NULL only in the
//                   broadcast regime)
//   vecDirection    optional direction vector, forwarded to compute()
//   imageVector     receives the function image; must not be NULL whenever
//                   vecValues is non-NULL
//   gradVectors, hessianMatrices, hessianEffects
//                   optional outputs, forwarded to compute() as given
//
// NOTE: in the broadcast regime the three optional output containers are not
// allocated on ranks other than sub-rank 0, so on those ranks compute()
// receives NULL for them even when sub-rank 0 passes non-NULL pointers.
void
VectorFunctionSynchronizer<P_V,P_M,Q_V,Q_M>::callFunction(
  const P_V*                    vecValues,
  const P_V*                    vecDirection,
        Q_V*                    imageVector,
        DistArray<P_V*>* gradVectors,     // Yes, 'P_V'
        DistArray<P_M*>* hessianMatrices, // Yes, 'P_M'
        DistArray<P_V*>* hessianEffects) const
{
  const bool broadcastRegime =
    (m_env.numSubEnvironments() < (unsigned int) m_env.fullComm().NumProc()) &&
    (m_auxPVec.numOfProcsForStorage() == 1) &&
    (m_auxQVec.numOfProcsForStorage() == 1);

  if (!broadcastRegime) {
    ///////////////////////////////////////////////////
    // Direct regime: no data needs to be exchanged
    ///////////////////////////////////////////////////
    UQ_FATAL_TEST_MACRO((vecValues == NULL) || (imageVector == NULL),
                        m_env.worldRank(),
                        "VectorFunctionSynchronizer<V,M>::callFunction()",
                        "Neither vecValues nor imageVector should be NULL");
    UQ_FATAL_TEST_MACRO((m_auxPVec.numOfProcsForStorage() != m_auxQVec.numOfProcsForStorage()),
                        m_env.worldRank(),
                        "VectorFunctionSynchronizer<V,M>::callFunction()",
                        "Number of processors required for storage should be the same");

    m_env.subComm().Barrier();
    m_vectorFunction.compute(*vecValues,
                             vecDirection,
                             *imageVector,
                             gradVectors,
                             hessianMatrices,
                             hessianEffects);
    return;
  }

  ///////////////////////////////////////////////////
  // Broadcast regime
  ///////////////////////////////////////////////////
  const bool isSubRoot = (m_env.subRank() == 0);

  bool stayInRoutine = true;
  do {
    // On sub-rank 0 these alias the caller's objects; on every other rank
    // they point to objects allocated (and later deleted) in this iteration.
    const P_V*       internalValues    = NULL;
    const P_V*       internalDirection = NULL;
          Q_V*       internalImageVec  = NULL;
    DistArray<P_V*>* internalGrads     = NULL; // Yes, 'P_V'
    DistArray<P_M*>* internalHessians  = NULL; // Yes, 'P_M'
    DistArray<P_V*>* internalEffects   = NULL;

    /////////////////////////////////////////////////
    // Broadcast 1 of 3
    /////////////////////////////////////////////////
    // flags[0] = '0' or '1' (vecValues       is NULL or not)
    // flags[1] = '0' or '1' (vecDirection    is NULL or not)
    // flags[2] = '0' or '1' (imageVector     is NULL or not)
    // flags[3] = '0' or '1' (gradVectors     is NULL or not)
    // flags[4] = '0' or '1' (hessianMatrices is NULL or not)
    // flags[5] = '0' or '1' (hessianEffects  is NULL or not)
    std::vector<char> flags(6,'0');

    if (isSubRoot) {
      UQ_FATAL_TEST_MACRO((vecValues != NULL) && (imageVector == NULL),
                          m_env.worldRank(),
                          "VectorFunctionSynchronizer<P_V,P_M,Q_V,Q_M>::callFunction()",
                          "imageVector should not be NULL");
      internalValues    = vecValues;
      internalDirection = vecDirection;
      internalImageVec  = imageVector;
      internalGrads     = gradVectors;
      internalHessians  = hessianMatrices;
      internalEffects   = hessianEffects;

      if (vecValues       != NULL) flags[0] = '1';
      if (vecDirection    != NULL) flags[1] = '1';
      if (imageVector     != NULL) flags[2] = '1';
      if (gradVectors     != NULL) flags[3] = '1';
      if (hessianMatrices != NULL) flags[4] = '1';
      if (hessianEffects  != NULL) flags[5] = '1';
    }

    m_env.subComm().syncPrintDebugMsg("In VectorFunctionSynchronizer<V,M>::callFunction(), just before char Bcast()",3,3000000);

    int count = static_cast<int>(flags.size());
    m_env.subComm().Bcast((void *) &flags[0], count, RawValue_MPI_CHAR, 0,
                          "VectorFunctionSynchronizer<P_V,P_M,Q_V,Q_M>::callFunction()",
                          "failed broadcast 1 of 3");

    const bool rootSentValues = (flags[0] == '1');
    if (rootSentValues) {
      ///////////////////////////////////////////////
      // Broadcast 2 of 3
      ///////////////////////////////////////////////

      // bufferDouble[0...] = contents for (eventual) vecValues
      std::vector<double> bufferDouble(m_auxPVec.sizeLocal(),0.);

      if (isSubRoot) {
        // The buffer is sized from the auxiliary vector, not from the
        // caller's vector: refuse to write past its end.
        UQ_FATAL_TEST_MACRO(internalValues->sizeLocal() > bufferDouble.size(),
                            m_env.worldRank(),
                            "VectorFunctionSynchronizer<P_V,P_M,Q_V,Q_M>::callFunction()",
                            "vecValues has more local entries than the auxiliary vector");
        for (unsigned int i = 0; i < internalValues->sizeLocal(); ++i) {
          bufferDouble[i] = (*internalValues)[i];
        }
      }

      count = static_cast<int>(bufferDouble.size());
      m_env.subComm().Bcast((void *) &bufferDouble[0], count, RawValue_MPI_DOUBLE, 0,
                            "VectorFunctionSynchronizer<P_V,P_M,Q_V,Q_M>::callFunction()",
                            "failed broadcast 2 of 3");

      if (!isSubRoot) {
        // Build the local copy directly on the heap (cloned from the
        // auxiliary vector) and overwrite its entries with the received data.
        P_V* receivedValues = new P_V(m_auxPVec);
        for (unsigned int i = 0; i < receivedValues->sizeLocal(); ++i) {
          (*receivedValues)[i] = bufferDouble[i];
        }
        internalValues = receivedValues;
      }

      if (flags[1] == '1') {
        /////////////////////////////////////////////
        // Broadcast 3 of 3
        /////////////////////////////////////////////
        // bufferDouble[0...] = contents for (eventual) vecDirection

        if (isSubRoot) {
          UQ_FATAL_TEST_MACRO(internalDirection->sizeLocal() > bufferDouble.size(),
                              m_env.worldRank(),
                              "VectorFunctionSynchronizer<P_V,P_M,Q_V,Q_M>::callFunction()",
                              "vecDirection has more local entries than the auxiliary vector");
          for (unsigned int i = 0; i < internalDirection->sizeLocal(); ++i) {
            bufferDouble[i] = (*internalDirection)[i];
          }
        }

        count = static_cast<int>(bufferDouble.size());
        m_env.subComm().Bcast((void *) &bufferDouble[0], count, RawValue_MPI_DOUBLE, 0,
                              "VectorFunctionSynchronizer<P_V,P_M,Q_V,Q_M>::callFunction()",
                              "failed broadcast 3 of 3");

        if (!isSubRoot) {
          P_V* receivedDirection = new P_V(m_auxPVec);
          for (unsigned int i = 0; i < receivedDirection->sizeLocal(); ++i) {
            (*receivedDirection)[i] = bufferDouble[i];
          }
          internalDirection = receivedDirection;
        }
      }

      ///////////////////////////////////////////////
      // All processors now call 'vectorFunction()'
      ///////////////////////////////////////////////
      if (!isSubRoot) {
        // Only the image vector is allocated here; flags[3..5] are received
        // but the corresponding containers stay NULL on these ranks.
        if (flags[2] == '1') internalImageVec = new Q_V(m_auxQVec);
      }

      m_env.subComm().Barrier();
      m_vectorFunction.compute(*internalValues,
                               internalDirection,
                               *internalImageVec,
                               internalGrads,
                               internalHessians,
                               internalEffects);
    }

    /////////////////////////////////////////////////
    // Prepare to exit routine or to stay in it
    /////////////////////////////////////////////////
    if (isSubRoot) {
      stayInRoutine = false; // Always for processor 0
    }
    else {
      // These were allocated above on this rank ('delete' on NULL is a no-op).
      delete internalValues;
      delete internalDirection;
      delete internalImageVec;

      // A rank called without values of its own keeps serving evaluations
      // for as long as sub-rank 0 keeps sending values.
      stayInRoutine = (vecValues == NULL) && rootSentValues;
    }
  } while (stayInRoutine);

  return;
}
// Evaluates the wrapped scalar function (through lnValue()) so that every
// processor of the sub-communicator takes part in the evaluation, and
// returns the value obtained by the calling processor.
//
// Two regimes are handled:
//
//  * Broadcast regime -- entered when there are fewer sub-environments than
//    processors in the full communicator AND the auxiliary vector is stored
//    on a single processor.  Sub-rank 0 broadcasts (1) five flags telling
//    which of its arguments are non-NULL, (2) the contents of 'vecValues' and
//    (3) the contents of 'vecDirection'.  The other ranks rebuild local
//    copies from the broadcast data, allocate local output objects for the
//    outputs sub-rank 0 asked for, and then all ranks call lnValue()
//    together.  Sub-rank 0 leaves after one evaluation; any other rank keeps
//    looping for as long as it was called with vecValues == NULL and
//    sub-rank 0 keeps sending values.
//
//  * Direct regime -- otherwise, 'vecValues' is required to be non-NULL and
//    lnValue() is called directly with the caller's arguments, after a
//    barrier on the sub-communicator.
//
// Parameters:
//   vecValues      point at which to evaluate (may be NULL only in the
//                  broadcast regime)
//   vecDirection   optional direction vector, forwarded to lnValue()
//   gradVector, hessianMatrix, hessianEffect
//                  optional outputs, forwarded to lnValue()
//   extraOutput1   if non-NULL and m_bayesianJointPdfPtr is set, receives
//                  lastComputedLogPrior() after the evaluation
//   extraOutput2   if non-NULL and m_bayesianJointPdfPtr is set, receives
//                  lastComputedLogLikelihood() after the evaluation
//
// Returns the value of the last lnValue() call made by this processor, or 0.
// if this processor made no call at all.
double ScalarFunctionSynchronizer<V,M>::callFunction(const V* vecValues,
    const V* vecDirection,
    V* gradVector,
    M* hessianMatrix,
    V* hessianEffect,
    double* extraOutput1,
    double* extraOutput2) const
{
  double result = 0.;

  const bool broadcastRegime =
    (m_env.numSubEnvironments() < (unsigned int) m_env.fullComm().NumProc()) &&
    (m_auxVec.numOfProcsForStorage() == 1);

  if (!broadcastRegime) {
    ///////////////////////////////////////////////////
    // Direct regime: no data needs to be exchanged
    ///////////////////////////////////////////////////
    queso_require_msg(vecValues, "vecValues should not be NULL");

    m_env.subComm().Barrier();
    result = m_scalarFunction.lnValue(*vecValues,
                                      vecDirection,
                                      gradVector,
                                      hessianMatrix,
                                      hessianEffect);
    if (extraOutput1 && m_bayesianJointPdfPtr) {
      *extraOutput1 = m_bayesianJointPdfPtr->lastComputedLogPrior();
    }
    if (extraOutput2 && m_bayesianJointPdfPtr) {
      *extraOutput2 = m_bayesianJointPdfPtr->lastComputedLogLikelihood();
    }
    return result;
  }

  ///////////////////////////////////////////////////
  // Broadcast regime
  ///////////////////////////////////////////////////
  const bool isSubRoot = (m_env.subRank() == 0);

  bool stayInRoutine = true;
  do {
    // On sub-rank 0 these alias the caller's objects; on every other rank
    // they point to objects allocated (and later deleted) in this iteration.
    const V* internalValues    = NULL;
    const V* internalDirection = NULL;
          V* internalGrad      = NULL;
          M* internalHessian   = NULL;
          V* internalEffect    = NULL;

    /////////////////////////////////////////////////
    // Broadcast 1 of 3
    /////////////////////////////////////////////////
    // flags[0] = '0' or '1' (vecValues     is NULL or not)
    // flags[1] = '0' or '1' (vecDirection  is NULL or not)
    // flags[2] = '0' or '1' (gradVector    is NULL or not)
    // flags[3] = '0' or '1' (hessianMatrix is NULL or not)
    // flags[4] = '0' or '1' (hessianEffect is NULL or not)
    std::vector<char> flags(5,'0');

    if (isSubRoot) {
      internalValues    = vecValues;
      internalDirection = vecDirection;
      internalGrad      = gradVector;
      internalHessian   = hessianMatrix;
      internalEffect    = hessianEffect;

      if (vecValues     != NULL) flags[0] = '1';
      if (vecDirection  != NULL) flags[1] = '1';
      if (gradVector    != NULL) flags[2] = '1';
      if (hessianMatrix != NULL) flags[3] = '1';
      if (hessianEffect != NULL) flags[4] = '1';
    }

    m_env.subComm().syncPrintDebugMsg("In ScalarFunctionSynchronizer<V,M>::callFunction(), just before char Bcast()",3,3000000);

    int count = static_cast<int>(flags.size());
    m_env.subComm().Bcast((void *) &flags[0], count, RawValue_MPI_CHAR, 0,
                          "ScalarFunctionSynchronizer<V,M>::callFunction()",
                          "failed broadcast 1 of 3");

    m_env.subComm().syncPrintDebugMsg("In ScalarFunctionSynchronizer<V,M>::callFunction(), just after char Bcast()",3,3000000);

    const bool rootSentValues = (flags[0] == '1');
    if (rootSentValues) {
      ///////////////////////////////////////////////
      // Broadcast 2 of 3
      ///////////////////////////////////////////////

      // bufferDouble[0...] = contents for (eventual) vecValues
      std::vector<double> bufferDouble(m_auxVec.sizeLocal(),0.);

      if (isSubRoot) {
        // The buffer is sized from the auxiliary vector, not from the
        // caller's vector: refuse to write past its end.
        queso_require_msg((internalValues->sizeLocal() <= bufferDouble.size()),
                          "vecValues has more local entries than the auxiliary vector");
        for (unsigned int i = 0; i < internalValues->sizeLocal(); ++i) {
          bufferDouble[i] = (*internalValues)[i];
        }
      }

      count = static_cast<int>(bufferDouble.size());
      m_env.subComm().Bcast((void *) &bufferDouble[0], count, RawValue_MPI_DOUBLE, 0,
                            "ScalarFunctionSynchronizer<V,M>::callFunction()",
                            "failed broadcast 2 of 3");

      if (!isSubRoot) {
        // Build the local copy directly on the heap (cloned from the
        // auxiliary vector) and overwrite its entries with the received data.
        V* receivedValues = new V(m_auxVec);
        for (unsigned int i = 0; i < receivedValues->sizeLocal(); ++i) {
          (*receivedValues)[i] = bufferDouble[i];
        }
        internalValues = receivedValues;
      }

      if (flags[1] == '1') {
        /////////////////////////////////////////////
        // Broadcast 3 of 3
        /////////////////////////////////////////////
        // bufferDouble[0...] = contents for (eventual) vecDirection

        if (isSubRoot) {
          queso_require_msg((internalDirection->sizeLocal() <= bufferDouble.size()),
                            "vecDirection has more local entries than the auxiliary vector");
          for (unsigned int i = 0; i < internalDirection->sizeLocal(); ++i) {
            bufferDouble[i] = (*internalDirection)[i];
          }
        }

        count = static_cast<int>(bufferDouble.size());
        m_env.subComm().Bcast((void *) &bufferDouble[0], count, RawValue_MPI_DOUBLE, 0,
                              "ScalarFunctionSynchronizer<V,M>::callFunction()",
                              "failed broadcast 3 of 3");

        if (!isSubRoot) {
          V* receivedDirection = new V(m_auxVec);
          for (unsigned int i = 0; i < receivedDirection->sizeLocal(); ++i) {
            (*receivedDirection)[i] = bufferDouble[i];
          }
          internalDirection = receivedDirection;
        }
      }

      ///////////////////////////////////////////////
      // All processors now call 'scalarFunction()'
      ///////////////////////////////////////////////
      if (!isSubRoot) {
        // Local output objects, only for the outputs sub-rank 0 asked for.
        if (flags[2] == '1') internalGrad    = new V(m_auxVec);
        if (flags[3] == '1') internalHessian = new M(m_auxVec);
        if (flags[4] == '1') internalEffect  = new V(m_auxVec);
      }

      m_env.subComm().syncPrintDebugMsg("In ScalarFunctionSynchronizer<V,M>::callFunction(), just before actual lnValue()",3,3000000);
      m_env.subComm().Barrier();
      result = m_scalarFunction.lnValue(*internalValues,   // input
                                        internalDirection, // input
                                        internalGrad,      // output
                                        internalHessian,   // output
                                        internalEffect);   // output
      if (extraOutput1 && m_bayesianJointPdfPtr) {
        *extraOutput1 = m_bayesianJointPdfPtr->lastComputedLogPrior();
      }
      if (extraOutput2 && m_bayesianJointPdfPtr) {
        *extraOutput2 = m_bayesianJointPdfPtr->lastComputedLogLikelihood();
      }
    } // if (rootSentValues)

    /////////////////////////////////////////////////
    // Prepare to exit routine or to stay in it
    /////////////////////////////////////////////////
    if (isSubRoot) {
      stayInRoutine = false; // Always for processor 0
    }
    else {
      // These were allocated above on this rank ('delete' on NULL is a no-op).
      delete internalValues;
      delete internalDirection;
      delete internalGrad;
      delete internalHessian;
      delete internalEffect;

      // A rank called without values of its own keeps serving evaluations
      // for as long as sub-rank 0 keeps sending values.
      stayInRoutine = (vecValues == NULL) && rootSentValues;
    }
  } while (stayInRoutine);

  return result;
}