void Teuchos::updateParametersFromYamlFileAndBroadcast(
  const std::string &yamlFileName, 
  const Teuchos::Ptr<Teuchos::ParameterList> &paramList, 
  const Teuchos::Comm<int> &comm, 
  bool overwrite)
{
  //RAII wrapper so the FILE* is closed on every exit path, including thrown exceptions
  struct SafeFile
  {
    SafeFile(const char* fname, const char* options)
    {
      handle = fopen(fname, options);
    }
    ~SafeFile()
    {
      if(handle)
        fclose(handle);
    }
    FILE* handle;
  };
  //BMK note: see teuchos/comm/src/Teuchos_XMLParameterListHelpers.cpp
  if(comm.getSize() == 1)
  {
    updateParametersFromYamlFile(yamlFileName, paramList);
  }
  else
  {
    if(comm.getRank() == 0)
    {
      //BMK: TODO! //reader.setAllowsDuplicateSublists(false);
      //create a string and load file contents into it
      //C way for readability and speed, same thing with C++ streams is slow & ugly
      SafeFile yamlFile(yamlFileName.c_str(), "rb");
      if(!yamlFile.handle)
      {
        throw std::runtime_error(std::string("Failed to open YAML file \"") + yamlFileName + "\" for reading.");
      }
      fseek(yamlFile.handle, 0, SEEK_END);
      int strsize = ftell(yamlFile.handle) + 1;
      rewind(yamlFile.handle);
      //Make the array raii
      Teuchos::ArrayRCP<char> contents(new char[strsize], 0, strsize, true);
      fread((void*) contents.get(), strsize - 1, 1, yamlFile.handle);
      contents.get()[strsize - 1] = 0;
      Teuchos::broadcast<int, int>(comm, 0, &strsize);
      Teuchos::broadcast<int, char>(comm, 0, strsize, contents.get());
      updateParametersFromYamlCString(contents.get(), paramList, overwrite);
    }
    else
    {
      int strsize;
      Teuchos::broadcast<int, int>(comm, 0, &strsize);
      Teuchos::ArrayRCP<char> contents(new char[strsize], 0, strsize, true);
      Teuchos::broadcast<int, char>(comm, 0, strsize, contents.get());
      updateParametersFromYamlCString(contents.get(), paramList, overwrite);
    }
  }
}
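//Usage sketch (hypothetical, not from the Trilinos sources): assumes a YAML file named
//"params.yaml" and a build with MPI and YAML support; the file name and main() wrapper
//below are illustrative only.
#include <iostream>
#include <Teuchos_GlobalMPISession.hpp>
#include <Teuchos_DefaultComm.hpp>
#include <Teuchos_ParameterList.hpp>
#include <Teuchos_YamlParameterListHelpers.hpp>

int main(int argc, char* argv[])
{
  Teuchos::GlobalMPISession mpiSession(&argc, &argv);
  Teuchos::RCP<const Teuchos::Comm<int> > comm =
    Teuchos::DefaultComm<int>::getComm();

  Teuchos::ParameterList params;
  //Every rank makes the same call; only rank 0 opens the file, then broadcasts it.
  Teuchos::updateParametersFromYamlFileAndBroadcast(
    "params.yaml", Teuchos::ptrFromRef(params), *comm, true);

  if(comm->getRank() == 0)
    params.print(std::cout);
  return 0;
}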
void
gathervPrint (std::ostream& out,
              const std::string& s,
              const Teuchos::Comm<int>& comm)
{
  using Teuchos::ArrayRCP;
  using Teuchos::CommRequest;
  using Teuchos::ireceive;
  using Teuchos::isend;
  using Teuchos::outArg;
  using Teuchos::RCP;
  using Teuchos::wait;

  const int myRank = comm.getRank ();
  const int rootRank = 0;
  if (myRank == rootRank) {
    out << s; // Proc 0 prints its buffer first
  }

  const int numProcs = comm.getSize ();
  const int sizeTag = 42;
  const int msgTag = 43;

  ArrayRCP<size_t> sizeBuf (1);
  ArrayRCP<char> msgBuf; // to be resized later
  RCP<CommRequest<int> > req;

  for (int p = 1; p < numProcs; ++p) {
    if (myRank == p) {
      sizeBuf[0] = s.size ();
      req = isend<int, size_t> (sizeBuf, rootRank, sizeTag, comm);
      (void) wait<int> (comm, outArg (req));

      const size_t msgSize = s.size ();
      msgBuf.resize (msgSize + 1); // for the '\0'
      std::copy (s.begin (), s.end (), msgBuf.begin ());
      msgBuf[msgSize] = '\0';

      req = isend<int, char> (msgBuf, rootRank, msgTag, comm);
      (void) wait<int> (comm, outArg (req));
    }
    else if (myRank == rootRank) {
      sizeBuf[0] = 0; // just a precaution
      req = ireceive<int, size_t> (sizeBuf, p, sizeTag, comm);
      (void) wait<int> (comm, outArg (req));

      const size_t msgSize = sizeBuf[0];
      msgBuf.resize (msgSize + 1); // for the '\0'
      req = ireceive<int, char> (msgBuf, p, msgTag, comm);
      (void) wait<int> (comm, outArg (req));

      std::string msg (msgBuf.getRawPtr ());
      out << msg;
    }
  }
}
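// Usage sketch (hypothetical): each rank builds its own message and rank 0 prints
// all of them in rank order.  The helper name below is illustrative only.
void printPerRankGreeting (std::ostream& out, const Teuchos::Comm<int>& comm)
{
  std::ostringstream os; // requires <sstream>
  os << "Hello from rank " << comm.getRank ()
     << " of " << comm.getSize () << std::endl;
  gathervPrint (out, os.str (), comm);
}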
Ordinal SpmdVectorSpaceUtilities::computeMapCode(
  const Teuchos::Comm<Ordinal> &comm, const Ordinal localSubDim
  )
{
  using Teuchos::outArg;
  using Teuchos::REDUCE_SUM;
  using Teuchos::reduceAll;
  //
  // Here we will make a map code out of just the local sub-dimension on each
  // processor.  If each processor has the same number of local elements, then
  // the map codes will be the same and this is all you need for RTOp
  // compatibility.
  //
  const int procRank = comm.getSize ();
  Ordinal mapCode = -1;
  Ordinal localCode = localSubDim % (procRank+1) + localSubDim;
  reduceAll<Ordinal, Ordinal> (comm, REDUCE_SUM, localCode, outArg (mapCode));
  return mapCode;
}
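// Hypothetical illustration (not part of SpmdVectorSpaceUtilities): when the local
// sizes match on every process, each process contributes the same localCode, so two
// layouts with identical per-process dimensions reduce to the same map code.  Equal
// codes therefore suggest, though do not strictly guarantee, compatible distributions.
bool layoutsLookCompatible(
  const Teuchos::Comm<Ordinal> &comm,
  const Ordinal localSubDimA, const Ordinal localSubDimB
  )
{
  const Ordinal codeA = SpmdVectorSpaceUtilities::computeMapCode(comm, localSubDimA);
  const Ordinal codeB = SpmdVectorSpaceUtilities::computeMapCode(comm, localSubDimB);
  return codeA == codeB;
}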
Example #4
Teuchos::RCP<NodeType>
createKokkosNode( const CMD & cmd , const Teuchos::Comm<int>& comm ) {
  Teuchos::ParameterList params;
  params.set("Verbose", 0);
  if ( cmd.USE_THREADS  )
    params.set("Num Threads", cmd.USE_THREADS);
  else if ( cmd.USE_OPENMP  )
    params.set("Num Threads", cmd.USE_OPENMP);
  if ( cmd.USE_NUMA  && cmd.USE_CORE_PER_NUMA  ) {
    params.set("Num NUMA", cmd.USE_NUMA );
    params.set("Num CoresPerNUMA", cmd.USE_CORE_PER_NUMA );
  }
  if ( cmd.USE_CUDA  )
    params.set("Device", cmd.USE_CUDA_DEV  );
  Teuchos::RCP<NodeType> node = Teuchos::rcp (new NodeType(params));

  if ( cmd.VERBOSE ) {
    typedef typename NodeType::execution_space Device;
    if (comm.getRank() == 0)
      Device::print_configuration(std::cout);
    std::cout.flush();
    if ( cmd.USE_CUDA  ) {
      for (int i=0; i<comm.getSize(); ++i) {
        comm.barrier();
        comm.barrier();
        comm.barrier();
        if ( i == comm.getRank() ) {
          std::cout << "MPI rank " << comm.getRank()
                    << " attached to CUDA device "
                    << cmd.USE_CUDA_DEV  << std::endl;
          std::cout.flush();
        }
        comm.barrier();
        comm.barrier();
        comm.barrier();
      }
    }
  }

  return node;
}
Example #5
    void ReportTimeAndMemory(Teuchos::Time const &timer, Teuchos::Comm<int> const &Comm)
    {
      double maxTime=0,minTime=0,avgTime=0;
      double localTime = timer.totalElapsedTime();
      int ntimers=1, root=0;
#ifdef HAVE_MPI
      MPI_Reduce(&localTime,&maxTime,ntimers,MPI_DOUBLE,MPI_MAX,root,MPI_COMM_WORLD);
      MPI_Reduce(&localTime,&minTime,ntimers,MPI_DOUBLE,MPI_MIN,root,MPI_COMM_WORLD);
      MPI_Reduce(&localTime,&avgTime,ntimers,MPI_DOUBLE,MPI_SUM,root,MPI_COMM_WORLD);
#else
      maxTime = localTime;
      minTime = localTime;
      avgTime = localTime;
#endif
      avgTime /= Comm.getSize();
      //std::cout << "(" << Comm.getRank() << ") " << localTime << std::endl;
      if (Comm.getRank()==0) {
        std::cout << "&&&" << timer.name()
                 << " max=" << maxTime << " min=" << minTime << " avg=" << avgTime << std::endl;
        std::cout << "&&&" << timer.name() << " " << MemUtils::PrintMemoryUsage() << std::endl;
      }
    } //ReportTimeAndMemory
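    // Hypothetical call site (illustrative only), assuming a Teuchos::Time that has
    // already wrapped the region of interest:
    void timeSetupPhase(Teuchos::Comm<int> const &Comm)
    {
      Teuchos::Time timer("Setup");
      timer.start();
      // ... work being timed ...
      timer.stop();
      ReportTimeAndMemory(timer, Comm); // rank 0 prints max/min/avg time and memory use
    }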
Example #6
  // TODO: this function can be templated (T=double).
  ArrayRCP<double> ReduceMaxMinAvg(double localValue, Teuchos::Comm<int> const &comm, int rootNode) {
    ArrayRCP<double> r = ArrayRCP<double>(3, localValue);

#ifdef HAVE_MPI
    double & maxTime = r[0], & minTime = r[1], & avgTime = r[2];

    // Note: workaround because reduce() is not implemented in Teuchos::Comm
    const Teuchos::MpiComm<int> & mpiComm = dynamic_cast<const Teuchos::MpiComm<int>& >(comm);
    MPI_Comm rawMpiComm = (*mpiComm.getRawMpiComm())();
    //

    // DEBUG std::cout << comm.getRank() << ": " << localValue << std::endl;

    int ntimers=1;
    MPI_Reduce(&localValue, &maxTime, ntimers, MPI_DOUBLE, MPI_MAX, rootNode, rawMpiComm);
    MPI_Reduce(&localValue, &minTime, ntimers, MPI_DOUBLE, MPI_MIN, rootNode, rawMpiComm);
    MPI_Reduce(&localValue, &avgTime, ntimers, MPI_DOUBLE, MPI_SUM, rootNode, rawMpiComm);
    avgTime /= comm.getSize();
#endif // HAVE_MPI

    return r;
  }
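  // Hypothetical usage (illustrative only): summarize a per-rank value on the root
  // rank.  The returned array holds {max, min, avg}; in a non-MPI build all three
  // entries simply equal the local value.
  void ReportLocalRowCount(double localRows, Teuchos::Comm<int> const &comm) {
    ArrayRCP<double> stats = ReduceMaxMinAvg(localRows, comm, 0);
    if (comm.getRank() == 0)
      std::cout << "rows: max=" << stats[0] << " min=" << stats[1]
                << " avg=" << stats[2] << std::endl;
  }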
Example #7
 Machine(const Teuchos::Comm<int> &comm) :
   numRanks(comm.getSize()), myRank(comm.getRank())
 { }