void Teuchos::updateParametersFromYamlFileAndBroadcast(
  const std::string &yamlFileName,
  const Teuchos::Ptr<Teuchos::ParameterList> &paramList,
  const Teuchos::Comm<int> &comm,
  bool overwrite)
{
  struct SafeFile
  {
    SafeFile(const char* fname, const char* options)
    {
      handle = fopen(fname, options);
    }
    ~SafeFile()
    {
      if(handle)
        fclose(handle);
    }
    FILE* handle;
  };
  //BMK note: see teuchos/comm/src/Teuchos_XMLParameterListHelpers.cpp
  if(comm.getSize() == 1)
  {
    updateParametersFromYamlFile(yamlFileName, paramList);
  }
  else
  {
    if(comm.getRank() == 0)
    {
      //BMK: TODO!
      //reader.setAllowsDuplicateSublists(false);
      //create a string and load file contents into it
      //C way for readability and speed, same thing with C++ streams is slow & ugly
      SafeFile yamlFile(yamlFileName.c_str(), "rb");
      if(!yamlFile.handle)
      {
        throw std::runtime_error(
          std::string("Failed to open YAML file \"") + yamlFileName + "\" for reading.");
      }
      fseek(yamlFile.handle, 0, SEEK_END);
      int strsize = ftell(yamlFile.handle) + 1;
      rewind(yamlFile.handle);
      //Make the array raii
      Teuchos::ArrayRCP<char> contents(new char[strsize], 0, strsize, true);
      fread((void*) contents.get(), strsize - 1, 1, yamlFile.handle);
      contents.get()[strsize - 1] = 0;
      Teuchos::broadcast<int, int>(comm, 0, &strsize);
      Teuchos::broadcast<int, char>(comm, 0, strsize, contents.get());
      updateParametersFromYamlCString(contents.get(), paramList, overwrite);
    }
    else
    {
      int strsize;
      Teuchos::broadcast<int, int>(comm, 0, &strsize);
      Teuchos::ArrayRCP<char> contents(new char[strsize], 0, strsize, true);
      Teuchos::broadcast<int, char>(comm, 0, strsize, contents.get());
      updateParametersFromYamlCString(contents.get(), paramList, overwrite);
    }
  }
}
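// Usage sketch (added for illustration, not part of the original source):
// rank 0 reads "input.yaml" and the contents are broadcast so every rank
// builds an identical ParameterList. The file name and the surrounding
// function are assumptions.
#include <Teuchos_YamlParameterListHelpers.hpp>
#include <Teuchos_DefaultComm.hpp>

void exampleReadYamlAndBroadcast()
{
  Teuchos::RCP<const Teuchos::Comm<int> > comm =
    Teuchos::DefaultComm<int>::getComm();
  Teuchos::ParameterList params;
  // Pass overwrite=true so values from the file replace existing entries.
  Teuchos::updateParametersFromYamlFileAndBroadcast(
    "input.yaml", Teuchos::ptr(&params), *comm, true);
}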
void gathervPrint (std::ostream& out,
                   const std::string& s,
                   const Teuchos::Comm<int>& comm)
{
  using Teuchos::ArrayRCP;
  using Teuchos::CommRequest;
  using Teuchos::ireceive;
  using Teuchos::isend;
  using Teuchos::outArg;
  using Teuchos::RCP;
  using Teuchos::wait;

  const int myRank = comm.getRank ();
  const int rootRank = 0;
  if (myRank == rootRank) {
    out << s; // Proc 0 prints its buffer first
  }

  const int numProcs = comm.getSize ();
  const int sizeTag = 42;
  const int msgTag = 43;

  ArrayRCP<size_t> sizeBuf (1);
  ArrayRCP<char> msgBuf; // to be resized later
  RCP<CommRequest<int> > req;

  for (int p = 1; p < numProcs; ++p) {
    if (myRank == p) {
      sizeBuf[0] = s.size ();
      req = isend<int, size_t> (sizeBuf, rootRank, sizeTag, comm);
      (void) wait<int> (comm, outArg (req));

      const size_t msgSize = s.size ();
      msgBuf.resize (msgSize + 1); // for the '\0'
      std::copy (s.begin (), s.end (), msgBuf.begin ());
      msgBuf[msgSize] = '\0';
      req = isend<int, char> (msgBuf, rootRank, msgTag, comm);
      (void) wait<int> (comm, outArg (req));
    }
    else if (myRank == rootRank) {
      sizeBuf[0] = 0; // just a precaution
      req = ireceive<int, size_t> (sizeBuf, p, sizeTag, comm);
      (void) wait<int> (comm, outArg (req));

      const size_t msgSize = sizeBuf[0];
      msgBuf.resize (msgSize + 1); // for the '\0'
      req = ireceive<int, char> (msgBuf, p, msgTag, comm);
      (void) wait<int> (comm, outArg (req));

      std::string msg (msgBuf.getRawPtr ());
      out << msg;
    }
  }
}
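// Usage sketch (illustrative, not from the original source): each rank
// composes its own message, and gathervPrint writes them to rank 0's stream
// in rank order, without interleaving. The surrounding function is an
// assumption.
#include <iostream>
#include <sstream>

void examplePerRankReport(const Teuchos::Comm<int>& comm)
{
  std::ostringstream os;
  os << "Rank " << comm.getRank() << ": local work done\n";
  // Only rank 0 writes to the stream; all other ranks send their strings
  // to rank 0 over point-to-point messages with tags 42/43.
  gathervPrint(std::cout, os.str(), comm);
}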
Ordinal SpmdVectorSpaceUtilities::computeMapCode(
  const Teuchos::Comm<Ordinal> &comm, const Ordinal localSubDim
  )
{
  using Teuchos::outArg;
  using Teuchos::REDUCE_SUM;
  using Teuchos::reduceAll;
  //
  // Here we will make a map code out of just the local sub-dimension on each
  // processor.  If each processor has the same number of local elements, then
  // the map codes will be the same and this is all you need for RTOp
  // compatibility.
  //
  const int procRank = comm.getSize (); // NOTE: despite the name, this holds the number of processes
  Ordinal mapCode = -1;
  Ordinal localCode = localSubDim % (procRank+1) + localSubDim;
  reduceAll<Ordinal, Ordinal> (comm, REDUCE_SUM, localCode, outArg (mapCode));
  return mapCode;
}
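// Usage sketch (illustrative): two distributions get the same map code when,
// for example, every process owns the same localSubDim, which is the cheap
// compatibility check described in the comment above. The namespace
// qualification and the surrounding function are assumptions.
bool exampleSameMapCode(const Teuchos::Comm<Teuchos::Ordinal>& comm,
                        Teuchos::Ordinal localDimA,
                        Teuchos::Ordinal localDimB)
{
  const Teuchos::Ordinal codeA =
    Thyra::SpmdVectorSpaceUtilities::computeMapCode(comm, localDimA);
  const Teuchos::Ordinal codeB =
    Thyra::SpmdVectorSpaceUtilities::computeMapCode(comm, localDimB);
  // Equal codes indicate (but do not prove) matching distributions.
  return codeA == codeB;
}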
Teuchos::RCP<NodeType> createKokkosNode( const CMD & cmd ,
                                         const Teuchos::Comm<int>& comm )
{
  Teuchos::ParameterList params;
  params.set("Verbose", 0);
  if ( cmd.USE_THREADS )
    params.set("Num Threads", cmd.USE_THREADS);
  else if ( cmd.USE_OPENMP )
    params.set("Num Threads", cmd.USE_OPENMP);
  if ( cmd.USE_NUMA && cmd.USE_CORE_PER_NUMA ) {
    params.set("Num NUMA", cmd.USE_NUMA );
    params.set("Num CoresPerNUMA", cmd.USE_CORE_PER_NUMA );
  }
  if ( cmd.USE_CUDA )
    params.set("Device", cmd.USE_CUDA_DEV );
  Teuchos::RCP<NodeType> node = Teuchos::rcp (new NodeType(params));

  if ( cmd.VERBOSE ) {
    typedef typename NodeType::execution_space Device;
    if (comm.getRank() == 0)
      Device::print_configuration(std::cout);
    std::cout.flush();
    if ( cmd.USE_CUDA ) {
      for (int i=0; i<comm.getSize(); ++i) {
        // Repeated barriers are a crude attempt to serialize the per-rank
        // output across processes.
        comm.barrier();
        comm.barrier();
        comm.barrier();
        if ( i == comm.getRank() ) {
          std::cout << "MPI rank " << comm.getRank()
                    << " attached to CUDA device " << cmd.USE_CUDA_DEV
                    << std::endl;
          std::cout.flush();
        }
        comm.barrier();
        comm.barrier();
        comm.barrier();
      }
    }
  }
  return node;
}
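// Usage sketch (illustrative; CMD and NodeType are defined by the
// surrounding example driver and are only assumed here):
//
//   CMD cmd;                 // parsed command-line settings
//   cmd.USE_THREADS = 4;     // request 4 host threads
//   cmd.VERBOSE = true;      // print the device configuration on rank 0
//   Teuchos::RCP<NodeType> node = createKokkosNode(cmd, *comm);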
void ReportTimeAndMemory(Teuchos::Time const &timer, Teuchos::Comm<int> const &Comm)
{
  double maxTime = 0, minTime = 0, avgTime = 0;
  double localTime = timer.totalElapsedTime();
  int ntimers = 1, root = 0;
#ifdef HAVE_MPI
  MPI_Reduce(&localTime, &maxTime, ntimers, MPI_DOUBLE, MPI_MAX, root, MPI_COMM_WORLD);
  MPI_Reduce(&localTime, &minTime, ntimers, MPI_DOUBLE, MPI_MIN, root, MPI_COMM_WORLD);
  MPI_Reduce(&localTime, &avgTime, ntimers, MPI_DOUBLE, MPI_SUM, root, MPI_COMM_WORLD);
#else
  maxTime = localTime;
  minTime = localTime;
  avgTime = localTime;
#endif
  avgTime /= Comm.getSize();
  //std::cout << "(" << Comm.getRank() << ") " << localTime << std::endl;
  if (Comm.getRank() == 0) {
    std::cout << "&&&" << timer.name()
              << " max=" << maxTime
              << " min=" << minTime
              << " avg=" << avgTime << std::endl;
    std::cout << "&&&" << timer.name()
              << " " << MemUtils::PrintMemoryUsage() << std::endl;
  }
} //ReportTimeAndMemory
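// Usage sketch (illustrative): time a region with Teuchos::Time via the RAII
// TimeMonitor, then report max/min/avg seconds across ranks. The timer label
// and the surrounding function are assumptions.
#include <Teuchos_Time.hpp>
#include <Teuchos_TimeMonitor.hpp>

void exampleTimedRegion(const Teuchos::Comm<int>& comm)
{
  Teuchos::Time timer("Example: solve");
  {
    Teuchos::TimeMonitor tm(timer); // starts the timer; stops on scope exit
    // ... work to be timed ...
  }
  ReportTimeAndMemory(timer, comm);
}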
// TODO: this function can be templated (T=double).
ArrayRCP<double> ReduceMaxMinAvg(double localValue, Teuchos::Comm<int> const &comm, int rootNode)
{
  ArrayRCP<double> r = ArrayRCP<double>(3, localValue);
#ifdef HAVE_MPI
  double & maxTime = r[0], & minTime = r[1], & avgTime = r[2];

  // Note: workaround because reduce() is not implemented in Teuchos::Comm
  const Teuchos::MpiComm<int> & mpiComm =
    dynamic_cast<const Teuchos::MpiComm<int>& >(comm);
  MPI_Comm rawMpiComm = (*mpiComm.getRawMpiComm())();

  // DEBUG: std::cout << comm.getRank() << ": " << localValue << std::endl;

  int ntimers = 1;
  MPI_Reduce(&localValue, &maxTime, ntimers, MPI_DOUBLE, MPI_MAX, rootNode, rawMpiComm);
  MPI_Reduce(&localValue, &minTime, ntimers, MPI_DOUBLE, MPI_MIN, rootNode, rawMpiComm);
  MPI_Reduce(&localValue, &avgTime, ntimers, MPI_DOUBLE, MPI_SUM, rootNode, rawMpiComm);
  avgTime /= comm.getSize();
#endif // HAVE_MPI
  return r;
}
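// Usage sketch (illustrative): reduce one value per rank and print the
// spread on the root. Only the root's entries of r hold the reduced results;
// non-root ranks keep their local value in all three slots, since MPI_Reduce
// only defines the receive buffer at the root.
void exampleReduceStats(const Teuchos::Comm<int>& comm)
{
  const double localValue = static_cast<double>(comm.getRank());
  ArrayRCP<double> r = ReduceMaxMinAvg(localValue, comm, 0 /*rootNode*/);
  if (comm.getRank() == 0)
    std::cout << "max=" << r[0] << " min=" << r[1]
              << " avg=" << r[2] << std::endl;
}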
Machine(const Teuchos::Comm<int> &comm)
  : numRanks(comm.getSize()),
    myRank(comm.getRank())
{ }