void Teuchos::updateParametersFromYamlFileAndBroadcast( const std::string &yamlFileName, const Teuchos::Ptr<Teuchos::ParameterList> ¶mList, const Teuchos::Comm<int> &comm, bool overwrite) { struct SafeFile { SafeFile(const char* fname, const char* options) { handle = fopen(fname, options); } ~SafeFile() { if(handle) fclose(handle); } FILE* handle; }; //BMK note: see teuchos/comm/src/Teuchos_XMLParameterListHelpers.cpp if(comm.getSize() == 1) { updateParametersFromYamlFile(yamlFileName, paramList); } else { if(comm.getRank() == 0) { //BMK: TODO! //reader.setAllowsDuplicateSublists(false); //create a string and load file contents into it //C way for readability and speed, same thing with C++ streams is slow & ugly SafeFile yamlFile(yamlFileName.c_str(), "rb"); if(!yamlFile.handle) { throw std::runtime_error(std::string("Failed to open YAML file \"") + yamlFileName + "\"for reading."); } fseek(yamlFile.handle, 0, SEEK_END); int strsize = ftell(yamlFile.handle) + 1; rewind(yamlFile.handle); //Make the array raii Teuchos::ArrayRCP<char> contents(new char[strsize], 0, strsize, true); fread((void*) contents.get(), strsize - 1, 1, yamlFile.handle); contents.get()[strsize - 1] = 0; Teuchos::broadcast<int, int>(comm, 0, &strsize); Teuchos::broadcast<int, char>(comm, 0, strsize, contents.get()); updateParametersFromYamlCString(contents.get(), paramList, overwrite); } else { int strsize; Teuchos::broadcast<int, int>(comm, 0, &strsize); Teuchos::ArrayRCP<char> contents(new char[strsize], 0, strsize, true); Teuchos::broadcast<int, char>(comm, 0, strsize, contents.get()); updateParametersFromYamlCString(contents.get(), paramList, overwrite); } } }
/// Write every process' string \c s to \c out on rank 0, in rank order.
///
/// Rank 0 writes its own string first. Then, for each rank p = 1, 2, ...,
/// rank p sends its string length followed by its NUL-terminated characters,
/// and rank 0 receives both and writes the text to \c out. \c out is only
/// written to on rank 0.
void
gathervPrint (std::ostream& out,
              const std::string& s,
              const Teuchos::Comm<int>& comm)
{
  using Teuchos::ArrayRCP;
  using Teuchos::CommRequest;
  using Teuchos::ireceive;
  using Teuchos::isend;
  using Teuchos::outArg;
  using Teuchos::RCP;
  using Teuchos::wait;

  const int rootRank = 0;
  const int sizeTag = 42; // tag of the "length" message
  const int msgTag = 43;  // tag of the "characters" message

  const int myRank = comm.getRank ();
  const bool iAmRoot = (myRank == rootRank);
  if (iAmRoot) {
    out << s; // Proc 0 prints its buffer first
  }
  const int numProcs = comm.getSize ();

  ArrayRCP<size_t> lengthBuf (1);
  ArrayRCP<char> textBuf; // sized once the length is known
  RCP<CommRequest<int> > request;

  for (int sender = 1; sender < numProcs; ++sender) {
    if (myRank == sender) {
      // Sending side: length first, then the characters plus terminator.
      lengthBuf[0] = s.size ();
      request = isend<int, size_t> (lengthBuf, rootRank, sizeTag, comm);
      (void) wait<int> (comm, outArg (request));

      const size_t textLen = s.size ();
      textBuf.resize (textLen + 1); // room for the '\0'
      std::copy (s.begin (), s.end (), textBuf.begin ());
      textBuf[textLen] = '\0';
      request = isend<int, char> (textBuf, rootRank, msgTag, comm);
      (void) wait<int> (comm, outArg (request));
    }
    else if (iAmRoot) {
      // Receiving side: learn the length, size the buffer, fetch the text.
      lengthBuf[0] = 0; // just a precaution
      request = ireceive<int, size_t> (lengthBuf, sender, sizeTag, comm);
      (void) wait<int> (comm, outArg (request));

      const size_t textLen = lengthBuf[0];
      textBuf.resize (textLen + 1); // room for the '\0'
      request = ireceive<int, char> (textBuf, sender, msgTag, comm);
      (void) wait<int> (comm, outArg (request));

      const std::string received (textBuf.getRawPtr ());
      out << received;
    }
  }
}
// Evaluate the FENL response one sample point at a time.
//
// For every entry of `points` the random variables of `coeff_function` are
// set to that point, fenl() is run, and the resulting response and CG
// iteration count are stored at the same index of `responses` and
// `iterations`. The per-sample performance data is accumulated into
// `perf_total`. `grouper` is not used by this overload.
void run_samples(
  const Teuchos::Comm<int>& comm ,
  ProblemType& problem,
  const CoeffFunctionType& coeff_function,
  const Teuchos::RCP<Kokkos::Example::FENL::SampleGrouping<double> >& grouper,
  const Teuchos::RCP<Teuchos::ParameterList>& fenlParams,
  const CMD & cmd ,
  const double bc_lower_value,
  const double bc_upper_value,
  const Teuchos::Array< Teuchos::Array<double> >& points,
  Teuchos::Array<double>& responses,
  Teuchos::Array<int>& iterations,
  Kokkos::Example::FENL::Perf& perf_total)
{
  typedef typename CoeffFunctionType::RandomVariableView RandVarView;
  typedef typename RandVarView::HostMirror HostRandVarView;

  RandVarView rv_dev = coeff_function.getRandomVariables();
  HostRandVarView rv_host = Kokkos::create_mirror_view(rv_dev);

  const int num_points = points.size();
  const int num_rv = rv_dev.dimension_0();

  for (int idx = 0; idx < num_points; ++idx) {

    // Stage this sample's coordinates on the host, then push them to the
    // view the coefficient function reads.
    for (int k = 0; k < num_rv; ++k) {
      rv_host(k) = points[idx][k];
    }
    Kokkos::deep_copy( rv_dev, rv_host );

    // Solve for this sample; fenl() writes the response through its last
    // argument.
    double sample_response = 0;
    Kokkos::Example::FENL::Perf sample_perf =
      fenl( problem , fenlParams ,
            cmd.PRINT , cmd.USE_TRIALS ,
            cmd.USE_ATOMIC , cmd.USE_BELOS , cmd.USE_MUELU ,
            cmd.USE_MEANBASED ,
            coeff_function , cmd.USE_ISOTROPIC ,
            cmd.USE_COEFF_SRC , cmd.USE_COEFF_ADV ,
            bc_lower_value , bc_upper_value ,
            sample_response);

    responses[idx] = sample_response;
    iterations[idx] = sample_perf.cg_iter_count;

    // Optional per-sample iteration report, printed by rank 0 only.
    if (cmd.PRINT_ITS && 0 == comm.getRank()) {
      std::cout << idx << " : " << sample_perf.cg_iter_count << " ( ";
      for (int k = 0; k < num_rv; ++k) {
        std::cout << rv_host(k) << " ";
      }
      std::cout << ")" << std::endl;
    }

    // Increment timing statistics
    perf_total.increment(sample_perf, !cmd.USE_BELOS);
  }
}
// Print memory usage to stream void print_memory_usage(std::ostream& s, const Teuchos::Comm<int>& comm) { MemUsage mem = get_memory_usage(comm); if ( 0 == comm.getRank() ) { s << std::fixed; s.precision(3); s << "Memory usage across all processors (MB):" << std::endl << "\t Max: " << mem.max_mem << std::endl << "\t Min: " << mem.min_mem << std::endl << "\t Tot: " << mem.tot_mem << std::endl; } }
/// Build the mapping strategy from a Thyra linear operator.
///
/// Stores the operator's domain and range vector spaces and converts each of
/// them to a Tpetra map using a duplicate of \c comm.
DefaultMappingStrategy::DefaultMappingStrategy(
    const RCP<const Thyra::LinearOpBase<double> > & thyraOp,
    const Teuchos::Comm<Thyra::Ordinal> & comm)
{
  // The maps are built on a duplicate of the caller's communicator.
  RCP<Teuchos::Comm<Thyra::Ordinal> > commCopy = comm.duplicate();

  // domain side
  domainSpace_ = thyraOp->domain();
  domainMap_   = Teko::TpetraHelpers::thyraVSToTpetraMap(*domainSpace_, commCopy);

  // range side
  rangeSpace_ = thyraOp->range();
  rangeMap_   = Teko::TpetraHelpers::thyraVSToTpetraMap(*rangeSpace_, commCopy);
}
/// Print the max / min / average of a timer across \c Comm, followed by the
/// memory usage string, on rank 0 of \c Comm.
///
/// \param timer  Timer whose totalElapsedTime() is reduced over all ranks.
/// \param Comm   Communicator over which the statistics are computed and
///               whose rank 0 prints them to std::cout.
///
/// The reductions run on \c Comm itself (previously MPI_COMM_WORLD was
/// hard-wired, which disagreed with the Comm.getSize() / Comm.getRank()
/// calls below whenever \c Comm was not the world communicator). Using
/// Teuchos::reduceAll also removes the need for a separate non-MPI branch.
/// Requires the Teuchos comm helpers (Teuchos_CommHelpers.hpp).
void ReportTimeAndMemory(Teuchos::Time const &timer, Teuchos::Comm<int> const &Comm)
{
  const double localTime = timer.totalElapsedTime();

  double maxTime = 0, minTime = 0, sumTime = 0;
  Teuchos::reduceAll<int, double>(Comm, Teuchos::REDUCE_MAX, localTime, Teuchos::outArg(maxTime));
  Teuchos::reduceAll<int, double>(Comm, Teuchos::REDUCE_MIN, localTime, Teuchos::outArg(minTime));
  Teuchos::reduceAll<int, double>(Comm, Teuchos::REDUCE_SUM, localTime, Teuchos::outArg(sumTime));

  const double avgTime = sumTime / Comm.getSize();

  //std::cout << "(" << Comm.getRank() << ") " << localTime << std::endl;
  if (Comm.getRank()==0) {
    std::cout << "&&&" << timer.name()
              << " max=" << maxTime << " min=" << minTime << " avg=" << avgTime << std::endl;
    std::cout << "&&&" << timer.name() << " " << MemUtils::PrintMemoryUsage() << std::endl;
  }
} //ReportTimeAndMemory
/// Produce a per-rank random seed derived from a single seed chosen on rank 0.
///
/// \param comm      Communicator; must be a Teuchos::MpiComm<int>.
/// \param initSeed  If greater than -1 it is converted to unsigned int and
///                  used as the base seed; otherwise the base seed is built
///                  from the current time of day.
/// \return  rank 0's base seed multiplied by (1 + rank of the caller).
///
/// \throws MueLu::Exceptions::RuntimeError if \c comm is not an MpiComm.
unsigned int generateSeed(Teuchos::Comm<int> const &comm, const double initSeed)
{
  unsigned int seed;
  if (initSeed > -1) {
    seed = Teuchos::as<unsigned int>(initSeed);
  } else {
    timeval t1;
    gettimeofday(&t1, NULL);
    // Multiply in unsigned arithmetic: the wrapped result is the same as
    // truncating the wide product, but cannot overflow a signed type.
    seed = static_cast<unsigned int>(t1.tv_usec) * static_cast<unsigned int>(t1.tv_sec);
  }

  // use variant of proc 0's seed so we can always reproduce the results
  //
  // Cast through a pointer: a failed reference cast throws std::bad_cast
  // before any check could run, whereas a failed pointer cast yields null
  // and lets the intended exception be raised.
  const Teuchos::MpiComm<int> *mpicomm = dynamic_cast<const Teuchos::MpiComm<int> *>(&comm);
  TEUCHOS_TEST_FOR_EXCEPTION(mpicomm==0,MueLu::Exceptions::RuntimeError,"cast to MpiComm failed");

  MPI_Bcast(&seed,1,MPI_UNSIGNED,0,*(mpicomm->getRawMpiComm()));

  seed = seed * (1+comm.getRank());
  return seed;
}
// Evaluate the response at sample points read from "samples.txt" and write
// the results to "responses.txt".
//
// Input format: the number of points, followed by cmd.USE_UQ_DIM values per
// point. Every rank reads the file. After run_samples() has filled the
// responses and iteration counts, rank 0 writes the point count and one
// "response iterations" line per point. The response mean and standard
// deviation in perf_total are set to zero.
//
// Throws std::runtime_error if samples.txt cannot be opened, holds an
// invalid point count, or ends before all coordinates have been read.
void run_file(
  const Teuchos::Comm<int>& comm ,
  ProblemType& problem ,
  const CoeffFunctionType & coeff_function,
  const Teuchos::RCP<Kokkos::Example::FENL::SampleGrouping<double> >& grouper,
  const Teuchos::RCP<Teuchos::ParameterList>& fenlParams,
  const CMD & cmd ,
  const double bc_lower_value,
  const double bc_upper_value,
  Kokkos::Example::FENL::Perf& perf_total)
{
  using Teuchos::Array;

  const int dim = cmd.USE_UQ_DIM;

  // Initialised so a failed extraction can never leave garbage behind.
  int num_quad_points = 0;
  Array< Array<double > > quad_points;

  // Open and read sample points
  std::ifstream fin("samples.txt");
  TEUCHOS_TEST_FOR_EXCEPTION(!fin.is_open(), std::runtime_error,
                             "Could not open samples.txt for reading.");

  fin >> num_quad_points;
  TEUCHOS_TEST_FOR_EXCEPTION(fin.fail() || num_quad_points < 0, std::runtime_error,
                             "Could not read a valid number of sample points from samples.txt.");

  quad_points.resize(num_quad_points);
  for (int i=0; i<num_quad_points; ++i) {
    quad_points[i].resize(dim);
    for (int j=0; j<dim; ++j)
      fin >> quad_points[i][j];
  }
  // A single check suffices: once an extraction fails the stream stays failed.
  TEUCHOS_TEST_FOR_EXCEPTION(fin.fail(), std::runtime_error,
                             "samples.txt does not contain " << num_quad_points
                             << " sample points of dimension " << dim << ".");
  fin.close();

  // Evaluate response at each quadrature point
  Array<double> responses(num_quad_points);
  Array<int> iterations(num_quad_points);
  run_samples(comm, problem, coeff_function, grouper,
              fenlParams, cmd,
              bc_lower_value, bc_upper_value,
              quad_points, responses, iterations, perf_total);

  // Write responses to file, including solver iterations
  if (comm.getRank() == 0) {
    std::ofstream fout("responses.txt");
    fout << num_quad_points << std::endl;
    for (int i=0; i<num_quad_points; ++i) {
      fout << responses[i] << " " << iterations[i] << std::endl;
    }
    fout.close();
  }

  perf_total.response_mean = 0.0;
  perf_total.response_std_dev = 0.0;
}
////////////////////////////////////////////////////////////////////////////// // // printMemoryUsage() // ////////////////////////////////////////////////////////////////////////////// void printMemoryUsage(std::ostream& s, const Teuchos::Comm<int>& comm, const MemUsage& mem) { using std::endl; if (0 == comm.getRank()) { s << "Estimated memory usage across all processors:" << endl << " Current Peak " << endl << " ------------ ------------" << endl << " Min: "; pretty(s, mem.currMin); pretty(s, mem.peakMin); s << endl << " Max: "; pretty(s, mem.currMax); pretty(s, mem.peakMax); s << endl << " Tot: "; pretty(s, mem.currTot); pretty(s, mem.peakTot); s << endl; } return; } // end of printMemoryUsage()
// Create the Kokkos node described by the command-line settings in `cmd`.
//
// The node is constructed from a parameter list holding the thread count,
// the optional NUMA layout and, when CUDA is requested, the device id.
// With cmd.VERBOSE set, rank 0 prints the execution-space configuration and,
// for CUDA runs, each rank reports its device in rank order.
Teuchos::RCP<NodeType> createKokkosNode( const CMD & cmd , const Teuchos::Comm<int>& comm )
{
  Teuchos::ParameterList params;
  params.set("Verbose", 0);

  // Thread count: the threads setting wins over the OpenMP setting.
  if ( cmd.USE_THREADS ) {
    params.set("Num Threads", cmd.USE_THREADS);
  }
  else if ( cmd.USE_OPENMP ) {
    params.set("Num Threads", cmd.USE_OPENMP);
  }

  // The NUMA layout is only passed on when both values are non-zero.
  if ( cmd.USE_NUMA && cmd.USE_CORE_PER_NUMA ) {
    params.set("Num NUMA", cmd.USE_NUMA );
    params.set("Num CoresPerNUMA", cmd.USE_CORE_PER_NUMA );
  }

  if ( cmd.USE_CUDA ) {
    params.set("Device", cmd.USE_CUDA_DEV );
  }

  Teuchos::RCP<NodeType> node = Teuchos::rcp (new NodeType(params));

  if ( !cmd.VERBOSE ) {
    return node;
  }

  typedef typename NodeType::execution_space Device;
  if (comm.getRank() == 0) {
    Device::print_configuration(std::cout);
  }
  std::cout.flush();

  if ( cmd.USE_CUDA ) {
    // One turn per rank; the barriers on both sides of the print keep the
    // output of different ranks apart.
    for (int turn = 0; turn < comm.getSize(); ++turn) {
      comm.barrier();
      comm.barrier();
      comm.barrier();
      if ( turn == comm.getRank() ) {
        std::cout << "MPI rank " << comm.getRank()
                  << " attached to CUDA device "
                  << cmd.USE_CUDA_DEV << std::endl;
        std::cout.flush();
      }
      comm.barrier();
      comm.barrier();
      comm.barrier();
    }
  }

  return node;
}
/// Compute a map code from the local sub-dimension of every process.
///
/// Each process contributes
/// <tt>localSubDim % (numProcs + 1) + localSubDim</tt>, where numProcs is
/// comm.getSize(), and the contributions are summed over \c comm.
///
/// \return The global sum, which is identical on all processes.
Ordinal SpmdVectorSpaceUtilities::computeMapCode(
  const Teuchos::Comm<Ordinal> &comm, const Ordinal localSubDim
  )
{
  using Teuchos::outArg;
  using Teuchos::REDUCE_SUM;
  using Teuchos::reduceAll;
  //
  // Here we will make a map code out of just the local sub-dimension on each
  // processor.  If each processor has the same number of local elements, then
  // the map codes will be the same and this is all you need for RTOp
  // compatibility.
  //
  // NOTE(review): the modulus is taken with the communicator *size*, not the
  // rank of this process -- confirm that this is what was intended.
  const int numProcs = comm.getSize ();
  const Ordinal myContribution = localSubDim % (numProcs + 1) + localSubDim;

  Ordinal globalCode = -1;
  reduceAll<Ordinal, Ordinal> (comm, REDUCE_SUM, myContribution, outArg (globalCode));
  return globalCode;
}
// TODO: this function can be templated (T=double). ArrayRCP<double> ReduceMaxMinAvg(double localValue, Teuchos::Comm<int> const &comm, int rootNode) { ArrayRCP<double> r = ArrayRCP<double>(3, localValue); #ifdef HAVE_MPI double & maxTime = r[0], & minTime = r[1], & avgTime = r[2]; // Note: workaround because reduce() is not implemented in Teuchos::Comm const Teuchos::MpiComm<int> & mpiComm = dynamic_cast<const Teuchos::MpiComm<int>& >(comm); MPI_Comm rawMpiComm = (*mpiComm.getRawMpiComm())(); // // DEBUG std::cout << comm.getRank() << ": " << localValue << std::endl; int ntimers=1; MPI_Reduce(&localValue, &maxTime, ntimers, MPI_DOUBLE, MPI_MAX, rootNode, rawMpiComm); MPI_Reduce(&localValue, &minTime, ntimers, MPI_DOUBLE, MPI_MIN, rootNode, rawMpiComm); MPI_Reduce(&localValue, &avgTime, ntimers, MPI_DOUBLE, MPI_SUM, rootNode, rawMpiComm); avgTime /= comm.getSize(); #endif // HAVE_MPI return r; }
// Evaluate the FENL response for an ensemble (Sacado::MP::Vector) problem.
//
// `grouper` partitions `points` into groups of Storage::static_size samples.
// Each group is solved with one fenl() call; the coefficients of the ensemble
// response and the (shared) CG iteration count are then stored in `responses`
// and `iterations` at the indices of the group's members. Several counters of
// the per-group performance record are multiplied by the ensemble size before
// being accumulated into `perf_total`.
void run_samples(
  const Teuchos::Comm<int>& comm ,
  Kokkos::Example::FENL::Problem< Sacado::MP::Vector<Storage>,
                                  Device, ElemOrder>& problem ,
  const CoeffFunctionType & coeff_function,
  const Teuchos::RCP<Kokkos::Example::FENL::SampleGrouping<double> >& grouper,
  const Teuchos::RCP<Teuchos::ParameterList>& fenlParams,
  const CMD & cmd ,
  const double bc_lower_value,
  const double bc_upper_value,
  const Teuchos::Array< Teuchos::Array<double> >& points,
  Teuchos::Array<double>& responses,
  Teuchos::Array<int>& iterations,
  Kokkos::Example::FENL::Perf& perf_total)
{
  using Teuchos::Array;
  using Teuchos::Ordinal;

  typedef typename Sacado::MP::Vector<Storage> Scalar;
  typedef typename CoeffFunctionType::RandomVariableView RV;
  typedef typename RV::HostMirror HRV;
  static const int VectorSize = Storage::static_size;

  // Group points into ensembles
  Array< Array<Ordinal> > groups;
  Ordinal num_duplicate = 0;
  grouper->group(VectorSize, points, groups, num_duplicate);

  const int num_groups = groups.size();

  RV rv_dev = coeff_function.getRandomVariables();
  HRV rv_host = Kokkos::create_mirror_view(rv_dev);
  const int num_rv = rv_dev.dimension_0();

  // Loop over quadrature point groups
  for (int g = 0; g < num_groups; ++g) {
    // Point indices belonging to this ensemble
    const Array<Ordinal>& members = groups[g];

    // Set random variables: slot `lane` of every variable takes the
    // coordinates of the lane's sample point.
    for (int lane = 0; lane < VectorSize; ++lane) {
      for (int k = 0; k < num_rv; ++k) {
        rv_host(k).fastAccessCoeff(lane) = points[members[lane]][k];
      }
    }
    Kokkos::deep_copy( rv_dev, rv_host );

    // Evaluate response at quadrature point
    Scalar ensemble_response = 0;
    Kokkos::Example::FENL::Perf group_perf =
      fenl( problem , fenlParams ,
            cmd.PRINT , cmd.USE_TRIALS ,
            cmd.USE_ATOMIC , cmd.USE_BELOS , cmd.USE_MUELU ,
            cmd.USE_MEANBASED ,
            coeff_function , cmd.USE_ISOTROPIC ,
            cmd.USE_COEFF_SRC , cmd.USE_COEFF_ADV ,
            bc_lower_value , bc_upper_value ,
            ensemble_response);

    // Save response -- note currently all samples within an ensemble
    // get the same number of iterations
    for (int lane = 0; lane < VectorSize; ++lane) {
      const Ordinal point_index = members[lane];
      responses[point_index] = ensemble_response.coeff(lane);
      iterations[point_index] = group_perf.cg_iter_count;
    }

    // Optional per-group report, printed by rank 0 only.
    if (cmd.PRINT_ITS && 0 == comm.getRank()) {
      std::cout << g << " : " << group_perf.cg_iter_count << " ( ";
      for (int lane = 0; lane < VectorSize; ++lane) {
        std::cout << members[lane] << " ";
      }
      std::cout << ")" << " ( ";
      for (int k = 0; k < num_rv; ++k) {
        std::cout << rv_host(k) << " ";
      }
      std::cout << ")" << std::endl;
    }

    // Adjust timing statistics for ensemble size
    group_perf.newton_iter_count *= VectorSize;
    group_perf.cg_iter_count *= VectorSize;
    group_perf.map_ratio *= VectorSize;
    group_perf.fill_node_set *= VectorSize;
    group_perf.scan_node_count *= VectorSize;
    group_perf.fill_graph_entries *= VectorSize;
    group_perf.sort_graph_entries *= VectorSize;
    group_perf.fill_element_graph *= VectorSize;

    // Increment timing statistics
    perf_total.increment(group_perf, !cmd.USE_BELOS);
  }
}
void run_tasmanian( const Teuchos::Comm<int>& comm , ProblemType& problem , const CoeffFunctionType & coeff_function, const Teuchos::RCP<Kokkos::Example::FENL::SampleGrouping<double> >& grouper, const Teuchos::RCP<Teuchos::ParameterList>& fenlParams, const CMD & cmd , const double bc_lower_value, const double bc_upper_value, Kokkos::Example::FENL::Perf& perf_total) { #ifdef HAVE_TRILINOSCOUPLINGS_TASMANIAN using Teuchos::Array; // Start up Tasmanian TasGrid::TasmanianSparseGrid sparseGrid; // Algorithmic parameters const int dim = cmd.USE_UQ_DIM; const int qoi = 2; const int initial_level = cmd.USE_UQ_INIT_LEVEL; const int max_level = cmd.USE_UQ_MAX_LEVEL; const int max_order = 1; const double tol = cmd.USE_UQ_TOL; const TasGrid::TypeOneDRule rule = TasGrid::rule_localp; const TasGrid::TypeRefinement refinement = TasGrid::refine_classic; const int qoi_to_refine = 0; // Create the initial grid sparseGrid.makeLocalPolynomialGrid(dim, qoi, initial_level, max_order, rule); int num_new_points = sparseGrid.getNumNeeded(); perf_total.uq_count = num_new_points; int level = initial_level; while (num_new_points > 0 && level <= max_level) { if (cmd.PRINT_ITS && 0 == comm.getRank()) { std::cout << "Tasmanian grid level " << level << ", " << num_new_points << " points" << std::endl; } // Get the sample points const double *points = sparseGrid.getNeededPoints(); // Copy points into Teuchos arrays Array< Array<double> > quad_points(num_new_points); for (int i=0; i<num_new_points; ++i) { quad_points[i].resize(dim); for (int j=0; j<dim; ++j) quad_points[i][j] = points[dim*i+j]; } // Evaluate response on those points Array<double> responses(num_new_points); Array<int> iterations(num_new_points); run_samples(comm, problem, coeff_function, grouper, fenlParams, cmd, bc_lower_value, bc_upper_value, quad_points, responses, iterations, perf_total); // Load responses back into Tasmanian Array<double> tas_responses(qoi*num_new_points); for (int i=0; i<num_new_points; ++i) { 
tas_responses[i*qoi] = responses[i]; // for mean tas_responses[i*qoi+1] = responses[i]*responses[i]; // for variance } sparseGrid.loadNeededPoints(&tas_responses[0]); // Refine the grid sparseGrid.setSurplusRefinement(tol, refinement, qoi_to_refine); // Get the number of new points num_new_points = sparseGrid.getNumNeeded(); perf_total.uq_count += num_new_points; ++level; } if (level > max_level && comm.getRank() == 0) std::cout << "Warning: Tasmanian did not achieve refinement tolerance " << tol << std::endl; // Compute mean and standard deviation of response double s[qoi]; sparseGrid.integrate(s); const double weight = std::pow(0.5, dim); // uniform measure in dim dimensions s[0] *= weight; s[1] *= weight; perf_total.response_mean = s[0]; perf_total.response_std_dev = std::sqrt(s[1]-s[0]*s[0]); #else TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "TASMANIAN not available. Please re-configure with TASMANIAN TPL enabled."); #endif }
/// Record the size of \c comm and the calling process' rank in it.
Machine(const Teuchos::Comm<int> &comm)
  : numRanks(comm.getSize()),
    myRank(comm.getRank())
{
}