/// Update \c paramList from a YAML file that is read on rank 0 only and
/// then broadcast to every process in \c comm.
///
/// With a single process the call is forwarded to
/// updateParametersFromYamlFile().
/// NOTE(review): that path does not receive \c overwrite -- confirm this
/// is intended.
///
/// With several processes, rank 0 loads the whole file into a
/// NUL-terminated buffer, the buffer size and contents are broadcast, and
/// every rank parses the text with updateParametersFromYamlCString().
///
/// \param yamlFileName  Path of the YAML file (only opened on rank 0).
/// \param paramList     Parameter list to update.
/// \param comm          Communicator over which the text is broadcast.
/// \param overwrite     Forwarded to updateParametersFromYamlCString().
///
/// \throws std::runtime_error on every rank if rank 0 could not open or
///   completely read the file.  The outcome is communicated through the
///   size broadcast, so no rank is left waiting in a collective.
void Teuchos::updateParametersFromYamlFileAndBroadcast(
  const std::string &yamlFileName, 
  const Teuchos::Ptr<Teuchos::ParameterList> &paramList, 
  const Teuchos::Comm<int> &comm, 
  bool overwrite)
{
  // Closes the FILE* when it goes out of scope, including on exceptions.
  struct SafeFile
  {
    SafeFile(const char* fname, const char* options)
    {
      handle = fopen(fname, options);
    }
    ~SafeFile()
    {
      if(handle)
        fclose(handle);
    }
    FILE* handle;
  };
  //BMK note: see teuchos/comm/src/Teuchos_XMLParameterListHelpers.cpp
  if(comm.getSize() == 1)
  {
    updateParametersFromYamlFile(yamlFileName, paramList);
    return;
  }

  const int rootRank = 0;
  // Number of chars to broadcast, including the trailing NUL.  A valid
  // payload is therefore always >= 1; 0 tells the other ranks that rank 0
  // failed to load the file.
  int strsize = 0;
  Teuchos::ArrayRCP<char> contents;
  std::string errorMessage;

  if(comm.getRank() == rootRank)
  {
    //BMK: TODO! //reader.setAllowsDuplicateSublists(false);
    SafeFile yamlFile(yamlFileName.c_str(), "rb");
    if(!yamlFile.handle)
    {
      errorMessage = std::string("Failed to open YAML file \"") + yamlFileName + "\" for reading.";
    }
    else
    {
      // Determine the file length by seeking to the end.
      long fileSize = -1;
      if(fseek(yamlFile.handle, 0, SEEK_END) == 0)
        fileSize = ftell(yamlFile.handle);
      const long totalSize = fileSize + 1; // room for the '\0'
      const int totalSizeAsInt = static_cast<int>(totalSize);
      // The size travels as an int, so it must be positive and survive the
      // round trip through int unchanged.
      if(fileSize < 0 || totalSizeAsInt <= 0 || static_cast<long>(totalSizeAsInt) != totalSize)
      {
        errorMessage = std::string("Failed to determine a usable size for YAML file \"") + yamlFileName + "\".";
      }
      else
      {
        rewind(yamlFile.handle);
        //Make the array raii
        contents = Teuchos::ArrayRCP<char>(new char[totalSizeAsInt], 0, totalSizeAsInt, true);
        const size_t wanted = static_cast<size_t>(fileSize);
        const size_t got = fread(contents.get(), 1, wanted, yamlFile.handle);
        if(got != wanted)
        {
          errorMessage = std::string("Failed to read YAML file \"") + yamlFileName + "\".";
        }
        else
        {
          contents.get()[totalSizeAsInt - 1] = 0;
          strsize = totalSizeAsInt;
        }
      }
    }
  }

  // Every rank takes part in this broadcast, whether or not rank 0
  // succeeded, so that all ranks agree on the outcome.
  Teuchos::broadcast<int, int>(comm, rootRank, &strsize);
  if(strsize <= 0)
  {
    if(errorMessage.empty())
      errorMessage = std::string("Rank 0 failed to read YAML file \"") + yamlFileName + "\".";
    throw std::runtime_error(errorMessage);
  }

  if(comm.getRank() != rootRank)
    contents = Teuchos::ArrayRCP<char>(new char[strsize], 0, strsize, true);
  Teuchos::broadcast<int, char>(comm, rootRank, strsize, contents.get());
  updateParametersFromYamlCString(contents.get(), paramList, overwrite);
}
/// Write every process's string \c s to \c out on rank 0.
///
/// Rank 0 writes its own string first and then, for ranks 1, 2, ... in
/// order, receives that rank's string length followed by its characters
/// and writes them.  All other ranks only send.
void
gathervPrint (std::ostream& out,
              const std::string& s,
              const Teuchos::Comm<int>& comm)
{
  using Teuchos::ArrayRCP;
  using Teuchos::CommRequest;
  using Teuchos::ireceive;
  using Teuchos::isend;
  using Teuchos::outArg;
  using Teuchos::RCP;
  using Teuchos::wait;

  const int root = 0;
  const int lengthTag = 42;
  const int payloadTag = 43;

  const int me = comm.getRank ();
  if (me == root) {
    out << s; // the root's own text goes out before anyone else's
  }
  const int commSize = comm.getSize ();

  ArrayRCP<size_t> lengthBuf (1);
  ArrayRCP<char> payload; // sized once the message length is known
  RCP<CommRequest<int> > request;

  if (me != root) {
    // Sender side: first the length, then the NUL-terminated characters.
    lengthBuf[0] = s.size ();
    request = isend<int, size_t> (lengthBuf, root, lengthTag, comm);
    (void) wait<int> (comm, outArg (request));

    const size_t numChars = s.size ();
    payload.resize (numChars + 1); // one extra slot for the '\0'
    std::copy (s.begin (), s.end (), payload.begin ());
    payload[numChars] = '\0';

    request = isend<int, char> (payload, root, payloadTag, comm);
    (void) wait<int> (comm, outArg (request));
    return;
  }

  // Receiver side: drain the other ranks one at a time, in rank order.
  for (int src = 1; src < commSize; ++src) {
    lengthBuf[0] = 0; // defensive reset before each receive
    request = ireceive<int, size_t> (lengthBuf, src, lengthTag, comm);
    (void) wait<int> (comm, outArg (request));

    const size_t numChars = lengthBuf[0];
    payload.resize (numChars + 1); // one extra slot for the '\0'
    request = ireceive<int, char> (payload, src, payloadTag, comm);
    (void) wait<int> (comm, outArg (request));

    const std::string received (payload.getRawPtr ());
    out << received;
  }
}
// Evaluate the FENL response one sample point at a time.
//
// For each entry of `points` the random variables of `coeff_function` are
// set to that point, fenl() is run, and the response and CG iteration
// count are stored at the same index of `responses` / `iterations`.
// Every run's Perf record is accumulated into `perf_total`.
// `grouper` is not used by this overload.
void run_samples(
  const Teuchos::Comm<int>& comm ,
  ProblemType& problem,
  const CoeffFunctionType& coeff_function,
  const Teuchos::RCP<Kokkos::Example::FENL::SampleGrouping<double> >& grouper,
  const Teuchos::RCP<Teuchos::ParameterList>& fenlParams,
  const CMD & cmd ,
  const double bc_lower_value,
  const double bc_upper_value,
  const Teuchos::Array< Teuchos::Array<double> >& points,
  Teuchos::Array<double>& responses,
  Teuchos::Array<int>& iterations,
  Kokkos::Example::FENL::Perf& perf_total)
{
  typedef typename CoeffFunctionType::RandomVariableView RandVarView;
  typedef typename RandVarView::HostMirror HostRandVarView;

  RandVarView rv_view = coeff_function.getRandomVariables();
  HostRandVarView rv_host = Kokkos::create_mirror_view(rv_view);

  const int point_count = points.size();
  const int rv_count = rv_view.dimension_0();

  for (int s = 0; s < point_count; ++s) {

    // Fill the host mirror with this point's coordinates, then copy the
    // mirror into the view returned by the coefficient function.
    for (int k = 0; k < rv_count; ++k) {
      rv_host(k) = points[s][k];
    }
    Kokkos::deep_copy( rv_view, rv_host );

    // Run the solve; fenl() writes the response through its last argument.
    double sample_response = 0;
    Kokkos::Example::FENL::Perf sample_perf =
      fenl( problem , fenlParams ,
            cmd.PRINT , cmd.USE_TRIALS , cmd.USE_ATOMIC ,
            cmd.USE_BELOS , cmd.USE_MUELU , cmd.USE_MEANBASED ,
            coeff_function , cmd.USE_ISOTROPIC ,
            cmd.USE_COEFF_SRC , cmd.USE_COEFF_ADV ,
            bc_lower_value , bc_upper_value ,
            sample_response);

    responses[s] = sample_response;
    iterations[s] = sample_perf.cg_iter_count;

    // Optional per-sample report, printed by rank 0 only.
    if (cmd.PRINT_ITS && 0 == comm.getRank()) {
      std::cout << s << " : " << sample_perf.cg_iter_count << " ( ";
      for (int k = 0; k < rv_count; ++k) {
        std::cout << rv_host(k) << " ";
      }
      std::cout << ")" << std::endl;
    }

    // Fold this run into the running totals.
    perf_total.increment(sample_perf, !cmd.USE_BELOS);
  }
}
Esempio n. 4
0
// Write the max / min / total memory figures obtained from
// get_memory_usage(comm) to `s`.  get_memory_usage() is called on every
// rank; only rank 0 writes.
void print_memory_usage(std::ostream& s, const Teuchos::Comm<int>& comm) {
    MemUsage usage = get_memory_usage(comm);
    if ( comm.getRank() != 0 ) {
        return;
    }
    // Fixed notation with three digits after the decimal point.
    s << std::fixed;
    s.precision(3);
    s << "Memory usage across all processors (MB):" << std::endl;
    s << "\t Max:  " << usage.max_mem << std::endl;
    s << "\t Min:  " << usage.min_mem << std::endl;
    s << "\t Tot:  " << usage.tot_mem << std::endl;
}
Esempio n. 5
0
// Build a NodeType from the command-line settings in `cmd`.
//
// The parameter list passed to the NodeType constructor carries
// "Verbose" = 0 plus, depending on which `cmd` fields are non-zero, the
// thread count, NUMA layout and CUDA device.  When cmd.VERBOSE is set,
// rank 0 prints the execution-space configuration and, for CUDA runs,
// each rank in turn prints which device it uses.
Teuchos::RCP<NodeType>
createKokkosNode( const CMD & cmd , const Teuchos::Comm<int>& comm ) {
  Teuchos::ParameterList nodeParams;
  nodeParams.set("Verbose", 0);

  // USE_THREADS takes precedence over USE_OPENMP for the thread count.
  if ( cmd.USE_THREADS ) {
    nodeParams.set("Num Threads", cmd.USE_THREADS);
  }
  else if ( cmd.USE_OPENMP ) {
    nodeParams.set("Num Threads", cmd.USE_OPENMP);
  }

  // NUMA settings are only passed on when both values are given.
  if ( cmd.USE_NUMA && cmd.USE_CORE_PER_NUMA ) {
    nodeParams.set("Num NUMA", cmd.USE_NUMA );
    nodeParams.set("Num CoresPerNUMA", cmd.USE_CORE_PER_NUMA );
  }

  if ( cmd.USE_CUDA ) {
    nodeParams.set("Device", cmd.USE_CUDA_DEV );
  }

  Teuchos::RCP<NodeType> node = Teuchos::rcp (new NodeType(nodeParams));

  if ( !cmd.VERBOSE ) {
    return node;
  }

  typedef typename NodeType::execution_space ExecSpace;
  if (comm.getRank() == 0) {
    ExecSpace::print_configuration(std::cout);
  }
  std::cout.flush();

  if ( cmd.USE_CUDA ) {
    // One rank prints per turn; three barriers surround each turn.
    const int rankCount = comm.getSize();
    for (int turn = 0; turn < rankCount; ++turn) {
      for (int b = 0; b < 3; ++b) {
        comm.barrier();
      }
      if ( turn == comm.getRank() ) {
        std::cout << "MPI rank " << comm.getRank()
                  << " attached to CUDA device "
                  << cmd.USE_CUDA_DEV  << std::endl;
        std::cout.flush();
      }
      for (int b = 0; b < 3; ++b) {
        comm.barrier();
      }
    }
  }

  return node;
}
Esempio n. 6
0
// Produce a per-rank random seed derived from a value chosen on rank 0.
//
// If initSeed > -1 it is converted to unsigned int and used as the base
// seed; otherwise the base seed is built from the current time of day.
// The base seed is then overwritten with rank 0's value via MPI_Bcast and
// multiplied by (1 + rank), so each rank gets a different seed.
//
// Throws MueLu::Exceptions::RuntimeError if `comm` is not a
// Teuchos::MpiComm<int>.
unsigned int generateSeed(Teuchos::Comm<int> const &comm, const double initSeed)
{
  unsigned int seed;
  if (initSeed > -1) {
    seed = Teuchos::as<unsigned int>(initSeed);
  }
  else {
    timeval t1;
    gettimeofday(&t1, NULL);
    // Multiply as unsigned so wrap-around is well defined; the low 32 bits
    // match what truncating the signed product would give.
    seed = static_cast<unsigned int>(t1.tv_usec) * static_cast<unsigned int>(t1.tv_sec);
  }
  // use variant of proc 0's seed so we can always reproduce the results
  // A pointer cast yields null on failure, so the check below can actually
  // fire; a reference cast would throw std::bad_cast before reaching it.
  const Teuchos::MpiComm<int> *mpicomm = dynamic_cast<const Teuchos::MpiComm<int> *>(&comm);
  TEUCHOS_TEST_FOR_EXCEPTION(mpicomm==0,MueLu::Exceptions::RuntimeError,"cast to MpiComm failed");
  MPI_Bcast((void*)&seed,1,MPI_UNSIGNED,0,*(mpicomm->getRawMpiComm()));
  seed = seed * (1+comm.getRank());
  return seed;
}
// Run the samples listed in "samples.txt" and write the results to
// "responses.txt".
//
// Input format: an integer point count followed by that many points, each
// made of cmd.USE_UQ_DIM whitespace-separated values.  The file is read by
// every process that calls this function.  After run_samples() returns,
// rank 0 writes the point count and, per point, its response and solver
// iteration count.  perf_total.response_mean and response_std_dev are set
// to 0.
//
// Throws std::runtime_error if "samples.txt" cannot be opened, if the
// point count is missing or negative, or if a coordinate cannot be read.
void run_file(
  const Teuchos::Comm<int>& comm ,
  ProblemType& problem ,
  const CoeffFunctionType & coeff_function,
  const Teuchos::RCP<Kokkos::Example::FENL::SampleGrouping<double> >& grouper,
  const Teuchos::RCP<Teuchos::ParameterList>& fenlParams,
  const CMD & cmd ,
  const double bc_lower_value,
  const double bc_upper_value,
  Kokkos::Example::FENL::Perf& perf_total)
{
  using Teuchos::Array;

  const int dim = cmd.USE_UQ_DIM;
  // Initialised so a failed read can never leave an indeterminate value.
  int num_quad_points = 0;
  Array< Array<double > > quad_points;

  // Open and read sample points
  std::ifstream fin("samples.txt");
  if (!fin) {
    throw std::runtime_error("Failed to open \"samples.txt\" for reading.");
  }
  if (!(fin >> num_quad_points) || num_quad_points < 0) {
    throw std::runtime_error("\"samples.txt\" must start with a non-negative number of sample points.");
  }
  quad_points.resize(num_quad_points);
  for (int i=0; i<num_quad_points; ++i) {
    quad_points[i].resize(dim);
    for (int j=0; j<dim; ++j) {
      // A short or malformed file must not silently become zero-valued points.
      if (!(fin >> quad_points[i][j])) {
        throw std::runtime_error("\"samples.txt\" ended early or contains a malformed sample point.");
      }
    }
  }
  fin.close();

  // Evaluate response at each quadrature point
  Array<double> responses(num_quad_points);
  Array<int> iterations(num_quad_points);
  run_samples(comm, problem, coeff_function, grouper,
              fenlParams, cmd,
              bc_lower_value, bc_upper_value,
              quad_points, responses, iterations, perf_total);

  // Write responses to file, including solver iterations
  if (comm.getRank() == 0) {
    std::ofstream fout("responses.txt");
    fout << num_quad_points << std::endl;
    for (int i=0; i<num_quad_points; ++i) {
      fout << responses[i] << " " << iterations[i] << std::endl;
    }
    fout.close();
  }

  // This driver does not compute statistics of the response.
  perf_total.response_mean = 0.0;
  perf_total.response_std_dev = 0.0;
}
 //////////////////////////////////////////////////////////////////////////////
 //
 //  printMemoryUsage()
 //
 //  Writes a three-row table (Min / Max / Tot) of the current and peak
 //  figures held in `mem` to `s`.  Only rank 0 of `comm` writes; each
 //  value is formatted by pretty().
 //
 //////////////////////////////////////////////////////////////////////////////
 void printMemoryUsage(std::ostream& s, const Teuchos::Comm<int>& comm,
   const MemUsage& mem)
 {
   if (comm.getRank() != 0)
   {
     return;
   }

   // Table header.
   s << "Estimated memory usage across all processors:" << std::endl;
   s << "        Current       Peak        "         << std::endl;
   s << "        ------------  ------------"         << std::endl;

   // One row per statistic: current value first, then peak.
   s << "  Min:  ";
   pretty(s, mem.currMin);
   pretty(s, mem.peakMin);
   s << std::endl;

   s << "  Max:  ";
   pretty(s, mem.currMax);
   pretty(s, mem.peakMax);
   s << std::endl;

   s << "  Tot:  ";
   pretty(s, mem.currTot);
   pretty(s, mem.peakTot);
   s << std::endl;
 } // end of printMemoryUsage()
Esempio n. 9
0
    // Print the max / min / average of timer.totalElapsedTime() together
    // with the string returned by MemUtils::PrintMemoryUsage().  Only rank
    // 0 of `Comm` prints.
    // NOTE(review): with HAVE_MPI the reductions run over MPI_COMM_WORLD
    // while the average divides by Comm.getSize() -- confirm the two
    // communicators always have the same size.
    void ReportTimeAndMemory(Teuchos::Time const &timer, Teuchos::Comm<int> const &Comm)
    {
      double elapsed = timer.totalElapsedTime();
      double slowest = 0;
      double fastest = 0;
      double mean = 0;
      int count = 1;
      int rootRank = 0;
#ifdef HAVE_MPI
      MPI_Reduce(&elapsed,&slowest,count,MPI_DOUBLE,MPI_MAX,rootRank,MPI_COMM_WORLD);
      MPI_Reduce(&elapsed,&fastest,count,MPI_DOUBLE,MPI_MIN,rootRank,MPI_COMM_WORLD);
      // The sum is turned into an average below.
      MPI_Reduce(&elapsed,&mean,count,MPI_DOUBLE,MPI_SUM,rootRank,MPI_COMM_WORLD);
#else
      slowest = elapsed;
      fastest = elapsed;
      mean = elapsed;
#endif
      mean /= Comm.getSize();

      if (Comm.getRank()!=0) {
        return;
      }
      std::cout << "&&&" << timer.name()
                << " max=" << slowest
                << " min=" << fastest
                << " avg=" << mean << std::endl;
      std::cout << "&&&" << timer.name()
                << " " << MemUtils::PrintMemoryUsage() << std::endl;
    } //ReportTimeAndMemory
// Evaluate the FENL response for sample points in ensembles.
//
// `grouper` partitions the indices of `points` into groups of
// Storage::static_size entries.  For each group the random variables of
// `coeff_function` are set to the group's points, fenl() is run once with
// a Sacado::MP::Vector scalar, and each group member's response and the
// shared CG iteration count are stored at that member's index of
// `responses` / `iterations`.  Selected Perf fields are multiplied by the
// ensemble size before being accumulated into `perf_total`.
void run_samples(
  const Teuchos::Comm<int>& comm ,
  Kokkos::Example::FENL::Problem< Sacado::MP::Vector<Storage>, Device, ElemOrder>& problem ,
  const CoeffFunctionType & coeff_function,
  const Teuchos::RCP<Kokkos::Example::FENL::SampleGrouping<double> >& grouper,
  const Teuchos::RCP<Teuchos::ParameterList>& fenlParams,
  const CMD & cmd ,
  const double bc_lower_value,
  const double bc_upper_value,
  const Teuchos::Array< Teuchos::Array<double> >& points,
  Teuchos::Array<double>& responses,
  Teuchos::Array<int>& iterations,
  Kokkos::Example::FENL::Perf& perf_total)
{
  using Teuchos::Array;
  using Teuchos::Ordinal;

  typedef typename Sacado::MP::Vector<Storage> EnsembleScalar;
  typedef typename CoeffFunctionType::RandomVariableView RandVarView;
  typedef typename RandVarView::HostMirror HostRandVarView;
  static const int EnsembleSize = Storage::static_size;

  // Ask the grouper to split the point indices into ensembles.
  Array< Array<Ordinal> > ensembles;
  Ordinal duplicate_count = 0;
  grouper->group(EnsembleSize, points, ensembles, duplicate_count);

  const int ensemble_count = ensembles.size();
  RandVarView rv_view = coeff_function.getRandomVariables();
  HostRandVarView rv_host = Kokkos::create_mirror_view(rv_view);
  const int rv_count = rv_view.dimension_0();

  for (int g = 0; g < ensemble_count; ++g) {

    // Slot `m` of every random variable takes its value from the m-th
    // point of this ensemble.
    for (int m = 0; m < EnsembleSize; ++m) {
      for (int k = 0; k < rv_count; ++k) {
        rv_host(k).fastAccessCoeff(m) = points[ensembles[g][m]][k];
      }
    }
    Kokkos::deep_copy( rv_view, rv_host );

    // One solve for the whole ensemble; fenl() writes the response
    // through its last argument.
    EnsembleScalar ensemble_response = 0;
    Kokkos::Example::FENL::Perf ensemble_perf =
      fenl( problem , fenlParams ,
            cmd.PRINT , cmd.USE_TRIALS , cmd.USE_ATOMIC ,
            cmd.USE_BELOS , cmd.USE_MUELU , cmd.USE_MEANBASED ,
            coeff_function , cmd.USE_ISOTROPIC ,
            cmd.USE_COEFF_SRC , cmd.USE_COEFF_ADV ,
            bc_lower_value , bc_upper_value ,
            ensemble_response);

    // Scatter the results back to the original point indices.  Every
    // member of an ensemble records the same iteration count.
    for (int m = 0; m < EnsembleSize; ++m) {
      responses[ensembles[g][m]] = ensemble_response.coeff(m);
      iterations[ensembles[g][m]] = ensemble_perf.cg_iter_count;
    }

    // Optional per-ensemble report, printed by rank 0 only.
    if (cmd.PRINT_ITS && 0 == comm.getRank()) {
      std::cout << g << " : " << ensemble_perf.cg_iter_count << " ( ";
      for (int m = 0; m < EnsembleSize; ++m) {
        std::cout << ensembles[g][m] << " ";
      }
      std::cout << ")";
      std::cout << " ( ";
      for (int k = 0; k < rv_count; ++k) {
        std::cout << rv_host(k) << " ";
      }
      std::cout << ")" << std::endl;
    }

    // Scale these statistics by the ensemble size before accumulating.
    ensemble_perf.newton_iter_count *= EnsembleSize;
    ensemble_perf.cg_iter_count *= EnsembleSize;
    ensemble_perf.map_ratio *= EnsembleSize;
    ensemble_perf.fill_node_set *= EnsembleSize;
    ensemble_perf.scan_node_count *= EnsembleSize;
    ensemble_perf.fill_graph_entries *= EnsembleSize;
    ensemble_perf.sort_graph_entries *= EnsembleSize;
    ensemble_perf.fill_element_graph *= EnsembleSize;

    perf_total.increment(ensemble_perf, !cmd.USE_BELOS);
  }
}
// Drive an adaptive Tasmanian sparse-grid study of the FENL response.
//
// Starting from a local-polynomial grid at cmd.USE_UQ_INIT_LEVEL, the
// needed points are evaluated with run_samples(), the response and its
// square are loaded back as two outputs, and the grid is refined on the
// first output with tolerance cmd.USE_UQ_TOL.  The loop stops when no new
// points are requested or the level exceeds cmd.USE_UQ_MAX_LEVEL.  The
// integrals of the two outputs, scaled by 0.5^dim, give
// perf_total.response_mean and response_std_dev; perf_total.uq_count
// accumulates the number of points requested.
//
// Without HAVE_TRILINOSCOUPLINGS_TASMANIAN this throws std::logic_error.
void run_tasmanian(
  const Teuchos::Comm<int>& comm ,
  ProblemType& problem ,
  const CoeffFunctionType & coeff_function,
  const Teuchos::RCP<Kokkos::Example::FENL::SampleGrouping<double> >& grouper,
  const Teuchos::RCP<Teuchos::ParameterList>& fenlParams,
  const CMD & cmd ,
  const double bc_lower_value,
  const double bc_upper_value,
  Kokkos::Example::FENL::Perf& perf_total)
{
#ifdef HAVE_TRILINOSCOUPLINGS_TASMANIAN

  using Teuchos::Array;

  TasGrid::TasmanianSparseGrid grid;

  // Settings for the grid and its refinement.
  const int num_dims = cmd.USE_UQ_DIM;
  const int num_outputs = 2; // the response and its square
  const int start_level = cmd.USE_UQ_INIT_LEVEL;
  const int level_limit = cmd.USE_UQ_MAX_LEVEL;
  const int poly_order = 1;
  const double refine_tol = cmd.USE_UQ_TOL;
  const TasGrid::TypeOneDRule one_d_rule = TasGrid::rule_localp;
  const TasGrid::TypeRefinement refine_kind = TasGrid::refine_classic;
  const int refine_output = 0;

  grid.makeLocalPolynomialGrid(num_dims, num_outputs, start_level, poly_order, one_d_rule);
  int pending = grid.getNumNeeded();

  perf_total.uq_count = pending;
  int current_level = start_level;
  while (pending > 0 && current_level <= level_limit) {

    if (cmd.PRINT_ITS && 0 == comm.getRank()) {
      std::cout << "Tasmanian grid level " << current_level
                << ", " << pending << " points"
                << std::endl;
    }

    // Points are stored point-major: num_dims consecutive values each.
    // NOTE(review): check whether this Tasmanian version expects the
    // caller to free the returned array.
    const double *raw_points = grid.getNeededPoints();

    Array< Array<double> > sample_points(pending);
    for (int p = 0; p < pending; ++p) {
      sample_points[p].resize(num_dims);
      for (int d = 0; d < num_dims; ++d) {
        sample_points[p][d] = raw_points[num_dims*p+d];
      }
    }

    // Run the solver on the new points.
    Array<double> sample_responses(pending);
    Array<int> sample_iterations(pending);
    run_samples(comm, problem, coeff_function, grouper,
                fenlParams, cmd,
                bc_lower_value, bc_upper_value,
                sample_points, sample_responses, sample_iterations, perf_total);

    // Interleave the two outputs per point: response, then its square.
    Array<double> grid_values(num_outputs*pending);
    for (int p = 0; p < pending; ++p) {
      const int base = p*num_outputs;
      grid_values[base]   = sample_responses[p];                     // for mean
      grid_values[base+1] = sample_responses[p]*sample_responses[p]; // for variance
    }
    grid.loadNeededPoints(&grid_values[0]);

    grid.setSurplusRefinement(refine_tol, refine_kind, refine_output);

    // See how many points the refinement asks for next.
    pending = grid.getNumNeeded();
    perf_total.uq_count += pending;
    ++current_level;
  }

  if (current_level > level_limit && comm.getRank() == 0) {
    std::cout << "Warning:  Tasmanian did not achieve refinement tolerance "
              << refine_tol << std::endl;
  }

  // Integrate both outputs and scale by 0.5^num_dims.
  double integrals[num_outputs];
  grid.integrate(integrals);
  const double measure = std::pow(0.5, num_dims); // uniform measure in num_dims dimensions
  integrals[0] *= measure;
  integrals[1] *= measure;
  perf_total.response_mean = integrals[0];
  perf_total.response_std_dev = std::sqrt(integrals[1]-integrals[0]*integrals[0]);

#else

  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "TASMANIAN not available.  Please re-configure with TASMANIAN TPL enabled.");

#endif
}
Esempio n. 12
0
 // Record the size of `comm` and the calling process's rank in it.
 Machine(const Teuchos::Comm<int> &comm)
   : numRanks(comm.getSize()),
     myRank(comm.getRank())
 {
 }