/// Driver for the IChol performance example.
///
/// Parses the command-line options, exports the QT_* environment variables
/// derived from the requested thread layout, initializes `exec_space`, prints
/// its configuration and finalizes it again.  The call to
/// exampleICholPerformance is currently commented out, so the returned value
/// stays 0 once the options have been accepted.
///
/// @param argc number of command-line arguments
/// @param argv command-line argument strings
/// @return 0 on success or when the help text was printed; -1 when the
///         command line cannot be parsed or the thread/team layout is invalid.
int main (int argc, char *argv[]) {

  Teuchos::CommandLineProcessor clp;
  clp.setDocString("This example program measure the performance of IChol algorithms on Kokkos::Threads execution space.\n");

  int nthreads = 1;
  clp.setOption("nthreads", &nthreads, "Number of threads");

  int max_task_dependence = 10;
  clp.setOption("max-task-dependence", &max_task_dependence, "Max number of task dependence");

  int team_size = 1;
  clp.setOption("team-size", &team_size, "Team size");

  bool team_interface = false;
  clp.setOption("enable-team-interface", "disable-team-interface",
                &team_interface, "Flag for team interface");

  bool verbose = false;
  clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing");

  string file_input = "test.mtx";
  clp.setOption("file-input", &file_input, "Input file (MatrixMarket SPD matrix)");

  int niter = 10;
  clp.setOption("niter", &niter, "Number of iterations for testing");

  // Report parse problems through the return code instead of exceptions.
  clp.recogniseAllOptions(true);
  clp.throwExceptions(false);

  Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv );

  if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0;
  if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL  ) return -1;

  // The layout below divides by team_size and by the shepherd count, so
  // reject values that would make either divisor zero (or negative).
  if (nthreads < 1 || team_size < 1) {
    std::cerr << "Error: nthreads and team-size must be positive (nthreads = "
              << nthreads << ", team-size = " << team_size << ")" << std::endl;
    return -1;
  }
  if (team_interface && team_size > nthreads) {
    std::cerr << "Error: team-size (" << team_size
              << ") must not exceed nthreads (" << nthreads
              << ") when the team interface is enabled" << std::endl;
    return -1;
  }

  int r_val = 0;
  {
    const bool overwrite = true;
    // With the team interface each shepherd hosts team_size workers;
    // otherwise every thread is its own shepherd.
    const int nshepherds = (team_interface ? nthreads/team_size : nthreads);
    const int nworker_per_shepherd = nthreads/nshepherds;

    // Export the layout before the execution space is initialized.
    setenv("QT_HWPAR",                    to_string(nthreads).c_str(),             overwrite);
    setenv("QT_NUM_SHEPHERDS",            to_string(nshepherds).c_str(),           overwrite);
    setenv("QT_NUM_WORKERS_PER_SHEPHERD", to_string(nworker_per_shepherd).c_str(), overwrite);

    exec_space::initialize(nthreads);
    exec_space::print_configuration(cout, true);

    // r_val = exampleICholPerformance
    //   <value_type,ordinal_type,size_type,exec_space,void>
    //   (file_input, niter, nthreads, max_task_dependence, team_size, team_interface, (nthreads != 1), verbose);

    exec_space::finalize();

    // Leave the process environment as it was found.
    unsetenv("QT_HWPAR");
    unsetenv("QT_NUM_SHEPHERDS");
    unsetenv("QT_NUM_WORKERS_PER_SHEPHERD");
  }

  return r_val;
}
Exemplo n.º 2
0
int main(int argc, char **argv)
{
  // Typename of Polynomial Chaos scalar type
  typedef Stokhos::StandardStorage<int,double> pce_storage_type;
  typedef Sacado::ETPCE::OrthogPoly<double, pce_storage_type> pce_type;

  // Typename of ensemble scalar type
  const int EnsembleSize = 8;
  typedef Stokhos::StaticFixedStorage<int,double,EnsembleSize,Kokkos::DefaultExecutionSpace> ensemble_storage_type;
  typedef Sacado::MP::Vector<ensemble_storage_type> ensemble_type;

  // Short-hand for several classes used below
  using Teuchos::Array;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Stokhos::OneDOrthogPolyBasis;
  using Stokhos::HermiteBasis;
  using Stokhos::LegendreBasis;
  using Stokhos::CompletePolynomialBasis;
  using Stokhos::Quadrature;
  using Stokhos::TotalOrderIndexSet;
  using Stokhos::SmolyakSparseGridQuadrature;
  using Stokhos::TensorProductQuadrature;
  using Stokhos::Sparse3Tensor;
  using Stokhos::QuadOrthogPolyExpansion;

  try {

    // Setup command line options
    Teuchos::CommandLineProcessor CLP;
    CLP.setDocString(
      "This example computes the PC expansion of a simple function.\n");
    int p = 4;
    CLP.setOption("order", &p, "Polynomial order");
    bool sparse = false;
    CLP.setOption("sparse", "tensor", &sparse,
                  "Use sparse grid or tensor product quadrature");

    // Parse arguments
    CLP.parse( argc, argv );

    // Basis of dimension 3, order given by command-line option
    const int d = 3;
    Array< RCP<const OneDOrthogPolyBasis<int,double> > > bases(d);
    for (int i=0; i<d; i++) {
      bases[i] = rcp(new HermiteBasis<int,double>(p, true));
    }
    RCP<const CompletePolynomialBasis<int,double> > basis =
      rcp(new CompletePolynomialBasis<int,double>(bases));
    const int pce_size = basis->size();
    std::cout << "basis size = " << pce_size << std::endl;

    // Quadrature method
    RCP<const Quadrature<int,double> > quad;
    if (sparse) {
      const TotalOrderIndexSet<int> index_set(d, p);
      quad = rcp(new SmolyakSparseGridQuadrature<int,double>(basis, index_set));
    }
    else {
      quad = rcp(new TensorProductQuadrature<int,double>(basis));
    }
    std::cout << "quadrature size = " << quad->size() << std::endl;

    // Triple product tensor
    RCP<Sparse3Tensor<int,double> > Cijk =
      basis->computeTripleProductTensor();

    // Expansion method
    RCP<QuadOrthogPolyExpansion<int,double> > expn =
      rcp(new QuadOrthogPolyExpansion<int,double>(basis, Cijk, quad));

    // Polynomial expansion of u (note:  these are coefficients in the
    // normalized basis)
    pce_type u(expn);
    u.term(0,0) = 1.0;     // zeroth order term
    u.term(0,1) = 0.1;     // first order term for dimension 0
    u.term(1,1) = 0.05;    // first order term for dimension 1
    u.term(2,1) = 0.01;    // first order term for dimension 2

    //
    // Compute PCE expansion of function using NISP with ensemble propagation
    //

    // Extract quadrature data
    const int num_quad_points                 = quad->size();
    const Array<double>& quad_weights         = quad->getQuadWeights();
    const Array< Array<double> >& quad_points = quad->getQuadPoints();
    const Array< Array<double> >& quad_values = quad->getBasisAtQuadPoints();

    // Loop over quadrature points in blocks of size EnsembleSize
    pce_type v(expn);
    ensemble_type u_ensemble;
    for (int qp_block=0; qp_block<num_quad_points; qp_block+=EnsembleSize) {
      const int qp_sz = qp_block+EnsembleSize <= num_quad_points ?
        EnsembleSize : num_quad_points-qp_block;

      // Evaluate u at each quadrature point
      for (int qp=0; qp<qp_sz; ++qp)
        u_ensemble.fastAccessCoeff(qp) =
          u.evaluate(quad_points[qp_block+qp], quad_values[qp_block+qp]);
      for (int qp=qp_sz; qp<EnsembleSize; ++qp)
        u_ensemble.fastAccessCoeff(qp) = u_ensemble.fastAccessCoeff(qp_sz-1);

      // Evaluate function at each quadrature point
      ensemble_type v_ensemble = simple_function(u_ensemble);

      // Sum results into PCE integral
      for (int pc=0; pc<pce_size; ++pc)
        for (int qp=0; qp<qp_sz; ++qp)
          v.fastAccessCoeff(pc) += v_ensemble.fastAccessCoeff(qp)*quad_weights[qp_block+qp]*quad_values[qp_block+qp][pc];
    }

    /*
    for (int qp=0; qp<num_quad_points; ++qp) {
      double u_qp = u.evaluate(quad_points[qp]);
      double v_qp = simple_function(u_qp);
      double w = quad_weights[qp];
      for (int pc=0; pc<pce_size; ++pc)
        v.fastAccessCoeff(pc) += v_qp*w*quad_values[qp][pc];
    }
    */

    // Print u and v
    std::cout << "\tu = ";
    u.print(std::cout);
    std::cout << "\tv = ";
    v.print(std::cout);

    // Compute moments
    double mean = v.mean();
    double std_dev = v.standard_deviation();

    // Evaluate PCE and function at a point = 0.25 in each dimension
    Teuchos::Array<double> pt(d);
    for (int i=0; i<d; i++)
      pt[i] = 0.25;
    double up = u.evaluate(pt);
    double vp = simple_function(up);
    double vp2 = v.evaluate(pt);

    // Print results
    std::cout << "\tv mean         = " << mean << std::endl;
    std::cout << "\tv std. dev.    = " << std_dev << std::endl;
    std::cout << "\tv(0.25) (true) = " << vp << std::endl;
    std::cout << "\tv(0.25) (pce)  = " << vp2 << std::endl;

     // Check the answer
    if (std::abs(vp - vp2) < 1e-2)
      std::cout << "\nExample Passed!" << std::endl;
  }
  catch (std::exception& e) {
    std::cout << e.what() << std::endl;
  }
}
Exemplo n.º 3
0
int main(int argc, char **argv)
{
  try {

    // Initialize MPI
#ifdef HAVE_MPI
    MPI_Init(&argc,&argv);
#endif

    // Setup command line options
    Teuchos::CommandLineProcessor CLP;
    CLP.setDocString(
      "This example generates the sparsity pattern for the block stochastic Galerkin matrix.\n");
    int d = 3;
    CLP.setOption("dimension", &d, "Stochastic dimension");
    int p = 5;
    CLP.setOption("order", &p, "Polynomial order");
    double drop = 1.0e-12;
    CLP.setOption("drop", &drop, "Drop tolerance");
    std::string file = "A.mm";
    CLP.setOption("filename", &file, "Matrix Market filename");
    BasisType basis_type = LEGENDRE;
    CLP.setOption("basis", &basis_type, 
		  num_basis_types, basis_type_values, basis_type_names, 
		  "Basis type");
    Stokhos::GrowthPolicy growth_type = Stokhos::SLOW_GROWTH;
    CLP.setOption("growth", &growth_type, 
		  num_growth_types, growth_type_values, growth_type_names, 
		  "Growth type");
    ProductBasisType prod_basis_type = COMPLETE;
    CLP.setOption("product_basis", &prod_basis_type, 
		  num_prod_basis_types, prod_basis_type_values, 
		  prod_basis_type_names, 
		  "Product basis type");
    double alpha = 1.0;
    CLP.setOption("alpha", &alpha, "Jacobi alpha index");
    double beta = 1.0;
    CLP.setOption("beta", &beta, "Jacobi beta index");
    bool full = true;
    CLP.setOption("full", "linear", &full, "Use full or linear expansion");
    bool use_old = false;
    CLP.setOption("old", "new", &use_old, "Use old or new Cijk algorithm");
    int tile_size = 100;
    CLP.setOption("tile_size", &tile_size, "Tile size");

    // Parse arguments
    CLP.parse( argc, argv );

    // Basis
    Array< RCP<const Stokhos::OneDOrthogPolyBasis<int,double> > > bases(d); 
    for (int i=0; i<d; i++) {
      if (basis_type == HERMITE)
	bases[i] = Teuchos::rcp(new Stokhos::HermiteBasis<int,double>(
				  p, true, growth_type));
      else if (basis_type == LEGENDRE)
	bases[i] = Teuchos::rcp(new Stokhos::LegendreBasis<int,double>(
				  p, true, growth_type));
      else if (basis_type == CC_LEGENDRE)
	bases[i] = 
	  Teuchos::rcp(new Stokhos::ClenshawCurtisLegendreBasis<int,double>(
			 p, true));
      else if (basis_type == GP_LEGENDRE)
	bases[i] = 
	  Teuchos::rcp(new Stokhos::GaussPattersonLegendreBasis<int,double>(
			 p, true));
      else if (basis_type == RYS)
	bases[i] = Teuchos::rcp(new Stokhos::RysBasis<int,double>(
				  p, 1.0, true, growth_type));
      else if (basis_type == JACOBI)
	bases[i] = Teuchos::rcp(new Stokhos::JacobiBasis<int,double>(
				  p, alpha, beta, true, growth_type));
    }
    RCP<const Stokhos::ProductBasis<int,double> > basis;
    if (prod_basis_type == COMPLETE)
      basis = 
	Teuchos::rcp(new Stokhos::CompletePolynomialBasis<int,double>(
		       bases, drop, use_old));
    else if (prod_basis_type == TENSOR)
      basis = 
	Teuchos::rcp(new Stokhos::TensorProductBasis<int,double>(
		       bases, drop));
    else if (prod_basis_type == TOTAL)
      basis = 
	Teuchos::rcp(new Stokhos::TotalOrderBasis<int,double>(
		       bases, drop));
    else if (prod_basis_type == SMOLYAK) {
      Stokhos::TotalOrderIndexSet<int> index_set(d, p);
      basis = 
	Teuchos::rcp(new Stokhos::SmolyakBasis<int,double>(
		       bases, index_set, drop));
    }

    // Triple product tensor
    typedef Stokhos::Sparse3Tensor<int,double> Cijk_type;
    RCP<Cijk_type> Cijk;
    if (full)
      Cijk = basis->computeTripleProductTensor();
    else
      Cijk = basis->computeLinearTripleProductTensor();

    int sz = basis->size();
    std::cout << "basis size = " << sz
	      << " num nonzero Cijk entries = " << Cijk->num_entries() 
	      << std::endl;

    // Setup tiles
    if (tile_size > sz)
      tile_size = sz;
    int j_sz = sz;
    int k_sz = sz;
    if (!full)
      k_sz = basis->dimension()+1;
    int nj_tiles = j_sz / tile_size;
    int nk_tiles = k_sz / tile_size;
    if (j_sz - nj_tiles*tile_size > 0)
      ++nj_tiles;
    if (k_sz - nk_tiles*tile_size > 0)
      ++nk_tiles;
    Array<CijkNonzeros> nz(sz);
    for (int i=0; i<sz; ++i) {
      nz[i].i = i;
      nz[i].nz_tiles.resize(nj_tiles);
      for (int j=0; j<nj_tiles; ++j)
	nz[i].nz_tiles[j].resize(nk_tiles);
    }

    // Get number of nonzeros in Cijk for each i
    Cijk_type::k_iterator k_begin = Cijk->k_begin();
    Cijk_type::k_iterator k_end = Cijk->k_end();
    for (Cijk_type::k_iterator k_it=k_begin; k_it!=k_end; ++k_it) {
      int k = index(k_it);
      int k_tile = k / tile_size;
      Cijk_type::kj_iterator j_begin = Cijk->j_begin(k_it);
      Cijk_type::kj_iterator j_end = Cijk->j_end(k_it);
      for (Cijk_type::kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
	int j = index(j_it);
	int j_tile = j / tile_size;
	Cijk_type::kji_iterator i_begin = Cijk->i_begin(j_it);
	Cijk_type::kji_iterator i_end = Cijk->i_end(j_it);
	for (Cijk_type::kji_iterator i_it = i_begin; i_it != i_end; ++i_it) {
	  int i = index(i_it);
	  ++nz[i].total_nz;
	  ++nz[i].nz_tiles[j_tile][k_tile];
	}
      }
    }

    // Sort based on total number of nonzeros
    std::sort(nz.begin(), nz.end(), NZCompare());
    
    // Print nonzeros
    int w_index = 3;
    int w_nz = 5;
    int w_tile = 4;
    for (int i=0; i<nz.size(); ++i) {
      int idx = nz[i].i;
      std::cout << std::setw(w_index) << idx << " " 
		<< basis->term(idx) << ": " 
		<< std::setw(w_nz) << nz[i].total_nz
		<< ", ";
      for (int j=0; j<nj_tiles; ++j)
	for (int k=0; k<nk_tiles; ++k)
	  std::cout << std::setw(w_tile) << nz[i].nz_tiles[j][k] << " ";
      std::cout << std::endl;
    }

    // Add up the nonzeros for each (j,k) tile
    Array< Array<int> > total_nz_tiles(nj_tiles);
    int total_nz = 0;
    for (int j=0; j<nj_tiles; ++j)
      total_nz_tiles[j].resize(nk_tiles);
    for (int i=0; i<nz.size(); ++i) {
      total_nz += nz[i].total_nz;
      for (int j=0; j<nj_tiles; ++j)
	for (int k=0; k<nk_tiles; ++k)
	  total_nz_tiles[j][k] += nz[i].nz_tiles[j][k];
    }
    int w_total = (w_index+1) + (2*basis->dimension()+5) + w_nz;
    std::cout << std::endl << std::setw(w_total) << total_nz << ", ";
    for (int j=0; j<nj_tiles; ++j)
      for (int k=0; k<nk_tiles; ++k)
	std::cout << std::setw(w_tile) << total_nz_tiles[j][k] << " ";
    std::cout << std::endl;

    // Now partition Cijk for each tile
    Array< Array< RCP<Cijk_type> > > Cijk_tile(nj_tiles);
    for (int j=0; j<nj_tiles; ++j) {
      Cijk_tile[j].resize(nk_tiles);
      for (int k=0; k<nk_tiles; ++k)
	Cijk_tile[j][k] = rcp(new Cijk_type);
    }
    for (Cijk_type::k_iterator k_it=k_begin; k_it!=k_end; ++k_it) {
      int k = index(k_it);
      int k_tile = k / tile_size;
      Cijk_type::kj_iterator j_begin = Cijk->j_begin(k_it);
      Cijk_type::kj_iterator j_end = Cijk->j_end(k_it);
      for (Cijk_type::kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
	int j = index(j_it);
	int j_tile = j / tile_size;
	Cijk_type::kji_iterator i_begin = Cijk->i_begin(j_it);
	Cijk_type::kji_iterator i_end = Cijk->i_end(j_it);
	for (Cijk_type::kji_iterator i_it = i_begin; i_it != i_end; ++i_it) {
	  int i = index(i_it);
	  double c = value(i_it);
	  Cijk_tile[j_tile][k_tile]->add_term(i,j,k,c);
	}
      }
    }
    for (int j=0; j<nj_tiles; ++j)
      for (int k=0; k<nk_tiles; ++k)
	Cijk_tile[j][k]->fillComplete();

    
    Array< Array< std::map<int,int> > > nz_tile(nj_tiles);
    Array< Array< Array< std::pair<int,int> > > > sorted_nz_tile(nj_tiles);
    for (int j_tile=0; j_tile<nj_tiles; ++j_tile) {
      nz_tile[j_tile].resize(nk_tiles); 
      sorted_nz_tile[j_tile].resize(nk_tiles); 
      for (int k_tile=0; k_tile<nk_tiles; ++k_tile) {

	// Count nonzeros for each i, for each tile
	Cijk_type::k_iterator k_begin = Cijk_tile[j_tile][k_tile]->k_begin();
	Cijk_type::k_iterator k_end = Cijk_tile[j_tile][k_tile]->k_end();
	for (Cijk_type::k_iterator k_it=k_begin; k_it!=k_end; ++k_it) {
	  //int k = index(k_it);
	  Cijk_type::kj_iterator j_begin = 
	    Cijk_tile[j_tile][k_tile]->j_begin(k_it);
	  Cijk_type::kj_iterator j_end = 
	    Cijk_tile[j_tile][k_tile]->j_end(k_it);
	  for (Cijk_type::kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
	    //int j = index(j_it);
	    Cijk_type::kji_iterator i_begin = 
	      Cijk_tile[j_tile][k_tile]->i_begin(j_it);
	    Cijk_type::kji_iterator i_end = 
	      Cijk_tile[j_tile][k_tile]->i_end(j_it);
	    for (Cijk_type::kji_iterator i_it = i_begin; i_it != i_end; ++i_it){
	      int i = index(i_it);
	      if (nz_tile[j_tile][k_tile].count(i) == 0)
		nz_tile[j_tile][k_tile][i] = 1;
	      else
		++(nz_tile[j_tile][k_tile][i]);
	    }
	  }
	}

	// Sort based on non-zeros for each i, for each tile
	sorted_nz_tile[j_tile][k_tile].resize(nz_tile[j_tile][k_tile].size());
	int idx=0;
	for (std::map<int,int>::iterator it = nz_tile[j_tile][k_tile].begin();
	     it != nz_tile[j_tile][k_tile].end(); ++it) {
	  sorted_nz_tile[j_tile][k_tile][idx] = 
	    std::make_pair(it->first, it->second);
	  ++idx;
	}
	std::sort( sorted_nz_tile[j_tile][k_tile].begin(),
		   sorted_nz_tile[j_tile][k_tile].end(),
		   NZPairCompare() );

	// Print number of non-zeros for each i, for each tile
	std::cout << std::endl 
		  << "Tile (" << j_tile << ", " << k_tile << "):" << std::endl;
	for (int i=0; i<sorted_nz_tile[j_tile][k_tile].size(); ++i) {
	  int idx = sorted_nz_tile[j_tile][k_tile][i].first;
	  std::cout << std::setw(w_index) << idx << " " 
		    << basis->term(idx) << ": " 
		    << std::setw(w_nz) << sorted_nz_tile[j_tile][k_tile][i].second
		    << std::endl;
	  if (i % 32 == 31)
	    std::cout << std::endl;
	}
      }
    }
    
  }
  catch (std::exception& e) {
    std::cout << e.what() << std::endl;
  }

  return 0;
}
Exemplo n.º 4
0
int main(int argc, char **argv)
{
  int num_k;

  try {

    // Initialize MPI
#ifdef HAVE_MPI
    MPI_Init(&argc,&argv);
#endif

    // Setup command line options
    Teuchos::CommandLineProcessor CLP;
    CLP.setDocString(
      "This example generates the sparsity pattern for the block stochastic Galerkin matrix.\n");
    int d = 3;
    CLP.setOption("dimension", &d, "Stochastic dimension");
    int p = 5;
    CLP.setOption("order", &p, "Polynomial order");
    double drop = 1.0e-15;
    CLP.setOption("drop", &drop, "Drop tolerance");
    std::string file_base = "A";
    CLP.setOption("base filename", &file_base, "Base filename for matrix market files");
    BasisType basis_type = LEGENDRE;
    CLP.setOption("basis", &basis_type, 
		  num_basis_types, basis_type_values, basis_type_names, 
		  "Basis type");
    bool full = true;
    CLP.setOption("full", "linear", &full, "Use full or linear expansion");
    bool use_old = false;
    CLP.setOption("old", "new", &use_old, "Use old or new Cijk algorithm");

    // Parse arguments
    CLP.parse( argc, argv );

    // Basis
    Teuchos::Array< Teuchos::RCP<const Stokhos::OneDOrthogPolyBasis<int,double> > > bases(d); 
    for (int i=0; i<d; i++) {
      if (basis_type == HERMITE)
	bases[i] = Teuchos::rcp(new Stokhos::HermiteBasis<int,double>(p));
      else if (basis_type == LEGENDRE)
	bases[i] = Teuchos::rcp(new Stokhos::LegendreBasis<int,double>(p));
      else if (basis_type == RYS)
	bases[i] = Teuchos::rcp(new Stokhos::RysBasis<int,double>(p, 1.0, 
								  false));
    }
    Teuchos::RCP<const Stokhos::CompletePolynomialBasis<int,double> > basis = 
      Teuchos::rcp(new Stokhos::CompletePolynomialBasis<int,double>(bases,
								    drop,
								    use_old));

    // Triple product tensor
    Teuchos::RCP<Stokhos::Sparse3Tensor<int,double> > Cijk;
    if (full)
      num_k = basis->size();
    else
      num_k = basis->dimension()+1;
    Cijk = basis->computeTripleProductTensor(num_k);

    std::cout << "basis size = " << basis->size() 
	      << " num nonzero Cijk entries = " << Cijk->num_entries() 
	      << std::endl;

#ifdef HAVE_MPI
    Epetra_MpiComm comm(MPI_COMM_WORLD);
#else
    Epetra_SerialComm comm;
#endif

    // Number of stochastic rows
    int num_rows = basis->size();

    // Replicated local map
    Epetra_LocalMap map(num_rows, 0, comm);

    // Loop over Cijk entries including a non-zero in the graph at
    // indices (i,j) if Cijk is non-zero for each k
    typedef Stokhos::Sparse3Tensor<int,double> Cijk_type;
    double one = 1.0;
    for (Cijk_type::k_iterator k_it=Cijk->k_begin(); 
	 k_it!=Cijk->k_end(); ++k_it) {
      int k = index(k_it);
      Epetra_CrsMatrix mat(Copy, map, 1);
      for (Cijk_type::kj_iterator j_it = Cijk->j_begin(k_it); 
	   j_it != Cijk->j_end(k_it); ++j_it) {
	int j = index(j_it);
	for (Cijk_type::kji_iterator i_it = Cijk->i_begin(j_it);
	     i_it != Cijk->i_end(j_it); ++i_it) {
	  int i = index(i_it);
	  mat.InsertGlobalValues(i, 1, &one, &j);
	}
      }
      mat.FillComplete();

      // Construct file name
      std::stringstream ss;
      ss << file_base << "_" << k << ".mm";
      std::string file = ss.str();

      // Save matrix to file
      EpetraExt::RowMatrixToMatrixMarketFile(file.c_str(), mat);
    }
    
  }
  catch (std::exception& e) {
    std::cout << e.what() << std::endl;
  }

  return num_k;
}
int main(int narg, char *arg[]) {

  Teuchos::GlobalMPISession mpiSession(&narg, &arg,0);
  Platform &platform = Tpetra::DefaultPlatform::getDefaultPlatform();
  RCP<const Teuchos::Comm<int> > CommT = platform.getComm();

  int me = CommT->getRank();
  //int numProcs = CommT->getSize();

  if (me == 0){
  cout 
    << "====================================================================\n" 
    << "|                                                                  |\n" 
    << "|                  Example: Partition APF Mesh                     |\n" 
    << "|                                                                  |\n"
    << "|  Questions? Contact  Karen Devine      ([email protected]),     |\n"
    << "|                      Erik Boman        ([email protected]),     |\n"
    << "|                      Siva Rajamanickam ([email protected]).     |\n"
    << "|                                                                  |\n"
    << "|  Pamgen's website:   http://trilinos.sandia.gov/packages/pamgen  |\n"
    << "|  Zoltan2's website:  http://trilinos.sandia.gov/packages/zoltan2 |\n"
    << "|  Trilinos website:   http://trilinos.sandia.gov                  |\n"
    << "|                                                                  |\n"
    << "====================================================================\n";
  }


#ifdef HAVE_MPI
  if (me == 0) {
    cout << "PARALLEL executable \n";
  }
#else
  if (me == 0) {
    cout << "SERIAL executable \n";
  }
#endif

  /***************************************************************************/
  /******************************* GET INPUTS ********************************/
  /***************************************************************************/

  // default values for command-line arguments
  std::string meshFileName("4/");
  std::string modelFileName("torus.dmg");
  std::string action("zoltan_hg");
  std::string parma_method("VtxElm");
  std::string output_loc("");
  int nParts = CommT->getSize();
  double imbalance=1.1;

  // Read run-time options.
  Teuchos::CommandLineProcessor cmdp (false, false);
  cmdp.setOption("meshfile", &meshFileName,
                 "Mesh file with APF specifications (.smb file(s))");
  cmdp.setOption("modelfile", &modelFileName,
		 "Model file with APF specifications (.dmg file)");
  cmdp.setOption("action", &action,
                 "Method to use:  mj, scotch, zoltan_rcb, parma or color");
  cmdp.setOption("parma_method", &parma_method,
                 "Method to use: Vertex, Edge, Element, VtxElm, VtxEdgeElm, ElmLtVtx, Ghost, or Shape ");
  cmdp.setOption("nparts", &nParts,
                 "Number of parts to create");
  cmdp.setOption("imbalance", &imbalance,
                 "Target Imbalance for first partitioner");
  cmdp.setOption("output", &output_loc,
                 "Location of new partitioned apf mesh. Ex: 4/torus.smb");
  cmdp.parse(narg, arg);

  
  /***************************************************************************/
  /********************** GET CELL TOPOLOGY **********************************/
  /***************************************************************************/

  // Get dimensions
  //int dim = 3;

  /***************************************************************************/
  /***************************** GENERATE MESH *******************************/
  /***************************************************************************/

#ifdef HAVE_ZOLTAN2_PARMA

  if (me == 0) cout << "Generating mesh ... \n\n";

  //Setup for SCOREC
  PCU_Comm_Init();
  
  // Generate mesh with MDS
  gmi_register_mesh();
  apf::Mesh2* m = apf::loadMdsMesh(modelFileName.c_str(),meshFileName.c_str());
  
  runTest(CommT,m,action,parma_method,nParts,imbalance,"partition");
  
  runTest(CommT,m,"parma",parma_method,nParts,imbalance,"parma");

  


  if (output_loc!="") {
    m->writeNative(output_loc.c_str());
  }

  // delete mesh
  if (me == 0) cout << "Deleting the mesh ... \n\n";

  //Delete APF Mesh;
  m->destroyNative();
  apf::destroyMesh(m);
  //End communications
  PCU_Comm_Free();

#endif
  if (me == 0)
    std::cout << "PASS" << std::endl;

  return 0;

}
Exemplo n.º 6
0
// Driver comparing multigrid setup with and without reuse.
//
// Two matrices A and B are constructed from the same Galeri parameters (B
// with modified stretch factors unless --nomodify is given).  A hierarchy is
// first built from scratch for each of them, then for every reuse type a
// hierarchy built for A is reused for B.  Residual norms after a fixed number
// of iterations are printed, and the setup timers are summarized at the end.
//
// Returns EXIT_SUCCESS on completion or when help was printed, EXIT_FAILURE
// when the command line is rejected.
int main_(Teuchos::CommandLineProcessor &clp, int argc, char *argv[]) {
#include <MueLu_UseShortNames.hpp>
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::ArrayRCP;
  using Teuchos::TimeMonitor;

  // ---- MPI initialization using Teuchos ----------------------------------
  Teuchos::GlobalMPISession mpiSession(&argc, &argv, NULL);
  RCP<const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();

  // ---- Convenient definitions --------------------------------------------
  typedef Teuchos::ScalarTraits<SC> STS;
  const SC one  = STS::one();
  const SC zero = STS::zero();

  // All regular output goes through a stream restricted to rank 0.
  RCP<Teuchos::FancyOStream> rootStream = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
  Teuchos::FancyOStream& out = *rootStream;
  out.setOutputToRootOnly(0);

  // ---- Parameters initialization -----------------------------------------
  GO nx = 100, ny = 100, nz = 100;
  // Test-case parameters
  Galeri::Xpetra::Parameters<GO> galeriParameters(clp, nx, ny, nz, "Laplace2D");
  // Xpetra parameters
  Xpetra::Parameters             xpetraParameters(clp);

  std::string xmlFileName;
  int         numRebuilds = 0;
  bool        useFilter   = true;
  bool        modify      = true;
  clp.setOption("xml",                &xmlFileName, "read parameters from a file");
  clp.setOption("rebuild",            &numRebuilds, "#times to rebuild hierarchy");
  clp.setOption("filter", "nofilter", &useFilter,   "Print out only Setup times");
  clp.setOption("modify", "nomodify", &modify,      "Change values of the matrix used for reuse");

  clp.recogniseAllOptions(true);
  const Teuchos::CommandLineProcessor::EParseCommandLineReturn parseResult = clp.parse(argc, argv);
  if (parseResult == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED)
    return EXIT_SUCCESS;
  if (parseResult == Teuchos::CommandLineProcessor::PARSE_ERROR ||
      parseResult == Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION)
    return EXIT_FAILURE;

  Xpetra::UnderlyingLib lib = xpetraParameters.GetLib();

  ParameterList paramList;
  paramList.set("verbosity", "none");
  if (!xmlFileName.empty())
    Teuchos::updateParametersFromXmlFileAndBroadcast(xmlFileName, Teuchos::Ptr<ParameterList>(&paramList), *comm);

  // Fetch the matrix parameters only now: they may have been changed on the
  // command line (e.g. switching the matrix type from 2D to 3D affects nz).
  ParameterList galeriList = galeriParameters.GetParameterList();

  // ---- Problem construction (for comments, see Driver.cpp) ---------------
  out << "========================================================\n" << xpetraParameters << galeriParameters;
  std::string matrixType = galeriParameters.GetMatrixType();
  RCP<Matrix>       A, B;
  RCP<const Map>    map;
  RCP<MultiVector>  coordinates, nullspace;
  ConstructData(matrixType, galeriList, lib, comm, A, map, coordinates, nullspace);

  // B is built from the same list, optionally with different stretching.
  if (modify) {
    galeriList.set("stretchx", 2.2);
    galeriList.set("stretchy", 1.2);
    galeriList.set("stretchz", 0.3);
  }
  ConstructData(matrixType, galeriList, lib, comm, B, map, coordinates, nullspace);

  out << "Processor subdomains in x direction: " << galeriList.get<GO>("mx") << std::endl
      << "Processor subdomains in y direction: " << galeriList.get<GO>("my") << std::endl
      << "Processor subdomains in z direction: " << galeriList.get<GO>("mz") << std::endl
      << "========================================================" << std::endl;

  // ---- Setups and solves --------------------------------------------------
  RCP<Vector> X = VectorFactory::Build(map);
  RCP<Vector> Y = VectorFactory::Build(map);
  Y->setSeed(846930886);
  Y->randomize();

  const int numIterations = 9;

  const std::string heavyRule = "=============================================================";
  const std::string lightRule = "-------------------------------------------------------------";

  // ---- Setup #1 (no reuse) ------------------------------------------------
  out << heavyRule << " no reuse " << heavyRule << std::endl;
  {
    RCP<Hierarchy> H;

    // Build the hierarchy for A (numRebuilds + 1) times and time the builds.
    RCP<Teuchos::Time> setupTimer = TimeMonitor::getNewTimer("Setup #1: no reuse");
    for (int build = 0; build <= numRebuilds; build++) {
      out << lightRule << " no reuse (rebuild #" << build << ") " << lightRule << std::endl;

      // When rebuilds were requested, the first build is left untimed to
      // reduce jitter.
      const bool timed = (numRebuilds == 0) || (build != 0);

      if (timed)
        setupTimer->start();

      A->SetMaxEigenvalueEstimate(-one);
      H = CreateHierarchy(A, paramList, coordinates);

      if (timed) {
        setupTimer->stop();
        setupTimer->incrementNumCalls();
      }
    }

    X->putScalar(zero);
    H->Iterate(*Y, *X, numIterations);
    out << "residual(A) = " << Utilities::ResidualNorm(*A, *X, *Y)[0] << " [no reuse]" << std::endl;

    // One untimed build for B to record its convergence.
    B->SetMaxEigenvalueEstimate(-one);
    H = CreateHierarchy(B, paramList, coordinates);

    X->putScalar(zero);
    H->Iterate(*Y, *X, numIterations);
    out << "residual(B) = " << Utilities::ResidualNorm(*B, *X, *Y)[0] << " [no reuse]" << std::endl;
  }

  // ---- Setup #2-inf (reuse) -----------------------------------------------
  // Parallel tables: parameter value for "reuse: type" and its display name.
  const std::string reuseTypes[] = { "S",         "tP",          "RP"               };
  const std::string reuseNames[] = { "smoothers", "tentative P", "smoothed P and R" };
  const size_t numReuseTypes = sizeof(reuseTypes) / sizeof(reuseTypes[0]);

  for (size_t k = 0; k < numReuseTypes; k++) {
    const std::string& reuseType = reuseTypes[k];
    const std::string& reuseName = reuseNames[k];

    out << heavyRule << " " << reuseType << " " << heavyRule << std::endl;
    A->SetMaxEigenvalueEstimate(-one);

    paramList.set("reuse: type", reuseTypes[k]);

    out << lightRule << " " << reuseType << " (initial) " << lightRule << std::endl;
    RCP<Hierarchy> H = CreateHierarchy(A, paramList, coordinates);

    X->putScalar(zero);
    H->Iterate(*Y, *X, numIterations);
    out << "residual(A) = " << Utilities::ResidualNorm(*A, *X, *Y)[0] << " [reuse \"" << reuseName << "\"]" << std::endl;

    // Reuse setup
    RCP<Matrix> Bcopy = Xpetra::MatrixFactory2<Scalar, LocalOrdinal, GlobalOrdinal, Node>::BuildCopy(B);

    RCP<Teuchos::Time> setupTimer = TimeMonitor::getNewTimer("Setup #" + MueLu::toString(k+2) + ": reuse " + reuseName);
    for (int build = 0; build <= numRebuilds; build++) {
      out << lightRule << " " << reuseType << " (rebuild #" << build << ") " << lightRule << std::endl;

      // When rebuilds were requested, the first build is left untimed to
      // reduce jitter.
      const bool timed = (numRebuilds == 0) || (build != 0);

      if (timed)
        setupTimer->start();

      B->SetMaxEigenvalueEstimate(-one);
      ReuseHierarchy(B, *H);

      if (timed) {
        setupTimer->stop();
        setupTimer->incrementNumCalls();
      }

      X->putScalar(zero);
      H->Iterate(*Y, *X, numIterations);
      out << "residual(B) = " << Utilities::ResidualNorm(*B, *X, *Y)[0] << " [reuse \"" << reuseName << "\"]" << std::endl;

      // Change the pointers so that reuse is not a no-op
      B.swap(Bcopy);
    }
  }
  out << heavyRule << heavyRule << std::endl;

  // ---- Timer summary -------------------------------------------------------
  {
    // With --filter only timers whose name starts with "Setup #" are listed.
    std::string filter;
    if (useFilter)
      filter = "Setup #";

    const bool alwaysWriteLocal = true;
    const bool writeGlobalStats = true;
    const bool writeZeroTimers  = false;
    const bool ignoreZeroTimers = true;
    TimeMonitor::summarize(A->getRowMap()->getComm().ptr(), std::cout,
                           alwaysWriteLocal, writeGlobalStats, writeZeroTimers,
                           Teuchos::Union, filter, ignoreZeroTimers);
  }

  return EXIT_SUCCESS;
}
int main(int argc, char* argv[]) {
  int ierr = 0;
  int p = 1;
  int w = p+7;
  int w_name = 13;

  try {
 
    // Set up command line options
    Teuchos::CommandLineProcessor clp;
    clp.setDocString("This program tests the speed of various forward mode AD implementations for a finite-element-like Jacobian fill");
    int work_count = 200000;
    int num_eqns_begin = 5;
    int num_eqns_end = 65;
    int num_eqns_delta = 10;
    int rt = 0;
    clp.setOption("wc", &work_count, "Work count = num_nodes*num_eqns");
    clp.setOption("p_begin", &num_eqns_begin, "Intitial number of equations");
    clp.setOption("p_end", &num_eqns_end, "Final number of equations");
    clp.setOption("p_delta", &num_eqns_delta, "Step in number of equations");
    clp.setOption("rt", &rt, "Include ADOL-C retaping test");

    // Parse options
    Teuchos::CommandLineProcessor::EParseCommandLineReturn
      parseReturn= clp.parse(argc, argv);
    if(parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL)
      return 1;

    // Print header
    std::cout.setf(std::ios::right);
    std::cout << std::setw(w_name) << "Name" << " ";
    for (int num_eqns = num_eqns_begin; num_eqns <= num_eqns_end; 
	 num_eqns += num_eqns_delta)
      std::cout << std::setw(w) << num_eqns << " ";
    std::cout << std::endl;
    for (int j=0; j<w_name; j++)
      std::cout << '=';
    std::cout << " ";
    for (int num_eqns = num_eqns_begin; num_eqns <= num_eqns_end; 
	 num_eqns += num_eqns_delta) {
      for (int j=0; j<w; j++)
	std::cout << '=';
      std::cout << " ";
    }
    std::cout << std::endl;

    // Analytic
    std::vector<double> times_analytic =
      do_times(work_count, num_eqns_begin, num_eqns_end, num_eqns_delta, 
	       analytic_jac_fill);
    print_times(times_analytic, times_analytic, "Analytic", p, w, w_name);

#ifdef HAVE_ADIC
    // Note there seems to be a bug in ADIC where doing more than one num_eqns
    // value results in incorrect timings after the first.  Doing one value
    // at a time seems to give correct values though.
    std::vector<double> times_adic =
      do_times(work_count, num_eqns_begin, num_eqns_end, num_eqns_delta, 
	       adic_jac_fill);
    print_times(times_adic, times_analytic, "ADIC", p, w, w_name);
#endif

    // Original Fad
    std::vector<double> times_sfad =
      do_times_sfad<Sacado::Fad::SFad>(
	work_count, num_eqns_begin, num_eqns_end, num_eqns_delta);
    print_times(times_sfad, times_analytic, "SFAD", p, w, w_name);

    std::vector<double> times_slfad =
      do_times_sfad<Sacado::Fad::SLFad>(
	work_count, num_eqns_begin, num_eqns_end, num_eqns_delta);
    print_times(times_slfad, times_analytic, "SLFAD", p, w, w_name);

    std::vector<double> times_dfad =
      do_times_fad<Sacado::Fad::DFad>(
	work_count, num_eqns_begin, num_eqns_end, num_eqns_delta);
    print_times(times_dfad, times_analytic, "DFAD", p, w, w_name);
    
    
    // ELR Fad
    std::vector<double> times_elr_sfad =
      do_times_sfad<Sacado::ELRFad::SFad>(
	work_count, num_eqns_begin, num_eqns_end, num_eqns_delta);
    print_times(times_elr_sfad, times_analytic, "ELRSFAD", p, w, w_name);

    std::vector<double> times_elr_slfad =
      do_times_sfad<Sacado::ELRFad::SLFad>(
	work_count, num_eqns_begin, num_eqns_end, num_eqns_delta);
    print_times(times_elr_slfad, times_analytic, "ELRSLFAD", p, w, w_name);

    std::vector<double> times_elr_dfad =
      do_times_fad<Sacado::ELRFad::DFad>(
	work_count, num_eqns_begin, num_eqns_end, num_eqns_delta);
    print_times(times_elr_dfad, times_analytic, "ELRDFAD", p, w, w_name);
   

    // Cache Fad
    std::vector<double> times_cache_sfad =
      do_times_sfad<Sacado::CacheFad::SFad>(
	work_count, num_eqns_begin, num_eqns_end, num_eqns_delta);
    print_times(times_cache_sfad, times_analytic, "CacheSFAD", p, w, w_name);

    std::vector<double> times_cache_slfad =
      do_times_sfad<Sacado::CacheFad::SLFad>(
	work_count, num_eqns_begin, num_eqns_end, num_eqns_delta);
    print_times(times_cache_slfad, times_analytic, "CacheSLFAD", p, w, w_name);

    std::vector<double> times_cache_dfad =
      do_times_fad<Sacado::CacheFad::DFad>(
	work_count, num_eqns_begin, num_eqns_end, num_eqns_delta);
    print_times(times_cache_dfad, times_analytic, "CacheDFAD", p, w, w_name);

    // ELR Cache Fad
    std::vector<double> times_cache_elr_sfad =
      do_times_sfad<Sacado::ELRCacheFad::SFad>(
	work_count, num_eqns_begin, num_eqns_end, num_eqns_delta);
    print_times(times_cache_elr_sfad, times_analytic, "ELRCacheSFAD", p, w, w_name);

    std::vector<double> times_cache_elr_slfad =
      do_times_sfad<Sacado::ELRCacheFad::SLFad>(
	work_count, num_eqns_begin, num_eqns_end, num_eqns_delta);
    print_times(times_cache_elr_slfad, times_analytic, "ELRCacheSLFAD", p, w, w_name);

    std::vector<double> times_cache_elr_dfad =
      do_times_fad<Sacado::ELRCacheFad::DFad>(
	work_count, num_eqns_begin, num_eqns_end, num_eqns_delta);
    print_times(times_cache_elr_dfad, times_analytic, "ELRCacheDFAD", p, w, w_name);
    
  }
  catch (std::exception& e) {
    std::cout << e.what() << std::endl;
    ierr = 1;
  }
  catch (const char *s) {
    std::cout << s << std::endl;
    ierr = 1;
  }
  catch (...) {
    std::cout << "Caught unknown exception!" << std::endl;
    ierr = 1;
  }

  return ierr;
}