Example #1
int main(int argc, char *argv[])
{
  Teuchos::GlobalMPISession session(&argc, &argv);
  RCP<const Teuchos::Comm<int> > tcomm = Teuchos::DefaultComm<int>::getComm();
  int rank = tcomm->getRank();
  int nParts = tcomm->getSize();
  bool doRemap = false;
  string filename = "USAir97";

  // Read run-time options.
  Teuchos::CommandLineProcessor cmdp (false, false);
  cmdp.setOption("file", &filename, "Name of the Matrix Market file to read");
  cmdp.setOption("nparts", &nParts, "Number of parts.");
  cmdp.setOption("remap", "no-remap", &doRemap, "Remap part numbers.");
  cmdp.parse(argc, argv);

  meshCoordinatesTest(tcomm);

  testFromDataFile(tcomm, nParts, filename, doRemap);

  if (rank == 0)
    serialTest(nParts, doRemap);

  if (rank == 0)
    std::cout << "PASS" << std::endl;
}
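A note on Example #1: it discards the return value of cmdp.parse(). A minimal sketch of the usual result check (the same pattern Examples #2 and #4 below use; not part of the original example):

  const Teuchos::CommandLineProcessor::EParseCommandLineReturn parseReturn =
    cmdp.parse (argc, argv);
  if (parseReturn == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) {
    return 0;  // --help was printed; nothing else to do
  }
  if (parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
    return -1; // unrecognized option or bad value
  }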
Example #2
int
main (int argc, char* argv[])
{
  using KokkosBlas::Impl::testOverScalarsAndLayoutsAndDevices;
  using std::cout;
  using std::endl;
  Teuchos::oblackholestream blackHole;
  Teuchos::GlobalMPISession mpiSession (&argc, &argv, &blackHole);
  Kokkos::initialize (argc, argv);

#ifdef HAVE_MPI
  RCP<const Comm<int> > comm = rcp (new Teuchos::MpiComm<int> (MPI_COMM_WORLD));
#else
  RCP<const Comm<int> > comm = rcp (new Teuchos::SerialComm<int> ());
#endif // HAVE_MPI
  const int myRank = comm->getRank ();

  // Number of columns in the 2-D View(s) to test.
  int numCols = 3;
  bool oneCol = false;
  bool testComplex = true;

  Teuchos::CommandLineProcessor cmdp (false, true);
  cmdp.setOption ("numCols", &numCols,
                  "Number of columns in the 2-D View(s) to test");
  cmdp.setOption ("oneCol", "noOneCol", &oneCol, "Whether to test the 1-D View "
                  "(single-column) versions of the kernels");
  cmdp.setOption ("testComplex", "noTestComplex", &testComplex,
                  "Whether to test complex arithmetic");
  if (cmdp.parse (argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
    if (myRank == 0) {
      cout << "TEST FAILED to parse command-line arguments!" << endl;
    }
    return EXIT_FAILURE;
  }

  bool curSuccess = true;
  bool success = true;

  // Always test with numCols=1 first.
  curSuccess = testOverScalarsAndLayoutsAndDevices (cout, 1, oneCol, testComplex);
  success = curSuccess && success;
  if (numCols != 1) {
    curSuccess = testOverScalarsAndLayoutsAndDevices (cout, numCols,
                                                      oneCol, testComplex);
    success = curSuccess && success;
  }
  if (success) {
    if (myRank == 0) {
      cout << "End Result: TEST PASSED" << endl;
    }
  } else {
    if (myRank == 0) {
      cout << "End Result: TEST FAILED" << endl;
    }
  }
  Kokkos::finalize ();
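  // NOTE: failure is signaled by the "End Result" string printed above;
  // the process itself always exits with EXIT_SUCCESS.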
  return EXIT_SUCCESS;
}
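Example #3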
void
parseCommandLineArguments (Teuchos::CommandLineProcessor& cmdp,
                           bool& printedHelp,
                           int argc,
                           char* argv[],
                           int& nx,
                           int& ny,
                           int& nz,
                           std::string& xmlInputParamsFile,
                           std::string& solverName,
                           bool& verbose,
                           bool& debug)
{
  using Teuchos::CommandLineProcessor;

  const CommandLineProcessor::EParseCommandLineReturn parseResult =
    cmdp.parse (argc, argv);
  if (parseResult == CommandLineProcessor::PARSE_HELP_PRINTED) {
    printedHelp = true;
  }
  else {
    printedHelp = false;
    TEUCHOS_TEST_FOR_EXCEPTION(
      parseResult != CommandLineProcessor::PARSE_SUCCESSFUL,
      std::invalid_argument, "Failed to parse command-line arguments.");
    TEUCHOS_TEST_FOR_EXCEPTION(
      xmlInputParamsFile == "" && (nx <= 0 || ny <= 0 || nz <= 0),
      std::invalid_argument, "If no XML parameters filename is specified (via "
      "--inputParams), then the number of cells along each dimension of the "
      "mesh (--nx, --ny, and --nz) must be positive.");
  }
}
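Example #4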
int main (int argc, char *argv[]) {

  Teuchos::CommandLineProcessor clp;
  clp.setDocString("This example program demonstrates ICholByBlocks algorithm on Kokkos::Threads execution space.\n");

  int nthreads = 1;
  clp.setOption("nthreads", &nthreads, "Number of threads");

  int max_task_dependence = 10;
  clp.setOption("max-task-dependence", &max_task_dependence, "Max number of task dependence");

  int team_size = 1;
  clp.setOption("team-size", &team_size, "Team size");

  bool verbose = false;
  clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing");

  string file_input = "test.mtx";
  clp.setOption("file-input", &file_input, "Input file (MatrixMarket SPD matrix)");

  clp.recogniseAllOptions(true);
  clp.throwExceptions(false);

  Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv );

  if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0;
  if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL  ) return -1;
  
  int r_val = 0;
  {
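    // Initialize the host execution space, run the example on it, and
    // finalize the space before reporting the return value.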
    exec_space::initialize(nthreads);
    exec_space::print_configuration(cout, true);
    
    r_val = exampleICholByBlocks
      <value_type,ordinal_type,size_type,exec_space,void>
      (file_input, max_task_dependence, team_size, verbose);

    exec_space::finalize();
  }

  return r_val;
}
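Example #5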
int main (int argc, char *argv[]) {

  Teuchos::CommandLineProcessor clp;
  clp.setDocString("Tacho::DenseMatrixBase examples on Pthreads execution space.\n");

  int nthreads = 0;
  clp.setOption("nthreads", &nthreads, "Number of threads");

  int numa = 0;
  clp.setOption("numa", &numa, "Number of numa node");

  int core_per_numa = 0;
  clp.setOption("core-per-numa", &core_per_numa, "Number of cores per numa node");

  bool verbose = false;
  clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing");

  clp.recogniseAllOptions(true);
  clp.throwExceptions(false);

  Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv );

  if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0;
  if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL  ) return -1;

  int r_val = 0;
  {
    exec_space::initialize(nthreads, numa, core_per_numa);

    r_val = exampleCrsMatrixBase<exec_space>
      (verbose);
    
    exec_space::finalize();
  }
  
  return r_val;
}
Example #6
int main(int argc, char *argv[]) {

  Teuchos::CommandLineProcessor clp;
  clp.setDocString("Intrepid2::DynRankView_PerfTest01.\n");

  int nworkset = 8;
  clp.setOption("nworkset", &nworkset, "# of worksets");

  int C = 4096;
  clp.setOption("C", &C, "# of Cells in a workset");

  int order = 2;
  clp.setOption("order", &order, "cubature order");

  bool verbose = true;
  clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing");

  clp.recogniseAllOptions(true);
  clp.throwExceptions(false);

  Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv );

  if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0;
  if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL  ) return -1;

  Kokkos::initialize();

  if (verbose)
    std::cout << "Testing datatype double\n";

  const int r_val_double = Intrepid2::Test::ComputeBasis_HGRAD
    <double,Kokkos::Cuda>(nworkset,
                          C,
                          order,
                          verbose);

  Kokkos::finalize();
  return r_val_double;
}
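Example #7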
void
setUpCommandLineArguments (Teuchos::CommandLineProcessor& cmdp,
                           int& nx,
                           int& ny,
                           int& nz,
                           std::string& xmlInputParamsFile,
                           std::string& solverName,
                           double& tol,
                           int& maxNumIters,
                           bool& verbose,
                           bool& debug)
{
  cmdp.setOption ("nx", &nx, "Number of cells along the x dimension");
  cmdp.setOption ("ny", &ny, "Number of cells along the y dimension");
  cmdp.setOption ("nz", &nz, "Number of cells along the z dimension");
  cmdp.setOption ("inputParams", &xmlInputParamsFile, "XML file of input "
                  "parameters, which we read if specified and not \"\".  "
                  "If it has a \"meshInput\" parameter, we use its "
                  "std::string value as the Pamgen mesh specification.  "
                  "Otherwise, we tell Pamgen to make a cube, using "
                  "nx, ny, and nz.");
  cmdp.setOption ("solverName", &solverName, "Name of iterative linear solver "
                  "to use for solving the linear system.  You may use any name "
                  "that Belos::SolverFactory understands.  Examples include "
                  "\"GMRES\" and \"CG\".");
  cmdp.setOption ("tol", &tol, "Tolerance for the linear solve.  If not "
                  "specified, this is read from the input ParameterList (read "
                  "from the XML file).  If specified, this overrides any value "
                  "in the input ParameterList.");
  cmdp.setOption ("maxNumIters", &maxNumIters, "Maximum number of iterations "
                  "in the linear solve.  If not specified, this is read from "
                  "the input ParameterList (read from the XML file).  If "
                  "specified, this overrides any value in the input "
                  "ParameterList.");
  cmdp.setOption ("verbose", "quiet", &verbose,
                  "Whether to print verbose status output.");
  cmdp.setOption ("debug", "release", &debug,
                  "Whether to print copious debugging output to stderr.");
}
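The helper above pairs with parseCommandLineArguments from Example #3. A minimal sketch of how a driver might wire the two together (the run name and default values are assumptions, not from the original sources):

int run (int argc, char* argv[])
{
  Teuchos::CommandLineProcessor cmdp (false, true);
  int nx = 10, ny = 10, nz = 10;  // assumed defaults
  std::string xmlInputParamsFile;
  std::string solverName ("GMRES");
  double tol = 1.0e-8;
  int maxNumIters = 100;
  bool verbose = false, debug = false;

  // Register the options, then parse and validate them.
  setUpCommandLineArguments (cmdp, nx, ny, nz, xmlInputParamsFile,
                             solverName, tol, maxNumIters, verbose, debug);
  bool printedHelp = false;
  parseCommandLineArguments (cmdp, printedHelp, argc, argv, nx, ny, nz,
                             xmlInputParamsFile, solverName, verbose, debug);
  if (printedHelp) {
    return EXIT_SUCCESS; // --help was requested; nothing else to do
  }
  // ... set up the mesh and solve here ...
  return EXIT_SUCCESS;
}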
Example #8
int main(int argc, char *argv[])
{
  bool success = true;

  try {
    Teuchos::GlobalMPISession mpiSession(&argc, &argv, NULL);
    Teuchos::RCP< Teuchos::FancyOStream > out = 
      Teuchos::VerboseObjectBase::getDefaultOStream();
    
    // Setup command line options
    Teuchos::CommandLineProcessor CLP;
    int p = 3;
    CLP.setOption("p", &p, "Polynomial order");
    int d_min = 1;
    CLP.setOption("dmin", &d_min, "Starting stochastic dimension");
    int d_max = 12;
    CLP.setOption("dmax", &d_max, "Ending stochastic dimension");
    int nGrid = 64;
    CLP.setOption("n", &nGrid, "Number of spatial grid points in each dimension");
    int nIter = 1;
    CLP.setOption("niter", &nIter, "Number of iterations");
    bool test_block = true;
    CLP.setOption("block", "no-block", &test_block, "Use block algorithm");
    CLP.parse( argc, argv );
    
    bool print = false;
    bool check = false;
    performance_test_driver_epetra( 
      p , d_min , d_max , nGrid , nIter , print , test_block , check , *out );
  }
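  // Catches std::exception and unknown exceptions, prints the error to
  // std::cerr (the first argument enables printing), and sets success to
  // false if anything was thrown.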
  TEUCHOS_STANDARD_CATCH_STATEMENTS(true, std::cerr, success);
    
  if (!success)
    return -1;
  return 0;
}
Example #9
int
main (int argc, char *argv[])
{
  using Teuchos::inOutArg;
  using Teuchos::ParameterList;
  using Teuchos::parameterList;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcpFromRef;
  using std::endl;
  typedef double                          ST;
  typedef Epetra_Operator                 OP;
  typedef Epetra_MultiVector              MV;
  typedef Belos::OperatorTraits<ST,MV,OP> OPT;
  typedef Belos::MultiVecTraits<ST,MV>    MVT;

  // This calls MPI_Init and MPI_Finalize as necessary.
  Belos::Test::MPISession session (inOutArg (argc), inOutArg (argv));
  RCP<const Epetra_Comm> comm = session.getComm ();

  bool success = false;
  bool verbose = false;
  try {
    int MyPID = comm->MyPID ();

    //
    // Parameters to read from command-line processor
    //
    int frequency = -1;  // how often residuals are printed by solver
    int numRHS = 1;  // total number of right-hand sides to solve for
    int maxIters = 13000;  // maximum number of iterations for solver to use
    std::string filename ("bcsstk14.hb");
    double tol = 1.0e-5; // relative residual tolerance

    //
    // Read in command-line arguments
    //
    Teuchos::CommandLineProcessor cmdp (false, true);
    cmdp.setOption ("verbose", "quiet", &verbose, "Print messages and results.");
    cmdp.setOption ("frequency", &frequency, "Solvers frequency for printing "
        "residuals (#iters).");
    cmdp.setOption ("tol", &tol, "Relative residual tolerance used by MINRES "
        "solver.");
    cmdp.setOption ("filename", &filename, "Filename for Harwell-Boeing test "
        "matrix.");
    cmdp.setOption ("num-rhs", &numRHS, "Number of right-hand sides to solve.");
    cmdp.setOption ("max-iters", &maxIters, "Maximum number of iterations per "
        "linear system (-1 means \"adapt to problem/block size\").");
    if (cmdp.parse (argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
      return EXIT_FAILURE;
    }
    Teuchos::oblackholestream blackHole;
    std::ostream& verbOut = (verbose && MyPID == 0) ? std::cout : blackHole;

    //
    // Generate the linear system(s) to solve.
    //
    verbOut << "Generating the linear system(s) to solve" << endl << endl;
    RCP<Epetra_CrsMatrix> A;
    RCP<Epetra_MultiVector> B, X;
    RCP<Epetra_Map> rowMap;
    try {
      // This might change the number of right-hand sides, if we read in
      // a right-hand side from the Harwell-Boeing file.
      Belos::Util::createEpetraProblem (filename, &rowMap, &A, &B, &X, &MyPID, numRHS);
    } catch (std::exception& e) {
      TEUCHOS_TEST_FOR_EXCEPTION (true, std::runtime_error,
          "Failed to create Epetra problem for matrix "
          "filename \"" << filename << "\".  "
          "createEpetraProblem() reports the following "
          "error: " << e.what());
    }
    //
    // Compute the initial residual norm of the problem, so we can see
    // by how much it improved after the solve.
    //
    std::vector<double> initialResidualNorms (numRHS);
    std::vector<double> initialResidualInfNorms (numRHS);
    Epetra_MultiVector R (*rowMap, numRHS);
    OPT::Apply (*A, *X, R);
    MVT::MvAddMv (-1.0, R, 1.0, *B, R); // R := -(A*X) + B.
    MVT::MvNorm (R, initialResidualNorms);
    MVT::MvNorm (R, initialResidualInfNorms, Belos::InfNorm);
    if (verbose) {
      verbOut << "Initial residual 2-norms:            \t";
      for (int i = 0; i < numRHS; ++i) {
        verbOut << initialResidualNorms[i];
        if (i < numRHS-1) {
          verbOut << ", ";
        }
      }
      verbOut << endl << "Initial residual Inf-norms:          \t";
      for (int i = 0; i < numRHS; ++i) {
        verbOut << initialResidualInfNorms[i];
        if (i < numRHS-1) {
          verbOut << ", ";
        }
      }
      verbOut << endl;
    }

    std::vector<double> rhs2Norms (numRHS);
    std::vector<double> rhsInfNorms (numRHS);
    MVT::MvNorm (*B, rhs2Norms);
    MVT::MvNorm (*B, rhsInfNorms, Belos::InfNorm);
    if (verbose) {
      verbOut << "Right-hand side 2-norms:             \t";
      for (int i = 0; i < numRHS; ++i) {
        verbOut << rhs2Norms[i];
        if (i < numRHS-1) {
          verbOut << ", ";
        }
      }
      verbOut << endl << "Right-hand side Inf-norms:           \t";
      for (int i = 0; i < numRHS; ++i) {
        verbOut << rhsInfNorms[i];
        if (i < numRHS-1) {
          verbOut << ", ";
        }
      }
      verbOut << endl;
    }

    std::vector<double> initialGuess2Norms (numRHS);
    std::vector<double> initialGuessInfNorms (numRHS);
    MVT::MvNorm (*X, initialGuess2Norms);
    MVT::MvNorm (*X, initialGuessInfNorms, Belos::InfNorm);
    if (verbose) {
      verbOut << "Initial guess 2-norms:               \t";
      for (int i = 0; i < numRHS; ++i) {
        verbOut << initialGuess2Norms[i];
        if (i < numRHS-1) {
          verbOut << ", ";
        }
      }
      verbOut << endl << "Initial guess Inf-norms:             \t";
      for (int i = 0; i < numRHS; ++i) {
        verbOut << initialGuessInfNorms[i];
        if (i < numRHS-1) {
          verbOut << ", ";
        }
      }
      verbOut << endl;
    }
    //
    // Compute the infinity-norm of A.
    //
    const double normOfA = A->NormInf ();
    verbOut << "||A||_inf:                           \t" << normOfA << endl;
    //
    // Compute ||A|| ||X_i|| + ||B_i|| for each right-hand side B_i.
    //
    std::vector<double> scaleFactors (numRHS);
    for (int i = 0; i < numRHS; ++i) {
      scaleFactors[i] = normOfA * initialGuessInfNorms[i] + rhsInfNorms[i];
    }
    if (verbose) {
      verbOut << "||A||_inf ||X_i||_inf + ||B_i||_inf: \t";
      for (int i = 0; i < numRHS; ++i) {
        verbOut << scaleFactors[i];
        if (i < numRHS-1) {
          verbOut << ", ";
        }
      }
      verbOut << endl;
    }

    //
    // Solve using Belos
    //
    verbOut << endl << "Setting up Belos" << endl;
    const int NumGlobalElements = B->GlobalLength();

    // Set up Belos solver parameters.
    RCP<ParameterList> belosList = parameterList ("MINRES");
    belosList->set ("Maximum Iterations", maxIters);
    belosList->set ("Convergence Tolerance", tol);
    if (verbose) {
      belosList->set ("Verbosity", Belos::Errors + Belos::Warnings +
          Belos::IterationDetails + Belos::OrthoDetails +
          Belos::FinalSummary + Belos::TimingDetails + Belos::Debug);
      belosList->set ("Output Frequency", frequency);
    }
    else {
      belosList->set ("Verbosity", Belos::Errors + Belos::Warnings);
    }
    belosList->set ("Output Stream", rcpFromRef (verbOut));

    // Construct an unpreconditioned linear problem instance.
    typedef Belos::LinearProblem<double,MV,OP> prob_type;
    RCP<prob_type> problem = rcp (new prob_type (A, X, B));
    if (! problem->setProblem()) {
      verbOut << endl << "ERROR:  Failed to set up Belos::LinearProblem!" << endl;
      return EXIT_FAILURE;
    }

    // Create an iterative solver manager.
    Belos::SolverFactory<double, MV, OP> factory;
    RCP<Belos::SolverManager<double,MV,OP> > newSolver =
      factory.create ("MINRES", belosList);
    newSolver->setProblem (problem);

    // Print out information about problem.  Make sure to use the
    // information as stored in the Belos ParameterList, so that we know
    // what the solver will do.
    verbOut << endl
      << "Dimension of matrix: " << NumGlobalElements << endl
      << "Number of right-hand sides: " << numRHS << endl
      << "Max number of MINRES iterations: "
      << belosList->get<int> ("Maximum Iterations") << endl
      << "Relative residual tolerance: "
      << belosList->get<double> ("Convergence Tolerance") << endl
      << "Output frequency: "
      << belosList->get<int> ("Output Frequency") << endl
      << endl;

    // Solve the linear system.
    verbOut << "Solving the linear system" << endl << endl;
    Belos::ReturnType ret = newSolver->solve();
    verbOut << "Belos results:" << endl
      << "- Number of iterations: "
      << newSolver->getNumIters () << endl
      << "- " << (ret == Belos::Converged ? "Converged" : "Not converged")
      << endl;
    //
    // After the solve, compute residual(s) explicitly.  This tests
    // whether the Belos solver did so correctly.
    //
    std::vector<double> absoluteResidualNorms (numRHS);
    OPT::Apply (*A, *X, R);
    MVT::MvAddMv (-1.0, R, 1.0, *B, R);
    MVT::MvNorm (R, absoluteResidualNorms);

    std::vector<double> relativeResidualNorms (numRHS);
    for (int i = 0; i < numRHS; ++i) {
      relativeResidualNorms[i] = (initialResidualNorms[i] == 0.0) ?
        absoluteResidualNorms[i] :
        absoluteResidualNorms[i] / initialResidualNorms[i];
    }

    verbOut << "---------- Computed relative residual norms ----------"
      << endl << endl;
    bool badRes = false;
    if (verbose) {
      for (int i = 0; i < numRHS; ++i) {
        const double actRes = relativeResidualNorms[i];
        verbOut << "Problem " << i << " : \t" << actRes << endl;
        if (actRes > tol) {
          badRes = true;
        }
      }
    }

#   ifdef BELOS_TEUCHOS_TIME_MONITOR
    Teuchos::TimeMonitor::summarize (verbOut);
#   endif // BELOS_TEUCHOS_TIME_MONITOR

    success = (ret == Belos::Converged && !badRes);

    if (success) {
      verbOut << endl << "End Result: TEST PASSED" << endl;
    } else {
      verbOut << endl << "End Result: TEST FAILED" << endl;
    }
  } // try
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

  return success ? EXIT_SUCCESS : EXIT_FAILURE;
} // end test_minres_hb.cpp
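Example #10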
int main_(Teuchos::CommandLineProcessor &clp, int argc, char *argv[]) {
#include <MueLu_UseShortNames.hpp>
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::TimeMonitor;

  // =========================================================================
  // MPI initialization using Teuchos
  // =========================================================================
  Teuchos::GlobalMPISession mpiSession(&argc, &argv, NULL);

  RCP< const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();
  int numProc = comm->getSize();
  int myRank  = comm->getRank();

  // =========================================================================
  // Parameters initialization
  // =========================================================================
  ::Xpetra::Parameters xpetraParameters(clp);

  bool runHeavyTests = false;
  clp.setOption("heavytests", "noheavytests",  &runHeavyTests, "whether to exercise tests that take a long time to run");

  clp.recogniseAllOptions(true);
  switch (clp.parse(argc,argv)) {
    case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED:        return EXIT_SUCCESS;
    case Teuchos::CommandLineProcessor::PARSE_ERROR:
    case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE;
    case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL:          break;
  }

  Xpetra::UnderlyingLib lib = xpetraParameters.GetLib();

  // =========================================================================
  // Problem construction
  // =========================================================================
  ParameterList matrixParameters;
  matrixParameters.set("nx",         Teuchos::as<GO>(9999));
  matrixParameters.set("matrixType", "Laplace1D");
  RCP<Matrix>      A           = MueLuTests::TestHelpers::TestFactory<SC, LO, GO, NO>::Build1DPoisson(matrixParameters.get<GO>("nx"), lib);
  RCP<MultiVector> coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates<SC,LO,GO,Map,MultiVector>("1D", A->getRowMap(), matrixParameters);

  std::string outDir = "Output/";

  std::vector<std::string> dirList;
  if (runHeavyTests) {
    dirList.push_back("EasyParameterListInterpreter-heavy/");
    dirList.push_back("FactoryParameterListInterpreter-heavy/");
  } else {
    dirList.push_back("EasyParameterListInterpreter/");
    dirList.push_back("FactoryParameterListInterpreter/");
  }
#if defined(HAVE_MPI) && defined(HAVE_MUELU_ISORROPIA) && defined(HAVE_AMESOS2_KLU2)
  // The ML interpreter has internal ifdefs, which means that the resulting
  // output would depend on the configuration (the regular interpreter does
  // not have that). Therefore, we need to stabilize the configuration here.
  // In addition, we run the ML parameter list tests only if KLU is available.
  dirList.push_back("MLParameterListInterpreter/");
  dirList.push_back("MLParameterListInterpreter2/");
#endif
  int numLists = dirList.size();

  bool failed = false;
  Teuchos::Time timer("Interpreter timer");
  //double lastTime = timer.wallTime();
  for (int k = 0; k < numLists; k++) {
    Teuchos::ArrayRCP<std::string> fileList = MueLuTests::TestHelpers::GetFileList(dirList[k],
      (numProc == 1 ? std::string(".xml") : std::string("_np" + Teuchos::toString(numProc) + ".xml")));

    for (int i = 0; i < fileList.size(); i++) {
      // Set seed
      std::srand(12345);

      // Reset (potentially) cached value of the estimate
      A->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one());

      std::string xmlFile  = dirList[k] + fileList[i];
      std::string outFile  = outDir     + fileList[i];
      std::string baseFile = outFile.substr(0, outFile.find_last_of('.'));
      std::size_t found = baseFile.find("_np");
      if (numProc == 1 && found != std::string::npos) {
#ifdef HAVE_MPI
        baseFile = baseFile.substr(0, found);
#else
        std::cout << "Skipping \"" << xmlFile << "\" as MPI is not enabled" << std::endl;
        continue;
#endif
      }
      baseFile = baseFile + (lib == Xpetra::UseEpetra ? "_epetra" : "_tpetra");
      std::string goldFile = baseFile + ".gold";
      std::ifstream f(goldFile.c_str());
      if (!f.good()) {
        if (myRank == 0)
          std::cout << "Warning: comparison file " << goldFile << " not found.  Skipping test" << std::endl;
        continue;
      }

      std::filebuf    buffer;
      std::streambuf* oldbuffer = NULL;
      if (myRank == 0) {
        // Redirect output
        buffer.open((baseFile + ".out").c_str(), std::ios::out);
        oldbuffer = std::cout.rdbuf(&buffer);
      }

      // NOTE: we cannot use ParameterListInterpreter(xmlFile, comm), because we want to update the ParameterList
      // first to include "test" verbosity
      Teuchos::ParameterList paramList;
      Teuchos::updateParametersFromXmlFileAndBroadcast(xmlFile, Teuchos::Ptr<Teuchos::ParameterList>(&paramList), *comm);
      if      (dirList[k] == "EasyParameterListInterpreter/" || dirList[k] == "EasyParameterListInterpreter-heavy/")
        paramList.set("verbosity", "test");
      else if (dirList[k] == "FactoryParameterListInterpreter/" || dirList[k] == "FactoryParameterListInterpreter-heavy/")
        paramList.sublist("Hierarchy").set("verbosity", "Test");
      else if (dirList[k] == "MLParameterListInterpreter/")
        paramList.set("ML output",     42);
      else if (dirList[k] == "MLParameterListInterpreter2/")
        paramList.set("ML output",     10);

      try {
        timer.start();
        Teuchos::RCP<HierarchyManager> mueluFactory;

        // create parameter list interpreter
        // here we have to distinguish between the general MueLu parameter list interpreter
        // and the ML parameter list interpreter. Note that the ML parameter interpreter also
        // works with Tpetra matrices.
        if (dirList[k] == "EasyParameterListInterpreter/"         ||
            dirList[k] == "EasyParameterListInterpreter-heavy/"   ||
            dirList[k] == "FactoryParameterListInterpreter/"      ||
            dirList[k] == "FactoryParameterListInterpreter-heavy/") {
          mueluFactory = Teuchos::rcp(new ParameterListInterpreter(paramList));

        } else if (dirList[k] == "MLParameterListInterpreter/") {
          mueluFactory = Teuchos::rcp(new MLParameterListInterpreter(paramList));

        } else if (dirList[k] == "MLParameterListInterpreter2/") {
          //std::cout << "ML ParameterList: " << std::endl;
          //std::cout << paramList << std::endl;
          RCP<ParameterList> mueluParamList = Teuchos::getParametersFromXmlString(MueLu::ML2MueLuParameterTranslator::translate(paramList,"SA"));
          //std::cout << "MueLu ParameterList: " << std::endl;
          //std::cout << *mueluParamList << std::endl;
          mueluFactory = Teuchos::rcp(new ParameterListInterpreter(*mueluParamList));
        }

        RCP<Hierarchy> H = mueluFactory->CreateHierarchy();

        H->GetLevel(0)->template Set<RCP<Matrix> >("A", A);

        if (dirList[k] == "MLParameterListInterpreter/") {
          // MLParameterInterpreter needs the nullspace information if rebalancing is active!
          // add default constant null space vector
          RCP<MultiVector> nullspace = MultiVectorFactory::Build(A->getRowMap(), 1);
          nullspace->putScalar(1.0);
          H->GetLevel(0)->Set("Nullspace", nullspace);
        }

        H->GetLevel(0)->Set("Coordinates", coordinates);

        mueluFactory->SetupHierarchy(*H);

        if (strncmp(fileList[i].c_str(), "reuse", 5) == 0) {
          // Build the Hierarchy the second time
          // Should be faster if we actually do the reuse
          A->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one());
          mueluFactory->SetupHierarchy(*H);
        }

        timer.stop();
      } catch (Teuchos::ExceptionBase& e) {
        std::string msg = e.what();
        msg = msg.substr(msg.find_last_of('\n')+1);

        if (myRank == 0) {
          std::cout << "Caught exception: " << msg << std::endl;

          // Redirect output back
          std::cout.rdbuf(oldbuffer);
          buffer.close();
        }

        if (msg == "Zoltan interface is not available" ||
            msg == "Zoltan2 interface is not available" ||
            msg == "MueLu::FactoryFactory:BuildFactory(): Cannot create a Zoltan2Interface object: Zoltan2 is disabled: HAVE_MUELU_ZOLTAN2 && HAVE_MPI == false.") {

          if (myRank == 0)
            std::cout << xmlFile << ": skipped (missing library)" << std::endl;

          continue;
        }
      }

      std::string cmd;
      if (myRank == 0) {
        // Redirect output back
        std::cout.rdbuf(oldbuffer);
        buffer.close();

        // Create a copy of outputs
        cmd = "cp -f ";
        system((cmd + baseFile + ".gold " + baseFile + ".gold_filtered").c_str());
        system((cmd + baseFile + ".out " + baseFile + ".out_filtered").c_str());

        // Tpetra produces different eigenvalues in Chebyshev because it uses
        // std::rand() to generate random vectors; std::rand() may be seeded
        // differently and may use a different algorithm from one gcc version
        // to another, or in another compiler (like clang).
        // This would lead to this test always failing.
        // NOTE: Epetra, on the other hand, rolls its own random number
        // generator, which always produces the same results.

        // Ignore the value of "lambdaMax"
        run_sed("'s/lambdaMax: [0-9]*.[0-9]*/lambdaMax = <ignored>/'", baseFile);

        // Ignore the value of "lambdaMin"
        run_sed("'s/lambdaMin: [0-9]*.[0-9]*/lambdaMin = <ignored>/'", baseFile);

        // Ignore the value of "chebyshev: max eigenvalue"
        // NOTE: we skip lines with default value ([default])
        run_sed("'/[default]/! s/chebyshev: max eigenvalue = [0-9]*.[0-9]*/chebyshev: max eigenvalue = <ignored>/'", baseFile);

        // Ignore the exact type of direct solver (it is selected
        // semi-automatically, depending on how Trilinos was configured).
        run_sed("'s/Amesos\\([2]*\\)Smoother{type = .*}/Amesos\\1Smoother{type = <ignored>}/'", baseFile);
        run_sed("'s/SuperLU solver interface, direct solve/<Direct> solver interface/'", baseFile);
        run_sed("'s/KLU2 solver interface/<Direct> solver interface/'", baseFile);
        run_sed("'s/Basker solver interface/<Direct> solver interface/'", baseFile);

        // Strip template args for some classes
        std::vector<std::string> classes;
        classes.push_back("Xpetra::Matrix");
        classes.push_back("MueLu::Constraint");
        classes.push_back("MueLu::SmootherPrototype");
        for (size_t q = 0; q < classes.size(); q++)
          run_sed("'s/" + classes[q] + "<.*>/" + classes[q] + "<ignored> >/'", baseFile);

#ifdef __APPLE__
        // Some Macs print out ptrs as 0x0 instead of 0; fix that
        run_sed("'/RCP/ s/=0x0/=0/g'", baseFile);
#endif

        // Run comparison (ignoring whitespaces)
        cmd = "diff -u -w -I\"^\\s*$\" " + baseFile + ".gold_filtered " + baseFile + ".out_filtered";
        int ret = system(cmd.c_str());
        if (ret)
          failed = true;

        //std::ios_base::fmtflags ff(std::cout.flags());
        //std::cout.precision(2);
        //std::cout << xmlFile << " (" << std::setiosflags(std::ios::fixed)
        //          << timer.wallTime() - lastTime << " sec.) : " << (ret ? "failed" : "passed") << std::endl;
        //lastTime = timer.wallTime();
        //std::cout.flags(ff); // reset flags to whatever they were prior to printing time
        std::cout << xmlFile << " : " << (ret ? "failed" : "passed") << std::endl;
      }
    }
  }

  if (myRank == 0)
    std::cout << std::endl << "End Result: TEST " << (failed ? "FAILED" : "PASSED") << std::endl;

  return (failed ? EXIT_FAILURE : EXIT_SUCCESS);
}
Example #11
// calls MPI_Init and MPI_Finalize
int main (int argc, char* argv[])
{
  using Teuchos::RCP;
  using Teuchos::rcp_dynamic_cast;
  using panzer::StrPureBasisPair;
  using panzer::StrPureBasisComp;

  PHX::InitializeKokkosDevice();

  Teuchos::GlobalMPISession mpiSession(&argc, &argv);
  RCP<Epetra_Comm> Comm = Teuchos::rcp(new Epetra_MpiComm(MPI_COMM_WORLD));
  Teuchos::FancyOStream out(Teuchos::rcpFromRef(std::cout));
  out.setOutputToRootOnly(0);
  out.setShowProcRank(true);

  const std::size_t workset_size = 20;

  ProblemOptions po;
  {
    // Set up this problem with two discontinuous (A, C) and one continuous (B)
    // fields.
    //   On fields A are imposed Neumann and weak Dirichlet matching interface conditions.
    //   On fields C are imposed Robin interface conditions, with one-way
    // coupling to field B.
    //   If the Robin condition is linear, then the default setup is such that
    // A, B, C all converge to the same solution for which the Solution
    // evaluator provides the exact expression. A response function reports the
    // error so a convergence test can be wrapped around multiple runs of this
    // program.
    //   If the Robin condition is nonlinear, then the source is 0 and the
    // solution is two planes with a jump of 0.4 at the interface.

    Teuchos::CommandLineProcessor clp;
    po.nxelem = 10;
    clp.setOption("nx", &po.nxelem, "Number of elements in x direction");
    po.nonlinear_Robin = false;
    clp.setOption("nonlinear", "linear", &po.nonlinear_Robin,
                  "Use a nonlinear Robin interface condition");
    po.rtol = 1e-10;
    clp.setOption("rtol", &po.rtol, "Tolerance on residual norm");
    po.is3d = false;
    clp.setOption("3d", "2d", &po.is3d, "3D test instead of 2D");
    po.mesh_filename = "";
    clp.setOption("mesh-filename", &po.mesh_filename, "Optionally read from an Exodus mesh");
    po.test_Jacobian = false;
    clp.setOption("test-jacobian", "dont-test-jacobian", &po.test_Jacobian,
                  "Test Jacobian using finite differences.");
    po.generate_mesh_only = false;
    clp.setOption("generate-mesh-only", "dont-generate-mesh-only", &po.generate_mesh_only,
                  "Generate mesh, save, and quit.");
    try {
      clp.parse(argc, argv);
    } catch (...) {
      PHX::FinalizeKokkosDevice();
      return -1;
    }

    po.nyelem = po.nxelem;
    po.dof_names.push_back("A");
    po.dof_names.push_back("B");
    po.dof_names.push_back("C");
    po.ss_names.push_back("left");
    po.ss_names.push_back("vertical_0");
    po.ss_names.push_back("right");
    po.outer_iteration = true;
    po.check_error = true;

    out << po << "\n";
  }
  bool pass = true;

  // Can be overridden by the equation set.
  po.integration_order = 2;

  // Construct mesh.
  Teuchos::RCP<panzer_stk_classic::STK_MeshFactory> mesh_factory;
  if ( ! po.mesh_filename.empty()) {
    mesh_factory = Teuchos::rcp(new panzer_stk_classic::STK_ExodusReaderFactory(po.mesh_filename));
  } else {
    if (po.is3d)
      mesh_factory = Teuchos::rcp(new panzer_stk_classic::CubeHexMeshFactory);
    else
      mesh_factory = Teuchos::rcp(new panzer_stk_classic::SquareQuadMeshFactory);
  }

  if (po.mesh_filename.empty()) {
    // set mesh factory parameters
    RCP<Teuchos::ParameterList> pl = rcp(new Teuchos::ParameterList);
    pl->set("X Blocks",2);
    pl->set("Y Blocks",1);
    if (po.is3d) pl->set("Z Blocks",1);
    pl->set("X Elements", po.nxelem); // per block
    pl->set("Y Elements", po.nyelem);
    if (po.is3d) {
      pl->set("Z Elements", po.nyelem);
      pl->set("Build Interface Sidesets", true);
    }
    { // If np is even, put ranks in both x and y directions; if not, go with
      // default, which is x direction only. The x direction is the harder case.
      const int np = mpiSession.getNProc();
      if (np % 2 == 0 && np >= 4) {
        const int nxp = np/2, nyp = 2;
        pl->set("X Procs", nxp);
        pl->set("Y Procs", nyp);
      }
    }
    mesh_factory->setParameterList(pl);
  }

  RCP<panzer_stk_classic::STK_Interface> mesh = mesh_factory->buildUncommitedMesh(MPI_COMM_WORLD);
  if (po.generate_mesh_only) {
    mesh_factory->completeMeshConstruction(*mesh, MPI_COMM_WORLD);
    mesh->writeToExodus("output.exo");
    out << "Stopping after writing mesh because --generate-mesh-only was requested.\n";
    PHX::FinalizeKokkosDevice();
    return 0;
  }

  //todo mesh->getDimension() may not be right if mesh_factory is the Exodus
  // reader.
  po.is3d = mesh->getMetaData()->spatial_dimension() == 3;

  if ( ! po.mesh_filename.empty() && ! po.is3d) {
    // Special case.
    po.eb_names.clear();
    po.ss_names.clear();
    po.eb_names.push_back("silicon1");
    po.eb_names.push_back("silicon2");
    po.ss_names.push_back("anode");
    po.ss_names.push_back("interface");
    po.ss_names.push_back("cathode");    
  } else {
    if (po.is3d) {
      po.eb_names.push_back("eblock-0_0_0");
      po.eb_names.push_back("eblock-1_0_0");
    } else {
      po.eb_names.push_back("eblock-0_0");
      po.eb_names.push_back("eblock-1_0");
    }
  }

  // construct input physics and physics block
  ////////////////////////////////////////////////////////

  // factory definitions
  Teuchos::RCP<Example::EquationSetFactory> eqset_factory = 
    Teuchos::rcp(new Example::EquationSetFactory); // where poisson equation is defined
  Example::BCStrategyFactory bc_factory;    // where boundary conditions are defined 

  const Teuchos::RCP<Teuchos::ParameterList> ipb = Teuchos::parameterList("Physics Blocks");
  std::vector<panzer::BC> bcs;
  std::vector<RCP<panzer::PhysicsBlock> > physicsBlocks;
  {
    testInitialization(ipb, bcs, po);

    std::map<std::string,std::string> block_ids_to_physics_ids;
    std::map<std::string,Teuchos::RCP<const shards::CellTopology> > block_ids_to_cell_topo;

    block_ids_to_physics_ids[po.eb_names[0]] = "Poisson Physics Left";
    block_ids_to_physics_ids[po.eb_names[1]] = "Poisson Physics Right";

    block_ids_to_cell_topo[po.eb_names[0]] = mesh->getCellTopology(po.eb_names[0]);
    block_ids_to_cell_topo[po.eb_names[1]] = mesh->getCellTopology(po.eb_names[1]);
      
    // GlobalData sets ostream and parameter interface to physics
    Teuchos::RCP<panzer::GlobalData> gd = panzer::createGlobalData();
      
    // the physics block knows how to build and register evaluators with the field manager
    panzer::buildPhysicsBlocks(block_ids_to_physics_ids,
                               block_ids_to_cell_topo,
                               ipb,
                               po.integration_order,
                               workset_size,
                               eqset_factory,
                               gd,
                               false,
                               physicsBlocks);
  }

  // finish building mesh, set required field variables and mesh bulk data
  ////////////////////////////////////////////////////////////////////////
  {
    std::vector<Teuchos::RCP<panzer::PhysicsBlock> >::const_iterator physIter;
    for(physIter=physicsBlocks.begin();physIter!=physicsBlocks.end();++physIter) {
      Teuchos::RCP<const panzer::PhysicsBlock> pb = *physIter;
      const std::vector<StrPureBasisPair> & blockFields = pb->getProvidedDOFs();

      // insert all fields into a set
      std::set<StrPureBasisPair,StrPureBasisComp> fieldNames;
      fieldNames.insert(blockFields.begin(),blockFields.end());

      // add basis to DOF manager: block specific
      std::set<StrPureBasisPair,StrPureBasisComp>::const_iterator fieldItr;
      for (fieldItr=fieldNames.begin();fieldItr!=fieldNames.end();++fieldItr)
        mesh->addSolutionField(fieldItr->first,pb->elementBlockID());
    }

    mesh_factory->completeMeshConstruction(*mesh,MPI_COMM_WORLD);
  }

  // build worksets
  ////////////////////////////////////////////////////////

  Teuchos::RCP<panzer_stk_classic::WorksetFactory> wkstFactory
    = Teuchos::rcp(new panzer_stk_classic::WorksetFactory(mesh)); // build STK workset factory
  Teuchos::RCP<panzer::WorksetContainer> wkstContainer     // attach it to a workset container (uses lazy evaluation)
    = Teuchos::rcp(new panzer::WorksetContainer(wkstFactory,physicsBlocks,workset_size));

  std::vector<std::string> elementBlockNames;
  mesh->getElementBlockNames(elementBlockNames);
  std::map<std::string,Teuchos::RCP<std::vector<panzer::Workset> > > volume_worksets;
  panzer::getVolumeWorksetsFromContainer(*wkstContainer,elementBlockNames,volume_worksets);

  // build DOF Manager and linear object factory
  /////////////////////////////////////////////////////////////

  RCP<panzer::UniqueGlobalIndexer<int,int> > dofManager;
  {
    const Teuchos::RCP<panzer::ConnManager<int,int> >
      conn_manager = Teuchos::rcp(new panzer_stk_classic::STKConnManager<int>(mesh)); 
    const bool has_interface_condition = hasInterfaceCondition(bcs);
    if (has_interface_condition)
      buildInterfaceConnections(bcs, conn_manager);
    panzer::DOFManagerFactory<int,int> globalIndexerFactory;
    globalIndexerFactory.setEnableGhosting(has_interface_condition);
    dofManager = globalIndexerFactory.buildUniqueGlobalIndexer(
      Teuchos::opaqueWrapper(MPI_COMM_WORLD), physicsBlocks, conn_manager, "");
    if (has_interface_condition)
      checkInterfaceConnections(conn_manager, dofManager->getComm());
  }

  // construct some linear algebra object, build object to pass to evaluators
  Teuchos::RCP<panzer::LinearObjFactory<panzer::Traits> > linObjFactory
    = Teuchos::rcp(new panzer::EpetraLinearObjFactory<panzer::Traits,int>(Comm.getConst(),dofManager));

  std::vector<std::string> names;
  std::vector<std::vector<std::string> > eblocks;
  const int c_name_start = 3;
  {
    for (int i = 1; i <= 2; ++i) {
      names.push_back(strint(po.dof_names[0], i));
      eblocks.push_back(std::vector<std::string>());
      eblocks.back().push_back(po.eb_names[i-1]);
    }
    names.push_back(po.dof_names[1]);
    eblocks.push_back(std::vector<std::string>());
    eblocks.back().push_back(po.eb_names[0]);
    eblocks.back().push_back(po.eb_names[1]);
    if (po.dof_names.size() >= 3)
      for (int i = 1; i <= 2; ++i) {
        names.push_back(strint(po.dof_names[2], i));
        eblocks.push_back(std::vector<std::string>());
        eblocks.back().push_back(po.eb_names[i-1]);
      }
  }
  
  Teuchos::RCP<panzer::ResponseLibrary<panzer::Traits> > errorResponseLibrary
    = Teuchos::rcp(new panzer::ResponseLibrary<panzer::Traits>(wkstContainer, dofManager, linObjFactory));
  {
    for (std::size_t i = po.nonlinear_Robin ? c_name_start : 0; i < names.size(); ++i) {
      panzer::FunctionalResponse_Builder<int,int> builder;
      builder.comm = MPI_COMM_WORLD;
      builder.cubatureDegree = po.integration_order;
      builder.requiresCellIntegral = true;
      builder.quadPointField = names[i] + "_ERROR";
      errorResponseLibrary->addResponse(names[i] + " L2 Error", eblocks[i], builder);
    }
  }

  // setup closure model
  /////////////////////////////////////////////////////////////
 
  panzer::ClosureModelFactory_TemplateManager<panzer::Traits> cm_factory; 
  Example::ClosureModelFactory_TemplateBuilder cm_builder;
  cm_factory.buildObjects(cm_builder);

  Teuchos::ParameterList closure_models("Closure Models");
  {    
    Teuchos::ParameterList& s = closure_models.sublist("solid");
    for (std::vector<std::string>::const_iterator it = names.begin(); it != names.end(); ++it) {
      if (po.nonlinear_Robin)
        s.sublist(std::string("SOURCE_") + *it).set<double>("Value", 0.0);
      else
        s.sublist(std::string("SOURCE_") + *it).set<std::string>("Type", "SIMPLE SOURCE");
    }
    if (po.check_error)
      for (std::size_t i = po.nonlinear_Robin ? c_name_start : 0; i < names.size(); ++i) {
        const std::string err = names[i] + "_ERROR";
        s.sublist(err).set<std::string>("Type", "ERROR_CALC");
        s.sublist(err).set<std::string>("Field A", names[i]);
        s.sublist(err).set<std::string>("Field B", "EXACT");
      }
    if (po.check_error)
      s.sublist("EXACT").set<std::string>("Type", po.nonlinear_Robin ? "EXACT nonlinear Robin" : "EXACT");
  }

  Teuchos::ParameterList user_data("User Data"); // user data can be empty here

  // setup field manager builder
  /////////////////////////////////////////////////////////////

  Teuchos::RCP<panzer::FieldManagerBuilder> fmb = 
    Teuchos::rcp(new panzer::FieldManagerBuilder);
  fmb->setWorksetContainer(wkstContainer);
  fmb->setupVolumeFieldManagers(physicsBlocks,cm_factory,closure_models,*linObjFactory,user_data);
  fmb->setupBCFieldManagers(bcs,physicsBlocks,*eqset_factory,cm_factory,bc_factory,closure_models,
                            *linObjFactory,user_data);
  fmb->writeVolumeGraphvizDependencyFiles("volume", physicsBlocks);
  fmb->writeBCGraphvizDependencyFiles("bc");

  // setup assembly engine
  /////////////////////////////////////////////////////////////

  // build assembly engine: The key piece that brings together everything and 
  //                        drives and controls the assembly process. Just add
  //                        matrices and vectors
  panzer::AssemblyEngine_TemplateManager<panzer::Traits> ae_tm;
  panzer::AssemblyEngine_TemplateBuilder builder(fmb,linObjFactory);
  ae_tm.buildObjects(builder);

  user_data.set<int>("Workset Size", workset_size);
  if (po.check_error)
    errorResponseLibrary->buildResponseEvaluators(physicsBlocks, cm_factory, closure_models, user_data);

  // assemble and solve
  /////////////////////////////////////////////////////////////
  Teuchos::RCP<panzer::EpetraLinearObjContainer> ep_container;
  Teuchos::RCP<panzer::LinearObjContainer> ghost_container;
  if ( ! po.outer_iteration) {
    // Straightforward solve
    // build linear algebra objects: Ghost is for parallel assembly, it contains
    //                               local element contributions summed, the global IDs
    //                               are not unique. The non-ghosted or "global"
    //                               container will contain the sum over all processors
    //                               of the ghosted objects. The global indices are unique.
    ghost_container = linObjFactory->buildGhostedLinearObjContainer();
    RCP<panzer::LinearObjContainer> container = linObjFactory->buildLinearObjContainer();
    linObjFactory->initializeGhostedContainer(panzer::LinearObjContainer::X |
                                              panzer::LinearObjContainer::F |
                                              panzer::LinearObjContainer::Mat,*ghost_container);
    linObjFactory->initializeContainer(panzer::LinearObjContainer::X |
                                       panzer::LinearObjContainer::F |
                                       panzer::LinearObjContainer::Mat,*container);
    ghost_container->initialize();
    container->initialize();

    panzer::AssemblyEngineInArgs input(ghost_container,container);
    input.alpha = 0;
    input.beta = 1;

    // evaluate physics: This does both the Jacobian and residual at once
    ae_tm.getAsObject<panzer::Traits::Jacobian>()->evaluate(input);

    // solve linear system
    /////////////////////////////////////////////////////////////
    // convert generic linear object container to epetra container
    ep_container = rcp_dynamic_cast<panzer::EpetraLinearObjContainer>(container);

    // Setup the linear solve: notice A is used directly 
    Epetra_LinearProblem problem(&*ep_container->get_A(),&*ep_container->get_x(),&*ep_container->get_f()); 

    // build the solver
    AztecOO solver(problem);
    solver.SetAztecOption(AZ_solver,AZ_gmres); // we don't push out dirichlet conditions
    solver.SetAztecOption(AZ_precond,AZ_none);
    solver.SetAztecOption(AZ_kspace,300);
    solver.SetAztecOption(AZ_output,10);
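    // NOTE: this overrides the AZ_none preconditioner choice made above.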
    solver.SetAztecOption(AZ_precond,AZ_Jacobi);

    // solve the linear system
    solver.Iterate(1000,1e-5);

    // We have now solved for the residual correction from zero in the
    // context of a Newton solve:
    //     J*e = -r = -(f - J*0), where f = J*u.
    // Therefore J*e = -J*u, which implies e = -u, so we scale the
    // solution vector by -1.
    ep_container->get_x()->Scale(-1.0);
  } else { // Some analysis and an outer iteration if necessary.
    Teuchos::RCP<Epetra_CrsMatrix> J_fd;
    assembleAndSolve(ae_tm, linObjFactory, ep_container, ghost_container, po);
    if (po.test_Jacobian) {
      const double nwre = testJacobian(ae_tm, linObjFactory, ep_container->get_x());
      out << "TEST JACOBIAN " << nwre << "\n";
      if (nwre < 0 || nwre > 1e-5) pass = false;
    }
  }
  
  // output data (optional)
  /////////////////////////////////////////////////////////////

  // write linear system
  if (false) {
    EpetraExt::RowMatrixToMatrixMarketFile("a_op.mm",*ep_container->get_A());
    EpetraExt::VectorToMatrixMarketFile("x_vec.mm",*ep_container->get_x());
    EpetraExt::VectorToMatrixMarketFile("b_vec.mm",*ep_container->get_f());
  }

  if (po.check_error) {
    std::vector<Teuchos::RCP<panzer::Response_Functional<panzer::Traits::Residual> > > rfs(names.size());
    for (std::size_t i = po.nonlinear_Robin ? c_name_start : 0; i < names.size(); ++i) {
      Teuchos::RCP<panzer::ResponseBase>
        resp = errorResponseLibrary->getResponse<panzer::Traits::Residual>(names[i] + " L2 Error");
      rfs[i] = Teuchos::rcp_dynamic_cast<panzer::Response_Functional<panzer::Traits::Residual> >(resp);
      Teuchos::RCP<Thyra::VectorBase<double> > respVec = Thyra::createMember(rfs[i]->getVectorSpace());
      rfs[i]->setVector(respVec);
    }

    panzer::AssemblyEngineInArgs respInput(ghost_container, ep_container);
    respInput.alpha = 0;
    respInput.beta = 1;
    errorResponseLibrary->addResponsesToInArgs<panzer::Traits::Residual>(respInput);
    errorResponseLibrary->evaluate<panzer::Traits::Residual>(respInput);

    // Record a max error so we can use convergence_rate.py.
    double max_err = -1;
    for (std::size_t i = po.nonlinear_Robin ? c_name_start : 0; i < names.size(); ++i) {
      const double err = sqrt(rfs[i]->value);
      max_err = std::max(max_err, err);
      out << names[i] << " ERROR = " << err << "\n";
      if (err < 0 || err > (po.nonlinear_Robin ? 1e-10 : 0.03/(po.nxelem*po.nxelem/25.0)))
        pass = false;
    }
    out << "Error = " << max_err << "\n";
  }

  // Write solution except in the special case of a generated 3D mesh and #rank
  // > 1. In that case, something in the mesh-gen and rebalance code is causing
  // a failure in IossBridge::write_side_data_to_ioss.
  if ( ! (po.is3d && mpiSession.getNProc() > 1 && po.mesh_filename.empty())) {
    // redistribute solution vector to ghosted vector
    linObjFactory->globalToGhostContainer(*ep_container,*ghost_container,
                                          panzer::EpetraLinearObjContainer::X 
                                          | panzer::EpetraLinearObjContainer::DxDt); 

    // get X Epetra_Vector from ghosted container
    RCP<panzer::EpetraLinearObjContainer> ep_ghost_container =
      rcp_dynamic_cast<panzer::EpetraLinearObjContainer>(ghost_container);
    panzer_stk_classic::write_solution_data(*dofManager,*mesh,*ep_ghost_container->get_x());
    mesh->writeToExodus("output.exo");
  }

  // all done!
  /////////////////////////////////////////////////////////////
  out << (pass ? "PASS" : "FAIL") << " BASICS\n";

  PHX::FinalizeKokkosDevice();

  return 0;
}
Example #12
int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int argc, char *argv[]) {
#include <MueLu_UseShortNames.hpp>
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::TimeMonitor;
  bool success = true;
  bool verbose = true;
  try {
    RCP<const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();

    RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
    Teuchos::FancyOStream& out = *fancy;

    typedef Teuchos::ScalarTraits<SC> STS;

    // =========================================================================
    // Parameters initialization
    // =========================================================================
    //Teuchos::CommandLineProcessor clp(false);

    GO nx = 100, ny = 100, nz = 100;
    Galeri::Xpetra::Parameters<GO> galeriParameters(clp, nx, ny, nz, "Laplace2D"); // manage parameters of the test case
    Xpetra::Parameters             xpetraParameters(clp);                          // manage parameters of Xpetra
    std::string matFileName       = "";  clp.setOption("matrix",&matFileName,"read matrix from a file");
    LO blocksize = 1;                    clp.setOption("blocksize",&blocksize,"block size");

    switch (clp.parse(argc, argv)) {
      case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED:        return EXIT_SUCCESS; break;
      case Teuchos::CommandLineProcessor::PARSE_ERROR:
      case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; break;
      case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL:                               break;
    }

    //Xpetra::UnderlyingLib lib = xpetraParameters.GetLib();
    ParameterList galeriList = galeriParameters.GetParameterList();

    if(lib!=Xpetra::UseTpetra)
      throw std::runtime_error("This test only works with Tpetra linear algebra");

    // =========================================================================
    // Problem construction
    // =========================================================================
    RCP<const Map>   map;
    RCP<Matrix> A;
    RCP<MultiVector> nullspace;

    typedef Tpetra::CrsMatrix<SC,LO,GO,NO> Tpetra_CrsMatrix;
    typedef Tpetra::Operator<SC,LO,GO,NO> Tpetra_Operator;
    typedef Tpetra::Experimental::BlockCrsMatrix<SC,LO,GO,NO> Tpetra_BlockCrsMatrix;
    typedef Xpetra::TpetraBlockCrsMatrix<SC,LO,GO,NO> Xpetra_TpetraBlockCrsMatrix;
    typedef Xpetra::CrsMatrix<SC,LO,GO,NO> Xpetra_CrsMatrix;
    typedef Xpetra::CrsMatrixWrap<SC,LO,GO,NO> Xpetra_CrsMatrixWrap;
    typedef typename Teuchos::ScalarTraits<SC>::magnitudeType SCN;

    RCP<Tpetra_CrsMatrix> Acrs;
    RCP<Tpetra_BlockCrsMatrix> Ablock;

    if(matFileName.length() > 0) {
      // Read matrix from disk
      out << thickSeparator << std::endl << "Reading matrix from disk" <<std::endl;
      typedef Tpetra::MatrixMarket::Reader<Tpetra_CrsMatrix> reader_type;
      Acrs = reader_type::readSparseFile(matFileName,comm);
    }
    else{
      // Use Galeri
      out << thickSeparator << std::endl << xpetraParameters << galeriParameters;
      std::string matrixType = galeriParameters.GetMatrixType();
      RCP<Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > Axp;
      MueLuExamples::generate_user_matrix_and_nullspace<Scalar,LocalOrdinal,GlobalOrdinal,Node>(matrixType,lib,galeriList,comm,Axp,nullspace);
      Acrs = Xpetra::Helpers<SC,LO,GO,NO>::Op2NonConstTpetraCrs(Axp);
    }
    // Block this bad boy
    Ablock = Tpetra::Experimental::convertToBlockCrsMatrix(*Acrs,blocksize);

    // Now wrap BlockCrs to Xpetra::Matrix
    RCP<Xpetra_CrsMatrix> Axt = rcp(new Xpetra_TpetraBlockCrsMatrix(Ablock));
    A = rcp(new Xpetra_CrsMatrixWrap(Axt));

    // =========================================================================
    // Setups and solves
    // =========================================================================
    map=Xpetra::toXpetra(Acrs->getRowMap());

    RCP<Vector> X1 = VectorFactory::Build(map);
    RCP<Vector> X2 = VectorFactory::Build(map);
    RCP<Vector> B = VectorFactory::Build(map);
    B->setSeed(846930886);
    B->randomize();
    RCP<TimeMonitor> tm;

    // Belos Options
    RCP<Teuchos::ParameterList> SList = rcp(new Teuchos::ParameterList );
    SList->set("Verbosity",Belos::Errors + Belos::Warnings + Belos::StatusTestDetails);
    SList->set("Output Frequency",10);
    SList->set("Output Style",Belos::Brief);
    SList->set("Maximum Iterations",10);
    SList->set("Convergence Tolerance",5e-2);


    // =========================================================================
    // Solve #1 (fixed point + Jacobi)
    // =========================================================================
    out << thickSeparator << std::endl;
    out << prefSeparator << " Solve 1: Fixed Point + Jacobi"<< prefSeparator <<std::endl;
    {
      Teuchos::ParameterList MueList;
      MueList.set("max levels",1);
      MueList.set("coarse: type", "RELAXATION");

      std::string belos_solver("Fixed Point");
      MueLuExamples::solve_system_belos<Scalar,LocalOrdinal,GlobalOrdinal,Node>(A,X1,B,MueList,belos_solver,SList);
      SCN result = MueLuExamples::compute_resid_norm<Scalar,LocalOrdinal,GlobalOrdinal,Node>(*A,*X1,*B);
      out<<"Solve #1: Residual Norm = "<<result<<std::endl;
    }

    // =========================================================================
    // Solve #2 (straight-up Jacobi)
    // =========================================================================
    out << thickSeparator << std::endl;
    out << prefSeparator << " Solve 2: Fixed Jacobi"<< prefSeparator <<std::endl;
    {
      Teuchos::ParameterList IList;
      IList.set("relaxation: type","Jacobi");
      IList.set("relaxation: damping factor",1.0);
      IList.set("relaxation: sweeps",10);
      std::string ifpack2_precond("RELAXATION");

      MueLuExamples::solve_system_ifpack2(A,X2,B,ifpack2_precond,IList);

      SCN result = MueLuExamples::compute_resid_norm<Scalar,LocalOrdinal,GlobalOrdinal,Node>(*A,*X2,*B);
      out<<"Solve #2: Residual Norm = "<<result<<std::endl;
    }

    // Compare 1 & 2
    SCN norm = MueLuExamples::diff_vectors<Scalar,LocalOrdinal,GlobalOrdinal,Node>(*X1,*X2);
    if(norm > 1e-10) {
      out<<"ERROR: Norm of Solve #1 and Solve #2 differs by "<<norm<<std::endl;
      success=false;
    }

  }//end try
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

  return ( success ? EXIT_SUCCESS : EXIT_FAILURE );
}
Example #13
int 
main (int argc, char *argv[]) 
{
  using Teuchos::Comm;
  using Teuchos::FancyOStream;
  using Teuchos::getFancyOStream;
  using Teuchos::oblackholestream;
  using Teuchos::OSTab;
  using Teuchos::ParameterList;
  using Teuchos::parameterList;
  using Teuchos::RCP;
  using Teuchos::rcpFromRef;
  using std::cout;
  using std::endl;
  //
  // Typedefs for Tpetra template arguments.
  //
  typedef double scalar_type;
  typedef long int global_ordinal_type;
  typedef int local_ordinal_type;
  typedef Kokkos::DefaultNode::DefaultNodeType node_type;
  //
  // Tpetra objects which are the MV and OP template parameters of the
  // Belos specialization which we are testing.
  //
  typedef Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type> MV;
  typedef Tpetra::Operator<scalar_type, local_ordinal_type, global_ordinal_type, node_type> OP;
  // 
  // Other typedefs.
  // 
  typedef Teuchos::ScalarTraits<scalar_type> STS;
  typedef Tpetra::CrsMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type> sparse_matrix_type;

  Teuchos::GlobalMPISession mpiSession (&argc, &argv, &cout);
  RCP<const Comm<int> > comm = Tpetra::DefaultPlatform::getDefaultPlatform().getComm();
  RCP<node_type> node = Tpetra::DefaultPlatform::getDefaultPlatform().getNode();
  RCP<oblackholestream> blackHole (new oblackholestream);
  const int myRank = comm->getRank();

  // Output stream that prints only on Rank 0.
  RCP<FancyOStream> out;
  if (myRank == 0) {
    out = Teuchos::getFancyOStream (rcpFromRef (cout));
  } else {
    out = Teuchos::getFancyOStream (blackHole);
  }

  //
  // Get test parameters from command-line processor.
  //  
  // CommandLineProcessor always understands int, but may not
  // understand global_ordinal_type.  We convert to the latter below.
  int numRows = comm->getSize() * 100;
  bool tolerant = false;
  bool verbose = false;
  bool debug = false;
  Teuchos::CommandLineProcessor cmdp (false, true);
  cmdp.setOption("numRows", &numRows,
		 "Global number of rows (and columns) in the sparse matrix to generate.");
  cmdp.setOption("tolerant", "intolerant", &tolerant,
		 "Whether to parse files tolerantly.");
  cmdp.setOption("verbose", "quiet", &verbose, 
		 "Print messages and results.");
  cmdp.setOption("debug", "release", &debug, 
		 "Run debugging checks and print copious debugging output.");
  if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
    *out << "\nEnd Result: TEST FAILED" << endl;
    return EXIT_FAILURE;
  }
  // Output stream for verbose output.
  RCP<FancyOStream> verbOut = verbose ? out : getFancyOStream (blackHole);

  const bool success = true;

  // Test whether it's possible to instantiate the solver.
  // This is a minimal compilation test.
  *verbOut << "Instantiating Block GCRODR solver" << endl;
  Belos::BlockGCRODRSolMgr<scalar_type, MV, OP> solver;
  //
  // Test setting solver parameters.  For now, we just use an empty
  // (but non-null) parameter list, which the solver should fill in
  // with defaults.
  //
  *verbOut << "Setting solver parameters" << endl;
  RCP<ParameterList> solverParams = parameterList ();
  solver.setParameters (solverParams);
  //
  // Create a linear system to solve.
  //
  *verbOut << "Creating linear system" << endl;
  RCP<sparse_matrix_type> A;
  RCP<MV> X_guess, X_exact, B;
  {
    typedef Belos::Tpetra::ProblemMaker<sparse_matrix_type> factory_type;
    factory_type factory (comm, node, out, tolerant, debug);
    
    RCP<ParameterList> problemParams = parameterList ();
    problemParams->set ("Global number of rows", 
			static_cast<global_ordinal_type> (numRows));
    problemParams->set ("Problem type", std::string ("Nonsymmetric"));
    factory.makeProblem (A, X_guess, X_exact, B, problemParams);
  }
  // Approximate solution vector is a copy of the guess vector.
  RCP<MV> X (new MV (*X_guess));

  TEUCHOS_TEST_FOR_EXCEPTION(A.is_null(), std::logic_error,
			     "The sparse matrix is null!");
  TEUCHOS_TEST_FOR_EXCEPTION(X_guess.is_null(), std::logic_error,
			     "The initial guess X_guess is null!");
  TEUCHOS_TEST_FOR_EXCEPTION(X_exact.is_null(), std::logic_error,
			     "The exact solution X_exact is null!");
  TEUCHOS_TEST_FOR_EXCEPTION(B.is_null(), std::logic_error,
			     "The right-hand side B is null!");
  TEUCHOS_TEST_FOR_EXCEPTION(X.is_null(), std::logic_error,
			     "The approximate solution vector X is null!");

  typedef Belos::LinearProblem<scalar_type, MV, OP> problem_type;
  RCP<problem_type> problem (new problem_type (A, X, B));
  problem->setProblem ();
  solver.setProblem (problem);

  *verbOut << "Solving linear system" << endl;
  Belos::ReturnType result = solver.solve ();

  *verbOut << "Result of solve: " 
	   << Belos::convertReturnTypeToString (result) 
	   << endl;
  if (success) {
    *out << "\nEnd Result: TEST PASSED" << endl;
    return EXIT_SUCCESS;
  } 
  else {
    *out << "\nEnd Result: TEST FAILED" << endl;
    return EXIT_FAILURE;
  }
}
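Nearly every example on this page passes solver, mesh, or reporting options through a Teuchos::ParameterList. For reference, here is a minimal, self-contained sketch of the set/get/sublist API; the parameter names below are hypothetical and not taken from any particular example.

#include <Teuchos_ParameterList.hpp>
#include <iostream>

int main()
{
  // Hypothetical list; the names below are for illustration only.
  Teuchos::ParameterList params("Example Params");
  params.set("Convergence Tolerance", 1e-8);
  params.set("Maximum Iterations", 100);

  // Sublists hold nested options, as the partitioning examples do with
  // "parma_parameters" and "zoltan_parameters".
  Teuchos::ParameterList& inner = params.sublist("Inner Solver");
  inner.set("relaxation: sweeps", 10);

  // Typed retrieval; throws if the name is missing or the type differs.
  const double tol = params.get<double>("Convergence Tolerance");
  std::cout << "tol = " << tol << std::endl;
  std::cout << params;  // ParameterList has an operator<< for printing
  return 0;
}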
Example #14
int main(int argc,char * argv[])
{
  typedef panzer::unit_test::CartesianConnManager<int,panzer::Ordinal64> CCM;
  typedef panzer::DOFManager<int,panzer::Ordinal64> DOFManager;

  using Teuchos::RCP;
  using Teuchos::rcp;

  Teuchos::GlobalMPISession mpiSession(&argc, &argv);
  Kokkos::initialize(argc,argv);

  Teuchos::MpiComm<int> comm(MPI_COMM_WORLD);
  int np   = comm.getSize(); // number of processors

  // timings output
  std::string timingsFile = "timings.yaml";

  // mesh description
  int nx = 10, ny = 7, nz = 4;
  int px = np, py = 1, pz = 1;
  int bx =  1, by = 2, bz = 1;

  // parse command line arguments
  Teuchos::CommandLineProcessor clp;
  clp.setOption("nx",&nx);
  clp.setOption("ny",&ny);
  clp.setOption("nz",&nz);
  clp.setOption("px",&px);
  clp.setOption("py",&py);
  clp.setOption("pz",&pz);
  clp.setOption("timings-file",&timingsFile);
  auto cmdResult = clp.parse(argc,argv);
  if(cmdResult!=Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
    clp.printHelpMessage(argv[0],std::cout);
    return -1;
  } 

  // build field patterns for the velocity, pressure, temperature, B, and E fields
  RCP<const panzer::FieldPattern> pattern_U = buildFieldPattern<Intrepid2::Basis_HGRAD_HEX_C2_FEM<PHX::Device::execution_space,double,double>>();
  RCP<const panzer::FieldPattern> pattern_P = buildFieldPattern<Intrepid2::Basis_HGRAD_HEX_C1_FEM<PHX::Device::execution_space,double,double>>();
  RCP<const panzer::FieldPattern> pattern_T = buildFieldPattern<Intrepid2::Basis_HGRAD_HEX_C1_FEM<PHX::Device::execution_space,double,double>>();
  RCP<const panzer::FieldPattern> pattern_B = buildFieldPattern<Intrepid2::Basis_HDIV_HEX_I1_FEM<PHX::Device::execution_space,double,double>>();
  RCP<const panzer::FieldPattern> pattern_E = buildFieldPattern<Intrepid2::Basis_HCURL_HEX_I1_FEM<PHX::Device::execution_space,double,double>>();

  // repeatedly construct the DOFManager, timing buildGlobalUnknowns on each pass
  for(int repeats=0;repeats<100;repeats++) {
  
    // build the topology
    RCP<CCM> connManager = rcp(new CCM);
    connManager->initialize(comm,
                            Teuchos::as<panzer::Ordinal64>(nx),
                            Teuchos::as<panzer::Ordinal64>(ny),
                            Teuchos::as<panzer::Ordinal64>(nz),
                            px,py,pz,bx,by,bz);
  
    // build the dof manager and associate it with the topology
    RCP<DOFManager> dofManager = rcp(new DOFManager);
    dofManager->setConnManager(connManager,*comm.getRawMpiComm());
  
    // add velocity (U), PRESSURE, B, and E fields to the MHD element block
    dofManager->addField("eblock-0_0_0","UX",pattern_U);
    dofManager->addField("eblock-0_0_0","UY",pattern_U);
    dofManager->addField("eblock-0_0_0","UZ",pattern_U);
    dofManager->addField("eblock-0_0_0","PRESSURE",pattern_P);
    dofManager->addField("eblock-0_0_0","B",pattern_B);
    dofManager->addField("eblock-0_0_0","E",pattern_E);
  
    // add velocity (U) fields to the solid element block
    dofManager->addField("eblock-0_1_0","UX",pattern_U);
    dofManager->addField("eblock-0_1_0","UY",pattern_U);
    dofManager->addField("eblock-0_1_0","UZ",pattern_U);
  
    // try to get them all synced up
    comm.barrier();

    {
      PANZER_FUNC_TIME_MONITOR("panzer::ScalingTest::buildGlobalUnknowns");
  
      dofManager->buildGlobalUnknowns();
    }
  }

  Teuchos::TimeMonitor::summarize(std::cout,false,true,false);

  if ( timingsFile != "" ){
    std::ofstream fout(timingsFile.c_str());
    Teuchos::RCP<Teuchos::ParameterList> reportParams = parameterList(* (Teuchos::TimeMonitor::getValidReportParameters()));
    reportParams->set("Report format", "YAML");
    reportParams->set("YAML style", "spacious");
    Teuchos::TimeMonitor::report(fout,reportParams);
  }
 
  // this confirms the application passes
  std::cout << "Scaling test completed" << std::endl;

  return 0;
}
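Example #14 leans on PANZER_FUNC_TIME_MONITOR and Teuchos::TimeMonitor reporting. The following is a minimal plain-Teuchos sketch of the same scoped-timer idiom; it illustrates the pattern rather than the macro's actual expansion, and the counter name and expensiveStep function are hypothetical.

#include <Teuchos_TimeMonitor.hpp>
#include <Teuchos_GlobalMPISession.hpp>
#include <iostream>

// Hypothetical stand-in for work that should be timed.
static void expensiveStep() {}

int main(int argc, char* argv[])
{
  Teuchos::GlobalMPISession mpiSession(&argc, &argv);

  // Create (or look up) a named counter shared across invocations.
  Teuchos::RCP<Teuchos::Time> timer =
    Teuchos::TimeMonitor::getNewCounter("example::expensiveStep");

  for (int i = 0; i < 3; ++i) {
    Teuchos::TimeMonitor tm(*timer);  // starts here, stops at end of scope
    expensiveStep();
  }

  // Print timing statistics, as Example #14 does before writing YAML.
  Teuchos::TimeMonitor::summarize(std::cout);
  return 0;
}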
Example #15
int main(int argc, char * argv[]) 
{
   using Teuchos::RCP;
   using Teuchos::rcp;
   using Teuchos::rcpFromRef;
 
   Teuchos::GlobalMPISession mpiSession(&argc,&argv,&std::cout);

   std::string output_file_name = "square_mesh.gen";

   int xBlocks=1,yBlocks=1, zBlocks=1;
   int xElements=1,yElements=1, zElements=1;
   double x0=0.0, xf=1.0;
   double y0=0.0, yf=1.0;
   double z0=0.0, zf=1.0;
   bool threeD = false;
 
   // setup input arguments
   { 
     Teuchos::CommandLineProcessor clp;
     clp.throwExceptions(false);

     clp.setOption("o", &output_file_name, "Mesh output filename");

     clp.setOption("3d", "2d", &threeD, "Cube versus square mesh.");

     clp.setOption("x-blocks", &xBlocks, "Number of blocks in 'x' direction");
     clp.setOption("y-blocks", &yBlocks, "Number of blocks in 'y' direction");
     clp.setOption("z-blocks", &zBlocks, "Number of blocks in 'z' direction");

     clp.setOption("x-elmts", &xElements, "Number of elements in 'x' direction in each block");
     clp.setOption("y-elmts", &yElements, "Number of elements in 'y' direction in each block");
     clp.setOption("z-elmts", &zElements, "Number of elements in 'z' direction in each block");

     clp.setOption("x0", &x0, "Location of left edge");
     clp.setOption("xf", &xf, "Location of right edge");

     clp.setOption("y0", &y0, "Location of bottom edge");
     clp.setOption("yf", &yf, "Location of top edge");

     clp.setOption("z0", &z0, "Location of front edge");
     clp.setOption("zf", &zf, "Location of back edge");
 
     Teuchos::CommandLineProcessor::EParseCommandLineReturn parse_return = clp.parse(argc,argv,&std::cerr);

     if(parse_return==Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED)
       return -1;
      
     TEUCHOS_TEST_FOR_EXCEPTION(parse_return != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL, 
                                std::runtime_error, "Failed to parse command line!");
   }
  

   RCP<Teuchos::ParameterList> pl = rcp(new Teuchos::ParameterList);
   pl->set("X Blocks",xBlocks);
   pl->set("Y Blocks",yBlocks);
   pl->set("X Elements",xElements);
   pl->set("Y Elements",yElements);
   pl->set("X0",x0);
   pl->set("Y0",y0);
   pl->set("Xf",xf);
   pl->set("Yf",yf);

   if(threeD) {
     pl->set("Z Blocks",zBlocks);
     pl->set("Z Elements",zElements);
     pl->set("Z0",z0);
     pl->set("Zf",zf);
   }

   int numprocs = stk_classic::parallel_machine_size(MPI_COMM_WORLD);
   int rank = stk_classic::parallel_machine_rank(MPI_COMM_WORLD);

   RCP<panzer_stk_classic::STK_MeshFactory> factory; 
   if(!threeD)
     factory = Teuchos::rcp(new panzer_stk_classic::SquareQuadMeshFactory); 
   else
     factory = Teuchos::rcp(new panzer_stk_classic::CubeHexMeshFactory); 

   factory->setParameterList(pl);

   RCP<panzer_stk_classic::STK_Interface> mesh = factory->buildMesh(MPI_COMM_WORLD);
   mesh->writeToExodus(output_file_name);

   return 0;
}
Example #16
int main(int argc, char *argv[]) {
  int r_val = 0;

  Teuchos::CommandLineProcessor clp;

  int nthreads = 1;
  clp.setOption("nthreads", &nthreads, "Number of threads");

  clp.recogniseAllOptions(true);
  clp.throwExceptions(false);

  Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv );

  if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) {
    return 0;
  }
  if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
    cout << "Testing Kokkos::Threads:: Failed in parsing command line input" << endl;
    return -1;
  }

  unsigned threads_count = 0;
  if (Kokkos::hwloc::available())  {
    const unsigned numa_count       = Kokkos::hwloc::get_available_numa_count();
    const unsigned cores_per_numa   = Kokkos::hwloc::get_available_cores_per_numa();
    const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core();
    
    const unsigned one = 1u;
    threads_count = max(one, numa_count)*max(one, cores_per_numa)*max(one, threads_per_core);
 
    cout << " = Kokkos::hwloc = " << endl
         << "NUMA count       = " << numa_count << endl
         << "Cores per NUMA   = " << cores_per_numa << endl
         << "Threads per core = " << threads_per_core << endl
         << "Threads count    = " << threads_count << endl;
  } else {
    threads_count = thread::hardware_concurrency();

    cout << " = std::thread::hardware_concurrency = " << endl
         << "Threads count    = " << threads_count << endl;
  }

  if (static_cast<unsigned int>(nthreads) > threads_count) {
    ++r_val;
    cout << "Testing Kokkos::Threads:: Failed: the requested nthreads exceeds the number of available hardware threads" << endl;
  } else {
    Kokkos::Threads::initialize( nthreads );
    Kokkos::Threads::print_configuration( cout , true /* detailed */ );
    
    //__TestSuiteDoUnitTests__(float,int,unsigned int,Kokkos::Serial,void);
    //__TestSuiteDoUnitTests__(float,long,unsigned long,Kokkos::Serial,void);
    
    __TestSuiteDoUnitTests__(double,int,unsigned int,Kokkos::Threads,void);
    // __TestSuiteDoUnitTests__(double,long,unsigned long,Kokkos::Serial,void);
    
    // __TestSuiteDoUnitTests__(complex<float>,int,unsigned int,Kokkos::Serial,void);
    // __TestSuiteDoUnitTests__(complex<float>,long,unsigned long,Kokkos::Serial,void);
    
    // __TestSuiteDoUnitTests__(complex<double>,int,unsigned int,Kokkos::Serial,void);
    // __TestSuiteDoUnitTests__(complex<double>,long,unsigned long,Kokkos::Serial,void);
    
    Kokkos::Threads::finalize();
  }

  string eval;
  __EVAL_STRING__(r_val, eval);
  cout << "Testing Kokkos::Threads::" << eval << endl;

  return r_val;
}
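A recurring pitfall in these examples is the order of the parse-result checks: PARSE_HELP_PRINTED must be tested before comparing against PARSE_SUCCESSFUL, because printing help is not a successful parse. Below is a minimal, self-contained sketch of the full CommandLineProcessor lifecycle, with hypothetical option names.

#include <Teuchos_CommandLineProcessor.hpp>
#include <cstdlib>
#include <iostream>
#include <string>

int main(int argc, char* argv[])
{
  // (throwExceptions=false, recogniseAllOptions=true), as many examples use.
  Teuchos::CommandLineProcessor clp(false, true);

  int n = 10;                   // hypothetical integer option
  bool verbose = false;         // hypothetical boolean option
  std::string file = "in.mtx";  // hypothetical string option
  clp.setOption("n", &n, "Problem size");
  clp.setOption("verbose", "quiet", &verbose, "Enable verbose output");
  clp.setOption("file", &file, "Input file name");

  const Teuchos::CommandLineProcessor::EParseCommandLineReturn result =
    clp.parse(argc, argv);
  if (result == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) {
    return EXIT_SUCCESS;  // --help is not a failure
  }
  if (result != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
    std::cerr << "Failed to parse command-line arguments!" << std::endl;
    return EXIT_FAILURE;
  }

  std::cout << "n=" << n << ", verbose=" << verbose
            << ", file=" << file << std::endl;
  return EXIT_SUCCESS;
}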
Example #17
int main (int argc, char *argv[]) {

  Teuchos::CommandLineProcessor clp;
  clp.setDocString("This example program measures the performance of Chol algorithms on the Kokkos::Threads execution space.\n");

  int nthreads = 1;
  clp.setOption("nthreads", &nthreads, "Number of threads");

  int max_task_dependence = 10;
  clp.setOption("max-task-dependence", &max_task_dependence, "Max number of task dependence");

  int team_size = 1;
  clp.setOption("team-size", &team_size, "Team size");

  int fill_level = 0;
  clp.setOption("fill-level", &fill_level, "Fill level");

  bool team_interface = true;
  clp.setOption("enable-team-interface", "disable-team-interface",
                &team_interface, "Flag for team interface");

  bool mkl_interface = false;
  clp.setOption("enable-mkl-interface", "disable-mkl-interface",
                &mkl_interface, "Flag for MKL interface");

  int stack_size = 8192;
  clp.setOption("stack-size", &stack_size, "Stack size");

  bool verbose = false;
  clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing");

  string file_input = "test.mtx";
  clp.setOption("file-input", &file_input, "Input file (MatrixMarket SPD matrix)");

  int treecut = 15;
  clp.setOption("treecut", &treecut, "Level to cut tree from bottom");

  int minblksize = 0;
  clp.setOption("minblksize", &minblksize, "Minimum block size for internal reordering");

  int prunecut = 0;
  clp.setOption("prunecut", &prunecut, "Level to prune tree from bottom");

  int seed = 0;
  clp.setOption("seed", &seed, "Seed for random number generator in graph partition");

  int niter = 10;
  clp.setOption("niter", &niter, "Number of iterations for testing");

  clp.recogniseAllOptions(true);
  clp.throwExceptions(false);

  Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv );

  if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0;
  if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL  ) return -1;
  
  int r_val = 0;
  {
    const bool overwrite = true;
    const int nshepherds = (team_interface ? nthreads/team_size : nthreads);
    const int nworker_per_shepherd = nthreads/nshepherds;

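    // Configure the Qthreads runtime (its shepherd/worker layout and
    // task stack size) through environment variables before the
    // execution space is initialized.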
    setenv("QT_HWPAR",                    to_string(nthreads).c_str(),             overwrite);
    setenv("QT_NUM_SHEPHERDS",            to_string(nshepherds).c_str(),           overwrite);
    setenv("QT_NUM_WORKERS_PER_SHEPHERD", to_string(nworker_per_shepherd).c_str(), overwrite);
    setenv("QT_STACK_SIZE",               to_string(stack_size).c_str(),           overwrite);

    exec_space::initialize(nthreads);
    exec_space::print_configuration(cout, true);
    
    r_val = exampleCholPerformance
      <value_type,ordinal_type,size_type,exec_space,void>
      (file_input, 
       treecut,
       minblksize,
       prunecut,
       seed,
       niter, 
       nthreads, 
       max_task_dependence, 
       team_size, 
       fill_level,
       nshepherds,
       team_interface, 
       (nthreads != 1), 
       mkl_interface,
       verbose);

    exec_space::finalize();

    unsetenv("QT_HWPAR");
    unsetenv("QT_NUM_SHEPHERDS");
    unsetenv("QT_NUM_WORKERS_PER_SHEPHERD");
    unsetenv("QT_STACK_SIZE");
  }

  return r_val;
}
Example #18
int main(int argc, char *argv[])
{
  bool success = true;
  bool verbose = false;
  try {

    const size_t num_sockets = Kokkos::hwloc::get_available_numa_count();
    const size_t num_cores_per_socket =
      Kokkos::hwloc::get_available_cores_per_numa();
    const size_t num_threads_per_core =
      Kokkos::hwloc::get_available_threads_per_core();

    // Setup command line options
    Teuchos::CommandLineProcessor CLP;
    CLP.setDocString(
      "This test measures the performance of MP::Vector multiply routines.\n");
    int nGrid = 32;
    CLP.setOption("n", &nGrid, "Number of mesh points in each direction");
    int nIter = 10;
    CLP.setOption("ni", &nIter, "Number of multiply iterations");
#ifdef KOKKOS_HAVE_PTHREAD
    bool threads = true;
    CLP.setOption("threads", "no-threads", &threads, "Enable Threads device");
    int num_cores = num_cores_per_socket * num_sockets;
    CLP.setOption("cores", &num_cores,
                  "Number of CPU cores to use (defaults to all)");
    int num_hyper_threads = num_threads_per_core;
    CLP.setOption("hyperthreads", &num_hyper_threads,
                  "Number of hyper threads per core to use (defaults to all)");
    int threads_per_vector = 1;
    CLP.setOption("threads_per_vector", &threads_per_vector,
                  "Number of threads to use within each vector");
#endif
#ifdef KOKKOS_HAVE_CUDA
    bool cuda = true;
    CLP.setOption("cuda", "no-cuda", &cuda, "Enable Cuda device");
    int cuda_threads_per_vector = 16;
    CLP.setOption("cuda_threads_per_vector", &cuda_threads_per_vector,
                  "Number of Cuda threads to use within each vector");
    int cuda_block_size = 0;
    CLP.setOption("cuda_block_size", &cuda_block_size,
                  "Cuda block size (0 implies the default choice)");
    int num_cuda_blocks = 0;
    CLP.setOption("num_cuda_blocks", &num_cuda_blocks,
                  "Number of Cuda blocks (0 implies the default choice)");
    int device_id = 0;
    CLP.setOption("device", &device_id, "CUDA device ID");
#endif
    CLP.parse( argc, argv );

    typedef int Ordinal;
    typedef double Scalar;

#ifdef KOKKOS_HAVE_PTHREAD
    if (threads) {
      typedef Kokkos::Threads Device;
      typedef Stokhos::StaticFixedStorage<Ordinal,Scalar,1,Device> Storage;

      Kokkos::Threads::initialize(num_cores*num_hyper_threads);

      std::cout << std::endl
                << "Threads performance with " << num_cores*num_hyper_threads
                << " threads:" << std::endl;

      Kokkos::DeviceConfig dev_config(num_cores,
                                       threads_per_vector,
                                       num_hyper_threads / threads_per_vector);

      mainHost<Storage>(nGrid, nIter, dev_config);

      Kokkos::Threads::finalize();
    }
#endif

#ifdef KOKKOS_HAVE_CUDA
    if (cuda) {
      typedef Kokkos::Cuda Device;
      typedef Stokhos::StaticFixedStorage<Ordinal,Scalar,1,Device> Storage;

      Kokkos::Cuda::host_mirror_device_type::initialize();
      Kokkos::Cuda::initialize(Kokkos::Cuda::SelectDevice(device_id));

      cudaDeviceProp deviceProp;
      cudaGetDeviceProperties(&deviceProp, device_id);
      std::cout << std::endl
                << "CUDA performance for device " << device_id << " ("
                << deviceProp.name << "):"
                << std::endl;

      Kokkos::DeviceConfig dev_config(
        num_cuda_blocks,
        cuda_threads_per_vector,
        cuda_threads_per_vector == 0 ? 0 : cuda_block_size / cuda_threads_per_vector);

      mainCuda<Storage>(nGrid,nIter,dev_config);

      Kokkos::Cuda::host_mirror_device_type::finalize();
      Kokkos::Cuda::finalize();
    }
#endif

  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

  if (success)
    return 0;
  return -1;
}
Example #19
int main(int argc,char * argv[])
{
   bool status = false;
   Kokkos::initialize(argc,argv);

   { 
     // need to protect kokkos and MPI
     // calls MPI_Init and MPI_Finalize
     Teuchos::GlobalMPISession mpiSession(&argc,&argv);
  
     // build MPI/Serial communicators
     #ifdef HAVE_MPI
        Epetra_MpiComm Comm_epetra(MPI_COMM_WORLD);
     #else
        Epetra_SerialComm Comm_epetra;
     #endif
     Teuchos::RCP<const Teuchos::Comm<int> > Comm = Tpetra::DefaultPlatform::getDefaultPlatform ().getComm ();
  
     Teko::Test::UnitTest::SetComm(Teuchos::rcpFromRef(Comm_epetra));
     Teko::Test::UnitTest::SetComm_tpetra(Comm);
  
     Teuchos::CommandLineProcessor clp;
  
     int verbosity = 1;
     std::string faillog = "failure.log";
     bool isfast = false;
  
     clp.setOption("verb",&verbosity,"How verbose is the output? 1 is normal, 10 is a lot.");
     clp.setOption("log",&faillog,"File for failure information to go to (also high verbosity text)");
     clp.setOption("fast","notfast",&isfast,"Run only fast tests");
     clp.parse(argc,argv);
  
     Teuchos::RCP<Teuchos::FancyOStream> termout = Teuchos::getFancyOStream(Teuchos::rcpFromRef(std::cout));
     Teuchos::RCP<Teuchos::FancyOStream> failout;
     std::ofstream failure;
  
     if(faillog=="stdout") {
        failout = termout;
     }
     else {
        failure.open(faillog.c_str());
        failout = Teuchos::getFancyOStream(Teuchos::rcpFromRef(failure));
     }
  
     termout->setOutputToRootOnly(0);
     failout->setOutputToRootOnly(0);
  
     // gdbIn();
     Teko_ADD_UNIT_TEST(Teko::Test::tSIMPLEPreconditionerFactory_tpetra,SIMPLEPreconditionerFactory_tpetra);
     Teko_ADD_UNIT_TEST(Teko::Test::tDiagonalPreconditionerFactory_tpetra,DiagonalPreconditionerFactory_tpetra);
     Teko_ADD_UNIT_TEST(Teko::Test::tLU2x2PreconditionerFactory_tpetra,LU2x2PreconditionerFactory_tpetra);
     Teko_ADD_UNIT_TEST(Teko::Test::tLSCStablePreconditionerFactory_tpetra,LSCStablePreconditionerFactory_tpetra);
     Teko_ADD_UNIT_TEST(Teko::Test::tLSCStabilized_tpetra,LSCStabilized_tpetra);
     Teko_ADD_UNIT_TEST(Teko::Test::tJacobi2x2PreconditionerFactory_tpetra,Jacobi2x2PreconditionerFactory_tpetra);
     Teko_ADD_UNIT_TEST(Teko::Test::tBlockJacobiPreconditionerFactory_tpetra,BlockJacobiPreconditionerFactory_tpetra);
     Teko_ADD_UNIT_TEST(Teko::Test::tBlockUpperTriInverseOp_tpetra,BlockUpperTriInverseOp_tpetra);
     Teko_ADD_UNIT_TEST(Teko::Test::tBlockLowerTriInverseOp_tpetra,BlockLowerTriInverseOp_tpetra);
     Teko_ADD_UNIT_TEST(Teko::Test::tTpetraOperatorWrapper,tTpetraOperatorWrapper);
     Teko_ADD_UNIT_TEST(Teko::Test::tInterlacedTpetra,InterlacedTpetra);
     Teko_ADD_UNIT_TEST(Teko::Test::tBlockingTpetra,BlockingTpetra);
     Teko_ADD_UNIT_TEST(Teko::Test::tTpetraThyraConverter,TpetraThyraConverter);
     Teko_ADD_UNIT_TEST(Teko::Test::tGraphLaplacian_tpetra,tGraphLaplacian_tpetra);
     Teko_ADD_UNIT_TEST(Teko::Test::tParallelInverse_tpetra,tParallelInverse_tpetra);
     Teko_ADD_UNIT_TEST(Teko::Test::tExplicitOps_tpetra,tExplicitOps_tpetra);
     Teko_ADD_UNIT_TEST(Teko::Test::tLSCHIntegrationTest_tpetra,LSCHIntegrationTest_tpetra);
     Teko_ADD_UNIT_TEST(Teko::Test::tLumping_tpetra,Lumping_tpetra);
     Teko_ADD_UNIT_TEST(Teko::Test::tAbsRowSum_tpetra,AbsRowSum_tpetra);
     Teko_ADD_UNIT_TEST(Teko::Test::tNeumannSeries_tpetra,NeumannSeries_tpetra);
     Teko_ADD_UNIT_TEST(Teko::Test::tPCDStrategy_tpetra,PCDStrategy_tpetra);
     if(not isfast) {
        Teko_ADD_UNIT_TEST(Teko::Test::tLSCIntegrationTest_tpetra,LSCIntegrationTest_tpetra);
        Teko_ADD_UNIT_TEST(Teko::Test::tStridedTpetraOperator,tStridedTpetraOperator);
        Teko_ADD_UNIT_TEST(Teko::Test::tBlockedTpetraOperator,tBlockedTpetraOperator);
     }
  
     status = Teko::Test::UnitTest::RunTests_tpetra(verbosity,*termout,*failout);
  
     if(not status)
        *termout << "Teko tests failed" << std::endl; 

     // release any stored Kokkos memory
     Teko::Test::UnitTest::ClearTests();
   }

   Kokkos::finalize();

   return status ? 0 : -1;
}
Example #20
int main(int argc, char* argv[]) {
  int ierr = 0;

  try {
    double t, ta;
    int p = 2;
    int w = p+7;

    // Set up command line options
    Teuchos::CommandLineProcessor clp;
    clp.setDocString("This program tests the speed of various forward mode AD implementations for a single multiplication operation");
    int nderiv = 10;
    clp.setOption("nderiv", &nderiv, "Number of derivative components");
    int nloop = 1000000;
    clp.setOption("nloop", &nloop, "Number of loops");

    // Parse options
    Teuchos::CommandLineProcessor::EParseCommandLineReturn
      parseReturn= clp.parse(argc, argv);
    if(parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL)
      return 1;

    // Memory pool & manager
    Sacado::Fad::MemPoolManager<double> poolManager(10);
    Sacado::Fad::MemPool* pool = poolManager.getMemoryPool(nderiv);
    Sacado::Fad::DMFad<double>::setDefaultPool(pool);

    std::cout.setf(std::ios::scientific);
    std::cout.precision(p);
    std::cout << "Times (sec) for nderiv = " << nderiv
	      << " nloop =  " << nloop << ":  " << std::endl;

    ta = do_time_analytic(nderiv, nloop);
    std::cout << "Analytic:  " << std::setw(w) << ta << std::endl;

    t = do_time< FAD::TFad<10,double> >(nderiv, nloop);
    std::cout << "TFad:      " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << std::endl;

    t = do_time< FAD::Fad<double> >(nderiv, nloop);
    std::cout << "Fad:       " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << std::endl;

    t = do_time< Sacado::Fad::SFad<double,10> >(nderiv, nloop);
    std::cout << "SFad:      " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << std::endl;

    t = do_time< Sacado::Fad::SLFad<double,10> >(nderiv, nloop);
    std::cout << "SLFad:     " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << std::endl;

    t = do_time< Sacado::Fad::DFad<double> >(nderiv, nloop);
    std::cout << "DFad:      " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << std::endl;

    t = do_time< Sacado::Fad::DMFad<double> >(nderiv, nloop);
    std::cout << "DMFad:     " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << std::endl;

    t = do_time< Sacado::ELRFad::SFad<double,10> >(nderiv, nloop);
    std::cout << "ELRSFad:   " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << std::endl;

    t = do_time< Sacado::ELRFad::SLFad<double,10> >(nderiv, nloop);
    std::cout << "ELRSLFad:  " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << std::endl;

    t = do_time< Sacado::ELRFad::DFad<double> >(nderiv, nloop);
    std::cout << "ELRDFad:   " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << std::endl;

    t = do_time< Sacado::CacheFad::DFad<double> >(nderiv, nloop);
    std::cout << "CacheFad:  " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << std::endl;

    t = do_time< Sacado::Fad::DVFad<double> >(nderiv, nloop);
    std::cout << "DVFad:     " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << std::endl;

  }
  catch (std::exception& e) {
    std::cout << e.what() << std::endl;
    ierr = 1;
  }
  catch (const char *s) {
    std::cout << s << std::endl;
    ierr = 1;
  }
  catch (...) {
    std::cout << "Caught unknown exception!" << std::endl;
    ierr = 1;
  }

  return ierr;
}
Example #21
int
main (int argc, char *argv[])
{
  using Teuchos::RCP;
  using std::cout;
  using std::endl;

  //
  // Initialize MPI.
  //
  Teuchos::oblackholestream blackhole;
  Teuchos::GlobalMPISession mpiSession (&argc, &argv, &blackhole);

  //
  // Get the default communicator and node
  //
  RCP<const Teuchos::Comm<int> > comm =
    Tpetra::DefaultPlatform::getDefaultPlatform ().getComm ();
  const int myRank = comm->getRank ();

  //
  // Get parameters from command-line processor
  //
  MyOp::global_ordinal_type n = 100;
  Teuchos::CommandLineProcessor cmdp (false, true);
  cmdp.setOption ("n", &n, "Number of rows of our operator.");
  if (cmdp.parse (argc, argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
    return -1;
  }

  // Construct the operator.  Note that the operator does not have to
  // be an explicitly stored matrix.  Here, we are using our
  // user-defined operator.
  MyOp K (n, comm);

  // Construct a Vector of all ones, using the above Operator's domain Map.
  typedef Tpetra::Vector<MyOp::scalar_type, MyOp::local_ordinal_type,
    MyOp::global_ordinal_type, MyOp::node_type> vec_type;
  vec_type x (K.getDomainMap ());
  x.putScalar (1.0);
  // Construct an output Vector for K*x.
  vec_type y (K.getRangeMap ());
  K.apply (x, y); // Compute y := K*x.

  // The operator has a stencil (-1, 2, -1), except for the
  // boundaries.  At the left boundary (global row 0), the stencil is
  // (2, -1), and at the right boundary (global row n-1), the stencil
  // is (-1, 2).  Thus, we know that if all entries of the input
  // Vector are 1, then all entries of the output Vector are 0, except
  // for the boundary entries, which are both 1.
  //
  // To test this, construct the expected output vector y_expected,
  // and compare y to y_expected using the max norm.  Even in single
  // precision, the max norm of y - y_expected should be exactly zero.

  typedef MyOp::map_type map_type;
  RCP<const map_type> rangeMap = K.getRangeMap ();

  vec_type y_expected (rangeMap);
  y_expected.putScalar (0.0);

  if (rangeMap->isNodeGlobalElement (0)) {
    y_expected.replaceGlobalValue (0, 1.0);
  }
  if (rangeMap->isNodeGlobalElement (n - 1)) {
    y_expected.replaceGlobalValue (n - 1, 1.0);
  }

  y_expected.update (1.0, y, -1.0); // y_expected := y - y_expected
  typedef vec_type::mag_type mag_type; // type of a norm of vec_type
  const mag_type diffMaxNorm = y_expected.normInf ();

  bool success = true;
  if (myRank == 0) {
    if (diffMaxNorm == 0.0) {
      // This tells the Trilinos test framework that the test passed.
      cout << "Yay!  ||y - y_expected||_inf = 0." << endl
           << "End Result: TEST PASSED" << endl;
    } else {
      success = false;
      // This tells the Trilinos test framework that the test failed.
      cout << "Oops!  ||y - y_expected||_inf = " << diffMaxNorm << " != 0."
           << endl << "End Result: TEST FAILED" << endl;
    }
  }

  return success ? 0 : -1;
}
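The MyOp class used above is defined outside this snippet. To show what a matrix-free Tpetra::Operator subclass minimally requires, here is a hedged sketch that implements y := 2x; the class name ScaleOp is hypothetical, and the default template arguments of Tpetra::Operator<> are assumed to be available in this Tpetra version.

#include <Tpetra_Operator.hpp>
#include <Tpetra_Map.hpp>
#include <Tpetra_Vector.hpp>
#include <Tpetra_DefaultPlatform.hpp>
#include <Teuchos_GlobalMPISession.hpp>
#include <Teuchos_ScalarTraits.hpp>
#include <iostream>

class ScaleOp : public Tpetra::Operator<> {
public:
  typedef Tpetra::Operator<> op_type;
  typedef op_type::scalar_type scalar_type;
  typedef Tpetra::MultiVector<> mv_type;
  typedef Tpetra::Map<> map_type;

  explicit ScaleOp (const Teuchos::RCP<const map_type>& map) : map_ (map) {}

  Teuchos::RCP<const map_type> getDomainMap () const { return map_; }
  Teuchos::RCP<const map_type> getRangeMap () const { return map_; }

  void apply (const mv_type& X, mv_type& Y,
              Teuchos::ETransp /*mode*/ = Teuchos::NO_TRANS,
              scalar_type alpha = Teuchos::ScalarTraits<scalar_type>::one (),
              scalar_type beta  = Teuchos::ScalarTraits<scalar_type>::zero ()) const
  {
    // Y := alpha * (2*X) + beta * Y, i.e., the operator is 2*I.
    Y.update (2.0 * alpha, X, beta);
  }

private:
  Teuchos::RCP<const map_type> map_;
};

int main (int argc, char* argv[])
{
  Teuchos::GlobalMPISession mpiSession (&argc, &argv);
  Teuchos::RCP<const Teuchos::Comm<int> > comm =
    Tpetra::DefaultPlatform::getDefaultPlatform ().getComm ();

  // 100 global rows, index base 0, as in the stencil example above.
  Teuchos::RCP<const ScaleOp::map_type> map =
    Teuchos::rcp (new ScaleOp::map_type (100, 0, comm));
  ScaleOp op (map);

  Tpetra::Vector<> x (map), y (map);
  x.putScalar (1.0);
  op.apply (x, y);  // y := 2*x
  std::cout << "norm1(y) = " << y.norm1 () << std::endl;  // expect 200
  return 0;
}

Any such operator can be handed to a Belos::LinearProblem or to an apply test like the one above in place of an explicitly stored matrix.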
Example #22
int main(int argc, char *argv[])
{
    int np=1, rank=0;
    int splitrank, splitsize;
    int rc = 0;
    nssi_service xfer_svc;

    int server_index=0;
    int rank_in_server=0;

    int transport_index=-1;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &np);

    MPI_Barrier(MPI_COMM_WORLD);

    Teuchos::oblackholestream blackhole;
    std::ostream &out = ( rank == 0 ? std::cout : blackhole );

    struct xfer_args args;

    const int num_io_methods = 8;
    const int io_method_vals[] = {
            XFER_WRITE_ENCODE_SYNC, XFER_WRITE_ENCODE_ASYNC,
            XFER_WRITE_RDMA_SYNC, XFER_WRITE_RDMA_ASYNC,
            XFER_READ_ENCODE_SYNC, XFER_READ_ENCODE_ASYNC,
            XFER_READ_RDMA_SYNC, XFER_READ_RDMA_ASYNC};
    const char * io_method_names[] = {
            "write-encode-sync", "write-encode-async",
            "write-rdma-sync", "write-rdma-async",
            "read-encode-sync", "read-encode-async",
            "read-rdma-sync", "read-rdma-async"};

    const int nssi_transport_list[] = {
            NSSI_RPC_PTL,
            NSSI_RPC_PTL,
            NSSI_RPC_IB,
            NSSI_RPC_IB,
            NSSI_RPC_GEMINI,
            NSSI_RPC_GEMINI,
            NSSI_RPC_BGPDCMF,
            NSSI_RPC_BGPDCMF,
            NSSI_RPC_BGQPAMI,
            NSSI_RPC_BGQPAMI,
            NSSI_RPC_MPI};

    const int num_nssi_transports = 11;
    const int nssi_transport_vals[] = {
            0,
            1,
            2,
            3,
            4,
            5,
            6,
            7,
            8,
            9,
            10
            };
    const char * nssi_transport_names[] = {
            "portals",
            "ptl",
            "infiniband",
            "ib",
            "gemini",
            "gni",
            "bgpdcmf",
            "dcmf",
            "bgqpami",
            "pami",
            "mpi"
    };


    // Initialize arguments
    args.transport=NSSI_DEFAULT_TRANSPORT;
    args.len = 1;
    args.delay = 1;
    args.io_method = XFER_WRITE_RDMA_SYNC;
    args.debug_level = LOG_WARN;
    args.num_trials = 1;
    args.num_reqs = 1;
    args.result_file_mode = "a";
    args.result_file = "";
    args.url_file = "";
    args.logfile = "";
    args.client_flag = true;
    args.server_flag = true;
    args.num_servers = 1;
    args.num_threads = 0;
    args.timeout = 500;
    args.num_retries = 5;
    args.validate_flag = true;
    args.kill_server_flag = true;
    args.block_distribution = true;


    bool success = true;

    /**
     * We make extensive use of the \ref Teuchos::CommandLineProcessor for command-line
     * options to control the behavior of the test code.   To evaluate performance,
     * the "num-trials", "num-reqs", and "len" options control the amount of data transferred
     * between client and server.  The "io-method" selects the type of data transfer.  The
     * server-url specifies the URL of the server.  If running as a server, the server-url
     * provides a recommended URL when initializing the network transport.
     */
    try {

        //out << Teuchos::Teuchos_Version() << std::endl << std::endl;

        // Creating an empty command line processor looks like:
        Teuchos::CommandLineProcessor parser;
        parser.setDocString(
                "This example program demonstrates a simple data-transfer service "
                "built using the NEtwork Scalable Service Interface (Nessie)."
        );

        /* To set an option, it must be given a name and a default value.  Additionally,
           each option can be given a help std::string.  Although it is not necessary, a help
           std::string aids a user's comprehension of the acceptable command line arguments.
           Some examples of setting command line options are:
         */

        parser.setOption("delay", &args.delay, "time(s) for client to wait for server to start" );
        parser.setOption("timeout", &args.timeout, "time(ms) to wait for server to respond" );
        parser.setOption("server", "no-server", &args.server_flag, "Run the server" );
        parser.setOption("client", "no-client", &args.client_flag, "Run the client");
        parser.setOption("len", &args.len, "The number of structures in an input buffer");
        parser.setOption("debug",(int*)(&args.debug_level), "Debug level");
        parser.setOption("logfile", &args.logfile, "log file");
        parser.setOption("num-trials", &args.num_trials, "Number of trials (experiments)");
        parser.setOption("num-reqs", &args.num_reqs, "Number of reqs/trial");
        parser.setOption("result-file", &args.result_file, "Where to store results");
        parser.setOption("result-file-mode", &args.result_file_mode, "Write mode for the result");
        parser.setOption("server-url-file", &args.url_file, "File that has URL client uses to find server");
        parser.setOption("validate", "no-validate", &args.validate_flag, "Validate the data");
        parser.setOption("num-servers", &args.num_servers, "Number of server processes");
        parser.setOption("num-threads", &args.num_threads, "Number of threads used by each server process");
        parser.setOption("kill-server", "no-kill-server", &args.kill_server_flag, "Kill the server at the end of the experiment");
        parser.setOption("block-distribution", "rr-distribution", &args.block_distribution,
                "Use a block distribution scheme to assign clients to servers");

        // Set an enumeration command line option for the io_method
        parser.setOption("io-method", &args.io_method, num_io_methods, io_method_vals, io_method_names,
                "I/O Methods for the example: \n"
                "\t\t\twrite-encode-sync : Write data through the RPC args, synchronous\n"
                "\t\t\twrite-encode-async: Write data through the RPC args - asynchronous\n"
                "\t\t\twrite-rdma-sync : Write data using RDMA (server pulls) - synchronous\n"
                "\t\t\twrite-rdma-async: Write data using RDMA (server pulls) - asynchronous\n"
                "\t\t\tread-encode-sync : Read data through the RPC result - synchronous\n"
                "\t\t\tread-encode-async: Read data through the RPC result - asynchronous\n"
                "\t\t\tread-rdma-sync : Read data using RDMA (server puts) - synchronous\n"
                "\t\t\tread-rdma-async: Read data using RDMA (server puts) - asynchronous");


        // Set an enumeration command line option for the NNTI transport
        parser.setOption("transport", &transport_index, num_nssi_transports, nssi_transport_vals, nssi_transport_names,
                "NSSI transports (not all are available on every platform): \n"
                "\t\t\tportals|ptl    : Cray or Schutt\n"
                "\t\t\tinfiniband|ib  : libibverbs\n"
                "\t\t\tgemini|gni     : Cray libugni (Gemini or Aries)\n"
                "\t\t\tbgpdcmf|dcmf   : IBM BG/P DCMF\n"
                "\t\t\tbgqpami|pami   : IBM BG/Q PAMI\n"
                "\t\t\tmpi            : isend/irecv implementation\n"
                );



        /* There are also two methods that control the behavior of the
           command line processor.  First, for the command line processor to
           allow an unrecognized command line option to be ignored (and
           only have a warning printed), use:
         */
        parser.recogniseAllOptions(true);

        /* Second, by default, if the parser finds a command line option it
           doesn't recognize or finds the --help option, it will throw a
           std::exception.  If you want to prevent the command line processor from
           throwing a std::exception (which is important in this program since
           we don't have a try/catch around this) when it encounters an
           unrecognized option or help is printed, use:
         */
        parser.throwExceptions(false);

        /* We now parse the command line where argc and argv are passed to
           the parse method.  Note that since we have turned off std::exception
           throwing above we had better grab the return argument so that
           we can see what happened and act accordingly.
         */
        Teuchos::CommandLineProcessor::EParseCommandLineReturn parseReturn= parser.parse( argc, argv );

        if( parseReturn == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED ) {
            return 0;
        }

        if( parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL   ) {
            return 1; // Error!

        }

        // Here is where you would use these command line arguments but for this example program
        // we will just print the help message with the new values of the command-line arguments.
        //if (rank == 0)
        //    out << "\nPrinting help message with new values of command-line arguments ...\n\n";

        //parser.printHelpMessage(argv[0],out);

    }

    TEUCHOS_STANDARD_CATCH_STATEMENTS(true,std::cerr,success);

    log_debug(args.debug_level, "transport_index=%d", transport_index);
    if (transport_index > -1) {
        args.transport     =nssi_transport_list[transport_index];
        args.transport_name=std::string(nssi_transport_names[transport_index]);
    }
    args.io_method_name=std::string(io_method_names[args.io_method]);

    log_debug(args.debug_level, "%d: Finished processing arguments", rank);


    if (!success) {
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    if (!args.server_flag && args.client_flag) {
        /* initialize logger */
        if (args.logfile.empty()) {
            logger_init(args.debug_level, NULL);
        } else {
            char fn[1024];
            sprintf(fn, "%s.client.%03d.log", args.logfile.c_str(), rank);
            logger_init(args.debug_level, fn);
        }
    } else if (args.server_flag && !args.client_flag) {
        /* initialize logger */
        if (args.logfile.empty()) {
            logger_init(args.debug_level, NULL);
        } else {
            char fn[1024];
            sprintf(fn, "%s.server.%03d.log", args.logfile.c_str(), rank);
            logger_init(args.debug_level, fn);
        }
    } else if (args.server_flag && args.client_flag) {
        /* initialize logger */
        if (args.logfile.empty()) {
            logger_init(args.debug_level, NULL);
        } else {
            char fn[1024];
            sprintf(fn, "%s.%03d.log", args.logfile.c_str(), rank);
            logger_init(args.debug_level, fn);
        }
    }

    log_level debug_level = args.debug_level;

    // Communicator used for both client and server (may split if using client and server)
    MPI_Comm comm;

    log_debug(debug_level, "%d: Starting xfer-service test", rank);

#ifdef TRIOS_ENABLE_COMMSPLITTER
    if (args.transport == NSSI_RPC_MPI) {
        MPI_Pcontrol(0);
    }
#endif

    /**
     * Since this test can be run as a server, client, or both, we need to play some fancy
     * MPI games to get the communicators working correctly.  If we're executing as both
     * a client and a server, we split the communicator so that the client thinks its
     * running by itself.
     */
    int color = 0;  // color=0-->server, color=1-->client
    if (args.client_flag && args.server_flag) {
        if (np < 2) {
            log_error(debug_level, "Must use at least 2 MPI processes for client and server mode");
            MPI_Abort(MPI_COMM_WORLD, -1);
        }

        // Split the communicators. Put all the servers as the first ranks.
        if (rank < args.num_servers) {
            color = 0;
            log_debug(debug_level, "rank=%d is a server", rank);
        }
        else {
            color = 1;  // all others are clients
            log_debug(debug_level, "rank=%d is a client", rank);
        }

        MPI_Comm_split(MPI_COMM_WORLD, color, rank, &comm);
    }
    else {
        if (args.client_flag) {
            color=1;
            log_debug(debug_level, "rank=%d is a client", rank);
        }
        else if (args.server_flag) {
            color=0;
            log_debug(debug_level, "rank=%d is a server", rank);
        }
        else {
            log_error(debug_level, "Must be either a client or a server");
            MPI_Abort(MPI_COMM_WORLD, -1);
        }
        MPI_Comm_split(MPI_COMM_WORLD, color, rank, &comm);
    }

    MPI_Comm_rank(comm, &splitrank);
    MPI_Comm_size(comm, &splitsize);

    log_debug(debug_level, "%d: Finished splitting communicators", rank);

    /**
     * Initialize the Nessie interface by specifying a transport, encoding scheme, and a
     * recommended URL.  \ref NSSI_DEFAULT_TRANSPORT is usually the best choice, since it
     * is often the case that only one type of transport exists on a particular platform.
     * Currently supported transports are \ref NSSI_RPC_PTL, \ref NSSI_RPC_GNI, and
     * \ref NSSI_RPC_IB.  We only support one type of encoding scheme so NSSI_DEFAULT_ENCODE
     * should always be used for the second argument.   The URL can be specified (as we did for
     * the server) or NULL (as we did for the client).  This is a recommended value.  Use the
     * \ref nssi_get_url function to find the actual value.
     */
    nssi_rpc_init((nssi_rpc_transport)args.transport, NSSI_DEFAULT_ENCODE, NULL);

    // Get the Server URL
    std::string my_url(NSSI_URL_LEN, '\0');
    nssi_get_url((nssi_rpc_transport)args.transport, &my_url[0], NSSI_URL_LEN);

    // If running as both client and server, gather and distribute
    // the server URLs to all the clients.
    if (args.server_flag && args.client_flag) {

        std::string all_urls;

        // Contiguous character buffer sized to hold all of the server URLs
        all_urls.resize(args.num_servers * NSSI_URL_LEN, '\0');

        // Have servers gather their URLs
        if (color == 0) {
            assert(args.num_servers == splitsize);  // these should be equal

            log_debug(debug_level, "%d: Gathering urls: my_url=%s", rank, my_url.c_str());

            // gather all urls to rank 0 of the server comm (also rank 0 of MPI_COMM_WORLD)
            MPI_Gather(&my_url[0], NSSI_URL_LEN, MPI_CHAR,
                    &all_urls[0], NSSI_URL_LEN, MPI_CHAR, 0, comm);
        }

        // broadcast the full set of server urls to all processes
        MPI_Bcast(&all_urls[0], all_urls.size(), MPI_CHAR, 0, MPI_COMM_WORLD);

        log_debug(debug_level, "%d: Bcast urls, urls.size=%d", rank, all_urls.size());

        if (color == 1) {

            // For block distribution scheme use the utility function (in xfer_util.cpp)
            if (args.block_distribution) {
                // Use this utility function to calculate the server_index
                xfer_block_partition(args.num_servers, splitsize, splitrank, &server_index, &rank_in_server);
            }

            // Use a simple round robin distribution scheme
            else {
                server_index   = splitrank % args.num_servers;
                rank_in_server = splitrank / args.num_servers;
            }

            // Copy the server url out of the list of urls
            int offset = server_index * NSSI_URL_LEN;

            args.server_url = all_urls.substr(offset, NSSI_URL_LEN);

            log_debug(debug_level, "client %d assigned to server \"%s\"", splitrank, args.server_url.c_str());
        }


        log_debug(debug_level, "%d: Finished distributing server urls, server_url=%s", rank, args.server_url.c_str());
    }

    // If running as a client only, have to get the list of servers from the urlfile.
    else if (!args.server_flag && args.client_flag){

        sleep(args.delay);  // give server time to get started

        std::vector< std::string > urlbuf;
        xfer_read_server_url_file(args.url_file.c_str(), urlbuf, comm);
        args.num_servers = urlbuf.size();

        // For block distribution scheme use the utility function (in xfer_util.cpp)
        if (args.block_distribution) {
            // Use this utility function to calculate the server_index
            xfer_block_partition(args.num_servers, splitsize, splitrank, &server_index, &rank_in_server);
        }

        // Use a simple round robin distribution scheme
        else {
            server_index   = splitrank % args.num_servers;
            rank_in_server = splitrank / args.num_servers;
        }

        args.server_url = urlbuf[server_index];
        log_debug(debug_level, "client %d assigned to server \"%s\"", splitrank, args.server_url.c_str());
    }

    else if (args.server_flag && !args.client_flag) {
        args.server_url = my_url;

        if (args.url_file.empty()) {
            log_error(debug_level, "Must set --url-file");
            MPI_Abort(MPI_COMM_WORLD, -1);
        }

        xfer_write_server_url_file(args.url_file.c_str(), my_url.c_str(), comm);
    }

    // Set the debug level for the xfer service.
    xfer_debug_level = args.debug_level;

    // Print the arguments after they've all been set.
    log_debug(debug_level, "%d: server_url=%s", rank, args.server_url.c_str());

    print_args(out, args, "%");

    log_debug(debug_level, "server_url=%s", args.server_url.c_str());

    //------------------------------------------------------------------------------
    /** If we're running this job with a server, the server always executes on node 0.
     *  In this example, the server is a single process.
     */
    if (color == 0) {
        rc = xfer_server_main((nssi_rpc_transport)args.transport, args.num_threads, comm);
        log_debug(debug_level, "Server is finished");
    }

    // ------------------------------------------------------------------------------
     /**  The parallel client will execute this branch.  The root node, node 0, of the client
      *   connects with the server, using the \ref nssi_get_service function.  Then the root
      *   broadcasts the service description to the other clients before starting the main
      *   loop of the client code by calling \ref xfer_client_main.
      */
    else {
        int i;
        int client_rank;

        // get rank within the client communicator
        MPI_Comm_rank(comm, &client_rank);

        nssi_init((nssi_rpc_transport)args.transport);

        // Only one process needs to connect to the service
        // TODO: Make get_service a collective call (some transports do not need a connection)
        //if (client_rank == 0) {
        {


            // connect to remote server
            for (i=0; i < args.num_retries; i++) {
                log_debug(debug_level, "Try to connect to server: attempt #%d, url=%s", i, args.server_url.c_str());
                rc=nssi_get_service((nssi_rpc_transport)args.transport, args.server_url.c_str(), args.timeout, &xfer_svc);
                if (rc == NSSI_OK)
                    break;
                else if (rc != NSSI_ETIMEDOUT) {
                    log_error(xfer_debug_level, "could not get svc description: %s",
                            nssi_err_str(rc));
                    break;
                }
            }
        }

        // wait for all the clients to connect
        MPI_Barrier(comm);

        //MPI_Bcast(&rc, 1, MPI_INT, 0, comm);

        if (rc == NSSI_OK) {
            if (client_rank == 0) log_debug(debug_level, "Connected to service on attempt %d\n", i);

            // Broadcast the service description to the other clients
            //log_debug(xfer_debug_level, "Bcasting svc to other clients");
            //MPI_Bcast(&xfer_svc, sizeof(nssi_service), MPI_BYTE, 0, comm);

            log_debug(debug_level, "Starting client main");
            // Start the client code
            xfer_client_main(args, xfer_svc, comm);


            MPI_Barrier(comm);

            // Tell one of the clients to kill the server
            if ((args.kill_server_flag) && (rank_in_server == 0)) {
                log_debug(debug_level, "%d: Halting xfer service", rank);
                rc = nssi_kill(&xfer_svc, 0, 5000);
            }
            rc=nssi_free_service((nssi_rpc_transport)args.transport, &xfer_svc);
            if (rc != NSSI_OK) {
                log_error(xfer_debug_level, "could not free svc description: %s",
                        nssi_err_str(rc));
            }
        }

        else {
            if (client_rank == 0)
                log_error(debug_level, "Failed to connect to service after %d attempts: ABORTING", i);
            success = false;
            //MPI_Abort(MPI_COMM_WORLD, -1);
        }

        nssi_fini((nssi_rpc_transport)args.transport);

    }

    log_debug(debug_level, "%d: clean up nssi", rank);
    MPI_Barrier(MPI_COMM_WORLD);

    // Clean up nssi_rpc
    rc = nssi_rpc_fini((nssi_rpc_transport)args.transport);
    if (rc != NSSI_OK)
        log_error(debug_level, "Error in nssi_rpc_fini");

    log_debug(debug_level, "%d: MPI_Finalize()", rank);
    MPI_Finalize();

    logger_fini();

    if(success && (rc == NSSI_OK))
    	out << "\nEnd Result: TEST PASSED" << std::endl;
    else
    	out << "\nEnd Result: TEST FAILED" << std::endl;

    return ((success && (rc==NSSI_OK)) ? 0 : 1 );
}
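Example #22 above uses the enumeration overload of setOption, which maps each entry of an option_names array onto the matching entry of option_vals. A minimal standalone sketch of that overload follows; the mode names and values are hypothetical.

#include <Teuchos_CommandLineProcessor.hpp>
#include <iostream>

int main(int argc, char* argv[])
{
  enum Mode { MODE_FAST = 0, MODE_SAFE = 1 };
  int mode = MODE_FAST;
  const int numModes = 2;
  const int modeValues[] = { MODE_FAST, MODE_SAFE };
  const char* modeNames[] = { "fast", "safe" };

  Teuchos::CommandLineProcessor clp(false, true);
  // The parser accepts --mode=fast or --mode=safe and stores the
  // corresponding integer from modeValues into mode.
  clp.setOption("mode", &mode, numModes, modeValues, modeNames,
                "Execution mode: fast or safe");
  if (clp.parse(argc, argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
    return 1;
  }
  std::cout << "mode=" << mode << std::endl;  // prints the enum's integer value
  return 0;
}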
Example #23
int main(int narg, char *arg[]) {

  Teuchos::GlobalMPISession mpiSession(&narg, &arg,0);
  Platform &platform = Tpetra::DefaultPlatform::getDefaultPlatform();
  RCP<const Teuchos::Comm<int> > CommT = platform.getComm();

  int me = CommT->getRank();
  //int numProcs = CommT->getSize();

  if (me == 0){
  cout 
    << "====================================================================\n" 
    << "|                                                                  |\n" 
    << "|                  Example: Partition APF Mesh                     |\n" 
    << "|                                                                  |\n"
    << "|  Questions? Contact  Karen Devine      ([email protected]),     |\n"
    << "|                      Erik Boman        ([email protected]),     |\n"
    << "|                      Siva Rajamanickam ([email protected]).     |\n"
    << "|                                                                  |\n"
    << "|  Pamgen's website:   http://trilinos.sandia.gov/packages/pamgen  |\n"
    << "|  Zoltan2's website:  http://trilinos.sandia.gov/packages/zoltan2 |\n"
    << "|  Trilinos website:   http://trilinos.sandia.gov                  |\n"
    << "|                                                                  |\n"
    << "====================================================================\n";
  }


#ifdef HAVE_MPI
  if (me == 0) {
    cout << "PARALLEL executable \n";
  }
#else
  if (me == 0) {
    cout << "SERIAL executable \n";
  }
#endif

  /***************************************************************************/
  /******************************* GET INPUTS ********************************/
  /***************************************************************************/

  // default values for command-line arguments
  std::string meshFileName("4/");
  std::string modelFileName("torus.dmg");
  std::string action("parma");
  std::string parma_method("VtxElm");
  std::string output_loc("");
  int nParts = CommT->getSize();
  double imbalance = 1.1;

  // Read run-time options.
  Teuchos::CommandLineProcessor cmdp (false, false);
  cmdp.setOption("meshfile", &meshFileName,
                 "Mesh file with APF specifications (.smb file(s))");
  cmdp.setOption("modelfile", &modelFileName,
		 "Model file with APF specifications (.dmg file)");
  cmdp.setOption("action", &action,
                 "Method to use:  mj, scotch, zoltan_rcb, parma or color");
  cmdp.setOption("parma_method", &parma_method,
                 "Method to use: Vertex, Element, VtxElm, VtxEdgeElm, Ghost, or Shape ");
  cmdp.setOption("nparts", &nParts,
                 "Number of parts to create");
  cmdp.setOption("imbalance", &imbalance,
                 "Target imbalance for the partitioning method");
  cmdp.setOption("output", &output_loc,
                 "Location of new partitioned apf mesh. Ex: 4/torus.smb");
  cmdp.parse(narg, arg);

  
  /***************************************************************************/
  /********************** GET CELL TOPOLOGY **********************************/
  /***************************************************************************/

  // Get dimensions
  //int dim = 3;

  /***************************************************************************/
  /***************************** GENERATE MESH *******************************/
  /***************************************************************************/

#ifdef HAVE_ZOLTAN2_PARMA

  if (me == 0) cout << "Generating mesh ... \n\n";

  //Setup for SCOREC
  PCU_Comm_Init();
  
  // Generate mesh with MDS
  double time_1=PCU_Time();
  gmi_register_mesh();
  apf::Mesh2* m = apf::loadMdsMesh(modelFileName.c_str(),meshFileName.c_str());
  apf::verify(m);
  // Creating mesh adapter
  if (me == 0) cout << "Creating mesh adapter ... \n\n";

  typedef Zoltan2::RPIMeshAdapter<apf::Mesh2*> inputAdapter_t;

  inputAdapter_t ia(*CommT, m);
  
  double time_2=PCU_Time();
  // Set parameters for partitioning
  if (me == 0) cout << "Creating parameter list ... \n\n";

  Teuchos::ParameterList params("test params");
  params.set("timer_output_stream" , "std::cout");

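  // Each "action" below fills the same parameter list differently:
  // "algorithm" selects the Zoltan2 method, "num_global_parts" the target
  // part count, and "imbalance_tolerance" the allowed load imbalance
  // (e.g. 1.1 permits parts up to 10% above the average weight).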
  bool do_partitioning = false;
  if (action == "mj") {
    do_partitioning = true;
    params.set("debug_level", "basic_status");
    params.set("imbalance_tolerance", imbalance);
    params.set("num_global_parts", nParts);
    params.set("algorithm", "multijagged");
    params.set("rectilinear", "yes");
  }
  else if (action == "scotch") {
    do_partitioning = true;
    params.set("debug_level", "no_status");
    params.set("imbalance_tolerance", imbalance);
    params.set("num_global_parts", nParts);
    params.set("partitioning_approach", "partition");
    params.set("objects_to_partition","mesh_elements");
    params.set("algorithm", "scotch");
  }
  else if (action == "zoltan_rcb") {
    do_partitioning = true;
    params.set("debug_level", "verbose_detailed_status");
    params.set("imbalance_tolerance", imbalance);
    params.set("num_global_parts", nParts);
    params.set("partitioning_approach", "partition");
    params.set("algorithm", "zoltan");
  }
  else if (action == "parma") {
    do_partitioning = true;
    params.set("debug_level", "no_status");
    params.set("imbalance_tolerance", imbalance);
    params.set("algorithm", "parma");
    Teuchos::ParameterList &pparams = params.sublist("parma_parameters",false);
    pparams.set("parma_method",parma_method);
    pparams.set("step_size",1.1);
    if (parma_method=="Ghost") {
      pparams.set("ghost_layers",3);
      pparams.set("ghost_bridge",m->getDimension()-1);
    }
    params.set("compute_metrics","yes");

  }
  else if (action=="zoltan_hg") {
    do_partitioning = true;
    params.set("debug_level", "no_status");
    params.set("imbalance_tolerance", imbalance);
    params.set("algorithm", "zoltan");
    params.set("num_global_parts", nParts);
    Teuchos::ParameterList &zparams = params.sublist("zoltan_parameters",false);
    zparams.set("LB_METHOD","HYPERGRAPH");
    zparams.set("LB_APPROACH","REPARTITION");
    //params.set("compute_metrics","yes");

  }
  else if (action == "color") {
    params.set("debug_level", "verbose_detailed_status");
    params.set("debug_output_file", "kdd");
    params.set("debug_procs", "all");
  }
  Parma_PrintPtnStats(m,"before");
  // create Partitioning problem
  double time_3 = PCU_Time();
  if (do_partitioning) {
    if (me == 0) cout << "Creating partitioning problem ... \n\n";

    Zoltan2::PartitioningProblem<inputAdapter_t> problem(&ia, &params, CommT);

    // call the partitioner
    if (me == 0) cout << "Calling the partitioner ... \n\n";

    problem.solve();



    if (me==0) cout << "Applying Solution to Mesh\n\n";
    apf::Mesh2** new_mesh = &m;
    ia.applyPartitioningSolution(m,new_mesh,problem.getSolution());
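    // applyPartitioningSolution migrates mesh entities to their assigned
    // parts; m is updated in place (new_mesh points back at m), so the
    // statistics printed later reflect the repartitioned mesh.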
    
    

    if (!me) problem.printMetrics(cout);
  }
  else {
    if (me == 0) cout << "Creating coloring problem ... \n\n";

    Zoltan2::ColoringProblem<inputAdapter_t> problem(&ia, &params);

    // call the coloring algorithm
    if (me == 0) cout << "Calling the coloring algorithm ... \n\n";

    problem.solve();

    problem.printTimers();


  }
  double time_4=PCU_Time();
  //if (!me)
  Parma_PrintPtnStats(m,"after");
  if (output_loc!="") {
    m->writeNative(output_loc.c_str());
  }

  // delete mesh
  if (me == 0) cout << "Deleting the mesh ... \n\n";
  time_4-=time_3;
  time_2-=time_1;
  PCU_Max_Doubles(&time_2,1);
  PCU_Max_Doubles(&time_4,1);
  if (!me) {
    std::cout << "\nConstruction time: " << time_2 << "\n"
              << "Problem time: " << time_4 << "\n\n";
  }
  //Delete_APF_Mesh();
  ia.destroy();
  m->destroyNative();
  apf::destroyMesh(m);
  //End communications
  PCU_Comm_Free();
#endif
  if (me == 0)
    std::cout << "PASS" << std::endl;

  return 0;

}
int main (int argc, char *argv[]) {

    Teuchos::CommandLineProcessor clp;
    clp.setDocString("This example program measures the performance of dense Herk (Hermitian rank-k update) on the Kokkos::Threads execution space.\n");

    int nthreads = 0;
    clp.setOption("nthreads", &nthreads, "Number of threads");

    int numa = 0;
    clp.setOption("numa", &numa, "Number of NUMA nodes");

    int core_per_numa = 0;
    clp.setOption("core-per-numa", &core_per_numa, "Number of cores per NUMA node");

    int max_concurrency = 250000;
    clp.setOption("max-concurrency", &max_concurrency, "Max number of concurrent tasks");

    int memory_pool_grain_size = 16;
    clp.setOption("memory-pool-grain-size", &memory_pool_grain_size, "Memory pool grain size (12 - 16)");

    int mkl_nthreads = 1;
    clp.setOption("mkl-nthreads", &mkl_nthreads, "MKL threads for nested parallelism");

    bool verbose = false;
    clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing");

    int mmin = 1000;
    clp.setOption("mmin", &mmin, "C(mmin,mmin)");

    int mmax = 8000;
    clp.setOption("mmax", &mmax, "C(mmax,mmax)");

    int minc = 1000;
    clp.setOption("minc", &minc, "Increment of m");

    int k = 1024;
    clp.setOption("k", &k, "A(mmax,k) or A(k,mmax) according to transpose flags");

    int mb = 256;
    clp.setOption("mb", &mb, "Blocksize");

    bool check = true;
    clp.setOption("enable-check", "disable-check", &check, "Flag to check the solution");

    clp.recogniseAllOptions(true);
    clp.throwExceptions(false);

    Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv );

    if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0;
    if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL  ) return -1;

    int r_val = 0;
    {
        exec_space::initialize(nthreads, numa, core_per_numa);

        std::cout << std::endl << "DenseHerkByBlocks:: Upper, ConjTranspose, Variant::One (external)" << std::endl;
        r_val = exampleDenseHerkByBlocks
                <Uplo::Upper,Trans::ConjTranspose,Variant::One,exec_space>
                (mmin, mmax, minc, k, mb,
                 max_concurrency, memory_pool_grain_size, mkl_nthreads,
                 check,
                 verbose);

        exec_space::finalize();
    }

    return r_val;
}
Example #25
// calls MPI_Init and MPI_Finalize
int main(int argc,char * argv[])
{
   using Teuchos::RCP;
   using Teuchos::rcp;
   using Teuchos::rcp_dynamic_cast;
   using panzer::StrPureBasisPair;
   using panzer::StrPureBasisComp;

   Teuchos::GlobalMPISession mpiSession(&argc,&argv);
   RCP<Epetra_Comm> Comm = Teuchos::rcp(new Epetra_MpiComm(MPI_COMM_WORLD));
   Teuchos::RCP<Teuchos::Comm<int> > comm = Teuchos::rcp(new Teuchos::MpiComm<int>(Teuchos::opaqueWrapper(MPI_COMM_WORLD)));
   Teuchos::FancyOStream out(Teuchos::rcpFromRef(std::cout));
   out.setOutputToRootOnly(0);
   out.setShowProcRank(true);

   // Build command line processor
   ////////////////////////////////////////////////////

   bool useTpetra = false;
   Teuchos::CommandLineProcessor clp;
   clp.setOption("use-tpetra","use-epetra",&useTpetra);

   // parse commandline argument
   TEUCHOS_ASSERT(clp.parse(argc,argv)==Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL);

   // variable declarations
   ////////////////////////////////////////////////////

   // factory definitions
   Teuchos::RCP<Example::EquationSetFactory> eqset_factory = 
     Teuchos::rcp(new Example::EquationSetFactory); // where the Poisson equation is defined
   Example::BCStrategyFactory bc_factory;    // where boundary conditions are defined 

   panzer_stk::SquareQuadMeshFactory mesh_factory;

   // other declarations
   const std::size_t workset_size = 2*2;

   // construction of uncommitted (no elements) mesh 
   ////////////////////////////////////////////////////////

   // set mesh factory parameters
   RCP<Teuchos::ParameterList> pl = rcp(new Teuchos::ParameterList);
   pl->set("X Blocks",1);
   pl->set("Y Blocks",1);
   pl->set("X Elements",20);
   pl->set("Y Elements",20);
   mesh_factory.setParameterList(pl);

   RCP<panzer_stk::STK_Interface> mesh = mesh_factory.buildUncommitedMesh(MPI_COMM_WORLD);

   // construct input physics and physics block
   ////////////////////////////////////////////////////////

   Teuchos::RCP<Teuchos::ParameterList> ipb = Teuchos::parameterList("Physics Blocks");
   std::vector<panzer::BC> bcs;
   std::vector<RCP<panzer::PhysicsBlock> > physicsBlocks;
   {
      bool build_transient_support = false;

      testInitialization(ipb, bcs);
      
      const panzer::CellData volume_cell_data(workset_size, mesh->getCellTopology("eblock-0_0"));

      // GlobalData sets the ostream and parameter interface for the physics
      Teuchos::RCP<panzer::GlobalData> gd = panzer::createGlobalData();

      // Can be overridden by the equation set
      int default_integration_order = 1;
      
      // the physics block knows how to build and register evaluators with the field manager
      RCP<panzer::PhysicsBlock> pb 
	= rcp(new panzer::PhysicsBlock(ipb, "eblock-0_0",
				       default_integration_order, 
				       volume_cell_data,
				       eqset_factory,
				       gd,
				       build_transient_support));

      // we can have more than one physics block, one per element block
      physicsBlocks.push_back(pb);
   }

   // finish building mesh, set required field variables and mesh bulk data
   ////////////////////////////////////////////////////////////////////////

   {
      RCP<panzer::PhysicsBlock> pb = physicsBlocks[0]; // we are assuming only one physics block

      const std::vector<StrPureBasisPair> & blockFields = pb->getProvidedDOFs();

      // insert all fields into a set
      std::set<StrPureBasisPair,StrPureBasisComp> fieldNames;
      fieldNames.insert(blockFields.begin(),blockFields.end());

      // build suffix strings ("X","Y","Z") used to name vector-field components
      std::vector<std::string> dimenStr(3);
      dimenStr[0] = "X"; dimenStr[1] = "Y"; dimenStr[2] = "Z";

      // add basis to DOF manager: block specific
      std::set<StrPureBasisPair,StrPureBasisComp>::const_iterator fieldItr;
      for (fieldItr=fieldNames.begin();fieldItr!=fieldNames.end();++fieldItr) {
         Teuchos::RCP<const panzer::PureBasis> basis = fieldItr->second;
         if(basis->getElementSpace()==panzer::PureBasis::HGRAD)
            mesh->addSolutionField(fieldItr->first,pb->elementBlockID());
         else if(basis->getElementSpace()==panzer::PureBasis::HCURL) {
            for(int i=0;i<basis->dimension();i++) 
               mesh->addCellField(fieldItr->first+dimenStr[i],pb->elementBlockID());
         }
      }

      mesh_factory.completeMeshConstruction(*mesh,MPI_COMM_WORLD);
   }

   // build worksets
   ////////////////////////////////////////////////////////

   Teuchos::RCP<panzer_stk::WorksetFactory> wkstFactory
      = Teuchos::rcp(new panzer_stk::WorksetFactory(mesh)); // build STK workset factory
   Teuchos::RCP<panzer::WorksetContainer> wkstContainer     // attach it to a workset container (uses lazy evaluation)
      = Teuchos::rcp(new panzer::WorksetContainer(wkstFactory,physicsBlocks,workset_size));

   // build DOF Manager and linear object factory
   /////////////////////////////////////////////////////////////
 
   // build the connection manager 
   const Teuchos::RCP<panzer::ConnManager<int,int> > 
     conn_manager = Teuchos::rcp(new panzer_stk::STKConnManager(mesh));

   panzer::DOFManagerFactory<int,int> globalIndexerFactory;
   RCP<panzer::UniqueGlobalIndexer<int,int> > dofManager 
         = globalIndexerFactory.buildUniqueGlobalIndexer(Teuchos::opaqueWrapper(MPI_COMM_WORLD),physicsBlocks,conn_manager);
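   // The connection manager exposes mesh connectivity; from it the DOF
   // manager assigns a unique global ID to each (entity, field) pair, which
   // the linear-object factory below uses to build maps, vectors, and matrices.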

   // construct the linear-object factory that builds the objects passed to evaluators
   Teuchos::RCP<panzer::LinearObjFactory<panzer::Traits> > linObjFactory;
   if(!useTpetra)
      linObjFactory = Teuchos::rcp(new panzer::EpetraLinearObjFactory<panzer::Traits,int>(Comm.getConst(),dofManager));
   else
      linObjFactory = Teuchos::rcp(new panzer::TpetraLinearObjFactory<panzer::Traits,double,int,int>(comm,dofManager));

   // Setup STK response library for writing out the solution fields
   ////////////////////////////////////////////////////////////////////////
   Teuchos::RCP<panzer::ResponseLibrary<panzer::Traits> > stkIOResponseLibrary 
      = Teuchos::rcp(new panzer::ResponseLibrary<panzer::Traits>(wkstContainer,dofManager,linObjFactory));

   {
      // get a vector of all the element blocks 
      std::vector<std::string> eBlocks;
      {
         // get all element blocks and add them to the list
         std::vector<std::string> eBlockNames;
         mesh->getElementBlockNames(eBlockNames);
         for(std::size_t i=0;i<eBlockNames.size();i++)
            eBlocks.push_back(eBlockNames[i]);
      }
      
      panzer_stk::RespFactorySolnWriter_Builder builder;
      builder.mesh = mesh;
      stkIOResponseLibrary->addResponse("Main Field Output",eBlocks,builder);
   }

   // setup closure model
   /////////////////////////////////////////////////////////////
 
   // Add in the application specific closure model factory
   panzer::ClosureModelFactory_TemplateManager<panzer::Traits> cm_factory; 
   Example::ClosureModelFactory_TemplateBuilder cm_builder;
   cm_factory.buildObjects(cm_builder);

   Teuchos::ParameterList closure_models("Closure Models");
   closure_models.sublist("solid").sublist("SOURCE_EFIELD").set<std::string>("Type","SIMPLE SOURCE"); // a constant source
      // SOURCE_EFIELD field is required by the CurlLaplacianEquationSet

   Teuchos::ParameterList user_data("User Data"); // user data can be empty here

   // setup field manager builder
   /////////////////////////////////////////////////////////////

   Teuchos::RCP<panzer::FieldManagerBuilder> fmb = 
         Teuchos::rcp(new panzer::FieldManagerBuilder);
   fmb->setWorksetContainer(wkstContainer);
   fmb->setupVolumeFieldManagers(physicsBlocks,cm_factory,closure_models,*linObjFactory,user_data);
   fmb->setupBCFieldManagers(bcs,physicsBlocks,*eqset_factory,cm_factory,bc_factory,closure_models,
                             *linObjFactory,user_data);

   // setup assembly engine
   /////////////////////////////////////////////////////////////

   // build assembly engine: The key piece that brings together everything and 
   //                        drives and controls the assembly process. Just add
   //                        matrices and vectors
   panzer::AssemblyEngine_TemplateManager<panzer::Traits> ae_tm;
   panzer::AssemblyEngine_TemplateBuilder builder(fmb,linObjFactory);
   ae_tm.buildObjects(builder);

   // Finalize construction of the STK writer response library
   /////////////////////////////////////////////////////////////
   {
      user_data.set<int>("Workset Size",workset_size);
      stkIOResponseLibrary->buildResponseEvaluators(physicsBlocks,
                                        cm_factory,
                                        closure_models,
                                        user_data);
   }

   // assemble linear system
   /////////////////////////////////////////////////////////////

   // build linear algebra objects: Ghost is for parallel assembly, it contains
   //                               local element contributions summed, the global IDs
   //                               are not unique. The non-ghosted or "global"
   //                               container will contain the sum over all processors
   //                               of the ghosted objects. The global indices are unique.
   RCP<panzer::LinearObjContainer> ghostCont = linObjFactory->buildGhostedLinearObjContainer();
   RCP<panzer::LinearObjContainer> container = linObjFactory->buildLinearObjContainer();
   linObjFactory->initializeGhostedContainer(panzer::LinearObjContainer::X |
                                             panzer::LinearObjContainer::F |
                                             panzer::LinearObjContainer::Mat,*ghostCont);
   linObjFactory->initializeContainer(panzer::LinearObjContainer::X |
                                      panzer::LinearObjContainer::F |
                                      panzer::LinearObjContainer::Mat,*container);
   ghostCont->initialize();
   container->initialize();

   // Actually evaluate
   /////////////////////////////////////////////////////////////

   panzer::AssemblyEngineInArgs input(ghostCont,container);
   input.alpha = 0;
   input.beta = 1;
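   // alpha and beta weight the transient (xdot) and steady (x) contributions;
   // with build_transient_support = false above, alpha = 0 and beta = 1
   // correspond to a plain steady-state evaluation.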

   // evaluate physics: This does both the Jacobian and residual at once
   ae_tm.getAsObject<panzer::Traits::Jacobian>()->evaluate(input);

   // solve linear system
   /////////////////////////////////////////////////////////////

   if(useTpetra)
      solveTpetraSystem(*container);
   else
      solveEpetraSystem(*container);
  
   // output data (optional)
   /////////////////////////////////////////////////////////////

   // write out solution
   if(true) {
      // fill STK mesh objects
      Teuchos::RCP<panzer::ResponseBase> resp = stkIOResponseLibrary->getResponse<panzer::Traits::Residual>("Main Field Output");
      panzer::AssemblyEngineInArgs respInput(ghostCont,container);
      respInput.alpha = 0;
      respInput.beta = 1;

      stkIOResponseLibrary->addResponsesToInArgs<panzer::Traits::Residual>(respInput);
      stkIOResponseLibrary->evaluate<panzer::Traits::Residual>(respInput);

      // write to exodus
      mesh->writeToExodus("output.exo");
   }

   // all done!
   /////////////////////////////////////////////////////////////

   if(useTpetra)
      std::cout << "ALL PASSED: Tpetra" << std::endl;
   else
      std::cout << "ALL PASSED: Epetra" << std::endl;

   return 0;
}
Example #26
int main(int narg, char *arg[]) {

  Teuchos::GlobalMPISession mpiSession(&narg, &arg,0);
  Platform &platform = Tpetra::DefaultPlatform::getDefaultPlatform();
  RCP<const Teuchos::Comm<int> > CommT = platform.getComm();

  int me = CommT->getRank();
  //int numProcs = CommT->getSize();

  if (me == 0){
  cout 
    << "====================================================================\n" 
    << "|                                                                  |\n" 
    << "|                  Example: Partition APF Mesh                     |\n" 
    << "|                                                                  |\n"
    << "|  Questions? Contact  Karen Devine      ([email protected]),     |\n"
    << "|                      Erik Boman        ([email protected]),     |\n"
    << "|                      Siva Rajamanickam ([email protected]).     |\n"
    << "|                                                                  |\n"
    << "|  Zoltan2's website:  http://trilinos.sandia.gov/packages/zoltan2 |\n"
    << "|  Trilinos website:   http://trilinos.sandia.gov                  |\n"
    << "|                                                                  |\n"
    << "====================================================================\n";
  }


#ifdef HAVE_MPI
  if (me == 0) {
    cout << "PARALLEL executable \n";
  }
#else
  if (me == 0) {
    cout << "SERIAL executable \n";
  }
#endif

  /***************************************************************************/
  /******************************* GET INPUTS ********************************/
  /***************************************************************************/

  // default values for command-line arguments
  std::string meshFileName("4/");
  std::string modelFileName("torus.dmg");
  std::string action("parma");
  std::string parma_method("VtxElm");
  std::string output_loc("");
  int nParts = CommT->getSize();
  double imbalance = 1.1;
  int layers=2;
  int ghost_metric = 0;  // nonzero: compute ghost metrics before and after
  // Read run-time options.
  Teuchos::CommandLineProcessor cmdp (false, false);
  cmdp.setOption("meshfile", &meshFileName,
                 "Mesh file with APF specifications (.smb file(s))");
  cmdp.setOption("modelfile", &modelFileName,
                 "Model file with APF specifications (.dmg file)");
  cmdp.setOption("action", &action,
                 "Method to use:  mj, scotch, zoltan_rcb, parma or color");
  cmdp.setOption("parma_method", &parma_method,
                 "Method to use: Vertex, Element, VtxElm, VtxEdgeElm, Ghost, Shape, or Centroid ");
  cmdp.setOption("nparts", &nParts,
                 "Number of parts to create");
  cmdp.setOption("imbalance", &imbalance,
                 "Target imbalance for the partitioning method");
  cmdp.setOption("output", &output_loc,
                 "Location of new partitioned apf mesh. Ex: 4/torus.smb");
  cmdp.setOption("layers", &layers,
                 "Number of layers for ghosting");
  cmdp.setOption("ghost_metric", &ghost_metric,
                 "0 does not compute the ghost metric; any other value computes it both before and after partitioning");
  cmdp.parse(narg, arg);
  
  /***************************************************************************/
  /********************** GET CELL TOPOLOGY **********************************/
  /***************************************************************************/

  // Get dimensions
  //int dim = 3;

  /***************************************************************************/
  /***************************** GENERATE MESH *******************************/
  /***************************************************************************/

#ifdef HAVE_ZOLTAN2_PARMA

  if (me == 0) cout << "Generating mesh ... \n\n";

  //Setup for SCOREC
  PCU_Comm_Init();
  
  // Generate mesh with MDS
  gmi_register_mesh();
  apf::Mesh2* m = apf::loadMdsMesh(modelFileName.c_str(),meshFileName.c_str());
  apf::verify(m);
  
  //Data for APF MeshAdapter
  std::string primary="region";
  std::string adjacency="face";
  if (m->getDimension()==2) {
    primary="face";
    adjacency="edge";
  }
  bool needSecondAdj=false;
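  // "primary" names the entity type whose distribution the partitioner
  // balances; "adjacency" is the entity type through which primary entities
  // are considered connected (regions through faces in 3D, faces through
  // edges in 2D).  Some of the methods below override these choices.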
  
  // Set parameters for partitioning
  if (me == 0) cout << "Creating parameter list ... \n\n";

  Teuchos::ParameterList params("test params");
  params.set("timer_output_stream" , "std::cout");

  bool do_partitioning = false;
  if (action == "mj") {
    do_partitioning = true;
    params.set("debug_level", "basic_status");
    params.set("imbalance_tolerance", imbalance);
    params.set("num_global_parts", nParts);
    params.set("algorithm", "multijagged");
    params.set("rectilinear", "yes");
  }
  else if (action == "scotch") {
    do_partitioning = true;
    params.set("debug_level", "no_status");
    params.set("imbalance_tolerance", imbalance);
    params.set("num_global_parts", nParts);
    params.set("partitioning_approach", "partition");
    params.set("objects_to_partition","mesh_elements");
    params.set("algorithm", "scotch");
    needSecondAdj=true;
  }
  else if (action == "zoltan_rcb") {
    do_partitioning = true;
    params.set("debug_level", "verbose_detailed_status");
    params.set("imbalance_tolerance", imbalance);
    params.set("num_global_parts", nParts);
    params.set("partitioning_approach", "partition");
    params.set("algorithm", "zoltan");
  }
  else if (action == "parma") {
    do_partitioning = true;
    params.set("debug_level", "no_status");
    params.set("imbalance_tolerance", imbalance);
    params.set("algorithm", "parma");
    Teuchos::ParameterList &pparams = params.sublist("parma_parameters",false);
    pparams.set("parma_method",parma_method);
    pparams.set("step_size",1.1);
    if (parma_method=="Ghost") {
      pparams.set("ghost_layers",layers);
      pparams.set("ghost_bridge",m->getDimension()-1);
    }
    adjacency="vertex";
  }
  else if (action=="zoltan_hg") {
    do_partitioning = true;
    params.set("debug_level", "no_status");
    params.set("imbalance_tolerance", imbalance);
    params.set("algorithm", "zoltan");
    params.set("num_global_parts", nParts);
    Teuchos::ParameterList &zparams = params.sublist("zoltan_parameters",false);
    zparams.set("LB_METHOD","HYPERGRAPH");
    zparams.set("LB_APPROACH","PARTITION");
    //params.set("compute_metrics","yes");
    adjacency="vertex";
  }
  else if (action=="hg_ghost") {
    do_partitioning = true;
    params.set("debug_level", "no_status");
    params.set("imbalance_tolerance", imbalance);
    params.set("algorithm", "zoltan");
    params.set("num_global_parts", nParts);
    params.set("hypergraph_model_type","ghosting");
    params.set("ghost_layers",layers);
    Teuchos::ParameterList &zparams = params.sublist("zoltan_parameters",false);
    zparams.set("LB_METHOD","HYPERGRAPH");
    zparams.set("LB_APPROACH","PARTITION");
    zparams.set("PHG_EDGE_SIZE_THRESHOLD", "1.0");
    primary="vertex";
    adjacency="edge";
    needSecondAdj=true;
  }
  else if (action == "color") {
    params.set("debug_level", "verbose_detailed_status");
    params.set("debug_output_file", "kdd");
    params.set("debug_procs", "all");
  }
  Parma_PrintPtnStats(m,"before");

  // Creating mesh adapter
  if (me == 0) cout << "Creating mesh adapter ... \n\n";
  typedef Zoltan2::APFMeshAdapter<apf::Mesh2*> inputAdapter_t;
  typedef Zoltan2::EvaluatePartition<inputAdapter_t> quality_t;
  typedef Zoltan2::MeshAdapter<apf::Mesh2*> baseMeshAdapter_t;
  
  double time_1=PCU_Time();
  inputAdapter_t *ia =
    new inputAdapter_t(*CommT, m,primary,adjacency,needSecondAdj);  
  double time_2=PCU_Time();
 
  
  inputAdapter_t::scalar_t* arr =
    new inputAdapter_t::scalar_t[ia->getLocalNumOf(ia->getPrimaryEntityType())];
  for (size_t i=0;i<ia->getLocalNumOf(ia->getPrimaryEntityType());i++) {
    arr[i]=PCU_Comm_Self()+1;
  }
  
  const inputAdapter_t::scalar_t* weights=arr;
  ia->setWeights(ia->getPrimaryEntityType(),weights,1);
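  // One weight per primary entity, equal to (rank + 1), so higher-numbered
  // ranks appear heavier; this hands the partitioner a deliberately
  // imbalanced weighted problem to rebalance.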
  

  if (ghost_metric) {
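    // Build a hypergraph model of the mesh directly (no partitioning
    // problem yet) purely to report the "before" ghosting metrics.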
    const baseMeshAdapter_t *base_ia = dynamic_cast<const baseMeshAdapter_t*>(ia);
    Zoltan2::modelFlag_t graphFlags_;
    RCP<Zoltan2::Environment> env;
    try{
      env = rcp(new Zoltan2::Environment(params, Teuchos::DefaultComm<int>::getComm()));
    }
    Z2_FORWARD_EXCEPTIONS
      
    RCP<const Zoltan2::Environment> envConst = Teuchos::rcp_const_cast<const Zoltan2::Environment>(env);

    RCP<const baseMeshAdapter_t> baseInputAdapter_(base_ia,false);
    Zoltan2::HyperGraphModel<inputAdapter_t> model(baseInputAdapter_,envConst,CommT,
                                                 graphFlags_,Zoltan2::HYPEREDGE_CENTRIC);
    PrintGhostMetrics(model);
  }

  // create Partitioning problem
  double time_3 = PCU_Time();
  if (do_partitioning) {
    if (me == 0) cout << "Creating partitioning problem ... \n\n";

    Zoltan2::PartitioningProblem<inputAdapter_t> problem(ia, &params, CommT);

    // call the partitioner
    if (me == 0) cout << "Calling the partitioner ... \n\n";

    problem.solve();



    if (me==0) cout << "Applying Solution to Mesh\n\n";
    apf::Mesh2** new_mesh = &m;
    ia->applyPartitioningSolution(m,new_mesh,problem.getSolution());
    
    // create metric object
    RCP<quality_t> metricObject =
      rcp(new quality_t(ia, &params, CommT, &problem.getSolution()));

    if (!me) {
      metricObject->printMetrics(cout);
    }
  }
  else {
    if (me == 0) cout << "Creating coloring problem ... \n\n";

    Zoltan2::ColoringProblem<inputAdapter_t> problem(ia, &params);

    // call the coloring algorithm
    if (me == 0) cout << "Calling the coloring algorithm ... \n\n";

    problem.solve();

    problem.printTimers();


  }
  
  double time_4=PCU_Time();
  
  //Destroy the adapter
  ia->destroy();
  delete [] arr;
  //Parma_PrintPtnStats(m,"after");
  
  if (ghost_metric) {
    inputAdapter_t ia2(*CommT, m,primary,adjacency,true);  
    const baseMeshAdapter_t *base_ia = dynamic_cast<const baseMeshAdapter_t*>(&ia2);

    Zoltan2::modelFlag_t graphFlags_;
    RCP<Zoltan2::Environment> env;
    try{
      env = rcp(new Zoltan2::Environment(params, Teuchos::DefaultComm<int>::getComm()));
    }
    Z2_FORWARD_EXCEPTIONS
    RCP<const Zoltan2::Environment> envConst = Teuchos::rcp_const_cast<const Zoltan2::Environment>(env);
    RCP<const baseMeshAdapter_t> baseInputAdapter_(base_ia,false);
    Zoltan2::HyperGraphModel<inputAdapter_t> model(baseInputAdapter_, envConst, CommT,
                                                   graphFlags_,Zoltan2::HYPEREDGE_CENTRIC);

    PrintGhostMetrics(model);
    ia2.destroy();
  }
  
  if (output_loc!="") {
    m->writeNative(output_loc.c_str());
  }

  // delete mesh
  if (me == 0) cout << "Deleting the mesh ... \n\n";
  time_4-=time_3;
  time_2-=time_1;
  PCU_Max_Doubles(&time_2,1);
  PCU_Max_Doubles(&time_4,1);
  if (!me) {
    std::cout << "\nConstruction time: " << time_2 << "\n"
              << "Problem time: " << time_4 << "\n\n";
  }
  //Delete the APF Mesh
  m->destroyNative();
  apf::destroyMesh(m);
  //End communications
  PCU_Comm_Free();
#endif

  if (me == 0)
    std::cout << "PASS" << std::endl;

  return 0;

}
Example #27
int
main (int argc, char *argv[])
{
  using namespace Anasazi;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using std::endl;

#ifdef HAVE_MPI
  // Initialize MPI
  MPI_Init (&argc, &argv);
#endif // HAVE_MPI

  // Create an Epetra communicator
#ifdef HAVE_MPI
  Epetra_MpiComm Comm (MPI_COMM_WORLD);
#else
  Epetra_SerialComm Comm;
#endif // HAVE_MPI

  // Create an Anasazi output manager
  BasicOutputManager<double> printer;
  printer.stream(Errors) << Anasazi_Version() << std::endl << std::endl;

  // Get the sorting std::string from the command line
  std::string which ("LM");
  Teuchos::CommandLineProcessor cmdp (false, true);
  cmdp.setOption("sort", &which, "Targeted eigenvalues (SM or LM).");
  if (cmdp.parse (argc, argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
#ifdef HAVE_MPI
    MPI_Finalize ();
#endif // HAVE_MPI
    return -1;
  }

  // Dimension of the matrix
  //
  // Discretization points in any one direction.
  const int nx = 10;
  // Size of matrix nx*nx
  const int NumGlobalElements = nx*nx;

  // Construct a Map that puts approximately the same number of
  // equations on each process.
  Epetra_Map Map (NumGlobalElements, 0, Comm);

  // Get update list and number of local equations from newly created Map.
  int NumMyElements = Map.NumMyElements ();

  std::vector<int> MyGlobalElements (NumMyElements);
  Map.MyGlobalElements (&MyGlobalElements[0]);

  // Create an integer vector NumNz that is used to build the Epetra
  // matrix.  NumNz[i] is the number of nonzero entries (including the
  // diagonal) in the i-th global equation on this process: 3 at grid
  // corners, 4 along grid edges, and 5 at interior points.
  std::vector<int> NumNz (NumMyElements);

  /* We are building a matrix of block structure:

      | T -I          |
      |-I  T -I       |
      |   -I  T       |
      |        ...  -I|
      |           -I T|

   where each block has dimension nx by nx, so the full matrix has order
   nx*nx.  The block T is a tridiagonal matrix.
  */
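  /* For example, with nx = 10 the matrix is 100 x 100: a corner row of the
     grid (such as row 0) has 2 off-diagonal neighbors, an edge row has 3,
     and an interior row has 4 (east, west, north, south), in addition to
     the diagonal entry present in every row. */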
  for (int i=0; i<NumMyElements; ++i) {
    if (MyGlobalElements[i] == 0 || MyGlobalElements[i] == NumGlobalElements-1 ||
        MyGlobalElements[i] == nx-1 || MyGlobalElements[i] == nx*(nx-1) ) {
      NumNz[i] = 3;
    }
    else if (MyGlobalElements[i] < nx || MyGlobalElements[i] > nx*(nx-1) ||
             MyGlobalElements[i]%nx == 0 || (MyGlobalElements[i]+1)%nx == 0) {
      NumNz[i] = 4;
    }
    else {
      NumNz[i] = 5;
    }
  }

  // Create an Epetra_Matrix
  RCP<Epetra_CrsMatrix> A = rcp (new Epetra_CrsMatrix (Epetra_DataAccess::Copy, Map, &NumNz[0]));

  // Compute coefficients for the discrete convection-diffusion operator
  const double one = 1.0;
  std::vector<double> Values(4);
  std::vector<int> Indices(4);
  double rho = 0.0;
  double h = one /(nx+1);
  double h2 = h*h;
  double c = 5.0e-01*rho/ h;
  Values[0] = -one/h2 - c; Values[1] = -one/h2 + c; Values[2] = -one/h2; Values[3]= -one/h2;
  double diag = 4.0 / h2;
  int NumEntries;

  for (int i=0; i<NumMyElements; ++i) {
    if (MyGlobalElements[i]==0) {
      Indices[0] = 1;
      Indices[1] = nx;
      NumEntries = 2;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[1], &Indices[0]);
      TEUCHOS_TEST_FOR_EXCEPTION
        (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
         << info << " != 0." );
    }
    else if (MyGlobalElements[i] == nx*(nx-1)) {
      Indices[0] = nx*(nx-1)+1;
      Indices[1] = nx*(nx-2);
      NumEntries = 2;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[1], &Indices[0]);
      TEUCHOS_TEST_FOR_EXCEPTION
        (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
         << info << " != 0." );
    }
    else if (MyGlobalElements[i] == nx-1) {
      Indices[0] = nx-2;
      NumEntries = 1;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      TEUCHOS_TEST_FOR_EXCEPTION
        (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
         << info << " != 0." );
      Indices[0] = 2*nx-1;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[2], &Indices[0]);
      TEUCHOS_TEST_FOR_EXCEPTION
        (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
         << info << " != 0." );
    }
    else if (MyGlobalElements[i] == NumGlobalElements-1) {
      Indices[0] = NumGlobalElements-2;
      NumEntries = 1;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      TEUCHOS_TEST_FOR_EXCEPTION
        (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
         << info << " != 0." );
      Indices[0] = nx*(nx-1)-1;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[2], &Indices[0]);
      TEUCHOS_TEST_FOR_EXCEPTION
        (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
         << info << " != 0." );
    }
    else if (MyGlobalElements[i] < nx) {
      Indices[0] = MyGlobalElements[i]-1;
      Indices[1] = MyGlobalElements[i]+1;
      Indices[2] = MyGlobalElements[i]+nx;
      NumEntries = 3;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      TEUCHOS_TEST_FOR_EXCEPTION
        (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
         << info << " != 0." );
    }
    else if (MyGlobalElements[i] > nx*(nx-1)) {
      Indices[0] = MyGlobalElements[i]-1;
      Indices[1] = MyGlobalElements[i]+1;
      Indices[2] = MyGlobalElements[i]-nx;
      NumEntries = 3;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      TEUCHOS_TEST_FOR_EXCEPTION
        (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
         << info << " != 0." );
    }
    else if (MyGlobalElements[i]%nx == 0) {
      Indices[0] = MyGlobalElements[i]+1;
      Indices[1] = MyGlobalElements[i]-nx;
      Indices[2] = MyGlobalElements[i]+nx;
      NumEntries = 3;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[1], &Indices[0]);
      TEUCHOS_TEST_FOR_EXCEPTION
        (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
         << info << " != 0." );
    }
    else if ((MyGlobalElements[i]+1)%nx == 0) {
      Indices[0] = MyGlobalElements[i]-nx;
      Indices[1] = MyGlobalElements[i]+nx;
      NumEntries = 2;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[2], &Indices[0]);
      TEUCHOS_TEST_FOR_EXCEPTION
        (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
         << info << " != 0." );
      Indices[0] = MyGlobalElements[i]-1;
      NumEntries = 1;
      info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      TEUCHOS_TEST_FOR_EXCEPTION
        (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
         << info << " != 0." );
    }
    else {
      Indices[0] = MyGlobalElements[i]-1;
      Indices[1] = MyGlobalElements[i]+1;
      Indices[2] = MyGlobalElements[i]-nx;
      Indices[3] = MyGlobalElements[i]+nx;
      NumEntries = 4;
      int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
      TEUCHOS_TEST_FOR_EXCEPTION
        (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
         << info << " != 0." );
    }
    // Put in the diagonal entry
    int info = A->InsertGlobalValues(MyGlobalElements[i], 1, &diag, &MyGlobalElements[i]);
    TEUCHOS_TEST_FOR_EXCEPTION
      (info != 0, std::runtime_error, "InsertGlobalValues returned info = "
       << info << " != 0." );
  }

  // Finish up
  int info = A->FillComplete ();
  TEUCHOS_TEST_FOR_EXCEPTION
    (info != 0, std::runtime_error, "A->FillComplete() returned info = "
     << info << " != 0." );
  A->SetTracebackMode (1); // Shut down Epetra warning tracebacks

  // Create an identity matrix for the temporary mass matrix
  RCP<Epetra_CrsMatrix> M = rcp (new Epetra_CrsMatrix (Epetra_DataAccess::Copy, Map, 1));
  for (int i=0; i<NumMyElements; i++) {
    Values[0] = one;
    Indices[0] = MyGlobalElements[i]; // global column index of the diagonal entry
    NumEntries = 1;
    info = M->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]);
    TEUCHOS_TEST_FOR_EXCEPTION
      (info != 0, std::runtime_error, "M->InsertGlobalValues() returned info = "
       << info << " != 0." );
  }
  // Finish up
  info = M->FillComplete ();
  TEUCHOS_TEST_FOR_EXCEPTION
    (info != 0, std::runtime_error, "M->FillComplete() returned info = "
     << info << " != 0." );
  M->SetTracebackMode (1); // Shut down Epetra warning tracebacks

  //************************************
  // Call the LOBPCG solver manager
  //***********************************
  //
  // Variables used for the LOBPCG Method
  const int nev       = 10;
  const int blockSize = 5;
  const int maxIters  = 500;
  const double tol    = 1.0e-8;

  typedef Epetra_MultiVector MV;
  typedef Epetra_Operator OP;
  typedef MultiVecTraits<double, Epetra_MultiVector> MVT;

  // Create an Epetra_MultiVector for an initial vector to start the
  // solver.  Note: This needs to have the same number of columns as
  // the blocksize.
  RCP<Epetra_MultiVector> ivec = rcp (new Epetra_MultiVector (Map, blockSize));
  ivec->Random (); // fill the initial vector with random values

  // Create the eigenproblem.
  RCP<BasicEigenproblem<double, MV, OP> > MyProblem =
    rcp (new BasicEigenproblem<double, MV, OP> (A, ivec));

  // Inform the eigenproblem that the operator A is symmetric
  MyProblem->setHermitian (true);

  // Set the number of eigenvalues requested
  MyProblem->setNEV (nev);

  // Tell the eigenproblem that you are finishing passing it information.
  const bool success = MyProblem->setProblem ();
  if (! success) {
    printer.print (Errors, "Anasazi::BasicEigenproblem::setProblem() reported an error.\n");
#ifdef HAVE_MPI
    MPI_Finalize ();
#endif // HAVE_MPI
    return -1;
  }

  // Create parameter list to pass into the solver manager
  Teuchos::ParameterList MyPL;
  MyPL.set ("Which", which);
  MyPL.set ("Block Size", blockSize);
  MyPL.set ("Maximum Iterations", maxIters);
  MyPL.set ("Convergence Tolerance", tol);
  MyPL.set ("Full Ortho", true);
  MyPL.set ("Use Locking", true);

  // Create the solver manager
  LOBPCGSolMgr<double, MV, OP> MySolverMan (MyProblem, MyPL);

  // Solve the problem
  ReturnType returnCode = MySolverMan.solve ();

  // Get the eigenvalues and eigenvectors from the eigenproblem
  Eigensolution<double,MV> sol = MyProblem->getSolution ();
  std::vector<Value<double> > evals = sol.Evals;
  RCP<MV> evecs = sol.Evecs;

  // Compute residuals.
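  // For each eigenpair the direct residual is r_i = A x_i - lambda_i x_i.
  // Loading the eigenvalues on the diagonal of T lets a single
  // MvTimesMatAddMv call form all residuals at once; normR[i] = ||r_i||.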
  std::vector<double> normR (sol.numVecs);
  if (sol.numVecs > 0) {
    Teuchos::SerialDenseMatrix<int,double> T (sol.numVecs, sol.numVecs);
    Epetra_MultiVector tempAevec (Map, sol.numVecs );
    T.putScalar (0.0);
    for (int i = 0; i < sol.numVecs; ++i) {
      T(i,i) = evals[i].realpart;
    }
    A->Apply (*evecs, tempAevec);
    MVT::MvTimesMatAddMv (-1.0, *evecs, T, 1.0, tempAevec);
    MVT::MvNorm (tempAevec, normR);
  }

  // Print the results
  std::ostringstream os;
  os.setf (std::ios_base::right, std::ios_base::adjustfield);
  os << "Solver manager returned "
     << (returnCode == Converged ? "converged." : "unconverged.") << endl;
  os << endl;
  os << "------------------------------------------------------" << endl;
  os << std::setw(16) << "Eigenvalue"
     << std::setw(18) << "Direct Residual"
     << endl;
  os << "------------------------------------------------------" << endl;
  for (int i = 0; i < sol.numVecs; ++i) {
    os << std::setw(16) << evals[i].realpart
       << std::setw(18) << normR[i] / evals[i].realpart
       << endl;
  }
  os << "------------------------------------------------------" << endl;
  printer.print (Errors, os.str ());

#ifdef HAVE_MPI
  MPI_Finalize ();
#endif // HAVE_MPI
  return 0;
}
Example #28
int main(int argc, char *argv[]) {
  typedef double MeshScalar;
  typedef double BasisScalar;
  typedef Tpetra::DefaultPlatform::DefaultPlatformType::NodeType Node;
  typedef Teuchos::ScalarTraits<Scalar>::magnitudeType magnitudeType;
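  // NOTE: Scalar, LocalOrdinal, GlobalOrdinal, LocalMatOps, and the SG_* /
  // Krylov_Method option enums used below are assumed to be defined earlier
  // in the original source file; they are not shown in this excerpt.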

  //double g_mean_exp = 1.906587e-01;      // expected response mean
  //double g_std_dev_exp = 8.680605e-02;  // expected response std. dev.
  //double g_tol = 1e-6;               // tolerance on determining success



  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::Array;
  using Teuchos::ArrayRCP;
  using Teuchos::ArrayView;
  using Teuchos::ParameterList;

// Initialize MPI
#ifdef HAVE_MPI
  MPI_Init(&argc,&argv);
#endif

//  feenableexcept(FE_ALL_EXCEPT);

  LocalOrdinal MyPID;

  try {

    // Create a communicator for Epetra objects
    RCP<const Epetra_Comm> globalComm;
#ifdef HAVE_MPI
    globalComm = rcp(new Epetra_MpiComm(MPI_COMM_WORLD));
#else
    globalComm = rcp(new Epetra_SerialComm);
#endif
    MyPID = globalComm->MyPID();

    // Setup command line options
    Teuchos::CommandLineProcessor CLP;
    CLP.setDocString(
      "This example runs an interlaced stochastic Galerkin solver.\n");

    int n = 32;
    CLP.setOption("num_mesh", &n, "Number of mesh points in each direction");

    // multigrid specific options
    int minAggSize = 1;
    CLP.setOption("min_agg_size", &minAggSize, "multigrid aggregate size");
    int smootherSweeps = 3;
    CLP.setOption("smoother_sweeps", &smootherSweeps, "# multigrid smoother sweeps");
    int plainAgg=1;
    CLP.setOption("plain_aggregation", &plainAgg, "plain aggregation");
    LocalOrdinal nsSize=-1;
    CLP.setOption("nullspace_size", &nsSize, "nullspace dimension");


    bool symmetric = false;
    CLP.setOption("symmetric", "unsymmetric", &symmetric, 
                  "Symmetric discretization");

    int num_spatial_procs = -1;
    CLP.setOption("num_spatial_procs", &num_spatial_procs, "Number of spatial processors (set -1 for all available procs)");

    SG_RF randField = UNIFORM;
    CLP.setOption("rand_field", &randField, 
                  num_sg_rf, sg_rf_values, sg_rf_names,
                  "Random field type");

    double mu = 0.2;
    CLP.setOption("mean", &mu, "Mean");

    double s = 0.1;
    CLP.setOption("std_dev", &s, "Standard deviation");

    int num_KL = 2;
    CLP.setOption("num_kl", &num_KL, "Number of KL terms");

    int order = 3;
    CLP.setOption("order", &order, "Polynomial order");

    bool normalize_basis = true;
    CLP.setOption("normalize", "unnormalize", &normalize_basis, 
                  "Normalize PC basis");

    Krylov_Method solver_method = GMRES;
    CLP.setOption("solver_method", &solver_method, 
                  num_krylov_method, krylov_method_values, krylov_method_names, 
                  "Krylov solver method");

    SG_Prec prec_method = STOCHASTIC;
    CLP.setOption("prec_method", &prec_method, 
                  num_sg_prec, sg_prec_values, sg_prec_names,
                  "Preconditioner method");

    SG_Div division_method = DIRECT;
    CLP.setOption("division_method", &division_method, 
                  num_sg_div, sg_div_values, sg_div_names,
                  "Stochastic division method");

    SG_DivPrec divprec_method = NO;
    CLP.setOption("divprec_method", &divprec_method,
                  num_sg_divprec, sg_divprec_values, sg_divprec_names,
                  "Preconditioner for division method");
    Schur_option schur_option = diag;
    CLP.setOption("schur_option", &schur_option,
                  num_schur_option, Schur_option_values, schur_option_names,
                  "Schur option");
    Prec_option prec_option = whole;
    CLP.setOption("prec_option", &prec_option,
                  num_prec_option, Prec_option_values, prec_option_names,
                  "Prec option");


    double solver_tol = 1e-12;
    CLP.setOption("solver_tol", &solver_tol, "Outer solver tolerance");

    double div_tol = 1e-6;
    CLP.setOption("div_tol", &div_tol, "Tolerance in Iterative Solver");
    
    int prec_level = 1;
    CLP.setOption("prec_level", &prec_level, "Level in Schur complement preconditioner: 0 -> solve A0 u0 = g0 with division; 1 -> form 1x1 Schur complement");

    int max_it_div = 50;
    CLP.setOption("max_it_div", &max_it_div, "Maximum # of Iterations in Iterative Solver for Division");

    bool equilibrate = true; //JJH 8/26/12 changing to true to match ETP example
    CLP.setOption("equilibrate", "noequilibrate", &equilibrate,
                  "Equilibrate the linear system");


    CLP.parse( argc, argv );

    if (MyPID == 0) {
      std::cout << "Summary of command line options:" << std::endl
                << "\tnum_mesh           = " << n << std::endl
                << "\tsymmetric          = " << symmetric << std::endl
                << "\tnum_spatial_procs  = " << num_spatial_procs << std::endl
                << "\trand_field         = " << sg_rf_names[randField] 
                << std::endl
                << "\tmean               = " << mu << std::endl
                << "\tstd_dev            = " << s << std::endl
                << "\tnum_kl             = " << num_KL << std::endl
                << "\torder              = " << order << std::endl
                << "\tnormalize_basis    = " << normalize_basis << std::endl
                << "\tsolver_method      = " << krylov_method_names[solver_method] << std::endl
                << "\tprec_method        = " << sg_prec_names[prec_method]    << std::endl
                << "\tdivision_method    = " << sg_div_names[division_method]     << std::endl
                << "\tdiv_tol            = " << div_tol << std::endl
                << "\tdiv_prec           = " << sg_divprec_names[divprec_method]      << std::endl
                << "\tprec_level         = " << prec_level << std::endl
                << "\tmax_it_div         = " << max_it_div << std::endl;
    }
    bool nonlinear_expansion = false;
    if (randField == UNIFORM)
      nonlinear_expansion = false;
    else if (randField == LOGNORMAL)
      nonlinear_expansion = true;

    {
    TEUCHOS_FUNC_TIME_MONITOR("Total PCE Calculation Time");

    // Create Stochastic Galerkin basis and expansion
    Teuchos::Array< RCP<const Stokhos::OneDOrthogPolyBasis<LocalOrdinal,BasisScalar> > > bases(num_KL); 
    for (LocalOrdinal i=0; i<num_KL; i++)
      if (randField == UNIFORM)
        bases[i] = rcp(new Stokhos::LegendreBasis<LocalOrdinal,BasisScalar>(order, normalize_basis));
      else if (randField == LOGNORMAL)
        bases[i] = rcp(new Stokhos::HermiteBasis<int,double>(order, normalize_basis));
    RCP<const Stokhos::CompletePolynomialBasis<LocalOrdinal,BasisScalar> > basis = 
      rcp(new Stokhos::CompletePolynomialBasis<LocalOrdinal,BasisScalar>(bases, 1e-12));
    LocalOrdinal sz = basis->size();
    RCP<Stokhos::Sparse3Tensor<LocalOrdinal,BasisScalar> > Cijk = 
      basis->computeTripleProductTensor(sz);
    RCP<const Stokhos::Quadrature<int,double> > quad = 
      rcp(new Stokhos::TensorProductQuadrature<int,double>(basis));
    RCP<ParameterList> expn_params = Teuchos::rcp(new ParameterList);
    if (division_method == MEAN_DIV) {
      expn_params->set("Division Strategy", "Mean-Based");
      expn_params->set("Use Quadrature for Division", false);
    }
    else if (division_method == DIRECT) {
      expn_params->set("Division Strategy", "Dense Direct");
      expn_params->set("Use Quadrature for Division", false);
    }
    else if (division_method == SPD_DIRECT) {
      expn_params->set("Division Strategy", "SPD Dense Direct");
      expn_params->set("Use Quadrature for Division", false);
    }
    else if (division_method == CGD) {
      expn_params->set("Division Strategy", "CG");
      expn_params->set("Use Quadrature for Division", false);
    }

    else if (division_method == QUAD) {
      expn_params->set("Use Quadrature for Division", true);
    }

    if (divprec_method == NO)
         expn_params->set("Prec Strategy", "None");
    else if (divprec_method == DIAG)
         expn_params->set("Prec Strategy", "Diag");
    else if (divprec_method == JACOBI)
         expn_params->set("Prec Strategy", "Jacobi");
    else if (divprec_method == GS)
         expn_params->set("Prec Strategy", "GS");
    else if (divprec_method == SCHUR)
         expn_params->set("Prec Strategy", "Schur");

    if (schur_option == diag)
        expn_params->set("Schur option", "diag");
    else
        expn_params->set("Schur option", "full");
    if (prec_option == linear)
        expn_params->set("Prec option", "linear");


    if (equilibrate)
      expn_params->set("Equilibrate", 1);
    else
      expn_params->set("Equilibrate", 0); 
    expn_params->set("Division Tolerance", div_tol);
    expn_params->set("prec_iter", prec_level);
    expn_params->set("max_it_div", max_it_div);

    RCP<Stokhos::OrthogPolyExpansion<LocalOrdinal,BasisScalar> > expansion = 
      rcp(new Stokhos::QuadOrthogPolyExpansion<LocalOrdinal,BasisScalar>(
            basis, Cijk, quad, expn_params));

    if (MyPID == 0)
      std::cout << "Stochastic Galerkin expansion size = " << sz << std::endl;

    // Create stochastic parallel distribution
    ParameterList parallelParams;
    parallelParams.set("Number of Spatial Processors", num_spatial_procs);
    // parallelParams.set("Rebalance Stochastic Graph", true);
    // Teuchos::ParameterList& isorropia_params = 
    //   parallelParams.sublist("Isorropia");
    // isorropia_params.set("Balance objective", "nonzeros");
    RCP<Stokhos::ParallelData> sg_parallel_data =
      rcp(new Stokhos::ParallelData(basis, Cijk, globalComm, parallelParams));
    RCP<const EpetraExt::MultiComm> sg_comm = 
      sg_parallel_data->getMultiComm();
    RCP<const Epetra_Comm> app_comm = 
      sg_parallel_data->getSpatialComm();

    // Create Teuchos::Comm from Epetra_Comm
    RCP< Teuchos::Comm<int> > teuchos_app_comm;
#ifdef HAVE_MPI
    RCP<const Epetra_MpiComm> app_mpi_comm = 
      Teuchos::rcp_dynamic_cast<const Epetra_MpiComm>(app_comm);
    RCP<const Teuchos::OpaqueWrapper<MPI_Comm> > raw_mpi_comm = 
      Teuchos::opaqueWrapper(app_mpi_comm->Comm());
    teuchos_app_comm = rcp(new Teuchos::MpiComm<int>(raw_mpi_comm));
#else
    teuchos_app_comm = rcp(new Teuchos::SerialComm<int>());
#endif

    // Create application
    typedef twoD_diffusion_problem<Scalar,MeshScalar,BasisScalar,LocalOrdinal,GlobalOrdinal,Node> problem_type;
    RCP<problem_type> model = 
      rcp(new problem_type(teuchos_app_comm, n, num_KL, s, mu, 
               nonlinear_expansion, symmetric));

    // Create vectors and operators
    typedef problem_type::Tpetra_Vector Tpetra_Vector;
    typedef problem_type::Tpetra_CrsMatrix Tpetra_CrsMatrix;
    typedef Tpetra::MatrixMarket::Writer<Tpetra_CrsMatrix> Writer;
    //Xpetra matrices
    typedef Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> Xpetra_CrsMatrix;
    typedef Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> Xpetra_MultiVector;
    typedef Xpetra::MultiVectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node> Xpetra_MultiVectorFactory;
    typedef Xpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> Xpetra_Operator;
    typedef Xpetra::TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> Xpetra_TpetraCrsMatrix;
    typedef Xpetra::CrsOperator<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> Xpetra_CrsOperator;
    typedef Belos::MueLuOp<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> Belos_MueLuOperator;
    //MueLu typedefs
    typedef MueLu::Hierarchy<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> MueLu_Hierarchy;
    typedef MueLu::SmootherPrototype<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps> SmootherPrototype;
    typedef MueLu::TrilinosSmoother<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps> TrilinosSmoother;
    typedef MueLu::SmootherFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps> SmootherFactory;
    typedef MueLu::FactoryManager<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps> FactoryManager;

    RCP<Tpetra_Vector> p = Tpetra::createVector<Scalar>(model->get_p_map(0));
    RCP<Tpetra_Vector> x = Tpetra::createVector<Scalar>(model->get_x_map());
    x->putScalar(0.0);
    RCP<Tpetra_Vector> f = Tpetra::createVector<Scalar>(model->get_f_map());
    RCP<Tpetra_Vector> dx = Tpetra::createVector<Scalar>(model->get_x_map());
    RCP<Tpetra_CrsMatrix> J = model->create_W();
    RCP<Tpetra_CrsMatrix> J0;
    if (prec_method == MEAN)
      J0 = model->create_W();

    // Set PCE expansion of p
    p->putScalar(0.0);
    ArrayRCP<Scalar> p_view = p->get1dViewNonConst();
    for (ArrayRCP<Scalar>::size_type i=0; i<p_view.size(); i++) {
      p_view[i].reset(expansion);
      p_view[i].copyForWrite();
    }
    Array<double> point(num_KL, 1.0);
    Array<double> basis_vals(sz);
    basis->evaluateBases(point, basis_vals);
    if (order > 0) {
      for (int i=0; i<num_KL; i++) {
        p_view[i].term(i,1) = 1.0 / basis_vals[i+1];
      }
    }
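    // Each p_i now carries a first-order PCE term in its own stochastic
    // dimension, scaled by 1/psi_{i+1}(1,...,1) so that p_i evaluates to one
    // at the point (1,...,1); p thus plays the role of the vector of KL
    // random variables.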

    // Create preconditioner
    typedef Ifpack2::Preconditioner<Scalar,LocalOrdinal,GlobalOrdinal,Node> Tprec;
    RCP<Belos_MueLuOperator> M;
    RCP<MueLu_Hierarchy> H;
    RCP<Xpetra_CrsMatrix> xcrsJ = rcp(new Xpetra_TpetraCrsMatrix(J));
    RCP<Xpetra_Operator> xopJ = rcp(new Xpetra_CrsOperator(xcrsJ));
    if (prec_method != NONE) {
      ParameterList precParams;
      std::string prec_name = "RILUK";
      precParams.set("fact: iluk level-of-fill", 1);
      precParams.set("fact: iluk level-of-overlap", 0);
      //Ifpack2::Factory factory;
      RCP<Xpetra_Operator> xopJ0;
      if (prec_method == MEAN) {
        RCP<Xpetra_CrsMatrix> xcrsJ0 = rcp(new Xpetra_TpetraCrsMatrix(J0));
        xopJ0 = rcp(new Xpetra_CrsOperator(xcrsJ0));
        //M = factory.create<Tpetra_CrsMatrix>(prec_name, J0);
      } else if (prec_method == STOCHASTIC) {
        xopJ0 = xopJ;
        //M = factory.create<Tpetra_CrsMatrix>(prec_name, J);
      }
      H = rcp(new MueLu_Hierarchy(xopJ0));
      M = rcp(new Belos_MueLuOperator(H));
      //M->setParameters(precParams);
      if (nsSize!=-1) sz=nsSize;
      RCP<Xpetra_MultiVector> Z = Xpetra_MultiVectorFactory::Build(xcrsJ->getDomainMap(), sz);
      size_t n = Z->getLocalLength();
      for (LocalOrdinal j=0; j<sz; ++j) {
        ArrayRCP<Scalar> col = Z->getDataNonConst(j);
        for (size_t i=0; i<n; ++i) {
          col[i].reset(expansion);
          col[i].copyForWrite();
          col[i].fastAccessCoeff(j) = 1.0;
        }
      }
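      // Column j of Z is a PCE-valued vector whose j-th PCE coefficient is
      // identically one: one nullspace vector per stochastic basis term,
      // which MueLu uses below when building aggregates and the prolongator.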
      H->GetLevel(0)->Set("Nullspace", Z);
      //RCP<Teuchos::FancyOStream> fos = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
      //fos->setOutputToRootOnly(-1);
      //Z->describe(*fos);
    }

    // Evaluate model
    model->computeResidual(*x, *p, *f);
    model->computeJacobian(*x, *p, *J);

    // Compute mean for mean-based preconditioner
    if (prec_method == MEAN) {
      size_t nrows = J->getNodeNumRows();
      ArrayView<const LocalOrdinal> indices;
      ArrayView<const Scalar> values;
      J0->resumeFill();
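      // Copy the zeroth (mean) PCE coefficient of each Jacobian entry into J0.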
      for (size_t i=0; i<nrows; i++) {
        J->getLocalRowView(i, indices, values);
        Array<Scalar> values0(values.size());
        for (LocalOrdinal j=0; j<values.size(); j++)
          values0[j] = values[j].coeff(0);
        J0->replaceLocalValues(i, indices, values0);
      }
      J0->fillComplete();
    }

    // compute preconditioner
    if (prec_method != NONE) {
      //M->initialize();
      //M->compute();

      //override MueLu defaults via factory manager
      RCP<FactoryManager> fm = rcp( new FactoryManager() );

      //smoother
      ParameterList smootherParamList;
      /*
      smootherParamList.set("chebyshev: degree", smootherSweeps);
      smootherParamList.set("chebyshev: ratio eigenvalue", (double) 20);
      smootherParamList.set("chebyshev: max eigenvalue", (double) -1.0);
      smootherParamList.set("chebyshev: min eigenvalue", (double) 1.0);
      smootherParamList.set("chebyshev: zero starting solution", true);
      RCP<SmootherPrototype> smooPrototype     = rcp( new TrilinosSmoother("CHEBYSHEV", smootherParamList) );
      */
      smootherParamList.set("relaxation: sweeps", smootherSweeps);
      smootherParamList.set("relaxation: type", "Symmetric Gauss-Seidel");
      RCP<SmootherPrototype> smooPrototype     = rcp( new TrilinosSmoother("RELAXATION", smootherParamList) );

      RCP<SmootherFactory>   smooFact      = rcp( new SmootherFactory(smooPrototype) );
      fm->SetFactory("Smoother", smooFact);

      // coarse level solve
      ParameterList coarseParamList;
      coarseParamList.set("fact: level-of-fill", 0);
      RCP<SmootherPrototype> coarsePrototype     = rcp( new TrilinosSmoother("ILUT", coarseParamList) );
      RCP<SmootherFactory>   coarseSolverFact      = rcp( new SmootherFactory(coarsePrototype, Teuchos::null) );
      fm->SetFactory("CoarseSolver", coarseSolverFact);

      //allow for larger aggregates
      typedef MueLu::UCAggregationFactory<LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>
      MueLu_UCAggregationFactory;
      RCP<MueLu_UCAggregationFactory> aggFact = rcp(new MueLu_UCAggregationFactory());
      aggFact->SetMinNodesPerAggregate(minAggSize);
      fm->SetFactory("Aggregates", aggFact);

      //turn off damping
      typedef MueLu::SaPFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps> MueLu_SaPFactory;
      if (plainAgg) {
        RCP<MueLu_SaPFactory> sapFactory = rcp(new MueLu_SaPFactory);
        sapFactory->SetDampingFactor( (Scalar) 0.0 );
        fm->SetFactory("P", sapFactory);
      }

      H->Setup(*fm);
    }

    // Setup Belos solver
    RCP<ParameterList> belosParams = rcp(new ParameterList);

    // Flexible GMRES would allow the right preconditioner to change between
    // iterations; it is not needed here, so it is disabled.
    belosParams->set("Flexible Gmres", false);

    belosParams->set("Num Blocks", 500);
    belosParams->set("Convergence Tolerance", solver_tol);
    belosParams->set("Maximum Iterations", 1000);
    // "Verbosity" is a bitwise OR of Belos::MsgType flags.
    belosParams->set("Verbosity", 33);
    belosParams->set("Output Style", 1);
    belosParams->set("Output Frequency", 1);
    typedef Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV;
    typedef Belos::OperatorT<Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > OP;
    typedef Belos::OperatorTraits<Scalar,MV,OP> BOPT;
    typedef Belos::MultiVecTraits<Scalar,MV> BMVT;
    typedef Belos::MultiVecTraits<double,MV> BTMVT;
    typedef Belos::LinearProblem<double,MV,OP> BLinProb;
    typedef Belos::XpetraOp<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> BXpetraOp;
    RCP<OP> belosJ = rcp(new BXpetraOp(xopJ)); // Turns an Xpetra::Operator object into a Belos operator
    RCP< BLinProb > problem = rcp(new BLinProb(belosJ, dx, f));
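    // M is applied as a right preconditioner below, so Belos monitors the true
    // (unpreconditioned) residual norm.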
    if (prec_method != NONE)
      problem->setRightPrec(M);
    problem->setProblem();
    RCP<Belos::SolverManager<double,MV,OP> > solver;
    if (solver_method == CG)
      solver = rcp(new Belos::PseudoBlockCGSolMgr<double,MV,OP>(problem, belosParams));
    else if (solver_method == GMRES)
      solver = rcp(new Belos::BlockGmresSolMgr<double,MV,OP>(problem, belosParams));

    // Print initial residual norm
    std::vector<double> norm_f(1);
    //BMVT::MvNorm(*f, norm_f);
    BTMVT::MvNorm(*f, norm_f);
    if (MyPID == 0)
      std::cout << "\nInitial residual norm = " << norm_f[0] << std::endl;

    // Solve linear system
    Belos::ReturnType ret = solver->solve();

    if (MyPID == 0) {
      if (ret == Belos::Converged)
        std::cout << "Solver converged!" << std::endl;
      else
        std::cout << "Solver failed to converge!" << std::endl;
    }

    // Apply the Newton update: x <- x - dx
    x->update(-1.0, *dx, 1.0);
    Writer::writeDenseFile("stochastic_solution.mm", x);

    // Compute new residual & response function
    RCP<Tpetra_Vector> g = Tpetra::createVector<Scalar>(model->get_g_map(0));
    f->putScalar(0.0);
    model->computeResidual(*x, *p, *f);
    model->computeResponse(*x, *p, *g);

    // Print final residual norm
    //BMVT::MvNorm(*f, norm_f);
    BTMVT::MvNorm(*f, norm_f);
    if (MyPID == 0)
      std::cout << "\nFinal residual norm = " << norm_f[0] << std::endl;

    // Print response
    std::cout << "\nResponse =      " << std::endl;
    //Writer::writeDense(std::cout, g);
    Writer::writeDenseFile("stochastic_residual.mm", f);

/*
    double g_mean = g->get1dView()[0].mean();
    double g_std_dev = g->get1dView()[0].standard_deviation();
    std::cout << "g mean = " << g_mean << std::endl;
    std::cout << "g std_dev = " << g_std_dev << std::endl;
    bool passed = false;
    if (norm_f[0] < 1.0e-10 &&
        std::abs(g_mean-g_mean_exp) < g_tol &&
        std::abs(g_std_dev - g_std_dev_exp) < g_tol)
      passed = true;
    if (MyPID == 0) {
      if (passed)
        std::cout << "Example Passed!" << std::endl;
      else{
        std::cout << "Example Failed!" << std::endl;
        std::cout << "expected g_mean = "<< g_mean_exp << std::endl;
        std::cout << "expected g_std_dev = "<< g_std_dev_exp << std::endl;
      }
    }
*/

    }

    Teuchos::TimeMonitor::summarize(std::cout);
    Teuchos::TimeMonitor::zeroOutTimers();

  }
  catch (std::exception& e) {
    std::cout << e.what() << std::endl;
  }
  catch (string& s) {
    std::cout << s << std::endl;
  }
  catch (const char *s) {
    std::cout << s << std::endl;
  }
  catch (...) {
    std::cout << "Caught unknown exception!" << std::endl;
  }

#ifdef HAVE_MPI
  MPI_Finalize();
#endif

}
Example #29
int main(int argc, char* argv[]) {
  int ierr = 0;

  try {
    double t, ta, tr;
    int p = 2;
    int w = p+7;

    // Maximum number of derivative components for SLFad
    const int slfad_max = 130;

    // Set up command line options
    Teuchos::CommandLineProcessor clp;
    clp.setDocString("This program tests the speed of various forward mode AD implementations for a finite-element-like Jacobian fill");
    int num_nodes = 100000;
    int num_eqns = 2;
    int rt = 0;
    clp.setOption("n", &num_nodes, "Number of nodes");
    clp.setOption("p", &num_eqns, "Number of equations");
    clp.setOption("rt", &rt, "Include ADOL-C retaping test");

    // Parse options
    Teuchos::CommandLineProcessor::EParseCommandLineReturn
      parseReturn = clp.parse(argc, argv);
    if(parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL)
      return 1;

    double mesh_spacing = 1.0 / static_cast<double>(num_nodes - 1);

    // Memory pool & manager
    Sacado::Fad::MemPoolManager<double> poolManager(num_nodes*num_eqns);
    Sacado::Fad::MemPool* pool = poolManager.getMemoryPool(num_nodes*num_eqns);
    Sacado::Fad::DMFad<double>::setDefaultPool(pool);
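    // DMFad objects draw their derivative-array storage from this pool instead
    // of allocating it on the heap individually.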

    std::cout.setf(std::ios::scientific);
    std::cout.precision(p);
    std::cout << "num_nodes =  " << num_nodes
        << ", num_eqns = " << num_eqns << ":  " << std::endl
        << "               " << "   Time   " << "\t"<< "Time/Analytic" << "\t"
        << "Time/(2*p*Residual)" << std::endl;

    ta = 1.0;
    tr = 1.0;
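    // Baselines: tr is the time for a plain residual fill and ta for the
    // hand-coded analytic Jacobian fill; each AD time t is reported as t/ta and
    // t/(2*p*tr), i.e. measured against 2*num_eqns residual evaluations.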

    tr = residual_fill(num_nodes, num_eqns, mesh_spacing);

    ta = analytic_jac_fill(num_nodes, num_eqns, mesh_spacing);
    std::cout << "Analytic:      " << std::setw(w) << ta << "\t" << std::setw(w) << ta/ta << "\t" << std::setw(w) << ta/(2.0*num_eqns*tr) << std::endl;

#ifdef HAVE_ADOLC
#ifndef ADOLC_TAPELESS
    t = adolc_jac_fill(num_nodes, num_eqns, mesh_spacing);
    std::cout << "ADOL-C:        " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl;

    if (rt != 0) {
      t = adolc_retape_jac_fill(num_nodes, num_eqns, mesh_spacing);
      std::cout << "ADOL-C(rt):  " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl;
    }

#else
    t = adolc_tapeless_jac_fill(num_nodes, num_eqns, mesh_spacing);
    std::cout << "ADOL-C(tl):    " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl;
#endif
#endif

#ifdef HAVE_ADIC
    t = adic_jac_fill(num_nodes, num_eqns, mesh_spacing);
    std::cout << "ADIC:          " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl;
#endif

    // The fixed-size Fad types need the derivative dimension (2*num_eqns) as a
    // compile-time constant, hence the size cascades below.
    if (num_eqns*2 == 4) {
      t = fad_jac_fill< FAD::TFad<4,double> >(num_nodes, num_eqns, mesh_spacing);
      std::cout << "TFad:          " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl;
    }
    else if (num_eqns*2 == 16) {
      t = fad_jac_fill< FAD::TFad<16,double> >(num_nodes, num_eqns, mesh_spacing);
      std::cout << "TFad:          " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl;
    }
    else if (num_eqns*2 == 32) {
      t = fad_jac_fill< FAD::TFad<32,double> >(num_nodes, num_eqns, mesh_spacing);
      std::cout << "TFad:          " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl;
    }
    else if (num_eqns*2 == 64) {
      t = fad_jac_fill< FAD::TFad<64,double> >(num_nodes, num_eqns, mesh_spacing);
      std::cout << "TFad:          " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl;
    }

    t = fad_jac_fill< FAD::Fad<double> >(num_nodes, num_eqns, mesh_spacing);
    std::cout << "Fad:           " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl;

    if (num_eqns*2 == 4) {
      t = fad_jac_fill< Sacado::Fad::SFad<double,4> >(num_nodes, num_eqns, mesh_spacing);
      std::cout << "SFad:          " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl;
    }
    else if (num_eqns*2 == 16) {
      t = fad_jac_fill< Sacado::Fad::SFad<double,16> >(num_nodes, num_eqns, mesh_spacing);
      std::cout << "SFad:          " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl;
    }
    else if (num_eqns*2 == 32) {
      t = fad_jac_fill< Sacado::Fad::SFad<double,32> >(num_nodes, num_eqns, mesh_spacing);
      std::cout << "SFad:          " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl;
    }
    else if (num_eqns*2 == 64) {
      t = fad_jac_fill< Sacado::Fad::SFad<double,64> >(num_nodes, num_eqns, mesh_spacing);
      std::cout << "SFad:          " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl;
    }

    if (num_eqns*2 < slfad_max) {
      t = fad_jac_fill< Sacado::Fad::SLFad<double,slfad_max> >(num_nodes, num_eqns, mesh_spacing);
      std::cout << "SLFad:         " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl;
    }

    t = fad_jac_fill< Sacado::Fad::DFad<double> >(num_nodes, num_eqns, mesh_spacing);
    std::cout << "DFad:          " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl;

    t = fad_jac_fill< Sacado::Fad::SimpleFad<double> >(num_nodes, num_eqns, mesh_spacing);
    std::cout << "SimpleFad:     " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl;

    t = fad_jac_fill< Sacado::Fad::DMFad<double> >(num_nodes, num_eqns, mesh_spacing);
    std::cout << "DMFad:         " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl;

    if (num_eqns*2 == 4) {
      t = fad_jac_fill< Sacado::ELRFad::SFad<double,4> >(num_nodes, num_eqns, mesh_spacing);
      std::cout << "ELRSFad:       " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl;
    }
    else if (num_eqns*2 == 16) {
      t = fad_jac_fill< Sacado::ELRFad::SFad<double,16> >(num_nodes, num_eqns, mesh_spacing);
      std::cout << "ELRSFad:       " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl;
    }
    else if (num_eqns*2 == 32) {
      t = fad_jac_fill< Sacado::ELRFad::SFad<double,32> >(num_nodes, num_eqns, mesh_spacing);
      std::cout << "ELRSFad:       " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl;
    }
    else if (num_eqns*2 == 64) {
      t = fad_jac_fill< Sacado::ELRFad::SFad<double,64> >(num_nodes, num_eqns, mesh_spacing);
      std::cout << "ELRSFad:       " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl;
    }

    if (num_eqns*2 < slfad_max) {
      t = fad_jac_fill< Sacado::ELRFad::SLFad<double,slfad_max> >(num_nodes, num_eqns, mesh_spacing);
      std::cout << "ELRSLFad:      " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl;
    }

    t = fad_jac_fill< Sacado::ELRFad::DFad<double> >(num_nodes, num_eqns, mesh_spacing);
    std::cout << "ELRDFad:       " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl;

    if (num_eqns*2 == 4) {
      t = fad_jac_fill< Sacado::CacheFad::SFad<double,4> >(num_nodes, num_eqns, mesh_spacing);
      std::cout << "CacheSFad:     " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl;
    }
    else if (num_eqns*2 == 16) {
      t = fad_jac_fill< Sacado::CacheFad::SFad<double,16> >(num_nodes, num_eqns, mesh_spacing);
      std::cout << "CacheSFad:     " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl;
    }
    else if (num_eqns*2 == 32) {
      t = fad_jac_fill< Sacado::CacheFad::SFad<double,32> >(num_nodes, num_eqns, mesh_spacing);
      std::cout << "CacheSFad:     " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl;
    }
    else if (num_eqns*2 == 64) {
      t = fad_jac_fill< Sacado::CacheFad::SFad<double,64> >(num_nodes, num_eqns, mesh_spacing);
      std::cout << "CacheSFad:     " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl;
    }

    if (num_eqns*2 < slfad_max) {
      t = fad_jac_fill< Sacado::CacheFad::SLFad<double,slfad_max> >(num_nodes, num_eqns, mesh_spacing);
      std::cout << "CacheSLFad:    " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl;
    }

    t = fad_jac_fill< Sacado::CacheFad::DFad<double> >(num_nodes, num_eqns, mesh_spacing);
    std::cout << "CacheFad:      " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl;

    if (num_eqns*2 == 4) {
      t = fad_jac_fill< Sacado::ELRCacheFad::SFad<double,4> >(num_nodes, num_eqns, mesh_spacing);
      std::cout << "ELRCacheSFad:  " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl;
    }
    else if (num_eqns*2 == 16) {
      t = fad_jac_fill< Sacado::ELRCacheFad::SFad<double,16> >(num_nodes, num_eqns, mesh_spacing);
      std::cout << "ELRCacheSFad:  " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl;
    }
    else if (num_eqns*2 == 32) {
      t = fad_jac_fill< Sacado::ELRCacheFad::SFad<double,32> >(num_nodes, num_eqns, mesh_spacing);
      std::cout << "ELRCacheSFad:  " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl;
    }
    else if (num_eqns*2 == 64) {
      t = fad_jac_fill< Sacado::ELRCacheFad::SFad<double,64> >(num_nodes, num_eqns, mesh_spacing);
      std::cout << "ELRCacheSFad:  " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl;
    }

    if (num_eqns*2 < slfad_max) {
      t = fad_jac_fill< Sacado::ELRCacheFad::SLFad<double,slfad_max> >(num_nodes, num_eqns, mesh_spacing);
      std::cout << "ELRCacheSLFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl;
    }

    t = fad_jac_fill< Sacado::ELRCacheFad::DFad<double> >(num_nodes, num_eqns, mesh_spacing);
    std::cout << "ELRCacheFad:   " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl;

    t = fad_jac_fill< Sacado::Fad::DVFad<double> >(num_nodes, num_eqns, mesh_spacing);
    std::cout << "DVFad:         " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl;

  }
  catch (std::exception& e) {
    std::cout << e.what() << std::endl;
    ierr = 1;
  }
  catch (const char *s) {
    std::cout << s << std::endl;
    ierr = 1;
  }
  catch (...) {
    std::cout << "Caught unknown exception!" << std::endl;
    ierr = 1;
  }

  return ierr;
}
Example #30
int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int argc, char *argv[]) {
#include <MueLu_UseShortNames.hpp>

  using Teuchos::RCP;
  using Teuchos::rcp;

  //
  // MPI initialization
  //

  Teuchos::oblackholestream blackhole;

  bool success = false;
  bool verbose = true;
  try {
    RCP< const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();

    //
    // Process command line arguments
    //
    Galeri::Xpetra::Parameters<GO> matrixParameters(clp, 81); // manage parameters of the test case
    Xpetra::Parameters             xpetraParameters(clp);     // manage parameters of xpetra

    switch (clp.parse(argc,argv)) {
      case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED:        return EXIT_SUCCESS;
      case Teuchos::CommandLineProcessor::PARSE_ERROR:
      case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE;
      case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL:          break;
      default:;
    }

    if (comm->getRank() == 0) std::cout << xpetraParameters << matrixParameters;

    //
    // Setup test case (Ax = b)
    //

    // Distribution
    RCP<const Map> map = MapFactory::Build(lib, matrixParameters.GetNumGlobalElements(), 0, comm);

    // Matrix
    RCP<Galeri::Xpetra::Problem<Map,CrsMatrixWrap,MultiVector> > Pr =
      Galeri::Xpetra::BuildProblem<SC, LO, GO, Map, CrsMatrixWrap, MultiVector>(matrixParameters.GetMatrixType(), map, matrixParameters.GetParameterList());
    RCP<Matrix> A = Pr->BuildMatrix();

    // User defined nullspace
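    // (a constant vector, the standard near-nullspace for a scalar Laplace-type problem)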
    RCP<MultiVector> nullSpace = VectorFactory::Build(map,1); nullSpace->putScalar((SC) 1.0);

    // Define B
    RCP<Vector> X = VectorFactory::Build(map,1);
    RCP<Vector> B = VectorFactory::Build(map,1);
    X->setSeed(846930886);
    X->randomize();
    A->apply(*X, *B, Teuchos::NO_TRANS, (SC)1.0, (SC)0.0);

    // X = 0
    X->putScalar((SC) 0.0);

    //
    // Create a multigrid configuration
    //

    // Transfer operators
    RCP<TentativePFactory> TentativePFact = rcp( new TentativePFactory() );
    RCP<SaPFactory>        SaPFact        = rcp( new SaPFactory() );
    RCP<TransPFactory>     RFact          = rcp( new TransPFactory());

    FactoryManager M;
    M.SetFactory("Ptent", TentativePFact);
    M.SetFactory("P",     SaPFact);
    M.SetFactory("R",     RFact);

    M.SetFactory("Smoother", Teuchos::null);      //skips smoother setup
    M.SetFactory("CoarseSolver", Teuchos::null);  //skips coarsest solve setup

    //
    // Multigrid setup phase
    //

    int startLevel = 0;
    int maxLevels = 10;

    std::cout << "=============== Setup transfers only ====================" << std::endl;

    Hierarchy H;
    H.SetDefaultVerbLevel(MueLu::Medium);

    RCP<Level> finestLevel = H.GetLevel();
    finestLevel->Set("A", A);
    finestLevel->Set("Nullspace", nullSpace);

    // Indicate which Hierarchy operators we want to keep
    H.Keep("P", SaPFact.get());  //SaPFact is the generating factory for P.
    H.Keep("R", RFact.get());    //RFact is the generating factory for R.
    H.Keep("Ptent", TentativePFact.get());  //SaPFact is the generating factory for P.

    H.Setup(M,startLevel,maxLevels);

    std::cout << "=============== Setup smoothers only ====================" << std::endl;

    // Create a new A.
    RCP<Matrix> newA = Pr->BuildMatrix();
    finestLevel->Set("A", newA);

    // Create Gauss-Seidel smoother.
    std::string ifpackType = "RELAXATION";
    Teuchos::ParameterList ifpackList;
    ifpackList.set("relaxation: sweeps", (LO) 3);
    ifpackList.set("relaxation: damping factor", (SC) 1.0);
    RCP<SmootherPrototype> smootherPrototype = rcp(new TrilinosSmoother(ifpackType, ifpackList));

    M.SetFactory("Smoother", rcp(new SmootherFactory(smootherPrototype)));

    // Create coarsest solver.
    RCP<SmootherPrototype> coarseSolverPrototype = rcp( new DirectSolver() );
    RCP<SmootherFactory>   coarseSolverFact      = rcp( new SmootherFactory(coarseSolverPrototype, Teuchos::null) );
    M.SetFactory("CoarseSolver", coarseSolverFact);

    // Note that we pass the number of levels back in.
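    // Because P, R, and Ptent were kept, this pass reuses the stored transfer
    // operators and only builds the new smoothers and coarse solver.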
    H.Setup(M,startLevel, H.GetNumLevels());

    std::cout << "=============== Solve ====================" << std::endl;

    //
    // Solve Ax = B
    //

    LO nIts = 9;
    H.Iterate(*B, *X, nIts);

    //
    // Print relative residual norm
    //

    typename Teuchos::ScalarTraits<SC>::magnitudeType residualNorms = Utilities::ResidualNorm(*A, *X, *B)[0];
    if (comm->getRank() == 0) {
      std::ios::fmtflags f(std::cout.flags());
      std::cout << "||Residual|| = " << std::setiosflags(std::ios::fixed) << std::setprecision(20) << residualNorms << std::endl;
      std::cout.flags(f);
    }

    success = true;
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

  return ( success ? EXIT_SUCCESS : EXIT_FAILURE );
}