int main(int argc, char *argv[])
{
  Teuchos::GlobalMPISession session(&argc, &argv);
  RCP<const Teuchos::Comm<int> > tcomm = Teuchos::DefaultComm<int>::getComm();
  int rank = tcomm->getRank();
  int nParts = tcomm->getSize();
  bool doRemap = false;
  string filename = "USAir97";

  // Read run-time options.
  Teuchos::CommandLineProcessor cmdp(false, false);
  cmdp.setOption("file", &filename, "Name of the Matrix Market file to read");
  cmdp.setOption("nparts", &nParts, "Number of parts.");
  cmdp.setOption("remap", "no-remap", &doRemap, "Remap part numbers.");
  cmdp.parse(argc, argv);

  meshCoordinatesTest(tcomm);

  testFromDataFile(tcomm, nParts, filename, doRemap);

  if (rank == 0)
    serialTest(nParts, doRemap);

  if (rank == 0)
    std::cout << "PASS" << std::endl;
}
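// The driver above relies on Teuchos::CommandLineProcessor for its three
// run-time options. A minimal, self-contained sketch of that pattern follows;
// the option names and defaults here are illustrative, not taken from the
// driver itself.
#include <Teuchos_CommandLineProcessor.hpp>
#include <cstdlib>
#include <iostream>
#include <string>

int main(int argc, char* argv[]) {
  // (throwExceptions=false, recogniseAllOptions=false): report problems via
  // the return code and tolerate options this processor does not know about.
  Teuchos::CommandLineProcessor cmdp(false, false);

  std::string filename = "input.mtx";
  int nParts = 4;
  bool doRemap = false;
  cmdp.setOption("file", &filename, "Name of the input file");
  cmdp.setOption("nparts", &nParts, "Number of parts");
  cmdp.setOption("remap", "no-remap", &doRemap, "Toggle part remapping");

  if (cmdp.parse(argc, argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL)
    return EXIT_FAILURE;

  std::cout << "file=" << filename << " nparts=" << nParts
            << " remap=" << doRemap << std::endl;
  return EXIT_SUCCESS;
}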
int main (int argc, char* argv[])
{
  using KokkosBlas::Impl::testOverScalarsAndLayoutsAndDevices;
  using std::cout;
  using std::endl;

  Teuchos::oblackholestream blackHole;
  Teuchos::GlobalMPISession mpiSession (&argc, &argv, &blackHole);
  Kokkos::initialize (argc, argv);

#ifdef HAVE_MPI
  RCP<const Comm<int> > comm = rcp (new Teuchos::MpiComm<int> (MPI_COMM_WORLD));
#else
  RCP<const Comm<int> > comm = rcp (new Teuchos::SerialComm<int> ());
#endif // HAVE_MPI

  const int myRank = comm->getRank ();

  // Number of columns in the 2-D View(s) to test.
  int numCols = 3;
  bool oneCol = false;
  bool testComplex = true;

  Teuchos::CommandLineProcessor cmdp (false, true);
  cmdp.setOption ("numCols", &numCols,
                  "Number of columns in the 2-D View(s) to test");
  cmdp.setOption ("oneCol", "noOneCol", &oneCol, "Whether to test the 1-D View "
                  "(single-column) versions of the kernels");
  cmdp.setOption ("testComplex", "noTestComplex", &testComplex,
                  "Whether to test complex arithmetic");
  if (cmdp.parse (argc, argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
    if (myRank == 0) {
      cout << "TEST FAILED to parse command-line arguments!" << endl;
    }
    return EXIT_FAILURE;
  }

  bool curSuccess = true;
  bool success = true;

  // Always test with numCols=1 first.
  curSuccess = testOverScalarsAndLayoutsAndDevices (cout, 1, oneCol, testComplex);
  success = curSuccess && success;
  if (numCols != 1) {
    curSuccess = testOverScalarsAndLayoutsAndDevices (cout, numCols, oneCol, testComplex);
    success = curSuccess && success;
  }

  if (success) {
    if (myRank == 0) {
      cout << "End Result: TEST PASSED" << endl;
    }
  } else {
    if (myRank == 0) {
      cout << "End Result: TEST FAILED" << endl;
    }
  }

  Kokkos::finalize ();
  return EXIT_SUCCESS;
}
void
parseCommandLineArguments (Teuchos::CommandLineProcessor& cmdp,
                           bool& printedHelp,
                           int argc,
                           char* argv[],
                           int& nx,
                           int& ny,
                           int& nz,
                           std::string& xmlInputParamsFile,
                           std::string& solverName,
                           bool& verbose,
                           bool& debug)
{
  using Teuchos::CommandLineProcessor;

  const CommandLineProcessor::EParseCommandLineReturn parseResult =
    cmdp.parse (argc, argv);
  if (parseResult == CommandLineProcessor::PARSE_HELP_PRINTED) {
    printedHelp = true;
  }
  else {
    printedHelp = false;
    TEUCHOS_TEST_FOR_EXCEPTION(
      parseResult != CommandLineProcessor::PARSE_SUCCESSFUL,
      std::invalid_argument, "Failed to parse command-line arguments.");
    TEUCHOS_TEST_FOR_EXCEPTION(
      xmlInputParamsFile == "" && (nx <= 0 || ny <= 0 || nz <= 0),
      std::invalid_argument, "If no XML parameters filename is specified (via "
      "--inputParams), then the number of cells along each dimension of the "
      "mesh (--nx, --ny, and --nz) must be positive.");
  }
}
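// A hypothetical caller for the helper above, sketching how the printedHelp
// flag separates "user asked for --help" (a clean exit) from a real parse
// error (which throws via TEUCHOS_TEST_FOR_EXCEPTION). It assumes the
// companion setUpCommandLineArguments routine defined further below.
#include <cstdlib>
#include <string>

int run(int argc, char* argv[]) {
  Teuchos::CommandLineProcessor cmdp(false, true);
  int nx = 10, ny = 10, nz = 10, maxNumIters = 100;
  std::string xmlInputParamsFile, solverName = "GMRES";
  double tol = 1.0e-8;
  bool verbose = false, debug = false, printedHelp = false;

  setUpCommandLineArguments(cmdp, nx, ny, nz, xmlInputParamsFile,
                            solverName, tol, maxNumIters, verbose, debug);
  parseCommandLineArguments(cmdp, printedHelp, argc, argv, nx, ny, nz,
                            xmlInputParamsFile, solverName, verbose, debug);
  if (printedHelp)
    return EXIT_SUCCESS; // --help is not an error

  // ... build the mesh and solve ...
  return EXIT_SUCCESS;
}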
int main (int argc, char *argv[]) { Teuchos::CommandLineProcessor clp; clp.setDocString("This example program demonstrates ICholByBlocks algorithm on Kokkos::Threads execution space.\n"); int nthreads = 1; clp.setOption("nthreads", &nthreads, "Number of threads"); int max_task_dependence = 10; clp.setOption("max-task-dependence", &max_task_dependence, "Max number of task dependence"); int team_size = 1; clp.setOption("team-size", &team_size, "Team size"); bool verbose = false; clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing"); string file_input = "test.mtx"; clp.setOption("file-input", &file_input, "Input file (MatrixMarket SPD matrix)"); clp.recogniseAllOptions(true); clp.throwExceptions(false); Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv ); if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0; if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) return -1; int r_val = 0; { exec_space::initialize(nthreads); exec_space::print_configuration(cout, true); r_val = exampleICholByBlocks <value_type,ordinal_type,size_type,exec_space,void> (file_input, max_task_dependence, team_size, verbose); exec_space::finalize(); } return r_val; }
int main (int argc, char *argv[]) { Teuchos::CommandLineProcessor clp; clp.setDocString("Tacho::DenseMatrixBase examples on Pthreads execution space.\n"); int nthreads = 0; clp.setOption("nthreads", &nthreads, "Number of threads"); int numa = 0; clp.setOption("numa", &numa, "Number of numa node"); int core_per_numa = 0; clp.setOption("core-per-numa", &core_per_numa, "Number of cores per numa node"); bool verbose = false; clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing"); clp.recogniseAllOptions(true); clp.throwExceptions(false); Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv ); if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0; if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) return -1; int r_val = 0; { exec_space::initialize(nthreads, numa, core_per_numa); r_val = exampleCrsMatrixBase<exec_space> (verbose); exec_space::finalize(); } return r_val; }
int main(int argc, char *argv[]) { Teuchos::CommandLineProcessor clp; clp.setDocString("Intrepid2::DynRankView_PerfTest01.\n"); int nworkset = 8; clp.setOption("nworkset", &nworkset, "# of worksets"); int C = 4096; clp.setOption("C", &C, "# of Cells in a workset"); int order = 2; clp.setOption("order", &order, "cubature order"); bool verbose = true; clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing"); clp.recogniseAllOptions(true); clp.throwExceptions(false); Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv ); if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0; if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) return -1; Kokkos::initialize(); if (verbose) std::cout << "Testing datatype double\n"; const int r_val_double = Intrepid2::Test::ComputeBasis_HGRAD <double,Kokkos::Cuda>(nworkset, C, order, verbose); return r_val_double; }
void
setUpCommandLineArguments (Teuchos::CommandLineProcessor& cmdp,
                           int& nx,
                           int& ny,
                           int& nz,
                           std::string& xmlInputParamsFile,
                           std::string& solverName,
                           double& tol,
                           int& maxNumIters,
                           bool& verbose,
                           bool& debug)
{
  cmdp.setOption ("nx", &nx, "Number of cells along the x dimension");
  cmdp.setOption ("ny", &ny, "Number of cells along the y dimension");
  cmdp.setOption ("nz", &nz, "Number of cells along the z dimension");
  cmdp.setOption ("inputParams", &xmlInputParamsFile, "XML file of input "
                  "parameters, which we read if specified and not \"\". "
                  "If it has a \"meshInput\" parameter, we use its "
                  "std::string value as the Pamgen mesh specification. "
                  "Otherwise, we tell Pamgen to make a cube, using "
                  "nx, ny, and nz.");
  cmdp.setOption ("solverName", &solverName, "Name of iterative linear solver "
                  "to use for solving the linear system. You may use any name "
                  "that Belos::SolverFactory understands. Examples include "
                  "\"GMRES\" and \"CG\".");
  cmdp.setOption ("tol", &tol, "Tolerance for the linear solve. If not "
                  "specified, this is read from the input ParameterList (read "
                  "from the XML file). If specified, this overrides any value "
                  "in the input ParameterList.");
  cmdp.setOption ("maxNumIters", &maxNumIters, "Maximum number of iterations "
                  "in the linear solve. If not specified, this is read from "
                  "the input ParameterList (read from the XML file). If "
                  "specified, this overrides any value in the input "
                  "ParameterList.");
  cmdp.setOption ("verbose", "quiet", &verbose,
                  "Whether to print verbose status output.");
  cmdp.setOption ("debug", "release", &debug,
                  "Whether to print copious debugging output to stderr.");
}
int main(int argc, char *argv[])
{
  bool success = true;

  try {
    Teuchos::GlobalMPISession mpiSession(&argc, &argv, NULL);
    Teuchos::RCP<Teuchos::FancyOStream> out =
      Teuchos::VerboseObjectBase::getDefaultOStream();

    // Setup command line options
    Teuchos::CommandLineProcessor CLP;
    int p = 3;
    CLP.setOption("p", &p, "Polynomial order");
    int d_min = 1;
    CLP.setOption("dmin", &d_min, "Starting stochastic dimension");
    int d_max = 12;
    CLP.setOption("dmax", &d_max, "Ending stochastic dimension");
    int nGrid = 64;
    CLP.setOption("n", &nGrid, "Number of spatial grid points in each dimension");
    int nIter = 1;
    CLP.setOption("niter", &nIter, "Number of iterations");
    bool test_block = true;
    CLP.setOption("block", "no-block", &test_block, "Use block algorithm");
    CLP.parse(argc, argv);

    bool print = false;
    bool check = false;
    performance_test_driver_epetra(p, d_min, d_max, nGrid, nIter,
                                   print, test_block, check, *out);
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(true, std::cerr, success);

  if (!success)
    return -1;
  return 0;
}
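// The try/catch shape used above, isolated: TEUCHOS_STANDARD_CATCH_STATEMENTS
// (from Teuchos_StandardCatchMacros.hpp) expands to catch clauses for
// std::exception and unknown exceptions, prints them to the given stream, and
// clears the success flag. A minimal sketch:
#include <Teuchos_StandardCatchMacros.hpp>
#include <iostream>

int main() {
  bool success = true;
  try {
    // ... work that may throw ...
  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(true, std::cerr, success);
  return success ? 0 : -1;
}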
int main (int argc, char *argv[])
{
  using Teuchos::inOutArg;
  using Teuchos::ParameterList;
  using Teuchos::parameterList;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcpFromRef;
  using std::endl;

  typedef double ST;
  typedef Epetra_Operator OP;
  typedef Epetra_MultiVector MV;
  typedef Belos::OperatorTraits<ST,MV,OP> OPT;
  typedef Belos::MultiVecTraits<ST,MV> MVT;

  // This calls MPI_Init and MPI_Finalize as necessary.
  Belos::Test::MPISession session (inOutArg (argc), inOutArg (argv));
  RCP<const Epetra_Comm> comm = session.getComm ();

  bool success = false;
  bool verbose = false;
  try {
    int MyPID = comm->MyPID ();

    //
    // Parameters to read from command-line processor
    //
    int frequency = -1;        // how often residuals are printed by solver
    int numRHS = 1;            // total number of right-hand sides to solve for
    int maxIters = 13000;      // maximum number of iterations for solver to use
    std::string filename ("bcsstk14.hb");
    double tol = 1.0e-5;       // relative residual tolerance

    //
    // Read in command-line arguments
    //
    Teuchos::CommandLineProcessor cmdp (false, true);
    cmdp.setOption ("verbose", "quiet", &verbose, "Print messages and results.");
    cmdp.setOption ("frequency", &frequency, "Solver's frequency for printing "
                    "residuals (#iters).");
    cmdp.setOption ("tol", &tol, "Relative residual tolerance used by MINRES "
                    "solver.");
    cmdp.setOption ("filename", &filename, "Filename for Harwell-Boeing test "
                    "matrix.");
    cmdp.setOption ("num-rhs", &numRHS, "Number of right-hand sides to solve.");
    cmdp.setOption ("max-iters", &maxIters, "Maximum number of iterations per "
                    "linear system (-1 means \"adapt to problem/block size\").");
    if (cmdp.parse (argc, argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
      return EXIT_FAILURE;
    }
    Teuchos::oblackholestream blackHole;
    std::ostream& verbOut = (verbose && MyPID == 0) ? std::cout : blackHole;

    //
    // Generate the linear system(s) to solve.
    //
    verbOut << "Generating the linear system(s) to solve" << endl << endl;
    RCP<Epetra_CrsMatrix> A;
    RCP<Epetra_MultiVector> B, X;
    RCP<Epetra_Map> rowMap;
    try {
      // This might change the number of right-hand sides, if we read in
      // a right-hand side from the Harwell-Boeing file.
      Belos::Util::createEpetraProblem (filename, &rowMap, &A, &B, &X, &MyPID, numRHS);
    }
    catch (std::exception& e) {
      TEUCHOS_TEST_FOR_EXCEPTION (true, std::runtime_error,
                                  "Failed to create Epetra problem for matrix "
                                  "filename \"" << filename << "\". "
                                  "createEpetraProblem() reports the following "
                                  "error: " << e.what());
    }

    //
    // Compute the initial residual norm of the problem, so we can see
    // by how much it improved after the solve.
    //
    std::vector<double> initialResidualNorms (numRHS);
    std::vector<double> initialResidualInfNorms (numRHS);
    Epetra_MultiVector R (*rowMap, numRHS);
    OPT::Apply (*A, *X, R);
    MVT::MvAddMv (-1.0, R, 1.0, *B, R); // R := -(A*X) + B
    MVT::MvNorm (R, initialResidualNorms);
    MVT::MvNorm (R, initialResidualInfNorms, Belos::InfNorm);
    if (verbose) {
      verbOut << "Initial residual 2-norms: \t";
      for (int i = 0; i < numRHS; ++i) {
        verbOut << initialResidualNorms[i];
        if (i < numRHS-1) {
          verbOut << ", ";
        }
      }
      verbOut << endl << "Initial residual Inf-norms: \t";
      for (int i = 0; i < numRHS; ++i) {
        verbOut << initialResidualInfNorms[i];
        if (i < numRHS-1) {
          verbOut << ", ";
        }
      }
      verbOut << endl;
    }

    std::vector<double> rhs2Norms (numRHS);
    std::vector<double> rhsInfNorms (numRHS);
    MVT::MvNorm (*B, rhs2Norms);
    MVT::MvNorm (*B, rhsInfNorms, Belos::InfNorm);
    if (verbose) {
      verbOut << "Right-hand side 2-norms: \t";
      for (int i = 0; i < numRHS; ++i) {
        verbOut << rhs2Norms[i];
        if (i < numRHS-1) {
          verbOut << ", ";
        }
      }
      verbOut << endl << "Right-hand side Inf-norms: \t";
      for (int i = 0; i < numRHS; ++i) {
        verbOut << rhsInfNorms[i];
        if (i < numRHS-1) {
          verbOut << ", ";
        }
      }
      verbOut << endl;
    }

    std::vector<double> initialGuess2Norms (numRHS);
    std::vector<double> initialGuessInfNorms (numRHS);
    MVT::MvNorm (*X, initialGuess2Norms);
    MVT::MvNorm (*X, initialGuessInfNorms, Belos::InfNorm);
    if (verbose) {
      verbOut << "Initial guess 2-norms: \t";
      for (int i = 0; i < numRHS; ++i) {
        verbOut << initialGuess2Norms[i];
        if (i < numRHS-1) {
          verbOut << ", ";
        }
      }
      verbOut << endl << "Initial guess Inf-norms: \t";
      for (int i = 0; i < numRHS; ++i) {
        verbOut << initialGuessInfNorms[i];
        if (i < numRHS-1) {
          verbOut << ", ";
        }
      }
      verbOut << endl;
    }

    //
    // Compute the infinity-norm of A.
    //
    const double normOfA = A->NormInf ();
    verbOut << "||A||_inf: \t" << normOfA << endl;

    //
    // Compute ||A|| ||X_i|| + ||B_i|| for each right-hand side B_i.
    //
    std::vector<double> scaleFactors (numRHS);
    for (int i = 0; i < numRHS; ++i) {
      scaleFactors[i] = normOfA * initialGuessInfNorms[i] + rhsInfNorms[i];
    }
    if (verbose) {
      verbOut << "||A||_inf ||X_i||_inf + ||B_i||_inf: \t";
      for (int i = 0; i < numRHS; ++i) {
        verbOut << scaleFactors[i];
        if (i < numRHS-1) {
          verbOut << ", ";
        }
      }
      verbOut << endl;
    }

    //
    // Solve using Belos
    //
    verbOut << endl << "Setting up Belos" << endl;
    const int NumGlobalElements = B->GlobalLength();

    // Set up Belos solver parameters.
    RCP<ParameterList> belosList = parameterList ("MINRES");
    belosList->set ("Maximum Iterations", maxIters);
    belosList->set ("Convergence Tolerance", tol);
    if (verbose) {
      belosList->set ("Verbosity",
                      Belos::Errors + Belos::Warnings + Belos::IterationDetails +
                      Belos::OrthoDetails + Belos::FinalSummary +
                      Belos::TimingDetails + Belos::Debug);
      belosList->set ("Output Frequency", frequency);
    }
    else {
      belosList->set ("Verbosity", Belos::Errors + Belos::Warnings);
    }
    belosList->set ("Output Stream", rcpFromRef (verbOut));

    // Construct an unpreconditioned linear problem instance.
    typedef Belos::LinearProblem<double,MV,OP> prob_type;
    RCP<prob_type> problem = rcp (new prob_type (A, X, B));
    if (! problem->setProblem()) {
      verbOut << endl << "ERROR: Failed to set up Belos::LinearProblem!" << endl;
      return EXIT_FAILURE;
    }

    // Create an iterative solver manager.
    Belos::SolverFactory<double, MV, OP> factory;
    RCP<Belos::SolverManager<double,MV,OP> > newSolver =
      factory.create ("MINRES", belosList);
    newSolver->setProblem (problem);

    // Print out information about the problem. Make sure to use the
    // information as stored in the Belos ParameterList, so that we know
    // what the solver will do.
    verbOut << endl
            << "Dimension of matrix: " << NumGlobalElements << endl
            << "Number of right-hand sides: " << numRHS << endl
            << "Max number of MINRES iterations: "
            << belosList->get<int> ("Maximum Iterations") << endl
            << "Relative residual tolerance: "
            << belosList->get<double> ("Convergence Tolerance") << endl
            << "Output frequency: "
            << belosList->get<int> ("Output Frequency") << endl
            << endl;

    // Solve the linear system.
    verbOut << "Solving the linear system" << endl << endl;
    Belos::ReturnType ret = newSolver->solve();
    verbOut << "Belos results:" << endl
            << "- Number of iterations: " << newSolver->getNumIters () << endl
            << "- " << (ret == Belos::Converged ? "Converged" : "Not converged")
            << endl;

    //
    // After the solve, compute residual(s) explicitly. This tests
    // whether the Belos solver did so correctly.
    //
    std::vector<double> absoluteResidualNorms (numRHS);
    OPT::Apply (*A, *X, R);
    MVT::MvAddMv (-1.0, R, 1.0, *B, R);
    MVT::MvNorm (R, absoluteResidualNorms);

    std::vector<double> relativeResidualNorms (numRHS);
    for (int i = 0; i < numRHS; ++i) {
      relativeResidualNorms[i] = (initialResidualNorms[i] == 0.0) ?
        absoluteResidualNorms[i] :
        absoluteResidualNorms[i] / initialResidualNorms[i];
    }

    verbOut << "---------- Computed relative residual norms ----------"
            << endl << endl;
    bool badRes = false;
    if (verbose) {
      for (int i = 0; i < numRHS; ++i) {
        const double actRes = relativeResidualNorms[i];
        verbOut << "Problem " << i << " : \t" << actRes << endl;
        if (actRes > tol) {
          badRes = true;
        }
      }
    }

#ifdef BELOS_TEUCHOS_TIME_MONITOR
    Teuchos::TimeMonitor::summarize (verbOut);
#endif // BELOS_TEUCHOS_TIME_MONITOR

    success = (ret == Belos::Converged && !badRes);
    if (success) {
      verbOut << endl << "End Result: TEST PASSED" << endl;
    } else {
      verbOut << endl << "End Result: TEST FAILED" << endl;
    }
  } // try
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

  return success ? EXIT_SUCCESS : EXIT_FAILURE;
} // end test_minres_hb.cpp
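// The Belos solve sequence in the test above, reduced to its skeleton as a
// hypothetical wrapper (Epetra flavor, matching the test's typedefs). The
// function name and argument list are illustrative, not part of the test.
#include <BelosConfigDefs.hpp>
#include <BelosLinearProblem.hpp>
#include <BelosSolverFactory.hpp>
#include <BelosEpetraAdapter.hpp>

bool solveWithMinres(const Teuchos::RCP<Epetra_Operator>& A,
                     const Teuchos::RCP<Epetra_MultiVector>& X,
                     const Teuchos::RCP<Epetra_MultiVector>& B,
                     const int maxIters, const double tol)
{
  typedef Epetra_MultiVector MV;
  typedef Epetra_Operator OP;

  Teuchos::RCP<Teuchos::ParameterList> params = Teuchos::parameterList("MINRES");
  params->set("Maximum Iterations", maxIters);
  params->set("Convergence Tolerance", tol);

  typedef Belos::LinearProblem<double, MV, OP> prob_type;
  Teuchos::RCP<prob_type> problem = Teuchos::rcp(new prob_type(A, X, B));
  if (!problem->setProblem())
    return false; // problem setup failed

  Belos::SolverFactory<double, MV, OP> factory;
  Teuchos::RCP<Belos::SolverManager<double, MV, OP> > solver =
    factory.create("MINRES", params);
  solver->setProblem(problem);

  return solver->solve() == Belos::Converged;
}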
int main_(Teuchos::CommandLineProcessor &clp, int argc, char *argv[])
{
#include <MueLu_UseShortNames.hpp>
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::TimeMonitor;

  // =========================================================================
  // MPI initialization using Teuchos
  // =========================================================================
  Teuchos::GlobalMPISession mpiSession(&argc, &argv, NULL);
  RCP<const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();
  int numProc = comm->getSize();
  int myRank  = comm->getRank();

  // =========================================================================
  // Parameters initialization
  // =========================================================================
  ::Xpetra::Parameters xpetraParameters(clp);

  bool runHeavyTests = false;
  clp.setOption("heavytests", "noheavytests", &runHeavyTests,
                "whether to exercise tests that take a long time to run");
  clp.recogniseAllOptions(true);
  switch (clp.parse(argc, argv)) {
    case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED:        return EXIT_SUCCESS;
    case Teuchos::CommandLineProcessor::PARSE_ERROR:
    case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE;
    case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL:          break;
  }
  Xpetra::UnderlyingLib lib = xpetraParameters.GetLib();

  // =========================================================================
  // Problem construction
  // =========================================================================
  ParameterList matrixParameters;
  matrixParameters.set("nx", Teuchos::as<GO>(9999));
  matrixParameters.set("matrixType", "Laplace1D");
  RCP<Matrix> A = MueLuTests::TestHelpers::TestFactory<SC, LO, GO, NO>::Build1DPoisson(
      matrixParameters.get<GO>("nx"), lib);
  RCP<MultiVector> coordinates =
    Galeri::Xpetra::Utils::CreateCartesianCoordinates<SC,LO,GO,Map,MultiVector>(
      "1D", A->getRowMap(), matrixParameters);

  std::string outDir = "Output/";

  std::vector<std::string> dirList;
  if (runHeavyTests) {
    dirList.push_back("EasyParameterListInterpreter-heavy/");
    dirList.push_back("FactoryParameterListInterpreter-heavy/");
  } else {
    dirList.push_back("EasyParameterListInterpreter/");
    dirList.push_back("FactoryParameterListInterpreter/");
  }
#if defined(HAVE_MPI) && defined(HAVE_MUELU_ISORROPIA) && defined(HAVE_AMESOS2_KLU2)
  // The ML interpreter has internal ifdefs, which means that the resulting
  // output would depend on the configuration (the regular interpreter does
  // not have that). Therefore, we need to stabilize the configuration here.
  // In addition, we run ML parameter list tests only if KLU is available.
  dirList.push_back("MLParameterListInterpreter/");
  dirList.push_back("MLParameterListInterpreter2/");
#endif
  int numLists = dirList.size();

  bool failed = false;
  Teuchos::Time timer("Interpreter timer");
  //double lastTime = timer.wallTime();
  for (int k = 0; k < numLists; k++) {
    Teuchos::ArrayRCP<std::string> fileList = MueLuTests::TestHelpers::GetFileList(
        dirList[k],
        (numProc == 1 ? std::string(".xml")
                      : std::string("_np" + Teuchos::toString(numProc) + ".xml")));

    for (int i = 0; i < fileList.size(); i++) {
      // Set seed
      std::srand(12345);

      // Reset (potentially) cached value of the estimate
      A->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one());

      std::string xmlFile  = dirList[k] + fileList[i];
      std::string outFile  = outDir + fileList[i];
      std::string baseFile = outFile.substr(0, outFile.find_last_of('.'));
      std::size_t found = baseFile.find("_np");
      if (numProc == 1 && found != std::string::npos) {
#ifdef HAVE_MPI
        baseFile = baseFile.substr(0, found);
#else
        std::cout << "Skipping \"" << xmlFile << "\" as MPI is not enabled" << std::endl;
        continue;
#endif
      }
      baseFile = baseFile + (lib == Xpetra::UseEpetra ? "_epetra" : "_tpetra");
      std::string goldFile = baseFile + ".gold";
      std::ifstream f(goldFile.c_str());
      if (!f.good()) {
        if (myRank == 0)
          std::cout << "Warning: comparison file " << goldFile << " not found. Skipping test" << std::endl;
        continue;
      }

      std::filebuf buffer;
      std::streambuf* oldbuffer = NULL;
      if (myRank == 0) {
        // Redirect output
        buffer.open((baseFile + ".out").c_str(), std::ios::out);
        oldbuffer = std::cout.rdbuf(&buffer);
      }

      // NOTE: we cannot use ParameterListInterpreter(xmlFile, comm), because we
      // want to update the ParameterList first to include "test" verbosity
      Teuchos::ParameterList paramList;
      Teuchos::updateParametersFromXmlFileAndBroadcast(
          xmlFile, Teuchos::Ptr<Teuchos::ParameterList>(&paramList), *comm);
      if (dirList[k] == "EasyParameterListInterpreter/" ||
          dirList[k] == "EasyParameterListInterpreter-heavy/")
        paramList.set("verbosity", "test");
      else if (dirList[k] == "FactoryParameterListInterpreter/" ||
               dirList[k] == "FactoryParameterListInterpreter-heavy/")
        paramList.sublist("Hierarchy").set("verbosity", "Test");
      else if (dirList[k] == "MLParameterListInterpreter/")
        paramList.set("ML output", 42);
      else if (dirList[k] == "MLParameterListInterpreter2/")
        paramList.set("ML output", 10);

      try {
        timer.start();
        Teuchos::RCP<HierarchyManager> mueluFactory;

        // Create the parameter list interpreter. Here we have to distinguish
        // between the general MueLu parameter list interpreter and the ML
        // parameter list interpreter. Note that the ML parameter interpreter
        // also works with Tpetra matrices.
        if (dirList[k] == "EasyParameterListInterpreter/" ||
            dirList[k] == "EasyParameterListInterpreter-heavy/" ||
            dirList[k] == "FactoryParameterListInterpreter/" ||
            dirList[k] == "FactoryParameterListInterpreter-heavy/") {
          mueluFactory = Teuchos::rcp(new ParameterListInterpreter(paramList));

        } else if (dirList[k] == "MLParameterListInterpreter/") {
          mueluFactory = Teuchos::rcp(new MLParameterListInterpreter(paramList));

        } else if (dirList[k] == "MLParameterListInterpreter2/") {
          //std::cout << "ML ParameterList: " << std::endl;
          //std::cout << paramList << std::endl;
          RCP<ParameterList> mueluParamList = Teuchos::getParametersFromXmlString(
              MueLu::ML2MueLuParameterTranslator::translate(paramList, "SA"));
          //std::cout << "MueLu ParameterList: " << std::endl;
          //std::cout << *mueluParamList << std::endl;
          mueluFactory = Teuchos::rcp(new ParameterListInterpreter(*mueluParamList));
        }

        RCP<Hierarchy> H = mueluFactory->CreateHierarchy();
        H->GetLevel(0)->template Set<RCP<Matrix> >("A", A);

        if (dirList[k] == "MLParameterListInterpreter/") {
          // MLParameterListInterpreter needs the nullspace information if
          // rebalancing is active! Add a default constant nullspace vector.
          RCP<MultiVector> nullspace = MultiVectorFactory::Build(A->getRowMap(), 1);
          nullspace->putScalar(1.0);
          H->GetLevel(0)->Set("Nullspace", nullspace);
        }

        H->GetLevel(0)->Set("Coordinates", coordinates);

        mueluFactory->SetupHierarchy(*H);

        if (strncmp(fileList[i].c_str(), "reuse", 5) == 0) {
          // Build the Hierarchy a second time.
          // Should be faster if we actually do the reuse.
          A->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one());
          mueluFactory->SetupHierarchy(*H);
        }

        timer.stop();

      } catch (Teuchos::ExceptionBase& e) {
        std::string msg = e.what();
        msg = msg.substr(msg.find_last_of('\n') + 1);

        if (myRank == 0) {
          std::cout << "Caught exception: " << msg << std::endl;

          // Redirect output back
          std::cout.rdbuf(oldbuffer);
          buffer.close();
        }

        if (msg == "Zoltan interface is not available" ||
            msg == "Zoltan2 interface is not available" ||
            msg == "MueLu::FactoryFactory:BuildFactory(): Cannot create a Zoltan2Interface object: Zoltan2 is disabled: HAVE_MUELU_ZOLTAN2 && HAVE_MPI == false.") {
          if (myRank == 0)
            std::cout << xmlFile << ": skipped (missing library)" << std::endl;
          continue;
        }
      }

      std::string cmd;
      if (myRank == 0) {
        // Redirect output back
        std::cout.rdbuf(oldbuffer);
        buffer.close();

        // Create a copy of outputs
        cmd = "cp -f ";
        system((cmd + baseFile + ".gold " + baseFile + ".gold_filtered").c_str());
        system((cmd + baseFile + ".out " + baseFile + ".out_filtered").c_str());

        // Tpetra produces different eigenvalues in Chebyshev due to using
        // std::rand() for generating random vectors, which may be initialized
        // with a different seed, and may use a different algorithm from one
        // gcc version to another, or from one compiler (like clang) to
        // another. This leads to us always failing this test.
        // NOTE: Epetra, on the other hand, rolls its own random number
        // generator, which always produces the same results.

        // Ignore the value of "lambdaMax"
        run_sed("'s/lambdaMax: [0-9]*.[0-9]*/lambdaMax = <ignored>/'", baseFile);

        // Ignore the value of "lambdaMin"
        run_sed("'s/lambdaMin: [0-9]*.[0-9]*/lambdaMin = <ignored>/'", baseFile);

        // Ignore the value of "chebyshev: max eigenvalue"
        // NOTE: we skip lines with the default value ([default])
        run_sed("'/[default]/! s/chebyshev: max eigenvalue = [0-9]*.[0-9]*/chebyshev: max eigenvalue = <ignored>/'", baseFile);

        // Ignore the exact type of direct solver (it is selected
        // semi-automatically depending on how Trilinos was configured)
        run_sed("'s/Amesos\\([2]*\\)Smoother{type = .*}/Amesos\\1Smoother{type = <ignored>}/'", baseFile);
        run_sed("'s/SuperLU solver interface, direct solve/<Direct> solver interface/'", baseFile);
        run_sed("'s/KLU2 solver interface/<Direct> solver interface/'", baseFile);
        run_sed("'s/Basker solver interface/<Direct> solver interface/'", baseFile);

        // Strip template args for some classes
        std::vector<std::string> classes;
        classes.push_back("Xpetra::Matrix");
        classes.push_back("MueLu::Constraint");
        classes.push_back("MueLu::SmootherPrototype");
        for (size_t q = 0; q < classes.size(); q++)
          run_sed("'s/" + classes[q] + "<.*>/" + classes[q] + "<ignored> >/'", baseFile);

#ifdef __APPLE__
        // Some Macs print out ptrs as 0x0 instead of 0, fix that
        run_sed("'/RCP/ s/=0x0/=0/g'", baseFile);
#endif

        // Run comparison (ignoring whitespaces)
        cmd = "diff -u -w -I\"^\\s*$\" " + baseFile + ".gold_filtered " + baseFile + ".out_filtered";
        int ret = system(cmd.c_str());
        if (ret)
          failed = true;

        //std::ios_base::fmtflags ff(std::cout.flags());
        //std::cout.precision(2);
        //std::cout << xmlFile << " (" << std::setiosflags(std::ios::fixed)
        //          << timer.wallTime() - lastTime << " sec.) : " << (ret ? "failed" : "passed") << std::endl;
        //lastTime = timer.wallTime();
        //std::cout.flags(ff); // reset flags to whatever they were prior to printing time
        std::cout << xmlFile << " : " << (ret ? "failed" : "passed") << std::endl;
      }
    }
  }

  if (myRank == 0)
    std::cout << std::endl << "End Result: TEST " << (failed ? "FAILED" : "PASSED") << std::endl;

  return (failed ? EXIT_FAILURE : EXIT_SUCCESS);
}
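// run_sed is called throughout the filtering step above but is not defined in
// this excerpt. A plausible sketch, assuming it applies an in-place sed
// expression to both filtered files derived from baseFile (GNU sed syntax;
// BSD sed would need `sed -i ''`):
#include <cstdlib>
#include <string>

void run_sed(const std::string& pattern, const std::string& baseFile) {
  const std::string cmd = "sed -i " + pattern + " ";
  system((cmd + baseFile + ".gold_filtered").c_str());
  system((cmd + baseFile + ".out_filtered").c_str());
}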
// calls MPI_Init and MPI_Finalize
int main (int argc, char* argv[])
{
  using Teuchos::RCP;
  using Teuchos::rcp_dynamic_cast;
  using panzer::StrPureBasisPair;
  using panzer::StrPureBasisComp;

  PHX::InitializeKokkosDevice();

  Teuchos::GlobalMPISession mpiSession(&argc, &argv);
  RCP<Epetra_Comm> Comm = Teuchos::rcp(new Epetra_MpiComm(MPI_COMM_WORLD));

  Teuchos::FancyOStream out(Teuchos::rcpFromRef(std::cout));
  out.setOutputToRootOnly(0);
  out.setShowProcRank(true);

  const std::size_t workset_size = 20;

  ProblemOptions po;
  {
    // Set up this problem with two discontinuous (A, C) and one continuous (B)
    // fields.
    //   On fields A are imposed Neumann and weak Dirichlet matching interface
    // conditions.
    //   On fields C are imposed Robin interface conditions, with one-way
    // coupling to field B.
    //   If the Robin condition is linear, then the default setup is such that
    // A, B, C all converge to the same solution for which the Solution
    // evaluator provides the exact expression. A response function reports the
    // error so a convergence test can be wrapped around multiple runs of this
    // program.
    //   If the Robin condition is nonlinear, then the source is 0 and the
    // solution is two planes with a jump of 0.4 at the interface.
    Teuchos::CommandLineProcessor clp;
    po.nxelem = 10;
    clp.setOption("nx", &po.nxelem, "Number of elements in x direction");
    po.nonlinear_Robin = false;
    clp.setOption("nonlinear", "linear", &po.nonlinear_Robin,
                  "Use a nonlinear Robin interface condition");
    po.rtol = 1e-10;
    clp.setOption("rtol", &po.rtol, "Tolerance on residual norm");
    po.is3d = false;
    clp.setOption("3d", "2d", &po.is3d, "3D test instead of 2D");
    po.mesh_filename = "";
    clp.setOption("mesh-filename", &po.mesh_filename, "Optionally read from an Exodus mesh");
    po.test_Jacobian = false;
    clp.setOption("test-jacobian", "dont-test-jacobian", &po.test_Jacobian,
                  "Test Jacobian using finite differences.");
    po.generate_mesh_only = false;
    clp.setOption("generate-mesh-only", "dont-generate-mesh-only", &po.generate_mesh_only,
                  "Generate mesh, save, and quit.");
    try {
      clp.parse(argc, argv);
    } catch (...) {
      PHX::FinalizeKokkosDevice();
      return -1;
    }

    po.nyelem = po.nxelem;
    po.dof_names.push_back("A");
    po.dof_names.push_back("B");
    po.dof_names.push_back("C");
    po.ss_names.push_back("left");
    po.ss_names.push_back("vertical_0");
    po.ss_names.push_back("right");
    po.outer_iteration = true;
    po.check_error = true;

    out << po << "\n";
  }
  bool pass = true;

  // Can be overridden by the equation set.
  po.integration_order = 2;

  // Construct mesh.
  Teuchos::RCP<panzer_stk_classic::STK_MeshFactory> mesh_factory;
  if ( ! po.mesh_filename.empty()) {
    mesh_factory = Teuchos::rcp(new panzer_stk_classic::STK_ExodusReaderFactory(po.mesh_filename));
  } else {
    if (po.is3d)
      mesh_factory = Teuchos::rcp(new panzer_stk_classic::CubeHexMeshFactory);
    else
      mesh_factory = Teuchos::rcp(new panzer_stk_classic::SquareQuadMeshFactory);
  }

  if (po.mesh_filename.empty()) {
    // set mesh factory parameters
    RCP<Teuchos::ParameterList> pl = rcp(new Teuchos::ParameterList);
    pl->set("X Blocks", 2);
    pl->set("Y Blocks", 1);
    if (po.is3d) pl->set("Z Blocks", 1);
    pl->set("X Elements", po.nxelem); // per block
    pl->set("Y Elements", po.nyelem);
    if (po.is3d) {
      pl->set("Z Elements", po.nyelem);
      pl->set("Build Interface Sidesets", true);
    }
    {
      // If np is even, put ranks in both x and y directions; if not, go with
      // the default, which is x direction only. The x direction is the harder
      // case.
      const int np = mpiSession.getNProc();
      if (np % 2 == 0 && np >= 4) {
        const int nxp = np/2, nyp = 2;
        pl->set("X Procs", nxp);
        pl->set("Y Procs", nyp);
      }
    }
    mesh_factory->setParameterList(pl);
  }

  RCP<panzer_stk_classic::STK_Interface> mesh = mesh_factory->buildUncommitedMesh(MPI_COMM_WORLD);
  if (po.generate_mesh_only) {
    mesh_factory->completeMeshConstruction(*mesh, MPI_COMM_WORLD);
    mesh->writeToExodus("output.exo");
    out << "Stopping after writing mesh because --generate-mesh-only was requested.\n";
    PHX::FinalizeKokkosDevice();
    return 0;
  }

  //todo mesh->getDimension() may not be right if mesh_factory is the Exodus
  // reader.
  po.is3d = mesh->getMetaData()->spatial_dimension() == 3;

  if ( ! po.mesh_filename.empty() && ! po.is3d) {
    // Special case.
    po.eb_names.clear();
    po.ss_names.clear();
    po.eb_names.push_back("silicon1");
    po.eb_names.push_back("silicon2");
    po.ss_names.push_back("anode");
    po.ss_names.push_back("interface");
    po.ss_names.push_back("cathode");
  } else {
    if (po.is3d) {
      po.eb_names.push_back("eblock-0_0_0");
      po.eb_names.push_back("eblock-1_0_0");
    } else {
      po.eb_names.push_back("eblock-0_0");
      po.eb_names.push_back("eblock-1_0");
    }
  }

  // construct input physics and physics block
  ////////////////////////////////////////////////////////

  // factory definitions
  Teuchos::RCP<Example::EquationSetFactory> eqset_factory =
    Teuchos::rcp(new Example::EquationSetFactory); // where poisson equation is defined
  Example::BCStrategyFactory bc_factory;           // where boundary conditions are defined

  const Teuchos::RCP<Teuchos::ParameterList> ipb = Teuchos::parameterList("Physics Blocks");
  std::vector<panzer::BC> bcs;
  std::vector<RCP<panzer::PhysicsBlock> > physicsBlocks;
  {
    testInitialization(ipb, bcs, po);

    std::map<std::string,std::string> block_ids_to_physics_ids;
    std::map<std::string,Teuchos::RCP<const shards::CellTopology> > block_ids_to_cell_topo;

    block_ids_to_physics_ids[po.eb_names[0]] = "Poisson Physics Left";
    block_ids_to_physics_ids[po.eb_names[1]] = "Poisson Physics Right";

    block_ids_to_cell_topo[po.eb_names[0]] = mesh->getCellTopology(po.eb_names[0]);
    block_ids_to_cell_topo[po.eb_names[1]] = mesh->getCellTopology(po.eb_names[1]);

    // GlobalData sets ostream and parameter interface to physics
    Teuchos::RCP<panzer::GlobalData> gd = panzer::createGlobalData();

    // the physics block knows how to build and register evaluators with the
    // field manager
    panzer::buildPhysicsBlocks(block_ids_to_physics_ids,
                               block_ids_to_cell_topo,
                               ipb,
                               po.integration_order,
                               workset_size,
                               eqset_factory,
                               gd,
                               false,
                               physicsBlocks);
  }

  // finish building mesh, set required field variables and mesh bulk data
  ////////////////////////////////////////////////////////////////////////
  {
    std::vector<Teuchos::RCP<panzer::PhysicsBlock> >::const_iterator physIter;
    for (physIter = physicsBlocks.begin(); physIter != physicsBlocks.end(); ++physIter) {
      Teuchos::RCP<const panzer::PhysicsBlock> pb = *physIter;
      const std::vector<StrPureBasisPair> & blockFields = pb->getProvidedDOFs();

      // insert all fields into a set
      std::set<StrPureBasisPair,StrPureBasisComp> fieldNames;
      fieldNames.insert(blockFields.begin(), blockFields.end());

      // add basis to DOF manager: block specific
      std::set<StrPureBasisPair,StrPureBasisComp>::const_iterator fieldItr;
      for (fieldItr = fieldNames.begin(); fieldItr != fieldNames.end(); ++fieldItr)
        mesh->addSolutionField(fieldItr->first, pb->elementBlockID());
    }
    mesh_factory->completeMeshConstruction(*mesh, MPI_COMM_WORLD);
  }

  // build worksets
  ////////////////////////////////////////////////////////
  Teuchos::RCP<panzer_stk_classic::WorksetFactory> wkstFactory =
    Teuchos::rcp(new panzer_stk_classic::WorksetFactory(mesh)); // build STK workset factory
  Teuchos::RCP<panzer::WorksetContainer> wkstContainer            // attach it to a workset container (uses lazy evaluation)
    = Teuchos::rcp(new panzer::WorksetContainer(wkstFactory, physicsBlocks, workset_size));

  std::vector<std::string> elementBlockNames;
  mesh->getElementBlockNames(elementBlockNames);
  std::map<std::string,Teuchos::RCP<std::vector<panzer::Workset> > > volume_worksets;
  panzer::getVolumeWorksetsFromContainer(*wkstContainer, elementBlockNames, volume_worksets);

  // build DOF Manager and linear object factory
  /////////////////////////////////////////////////////////////
  RCP<panzer::UniqueGlobalIndexer<int,int> > dofManager;
  {
    const Teuchos::RCP<panzer::ConnManager<int,int> > conn_manager =
      Teuchos::rcp(new panzer_stk_classic::STKConnManager<int>(mesh));

    const bool has_interface_condition = hasInterfaceCondition(bcs);
    if (has_interface_condition)
      buildInterfaceConnections(bcs, conn_manager);

    panzer::DOFManagerFactory<int,int> globalIndexerFactory;
    globalIndexerFactory.setEnableGhosting(has_interface_condition);
    dofManager = globalIndexerFactory.buildUniqueGlobalIndexer(
      Teuchos::opaqueWrapper(MPI_COMM_WORLD), physicsBlocks, conn_manager, "");

    if (has_interface_condition)
      checkInterfaceConnections(conn_manager, dofManager->getComm());
  }

  // construct some linear algebra object, build object to pass to evaluators
  Teuchos::RCP<panzer::LinearObjFactory<panzer::Traits> > linObjFactory =
    Teuchos::rcp(new panzer::EpetraLinearObjFactory<panzer::Traits,int>(Comm.getConst(), dofManager));

  std::vector<std::string> names;
  std::vector<std::vector<std::string> > eblocks;
  const int c_name_start = 3;
  {
    for (int i = 1; i <= 2; ++i) {
      names.push_back(strint(po.dof_names[0], i));
      eblocks.push_back(std::vector<std::string>());
      eblocks.back().push_back(po.eb_names[i-1]);
    }
    names.push_back(po.dof_names[1]);
    eblocks.push_back(std::vector<std::string>());
    eblocks.back().push_back(po.eb_names[0]);
    eblocks.back().push_back(po.eb_names[1]);
    if (po.dof_names.size() >= 3)
      for (int i = 1; i <= 2; ++i) {
        names.push_back(strint(po.dof_names[2], i));
        eblocks.push_back(std::vector<std::string>());
        eblocks.back().push_back(po.eb_names[i-1]);
      }
  }

  Teuchos::RCP<panzer::ResponseLibrary<panzer::Traits> > errorResponseLibrary =
    Teuchos::rcp(new panzer::ResponseLibrary<panzer::Traits>(wkstContainer, dofManager, linObjFactory));
  {
    for (std::size_t i = po.nonlinear_Robin ? c_name_start : 0; i < names.size(); ++i) {
      panzer::FunctionalResponse_Builder<int,int> builder;
      builder.comm = MPI_COMM_WORLD;
      builder.cubatureDegree = po.integration_order;
      builder.requiresCellIntegral = true;
      builder.quadPointField = names[i] + "_ERROR";
      errorResponseLibrary->addResponse(names[i] + " L2 Error", eblocks[i], builder);
    }
  }

  // setup closure model
  /////////////////////////////////////////////////////////////
  panzer::ClosureModelFactory_TemplateManager<panzer::Traits> cm_factory;
  Example::ClosureModelFactory_TemplateBuilder cm_builder;
  cm_factory.buildObjects(cm_builder);

  Teuchos::ParameterList closure_models("Closure Models");
  {
    Teuchos::ParameterList& s = closure_models.sublist("solid");
    for (std::vector<std::string>::const_iterator it = names.begin(); it != names.end(); ++it) {
      if (po.nonlinear_Robin)
        s.sublist(std::string("SOURCE_") + *it).set<double>("Value", 0.0);
      else
        s.sublist(std::string("SOURCE_") + *it).set<std::string>("Type", "SIMPLE SOURCE");
    }
    if (po.check_error)
      for (std::size_t i = po.nonlinear_Robin ? c_name_start : 0; i < names.size(); ++i) {
        const std::string err = names[i] + "_ERROR";
        s.sublist(err).set<std::string>("Type", "ERROR_CALC");
        s.sublist(err).set<std::string>("Field A", names[i]);
        s.sublist(err).set<std::string>("Field B", "EXACT");
      }
    if (po.check_error)
      s.sublist("EXACT").set<std::string>("Type", po.nonlinear_Robin ? "EXACT nonlinear Robin" : "EXACT");
  }

  Teuchos::ParameterList user_data("User Data"); // user data can be empty here

  // setup field manager builder
  /////////////////////////////////////////////////////////////
  Teuchos::RCP<panzer::FieldManagerBuilder> fmb =
    Teuchos::rcp(new panzer::FieldManagerBuilder);
  fmb->setWorksetContainer(wkstContainer);
  fmb->setupVolumeFieldManagers(physicsBlocks, cm_factory, closure_models, *linObjFactory, user_data);
  fmb->setupBCFieldManagers(bcs, physicsBlocks, *eqset_factory, cm_factory, bc_factory,
                            closure_models, *linObjFactory, user_data);
  fmb->writeVolumeGraphvizDependencyFiles("volume", physicsBlocks);
  fmb->writeBCGraphvizDependencyFiles("bc");

  // setup assembly engine
  /////////////////////////////////////////////////////////////

  // build assembly engine: The key piece that brings together everything and
  // drives and controls the assembly process. Just add matrices and vectors.
  panzer::AssemblyEngine_TemplateManager<panzer::Traits> ae_tm;
  panzer::AssemblyEngine_TemplateBuilder builder(fmb, linObjFactory);
  ae_tm.buildObjects(builder);

  user_data.set<int>("Workset Size", workset_size);
  if (po.check_error)
    errorResponseLibrary->buildResponseEvaluators(physicsBlocks, cm_factory, closure_models, user_data);

  // assemble and solve
  /////////////////////////////////////////////////////////////
  Teuchos::RCP<panzer::EpetraLinearObjContainer> ep_container;
  Teuchos::RCP<panzer::LinearObjContainer> ghost_container;
  if ( ! po.outer_iteration) {
    // Straightforward solve

    // build linear algebra objects: Ghost is for parallel assembly, it
    // contains local element contributions summed, the global IDs are not
    // unique. The non-ghosted or "global" container will contain the sum over
    // all processors of the ghosted objects. The global indices are unique.
    ghost_container = linObjFactory->buildGhostedLinearObjContainer();
    RCP<panzer::LinearObjContainer> container = linObjFactory->buildLinearObjContainer();
    linObjFactory->initializeGhostedContainer(panzer::LinearObjContainer::X |
                                              panzer::LinearObjContainer::F |
                                              panzer::LinearObjContainer::Mat, *ghost_container);
    linObjFactory->initializeContainer(panzer::LinearObjContainer::X |
                                       panzer::LinearObjContainer::F |
                                       panzer::LinearObjContainer::Mat, *container);
    ghost_container->initialize();
    container->initialize();

    panzer::AssemblyEngineInArgs input(ghost_container, container);
    input.alpha = 0;
    input.beta = 1;

    // evaluate physics: This does both the Jacobian and residual at once
    ae_tm.getAsObject<panzer::Traits::Jacobian>()->evaluate(input);

    // solve linear system
    /////////////////////////////////////////////////////////////

    // convert generic linear object container to epetra container
    ep_container = rcp_dynamic_cast<panzer::EpetraLinearObjContainer>(container);

    // Setup the linear solve: notice A is used directly
    Epetra_LinearProblem problem(&*ep_container->get_A(), &*ep_container->get_x(),
                                 &*ep_container->get_f());

    // build the solver
    AztecOO solver(problem);
    solver.SetAztecOption(AZ_solver, AZ_gmres); // we don't push out dirichlet conditions
    solver.SetAztecOption(AZ_precond, AZ_none);
    solver.SetAztecOption(AZ_kspace, 300);
    solver.SetAztecOption(AZ_output, 10);
    solver.SetAztecOption(AZ_precond, AZ_Jacobi);

    // solve the linear system
    solver.Iterate(1000, 1e-5);

    // We have now solved for the residual correction from zero in the context
    // of a Newton solve:
    //     J*e = -r = -(f - J*0), where f = J*u.
    // Therefore we have J*e = -J*u, which implies e = -u, so we scale the
    // solution vector.
    ep_container->get_x()->Scale(-1.0);
  } else {
    // Some analysis and an outer iteration if necessary.
    Teuchos::RCP<Epetra_CrsMatrix> J_fd;
    assembleAndSolve(ae_tm, linObjFactory, ep_container, ghost_container, po);

    if (po.test_Jacobian) {
      const double nwre = testJacobian(ae_tm, linObjFactory, ep_container->get_x());
      out << "TEST JACOBIAN " << nwre << "\n";
      if (nwre < 0 || nwre > 1e-5) pass = false;
    }
  }

  // output data (optional)
  /////////////////////////////////////////////////////////////

  // write linear system
  if (false) {
    EpetraExt::RowMatrixToMatrixMarketFile("a_op.mm", *ep_container->get_A());
    EpetraExt::VectorToMatrixMarketFile("x_vec.mm", *ep_container->get_x());
    EpetraExt::VectorToMatrixMarketFile("b_vec.mm", *ep_container->get_f());
  }

  if (po.check_error) {
    std::vector<Teuchos::RCP<panzer::Response_Functional<panzer::Traits::Residual> > > rfs(names.size());
    for (std::size_t i = po.nonlinear_Robin ? c_name_start : 0; i < names.size(); ++i) {
      Teuchos::RCP<panzer::ResponseBase> resp =
        errorResponseLibrary->getResponse<panzer::Traits::Residual>(names[i] + " L2 Error");
      rfs[i] = Teuchos::rcp_dynamic_cast<panzer::Response_Functional<panzer::Traits::Residual> >(resp);
      Teuchos::RCP<Thyra::VectorBase<double> > respVec = Thyra::createMember(rfs[i]->getVectorSpace());
      rfs[i]->setVector(respVec);
    }

    panzer::AssemblyEngineInArgs respInput(ghost_container, ep_container);
    respInput.alpha = 0;
    respInput.beta = 1;
    errorResponseLibrary->addResponsesToInArgs<panzer::Traits::Residual>(respInput);
    errorResponseLibrary->evaluate<panzer::Traits::Residual>(respInput);

    // Record a max error so we can use convergence_rate.py.
    double max_err = -1;
    for (std::size_t i = po.nonlinear_Robin ? c_name_start : 0; i < names.size(); ++i) {
      const double err = sqrt(rfs[i]->value);
      max_err = std::max(max_err, err);
      out << names[i] << " ERROR = " << err << "\n";
      if (err < 0 || err > (po.nonlinear_Robin ? 1e-10 : 0.03/(po.nxelem*po.nxelem/25.0)))
        pass = false;
    }
    out << "Error = " << max_err << "\n";
  }

  // Write solution except in the special case of a generated 3D mesh and
  // #ranks > 1. In that case, something in the mesh-gen and rebalance code is
  // causing a failure in IossBridge::write_side_data_to_ioss.
  if ( ! (po.is3d && mpiSession.getNProc() > 1 && po.mesh_filename.empty())) {
    // redistribute solution vector to ghosted vector
    linObjFactory->globalToGhostContainer(*ep_container, *ghost_container,
                                          panzer::EpetraLinearObjContainer::X |
                                          panzer::EpetraLinearObjContainer::DxDt);

    // get X Epetra_Vector from ghosted container
    RCP<panzer::EpetraLinearObjContainer> ep_ghost_container =
      rcp_dynamic_cast<panzer::EpetraLinearObjContainer>(ghost_container);
    panzer_stk_classic::write_solution_data(*dofManager, *mesh, *ep_ghost_container->get_x());
    mesh->writeToExodus("output.exo");
  }

  // all done!
  /////////////////////////////////////////////////////////////
  out << (pass ? "PASS" : "FAIL") << " BASICS\n";

  PHX::FinalizeKokkosDevice();
  return 0;
}
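// strint is used above (e.g. strint(po.dof_names[0], i)) but not defined in
// this excerpt; from context it appends an integer to a field name, so that
// "A" with i = 1 becomes "A1". A plausible sketch:
#include <sstream>
#include <string>

std::string strint(const std::string& base, const int i) {
  std::ostringstream os;
  os << base << i;
  return os.str();
}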
int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int argc, char *argv[])
{
#include <MueLu_UseShortNames.hpp>
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::TimeMonitor;

  bool success = true;
  bool verbose = true;
  try {
    RCP<const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();

    RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
    Teuchos::FancyOStream& out = *fancy;
    typedef Teuchos::ScalarTraits<SC> STS;

    // =========================================================================
    // Parameters initialization
    // =========================================================================
    //Teuchos::CommandLineProcessor clp(false);
    GO nx = 100, ny = 100, nz = 100;
    Galeri::Xpetra::Parameters<GO> galeriParameters(clp, nx, ny, nz, "Laplace2D"); // manage parameters of the test case
    Xpetra::Parameters xpetraParameters(clp);                                      // manage parameters of Xpetra

    std::string matFileName = "";
    clp.setOption("matrix", &matFileName, "read matrix from a file");
    LO blocksize = 1;
    clp.setOption("blocksize", &blocksize, "block size");

    switch (clp.parse(argc, argv)) {
      case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED:        return EXIT_SUCCESS;
      case Teuchos::CommandLineProcessor::PARSE_ERROR:
      case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE;
      case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL:          break;
    }

    //Xpetra::UnderlyingLib lib = xpetraParameters.GetLib();
    ParameterList galeriList = galeriParameters.GetParameterList();
    if (lib != Xpetra::UseTpetra)
      throw std::runtime_error("This test only works with Tpetra linear algebra");

    // =========================================================================
    // Problem construction
    // =========================================================================
    RCP<const Map> map;
    RCP<Matrix> A;
    RCP<MultiVector> nullspace;

    typedef Tpetra::CrsMatrix<SC,LO,GO,NO> Tpetra_CrsMatrix;
    typedef Tpetra::Operator<SC,LO,GO,NO> Tpetra_Operator;
    typedef Tpetra::Experimental::BlockCrsMatrix<SC,LO,GO,NO> Tpetra_BlockCrsMatrix;
    typedef Xpetra::TpetraBlockCrsMatrix<SC,LO,GO,NO> Xpetra_TpetraBlockCrsMatrix;
    typedef Xpetra::CrsMatrix<SC,LO,GO,NO> Xpetra_CrsMatrix;
    typedef Xpetra::CrsMatrixWrap<SC,LO,GO,NO> Xpetra_CrsMatrixWrap;
    typedef typename Teuchos::ScalarTraits<SC>::magnitudeType SCN;

    RCP<Tpetra_CrsMatrix> Acrs;
    RCP<Tpetra_BlockCrsMatrix> Ablock;

    if (matFileName.length() > 0) {
      // Read matrix from disk
      out << thickSeparator << std::endl << "Reading matrix from disk" << std::endl;
      typedef Tpetra::MatrixMarket::Reader<Tpetra_CrsMatrix> reader_type;
      Acrs = reader_type::readSparseFile(matFileName, comm);
    } else {
      // Use Galeri
      out << thickSeparator << std::endl << xpetraParameters << galeriParameters;
      std::string matrixType = galeriParameters.GetMatrixType();
      RCP<Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > Axp;
      MueLuExamples::generate_user_matrix_and_nullspace<Scalar,LocalOrdinal,GlobalOrdinal,Node>(
          matrixType, lib, galeriList, comm, Axp, nullspace);
      Acrs = Xpetra::Helpers<SC,LO,GO,NO>::Op2NonConstTpetraCrs(Axp);
    }

    // Block this bad boy: convert the point CrsMatrix to a BlockCrsMatrix
    Ablock = Tpetra::Experimental::convertToBlockCrsMatrix(*Acrs, blocksize);

    // Now wrap BlockCrs to Xpetra::Matrix
    RCP<Xpetra_CrsMatrix> Axt = rcp(new Xpetra_TpetraBlockCrsMatrix(Ablock));
    A = rcp(new Xpetra_CrsMatrixWrap(Axt));

    // =========================================================================
    // Setups and solves
    // =========================================================================
    map = Xpetra::toXpetra(Acrs->getRowMap());

    RCP<Vector> X1 = VectorFactory::Build(map);
    RCP<Vector> X2 = VectorFactory::Build(map);
    RCP<Vector> B  = VectorFactory::Build(map);
    B->setSeed(846930886);
    B->randomize();

    RCP<TimeMonitor> tm;

    // Belos Options
    RCP<Teuchos::ParameterList> SList = rcp(new Teuchos::ParameterList);
    SList->set("Verbosity", Belos::Errors + Belos::Warnings + Belos::StatusTestDetails);
    SList->set("Output Frequency", 10);
    SList->set("Output Style", Belos::Brief);
    SList->set("Maximum Iterations", 10);
    SList->set("Convergence Tolerance", 5e-2);

    // =========================================================================
    // Solve #1 (fixed point + Jacobi)
    // =========================================================================
    out << thickSeparator << std::endl;
    out << prefSeparator << " Solve 1: Fixed Point + Jacobi" << prefSeparator << std::endl;
    {
      Teuchos::ParameterList MueList;
      MueList.set("max levels", 1);
      MueList.set("coarse: type", "RELAXATION");

      std::string belos_solver("Fixed Point");
      MueLuExamples::solve_system_belos<Scalar,LocalOrdinal,GlobalOrdinal,Node>(
          A, X1, B, MueList, belos_solver, SList);

      SCN result = MueLuExamples::compute_resid_norm<Scalar,LocalOrdinal,GlobalOrdinal,Node>(*A, *X1, *B);
      out << "Solve #1: Residual Norm = " << result << std::endl;
    }

    // =========================================================================
    // Solve #2 (straight up Jacobi)
    // =========================================================================
    out << thickSeparator << std::endl;
    out << prefSeparator << " Solve 2: Fixed Jacobi" << prefSeparator << std::endl;
    {
      Teuchos::ParameterList IList;
      IList.set("relaxation: type", "Jacobi");
      IList.set("relaxation: damping factor", 1.0);
      IList.set("relaxation: sweeps", 10);

      std::string ifpack2_precond("RELAXATION");
      MueLuExamples::solve_system_ifpack2(A, X2, B, ifpack2_precond, IList);

      SCN result = MueLuExamples::compute_resid_norm<Scalar,LocalOrdinal,GlobalOrdinal,Node>(*A, *X2, *B);
      out << "Solve #2: Residual Norm = " << result << std::endl;
    }

    // Compare 1 & 2
    SCN norm = MueLuExamples::diff_vectors<Scalar,LocalOrdinal,GlobalOrdinal,Node>(*X1, *X2);
    if (norm > 1e-10) {
      out << "ERROR: Norm of Solve #1 and Solve #2 differs by " << norm << std::endl;
      success = false;
    }
  } // end try
  TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success);

  return (success ? EXIT_SUCCESS : EXIT_FAILURE);
}
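// compute_resid_norm and diff_vectors are defined elsewhere in MueLuExamples.
// A plausible sketch of the residual-norm helper, assuming it returns
// ||B - A*X||_2 through the Xpetra interface:
template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
typename Teuchos::ScalarTraits<Scalar>::magnitudeType
compute_resid_norm(const Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>& A,
                   const Xpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& X,
                   const Xpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& B)
{
  typedef Teuchos::ScalarTraits<Scalar> STS;
  typedef Xpetra::VectorFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node> VectorFactory;

  Teuchos::RCP<Xpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > R =
    VectorFactory::Build(A.getRangeMap());
  A.apply(X, *R);                        // R = A*X
  R->update(STS::one(), B, -STS::one()); // R = B - A*X
  return R->norm2();
}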
int main (int argc, char *argv[])
{
  using Teuchos::Comm;
  using Teuchos::FancyOStream;
  using Teuchos::getFancyOStream;
  using Teuchos::oblackholestream;
  using Teuchos::OSTab;
  using Teuchos::ParameterList;
  using Teuchos::parameterList;
  using Teuchos::RCP;
  using Teuchos::rcpFromRef;
  using std::cout;
  using std::endl;

  //
  // Typedefs for Tpetra template arguments.
  //
  typedef double scalar_type;
  typedef long int global_ordinal_type;
  typedef int local_ordinal_type;
  typedef Kokkos::DefaultNode::DefaultNodeType node_type;

  //
  // Tpetra objects which are the MV and OP template parameters of the
  // Belos specialization which we are testing.
  //
  typedef Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type> MV;
  typedef Tpetra::Operator<scalar_type, local_ordinal_type, global_ordinal_type, node_type> OP;

  //
  // Other typedefs.
  //
  typedef Teuchos::ScalarTraits<scalar_type> STS;
  typedef Tpetra::CrsMatrix<scalar_type, local_ordinal_type, global_ordinal_type, node_type> sparse_matrix_type;

  Teuchos::GlobalMPISession mpiSession (&argc, &argv, &cout);
  RCP<const Comm<int> > comm = Tpetra::DefaultPlatform::getDefaultPlatform().getComm();
  RCP<node_type> node = Tpetra::DefaultPlatform::getDefaultPlatform().getNode();
  RCP<oblackholestream> blackHole (new oblackholestream);
  const int myRank = comm->getRank();

  // Output stream that prints only on Rank 0.
  RCP<FancyOStream> out;
  if (myRank == 0) {
    out = Teuchos::getFancyOStream (rcpFromRef (cout));
  } else {
    out = Teuchos::getFancyOStream (blackHole);
  }

  //
  // Get test parameters from command-line processor.
  //
  // CommandLineProcessor always understands int, but may not
  // understand global_ordinal_type. We convert to the latter below.
  int numRows = comm->getSize() * 100;
  bool tolerant = false;
  bool verbose = false;
  bool debug = false;
  Teuchos::CommandLineProcessor cmdp (false, true);
  cmdp.setOption ("numRows", &numRows,
                  "Global number of rows (and columns) in the sparse matrix to generate.");
  cmdp.setOption ("tolerant", "intolerant", &tolerant, "Whether to parse files tolerantly.");
  cmdp.setOption ("verbose", "quiet", &verbose, "Print messages and results.");
  cmdp.setOption ("debug", "release", &debug,
                  "Run debugging checks and print copious debugging output.");
  if (cmdp.parse (argc, argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
    *out << "\nEnd Result: TEST FAILED" << endl;
    return EXIT_FAILURE;
  }

  // Output stream for verbose output.
  RCP<FancyOStream> verbOut = verbose ? out : getFancyOStream (blackHole);

  const bool success = true;

  // Test whether it's possible to instantiate the solver.
  // This is a minimal compilation test.
  *verbOut << "Instantiating Block GCRODR solver" << endl;
  Belos::BlockGCRODRSolMgr<scalar_type, MV, OP> solver;

  //
  // Test setting solver parameters. For now, we just use an empty
  // (but non-null) parameter list, which the solver should fill in
  // with defaults.
  //
  *verbOut << "Setting solver parameters" << endl;
  RCP<ParameterList> solverParams = parameterList ();
  solver.setParameters (solverParams);

  //
  // Create a linear system to solve.
  //
  *verbOut << "Creating linear system" << endl;
  RCP<sparse_matrix_type> A;
  RCP<MV> X_guess, X_exact, B;
  {
    typedef Belos::Tpetra::ProblemMaker<sparse_matrix_type> factory_type;
    factory_type factory (comm, node, out, tolerant, debug);

    RCP<ParameterList> problemParams = parameterList ();
    problemParams->set ("Global number of rows",
                        static_cast<global_ordinal_type> (numRows));
    problemParams->set ("Problem type", std::string ("Nonsymmetric"));
    factory.makeProblem (A, X_guess, X_exact, B, problemParams);
  }

  // Approximate solution vector is a copy of the guess vector.
  RCP<MV> X (new MV (*X_guess));

  TEUCHOS_TEST_FOR_EXCEPTION(A.is_null(), std::logic_error,
                             "The sparse matrix is null!");
  TEUCHOS_TEST_FOR_EXCEPTION(X_guess.is_null(), std::logic_error,
                             "The initial guess X_guess is null!");
  TEUCHOS_TEST_FOR_EXCEPTION(X_exact.is_null(), std::logic_error,
                             "The exact solution X_exact is null!");
  TEUCHOS_TEST_FOR_EXCEPTION(B.is_null(), std::logic_error,
                             "The right-hand side B is null!");
  TEUCHOS_TEST_FOR_EXCEPTION(X.is_null(), std::logic_error,
                             "The approximate solution vector X is null!");

  typedef Belos::LinearProblem<scalar_type, MV, OP> problem_type;
  RCP<problem_type> problem (new problem_type (A, X, B));
  problem->setProblem ();
  solver.setProblem (problem);

  *verbOut << "Solving linear system" << endl;
  Belos::ReturnType result = solver.solve ();

  *verbOut << "Result of solve: "
           << Belos::convertReturnTypeToString (result)
           << endl;

  if (success) {
    *out << "\nEnd Result: TEST PASSED" << endl;
    return EXIT_SUCCESS;
  } else {
    *out << "\nEnd Result: TEST FAILED" << endl;
    return EXIT_FAILURE;
  }
}
int main(int argc, char * argv[])
{
  typedef panzer::unit_test::CartesianConnManager<int,panzer::Ordinal64> CCM;
  typedef panzer::DOFManager<int,panzer::Ordinal64> DOFManager;

  using Teuchos::RCP;
  using Teuchos::rcp;

  Teuchos::GlobalMPISession mpiSession(&argc, &argv);
  Kokkos::initialize(argc, argv);

  Teuchos::MpiComm<int> comm(MPI_COMM_WORLD);
  int np = comm.getSize(); // number of processors

  // timings output
  std::string timingsFile = "timings.yaml";

  // mesh description
  int nx = 10, ny = 7, nz = 4;
  int px = np, py = 1, pz = 1;
  int bx = 1, by = 2, bz = 1;

  // parse command line arguments
  Teuchos::CommandLineProcessor clp;
  clp.setOption("nx", &nx);
  clp.setOption("ny", &ny);
  clp.setOption("nz", &nz);
  clp.setOption("px", &px);
  clp.setOption("py", &py);
  clp.setOption("pz", &pz);
  clp.setOption("timings-file", &timingsFile);
  auto cmdResult = clp.parse(argc, argv);
  if (cmdResult != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
    clp.printHelpMessage(argv[0], std::cout);
    return -1;
  }

  // build velocity, temperature and pressure fields
  RCP<const panzer::FieldPattern> pattern_U =
    buildFieldPattern<Intrepid2::Basis_HGRAD_HEX_C2_FEM<PHX::Device::execution_space,double,double>>();
  RCP<const panzer::FieldPattern> pattern_P =
    buildFieldPattern<Intrepid2::Basis_HGRAD_HEX_C1_FEM<PHX::Device::execution_space,double,double>>();
  RCP<const panzer::FieldPattern> pattern_T =
    buildFieldPattern<Intrepid2::Basis_HGRAD_HEX_C1_FEM<PHX::Device::execution_space,double,double>>();
  RCP<const panzer::FieldPattern> pattern_B =
    buildFieldPattern<Intrepid2::Basis_HDIV_HEX_I1_FEM<PHX::Device::execution_space,double,double>>();
  RCP<const panzer::FieldPattern> pattern_E =
    buildFieldPattern<Intrepid2::Basis_HCURL_HEX_I1_FEM<PHX::Device::execution_space,double,double>>();

  // repeatedly construct the DOFManager, timing buildGlobalUnknowns
  for (int repeats = 0; repeats < 100; repeats++) {

    // build the topology
    RCP<CCM> connManager = rcp(new CCM);
    connManager->initialize(comm,
                            Teuchos::as<panzer::Ordinal64>(nx),
                            Teuchos::as<panzer::Ordinal64>(ny),
                            Teuchos::as<panzer::Ordinal64>(nz),
                            px, py, pz, bx, by, bz);

    // build the dof manager, and associate it with the topology
    RCP<DOFManager> dofManager = rcp(new DOFManager);
    dofManager->setConnManager(connManager, *comm.getRawMpiComm());

    // add velocity (U) and PRESSURE fields to the MHD element block
    dofManager->addField("eblock-0_0_0", "UX", pattern_U);
    dofManager->addField("eblock-0_0_0", "UY", pattern_U);
    dofManager->addField("eblock-0_0_0", "UZ", pattern_U);
    dofManager->addField("eblock-0_0_0", "PRESSURE", pattern_P);
    dofManager->addField("eblock-0_0_0", "B", pattern_B);
    dofManager->addField("eblock-0_0_0", "E", pattern_E);

    // add velocity (U) fields to the solid element block
    dofManager->addField("eblock-0_1_0", "UX", pattern_U);
    dofManager->addField("eblock-0_1_0", "UY", pattern_U);
    dofManager->addField("eblock-0_1_0", "UZ", pattern_U);

    // try to get them all synced up
    comm.barrier();

    {
      PANZER_FUNC_TIME_MONITOR("panzer::ScalingTest::buildGlobalUnknowns");
      dofManager->buildGlobalUnknowns();
    }
  }

  Teuchos::TimeMonitor::summarize(std::cout, false, true, false);

  if (timingsFile != "") {
    std::ofstream fout(timingsFile.c_str());
    Teuchos::RCP<Teuchos::ParameterList> reportParams =
      parameterList(*(Teuchos::TimeMonitor::getValidReportParameters()));
    reportParams->set("Report format", "YAML");
    reportParams->set("YAML style", "spacious");
    Teuchos::TimeMonitor::report(fout, reportParams);
  }

  // this confirms the application passes
  std::cout << "Scaling test completed" << std::endl;

  return 0;
}
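// The timing pattern above, distilled: a Teuchos::TimeMonitor accumulates
// wall-clock time into a named counter while it is in scope, and
// TimeMonitor::summarize() (or report(), as used above for YAML output)
// prints the totals. A minimal sketch:
#include <Teuchos_TimeMonitor.hpp>
#include <iostream>

void timedWork() {
  Teuchos::RCP<Teuchos::Time> timer =
    Teuchos::TimeMonitor::getNewCounter("my work");
  {
    Teuchos::TimeMonitor tm(*timer); // starts the timer; stops at end of scope
    // ... the code being timed ...
  }
  Teuchos::TimeMonitor::summarize(std::cout);
}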
int main(int argc, char * argv[]) { using Teuchos::RCP; using Teuchos::rcp; using Teuchos::rcpFromRef; Teuchos::GlobalMPISession mpiSession(&argc,&argv,&std::cout); std::string output_file_name = "square_mesh.gen"; int xBlocks=1,yBlocks=1, zBlocks=1; int xElements=1,yElements=1, zElements=1; double x0=0.0, xf=1.0; double y0=0.0, yf=1.0; double z0=0.0, zf=1.0; bool threeD = false; // setup input arguments { Teuchos::CommandLineProcessor clp; clp.throwExceptions(false); clp.setOption("o", &output_file_name, "Mesh output filename"); clp.setOption("3d", "2d", &threeD, "Cube versus square mesh."); clp.setOption("x-blocks", &xBlocks, "Number of blocks in 'x' direction"); clp.setOption("y-blocks", &yBlocks, "Number of blocks in 'y' direction"); clp.setOption("z-blocks", &zBlocks, "Number of blocks in 'z' direction"); clp.setOption("x-elmts", &xElements, "Number of elements in 'x' direction in each block"); clp.setOption("y-elmts", &yElements, "Number of elements in 'y' direction in each block"); clp.setOption("z-elmts", &zElements, "Number of elements in 'z' direction in each block"); clp.setOption("x0", &x0, "Location of left edge"); clp.setOption("xf", &xf, "Location of right edge"); clp.setOption("y0", &y0, "Location of bottom edge"); clp.setOption("yf", &yf, "Location of top edge"); clp.setOption("z0", &z0, "Location of front(?) edge"); clp.setOption("zf", &zf, "Location of back(?) edge"); Teuchos::CommandLineProcessor::EParseCommandLineReturn parse_return = clp.parse(argc,argv,&std::cerr); if(parse_return==Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0; TEUCHOS_TEST_FOR_EXCEPTION(parse_return != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL, std::runtime_error, "Failed to parse command line!"); } RCP<Teuchos::ParameterList> pl = rcp(new Teuchos::ParameterList); pl->set("X Blocks",xBlocks); pl->set("Y Blocks",yBlocks); pl->set("X Elements",xElements); pl->set("Y Elements",yElements); pl->set("X0",x0); pl->set("Y0",y0); pl->set("Xf",xf); pl->set("Yf",yf); if(threeD) { pl->set("Z Blocks",zBlocks); pl->set("Z Elements",zElements); pl->set("Z0",z0); pl->set("Zf",zf); } int numprocs = stk_classic::parallel_machine_size(MPI_COMM_WORLD); int rank = stk_classic::parallel_machine_rank(MPI_COMM_WORLD); RCP<panzer_stk_classic::STK_MeshFactory> factory; if(!threeD) factory = Teuchos::rcp(new panzer_stk_classic::SquareQuadMeshFactory); else factory = Teuchos::rcp(new panzer_stk_classic::CubeHexMeshFactory); factory->setParameterList(pl); RCP<panzer_stk_classic::STK_Interface> mesh = factory->buildMesh(MPI_COMM_WORLD); mesh->writeToExodus(output_file_name); return 0; }
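The mesh factory above is configured entirely through a Teuchos::ParameterList assembled from the parsed options. A minimal sketch of that handoff, using a subset of the keys shown above (the std::cout print is just to show that a ParameterList knows how to print itself):

#include <iostream>
#include <Teuchos_ParameterList.hpp>
#include <Teuchos_RCP.hpp>

int main () {
  Teuchos::RCP<Teuchos::ParameterList> pl =
    Teuchos::rcp (new Teuchos::ParameterList);
  pl->set ("X Blocks", 1);
  pl->set ("X Elements", 20);
  pl->set ("X0", 0.0);
  pl->set ("Xf", 1.0);
  std::cout << *pl << std::endl; // dump the list for inspection
  return 0;
}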
int main(int argc, char *argv[]) { int r_val = 0; Teuchos::CommandLineProcessor clp; int nthreads = 1; clp.setOption("nthreads", &nthreads, "Number of threads"); clp.recogniseAllOptions(true); clp.throwExceptions(false); Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv ); if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) { return 0; } if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) { cout << "Testing Kokkos::Threads:: Failed in parsing command line input" << endl; return -1; } unsigned threads_count = 0; if (Kokkos::hwloc::available()) { const unsigned numa_count = Kokkos::hwloc::get_available_numa_count(); const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa(); const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core(); const unsigned one = 1u; threads_count = max(one, numa_count)*max(one, cores_per_numa)*max(one, threads_per_core); cout << " = Kokkos::hwloc = " << endl << "NUMA count = " << numa_count << endl << "Cores per NUMA = " << cores_per_numa << endl << "Threads per core = " << threads_per_core << endl << "Threads count = " << threads_count << endl; } else { threads_count = thread::hardware_concurrency(); cout << " = std::thread::hardware_concurrency = " << endl << "Threads count = " << threads_count << endl; } if (static_cast<unsigned int>(nthreads) > threads_count) { ++r_val; cout << "Testing Kokkos::Threads:: Failed because the given nthreads exceeds the number of available threads" << endl; } else { Kokkos::Threads::initialize( nthreads ); Kokkos::Threads::print_configuration( cout , true /* detailed */ ); //__TestSuiteDoUnitTests__(float,int,unsigned int,Kokkos::Serial,void); //__TestSuiteDoUnitTests__(float,long,unsigned long,Kokkos::Serial,void); __TestSuiteDoUnitTests__(double,int,unsigned int,Kokkos::Threads,void); // __TestSuiteDoUnitTests__(double,long,unsigned long,Kokkos::Serial,void); // __TestSuiteDoUnitTests__(complex<float>,int,unsigned int,Kokkos::Serial,void); // __TestSuiteDoUnitTests__(complex<float>,long,unsigned long,Kokkos::Serial,void); // __TestSuiteDoUnitTests__(complex<double>,int,unsigned int,Kokkos::Serial,void); // __TestSuiteDoUnitTests__(complex<double>,long,unsigned long,Kokkos::Serial,void); Kokkos::Threads::finalize(); } string eval; __EVAL_STRING__(r_val, eval); cout << "Testing Kokkos::Threads::" << eval << endl; return r_val; }
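The thread-count probe above is self-contained enough to extract: prefer the Kokkos::hwloc topology queries when hwloc support is available, otherwise fall back to the standard library. A sketch under that assumption (the Kokkos_hwloc.hpp header name may vary with the Kokkos version):

#include <algorithm>
#include <thread>
#include <Kokkos_hwloc.hpp>

unsigned availableThreadCount () {
  if (Kokkos::hwloc::available ()) {
    const unsigned numa    = Kokkos::hwloc::get_available_numa_count ();
    const unsigned cores   = Kokkos::hwloc::get_available_cores_per_numa ();
    const unsigned threads = Kokkos::hwloc::get_available_threads_per_core ();
    return std::max (1u, numa) * std::max (1u, cores) * std::max (1u, threads);
  }
  return std::thread::hardware_concurrency (); // may be 0 if unknown
}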
int main (int argc, char *argv[]) { Teuchos::CommandLineProcessor clp; clp.setDocString("This example program measure the performance of Chol algorithms on Kokkos::Threads execution space.\n"); int nthreads = 1; clp.setOption("nthreads", &nthreads, "Number of threads"); int max_task_dependence = 10; clp.setOption("max-task-dependence", &max_task_dependence, "Max number of task dependence"); int team_size = 1; clp.setOption("team-size", &team_size, "Team size"); int fill_level = 0; clp.setOption("fill-level", &fill_level, "Fill level"); bool team_interface = true; clp.setOption("enable-team-interface", "disable-team-interface", &team_interface, "Flag for team interface"); bool mkl_interface = false; clp.setOption("enable-mkl-interface", "disable-mkl-interface", &mkl_interface, "Flag for MKL interface"); int stack_size = 8192; clp.setOption("stack-size", &stack_size, "Stack size"); bool verbose = false; clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing"); string file_input = "test.mtx"; clp.setOption("file-input", &file_input, "Input file (MatrixMarket SPD matrix)"); int treecut = 15; clp.setOption("treecut", &treecut, "Level to cut tree from bottom"); int minblksize = 0; clp.setOption("minblksize", &minblksize, "Minimum block size for internal reordering"); int prunecut = 0; clp.setOption("prunecut", &prunecut, "Leve to prune tree from bottom"); int seed = 0; clp.setOption("seed", &seed, "Seed for random number generator in graph partition"); int niter = 10; clp.setOption("niter", &niter, "Number of iterations for testing"); clp.recogniseAllOptions(true); clp.throwExceptions(false); Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv ); if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0; if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) return -1; int r_val = 0; { const bool overwrite = true; const int nshepherds = (team_interface ? nthreads/team_size : nthreads); const int nworker_per_shepherd = nthreads/nshepherds; setenv("QT_HWPAR", to_string(nthreads).c_str(), overwrite); setenv("QT_NUM_SHEPHERDS", to_string(nshepherds).c_str(), overwrite); setenv("QT_NUM_WORKERS_PER_SHEPHERD", to_string(nworker_per_shepherd).c_str(), overwrite); setenv("QT_STACK_SIZE", to_string(stack_size).c_str(), overwrite); exec_space::initialize(nthreads); exec_space::print_configuration(cout, true); r_val = exampleCholPerformance <value_type,ordinal_type,size_type,exec_space,void> (file_input, treecut, minblksize, prunecut, seed, niter, nthreads, max_task_dependence, team_size, fill_level, nshepherds, team_interface, (nthreads != 1), mkl_interface, verbose); exec_space::finalize(); unsetenv("QT_HWPAR"); unsetenv("QT_NUM_SHEPHERDS"); unsetenv("QT_NUM_WORKERS_PER_SHEPHERD"); unsetenv("QT_STACK_SIZE"); } return r_val; }
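The Qthreads runtime above is configured through environment variables that must be set before the execution space initializes and cleared after it finalizes. The pattern in isolation (variable name as in the program; the value is illustrative):

#include <cstdlib>
#include <string>

int main () {
  const int nthreads = 4;
  // setenv must happen before the Qthreads-backed execution space starts.
  setenv ("QT_HWPAR", std::to_string (nthreads).c_str (), /*overwrite=*/1);
  // ... initialize the execution space, run the example, finalize ...
  unsetenv ("QT_HWPAR"); // leave the environment as we found it
  return 0;
}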
int main(int argc, char *argv[]) { bool success = true; bool verbose = false; try { const size_t num_sockets = Kokkos::hwloc::get_available_numa_count(); const size_t num_cores_per_socket = Kokkos::hwloc::get_available_cores_per_numa(); const size_t num_threads_per_core = Kokkos::hwloc::get_available_threads_per_core(); // Setup command line options Teuchos::CommandLineProcessor CLP; CLP.setDocString( "This test measures the performance of MP::Vector multiply routines.\n"); int nGrid = 32; CLP.setOption("n", &nGrid, "Number of mesh points in each direction"); int nIter = 10; CLP.setOption("ni", &nIter, "Number of multiply iterations"); #ifdef KOKKOS_HAVE_PTHREAD bool threads = true; CLP.setOption("threads", "no-threads", &threads, "Enable Threads device"); int num_cores = num_cores_per_socket * num_sockets; CLP.setOption("cores", &num_cores, "Number of CPU cores to use (defaults to all)"); int num_hyper_threads = num_threads_per_core; CLP.setOption("hyperthreads", &num_hyper_threads, "Number of hyper threads per core to use (defaults to all)"); int threads_per_vector = 1; CLP.setOption("threads_per_vector", &threads_per_vector, "Number of threads to use within each vector"); #endif #ifdef KOKKOS_HAVE_CUDA bool cuda = true; CLP.setOption("cuda", "no-cuda", &cuda, "Enable Cuda device"); int cuda_threads_per_vector = 16; CLP.setOption("cuda_threads_per_vector", &cuda_threads_per_vector, "Number of Cuda threads to use within each vector"); int cuda_block_size = 0; CLP.setOption("cuda_block_size", &cuda_block_size, "Cuda block size (0 implies the default choice)"); int num_cuda_blocks = 0; CLP.setOption("num_cuda_blocks", &num_cuda_blocks, "Number of Cuda blocks (0 implies the default choice)"); int device_id = 0; CLP.setOption("device", &device_id, "CUDA device ID"); #endif CLP.parse( argc, argv ); typedef int Ordinal; typedef double Scalar; #ifdef KOKKOS_HAVE_PTHREAD if (threads) { typedef Kokkos::Threads Device; typedef Stokhos::StaticFixedStorage<Ordinal,Scalar,1,Device> Storage; Kokkos::Threads::initialize(num_cores*num_hyper_threads); std::cout << std::endl << "Threads performance with " << num_cores*num_hyper_threads << " threads:" << std::endl; Kokkos::DeviceConfig dev_config(num_cores, threads_per_vector, num_hyper_threads / threads_per_vector); mainHost<Storage>(nGrid, nIter, dev_config); Kokkos::Threads::finalize(); } #endif #ifdef KOKKOS_HAVE_CUDA if (cuda) { typedef Kokkos::Cuda Device; typedef Stokhos::StaticFixedStorage<Ordinal,Scalar,1,Device> Storage; Kokkos::Cuda::host_mirror_device_type::initialize(); Kokkos::Cuda::initialize(Kokkos::Cuda::SelectDevice(device_id)); cudaDeviceProp deviceProp; cudaGetDeviceProperties(&deviceProp, device_id); std::cout << std::endl << "CUDA performance for device " << device_id << " (" << deviceProp.name << "):" << std::endl; Kokkos::DeviceConfig dev_config( num_cuda_blocks, cuda_threads_per_vector, cuda_threads_per_vector == 0 ? 0 : cuda_block_size / cuda_threads_per_vector); mainCuda<Storage>(nGrid,nIter,dev_config); Kokkos::Cuda::host_mirror_device_type::finalize(); Kokkos::Cuda::finalize(); } #endif } TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success); if (success) return 0; return -1; }
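Several options above ("threads"/"no-threads", "cuda"/"no-cuda") use the paired true/false form of setOption, where two flag names map onto one bool. A minimal sketch of that form; the option names here are illustrative:

#include <iostream>
#include <Teuchos_CommandLineProcessor.hpp>

int main (int argc, char* argv[]) {
  bool useDevice = true;
  // (throwExceptions=false, recogniseAllOptions=true)
  Teuchos::CommandLineProcessor clp (false, true);
  clp.setOption ("device", "no-device", &useDevice,
                 "Enable (--device) or disable (--no-device) the device path");
  if (clp.parse (argc, argv) !=
      Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
    return 1;
  }
  std::cout << "useDevice = " << (useDevice ? "true" : "false") << std::endl;
  return 0;
}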
int main(int argc,char * argv[]) { bool status = false; Kokkos::initialize(argc,argv); { // need to protect kokkos and MPI // calls MPI_Init and MPI_Finalize Teuchos::GlobalMPISession mpiSession(&argc,&argv); // build MPI/Serial communicators #ifdef HAVE_MPI Epetra_MpiComm Comm_epetra(MPI_COMM_WORLD); #else Epetra_SerialComm Comm_epetra; #endif Teuchos::RCP<const Teuchos::Comm<int> > Comm = Tpetra::DefaultPlatform::getDefaultPlatform ().getComm (); Teko::Test::UnitTest::SetComm(Teuchos::rcpFromRef(Comm_epetra)); Teko::Test::UnitTest::SetComm_tpetra(Comm); Teuchos::CommandLineProcessor clp; int verbosity = 1; std::string faillog = "failure.log"; bool isfast = false; clp.setOption("verb",&verbosity,"How verbose is the output? 1 is normal 10 is a lot."); clp.setOption("log",&faillog,"File for failure information to go to (also high verbosity text)"); clp.setOption("fast","notfast",&isfast,"Run only fast tests"); clp.parse(argc,argv); Teuchos::RCP<Teuchos::FancyOStream> termout = Teuchos::getFancyOStream(Teuchos::rcpFromRef(std::cout)); Teuchos::RCP<Teuchos::FancyOStream> failout; std::ofstream failure; if(faillog=="stdout") { failout = termout; } else { failure.open(faillog.c_str()); failout = Teuchos::getFancyOStream(Teuchos::rcpFromRef(failure)); } termout->setOutputToRootOnly(0); failout->setOutputToRootOnly(0); // gdbIn(); Teko_ADD_UNIT_TEST(Teko::Test::tSIMPLEPreconditionerFactory_tpetra,SIMPLEPreconditionerFactory_tpetra); Teko_ADD_UNIT_TEST(Teko::Test::tDiagonalPreconditionerFactory_tpetra,DiagonalPreconditionerFactory_tpetra); Teko_ADD_UNIT_TEST(Teko::Test::tLU2x2PreconditionerFactory_tpetra,LU2x2PreconditionerFactory_tpetra); Teko_ADD_UNIT_TEST(Teko::Test::tLSCStablePreconditionerFactory_tpetra,LSCStablePreconditionerFactory_tpetra); Teko_ADD_UNIT_TEST(Teko::Test::tLSCStabilized_tpetra,LSCStabilized_tpetra); Teko_ADD_UNIT_TEST(Teko::Test::tJacobi2x2PreconditionerFactory_tpetra,Jacobi2x2PreconditionerFactory_tpetra); Teko_ADD_UNIT_TEST(Teko::Test::tBlockJacobiPreconditionerFactory_tpetra,BlockJacobiPreconditionerFactory_tpetra); Teko_ADD_UNIT_TEST(Teko::Test::tBlockUpperTriInverseOp_tpetra,BlockUpperTriInverseOp_tpetra); Teko_ADD_UNIT_TEST(Teko::Test::tBlockLowerTriInverseOp_tpetra,BlockLowerTriInverseOp_tpetra); Teko_ADD_UNIT_TEST(Teko::Test::tTpetraOperatorWrapper,tTpetraOperatorWrapper); Teko_ADD_UNIT_TEST(Teko::Test::tInterlacedTpetra,InterlacedTpetra); Teko_ADD_UNIT_TEST(Teko::Test::tBlockingTpetra,BlockingTpetra); Teko_ADD_UNIT_TEST(Teko::Test::tTpetraThyraConverter,TpetraThyraConverter); Teko_ADD_UNIT_TEST(Teko::Test::tGraphLaplacian_tpetra,tGraphLaplacian_tpetra); Teko_ADD_UNIT_TEST(Teko::Test::tParallelInverse_tpetra,tParallelInverse_tpetra); Teko_ADD_UNIT_TEST(Teko::Test::tExplicitOps_tpetra,tExplicitOps_tpetra); Teko_ADD_UNIT_TEST(Teko::Test::tLSCHIntegrationTest_tpetra,LSCHIntegrationTest_tpetra); Teko_ADD_UNIT_TEST(Teko::Test::tLumping_tpetra,Lumping_tpetra); Teko_ADD_UNIT_TEST(Teko::Test::tAbsRowSum_tpetra,AbsRowSum_tpetra); Teko_ADD_UNIT_TEST(Teko::Test::tNeumannSeries_tpetra,NeumannSeries_tpetra); Teko_ADD_UNIT_TEST(Teko::Test::tPCDStrategy_tpetra,PCDStrategy_tpetra); if(not isfast) { Teko_ADD_UNIT_TEST(Teko::Test::tLSCIntegrationTest_tpetra,LSCIntegrationTest_tpetra); Teko_ADD_UNIT_TEST(Teko::Test::tStridedTpetraOperator,tStridedTpetraOperator); Teko_ADD_UNIT_TEST(Teko::Test::tBlockedTpetraOperator,tBlockedTpetraOperator); } status = Teko::Test::UnitTest::RunTests_tpetra(verbosity,*termout,*failout); if(not status) *termout << "Teko tests failed" << std::endl; // 
release any stored Kokkos memory Teko::Test::UnitTest::ClearTests(); } Kokkos::finalize(); return status ? 0 : -1; }
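The terminal/failure stream setup in the test driver above hinges on Teuchos::FancyOStream's setOutputToRootOnly, which silences every MPI rank but the chosen one. The pattern in isolation:

#include <iostream>
#include <Teuchos_FancyOStream.hpp>
#include <Teuchos_GlobalMPISession.hpp>
#include <Teuchos_RCP.hpp>

int main (int argc, char* argv[]) {
  Teuchos::GlobalMPISession mpiSession (&argc, &argv);
  Teuchos::RCP<Teuchos::FancyOStream> out =
    Teuchos::getFancyOStream (Teuchos::rcpFromRef (std::cout));
  out->setOutputToRootOnly (0); // only rank 0 actually prints
  *out << "hello from the root rank" << std::endl;
  return 0;
}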
int main(int argc, char* argv[]) { int ierr = 0; try { double t, ta; int p = 2; int w = p+7; // Set up command line options Teuchos::CommandLineProcessor clp; clp.setDocString("This program tests the speed of various forward mode AD implementations for a single multiplication operation"); int nderiv = 10; clp.setOption("nderiv", &nderiv, "Number of derivative components"); int nloop = 1000000; clp.setOption("nloop", &nloop, "Number of loops"); // Parse options Teuchos::CommandLineProcessor::EParseCommandLineReturn parseReturn= clp.parse(argc, argv); if(parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) return 1; // Memory pool & manager Sacado::Fad::MemPoolManager<double> poolManager(10); Sacado::Fad::MemPool* pool = poolManager.getMemoryPool(nderiv); Sacado::Fad::DMFad<double>::setDefaultPool(pool); std::cout.setf(std::ios::scientific); std::cout.precision(p); std::cout << "Times (sec) for nderiv = " << nderiv << " nloop = " << nloop << ": " << std::endl; ta = do_time_analytic(nderiv, nloop); std::cout << "Analytic: " << std::setw(w) << ta << std::endl; t = do_time< FAD::TFad<10,double> >(nderiv, nloop); std::cout << "TFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << std::endl; t = do_time< FAD::Fad<double> >(nderiv, nloop); std::cout << "Fad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << std::endl; t = do_time< Sacado::Fad::SFad<double,10> >(nderiv, nloop); std::cout << "SFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << std::endl; t = do_time< Sacado::Fad::SLFad<double,10> >(nderiv, nloop); std::cout << "SLFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << std::endl; t = do_time< Sacado::Fad::DFad<double> >(nderiv, nloop); std::cout << "DFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << std::endl; t = do_time< Sacado::Fad::DMFad<double> >(nderiv, nloop); std::cout << "DMFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << std::endl; t = do_time< Sacado::ELRFad::SFad<double,10> >(nderiv, nloop); std::cout << "ELRSFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << std::endl; t = do_time< Sacado::ELRFad::SLFad<double,10> >(nderiv, nloop); std::cout << "ELRSLFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << std::endl; t = do_time< Sacado::ELRFad::DFad<double> >(nderiv, nloop); std::cout << "ELRDFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << std::endl; t = do_time< Sacado::CacheFad::DFad<double> >(nderiv, nloop); std::cout << "CacheFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << std::endl; t = do_time< Sacado::Fad::DVFad<double> >(nderiv, nloop); std::cout << "DVFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << std::endl; } catch (std::exception& e) { std::cout << e.what() << std::endl; ierr = 1; } catch (const char *s) { std::cout << s << std::endl; ierr = 1; } catch (...) { std::cout << "Caught unknown exception!" << std::endl; ierr = 1; } return ierr; }
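The do_time* helpers used above are not shown in this listing. A plausible shape for such a helper, timing nloop repetitions of an operation with std::chrono, is sketched below; this is an assumption about the structure, not the program's actual implementation:

#include <chrono>

template <typename Op>
double time_loop (int nloop, Op op) {
  const auto t0 = std::chrono::steady_clock::now ();
  for (int i = 0; i < nloop; ++i) {
    op (); // the operation being benchmarked
  }
  const std::chrono::duration<double> dt =
    std::chrono::steady_clock::now () - t0;
  return dt.count (); // seconds for all nloop iterations
}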
int main (int argc, char *argv[]) { using Teuchos::RCP; using std::cout; using std::endl; // // Initialize MPI. // Teuchos::oblackholestream blackhole; Teuchos::GlobalMPISession mpiSession (&argc, &argv, &blackhole); // // Get the default communicator and node // RCP<const Teuchos::Comm<int> > comm = Tpetra::DefaultPlatform::getDefaultPlatform ().getComm (); const int myRank = comm->getRank (); // // Get parameters from command-line processor // MyOp::global_ordinal_type n = 100; Teuchos::CommandLineProcessor cmdp (false, true); cmdp.setOption ("n", &n, "Number of rows of our operator."); if (cmdp.parse (argc, argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) { return -1; } // Construct the operator. Note that the operator does not have to // be an explicitly stored matrix. Here, we are using our // user-defined operator. MyOp K (n, comm); // Construct a Vector of all ones, using the above Operator's domain Map. typedef Tpetra::Vector<MyOp::scalar_type, MyOp::local_ordinal_type, MyOp::global_ordinal_type, MyOp::node_type> vec_type; vec_type x (K.getDomainMap ()); x.putScalar (1.0); // Construct an output Vector for K*x. vec_type y (K.getRangeMap ()); K.apply (x, y); // Compute y := K*x. // The operator has a stencil (-1, 2, -1), except for the // boundaries. At the left boundary (global row 0), the stencil is // (2, -1), and at the right boundary (global row n-1), the stencil // is (-1, 2). Thus, we know that if all entries of the input // Vector are 1, then all entries of the output Vector are 0, except // for the boundary entries, which are both 1. // // To test this, construct the expected output vector y_expected, // and compare y to y_expected using the max norm. Even in single // precision, the max norm of y - y_expected should be exactly zero. typedef MyOp::map_type map_type; RCP<const map_type> rangeMap = K.getRangeMap (); vec_type y_expected (rangeMap); y_expected.putScalar (0.0); if (rangeMap->isNodeGlobalElement (0)) { y_expected.replaceGlobalValue (0, 1.0); } if (rangeMap->isNodeGlobalElement (n - 1)) { y_expected.replaceGlobalValue (n - 1, 1.0); } y_expected.update (1.0, y, -1.0); // y_expected := y - y_expected typedef vec_type::mag_type mag_type; // type of a norm of vec_type const mag_type diffMaxNorm = y_expected.normInf (); bool success = true; if (myRank == 0) { if (diffMaxNorm == 0.0) { // This tells the Trilinos test framework that the test passed. cout << "Yay! ||y - y_expected||_inf = 0." << endl << "End Result: TEST PASSED" << endl; } else { success = false; // This tells the Trilinos test framework that the test failed. cout << "Oops! ||y - y_expected||_inf = " << diffMaxNorm << " != 0." << endl << "End Result: TEST FAILED" << endl; } } return success ? 0 : -1; }
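The comment block above argues that applying the (-1, 2, -1) stencil, with (2, -1) and (-1, 2) rows at the boundaries, to a vector of all ones yields zero everywhere except a 1 at each end. That claim is easy to check in plain C++, independent of Tpetra:

#include <cassert>
#include <vector>

int main () {
  const int n = 100;
  std::vector<double> x (n, 1.0), y (n, 0.0);
  for (int i = 0; i < n; ++i) {
    y[i] = 2.0 * x[i];
    if (i > 0)     y[i] -= x[i-1]; // interior and right-boundary coupling
    if (i < n - 1) y[i] -= x[i+1]; // interior and left-boundary coupling
  }
  for (int i = 1; i < n - 1; ++i) assert (y[i] == 0.0); // interior: 2-1-1 = 0
  assert (y[0] == 1.0 && y[n-1] == 1.0);                // boundary rows: 2-1 = 1
  return 0;
}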
int main(int argc, char *argv[]) { int np=1, rank=0; int splitrank, splitsize; int rc = 0; nssi_service xfer_svc; int server_index=0; int rank_in_server=0; int transport_index=-1; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &np); MPI_Barrier(MPI_COMM_WORLD); Teuchos::oblackholestream blackhole; std::ostream &out = ( rank == 0 ? std::cout : blackhole ); struct xfer_args args; const int num_io_methods = 8; const int io_method_vals[] = { XFER_WRITE_ENCODE_SYNC, XFER_WRITE_ENCODE_ASYNC, XFER_WRITE_RDMA_SYNC, XFER_WRITE_RDMA_ASYNC, XFER_READ_ENCODE_SYNC, XFER_READ_ENCODE_ASYNC, XFER_READ_RDMA_SYNC, XFER_READ_RDMA_ASYNC}; const char * io_method_names[] = { "write-encode-sync", "write-encode-async", "write-rdma-sync", "write-rdma-async", "read-encode-sync", "read-encode-async", "read-rdma-sync", "read-rdma-async"}; const int nssi_transport_list[] = { NSSI_RPC_PTL, NSSI_RPC_PTL, NSSI_RPC_IB, NSSI_RPC_IB, NSSI_RPC_GEMINI, NSSI_RPC_GEMINI, NSSI_RPC_BGPDCMF, NSSI_RPC_BGPDCMF, NSSI_RPC_BGQPAMI, NSSI_RPC_BGQPAMI, NSSI_RPC_MPI}; const int num_nssi_transports = 11; const int nssi_transport_vals[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; const char * nssi_transport_names[] = { "portals", "ptl", "infiniband", "ib", "gemini", "gni", "bgpdcmf", "dcmf", "bgqpami", "pami", "mpi" }; // Initialize arguments args.transport=NSSI_DEFAULT_TRANSPORT; args.len = 1; args.delay = 1; args.io_method = XFER_WRITE_RDMA_SYNC; args.debug_level = LOG_WARN; args.num_trials = 1; args.num_reqs = 1; args.result_file_mode = "a"; args.result_file = ""; args.url_file = ""; args.logfile = ""; args.client_flag = true; args.server_flag = true; args.num_servers = 1; args.num_threads = 0; args.timeout = 500; args.num_retries = 5; args.validate_flag = true; args.kill_server_flag = true; args.block_distribution = true; bool success = true; /** * We make extensive use of the \ref Teuchos::CommandLineProcessor for command-line * options to control the behavior of the test code. To evaluate performance, * the "num-trials", "num-reqs", and "len" options control the amount of data transferred * between client and server. The "io-method" selects the type of data transfer. The * server-url specifies the URL of the server. If running as a server, the server-url * provides a recommended URL when initializing the network transport. */ try { //out << Teuchos::Teuchos_Version() << std::endl << std::endl; // Creating an empty command line processor looks like: Teuchos::CommandLineProcessor parser; parser.setDocString( "This example program demonstrates a simple data-transfer service " "built using the NEtwork Scalable Service Interface (Nessie)." ); /* To set an option, it must be given a name and a default value. Additionally, each option can be given a help std::string. Although it is not necessary, a help std::string aids a user's comprehension of the acceptable command line arguments. 
Some examples of setting command line options are: */ parser.setOption("delay", &args.delay, "time(s) for client to wait for server to start" ); parser.setOption("timeout", &args.timeout, "time(ms) to wait for server to respond" ); parser.setOption("server", "no-server", &args.server_flag, "Run the server" ); parser.setOption("client", "no-client", &args.client_flag, "Run the client"); parser.setOption("len", &args.len, "The number of structures in an input buffer"); parser.setOption("debug",(int*)(&args.debug_level), "Debug level"); parser.setOption("logfile", &args.logfile, "log file"); parser.setOption("num-trials", &args.num_trials, "Number of trials (experiments)"); parser.setOption("num-reqs", &args.num_reqs, "Number of reqs/trial"); parser.setOption("result-file", &args.result_file, "Where to store results"); parser.setOption("result-file-mode", &args.result_file_mode, "Write mode for the result"); parser.setOption("server-url-file", &args.url_file, "File that has URL client uses to find server"); parser.setOption("validate", "no-validate", &args.validate_flag, "Validate the data"); parser.setOption("num-servers", &args.num_servers, "Number of server processes"); parser.setOption("num-threads", &args.num_threads, "Number of threads used by each server process"); parser.setOption("kill-server", "no-kill-server", &args.kill_server_flag, "Kill the server at the end of the experiment"); parser.setOption("block-distribution", "rr-distribution", &args.block_distribution, "Use a block distribution scheme to assign clients to servers"); // Set an enumeration command line option for the io_method parser.setOption("io-method", &args.io_method, num_io_methods, io_method_vals, io_method_names, "I/O Methods for the example: \n" "\t\t\twrite-encode-sync : Write data through the RPC args, synchronous\n" "\t\t\twrite-encode-async: Write data through the RPC args - asynchronous\n" "\t\t\twrite-rdma-sync : Write data using RDMA (server pulls) - synchronous\n" "\t\t\twrite-rdma-async: Write data using RDMA (server pulls) - asynchronous\n" "\t\t\tread-encode-sync : Read data through the RPC result - synchronous\n" "\t\t\tread-encode-async: Read data through the RPC result - asynchronous\n" "\t\t\tread-rdma-sync : Read data using RDMA (server puts) - synchronous\n" "\t\t\tread-rdma-async: Read data using RDMA (server puts) - asynchronous"); // Set an enumeration command line option for the NNTI transport parser.setOption("transport", &transport_index, num_nssi_transports, nssi_transport_vals, nssi_transport_names, "NSSI transports (not all are available on every platform): \n" "\t\t\tportals|ptl : Cray or Schutt\n" "\t\t\tinfiniband|ib : libibverbs\n" "\t\t\tgemini|gni : Cray libugni (Gemini or Aries)\n" "\t\t\tbgpdcmf|dcmf : IBM BG/P DCMF\n" "\t\t\tbgqpami|pami : IBM BG/Q PAMI\n" "\t\t\tmpi : isend/irecv implementation\n" ); /* There are also two methods that control the behavior of the command line processor. First, for the command line processor to allow an unrecognized command line option to be ignored (and only have a warning printed), use: */ parser.recogniseAllOptions(true); /* Second, by default, if the parser finds a command line option it doesn't recognize or finds the --help option, it will throw an std::exception. 
If you want to prevent a command line processor from throwing an std::exception (which is important in this program since we don't have a try/catch around this) when it encounters an unrecognized option or help is printed, use: */ parser.throwExceptions(false); /* We now parse the command line where argc and argv are passed to the parse method. Note that since we have turned off std::exception throwing above we had better grab the return argument so that we can see what happened and act accordingly. */ Teuchos::CommandLineProcessor::EParseCommandLineReturn parseReturn= parser.parse( argc, argv ); if( parseReturn == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED ) { return 0; } if( parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) { return 1; // Error! } // Here is where you would use these command line arguments but for this example program // we will just print the help message with the new values of the command-line arguments. //if (rank == 0) // out << "\nPrinting help message with new values of command-line arguments ...\n\n"; //parser.printHelpMessage(argv[0],out); } TEUCHOS_STANDARD_CATCH_STATEMENTS(true,std::cerr,success); log_debug(args.debug_level, "transport_index=%d", transport_index); if (transport_index > -1) { args.transport =nssi_transport_list[transport_index]; args.transport_name=std::string(nssi_transport_names[transport_index]); } args.io_method_name=std::string(io_method_names[args.io_method]); log_debug(args.debug_level, "%d: Finished processing arguments", rank); if (!success) { MPI_Abort(MPI_COMM_WORLD, 1); } if (!args.server_flag && args.client_flag) { /* initialize logger */ if (args.logfile.empty()) { logger_init(args.debug_level, NULL); } else { char fn[1024]; sprintf(fn, "%s.client.%03d.log", args.logfile.c_str(), rank); logger_init(args.debug_level, fn); } } else if (args.server_flag && !args.client_flag) { /* initialize logger */ if (args.logfile.empty()) { logger_init(args.debug_level, NULL); } else { char fn[1024]; sprintf(fn, "%s.server.%03d.log", args.logfile.c_str(), rank); logger_init(args.debug_level, fn); } } else if (args.server_flag && args.client_flag) { /* initialize logger */ if (args.logfile.empty()) { logger_init(args.debug_level, NULL); } else { char fn[1024]; sprintf(fn, "%s.%03d.log", args.logfile.c_str(), rank); logger_init(args.debug_level, fn); } } log_level debug_level = args.debug_level; // Communicator used for both client and server (may split if using client and server) MPI_Comm comm; log_debug(debug_level, "%d: Starting xfer-service test", rank); #ifdef TRIOS_ENABLE_COMMSPLITTER if (args.transport == NSSI_RPC_MPI) { MPI_Pcontrol(0); } #endif /** * Since this test can be run as a server, client, or both, we need to play some fancy * MPI games to get the communicators working correctly. If we're executing as both * a client and a server, we split the communicator so that the client thinks it's * running by itself. */ int color = 0; // color=0-->server, color=1-->client if (args.client_flag && args.server_flag) { if (np < 2) { log_error(debug_level, "Must use at least 2 MPI processes for client and server mode"); MPI_Abort(MPI_COMM_WORLD, -1); } // Split the communicators. Put all the servers as the first ranks. 
if (rank < args.num_servers) { color = 0; log_debug(debug_level, "rank=%d is a server", rank); } else { color = 1; // all others are clients log_debug(debug_level, "rank=%d is a client", rank); } MPI_Comm_split(MPI_COMM_WORLD, color, rank, &comm); } else { if (args.client_flag) { color=1; log_debug(debug_level, "rank=%d is a client", rank); } else if (args.server_flag) { color=0; log_debug(debug_level, "rank=%d is a server", rank); } else { log_error(debug_level, "Must be either a client or a server"); MPI_Abort(MPI_COMM_WORLD, -1); } MPI_Comm_split(MPI_COMM_WORLD, color, rank, &comm); } MPI_Comm_rank(comm, &splitrank); MPI_Comm_size(comm, &splitsize); log_debug(debug_level, "%d: Finished splitting communicators", rank); /** * Initialize the Nessie interface by specifying a transport, encoding scheme, and a * recommended URL. \ref NSSI_DEFAULT_TRANSPORT is usually the best choice, since it * is often the case that only one type of transport exists on a particular platform. * Currently supported transports are \ref NSSI_RPC_PTL, \ref NSSI_RPC_GNI, and * \ref NSSI_RPC_IB. We only support one type of encoding scheme so NSSI_DEFAULT_ENCODE * should always be used for the second argument. The URL can be specified (as we did for * the server, or NULL (as we did for the client). This is a recommended value. Use the * \ref nssi_get_url function to find the actual value. */ nssi_rpc_init((nssi_rpc_transport)args.transport, NSSI_DEFAULT_ENCODE, NULL); // Get the Server URL std::string my_url(NSSI_URL_LEN, '\0'); nssi_get_url((nssi_rpc_transport)args.transport, &my_url[0], NSSI_URL_LEN); // If running as both client and server, gather and distribute // the server URLs to all the clients. if (args.server_flag && args.client_flag) { std::string all_urls; // This needs to be a vector of chars, not a string all_urls.resize(args.num_servers * NSSI_URL_LEN, '\0'); // Have servers gather their URLs if (color == 0) { assert(args.num_servers == splitsize); // these should be equal log_debug(debug_level, "%d: Gathering urls: my_url=%s", rank, my_url.c_str()); // gather all urls to rank 0 of the server comm (also rank 0 of MPI_COMM_WORLD) MPI_Gather(&my_url[0], NSSI_URL_LEN, MPI_CHAR, &all_urls[0], NSSI_URL_LEN, MPI_CHAR, 0, comm); } // broadcast the full set of server urls to all processes MPI_Bcast(&all_urls[0], all_urls.size(), MPI_CHAR, 0, MPI_COMM_WORLD); log_debug(debug_level, "%d: Bcast urls, urls.size=%d", rank, all_urls.size()); if (color == 1) { // For block distribution scheme use the utility function (in xfer_util.cpp) if (args.block_distribution) { // Use this utility function to calculate the server_index xfer_block_partition(args.num_servers, splitsize, splitrank, &server_index, &rank_in_server); } // Use a simple round robin distribution scheme else { server_index = splitrank % args.num_servers; rank_in_server = splitrank / args.num_servers; } // Copy the server url out of the list of urls int offset = server_index * NSSI_URL_LEN; args.server_url = all_urls.substr(offset, NSSI_URL_LEN); log_debug(debug_level, "client %d assigned to server \"%s\"", splitrank, args.server_url.c_str()); } log_debug(debug_level, "%d: Finished distributing server urls, server_url=%s", rank, args.server_url.c_str()); } // If running as a client only, have to get the list of servers from the urlfile. 
else if (!args.server_flag && args.client_flag){ sleep(args.delay); // give server time to get started std::vector< std::string > urlbuf; xfer_read_server_url_file(args.url_file.c_str(), urlbuf, comm); args.num_servers = urlbuf.size(); // For block distribution scheme use the utility function (in xfer_util.cpp) if (args.block_distribution) { // Use this utility function to calculate the server_index xfer_block_partition(args.num_servers, splitsize, splitrank, &server_index, &rank_in_server); } // Use a simple round robin distribution scheme else { server_index = splitrank % args.num_servers; rank_in_server = splitrank / args.num_servers; } args.server_url = urlbuf[server_index]; log_debug(debug_level, "client %d assigned to server \"%s\"", splitrank, args.server_url.c_str()); } else if (args.server_flag && !args.client_flag) { args.server_url = my_url; if (args.url_file.empty()) { log_error(debug_level, "Must set --url-file"); MPI_Abort(MPI_COMM_WORLD, -1); } xfer_write_server_url_file(args.url_file.c_str(), my_url.c_str(), comm); } // Set the debug level for the xfer service. xfer_debug_level = args.debug_level; // Print the arguments after they've all been set. log_debug(debug_level, "%d: server_url=%s", rank, args.server_url.c_str()); print_args(out, args, "%"); log_debug(debug_level, "server_url=%s", args.server_url.c_str()); //------------------------------------------------------------------------------ /** If we're running this job with a server, the server always executes on node 0. * In this example, the server is a single process. */ if (color == 0) { rc = xfer_server_main((nssi_rpc_transport)args.transport, args.num_threads, comm); log_debug(debug_level, "Server is finished"); } // ------------------------------------------------------------------------------ /** The parallel client will execute this branch. The root node, node 0, of the client connects with the server, using the \ref nssi_get_service function. Then the root broadcasts the service description to the other clients before starting the main loop of the client code by calling \ref xfer_client_main. 
*/ else { int i; int client_rank; // get rank within the client communicator MPI_Comm_rank(comm, &client_rank); nssi_init((nssi_rpc_transport)args.transport); // Only one process needs to connect to the service // TODO: Make get_service a collective call (some transports do not need a connection) //if (client_rank == 0) { { // connect to remote server for (i=0; i < args.num_retries; i++) { log_debug(debug_level, "Try to connect to server: attempt #%d, url=%s", i, args.server_url.c_str()); rc=nssi_get_service((nssi_rpc_transport)args.transport, args.server_url.c_str(), args.timeout, &xfer_svc); if (rc == NSSI_OK) break; else if (rc != NSSI_ETIMEDOUT) { log_error(xfer_debug_level, "could not get svc description: %s", nssi_err_str(rc)); break; } } } // wait for all the clients to connect MPI_Barrier(comm); //MPI_Bcast(&rc, 1, MPI_INT, 0, comm); if (rc == NSSI_OK) { if (client_rank == 0) log_debug(debug_level, "Connected to service on attempt %d\n", i); // Broadcast the service description to the other clients //log_debug(xfer_debug_level, "Bcasting svc to other clients"); //MPI_Bcast(&xfer_svc, sizeof(nssi_service), MPI_BYTE, 0, comm); log_debug(debug_level, "Starting client main"); // Start the client code xfer_client_main(args, xfer_svc, comm); MPI_Barrier(comm); // Tell one of the clients to kill the server if ((args.kill_server_flag) && (rank_in_server == 0)) { log_debug(debug_level, "%d: Halting xfer service", rank); rc = nssi_kill(&xfer_svc, 0, 5000); } rc=nssi_free_service((nssi_rpc_transport)args.transport, &xfer_svc); if (rc != NSSI_OK) { log_error(xfer_debug_level, "could not free svc description: %s", nssi_err_str(rc)); } } else { if (client_rank == 0) log_error(debug_level, "Failed to connect to service after %d attempts: ABORTING", i); success = false; //MPI_Abort(MPI_COMM_WORLD, -1); } nssi_fini((nssi_rpc_transport)args.transport); } log_debug(debug_level, "%d: clean up nssi", rank); MPI_Barrier(MPI_COMM_WORLD); // Clean up nssi_rpc rc = nssi_rpc_fini((nssi_rpc_transport)args.transport); if (rc != NSSI_OK) log_error(debug_level, "Error in nssi_rpc_fini"); log_debug(debug_level, "%d: MPI_Finalize()", rank); MPI_Finalize(); logger_fini(); if(success && (rc == NSSI_OK)) out << "\nEnd Result: TEST PASSED" << std::endl; else out << "\nEnd Result: TEST FAILED" << std::endl; return ((success && (rc==NSSI_OK)) ? 0 : 1 ); }
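The enumeration form of setOption demonstrated above (for --io-method and --transport) pairs an int variable with parallel arrays of admissible values and names. A condensed, self-contained sketch of that form; the option name, values, and names here are illustrative:

#include <iostream>
#include <Teuchos_CommandLineProcessor.hpp>

int main (int argc, char* argv[]) {
  int io_method = 0;
  const int   num_methods    = 2;
  const int   method_vals[]  = { 0, 1 };
  const char* method_names[] = { "sync", "async" };

  Teuchos::CommandLineProcessor parser;
  parser.throwExceptions (false); // report problems through the return code
  parser.setOption ("io-method", &io_method, num_methods,
                    method_vals, method_names,
                    "I/O method for the example: sync or async");
  if (parser.parse (argc, argv) !=
      Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
    return 1;
  }
  // After a successful parse, io_method holds the value matching the name
  // given on the command line (here the values are also valid indices).
  std::cout << "io-method = " << method_names[io_method] << std::endl;
  return 0;
}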
int main(int narg, char *arg[]) { Teuchos::GlobalMPISession mpiSession(&narg, &arg,0); Platform &platform = Tpetra::DefaultPlatform::getDefaultPlatform(); RCP<const Teuchos::Comm<int> > CommT = platform.getComm(); int me = CommT->getRank(); //int numProcs = CommT->getSize(); if (me == 0){ cout << "====================================================================\n" << "| |\n" << "| Example: Partition APF Mesh |\n" << "| |\n" << "| Questions? Contact Karen Devine ([email protected]), |\n" << "| Erik Boman ([email protected]), |\n" << "| Siva Rajamanickam ([email protected]). |\n" << "| |\n" << "| Pamgen's website: http://trilinos.sandia.gov/packages/pamgen |\n" << "| Zoltan2's website: http://trilinos.sandia.gov/packages/zoltan2 |\n" << "| Trilinos website: http://trilinos.sandia.gov |\n" << "| |\n" << "====================================================================\n"; } #ifdef HAVE_MPI if (me == 0) { cout << "PARALLEL executable \n"; } #else if (me == 0) { cout << "SERIAL executable \n"; } #endif /***************************************************************************/ /******************************* GET INPUTS ********************************/ /***************************************************************************/ // default values for command-line arguments std::string meshFileName("4/"); std::string modelFileName("torus.dmg"); std::string action("parma"); std::string parma_method("VtxElm"); std::string output_loc(""); int nParts = CommT->getSize(); double imbalance = 1.1; // Read run-time options. Teuchos::CommandLineProcessor cmdp (false, false); cmdp.setOption("meshfile", &meshFileName, "Mesh file with APF specifications (.smb file(s))"); cmdp.setOption("modelfile", &modelFileName, "Model file with APF specifications (.dmg file)"); cmdp.setOption("action", &action, "Method to use: mj, scotch, zoltan_rcb, parma or color"); cmdp.setOption("parma_method", &parma_method, "Method to use: Vertex, Element, VtxElm, VtxEdgeElm, Ghost, or Shape "); cmdp.setOption("nparts", &nParts, "Number of parts to create"); cmdp.setOption("imbalance", &imbalance, "Target imbalance for the partitioning method"); cmdp.setOption("output", &output_loc, "Location of new partitioned apf mesh. Ex: 4/torus.smb"); cmdp.parse(narg, arg); /***************************************************************************/ /********************** GET CELL TOPOLOGY **********************************/ /***************************************************************************/ // Get dimensions //int dim = 3; /***************************************************************************/ /***************************** GENERATE MESH *******************************/ /***************************************************************************/ #ifdef HAVE_ZOLTAN2_PARMA if (me == 0) cout << "Generating mesh ... \n\n"; //Setup for SCOREC PCU_Comm_Init(); // Generate mesh with MDS double time_1=PCU_Time(); gmi_register_mesh(); apf::Mesh2* m = apf::loadMdsMesh(modelFileName.c_str(),meshFileName.c_str()); apf::verify(m); // Creating mesh adapter if (me == 0) cout << "Creating mesh adapter ... \n\n"; typedef Zoltan2::RPIMeshAdapter<apf::Mesh2*> inputAdapter_t; inputAdapter_t ia(*CommT, m); double time_2=PCU_Time(); // Set parameters for partitioning if (me == 0) cout << "Creating parameter list ... 
\n\n"; Teuchos::ParameterList params("test params"); params.set("timer_output_stream" , "std::cout"); bool do_partitioning = false; if (action == "mj") { do_partitioning = true; params.set("debug_level", "basic_status"); params.set("imbalance_tolerance", imbalance); params.set("num_global_parts", nParts); params.set("algorithm", "multijagged"); params.set("rectilinear", "yes"); } else if (action == "scotch") { do_partitioning = true; params.set("debug_level", "no_status"); params.set("imbalance_tolerance", imbalance); params.set("num_global_parts", nParts); params.set("partitioning_approach", "partition"); params.set("objects_to_partition","mesh_elements"); params.set("algorithm", "scotch"); } else if (action == "zoltan_rcb") { do_partitioning = true; params.set("debug_level", "verbose_detailed_status"); params.set("imbalance_tolerance", imbalance); params.set("num_global_parts", nParts); params.set("partitioning_approach", "partition"); params.set("algorithm", "zoltan"); } else if (action == "parma") { do_partitioning = true; params.set("debug_level", "no_status"); params.set("imbalance_tolerance", imbalance); params.set("algorithm", "parma"); Teuchos::ParameterList &pparams = params.sublist("parma_parameters",false); pparams.set("parma_method",parma_method); pparams.set("step_size",1.1); if (parma_method=="Ghost") { pparams.set("ghost_layers",3); pparams.set("ghost_bridge",m->getDimension()-1); } params.set("compute_metrics","yes"); } else if (action=="zoltan_hg") { do_partitioning = true; params.set("debug_level", "no_status"); params.set("imbalance_tolerance", imbalance); params.set("algorithm", "zoltan"); params.set("num_global_parts", nParts); Teuchos::ParameterList &zparams = params.sublist("zoltan_parameters",false); zparams.set("LB_METHOD","HYPERGRAPH"); zparams.set("LB_APPROACH","REPARTITION"); //params.set("compute_metrics","yes"); } else if (action == "color") { params.set("debug_level", "verbose_detailed_status"); params.set("debug_output_file", "kdd"); params.set("debug_procs", "all"); } Parma_PrintPtnStats(m,"before"); // create Partitioning problem double time_3 = PCU_Time(); if (do_partitioning) { if (me == 0) cout << "Creating partitioning problem ... \n\n"; Zoltan2::PartitioningProblem<inputAdapter_t> problem(&ia, ¶ms, CommT); // call the partitioner if (me == 0) cout << "Calling the partitioner ... \n\n"; problem.solve(); if (me==0) cout << "Applying Solution to Mesh\n\n"; apf::Mesh2** new_mesh = &m; ia.applyPartitioningSolution(m,new_mesh,problem.getSolution()); if (!me) problem.printMetrics(cout); } else { if (me == 0) cout << "Creating coloring problem ... \n\n"; Zoltan2::ColoringProblem<inputAdapter_t> problem(&ia, ¶ms); // call the partitioner if (me == 0) cout << "Calling the coloring algorithm ... \n\n"; problem.solve(); problem.printTimers(); } double time_4=PCU_Time(); //if (!me) Parma_PrintPtnStats(m,"after"); if (output_loc!="") { m->writeNative(output_loc.c_str()); } // delete mesh if (me == 0) cout << "Deleting the mesh ... \n\n"; time_4-=time_3; time_2-=time_1; PCU_Max_Doubles(&time_2,1); PCU_Max_Doubles(&time_4,1); if (!me) { std::cout<<"\nConstruction time: "<<time_2<<"\n" <<"Problem time: " << time_4<<"\n\n"; } //Delete_APF_Mesh(); ia.destroy(); m->destroyNative(); apf::destroyMesh(m); //End communications PCU_Comm_Free(); #endif if (me == 0) std::cout << "PASS" << std::endl; return 0; }
int main (int argc, char *argv[]) { Teuchos::CommandLineProcessor clp; clp.setDocString("This example program measure the performance of dense Herk on Kokkos::Threads execution space.\n"); int nthreads = 0; clp.setOption("nthreads", &nthreads, "Number of threads"); int numa = 0; clp.setOption("numa", &numa, "Number of numa node"); int core_per_numa = 0; clp.setOption("core-per-numa", &core_per_numa, "Number of cores per numa node"); int max_concurrency = 250000; clp.setOption("max-concurrency", &max_concurrency, "Max number of concurrent tasks"); int memory_pool_grain_size = 16; clp.setOption("memory-pool-grain-size", &memory_pool_grain_size, "Memorypool chunk size (12 - 16)"); int mkl_nthreads = 1; clp.setOption("mkl-nthreads", &mkl_nthreads, "MKL threads for nested parallelism"); bool verbose = false; clp.setOption("enable-verbose", "disable-verbose", &verbose, "Flag for verbose printing"); int mmin = 1000; clp.setOption("mmin", &mmin, "C(mmin,mmin)"); int mmax = 8000; clp.setOption("mmax", &mmax, "C(mmax,mmax)"); int minc = 1000; clp.setOption("minc", &minc, "Increment of m"); int k = 1024; clp.setOption("k", &k, "A(mmax,k) or A(k,mmax) according to transpose flags"); int mb = 256; clp.setOption("mb", &mb, "Blocksize"); bool check = true; clp.setOption("enable-check", "disable-check", &check, "Flag for check solution"); clp.recogniseAllOptions(true); clp.throwExceptions(false); Teuchos::CommandLineProcessor::EParseCommandLineReturn r_parse= clp.parse( argc, argv ); if (r_parse == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0; if (r_parse != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL ) return -1; int r_val = 0; { exec_space::initialize(nthreads, numa, core_per_numa); std::cout << std::endl << "DenseHerkByBlocks:: Upper, ConjTranspose, Variant::One (external)" << std::endl; r_val = exampleDenseHerkByBlocks <Uplo::Upper,Trans::ConjTranspose,Variant::One,exec_space> (mmin, mmax, minc, k, mb, max_concurrency, memory_pool_grain_size, mkl_nthreads, check, verbose); exec_space::finalize(); } return r_val; }
// calls MPI_Init and MPI_Finalize int main(int argc,char * argv[]) { using Teuchos::RCP; using Teuchos::rcp_dynamic_cast; using panzer::StrPureBasisPair; using panzer::StrPureBasisComp; Teuchos::GlobalMPISession mpiSession(&argc,&argv); RCP<Epetra_Comm> Comm = Teuchos::rcp(new Epetra_MpiComm(MPI_COMM_WORLD)); Teuchos::RCP<Teuchos::Comm<int> > comm = Teuchos::rcp(new Teuchos::MpiComm<int>(Teuchos::opaqueWrapper(MPI_COMM_WORLD))); Teuchos::FancyOStream out(Teuchos::rcpFromRef(std::cout)); out.setOutputToRootOnly(0); out.setShowProcRank(true); // Build command line processor //////////////////////////////////////////////////// bool useTpetra = false; Teuchos::CommandLineProcessor clp; clp.setOption("use-tpetra","use-epetra",&useTpetra); // parse commandline argument TEUCHOS_ASSERT(clp.parse(argc,argv)==Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL); // variable declarations //////////////////////////////////////////////////// // factory definitions Teuchos::RCP<Example::EquationSetFactory> eqset_factory = Teuchos::rcp(new Example::EquationSetFactory); // where the Poisson equation is defined Example::BCStrategyFactory bc_factory; // where boundary conditions are defined panzer_stk::SquareQuadMeshFactory mesh_factory; // other declarations const std::size_t workset_size = 2*2; // construction of uncommitted (no elements) mesh //////////////////////////////////////////////////////// // set mesh factory parameters RCP<Teuchos::ParameterList> pl = rcp(new Teuchos::ParameterList); pl->set("X Blocks",1); pl->set("Y Blocks",1); pl->set("X Elements",20); pl->set("Y Elements",20); mesh_factory.setParameterList(pl); RCP<panzer_stk::STK_Interface> mesh = mesh_factory.buildUncommitedMesh(MPI_COMM_WORLD); // construct input physics and physics block //////////////////////////////////////////////////////// Teuchos::RCP<Teuchos::ParameterList> ipb = Teuchos::parameterList("Physics Blocks"); std::vector<panzer::BC> bcs; std::vector<RCP<panzer::PhysicsBlock> > physicsBlocks; { bool build_transient_support = false; testInitialization(ipb, bcs); const panzer::CellData volume_cell_data(workset_size, mesh->getCellTopology("eblock-0_0")); // GlobalData sets ostream and parameter interface to physics Teuchos::RCP<panzer::GlobalData> gd = panzer::createGlobalData(); // Can be overridden by the equation set int default_integration_order = 1; // the physics block knows how to build and register evaluators with the field manager RCP<panzer::PhysicsBlock> pb = rcp(new panzer::PhysicsBlock(ipb, "eblock-0_0", default_integration_order, volume_cell_data, eqset_factory, gd, build_transient_support)); // we can have more than one physics block, one per element block physicsBlocks.push_back(pb); } // finish building mesh, set required field variables and mesh bulk data //////////////////////////////////////////////////////////////////////// { RCP<panzer::PhysicsBlock> pb = physicsBlocks[0]; // we are assuming only one physics block const std::vector<StrPureBasisPair> & blockFields = pb->getProvidedDOFs(); // insert all fields into a set std::set<StrPureBasisPair,StrPureBasisComp> fieldNames; fieldNames.insert(blockFields.begin(),blockFields.end()); // build string for modifying vectors std::vector<std::string> dimenStr(3); dimenStr[0] = "X"; dimenStr[1] = "Y"; dimenStr[2] = "Z"; // add basis to DOF manager: block specific std::set<StrPureBasisPair,StrPureBasisComp>::const_iterator fieldItr; for (fieldItr=fieldNames.begin();fieldItr!=fieldNames.end();++fieldItr) { Teuchos::RCP<const panzer::PureBasis> basis = fieldItr->second; 
if(basis->getElementSpace()==panzer::PureBasis::HGRAD) mesh->addSolutionField(fieldItr->first,pb->elementBlockID()); else if(basis->getElementSpace()==panzer::PureBasis::HCURL) { for(int i=0;i<basis->dimension();i++) mesh->addCellField(fieldItr->first+dimenStr[i],pb->elementBlockID()); } } mesh_factory.completeMeshConstruction(*mesh,MPI_COMM_WORLD); } // build worksets //////////////////////////////////////////////////////// Teuchos::RCP<panzer_stk::WorksetFactory> wkstFactory = Teuchos::rcp(new panzer_stk::WorksetFactory(mesh)); // build STK workset factory Teuchos::RCP<panzer::WorksetContainer> wkstContainer // attach it to a workset container (uses lazy evaluation) = Teuchos::rcp(new panzer::WorksetContainer(wkstFactory,physicsBlocks,workset_size)); // build DOF Manager and linear object factory ///////////////////////////////////////////////////////////// // build the connection manager const Teuchos::RCP<panzer::ConnManager<int,int> > conn_manager = Teuchos::rcp(new panzer_stk::STKConnManager(mesh)); panzer::DOFManagerFactory<int,int> globalIndexerFactory; RCP<panzer::UniqueGlobalIndexer<int,int> > dofManager = globalIndexerFactory.buildUniqueGlobalIndexer(Teuchos::opaqueWrapper(MPI_COMM_WORLD),physicsBlocks,conn_manager); // construct some linear algebra object, build object to pass to evaluators Teuchos::RCP<panzer::LinearObjFactory<panzer::Traits> > linObjFactory; if(!useTpetra) linObjFactory = Teuchos::rcp(new panzer::EpetraLinearObjFactory<panzer::Traits,int>(Comm.getConst(),dofManager)); else linObjFactory = Teuchos::rcp(new panzer::TpetraLinearObjFactory<panzer::Traits,double,int,int>(comm,dofManager)); // Setup STK response library for writing out the solution fields //////////////////////////////////////////////////////////////////////// Teuchos::RCP<panzer::ResponseLibrary<panzer::Traits> > stkIOResponseLibrary = Teuchos::rcp(new panzer::ResponseLibrary<panzer::Traits>(wkstContainer,dofManager,linObjFactory)); { // get a vector of all the element blocks std::vector<std::string> eBlocks; { // get all element blocks and add them to the list std::vector<std::string> eBlockNames; mesh->getElementBlockNames(eBlockNames); for(std::size_t i=0;i<eBlockNames.size();i++) eBlocks.push_back(eBlockNames[i]); } panzer_stk::RespFactorySolnWriter_Builder builder; builder.mesh = mesh; stkIOResponseLibrary->addResponse("Main Field Output",eBlocks,builder); } // setup closure model ///////////////////////////////////////////////////////////// // Add in the application specific closure model factory panzer::ClosureModelFactory_TemplateManager<panzer::Traits> cm_factory; Example::ClosureModelFactory_TemplateBuilder cm_builder; cm_factory.buildObjects(cm_builder); Teuchos::ParameterList closure_models("Closure Models"); closure_models.sublist("solid").sublist("SOURCE_EFIELD").set<std::string>("Type","SIMPLE SOURCE"); // a constant source // SOURCE_EFIELD field is required by the CurlLaplacianEquationSet Teuchos::ParameterList user_data("User Data"); // user data can be empty here // setup field manager builder ///////////////////////////////////////////////////////////// Teuchos::RCP<panzer::FieldManagerBuilder> fmb = Teuchos::rcp(new panzer::FieldManagerBuilder); fmb->setWorksetContainer(wkstContainer); fmb->setupVolumeFieldManagers(physicsBlocks,cm_factory,closure_models,*linObjFactory,user_data); fmb->setupBCFieldManagers(bcs,physicsBlocks,*eqset_factory,cm_factory,bc_factory,closure_models, *linObjFactory,user_data); // setup assembly engine 
///////////////////////////////////////////////////////////// // build assembly engine: The key piece that brings together everything and // drives and controls the assembly process. Just add // matrices and vectors panzer::AssemblyEngine_TemplateManager<panzer::Traits> ae_tm; panzer::AssemblyEngine_TemplateBuilder builder(fmb,linObjFactory); ae_tm.buildObjects(builder); // Finalize construction of the STK writer response library ///////////////////////////////////////////////////////////// { user_data.set<int>("Workset Size",workset_size); stkIOResponseLibrary->buildResponseEvaluators(physicsBlocks, cm_factory, closure_models, user_data); } // assemble linear system ///////////////////////////////////////////////////////////// // build linear algebra objects: Ghost is for parallel assembly; it contains // local element contributions summed, and the global IDs // are not unique. The non-ghosted or "global" // container will contain the sum over all processors // of the ghosted objects. The global indices are unique. RCP<panzer::LinearObjContainer> ghostCont = linObjFactory->buildGhostedLinearObjContainer(); RCP<panzer::LinearObjContainer> container = linObjFactory->buildLinearObjContainer(); linObjFactory->initializeGhostedContainer(panzer::LinearObjContainer::X | panzer::LinearObjContainer::F | panzer::LinearObjContainer::Mat,*ghostCont); linObjFactory->initializeContainer(panzer::LinearObjContainer::X | panzer::LinearObjContainer::F | panzer::LinearObjContainer::Mat,*container); ghostCont->initialize(); container->initialize(); // Actually evaluate ///////////////////////////////////////////////////////////// panzer::AssemblyEngineInArgs input(ghostCont,container); input.alpha = 0; input.beta = 1; // evaluate physics: This does both the Jacobian and residual at once ae_tm.getAsObject<panzer::Traits::Jacobian>()->evaluate(input); // solve linear system ///////////////////////////////////////////////////////////// if(useTpetra) solveTpetraSystem(*container); else solveEpetraSystem(*container); // output data (optional) ///////////////////////////////////////////////////////////// // write out solution if(true) { // fill STK mesh objects Teuchos::RCP<panzer::ResponseBase> resp = stkIOResponseLibrary->getResponse<panzer::Traits::Residual>("Main Field Output"); panzer::AssemblyEngineInArgs respInput(ghostCont,container); respInput.alpha = 0; respInput.beta = 1; stkIOResponseLibrary->addResponsesToInArgs<panzer::Traits::Residual>(respInput); stkIOResponseLibrary->evaluate<panzer::Traits::Residual>(respInput); // write to exodus mesh->writeToExodus("output.exo"); } // all done! ///////////////////////////////////////////////////////////// if(useTpetra) std::cout << "ALL PASSED: Tpetra" << endl; else std::cout << "ALL PASSED: Epetra" << endl; return 0; }
int main(int narg, char *arg[]) { Teuchos::GlobalMPISession mpiSession(&narg, &arg,0); Platform &platform = Tpetra::DefaultPlatform::getDefaultPlatform(); RCP<const Teuchos::Comm<int> > CommT = platform.getComm(); int me = CommT->getRank(); //int numProcs = CommT->getSize(); if (me == 0){ cout << "====================================================================\n" << "| |\n" << "| Example: Partition APF Mesh |\n" << "| |\n" << "| Questions? Contact Karen Devine ([email protected]), |\n" << "| Erik Boman ([email protected]), |\n" << "| Siva Rajamanickam ([email protected]). |\n" << "| |\n" << "| Zoltan2's website: http://trilinos.sandia.gov/packages/zoltan2 |\n" << "| Trilinos website: http://trilinos.sandia.gov |\n" << "| |\n" << "====================================================================\n"; } #ifdef HAVE_MPI if (me == 0) { cout << "PARALLEL executable \n"; } #else if (me == 0) { cout << "SERIAL executable \n"; } #endif /***************************************************************************/ /******************************* GET INPUTS ********************************/ /***************************************************************************/ // default values for command-line arguments std::string meshFileName("4/"); std::string modelFileName("torus.dmg"); std::string action("parma"); std::string parma_method("VtxElm"); std::string output_loc(""); int nParts = CommT->getSize(); double imbalance = 1.1; int layers=2; int ghost_metric=0; // Read run-time options. Teuchos::CommandLineProcessor cmdp (false, false); cmdp.setOption("meshfile", &meshFileName, "Mesh file with APF specifications (.smb file(s))"); cmdp.setOption("modelfile", &modelFileName, "Model file with APF specifications (.dmg file)"); cmdp.setOption("action", &action, "Method to use: mj, scotch, zoltan_rcb, zoltan_hg, hg_ghost, parma, or color"); cmdp.setOption("parma_method", &parma_method, "Method to use: Vertex, Element, VtxElm, VtxEdgeElm, Ghost, Shape, or Centroid "); cmdp.setOption("nparts", &nParts, "Number of parts to create"); cmdp.setOption("imbalance", &imbalance, "Target imbalance for the partitioning method"); cmdp.setOption("output", &output_loc, "Location of the new partitioned APF mesh, e.g., 4/torus.smb"); cmdp.setOption("layers", &layers, "Number of layers for ghosting"); cmdp.setOption("ghost_metric", &ghost_metric, "0 does not compute the ghost metric; otherwise compute it both before and after partitioning"); cmdp.parse(narg, arg); /***************************************************************************/ /********************** GET CELL TOPOLOGY **********************************/ /***************************************************************************/ // Get dimensions //int dim = 3; /***************************************************************************/ /***************************** GENERATE MESH *******************************/ /***************************************************************************/ #ifdef HAVE_ZOLTAN2_PARMA if (me == 0) cout << "Generating mesh ... \n\n"; //Setup for SCOREC PCU_Comm_Init(); // Generate mesh with MDS gmi_register_mesh(); apf::Mesh2* m = apf::loadMdsMesh(modelFileName.c_str(),meshFileName.c_str()); apf::verify(m); //Data for APF MeshAdapter std::string primary="region"; std::string adjacency="face"; if (m->getDimension()==2) { primary="face"; adjacency="edge"; } bool needSecondAdj=false; // Set parameters for partitioning if (me == 0) cout << "Creating parameter list ... \n\n"; Teuchos::ParameterList params("test params"); params.set("timer_output_stream", "std::cout"); bool do_partitioning = false; if (action == "mj") { do_partitioning = true; params.set("debug_level", "basic_status"); params.set("imbalance_tolerance", imbalance); params.set("num_global_parts", nParts); params.set("algorithm", "multijagged"); params.set("rectilinear", "yes"); } else if (action == "scotch") { do_partitioning = true; params.set("debug_level", "no_status"); params.set("imbalance_tolerance", imbalance); params.set("num_global_parts", nParts); params.set("partitioning_approach", "partition"); params.set("objects_to_partition","mesh_elements"); params.set("algorithm", "scotch"); needSecondAdj=true; } else if (action == "zoltan_rcb") { do_partitioning = true; params.set("debug_level", "verbose_detailed_status"); params.set("imbalance_tolerance", imbalance); params.set("num_global_parts", nParts); params.set("partitioning_approach", "partition"); params.set("algorithm", "zoltan"); } else if (action == "parma") { do_partitioning = true; params.set("debug_level", "no_status"); params.set("imbalance_tolerance", imbalance); params.set("algorithm", "parma"); Teuchos::ParameterList &pparams = params.sublist("parma_parameters",false); pparams.set("parma_method",parma_method); pparams.set("step_size",1.1); if (parma_method=="Ghost") { pparams.set("ghost_layers",layers); pparams.set("ghost_bridge",m->getDimension()-1); } adjacency="vertex"; } else if (action=="zoltan_hg") { do_partitioning = true; params.set("debug_level", "no_status"); params.set("imbalance_tolerance", imbalance); params.set("algorithm", "zoltan"); params.set("num_global_parts", nParts); Teuchos::ParameterList &zparams = params.sublist("zoltan_parameters",false); zparams.set("LB_METHOD","HYPERGRAPH"); zparams.set("LB_APPROACH","PARTITION"); //params.set("compute_metrics","yes"); adjacency="vertex"; } else if (action=="hg_ghost") { do_partitioning = true; params.set("debug_level", "no_status"); params.set("imbalance_tolerance", imbalance); params.set("algorithm", "zoltan"); params.set("num_global_parts", nParts); params.set("hypergraph_model_type","ghosting"); params.set("ghost_layers",layers); Teuchos::ParameterList &zparams = params.sublist("zoltan_parameters",false); zparams.set("LB_METHOD","HYPERGRAPH"); zparams.set("LB_APPROACH","PARTITION"); zparams.set("PHG_EDGE_SIZE_THRESHOLD", "1.0"); primary="vertex"; adjacency="edge"; needSecondAdj=true; } else if (action == "color") { params.set("debug_level", "verbose_detailed_status"); params.set("debug_output_file", "kdd"); params.set("debug_procs", "all"); } Parma_PrintPtnStats(m,"before"); // Creating mesh adapter if (me == 0) cout << "Creating mesh adapter ... \n\n"; typedef Zoltan2::APFMeshAdapter<apf::Mesh2*> inputAdapter_t; typedef Zoltan2::EvaluatePartition<inputAdapter_t> quality_t; typedef Zoltan2::MeshAdapter<apf::Mesh2*> baseMeshAdapter_t; double time_1=PCU_Time(); inputAdapter_t *ia = new inputAdapter_t(*CommT, m,primary,adjacency,needSecondAdj); double time_2=PCU_Time(); inputAdapter_t::scalar_t* arr = new inputAdapter_t::scalar_t[ia->getLocalNumOf(ia->getPrimaryEntityType())]; for (size_t i=0;i<ia->getLocalNumOf(ia->getPrimaryEntityType());i++) { arr[i]=PCU_Comm_Self()+1; } const inputAdapter_t::scalar_t* weights=arr; ia->setWeights(ia->getPrimaryEntityType(),weights,1); if (ghost_metric) { const baseMeshAdapter_t *base_ia = dynamic_cast<const baseMeshAdapter_t*>(ia); Zoltan2::modelFlag_t graphFlags_; RCP<Zoltan2::Environment> env; try{ env = rcp(new Zoltan2::Environment(params, Teuchos::DefaultComm<int>::getComm())); } Z2_FORWARD_EXCEPTIONS RCP<const Zoltan2::Environment> envConst = Teuchos::rcp_const_cast<const Zoltan2::Environment>(env); RCP<const baseMeshAdapter_t> baseInputAdapter_(base_ia,false); Zoltan2::HyperGraphModel<inputAdapter_t> model(baseInputAdapter_,envConst,CommT, graphFlags_,Zoltan2::HYPEREDGE_CENTRIC); PrintGhostMetrics(model); } // create Partitioning problem double time_3 = PCU_Time(); if (do_partitioning) { if (me == 0) cout << "Creating partitioning problem ... \n\n"; Zoltan2::PartitioningProblem<inputAdapter_t> problem(ia, &params, CommT); // call the partitioner if (me == 0) cout << "Calling the partitioner ... \n\n"; problem.solve(); if (me==0) cout << "Applying Solution to Mesh\n\n"; apf::Mesh2** new_mesh = &m; ia->applyPartitioningSolution(m,new_mesh,problem.getSolution()); // create metric object RCP<quality_t> metricObject = rcp(new quality_t(ia, &params, CommT, &problem.getSolution())); if (!me) { metricObject->printMetrics(cout); } } else { if (me == 0) cout << "Creating coloring problem ... \n\n"; Zoltan2::ColoringProblem<inputAdapter_t> problem(ia, &params); // call the coloring algorithm if (me == 0) cout << "Calling the coloring algorithm ... \n\n"; problem.solve(); problem.printTimers(); } double time_4=PCU_Time(); //Destroy the adapter ia->destroy(); delete [] arr; //Parma_PrintPtnStats(m,"after"); if (ghost_metric) { inputAdapter_t ia2(*CommT, m,primary,adjacency,true); const baseMeshAdapter_t *base_ia = dynamic_cast<const baseMeshAdapter_t*>(&ia2); Zoltan2::modelFlag_t graphFlags_; RCP<Zoltan2::Environment> env; try{ env = rcp(new Zoltan2::Environment(params, Teuchos::DefaultComm<int>::getComm())); } Z2_FORWARD_EXCEPTIONS RCP<const Zoltan2::Environment> envConst = Teuchos::rcp_const_cast<const Zoltan2::Environment>(env); RCP<const baseMeshAdapter_t> baseInputAdapter_(base_ia,false); Zoltan2::HyperGraphModel<inputAdapter_t> model(baseInputAdapter_, envConst, CommT, graphFlags_,Zoltan2::HYPEREDGE_CENTRIC); PrintGhostMetrics(model); ia2.destroy(); } if (output_loc!="") { m->writeNative(output_loc.c_str()); } // delete mesh if (me == 0) cout << "Deleting the mesh ... \n\n"; time_4-=time_3; time_2-=time_1; PCU_Max_Doubles(&time_2,1); PCU_Max_Doubles(&time_4,1); if (!me) { std::cout<<"\nConstruction time: "<<time_2<<"\n" <<"Problem time: " << time_4<<"\n\n"; } //Delete the APF Mesh m->destroyNative(); apf::destroyMesh(m); //End communications PCU_Comm_Free(); #endif if (me == 0) std::cout << "PASS" << std::endl; return 0; }
int main (int argc, char *argv[]) { using namespace Anasazi; using Teuchos::RCP; using Teuchos::rcp; using std::endl; #ifdef HAVE_MPI // Initialize MPI MPI_Init (&argc, &argv); #endif // HAVE_MPI // Create an Epetra communicator #ifdef HAVE_MPI Epetra_MpiComm Comm (MPI_COMM_WORLD); #else Epetra_SerialComm Comm; #endif // HAVE_MPI // Create an Anasazi output manager BasicOutputManager<double> printer; printer.stream(Errors) << Anasazi_Version() << std::endl << std::endl; // Get the sorting std::string from the command line std::string which ("LM"); Teuchos::CommandLineProcessor cmdp (false, true); cmdp.setOption("sort", &which, "Targeted eigenvalues (SM or LM)."); if (cmdp.parse (argc, argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) { #ifdef HAVE_MPI MPI_Finalize (); #endif // HAVE_MPI return -1; } // Dimension of the matrix // // Discretization points in any one direction. const int nx = 10; // Size of matrix nx*nx const int NumGlobalElements = nx*nx; // Construct a Map that puts approximately the same number of // equations on each process. Epetra_Map Map (NumGlobalElements, 0, Comm); // Get update list and number of local equations from newly created Map. int NumMyElements = Map.NumMyElements (); std::vector<int> MyGlobalElements (NumMyElements); Map.MyGlobalElements (&MyGlobalElements[0]); // Create an integer vector NumNz that is used to build the Petra // matrix. NumNz[i] is the number of OFF-DIAGONAL terms for the // i-th global equation on this process. std::vector<int> NumNz (NumMyElements); /* We are building a matrix of block structure: | T -I | |-I T -I | | -I T | | ... -I| | -I T| where each block is dimension nx by nx and the matrix is on the order of nx*nx. The block T is a tridiagonal matrix. */ for (int i=0; i<NumMyElements; ++i) { if (MyGlobalElements[i] == 0 || MyGlobalElements[i] == NumGlobalElements-1 || MyGlobalElements[i] == nx-1 || MyGlobalElements[i] == nx*(nx-1) ) { NumNz[i] = 3; } else if (MyGlobalElements[i] < nx || MyGlobalElements[i] > nx*(nx-1) || MyGlobalElements[i]%nx == 0 || (MyGlobalElements[i]+1)%nx == 0) { NumNz[i] = 4; } else { NumNz[i] = 5; } } // Create an Epetra_Matrix RCP<Epetra_CrsMatrix> A = rcp (new Epetra_CrsMatrix (Epetra_DataAccess::Copy, Map, &NumNz[0])); // Compute coefficients for discrete convection-diffusion operator const double one = 1.0; std::vector<double> Values(4); std::vector<int> Indices(4); double rho = 0.0; double h = one/(nx+1); double h2 = h*h; double c = 5.0e-01*rho/h; Values[0] = -one/h2 - c; Values[1] = -one/h2 + c; Values[2] = -one/h2; Values[3] = -one/h2; double diag = 4.0 / h2; int NumEntries; for (int i=0; i<NumMyElements; ++i) { if (MyGlobalElements[i]==0) { Indices[0] = 1; Indices[1] = nx; NumEntries = 2; int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[1], &Indices[0]); TEUCHOS_TEST_FOR_EXCEPTION (info != 0, std::runtime_error, "InsertGlobalValues returned info = " << info << " != 0." ); } else if (MyGlobalElements[i] == nx*(nx-1)) { Indices[0] = nx*(nx-1)+1; Indices[1] = nx*(nx-2); NumEntries = 2; int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[1], &Indices[0]); TEUCHOS_TEST_FOR_EXCEPTION (info != 0, std::runtime_error, "InsertGlobalValues returned info = " << info << " != 0." ); } else if (MyGlobalElements[i] == nx-1) { Indices[0] = nx-2; NumEntries = 1; int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]); TEUCHOS_TEST_FOR_EXCEPTION (info != 0, std::runtime_error, "InsertGlobalValues returned info = " << info << " != 0." ); Indices[0] = 2*nx-1; info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[2], &Indices[0]); TEUCHOS_TEST_FOR_EXCEPTION (info != 0, std::runtime_error, "InsertGlobalValues returned info = " << info << " != 0." ); } else if (MyGlobalElements[i] == NumGlobalElements-1) { Indices[0] = NumGlobalElements-2; NumEntries = 1; int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]); TEUCHOS_TEST_FOR_EXCEPTION (info != 0, std::runtime_error, "InsertGlobalValues returned info = " << info << " != 0." ); Indices[0] = nx*(nx-1)-1; info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[2], &Indices[0]); TEUCHOS_TEST_FOR_EXCEPTION (info != 0, std::runtime_error, "InsertGlobalValues returned info = " << info << " != 0." ); } else if (MyGlobalElements[i] < nx) { Indices[0] = MyGlobalElements[i]-1; Indices[1] = MyGlobalElements[i]+1; Indices[2] = MyGlobalElements[i]+nx; NumEntries = 3; int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]); TEUCHOS_TEST_FOR_EXCEPTION (info != 0, std::runtime_error, "InsertGlobalValues returned info = " << info << " != 0." ); } else if (MyGlobalElements[i] > nx*(nx-1)) { Indices[0] = MyGlobalElements[i]-1; Indices[1] = MyGlobalElements[i]+1; Indices[2] = MyGlobalElements[i]-nx; NumEntries = 3; int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]); TEUCHOS_TEST_FOR_EXCEPTION (info != 0, std::runtime_error, "InsertGlobalValues returned info = " << info << " != 0." ); } else if (MyGlobalElements[i]%nx == 0) { Indices[0] = MyGlobalElements[i]+1; Indices[1] = MyGlobalElements[i]-nx; Indices[2] = MyGlobalElements[i]+nx; NumEntries = 3; int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[1], &Indices[0]); TEUCHOS_TEST_FOR_EXCEPTION (info != 0, std::runtime_error, "InsertGlobalValues returned info = " << info << " != 0." ); } else if ((MyGlobalElements[i]+1)%nx == 0) { Indices[0] = MyGlobalElements[i]-nx; Indices[1] = MyGlobalElements[i]+nx; NumEntries = 2; int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[2], &Indices[0]); TEUCHOS_TEST_FOR_EXCEPTION (info != 0, std::runtime_error, "InsertGlobalValues returned info = " << info << " != 0." ); Indices[0] = MyGlobalElements[i]-1; NumEntries = 1; info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]); TEUCHOS_TEST_FOR_EXCEPTION (info != 0, std::runtime_error, "InsertGlobalValues returned info = " << info << " != 0." ); } else { Indices[0] = MyGlobalElements[i]-1; Indices[1] = MyGlobalElements[i]+1; Indices[2] = MyGlobalElements[i]-nx; Indices[3] = MyGlobalElements[i]+nx; NumEntries = 4; int info = A->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]); TEUCHOS_TEST_FOR_EXCEPTION (info != 0, std::runtime_error, "InsertGlobalValues returned info = " << info << " != 0." ); } // Put in the diagonal entry int info = A->InsertGlobalValues(MyGlobalElements[i], 1, &diag, &MyGlobalElements[i]); TEUCHOS_TEST_FOR_EXCEPTION (info != 0, std::runtime_error, "InsertGlobalValues returned info = " << info << " != 0." ); } // Finish up int info = A->FillComplete (); TEUCHOS_TEST_FOR_EXCEPTION (info != 0, std::runtime_error, "A->FillComplete() returned info = " << info << " != 0." ); A->SetTracebackMode (1); // Shut down Epetra warning tracebacks // Create an identity matrix for the temporary mass matrix RCP<Epetra_CrsMatrix> M = rcp (new Epetra_CrsMatrix (Epetra_DataAccess::Copy, Map, 1)); for (int i=0; i<NumMyElements; i++) { Values[0] = one; Indices[0] = MyGlobalElements[i]; NumEntries = 1; info = M->InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]); TEUCHOS_TEST_FOR_EXCEPTION (info != 0, std::runtime_error, "M->InsertGlobalValues() returned info = " << info << " != 0." ); } // Finish up info = M->FillComplete (); TEUCHOS_TEST_FOR_EXCEPTION (info != 0, std::runtime_error, "M->FillComplete() returned info = " << info << " != 0." ); M->SetTracebackMode (1); // Shut down Epetra warning tracebacks //************************************ // Call the LOBPCG solver manager //*********************************** // // Variables used for the LOBPCG Method const int nev = 10; const int blockSize = 5; const int maxIters = 500; const double tol = 1.0e-8; typedef Epetra_MultiVector MV; typedef Epetra_Operator OP; typedef MultiVecTraits<double, Epetra_MultiVector> MVT; // Create an Epetra_MultiVector for an initial vector to start the // solver. Note: This needs to have the same number of columns as // the blocksize. RCP<Epetra_MultiVector> ivec = rcp (new Epetra_MultiVector (Map, blockSize)); ivec->Random (); // fill the initial vector with random values // Create the eigenproblem. RCP<BasicEigenproblem<double, MV, OP> > MyProblem = rcp (new BasicEigenproblem<double, MV, OP> (A, ivec)); // Inform the eigenproblem that the operator A is symmetric MyProblem->setHermitian (true); // Set the number of eigenvalues requested MyProblem->setNEV (nev); // Tell the eigenproblem that you are finished passing it information. const bool success = MyProblem->setProblem (); if (! success) { printer.print (Errors, "Anasazi::BasicEigenproblem::setProblem() reported an error.\n"); #ifdef HAVE_MPI MPI_Finalize (); #endif // HAVE_MPI return -1; } // Create parameter list to pass into the solver manager Teuchos::ParameterList MyPL; MyPL.set ("Which", which); MyPL.set ("Block Size", blockSize); MyPL.set ("Maximum Iterations", maxIters); MyPL.set ("Convergence Tolerance", tol); MyPL.set ("Full Ortho", true); MyPL.set ("Use Locking", true); // Create the solver manager LOBPCGSolMgr<double, MV, OP> MySolverMan (MyProblem, MyPL); // Solve the problem ReturnType returnCode = MySolverMan.solve (); // Get the eigenvalues and eigenvectors from the eigenproblem Eigensolution<double,MV> sol = MyProblem->getSolution (); std::vector<Value<double> > evals = sol.Evals; RCP<MV> evecs = sol.Evecs; // Compute residuals. std::vector<double> normR (sol.numVecs); if (sol.numVecs > 0) { Teuchos::SerialDenseMatrix<int,double> T (sol.numVecs, sol.numVecs); Epetra_MultiVector tempAevec (Map, sol.numVecs ); T.putScalar (0.0); for (int i = 0; i < sol.numVecs; ++i) { T(i,i) = evals[i].realpart; } A->Apply (*evecs, tempAevec); MVT::MvTimesMatAddMv (-1.0, *evecs, T, 1.0, tempAevec); MVT::MvNorm (tempAevec, normR); } // Print the results std::ostringstream os; os.setf (std::ios_base::right, std::ios_base::adjustfield); os << "Solver manager returned " << (returnCode == Converged ? "converged." : "unconverged.") << endl; os << endl; os << "------------------------------------------------------" << endl; os << std::setw(16) << "Eigenvalue" << std::setw(18) << "Direct Residual" << endl; os << "------------------------------------------------------" << endl; for (int i = 0; i < sol.numVecs; ++i) { os << std::setw(16) << evals[i].realpart << std::setw(18) << normR[i] / evals[i].realpart << endl; } os << "------------------------------------------------------" << endl; printer.print (Errors, os.str ()); #ifdef HAVE_MPI MPI_Finalize (); #endif // HAVE_MPI return 0; }
int main(int argc, char *argv[]) { typedef double MeshScalar; typedef double BasisScalar; typedef Tpetra::DefaultPlatform::DefaultPlatformType::NodeType Node; typedef Teuchos::ScalarTraits<Scalar>::magnitudeType magnitudeType; //double g_mean_exp = 1.906587e-01; // expected response mean //double g_std_dev_exp = 8.680605e-02; // expected response std. dev. //double g_tol = 1e-6; // tolerance on determining success using Teuchos::RCP; using Teuchos::rcp; using Teuchos::Array; using Teuchos::ArrayRCP; using Teuchos::ArrayView; using Teuchos::ParameterList; // Initialize MPI #ifdef HAVE_MPI MPI_Init(&argc,&argv); #endif // feenableexcept(FE_ALL_EXCEPT); LocalOrdinal MyPID; try { // Create a communicator for Epetra objects RCP<const Epetra_Comm> globalComm; #ifdef HAVE_MPI globalComm = rcp(new Epetra_MpiComm(MPI_COMM_WORLD)); #else globalComm = rcp(new Epetra_SerialComm); #endif MyPID = globalComm->MyPID(); // Setup command line options Teuchos::CommandLineProcessor CLP; CLP.setDocString( "This example runs an interlaced stochastic Galerkin solver.\n"); int n = 32; CLP.setOption("num_mesh", &n, "Number of mesh points in each direction"); // multigrid specific options int minAggSize = 1; CLP.setOption("min_agg_size", &minAggSize, "multigrid aggregate size"); int smootherSweeps = 3; CLP.setOption("smoother_sweeps", &smootherSweeps, "# multigrid smoother sweeps"); int plainAgg=1; CLP.setOption("plain_aggregation", &plainAgg, "plain aggregation"); LocalOrdinal nsSize=-1; CLP.setOption("nullspace_size", &nsSize, "nullspace dimension"); bool symmetric = false; CLP.setOption("symmetric", "unsymmetric", &symmetric, "Symmetric discretization"); int num_spatial_procs = -1; CLP.setOption("num_spatial_procs", &num_spatial_procs, "Number of spatial processors (set -1 for all available procs)"); SG_RF randField = UNIFORM; CLP.setOption("rand_field", &randField, num_sg_rf, sg_rf_values, sg_rf_names, "Random field type"); double mu = 0.2; CLP.setOption("mean", &mu, "Mean"); double s = 0.1; CLP.setOption("std_dev", &s, "Standard deviation"); int num_KL = 2; CLP.setOption("num_kl", &num_KL, "Number of KL terms"); int order = 3; CLP.setOption("order", &order, "Polynomial order"); bool normalize_basis = true; CLP.setOption("normalize", "unnormalize", &normalize_basis, "Normalize PC basis"); Krylov_Method solver_method = GMRES; CLP.setOption("solver_method", &solver_method, num_krylov_method, krylov_method_values, krylov_method_names, "Krylov solver method"); SG_Prec prec_method = STOCHASTIC; CLP.setOption("prec_method", &prec_method, num_sg_prec, sg_prec_values, sg_prec_names, "Preconditioner method"); SG_Div division_method = DIRECT; CLP.setOption("division_method", &division_method, num_sg_div, sg_div_values, sg_div_names, "Stochastic division method"); SG_DivPrec divprec_method = NO; CLP.setOption("divprec_method", &divprec_method, num_sg_divprec, sg_divprec_values, sg_divprec_names, "Preconditioner for division method"); Schur_option schur_option = diag; CLP.setOption("schur_option", &schur_option, num_schur_option, Schur_option_values, schur_option_names, "Schur option"); Prec_option prec_option = whole; CLP.setOption("prec_option", &prec_option, num_prec_option, Prec_option_values, prec_option_names, "Prec option"); double solver_tol = 1e-12; CLP.setOption("solver_tol", &solver_tol, "Outer solver tolerance"); double div_tol = 1e-6; CLP.setOption("div_tol", &div_tol, "Tolerance in Iterative Solver"); int prec_level = 1; CLP.setOption("prec_level", &prec_level, "Level in Schur Complement Prec 0->Solve A0u0=g0 with division; 1->Form 1x1 Schur Complement"); int max_it_div = 50; CLP.setOption("max_it_div", &max_it_div, "Maximum # of Iterations in Iterative Solver for Division"); bool equilibrate = true; //JJH 8/26/12 changing to true to match ETP example CLP.setOption("equilibrate", "noequilibrate", &equilibrate, "Equilibrate the linear system"); CLP.parse( argc, argv ); if (MyPID == 0) { std::cout << "Summary of command line options:" << std::endl << "\tnum_mesh = " << n << std::endl << "\tsymmetric = " << symmetric << std::endl << "\tnum_spatial_procs = " << num_spatial_procs << std::endl << "\trand_field = " << sg_rf_names[randField] << std::endl << "\tmean = " << mu << std::endl << "\tstd_dev = " << s << std::endl << "\tnum_kl = " << num_KL << std::endl << "\torder = " << order << std::endl << "\tnormalize_basis = " << normalize_basis << std::endl << "\tsolver_method = " << krylov_method_names[solver_method] << std::endl << "\tprec_method = " << sg_prec_names[prec_method] << std::endl << "\tdivision_method = " << sg_div_names[division_method] << std::endl << "\tdiv_tol = " << div_tol << std::endl << "\tdiv_prec = " << sg_divprec_names[divprec_method] << std::endl << "\tprec_level = " << prec_level << std::endl << "\tmax_it_div = " << max_it_div << std::endl; } bool nonlinear_expansion = false; if (randField == UNIFORM) nonlinear_expansion = false; else if (randField == LOGNORMAL) nonlinear_expansion = true; { TEUCHOS_FUNC_TIME_MONITOR("Total PCE Calculation Time"); // Create Stochastic Galerkin basis and expansion Teuchos::Array< RCP<const Stokhos::OneDOrthogPolyBasis<LocalOrdinal,BasisScalar> > > bases(num_KL); for (LocalOrdinal i=0; i<num_KL; i++) if (randField == UNIFORM) bases[i] = rcp(new Stokhos::LegendreBasis<LocalOrdinal,BasisScalar>(order, normalize_basis)); else if (randField == LOGNORMAL) bases[i] = rcp(new Stokhos::HermiteBasis<int,double>(order, normalize_basis)); RCP<const Stokhos::CompletePolynomialBasis<LocalOrdinal,BasisScalar> > basis = rcp(new Stokhos::CompletePolynomialBasis<LocalOrdinal,BasisScalar>(bases, 1e-12)); LocalOrdinal sz = basis->size(); RCP<Stokhos::Sparse3Tensor<LocalOrdinal,BasisScalar> > Cijk = basis->computeTripleProductTensor(sz); RCP<const Stokhos::Quadrature<int,double> > quad = rcp(new Stokhos::TensorProductQuadrature<int,double>(basis)); RCP<ParameterList> expn_params = Teuchos::rcp(new ParameterList); if (division_method == MEAN_DIV) { expn_params->set("Division Strategy", "Mean-Based"); expn_params->set("Use Quadrature for Division", false); } else if (division_method == DIRECT) { expn_params->set("Division Strategy", "Dense Direct"); expn_params->set("Use Quadrature for Division", false); } else if (division_method == SPD_DIRECT) { expn_params->set("Division Strategy", "SPD Dense Direct"); expn_params->set("Use Quadrature for Division", false); } else if (division_method == CGD) { expn_params->set("Division Strategy", "CG"); expn_params->set("Use Quadrature for Division", false); } else if (division_method == QUAD) { expn_params->set("Use Quadrature for Division", true); } if (divprec_method == NO) expn_params->set("Prec Strategy", "None"); else if (divprec_method == DIAG) expn_params->set("Prec Strategy", "Diag"); else if (divprec_method == JACOBI) expn_params->set("Prec Strategy", "Jacobi"); else if (divprec_method == GS) expn_params->set("Prec Strategy", "GS"); else if (divprec_method == SCHUR) expn_params->set("Prec Strategy", "Schur"); if (schur_option == diag) expn_params->set("Schur option", "diag"); else expn_params->set("Schur option", "full"); if (prec_option == linear) expn_params->set("Prec option", "linear"); if (equilibrate) expn_params->set("Equilibrate", 1); else expn_params->set("Equilibrate", 0); expn_params->set("Division Tolerance", div_tol); expn_params->set("prec_iter", prec_level); expn_params->set("max_it_div", max_it_div); RCP<Stokhos::OrthogPolyExpansion<LocalOrdinal,BasisScalar> > expansion = rcp(new Stokhos::QuadOrthogPolyExpansion<LocalOrdinal,BasisScalar>( basis, Cijk, quad, expn_params)); if (MyPID == 0) std::cout << "Stochastic Galerkin expansion size = " << sz << std::endl; // Create stochastic parallel distribution ParameterList parallelParams; parallelParams.set("Number of Spatial Processors", num_spatial_procs); // parallelParams.set("Rebalance Stochastic Graph", true); // Teuchos::ParameterList& isorropia_params = // parallelParams.sublist("Isorropia"); // isorropia_params.set("Balance objective", "nonzeros"); RCP<Stokhos::ParallelData> sg_parallel_data = rcp(new Stokhos::ParallelData(basis, Cijk, globalComm, parallelParams)); RCP<const EpetraExt::MultiComm> sg_comm = sg_parallel_data->getMultiComm(); RCP<const Epetra_Comm> app_comm = sg_parallel_data->getSpatialComm(); // Create Teuchos::Comm from Epetra_Comm RCP< Teuchos::Comm<int> > teuchos_app_comm; #ifdef HAVE_MPI RCP<const Epetra_MpiComm> app_mpi_comm = Teuchos::rcp_dynamic_cast<const Epetra_MpiComm>(app_comm); RCP<const Teuchos::OpaqueWrapper<MPI_Comm> > raw_mpi_comm = Teuchos::opaqueWrapper(app_mpi_comm->Comm()); teuchos_app_comm = rcp(new Teuchos::MpiComm<int>(raw_mpi_comm)); #else teuchos_app_comm = rcp(new Teuchos::SerialComm<int>()); #endif // Create application typedef twoD_diffusion_problem<Scalar,MeshScalar,BasisScalar,LocalOrdinal,GlobalOrdinal,Node> problem_type; RCP<problem_type> model = rcp(new problem_type(teuchos_app_comm, n, num_KL, s, mu, nonlinear_expansion, symmetric)); // Create vectors and operators typedef problem_type::Tpetra_Vector Tpetra_Vector; typedef problem_type::Tpetra_CrsMatrix Tpetra_CrsMatrix; typedef Tpetra::MatrixMarket::Writer<Tpetra_CrsMatrix> Writer; //Xpetra matrices typedef Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> Xpetra_CrsMatrix; typedef Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> Xpetra_MultiVector; typedef Xpetra::MultiVectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node> Xpetra_MultiVectorFactory; typedef Xpetra::Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> Xpetra_Operator; typedef Xpetra::TpetraCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> Xpetra_TpetraCrsMatrix; typedef Xpetra::CrsOperator<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> Xpetra_CrsOperator; typedef Belos::MueLuOp<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> Belos_MueLuOperator; //MueLu typedefs typedef MueLu::Hierarchy<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> MueLu_Hierarchy; typedef MueLu::SmootherPrototype<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps> SmootherPrototype; typedef MueLu::TrilinosSmoother<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps> TrilinosSmoother; typedef MueLu::SmootherFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps> SmootherFactory; typedef MueLu::FactoryManager<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps> FactoryManager; RCP<Tpetra_Vector> p = Tpetra::createVector<Scalar>(model->get_p_map(0)); RCP<Tpetra_Vector> x = Tpetra::createVector<Scalar>(model->get_x_map()); x->putScalar(0.0); RCP<Tpetra_Vector> f = Tpetra::createVector<Scalar>(model->get_f_map()); RCP<Tpetra_Vector> dx = Tpetra::createVector<Scalar>(model->get_x_map()); RCP<Tpetra_CrsMatrix> J = model->create_W(); RCP<Tpetra_CrsMatrix> J0; if (prec_method == MEAN) J0 = model->create_W(); // Set PCE expansion of p p->putScalar(0.0); ArrayRCP<Scalar> p_view = p->get1dViewNonConst(); for (ArrayRCP<Scalar>::size_type i=0; i<p_view.size(); i++) { p_view[i].reset(expansion); p_view[i].copyForWrite(); } Array<double> point(num_KL, 1.0); Array<double> basis_vals(sz); basis->evaluateBases(point, basis_vals); if (order > 0) { for (int i=0; i<num_KL; i++) { p_view[i].term(i,1) = 1.0 / basis_vals[i+1]; } } // Create preconditioner typedef Ifpack2::Preconditioner<Scalar,LocalOrdinal,GlobalOrdinal,Node> Tprec; RCP<Belos_MueLuOperator> M; RCP<MueLu_Hierarchy> H; RCP<Xpetra_CrsMatrix> xcrsJ = rcp(new Xpetra_TpetraCrsMatrix(J)); RCP<Xpetra_Operator> xopJ = rcp(new Xpetra_CrsOperator(xcrsJ)); if (prec_method != NONE) { ParameterList precParams; std::string prec_name = "RILUK"; precParams.set("fact: iluk level-of-fill", 1); precParams.set("fact: iluk level-of-overlap", 0); //Ifpack2::Factory factory; RCP<Xpetra_Operator> xopJ0; if (prec_method == MEAN) { RCP<Xpetra_CrsMatrix> xcrsJ0 = rcp(new Xpetra_TpetraCrsMatrix(J0)); xopJ0 = rcp(new Xpetra_CrsOperator(xcrsJ0)); //M = factory.create<Tpetra_CrsMatrix>(prec_name, J0); } else if (prec_method == STOCHASTIC) { xopJ0 = xopJ; //M = factory.create<Tpetra_CrsMatrix>(prec_name, J); } H = rcp(new MueLu_Hierarchy(xopJ0)); M = rcp(new Belos_MueLuOperator(H)); //M->setParameters(precParams); if (nsSize!=-1) sz=nsSize; RCP<Xpetra_MultiVector> Z = Xpetra_MultiVectorFactory::Build(xcrsJ->getDomainMap(), sz); size_t n = Z->getLocalLength(); for (LocalOrdinal j=0; j<sz; ++j) { ArrayRCP<Scalar> col = Z->getDataNonConst(j); for (size_t i=0; i<n; ++i) { col[i].reset(expansion); col[i].copyForWrite(); col[i].fastAccessCoeff(j) = 1.0; } } H->GetLevel(0)->Set("Nullspace", Z); //RCP<Teuchos::FancyOStream> fos = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); //fos->setOutputToRootOnly(-1); //Z->describe(*fos); } // Evaluate model model->computeResidual(*x, *p, *f); model->computeJacobian(*x, *p, *J); // Compute mean for mean-based preconditioner if (prec_method == MEAN) { size_t nrows = J->getNodeNumRows(); ArrayView<const LocalOrdinal> indices; ArrayView<const Scalar> values; J0->resumeFill(); for (size_t i=0; i<nrows; i++) { J->getLocalRowView(i, indices, values); Array<Scalar> values0(values.size()); for (LocalOrdinal j=0; j<values.size(); j++) values0[j] = values[j].coeff(0); J0->replaceLocalValues(i, indices, values0); } J0->fillComplete(); } // compute preconditioner if (prec_method != NONE) { //M->initialize(); //M->compute(); //override MueLu defaults via factory manager RCP<FactoryManager> fm = rcp( new FactoryManager() ); //smoother ParameterList smootherParamList; /* smootherParamList.set("chebyshev: degree", smootherSweeps); smootherParamList.set("chebyshev: ratio eigenvalue", (double) 20); smootherParamList.set("chebyshev: max eigenvalue", (double) -1.0); smootherParamList.set("chebyshev: min eigenvalue", (double) 1.0); smootherParamList.set("chebyshev: zero starting solution", true); RCP<SmootherPrototype> smooPrototype = rcp( new TrilinosSmoother("CHEBYSHEV", smootherParamList) ); */ smootherParamList.set("relaxation: sweeps", smootherSweeps); smootherParamList.set("relaxation: type", "Symmetric Gauss-Seidel"); RCP<SmootherPrototype> smooPrototype = rcp( new TrilinosSmoother("RELAXATION", smootherParamList) ); RCP<SmootherFactory> smooFact = rcp( new SmootherFactory(smooPrototype) ); fm->SetFactory("Smoother", smooFact); // coarse level solve ParameterList coarseParamList; coarseParamList.set("fact: level-of-fill", 0); RCP<SmootherPrototype> coarsePrototype = rcp( new TrilinosSmoother("ILUT", coarseParamList) ); RCP<SmootherFactory> coarseSolverFact = rcp( new SmootherFactory(coarsePrototype, Teuchos::null) ); fm->SetFactory("CoarseSolver", coarseSolverFact); //allow for larger aggregates typedef MueLu::UCAggregationFactory<LocalOrdinal,GlobalOrdinal,Node,LocalMatOps> MueLu_UCAggregationFactory; RCP<MueLu_UCAggregationFactory> aggFact = rcp(new MueLu_UCAggregationFactory()); aggFact->SetMinNodesPerAggregate(minAggSize); fm->SetFactory("Aggregates", aggFact); //turn off damping typedef MueLu::SaPFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps> MueLu_SaPFactory; if (plainAgg) { RCP<MueLu_SaPFactory> sapFactory = rcp(new MueLu_SaPFactory); sapFactory->SetDampingFactor( (Scalar) 0.0 ); fm->SetFactory("P", sapFactory); } H->Setup(*fm); } // Setup Belos solver RCP<ParameterList> belosParams = rcp(new ParameterList); belosParams->set("Flexible Gmres", false); belosParams->set("Num Blocks", 500);//20 belosParams->set("Convergence Tolerance", solver_tol); belosParams->set("Maximum Iterations", 1000); belosParams->set("Verbosity", 33); belosParams->set("Output Style", 1); belosParams->set("Output Frequency", 1); typedef Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> MV; typedef Belos::OperatorT<Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > OP; typedef Belos::OperatorTraits<Scalar,MV,OP> BOPT; typedef Belos::MultiVecTraits<Scalar,MV> BMVT; typedef Belos::MultiVecTraits<double,MV> BTMVT; typedef Belos::LinearProblem<double,MV,OP> BLinProb; typedef Belos::XpetraOp<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> BXpetraOp; RCP<OP> belosJ = rcp(new BXpetraOp(xopJ)); // Turns an Xpetra::Operator object into a Belos operator RCP< BLinProb > problem = rcp(new BLinProb(belosJ, dx, f)); if (prec_method != NONE) problem->setRightPrec(M); problem->setProblem(); RCP<Belos::SolverManager<double,MV,OP> > solver; if (solver_method == CG) solver = rcp(new Belos::PseudoBlockCGSolMgr<double,MV,OP>(problem, belosParams)); else if (solver_method == GMRES) solver = rcp(new Belos::BlockGmresSolMgr<double,MV,OP>(problem, belosParams)); // Print initial residual norm std::vector<double> norm_f(1); //BMVT::MvNorm(*f, norm_f); BTMVT::MvNorm(*f, norm_f); if (MyPID == 0) std::cout << "\nInitial residual norm = " << norm_f[0] << std::endl; // Solve linear system Belos::ReturnType ret = solver->solve(); if (MyPID == 0) { if (ret == Belos::Converged) std::cout << "Solver converged!" << std::endl; else std::cout << "Solver failed to converge!" << std::endl; } // Update x x->update(-1.0, *dx, 1.0); Writer::writeDenseFile("stochastic_solution.mm", x); // Compute new residual & response function RCP<Tpetra_Vector> g = Tpetra::createVector<Scalar>(model->get_g_map(0)); f->putScalar(0.0); model->computeResidual(*x, *p, *f); model->computeResponse(*x, *p, *g); // Print final residual norm //BMVT::MvNorm(*f, norm_f); BTMVT::MvNorm(*f, norm_f); if (MyPID == 0) std::cout << "\nFinal residual norm = " << norm_f[0] << std::endl; // Print response std::cout << "\nResponse = " << std::endl; //Writer::writeDense(std::cout, g); Writer::writeDenseFile("stochastic_residual.mm", f); /* double g_mean = g->get1dView()[0].mean(); double g_std_dev = g->get1dView()[0].standard_deviation(); std::cout << "g mean = " << g_mean << std::endl; std::cout << "g std_dev = " << g_std_dev << std::endl; bool passed = false; if (norm_f[0] < 1.0e-10 && std::abs(g_mean-g_mean_exp) < g_tol && std::abs(g_std_dev - g_std_dev_exp) < g_tol) passed = true; if (MyPID == 0) { if (passed) std::cout << "Example Passed!" << std::endl; else{ std::cout << "Example Failed!" << std::endl; std::cout << "expected g_mean = "<< g_mean_exp << std::endl; std::cout << "expected g_std_dev = "<< g_std_dev_exp << std::endl; } } */ } Teuchos::TimeMonitor::summarize(std::cout); Teuchos::TimeMonitor::zeroOutTimers(); } catch (std::exception& e) { std::cout << e.what() << std::endl; } catch (string& s) { std::cout << s << std::endl; } catch (char *s) { std::cout << s << std::endl; } catch (...) { std::cout << "Caught unknown exception!" << std::endl; } #ifdef HAVE_MPI MPI_Finalize(); #endif }
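// Aside (editor's sketch, not part of the original example): the "mean-based"
// preconditioner branch above builds J0 by keeping only the zeroth
// polynomial-chaos coefficient -- the mean -- of every PCE-valued entry of J,
// which turns J0 into an ordinary deterministic matrix that MueLu can handle.
// The heart of that extraction, as written in the example:
//
//   J->getLocalRowView(i, indices, values);       // PCE-valued row of J
//   for (LocalOrdinal j=0; j<values.size(); j++)
//     values0[j] = values[j].coeff(0);            // keep only the mean term
//   J0->replaceLocalValues(i, indices, values0);  // deterministic copy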
int main(int argc, char* argv[]) { int ierr = 0; try { double t, ta, tr; int p = 2; int w = p+7; // Maximum number of derivative components for SLFad const int slfad_max = 130; // Set up command line options Teuchos::CommandLineProcessor clp; clp.setDocString("This program tests the speed of various forward mode AD implementations for a finite-element-like Jacobian fill"); int num_nodes = 100000; int num_eqns = 2; int rt = 0; clp.setOption("n", &num_nodes, "Number of nodes"); clp.setOption("p", &num_eqns, "Number of equations"); clp.setOption("rt", &rt, "Include ADOL-C retaping test"); // Parse options Teuchos::CommandLineProcessor::EParseCommandLineReturn parseReturn = clp.parse(argc, argv); if (parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) return 1; double mesh_spacing = 1.0 / static_cast<double>(num_nodes - 1); // Memory pool & manager Sacado::Fad::MemPoolManager<double> poolManager(num_nodes*num_eqns); Sacado::Fad::MemPool* pool = poolManager.getMemoryPool(num_nodes*num_eqns); Sacado::Fad::DMFad<double>::setDefaultPool(pool); std::cout.setf(std::ios::scientific); std::cout.precision(p); std::cout << "num_nodes = " << num_nodes << ", num_eqns = " << num_eqns << ": " << std::endl << " " << " Time " << "\t" << "Time/Analytic" << "\t" << "Time/(2*p*Residual)" << std::endl; ta = 1.0; tr = 1.0; tr = residual_fill(num_nodes, num_eqns, mesh_spacing); ta = analytic_jac_fill(num_nodes, num_eqns, mesh_spacing); std::cout << "Analytic:      " << std::setw(w) << ta << "\t" << std::setw(w) << ta/ta << "\t" << std::setw(w) << ta/(2.0*num_eqns*tr) << std::endl; #ifdef HAVE_ADOLC #ifndef ADOLC_TAPELESS t = adolc_jac_fill(num_nodes, num_eqns, mesh_spacing); std::cout << "ADOL-C:        " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl; if (rt != 0) { t = adolc_retape_jac_fill(num_nodes, num_eqns, mesh_spacing); std::cout << "ADOL-C(rt):    " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl; } #else t = adolc_tapeless_jac_fill(num_nodes, num_eqns, mesh_spacing); std::cout << "ADOL-C(tl):    " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl; #endif #endif #ifdef HAVE_ADIC t = adic_jac_fill(num_nodes, num_eqns, mesh_spacing); std::cout << "ADIC:          " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl; #endif if (num_eqns*2 == 4) { t = fad_jac_fill< FAD::TFad<4,double> >(num_nodes, num_eqns, mesh_spacing); std::cout << "TFad:          " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl; } else if (num_eqns*2 == 16) { t = fad_jac_fill< FAD::TFad<16,double> >(num_nodes, num_eqns, mesh_spacing); std::cout << "TFad:          " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl; } else if (num_eqns*2 == 32) { t = fad_jac_fill< FAD::TFad<32,double> >(num_nodes, num_eqns, mesh_spacing); std::cout << "TFad:          " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl; } else if (num_eqns*2 == 64) { t = fad_jac_fill< FAD::TFad<64,double> >(num_nodes, num_eqns, mesh_spacing); std::cout << "TFad:          " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl; } t = fad_jac_fill< FAD::Fad<double> >(num_nodes, num_eqns, mesh_spacing); std::cout << "Fad:           " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl; if (num_eqns*2 == 4) { t = fad_jac_fill< Sacado::Fad::SFad<double,4> >(num_nodes, num_eqns, mesh_spacing); std::cout << "SFad:          " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl; } else if (num_eqns*2 == 16) { t = fad_jac_fill< Sacado::Fad::SFad<double,16> >(num_nodes, num_eqns, mesh_spacing); std::cout << "SFad:          " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl; } else if (num_eqns*2 == 32) { t = fad_jac_fill< Sacado::Fad::SFad<double,32> >(num_nodes, num_eqns, mesh_spacing); std::cout << "SFad:          " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl; } else if (num_eqns*2 == 64) { t = fad_jac_fill< Sacado::Fad::SFad<double,64> >(num_nodes, num_eqns, mesh_spacing); std::cout << "SFad:          " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl; } if (num_eqns*2 < slfad_max) { t = fad_jac_fill< Sacado::Fad::SLFad<double,slfad_max> >(num_nodes, num_eqns, mesh_spacing); std::cout << "SLFad:         " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl; } t = fad_jac_fill< Sacado::Fad::DFad<double> >(num_nodes, num_eqns, mesh_spacing); std::cout << "DFad:          " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl; t = fad_jac_fill< Sacado::Fad::SimpleFad<double> >(num_nodes, num_eqns, mesh_spacing); std::cout << "SimpleFad:     " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl; t = fad_jac_fill< Sacado::Fad::DMFad<double> >(num_nodes, num_eqns, mesh_spacing); std::cout << "DMFad:         " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl; if (num_eqns*2 == 4) { t = fad_jac_fill< Sacado::ELRFad::SFad<double,4> >(num_nodes, num_eqns, mesh_spacing); std::cout << "ELRSFad:       " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl; } else if (num_eqns*2 == 16) { t = fad_jac_fill< Sacado::ELRFad::SFad<double,16> >(num_nodes, num_eqns, mesh_spacing); std::cout << "ELRSFad:       " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl; } else if (num_eqns*2 == 32) { t = fad_jac_fill< Sacado::ELRFad::SFad<double,32> >(num_nodes, num_eqns, mesh_spacing); std::cout << "ELRSFad:       " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl; } else if (num_eqns*2 == 64) { t = fad_jac_fill< Sacado::ELRFad::SFad<double,64> >(num_nodes, num_eqns, mesh_spacing); std::cout << "ELRSFad:       " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl; } if (num_eqns*2 < slfad_max) { t = fad_jac_fill< Sacado::ELRFad::SLFad<double,slfad_max> >(num_nodes, num_eqns, mesh_spacing); std::cout << "ELRSLFad:      " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl; } t = fad_jac_fill< Sacado::ELRFad::DFad<double> >(num_nodes, num_eqns, mesh_spacing); std::cout << "ELRDFad:       " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl; if (num_eqns*2 == 4) { t = fad_jac_fill< Sacado::CacheFad::SFad<double,4> >(num_nodes, num_eqns, mesh_spacing); std::cout << "CacheSFad:     " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl; } else if (num_eqns*2 == 16) { t = fad_jac_fill< Sacado::CacheFad::SFad<double,16> >(num_nodes, num_eqns, mesh_spacing); std::cout << "CacheSFad:     " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl; } else if (num_eqns*2 == 32) { t = fad_jac_fill< Sacado::CacheFad::SFad<double,32> >(num_nodes, num_eqns, mesh_spacing); std::cout << "CacheSFad:     " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl; } else if (num_eqns*2 == 64) { t = fad_jac_fill< Sacado::CacheFad::SFad<double,64> >(num_nodes, num_eqns, mesh_spacing); std::cout << "CacheSFad:     " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl; } if (num_eqns*2 < slfad_max) { t = fad_jac_fill< Sacado::CacheFad::SLFad<double,slfad_max> >(num_nodes, num_eqns, mesh_spacing); std::cout << "CacheSLFad:    " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl; } t = fad_jac_fill< Sacado::CacheFad::DFad<double> >(num_nodes, num_eqns, mesh_spacing); std::cout << "CacheFad:      " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl; if (num_eqns*2 == 4) { t = fad_jac_fill< Sacado::ELRCacheFad::SFad<double,4> >(num_nodes, num_eqns, mesh_spacing); std::cout << "ELRCacheSFad:  " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl; } else if (num_eqns*2 == 16) { t = fad_jac_fill< Sacado::ELRCacheFad::SFad<double,16> >(num_nodes, num_eqns, mesh_spacing); std::cout << "ELRCacheSFad:  " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl; } else if (num_eqns*2 == 32) { t = fad_jac_fill< Sacado::ELRCacheFad::SFad<double,32> >(num_nodes, num_eqns, mesh_spacing); std::cout << "ELRCacheSFad:  " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl; } else if (num_eqns*2 == 64) { t = fad_jac_fill< Sacado::ELRCacheFad::SFad<double,64> >(num_nodes, num_eqns, mesh_spacing); std::cout << "ELRCacheSFad:  " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl; } if (num_eqns*2 < slfad_max) { t = fad_jac_fill< Sacado::ELRCacheFad::SLFad<double,slfad_max> >(num_nodes, num_eqns, mesh_spacing); std::cout << "ELRCacheSLFad: " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl; } t = fad_jac_fill< Sacado::ELRCacheFad::DFad<double> >(num_nodes, num_eqns, mesh_spacing); std::cout << "ELRCacheFad:   " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl; t = fad_jac_fill< Sacado::Fad::DVFad<double> >(num_nodes, num_eqns, mesh_spacing); std::cout << "DVFad:         " << std::setw(w) << t << "\t" << std::setw(w) << t/ta << "\t" << std::setw(w) << t/(2.0*num_eqns*tr) << std::endl; } catch (std::exception& e) { std::cout << e.what() << std::endl; ierr = 1; } catch (const char *s) { std::cout << s << std::endl; ierr = 1; } catch (...) { std::cout << "Caught unknown exception!" << std::endl; ierr = 1; } return ierr; }
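// Aside (editor's sketch, not part of the original driver): what every "Fad"
// variant above computes. Forward-mode AD carries derivative components along
// with each value; seeding one independent variable at index 0 gives:
//
//   #include "Sacado.hpp"
//   Sacado::Fad::DFad<double> x(1, 0, 3.0);    // 1 deriv. component, seeded: dx/dx = 1
//   Sacado::Fad::DFad<double> f = x*x + 2.0*x; // f = x^2 + 2x, derivative propagated
//   std::cout << f.val() << "\n";              // 15 (value at x = 3)
//   std::cout << f.dx(0) << "\n";              // 8  (df/dx = 2x + 2 at x = 3)
//
// The fad_jac_fill benchmarks above do the same with num_eqns*2 components per
// element function and time how fast each implementation propagates them.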
int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int argc, char *argv[]) { #include <MueLu_UseShortNames.hpp> using Teuchos::RCP; using Teuchos::rcp; // // MPI initialization // Teuchos::oblackholestream blackhole; bool success = false; bool verbose = true; try { RCP< const Teuchos::Comm<int> > comm = Teuchos::DefaultComm<int>::getComm(); // // Process command line arguments // Galeri::Xpetra::Parameters<GO> matrixParameters(clp, 81); // manage parameters of the test case Xpetra::Parameters xpetraParameters(clp); // manage parameters of Xpetra switch (clp.parse(argc,argv)) { case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; case Teuchos::CommandLineProcessor::PARSE_ERROR: case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; default:; } if (comm->getRank() == 0) std::cout << xpetraParameters << matrixParameters; // // Setup test case (Ax = b) // // Distribution RCP<const Map> map = MapFactory::Build(lib, matrixParameters.GetNumGlobalElements(), 0, comm); // Matrix RCP<Galeri::Xpetra::Problem<Map,CrsMatrixWrap,MultiVector> > Pr = Galeri::Xpetra::BuildProblem<SC, LO, GO, Map, CrsMatrixWrap, MultiVector>(matrixParameters.GetMatrixType(), map, matrixParameters.GetParameterList()); RCP<Matrix> A = Pr->BuildMatrix(); // User defined nullspace RCP<MultiVector> nullSpace = VectorFactory::Build(map,1); nullSpace->putScalar((SC) 1.0); // Define B: build a random X, apply A to form the right-hand side, then zero X RCP<Vector> X = VectorFactory::Build(map,1); RCP<Vector> B = VectorFactory::Build(map,1); X->setSeed(846930886); X->randomize(); A->apply(*X, *B, Teuchos::NO_TRANS, (SC)1.0, (SC)0.0); // X = 0 X->putScalar((SC) 0.0); // // Create a multigrid configuration // // Transfer operators RCP<TentativePFactory> TentativePFact = rcp( new TentativePFactory() ); RCP<SaPFactory> SaPFact = rcp( new SaPFactory() ); RCP<TransPFactory> RFact = rcp( new TransPFactory()); FactoryManager M; M.SetFactory("Ptent", TentativePFact); M.SetFactory("P", SaPFact); M.SetFactory("R", RFact); M.SetFactory("Smoother", Teuchos::null); //skips smoother setup M.SetFactory("CoarseSolver", Teuchos::null); //skips coarsest solve setup // // Multigrid setup phase // int startLevel = 0; int maxLevels = 10; std::cout << "=============== Setup transfers only ====================" << std::endl; Hierarchy H; H.SetDefaultVerbLevel(MueLu::Medium); RCP<Level> finestLevel = H.GetLevel(); finestLevel->Set("A", A); finestLevel->Set("Nullspace", nullSpace); // Indicate which Hierarchy operators we want to keep H.Keep("P", SaPFact.get()); //SaPFact is the generating factory for P. H.Keep("R", RFact.get()); //RFact is the generating factory for R. H.Keep("Ptent", TentativePFact.get()); //TentativePFact is the generating factory for Ptent. H.Setup(M,startLevel,maxLevels); std::cout << "=============== Setup smoothers only ====================" << std::endl; // Create a new A. RCP<Matrix> newA = Pr->BuildMatrix(); finestLevel->Set("A", newA); // Create Gauss-Seidel smoother. std::string ifpackType = "RELAXATION"; Teuchos::ParameterList ifpackList; ifpackList.set("relaxation: sweeps", (LO) 3); ifpackList.set("relaxation: damping factor", (SC) 1.0); RCP<SmootherPrototype> smootherPrototype = rcp(new TrilinosSmoother(ifpackType, ifpackList)); M.SetFactory("Smoother", rcp(new SmootherFactory(smootherPrototype))); // Create coarsest solver. 
RCP<SmootherPrototype> coarseSolverPrototype = rcp( new DirectSolver() ); RCP<SmootherFactory> coarseSolverFact = rcp( new SmootherFactory(coarseSolverPrototype, Teuchos::null) ); M.SetFactory("CoarseSolver", coarseSolverFact); // Note that we pass the number of levels back in. H.Setup(M,startLevel, H.GetNumLevels()); std::cout << "=============== Solve ====================" << std::endl; // // Solve Ax = B // LO nIts = 9; H.Iterate(*B, *X, nIts); // // Print relative residual norm // typename Teuchos::ScalarTraits<SC>::magnitudeType residualNorms = Utilities::ResidualNorm(*A, *X, *B)[0]; if (comm->getRank() == 0) { std::ios::fmtflags f(std::cout.flags()); std::cout << "||Residual|| = " << std::setiosflags(std::ios::fixed) << std::setprecision(20) << residualNorms << std::endl; std::cout.flags(f); } success = true; } TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success); return ( success ? EXIT_SUCCESS : EXIT_FAILURE ); }
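// Aside (editor's sketch, not part of the original example): the Keep() calls
// are what make the two-phase setup above work. Data registered via
// H.Keep(name, generatingFactory) survives subsequent Setup() passes, so the
// second H.Setup(M, startLevel, H.GetNumLevels()) rebuilds only the smoothers
// and coarse solver for the new matrix while reusing the kept transfers:
//
//   H.Keep("P",     SaPFact.get());            // keep prolongator
//   H.Keep("R",     RFact.get());              // keep restrictor
//   H.Keep("Ptent", TentativePFact.get());     // keep tentative prolongator
//   H.Setup(M, startLevel, maxLevels);         // phase 1: transfers only
//   // ...swap in new A and smoother/coarse-solver factories...
//   H.Setup(M, startLevel, H.GetNumLevels());  // phase 2: smoothers only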