Teuchos::RCP<Tpetra::Operator<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
build_precond (Teuchos::ParameterList& test_params,
               const Teuchos::RCP<const Tpetra::CrsMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> >& A)
{
  using Teuchos::FancyOStream;
  using Teuchos::getFancyOStream;
  using Teuchos::OSTab;
  using Teuchos::RCP;
  using Teuchos::rcpFromRef;
  using std::cout;
  using std::endl;
  typedef Tpetra::RowMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> row_matrix_type;

  Teuchos::Time timer ("precond");
  const int myRank = A->getRowMap ()->getComm ()->getRank ();
  RCP<FancyOStream> out = getFancyOStream (rcpFromRef (cout));

  typedef Ifpack2::Preconditioner<Scalar,LocalOrdinal,GlobalOrdinal,Node> Tprec;
  Teuchos::RCP<Tprec> prec;
  Ifpack2::Factory factory;

  std::string prec_name ("not specified");
  Ifpack2::getParameter (test_params, "Ifpack2::Preconditioner", prec_name);
  prec = factory.create<row_matrix_type> (prec_name, A);

  Teuchos::ParameterList tif_params;
  if (test_params.isSublist ("Ifpack2")) {
    tif_params = test_params.sublist ("Ifpack2");
  }

  if (myRank == 0) {
    *out << "Configuring, initializing, and computing Ifpack2 preconditioner" << endl;
  }
  {
    OSTab tab (*out);
    prec->setParameters (tif_params);
    prec->initialize ();
    {
      Teuchos::TimeMonitor timeMon (timer);
      prec->compute ();
    }
    if (myRank == 0) {
      *out << "Finished computing Ifpack2 preconditioner" << endl;
      OSTab tab2 (*out);
      *out << "Time (s): " << timer.totalElapsedTime () << endl;
    }
  }
  if (myRank == 0) {
    *out << "Preconditioner attributes:" << endl;
    OSTab tab (*out);
    prec->describe (*out, Teuchos::VERB_LOW);
  }
  return prec;
}
void
DistObjectKA<Packet,LocalOrdinal,GlobalOrdinal,Node>::print (std::ostream& os) const
{
  using Teuchos::FancyOStream;
  using Teuchos::getFancyOStream;
  using Teuchos::RCP;
  using Teuchos::rcpFromRef;
  using std::endl;

  RCP<FancyOStream> out = getFancyOStream (rcpFromRef (os));
  this->describe (*out, Teuchos::VERB_DEFAULT);
}
int
main (int argc, char *argv[])
{
  using Teuchos::Comm;
  using Teuchos::FancyOStream;
  using Teuchos::getFancyOStream;
  using Teuchos::oblackholestream;
  using Teuchos::OSTab;
  using Teuchos::ParameterList;
  using Teuchos::parameterList;
  using Teuchos::RCP;
  using Teuchos::rcpFromRef;
  using std::cout;
  using std::endl;

  //
  // Typedefs for Tpetra template arguments.
  //
  typedef double scalar_type;
  typedef long int global_ordinal_type;
  typedef int local_ordinal_type;
  typedef Kokkos::DefaultNode::DefaultNodeType node_type;

  //
  // Tpetra objects which are the MV and OP template parameters of the
  // Belos specialization which we are testing.
  //
  typedef Tpetra::MultiVector<scalar_type, local_ordinal_type,
    global_ordinal_type, node_type> MV;
  typedef Tpetra::Operator<scalar_type, local_ordinal_type,
    global_ordinal_type, node_type> OP;

  //
  // Other typedefs.
  //
  typedef Teuchos::ScalarTraits<scalar_type> STS;
  typedef Tpetra::CrsMatrix<scalar_type, local_ordinal_type,
    global_ordinal_type, node_type> sparse_matrix_type;

  Teuchos::GlobalMPISession mpiSession (&argc, &argv, &cout);
  RCP<const Comm<int> > comm =
    Tpetra::DefaultPlatform::getDefaultPlatform ().getComm ();
  RCP<node_type> node =
    Tpetra::DefaultPlatform::getDefaultPlatform ().getNode ();
  RCP<oblackholestream> blackHole (new oblackholestream);
  const int myRank = comm->getRank ();

  // Output stream that prints only on Rank 0.
  RCP<FancyOStream> out;
  if (myRank == 0) {
    out = Teuchos::getFancyOStream (rcpFromRef (cout));
  } else {
    out = Teuchos::getFancyOStream (blackHole);
  }

  //
  // Get test parameters from command-line processor.
  //
  // CommandLineProcessor always understands int, but may not
  // understand global_ordinal_type.  We convert to the latter below.
  int numRows = comm->getSize () * 100;
  bool tolerant = false;
  bool verbose = false;
  bool debug = false;
  Teuchos::CommandLineProcessor cmdp (false, true);
  cmdp.setOption ("numRows", &numRows,
                  "Global number of rows (and columns) in the sparse matrix to generate.");
  cmdp.setOption ("tolerant", "intolerant", &tolerant,
                  "Whether to parse files tolerantly.");
  cmdp.setOption ("verbose", "quiet", &verbose,
                  "Print messages and results.");
  cmdp.setOption ("debug", "release", &debug,
                  "Run debugging checks and print copious debugging output.");
  if (cmdp.parse (argc, argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
    *out << "\nEnd Result: TEST FAILED" << endl;
    return EXIT_FAILURE;
  }

  // Output stream for verbose output.
  RCP<FancyOStream> verbOut = verbose ? out : getFancyOStream (blackHole);

  const bool success = true;

  // Test whether it's possible to instantiate the solver.
  // This is a minimal compilation test.
  *verbOut << "Instantiating Block GCRODR solver" << endl;
  Belos::BlockGCRODRSolMgr<scalar_type, MV, OP> solver;

  //
  // Test setting solver parameters.  For now, we just use an empty
  // (but non-null) parameter list, which the solver should fill in
  // with defaults.
  //
  *verbOut << "Setting solver parameters" << endl;
  RCP<ParameterList> solverParams = parameterList ();
  solver.setParameters (solverParams);

  //
  // Create a linear system to solve.
  //
  *verbOut << "Creating linear system" << endl;
  RCP<sparse_matrix_type> A;
  RCP<MV> X_guess, X_exact, B;
  {
    typedef Belos::Tpetra::ProblemMaker<sparse_matrix_type> factory_type;
    factory_type factory (comm, node, out, tolerant, debug);

    RCP<ParameterList> problemParams = parameterList ();
    problemParams->set ("Global number of rows",
                        static_cast<global_ordinal_type> (numRows));
    problemParams->set ("Problem type", std::string ("Nonsymmetric"));
    factory.makeProblem (A, X_guess, X_exact, B, problemParams);
  }
  // Approximate solution vector is a copy of the guess vector.
  RCP<MV> X (new MV (*X_guess));

  TEUCHOS_TEST_FOR_EXCEPTION(A.is_null (), std::logic_error,
    "The sparse matrix is null!");
  TEUCHOS_TEST_FOR_EXCEPTION(X_guess.is_null (), std::logic_error,
    "The initial guess X_guess is null!");
  TEUCHOS_TEST_FOR_EXCEPTION(X_exact.is_null (), std::logic_error,
    "The exact solution X_exact is null!");
  TEUCHOS_TEST_FOR_EXCEPTION(B.is_null (), std::logic_error,
    "The right-hand side B is null!");
  TEUCHOS_TEST_FOR_EXCEPTION(X.is_null (), std::logic_error,
    "The approximate solution vector X is null!");

  typedef Belos::LinearProblem<scalar_type, MV, OP> problem_type;
  RCP<problem_type> problem (new problem_type (A, X, B));
  problem->setProblem ();
  solver.setProblem (problem);

  *verbOut << "Solving linear system" << endl;
  Belos::ReturnType result = solver.solve ();

  *verbOut << "Result of solve: "
           << Belos::convertReturnTypeToString (result) << endl;

  if (success) {
    *out << "\nEnd Result: TEST PASSED" << endl;
    return EXIT_SUCCESS;
  } else {
    *out << "\nEnd Result: TEST FAILED" << endl;
    return EXIT_FAILURE;
  }
}
void
Export<LocalOrdinal,GlobalOrdinal,Node>::print (std::ostream& os) const
{
  using Teuchos::Comm;
  using Teuchos::getFancyOStream;
  using Teuchos::RCP;
  using Teuchos::rcpFromRef;
  using Teuchos::toString;
  using std::endl;

  RCP<const Comm<int> > comm = getSourceMap ()->getComm ();
  const int myImageID = comm->getRank ();
  const int numImages = comm->getSize ();
  for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
    if (myImageID == imageCtr) {
      os << endl;
      if (myImageID == 0) { // I'm the root node (only output this info once)
        os << "Export Data Members:" << endl;
      }
      os << "Image ID       : " << myImageID << endl;
      os << "permuteFromLIDs: " << toString (getPermuteFromLIDs ()) << endl;
      os << "permuteToLIDs  : " << toString (getPermuteToLIDs ()) << endl;
      os << "remoteLIDs     : " << toString (getRemoteLIDs ()) << endl;
      os << "exportLIDs     : " << toString (getExportLIDs ()) << endl;
      os << "exportPIDs     : " << toString (getExportPIDs ()) << endl;
      os << "numSameIDs     : " << getNumSameIDs () << endl;
      os << "numPermuteIDs  : " << getNumPermuteIDs () << endl;
      os << "numRemoteIDs   : " << getNumRemoteIDs () << endl;
      os << "numExportIDs   : " << getNumExportIDs () << endl;
    }
    // A few global barriers give output a chance to complete.
    comm->barrier ();
    comm->barrier ();
    comm->barrier ();
  }

  if (myImageID == 0) {
    os << endl << endl << "Source Map:" << endl << std::flush;
  }
  comm->barrier ();
  os << *getSourceMap ();
  comm->barrier ();

  if (myImageID == 0) {
    os << endl << endl << "Target Map:" << endl << std::flush;
  }
  comm->barrier ();
  os << *getTargetMap ();
  comm->barrier ();

  // It's also helpful for debugging to print the Distributor
  // object.  Epetra_Export::Print() does this, so we can do a
  // side-by-side comparison.
  if (myImageID == 0) {
    os << endl << endl << "Distributor:" << endl << std::flush;
  }
  comm->barrier ();
  getDistributor ().describe (*(getFancyOStream (rcpFromRef (os))),
                              Teuchos::VERB_EXTREME);
}
void
SupportGraph<MatrixType>::
apply (const Tpetra::MultiVector<scalar_type, local_ordinal_type,
                                 global_ordinal_type, node_type>& X,
       Tpetra::MultiVector<scalar_type, local_ordinal_type,
                           global_ordinal_type, node_type>& Y,
       Teuchos::ETransp mode,
       scalar_type alpha,
       scalar_type beta) const
{
  using Teuchos::FancyOStream;
  using Teuchos::getFancyOStream;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcpFromRef;
  using Teuchos::Time;
  using Teuchos::TimeMonitor;
  typedef scalar_type DomainScalar;
  typedef scalar_type RangeScalar;
  typedef Tpetra::MultiVector<DomainScalar, local_ordinal_type,
    global_ordinal_type, node_type> MV;

  RCP<FancyOStream> out = getFancyOStream (rcpFromRef (std::cout));

  // Create a timer for this method, if it doesn't exist already.
  // TimeMonitor::getNewCounter registers the timer, so that
  // TimeMonitor's class methods like summarize() will report the
  // total time spent in successful calls to this method.
  const std::string timerName ("Ifpack2::SupportGraph::apply");
  RCP<Time> timer = TimeMonitor::lookupCounter (timerName);
  if (timer.is_null ()) {
    timer = TimeMonitor::getNewCounter (timerName);
  }

  { // Start timing here.
    Teuchos::TimeMonitor timeMon (*timer);

    TEUCHOS_TEST_FOR_EXCEPTION(
      ! isComputed (), std::runtime_error,
      "Ifpack2::SupportGraph::apply: You must call compute() to compute the "
      "incomplete factorization, before calling apply().");

    TEUCHOS_TEST_FOR_EXCEPTION(
      X.getNumVectors () != Y.getNumVectors (), std::runtime_error,
      "Ifpack2::SupportGraph::apply: X and Y must have the same number of "
      "columns.  X has " << X.getNumVectors () << " columns, but Y has "
      << Y.getNumVectors () << " columns.");

    TEUCHOS_TEST_FOR_EXCEPTION(
      beta != STS::zero (), std::logic_error,
      "Ifpack2::SupportGraph::apply: This method does not currently work when "
      "beta != 0.");

    // If X and Y are pointing to the same memory location,
    // we need to create an auxiliary vector, Xcopy.
    RCP<const MV> Xcopy;
    if (X.getLocalMV ().getValues () == Y.getLocalMV ().getValues ()) {
      Xcopy = rcp (new MV (X));
    } else {
      Xcopy = rcpFromRef (X);
    }

    if (alpha != STS::one ()) {
      Y.scale (alpha);
    }

    RCP<MV> Ycopy = rcpFromRef (Y);

    solver_->setB (Xcopy);
    solver_->setX (Ycopy);
    solver_->solve ();
  } // Stop timing here.

  ++NumApply_;

  // timer->totalElapsedTime() returns the total time over all timer
  // calls.  Thus, we use = instead of +=.
  ApplyTime_ = timer->totalElapsedTime ();
}
int
main (int argc, char *argv[])
{
  using namespace TrilinosCouplings; // Yes, this means I'm lazy.

  using TpetraIntrepidPoissonExample::exactResidualNorm;
  using TpetraIntrepidPoissonExample::makeMatrixAndRightHandSide;
  using TpetraIntrepidPoissonExample::solveWithBelos;
  using TpetraIntrepidPoissonExample::solveWithBelosGPU;
  using IntrepidPoissonExample::makeMeshInput;
  using IntrepidPoissonExample::parseCommandLineArguments;
  using IntrepidPoissonExample::setCommandLineArgumentDefaults;
  using IntrepidPoissonExample::setMaterialTensorOffDiagonalValue;
  using IntrepidPoissonExample::setUpCommandLineArguments;
  using Tpetra::DefaultPlatform;
  using Teuchos::Comm;
  using Teuchos::outArg;
  using Teuchos::ParameterList;
  using Teuchos::parameterList;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcpFromRef;
  using Teuchos::getFancyOStream;
  using Teuchos::FancyOStream;
  using std::endl;

  // Pull in typedefs from the example's namespace.
  typedef TpetraIntrepidPoissonExample::ST ST;
#ifdef HAVE_TRILINOSCOUPLINGS_MUELU
  typedef TpetraIntrepidPoissonExample::LO LO;
  typedef TpetraIntrepidPoissonExample::GO GO;
#endif // HAVE_TRILINOSCOUPLINGS_MUELU
  typedef TpetraIntrepidPoissonExample::Node Node;
  typedef Teuchos::ScalarTraits<ST> STS;
  typedef STS::magnitudeType MT;
  typedef Teuchos::ScalarTraits<MT> STM;
  typedef TpetraIntrepidPoissonExample::sparse_matrix_type sparse_matrix_type;
  typedef TpetraIntrepidPoissonExample::vector_type vector_type;
  typedef TpetraIntrepidPoissonExample::operator_type operator_type;

  bool success = true;
  try {
    Teuchos::oblackholestream blackHole;
    Teuchos::GlobalMPISession mpiSession (&argc, &argv, &blackHole);
    const int myRank = mpiSession.getRank ();
    //const int numProcs = mpiSession.getNProc ();

    // Get the default communicator and Kokkos Node instance
    RCP<const Comm<int> > comm =
      DefaultPlatform::getDefaultPlatform ().getComm ();
    RCP<Node> node = DefaultPlatform::getDefaultPlatform ().getNode ();

    // Did the user specify --help at the command line to print help
    // with command-line arguments?
    bool printedHelp = false;
    // Values of command-line arguments.
    int nx, ny, nz;
    std::string xmlInputParamsFile;
    bool verbose, debug;
    int maxNumItersFromCmdLine = -1; // -1 means "read from XML file"
    double tolFromCmdLine = -1.0; // -1 means "read from XML file"
    std::string solverName = "GMRES";
    ST materialTensorOffDiagonalValue = 0.0;

    // Set default values of command-line arguments.
    setCommandLineArgumentDefaults (nx, ny, nz, xmlInputParamsFile,
                                    solverName, verbose, debug);
    // Parse and validate command-line arguments.
    Teuchos::CommandLineProcessor cmdp (false, true);
    setUpCommandLineArguments (cmdp, nx, ny, nz, xmlInputParamsFile,
                               solverName, tolFromCmdLine,
                               maxNumItersFromCmdLine, verbose, debug);
    cmdp.setOption ("materialTensorOffDiagonalValue",
                    &materialTensorOffDiagonalValue, "Off-diagonal value in "
                    "the material tensor.  This controls the iteration count.  "
                    "Be careful with this if you use CG, since you can easily "
                    "make the matrix indefinite.");

    // Additional command-line arguments for GPU experimentation.
    bool gpu = false;
    cmdp.setOption ("gpu", "no-gpu", &gpu,
                    "Run example using GPU node (if supported)");
    int ranks_per_node = 1;
    cmdp.setOption ("ranks_per_node", &ranks_per_node,
                    "Number of MPI ranks per node");
    int gpu_ranks_per_node = 1;
    cmdp.setOption ("gpu_ranks_per_node", &gpu_ranks_per_node,
                    "Number of MPI ranks per node for GPUs");
    int device_offset = 0;
    cmdp.setOption ("device_offset", &device_offset,
                    "Offset for attaching MPI ranks to CUDA devices");

    // Additional command-line arguments for dumping the generated
    // matrix or its row Map to output files.
    //
    // FIXME (mfh 09 Apr 2014) Need to port these command-line
    // arguments to the Epetra version.

    // If matrixFilename is nonempty, dump the matrix to that file
    // in MatrixMarket format.
    std::string matrixFilename;
    cmdp.setOption ("matrixFilename", &matrixFilename, "If nonempty, dump the "
                    "generated matrix to that file in MatrixMarket format.");
    // If rowMapFilename is nonempty, dump the matrix's row Map to
    // that file in MatrixMarket format.
    std::string rowMapFilename;
    cmdp.setOption ("rowMapFilename", &rowMapFilename, "If nonempty, dump the "
                    "generated matrix's row Map to that file in a format that "
                    "Tpetra::MatrixMarket::Reader can read.");
    // Option to exit after building A and b (and dumping stuff to
    // files, if requested).
    bool exitAfterAssembly = false;
    cmdp.setOption ("exitAfterAssembly", "dontExitAfterAssembly",
                    &exitAfterAssembly, "If true, exit after building the "
                    "sparse matrix and dense right-hand side vector.  If either"
                    " --matrixFilename or --rowMapFilename are nonempty strings"
                    ", dump the matrix resp. row Map to their respective files "
                    "before exiting.");

    parseCommandLineArguments (cmdp, printedHelp, argc, argv, nx, ny, nz,
                               xmlInputParamsFile, solverName, verbose, debug);
    if (printedHelp) {
      // The user specified --help at the command line to print help
      // with command-line arguments.  We printed help already, so quit
      // with a happy return code.
      return EXIT_SUCCESS;
    }

    setMaterialTensorOffDiagonalValue (materialTensorOffDiagonalValue);

    // Both streams only print on MPI Rank 0.  "out" only prints if the
    // user specified --verbose.
    RCP<FancyOStream> out =
      getFancyOStream (rcpFromRef ((myRank == 0 && verbose) ?
                                   std::cout : blackHole));
    RCP<FancyOStream> err =
      getFancyOStream (rcpFromRef ((myRank == 0 && debug) ?
                                   std::cerr : blackHole));

#ifdef HAVE_MPI
    *out << "PARALLEL executable" << endl;
#else
    *out << "SERIAL executable" << endl;
#endif

    /**********************************************************************************/
    /********************************** GET XML INPUTS ********************************/
    /**********************************************************************************/
    ParameterList inputList;
    if (xmlInputParamsFile != "") {
      *out << "Reading parameters from XML file \""
           << xmlInputParamsFile << "\"..." << endl;
      Teuchos::updateParametersFromXmlFile (xmlInputParamsFile,
                                            outArg (inputList));
      if (myRank == 0) {
        inputList.print (*out, 2, true, true);
        *out << endl;
      }
    }

    // Get Pamgen mesh definition string, either from the input
    // ParameterList or from our function that makes a cube and fills in
    // the number of cells along each dimension.
    std::string meshInput = inputList.get ("meshInput", "");
    if (meshInput == "") {
      *out << "Generating mesh input string: nx = " << nx
           << ", ny = " << ny
           << ", nz = " << nz << endl;
      meshInput = makeMeshInput (nx, ny, nz);
    }

    // Total application run time
    {
      TEUCHOS_FUNC_TIME_MONITOR_DIFF("Total Time", total_time);

      RCP<sparse_matrix_type> A;
      RCP<vector_type> B, X_exact, X;
      {
        TEUCHOS_FUNC_TIME_MONITOR_DIFF("Total Assembly", total_assembly);
        makeMatrixAndRightHandSide (A, B, X_exact, X, comm, node, meshInput,
                                    out, err, verbose, debug);
      }

      // Optionally dump the matrix and/or its row Map to files.
      {
        typedef Tpetra::MatrixMarket::Writer<sparse_matrix_type> writer_type;
        if (matrixFilename != "") {
          writer_type::writeSparseFile (matrixFilename, A);
        }
        if (rowMapFilename != "") {
          writer_type::writeMapFile (rowMapFilename, * (A->getRowMap ()));
        }
      }

      if (exitAfterAssembly) {
        // Users might still be interested in assembly time.
        Teuchos::TimeMonitor::report (comm.ptr (), std::cout);
        return EXIT_SUCCESS;
      }

      const std::vector<MT> norms = exactResidualNorm (A, B, X_exact);
      // X_exact is the exact solution of the PDE, projected onto the
      // discrete mesh.  It may not necessarily equal the exact solution
      // of the linear system.
      *out << "||B - A*X_exact||_2 = " << norms[0] << endl
           << "||B||_2 = " << norms[1] << endl
           << "||A||_F = " << norms[2] << endl;

      // Setup preconditioner
      std::string prec_type = inputList.get ("Preconditioner", "None");
      RCP<operator_type> M;
      {
        TEUCHOS_FUNC_TIME_MONITOR_DIFF("Total Preconditioner Setup", total_prec);

        if (prec_type == "MueLu") {
#ifdef HAVE_TRILINOSCOUPLINGS_MUELU
          if (inputList.isSublist ("MueLu")) {
            ParameterList mueluParams = inputList.sublist ("MueLu");
            M = MueLu::CreateTpetraPreconditioner<ST,LO,GO,Node> (A, mueluParams);
          } else {
            M = MueLu::CreateTpetraPreconditioner<ST,LO,GO,Node> (A);
          }
#else // NOT HAVE_TRILINOSCOUPLINGS_MUELU
          TEUCHOS_TEST_FOR_EXCEPTION(
            prec_type == "MueLu", std::runtime_error, "Tpetra scaling example: "
            "In order to precondition with MueLu, you must have built Trilinos "
            "with the MueLu package enabled.");
#endif // HAVE_TRILINOSCOUPLINGS_MUELU
        }
      } // setup preconditioner

      // Get the convergence tolerance for each linear solve.
      // If the user provided a nonnegative value at the command
      // line, it overrides any value in the input ParameterList.
      MT tol = STM::squareroot (STM::eps ()); // default value
      if (tolFromCmdLine < STM::zero ()) {
        tol = inputList.get ("Convergence Tolerance", tol);
      } else {
        tol = tolFromCmdLine;
      }

      // Get the maximum number of iterations for each linear solve.
      // If the user provided a value other than -1 at the command
      // line, it overrides any value in the input ParameterList.
      int maxNumIters = 200; // default value
      if (maxNumItersFromCmdLine == -1) {
        maxNumIters = inputList.get ("Maximum Iterations", maxNumIters);
      } else {
        maxNumIters = maxNumItersFromCmdLine;
      }

      // Get the number of "time steps."  We imitate a time-dependent
      // PDE by doing this many linear solves.
      const int num_steps = inputList.get ("Number of Time Steps", 1);

      // Do the linear solve(s).
      bool converged = false;
      int numItersPerformed = 0;
      if (gpu) {
        TEUCHOS_FUNC_TIME_MONITOR_DIFF("Total GPU Solve", total_solve);
        solveWithBelosGPU (converged, numItersPerformed, tol, maxNumIters,
                           num_steps, ranks_per_node, gpu_ranks_per_node,
                           device_offset, prec_type, X, A, B, Teuchos::null, M);
      } else {
        TEUCHOS_FUNC_TIME_MONITOR_DIFF("Total Solve", total_solve);
        solveWithBelos (converged, numItersPerformed, solverName, tol,
                        maxNumIters, num_steps, X, A, B, Teuchos::null, M);
      }

      // Compute ||X-X_exact||_2
      const MT norm_x = X_exact->norm2 ();
      X_exact->update (-1.0, *X, 1.0);
      const MT norm_error = X_exact->norm2 ();
      *out << endl
           << "||X - X_exact||_2 / ||X_exact||_2 = " << norm_error / norm_x
           << endl;
    } // total time block

    // Summarize timings
    Teuchos::TimeMonitor::report (comm.ptr (), std::cout);
  } // try
  TEUCHOS_STANDARD_CATCH_STATEMENTS(true, std::cerr, success);

  if (success) {
    return EXIT_SUCCESS;
  } else {
    return EXIT_FAILURE;
  }
}
int
main (int argc, char *argv[])
{
  using Teuchos::ArrayRCP;
  using Teuchos::ArrayView;
  using Teuchos::Comm;
  using Teuchos::CommandLineProcessor;
  using Teuchos::FancyOStream;
  using Teuchos::getFancyOStream;
  using Teuchos::OSTab;
  using Teuchos::ptr;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcpFromRef;
  using std::cout;
  using std::endl;

  bool success = true; // May be changed by tests

  Teuchos::oblackholestream blackHole;
  //Teuchos::GlobalMPISession (&argc, &argv, &blackHole);
  MPI_Init (&argc, &argv);

  //
  // Construct communicators, and verify that we are on 4 processors.
  //

  // Construct a Teuchos Comm object.
  RCP<const Comm<int> > teuchosComm = Teuchos::DefaultComm<int>::getComm ();
  const int numProcs = teuchosComm->getSize ();
  const int pid = teuchosComm->getRank ();
  RCP<FancyOStream> pOut =
    getFancyOStream (rcpFromRef ((pid == 0) ? std::cout : blackHole));
  FancyOStream& out = *pOut;
  // Verify that we are on four processors (which manifests the bug).
  if (teuchosComm->getSize () != 4) {
    out << "This test must be run on four processors.  Exiting ..." << endl;
    return EXIT_FAILURE;
  }

  // We also need an Epetra Comm, so that we can compare Tpetra and
  // Epetra results.
  Epetra_MpiComm epetraComm (MPI_COMM_WORLD);

  //
  // Default values of command-line options.
  //
  bool verbose = false;
  bool printEpetra = false;
  bool printTpetra = false;
  CommandLineProcessor cmdp (false, true);
  //
  // Set command-line options.
  //
  cmdp.setOption ("verbose", "quiet", &verbose, "Print verbose output.");
  // Epetra and Tpetra output will ask the Maps and Import objects to
  // print themselves in distributed, maximally verbose fashion.  It's
  // best to turn on either Epetra or Tpetra, but not both.  Then you
  // can compare their output side by side.
  cmdp.setOption ("printEpetra", "dontPrintEpetra", &printEpetra,
                  "Print Epetra output (in verbose mode only).");
  cmdp.setOption ("printTpetra", "dontPrintTpetra", &printTpetra,
                  "Print Tpetra output (in verbose mode only).");
  // Parse command-line options.
  if (cmdp.parse (argc, argv) != CommandLineProcessor::PARSE_SUCCESSFUL) {
    out << "End Result: TEST FAILED" << endl;
    MPI_Finalize ();
    return EXIT_FAILURE;
  }

  if (verbose) {
    out << "Running test on " << numProcs << " process"
        << (numProcs != 1 ? "es" : "") << "." << endl;
  }

  // The maps for this problem are derived from a 3D structured mesh.
  // In this example, the dimensions are 4x4x2 and there are 2
  // processors assigned to the first dimension and 2 processors
  // assigned to the second dimension, with no parallel decomposition
  // along the third dimension.  The "owned" arrays represent the
  // one-to-one map, with each array representing a 2x2x2 slice.  If
  // DIMENSIONS == 2, then only the first 4 values will be used,
  // representing a 2x2(x1) slice.
  int owned0[8] = { 0,  1,  4,  5, 16, 17, 20, 21};
  int owned1[8] = { 2,  3,  6,  7, 18, 19, 22, 23};
  int owned2[8] = { 8,  9, 12, 13, 24, 25, 28, 29};
  int owned3[8] = {10, 11, 14, 15, 26, 27, 30, 31};

  // The "overlap" arrays represent the map with communication
  // elements, with each array representing a 3x3x2 slice.  If
  // DIMENSIONS == 2, then only the first 9 values will be used,
  // representing a 3x3(x1) slice.
  int overlap0[18] = {0, 1, 2, 4,  5,  6,  8,  9, 10, 16, 17, 18, 20, 21, 22, 24, 25, 26};
  int overlap1[18] = {1, 2, 3, 5,  6,  7,  9, 10, 11, 17, 18, 19, 21, 22, 23, 25, 26, 27};
  int overlap2[18] = {4, 5, 6, 8,  9, 10, 12, 13, 14, 20, 21, 22, 24, 25, 26, 28, 29, 30};
  int overlap3[18] = {5, 6, 7, 9, 10, 11, 13, 14, 15, 21, 22, 23, 25, 26, 27, 29, 30, 31};

  // Construct the owned and overlap maps for both Epetra and Tpetra.
  int* owned;
  int* overlap;
  if (pid == 0) {
    owned = owned0;
    overlap = overlap0;
  }
  else if (pid == 1) {
    owned = owned1;
    overlap = overlap1;
  }
  else if (pid == 2) {
    owned = owned2;
    overlap = overlap2;
  }
  else {
    owned = owned3;
    overlap = overlap3;
  }

#if DIMENSIONS == 2
  int ownedSize = 4;
  int overlapSize = 9;
#elif DIMENSIONS == 3
  int ownedSize = 8;
  int overlapSize = 18;
#endif

  // Create the two Epetra Maps.  Source for the Import is the owned
  // map; target for the Import is the overlap map.
  Epetra_Map epetraOwnedMap (-1, ownedSize, owned, 0, epetraComm);
  Epetra_Map epetraOverlapMap (-1, overlapSize, overlap, 0, epetraComm);

  if (verbose && printEpetra) {
    // Have the Epetra_Map objects describe themselves.
    //
    // Epetra_BlockMap::Print() takes an std::ostream&, and expects
    // all MPI processes to be able to write to it.  (The method
    // handles its own synchronization.)
    out << "Epetra owned map:" << endl;
    epetraOwnedMap.Print (std::cout);
    out << "Epetra overlap map:" << endl;
    epetraOverlapMap.Print (std::cout);
  }

  // Create the two Tpetra Maps.  The "invalid" global element count
  // input tells Tpetra::Map to compute the global number of elements
  // itself.
  const int invalid = Teuchos::OrdinalTraits<int>::invalid ();
  RCP<Tpetra::Map<int> > tpetraOwnedMap =
    rcp (new Tpetra::Map<int> (invalid, ArrayView<int> (owned, ownedSize),
                               0, teuchosComm));
  tpetraOwnedMap->setObjectLabel ("Owned Map");
  RCP<Tpetra::Map<int> > tpetraOverlapMap =
    rcp (new Tpetra::Map<int> (invalid, ArrayView<int> (overlap, overlapSize),
                               0, teuchosComm));
  tpetraOverlapMap->setObjectLabel ("Overlap Map");

  // In verbose mode, have the Tpetra::Map objects describe themselves.
  if (verbose && printTpetra) {
    Teuchos::EVerbosityLevel verb = Teuchos::VERB_EXTREME;

    // Tpetra::Map::describe() takes a FancyOStream, but expects all
    // MPI processes to be able to write to it.  (The method handles
    // its own synchronization.)
    RCP<FancyOStream> globalOut = getFancyOStream (rcpFromRef (std::cout));
    out << "Tpetra owned map:" << endl;
    {
      OSTab tab (globalOut);
      tpetraOwnedMap->describe (*globalOut, verb);
    }
    out << "Tpetra overlap map:" << endl;
    {
      OSTab tab (globalOut);
      tpetraOverlapMap->describe (*globalOut, verb);
    }
  }

  // Use the owned and overlap maps to construct an importer for both
  // Epetra and Tpetra.
  Epetra_Import epetraImporter (epetraOverlapMap, epetraOwnedMap);
  Tpetra::Import<int> tpetraImporter (tpetraOwnedMap, tpetraOverlapMap);

  // In verbose mode, have the Epetra_Import object describe itself.
  if (verbose && printEpetra) {
    out << "Epetra importer:" << endl;
    // The importer's Print() method takes an std::ostream& and plans
    // to write to it on all MPI processes (handling synchronization
    // itself).
    epetraImporter.Print (std::cout);
    out << endl;
  }

  // In verbose mode, have the Tpetra::Import object describe itself.
  if (verbose && printTpetra) {
    out << "Tpetra importer:" << endl;
    // The importer doesn't implement Teuchos::Describable.  It wants
    // std::cout and plans to write to it on all MPI processes (with
    // its own synchronization).
    tpetraImporter.print (std::cout);
    out << endl;
  }

  // Construct owned and overlap vectors for both Epetra and Tpetra.
  Epetra_Vector epetraOwnedVector (epetraOwnedMap);
  Epetra_Vector epetraOverlapVector (epetraOverlapMap);
  Tpetra::Vector<double,int> tpetraOwnedVector (tpetraOwnedMap);
  Tpetra::Vector<double,int> tpetraOverlapVector (tpetraOverlapMap);

  // The test is as follows: initialize the owned and overlap vectors
  // with global IDs in the owned regions.
  // Initialize the overlap vectors to equal -1 in the overlap regions.
  // Then perform a communication from the owned vectors to the overlap
  // vectors.  The resulting overlap vectors should have global IDs
  // everywhere and all of the -1 values should be overwritten.

  // Initialize.  We cannot assign directly to the Tpetra Vectors;
  // instead, we extract nonconst views and assign to those.  The
  // results aren't guaranteed to be committed to the vector unless
  // the views are released (by assigning Teuchos::null to them).
  epetraOverlapVector.PutScalar (-1);
  tpetraOverlapVector.putScalar (-1);
  ArrayRCP<double> tpetraOwnedArray = tpetraOwnedVector.getDataNonConst (0);
  ArrayRCP<double> tpetraOverlapArray = tpetraOverlapVector.getDataNonConst (0);
  for (int owned_lid = 0;
       owned_lid < tpetraOwnedMap->getNodeElementList ().size ();
       ++owned_lid) {
    int gid = tpetraOwnedMap->getGlobalElement (owned_lid);
    int overlap_lid = tpetraOverlapMap->getLocalElement (gid);
    epetraOwnedVector[owned_lid] = gid;
    epetraOverlapVector[overlap_lid] = gid;
    tpetraOwnedArray[owned_lid] = gid;
    tpetraOverlapArray[overlap_lid] = gid;
  }
  // Make sure that the changes to the Tpetra Vector were committed,
  // by releasing the nonconst views.
  tpetraOwnedArray = Teuchos::null;
  tpetraOverlapArray = Teuchos::null;

  // Test the Epetra and Tpetra Import.
  if (verbose) {
    out << "Testing Import from owned Map to overlap Map:" << endl << endl;
  }
  epetraOverlapVector.Import (epetraOwnedVector, epetraImporter, Insert);
  tpetraOverlapVector.doImport (tpetraOwnedVector, tpetraImporter,
                                Tpetra::INSERT);

  // Check the Import results.
  success = countFailures (teuchosComm, epetraOwnedMap, epetraOwnedVector,
                           epetraOverlapMap, epetraOverlapVector,
                           tpetraOwnedMap, tpetraOwnedVector,
                           tpetraOverlapMap, tpetraOverlapVector, verbose);

  const bool testOtherDirections = false;
  if (testOtherDirections) {
    //
    // Reinitialize the Tpetra vectors and test whether Export works.
    //
    tpetraOverlapVector.putScalar (-1);
    tpetraOwnedArray = tpetraOwnedVector.getDataNonConst (0);
    tpetraOverlapArray = tpetraOverlapVector.getDataNonConst (0);
    for (int owned_lid = 0;
         owned_lid < tpetraOwnedMap->getNodeElementList ().size ();
         ++owned_lid) {
      int gid = tpetraOwnedMap->getGlobalElement (owned_lid);
      int overlap_lid = tpetraOverlapMap->getLocalElement (gid);
      tpetraOwnedArray[owned_lid] = gid;
      tpetraOverlapArray[overlap_lid] = gid;
    }
    // Make sure that the changes to the Tpetra Vector were committed,
    // by releasing the nonconst views.
    tpetraOwnedArray = Teuchos::null;
    tpetraOverlapArray = Teuchos::null;

    // Make a Tpetra Export object, and test the export.
    Tpetra::Export<int> tpetraExporter1 (tpetraOwnedMap, tpetraOverlapMap);
    if (verbose) {
      out << "Testing Export from owned Map to overlap Map:" << endl << endl;
    }
    tpetraOverlapVector.doExport (tpetraOwnedVector, tpetraExporter1,
                                  Tpetra::INSERT);

    // Check the Export results.
    success = countFailures (teuchosComm, epetraOwnedMap, epetraOwnedVector,
                             epetraOverlapMap, epetraOverlapVector,
                             tpetraOwnedMap, tpetraOwnedVector,
                             tpetraOverlapMap, tpetraOverlapVector, verbose);

    //
    // Reinitialize the Tpetra vectors and see what Import in the
    // other direction does.
    //
    tpetraOverlapVector.putScalar (-1);
    tpetraOwnedArray = tpetraOwnedVector.getDataNonConst (0);
    tpetraOverlapArray = tpetraOverlapVector.getDataNonConst (0);
    for (int owned_lid = 0;
         owned_lid < tpetraOwnedMap->getNodeElementList ().size ();
         ++owned_lid) {
      int gid = tpetraOwnedMap->getGlobalElement (owned_lid);
      int overlap_lid = tpetraOverlapMap->getLocalElement (gid);
      tpetraOwnedArray[owned_lid] = gid;
      tpetraOverlapArray[overlap_lid] = gid;
    }
    // Make sure that the changes to the Tpetra Vector were committed,
    // by releasing the nonconst views.
    tpetraOwnedArray = Teuchos::null;
    tpetraOverlapArray = Teuchos::null;

    if (verbose) {
      out << "Testing Import from overlap Map to owned Map:" << endl << endl;
    }
    Tpetra::Import<int> tpetraImporter2 (tpetraOverlapMap, tpetraOwnedMap);
    tpetraOwnedVector.doImport (tpetraOverlapVector, tpetraImporter2,
                                Tpetra::INSERT);

    // Check the Import results.
    success = countFailures (teuchosComm, epetraOwnedMap, epetraOwnedVector,
                             epetraOverlapMap, epetraOverlapVector,
                             tpetraOwnedMap, tpetraOwnedVector,
                             tpetraOverlapMap, tpetraOverlapVector, verbose);
  } // if testOtherDirections

  out << "End Result: TEST " << (success ? "PASSED" : "FAILED") << endl;

  MPI_Finalize ();
  return success ? EXIT_SUCCESS : EXIT_FAILURE;
}
int
main (int argc, char *argv[])
{
  using Teuchos::as;
  using Teuchos::Comm;
  using Teuchos::FancyOStream;
  using Teuchos::getFancyOStream;
  using Teuchos::ParameterList;
  using Teuchos::parameterList;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcpFromRef;
  using std::cerr;
  using std::cout;
  using std::endl;

  typedef double scalar_type;
  typedef int local_ordinal_type;
#if defined(HAVE_TPETRA_EXPLICIT_INSTANTIATION) && defined(HAVE_TPETRA_INST_INT_LONG)
  typedef long global_ordinal_type;
#else
  typedef int global_ordinal_type;
#endif
  typedef Kokkos::SerialNode node_type;

  Teuchos::oblackholestream blackHole;
  Teuchos::GlobalMPISession mpiSession (&argc, &argv, &blackHole);
  RCP<const Comm<int> > comm = Teuchos::DefaultComm<int>::getComm ();

  //
  // Read in command line arguments.
  //
  int unknownsPerNode = 20; // number of unknowns per process
  int unknownsPerElt = 3; // number of unknowns per (overlapping) element
  int numCols = 1;
  bool verbose = false;

  Teuchos::CommandLineProcessor cmdp (false, true);
  cmdp.setOption ("unknownsPerNode", &unknownsPerNode,
                  "Number of unknowns per process");
  cmdp.setOption ("unknownsPerElt", &unknownsPerElt,
                  "Number of unknowns per (overlapping) element.");
  cmdp.setOption ("numCols", &numCols,
                  "Number of columns in the multivector.  Must be positive.");
  cmdp.setOption ("verbose", "quiet", &verbose,
                  "Whether to print verbose output.");
  if (cmdp.parse (argc, argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
    return EXIT_FAILURE;
  }

  const Teuchos::EVerbosityLevel verbLevel =
    verbose ? Teuchos::VERB_EXTREME : Teuchos::VERB_DEFAULT;
  RCP<FancyOStream> out = verbose ?
    getFancyOStream (rcpFromRef (std::cout)) :
    getFancyOStream (rcpFromRef (blackHole));

  RCP<ParameterList> nodeParams = parameterList ("Kokkos Node");
  RCP<node_type> node = makeSerialNode (nodeParams);

  // Run the test.
  bool succeeded = true;
  try {
    Tpetra::Test::testMultiVectorFiller<scalar_type, local_ordinal_type,
      global_ordinal_type, node_type> (comm, node,
                                       as<size_t> (unknownsPerNode),
                                       as<global_ordinal_type> (unknownsPerElt),
                                       as<size_t> (numCols), out, verbLevel);
    succeeded = true;
  } catch (std::exception& e) {
    *out << "MultiVectorFiller test threw an exception: " << e.what () << endl;
    succeeded = false;
  }

  const int localSuccess = succeeded ? 1 : 0;
  int globalSuccess = localSuccess;
  Teuchos::reduceAll (*comm, Teuchos::REDUCE_SUM, localSuccess,
                      Teuchos::ptr (&globalSuccess));
  if (globalSuccess) {
    std::cout << "End Result: TEST PASSED" << endl;
    return EXIT_SUCCESS;
  } else {
    std::cout << "End Result: TEST FAILED" << endl;
    return EXIT_FAILURE;
  }
}
int
main (int argc, char *argv[])
{
  using namespace TrilinosCouplings; // Yes, this means I'm lazy.

  using TpetraIntrepidPoissonExample::exactResidualNorm;
  using TpetraIntrepidPoissonExample::makeMatrixAndRightHandSide;
  using TpetraIntrepidPoissonExample::solveWithBelos;
  using TpetraIntrepidPoissonExample::solveWithBelosGPU;
  using IntrepidPoissonExample::makeMeshInput;
  using IntrepidPoissonExample::setCommandLineArgumentDefaults;
  using IntrepidPoissonExample::setUpCommandLineArguments;
  using IntrepidPoissonExample::parseCommandLineArguments;
  using Tpetra::DefaultPlatform;
  using Teuchos::Comm;
  using Teuchos::outArg;
  using Teuchos::ParameterList;
  using Teuchos::parameterList;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcpFromRef;
  using Teuchos::getFancyOStream;
  using Teuchos::FancyOStream;
  using std::endl;

  // Pull in typedefs from the example's namespace.
  typedef TpetraIntrepidPoissonExample::ST ST;
  typedef TpetraIntrepidPoissonExample::LO LO;
  typedef TpetraIntrepidPoissonExample::GO GO;
  typedef TpetraIntrepidPoissonExample::Node Node;
  typedef Teuchos::ScalarTraits<ST> STS;
  typedef STS::magnitudeType MT;
  typedef Teuchos::ScalarTraits<MT> STM;
  typedef TpetraIntrepidPoissonExample::sparse_matrix_type sparse_matrix_type;
  typedef TpetraIntrepidPoissonExample::vector_type vector_type;
  typedef TpetraIntrepidPoissonExample::operator_type operator_type;

  bool success = true;
  try {
    Teuchos::oblackholestream blackHole;
    Teuchos::GlobalMPISession mpiSession (&argc, &argv, &blackHole);
    const int myRank = mpiSession.getRank ();
    //const int numProcs = mpiSession.getNProc ();

    // Get the default communicator and Kokkos Node instance
    RCP<const Comm<int> > comm =
      DefaultPlatform::getDefaultPlatform ().getComm ();
    RCP<Node> node = DefaultPlatform::getDefaultPlatform ().getNode ();

    // Did the user specify --help at the command line to print help
    // with command-line arguments?
    bool printedHelp = false;
    // Values of command-line arguments.
    int nx, ny, nz;
    std::string xmlInputParamsFile;
    bool verbose, debug;

    // Set default values of command-line arguments.
    setCommandLineArgumentDefaults (nx, ny, nz, xmlInputParamsFile,
                                    verbose, debug);
    // Parse and validate command-line arguments.
    Teuchos::CommandLineProcessor cmdp (false, true);
    setUpCommandLineArguments (cmdp, nx, ny, nz, xmlInputParamsFile,
                               verbose, debug);
    bool gpu = false;
    cmdp.setOption ("gpu", "no-gpu", &gpu,
                    "Run example using GPU node (if supported)");
    int ranks_per_node = 1;
    cmdp.setOption ("ranks_per_node", &ranks_per_node,
                    "Number of MPI ranks per node");
    int gpu_ranks_per_node = 1;
    cmdp.setOption ("gpu_ranks_per_node", &gpu_ranks_per_node,
                    "Number of MPI ranks per node for GPUs");
    int device_offset = 0;
    cmdp.setOption ("device_offset", &device_offset,
                    "Offset for attaching MPI ranks to CUDA devices");
    parseCommandLineArguments (cmdp, printedHelp, argc, argv, nx, ny, nz,
                               xmlInputParamsFile, verbose, debug);
    if (printedHelp) {
      // The user specified --help at the command line to print help
      // with command-line arguments.  We printed help already, so quit
      // with a happy return code.
      return EXIT_SUCCESS;
    }

    // Both streams only print on MPI Rank 0.  "out" only prints if the
    // user specified --verbose.
    RCP<FancyOStream> out =
      getFancyOStream (rcpFromRef ((myRank == 0 && verbose) ?
                                   std::cout : blackHole));
    RCP<FancyOStream> err =
      getFancyOStream (rcpFromRef ((myRank == 0 && debug) ?
                                   std::cerr : blackHole));
#ifdef HAVE_MPI
    *out << "PARALLEL executable" << endl;
#else
    *out << "SERIAL executable" << endl;
#endif

    /**********************************************************************************/
    /********************************** GET XML INPUTS ********************************/
    /**********************************************************************************/
    ParameterList inputList;
    if (xmlInputParamsFile != "") {
      *out << "Reading parameters from XML file \""
           << xmlInputParamsFile << "\"..." << endl;
      Teuchos::updateParametersFromXmlFile (xmlInputParamsFile,
                                            outArg (inputList));
      if (myRank == 0) {
        inputList.print (*out, 2, true, true);
        *out << endl;
      }
    }

    // Get Pamgen mesh definition string, either from the input
    // ParameterList or from our function that makes a cube and fills in
    // the number of cells along each dimension.
    std::string meshInput = inputList.get ("meshInput", "");
    if (meshInput == "") {
      *out << "Generating mesh input string: nx = " << nx
           << ", ny = " << ny
           << ", nz = " << nz << endl;
      meshInput = makeMeshInput (nx, ny, nz);
    }

    // Total application run time
    {
      TEUCHOS_FUNC_TIME_MONITOR_DIFF("Total Time", total_time);

      RCP<sparse_matrix_type> A;
      RCP<vector_type> B, X_exact, X;
      {
        TEUCHOS_FUNC_TIME_MONITOR_DIFF("Total Assembly", total_assembly);
        makeMatrixAndRightHandSide (A, B, X_exact, X, comm, node, meshInput,
                                    out, err, verbose, debug);
      }

      const std::vector<MT> norms = exactResidualNorm (A, B, X_exact);
      // X_exact is the exact solution of the PDE, projected onto the
      // discrete mesh.  It may not necessarily equal the exact solution
      // of the linear system.
      *out << "||B - A*X_exact||_2 = " << norms[0] << endl
           << "||B||_2 = " << norms[1] << endl
           << "||A||_F = " << norms[2] << endl;

      // Setup preconditioner
      std::string prec_type = inputList.get ("Preconditioner", "None");
      RCP<operator_type> M;
      {
        TEUCHOS_FUNC_TIME_MONITOR_DIFF("Total Preconditioner Setup", total_prec);
        if (prec_type == "MueLu") {
          if (inputList.isSublist ("MueLu")) {
            ParameterList mueluParams = inputList.sublist ("MueLu");
            M = MueLu::CreateTpetraPreconditioner<ST,LO,GO,Node> (A, mueluParams);
          } else {
            M = MueLu::CreateTpetraPreconditioner<ST,LO,GO,Node> (A);
          }
        }
      }

      bool converged = false;
      int numItersPerformed = 0;
      const MT tol = inputList.get ("Convergence Tolerance",
                                    STM::squareroot (STM::eps ()));
      const int maxNumIters = inputList.get ("Maximum Iterations", 200);
      const int num_steps = inputList.get ("Number of Time Steps", 1);
      if (gpu) {
        TEUCHOS_FUNC_TIME_MONITOR_DIFF("Total GPU Solve", total_solve);
        solveWithBelosGPU (converged, numItersPerformed, tol, maxNumIters,
                           num_steps, ranks_per_node, gpu_ranks_per_node,
                           device_offset, prec_type, X, A, B, Teuchos::null, M);
      } else {
        TEUCHOS_FUNC_TIME_MONITOR_DIFF("Total Solve", total_solve);
        solveWithBelos (converged, numItersPerformed, tol, maxNumIters,
                        num_steps, X, A, B, Teuchos::null, M);
      }

      // Compute ||X-X_exact||_2
      MT norm_x = X_exact->norm2 ();
      X_exact->update (-1.0, *X, 1.0);
      MT norm_error = X_exact->norm2 ();
      *out << endl
           << "||X-X_exact||_2 / ||X_exact||_2 = " << norm_error / norm_x
           << endl;
    } // total time block

    // Summarize timings
    // RCP<ParameterList> reportParams = parameterList ("TimeMonitor::report");
    // reportParams->set ("Report format", std::string ("YAML"));
    // reportParams->set ("writeGlobalStats", true);
    // Teuchos::TimeMonitor::report (*out, reportParams);
    Teuchos::TimeMonitor::summarize (std::cout);
  } // try
  TEUCHOS_STANDARD_CATCH_STATEMENTS(true, std::cerr, success);

  if (success)
    return EXIT_SUCCESS;
  return EXIT_FAILURE;
}