int main (int argc, char *argv[]) { using namespace TrilinosCouplings; // Yes, this means I'm lazy. using TpetraIntrepidPoissonExample::exactResidualNorm; using TpetraIntrepidPoissonExample::makeMatrixAndRightHandSide; using TpetraIntrepidPoissonExample::solveWithBelos; using TpetraIntrepidPoissonExample::solveWithBelosGPU; using IntrepidPoissonExample::makeMeshInput; using IntrepidPoissonExample::parseCommandLineArguments; using IntrepidPoissonExample::setCommandLineArgumentDefaults; using IntrepidPoissonExample::setMaterialTensorOffDiagonalValue; using IntrepidPoissonExample::setUpCommandLineArguments; using Tpetra::DefaultPlatform; using Teuchos::Comm; using Teuchos::outArg; using Teuchos::ParameterList; using Teuchos::parameterList; using Teuchos::RCP; using Teuchos::rcp; using Teuchos::rcpFromRef; using Teuchos::getFancyOStream; using Teuchos::FancyOStream; using std::endl; // Pull in typedefs from the example's namespace. typedef TpetraIntrepidPoissonExample::ST ST; #ifdef HAVE_TRILINOSCOUPLINGS_MUELU typedef TpetraIntrepidPoissonExample::LO LO; typedef TpetraIntrepidPoissonExample::GO GO; #endif // HAVE_TRILINOSCOUPLINGS_MUELU typedef TpetraIntrepidPoissonExample::Node Node; typedef Teuchos::ScalarTraits<ST> STS; typedef STS::magnitudeType MT; typedef Teuchos::ScalarTraits<MT> STM; typedef TpetraIntrepidPoissonExample::sparse_matrix_type sparse_matrix_type; typedef TpetraIntrepidPoissonExample::vector_type vector_type; typedef TpetraIntrepidPoissonExample::operator_type operator_type; bool success = true; try { Teuchos::oblackholestream blackHole; Teuchos::GlobalMPISession mpiSession (&argc, &argv, &blackHole); const int myRank = mpiSession.getRank (); //const int numProcs = mpiSession.getNProc (); // Get the default communicator and Kokkos Node instance RCP<const Comm<int> > comm = DefaultPlatform::getDefaultPlatform ().getComm (); RCP<Node> node = DefaultPlatform::getDefaultPlatform ().getNode (); // Did the user specify --help at the command line to print help // with command-line arguments? bool printedHelp = false; // Values of command-line arguments. int nx, ny, nz; std::string xmlInputParamsFile; bool verbose, debug; int maxNumItersFromCmdLine = -1; // -1 means "read from XML file" double tolFromCmdLine = -1.0; // -1 means "read from XML file" std::string solverName = "GMRES"; ST materialTensorOffDiagonalValue = 0.0; // Set default values of command-line arguments. setCommandLineArgumentDefaults (nx, ny, nz, xmlInputParamsFile, solverName, verbose, debug); // Parse and validate command-line arguments. Teuchos::CommandLineProcessor cmdp (false, true); setUpCommandLineArguments (cmdp, nx, ny, nz, xmlInputParamsFile, solverName, tolFromCmdLine, maxNumItersFromCmdLine, verbose, debug); cmdp.setOption ("materialTensorOffDiagonalValue", &materialTensorOffDiagonalValue, "Off-diagonal value in " "the material tensor. This controls the iteration count. " "Be careful with this if you use CG, since you can easily " "make the matrix indefinite."); // Additional command-line arguments for GPU experimentation. bool gpu = false; cmdp.setOption ("gpu", "no-gpu", &gpu, "Run example using GPU node (if supported)"); int ranks_per_node = 1; cmdp.setOption ("ranks_per_node", &ranks_per_node, "Number of MPI ranks per node"); int gpu_ranks_per_node = 1; cmdp.setOption ("gpu_ranks_per_node", &gpu_ranks_per_node, "Number of MPI ranks per node for GPUs"); int device_offset = 0; cmdp.setOption ("device_offset", &device_offset, "Offset for attaching MPI ranks to CUDA devices"); // Additional command-line arguments for dumping the generated // matrix or its row Map to output files. // // FIXME (mfh 09 Apr 2014) Need to port these command-line // arguments to the Epetra version. // If matrixFilename is nonempty, dump the matrix to that file // in MatrixMarket format. std::string matrixFilename; cmdp.setOption ("matrixFilename", &matrixFilename, "If nonempty, dump the " "generated matrix to that file in MatrixMarket format."); // If rowMapFilename is nonempty, dump the matrix's row Map to // that file in MatrixMarket format. std::string rowMapFilename; cmdp.setOption ("rowMapFilename", &rowMapFilename, "If nonempty, dump the " "generated matrix's row Map to that file in a format that " "Tpetra::MatrixMarket::Reader can read."); // Option to exit after building A and b (and dumping stuff to // files, if requested). bool exitAfterAssembly = false; cmdp.setOption ("exitAfterAssembly", "dontExitAfterAssembly", &exitAfterAssembly, "If true, exit after building the " "sparse matrix and dense right-hand side vector. If either" " --matrixFilename or --rowMapFilename are nonempty strings" ", dump the matrix resp. row Map to their respective files " "before exiting."); parseCommandLineArguments (cmdp, printedHelp, argc, argv, nx, ny, nz, xmlInputParamsFile, solverName, verbose, debug); if (printedHelp) { // The user specified --help at the command line to print help // with command-line arguments. We printed help already, so quit // with a happy return code. return EXIT_SUCCESS; } setMaterialTensorOffDiagonalValue (materialTensorOffDiagonalValue); // Both streams only print on MPI Rank 0. "out" only prints if the // user specified --verbose. RCP<FancyOStream> out = getFancyOStream (rcpFromRef ((myRank == 0 && verbose) ? std::cout : blackHole)); RCP<FancyOStream> err = getFancyOStream (rcpFromRef ((myRank == 0 && debug) ? std::cerr : blackHole)); #ifdef HAVE_MPI *out << "PARALLEL executable" << endl; #else *out << "SERIAL executable" << endl; #endif /**********************************************************************************/ /********************************** GET XML INPUTS ********************************/ /**********************************************************************************/ ParameterList inputList; if (xmlInputParamsFile != "") { *out << "Reading parameters from XML file \"" << xmlInputParamsFile << "\"..." << endl; Teuchos::updateParametersFromXmlFile (xmlInputParamsFile, outArg (inputList)); if (myRank == 0) { inputList.print (*out, 2, true, true); *out << endl; } } // Get Pamgen mesh definition string, either from the input // ParameterList or from our function that makes a cube and fills in // the number of cells along each dimension. std::string meshInput = inputList.get("meshInput", ""); if (meshInput == "") { *out << "Generating mesh input string: nx = " << nx << ", ny = " << ny << ", nz = " << nz << endl; meshInput = makeMeshInput (nx, ny, nz); } // Total application run time { TEUCHOS_FUNC_TIME_MONITOR_DIFF("Total Time", total_time); RCP<sparse_matrix_type> A; RCP<vector_type> B, X_exact, X; { TEUCHOS_FUNC_TIME_MONITOR_DIFF("Total Assembly", total_assembly); makeMatrixAndRightHandSide (A, B, X_exact, X, comm, node, meshInput, out, err, verbose, debug); } // Optionally dump the matrix and/or its row Map to files. { typedef Tpetra::MatrixMarket::Writer<sparse_matrix_type> writer_type; if (matrixFilename != "") { writer_type::writeSparseFile (matrixFilename, A); } if (rowMapFilename != "") { writer_type::writeMapFile (rowMapFilename, * (A->getRowMap ())); } } if (exitAfterAssembly) { // Users might still be interested in assembly time. Teuchos::TimeMonitor::report (comm.ptr (), std::cout); return EXIT_SUCCESS; } const std::vector<MT> norms = exactResidualNorm (A, B, X_exact); // X_exact is the exact solution of the PDE, projected onto the // discrete mesh. It may not necessarily equal the exact solution // of the linear system. *out << "||B - A*X_exact||_2 = " << norms[0] << endl << "||B||_2 = " << norms[1] << endl << "||A||_F = " << norms[2] << endl; // Setup preconditioner std::string prec_type = inputList.get ("Preconditioner", "None"); RCP<operator_type> M; { TEUCHOS_FUNC_TIME_MONITOR_DIFF("Total Preconditioner Setup", total_prec); if (prec_type == "MueLu") { #ifdef HAVE_TRILINOSCOUPLINGS_MUELU if (inputList.isSublist("MueLu")) { ParameterList mueluParams = inputList.sublist("MueLu"); M = MueLu::CreateTpetraPreconditioner<ST,LO,GO,Node>(A,mueluParams); } else { M = MueLu::CreateTpetraPreconditioner<ST,LO,GO,Node>(A); } #else // NOT HAVE_TRILINOSCOUPLINGS_MUELU TEUCHOS_TEST_FOR_EXCEPTION( prec_type == "MueLu", std::runtime_error, "Tpetra scaling example: " "In order to precondition with MueLu, you must have built Trilinos " "with the MueLu package enabled."); #endif // HAVE_TRILINOSCOUPLINGS_MUELU } } // setup preconditioner // Get the convergence tolerance for each linear solve. // If the user provided a nonnegative value at the command // line, it overrides any value in the input ParameterList. MT tol = STM::squareroot (STM::eps ()); // default value if (tolFromCmdLine < STM::zero ()) { tol = inputList.get ("Convergence Tolerance", tol); } else { tol = tolFromCmdLine; } // Get the maximum number of iterations for each linear solve. // If the user provided a value other than -1 at the command // line, it overrides any value in the input ParameterList. int maxNumIters = 200; // default value if (maxNumItersFromCmdLine == -1) { maxNumIters = inputList.get ("Maximum Iterations", maxNumIters); } else { maxNumIters = maxNumItersFromCmdLine; } // Get the number of "time steps." We imitate a time-dependent // PDE by doing this many linear solves. const int num_steps = inputList.get ("Number of Time Steps", 1); // Do the linear solve(s). bool converged = false; int numItersPerformed = 0; if (gpu) { TEUCHOS_FUNC_TIME_MONITOR_DIFF("Total GPU Solve", total_solve); solveWithBelosGPU (converged, numItersPerformed, tol, maxNumIters, num_steps, ranks_per_node, gpu_ranks_per_node, device_offset, prec_type, X, A, B, Teuchos::null, M); } else { TEUCHOS_FUNC_TIME_MONITOR_DIFF("Total Solve", total_solve); solveWithBelos (converged, numItersPerformed, solverName, tol, maxNumIters, num_steps, X, A, B, Teuchos::null, M); } // Compute ||X-X_exact||_2 const MT norm_x = X_exact->norm2 (); X_exact->update (-1.0, *X, 1.0); const MT norm_error = X_exact->norm2 (); *out << endl << "||X - X_exact||_2 / ||X_exact||_2 = " << norm_error / norm_x << endl; } // total time block // Summarize timings Teuchos::TimeMonitor::report (comm.ptr (), std::cout); } // try TEUCHOS_STANDARD_CATCH_STATEMENTS(true, std::cerr, success); if (success) { return EXIT_SUCCESS; } else { return EXIT_FAILURE; } }
int main (int argc, char *argv[]) { using namespace TrilinosCouplings; // Yes, this means I'm lazy. using TpetraIntrepidPoissonExample::exactResidualNorm; using TpetraIntrepidPoissonExample::makeMatrixAndRightHandSide; using TpetraIntrepidPoissonExample::solveWithBelos; using TpetraIntrepidPoissonExample::solveWithBelosGPU; using IntrepidPoissonExample::makeMeshInput; using IntrepidPoissonExample::setCommandLineArgumentDefaults; using IntrepidPoissonExample::setUpCommandLineArguments; using IntrepidPoissonExample::parseCommandLineArguments; using Tpetra::DefaultPlatform; using Teuchos::Comm; using Teuchos::outArg; using Teuchos::ParameterList; using Teuchos::parameterList; using Teuchos::RCP; using Teuchos::rcp; using Teuchos::rcpFromRef; using Teuchos::getFancyOStream; using Teuchos::FancyOStream; using std::endl; // Pull in typedefs from the example's namespace. typedef TpetraIntrepidPoissonExample::ST ST; typedef TpetraIntrepidPoissonExample::LO LO; typedef TpetraIntrepidPoissonExample::GO GO; typedef TpetraIntrepidPoissonExample::Node Node; typedef Teuchos::ScalarTraits<ST> STS; typedef STS::magnitudeType MT; typedef Teuchos::ScalarTraits<MT> STM; typedef TpetraIntrepidPoissonExample::sparse_matrix_type sparse_matrix_type; typedef TpetraIntrepidPoissonExample::vector_type vector_type; typedef TpetraIntrepidPoissonExample::operator_type operator_type; bool success = true; try { Teuchos::oblackholestream blackHole; Teuchos::GlobalMPISession mpiSession (&argc, &argv, &blackHole); const int myRank = mpiSession.getRank (); //const int numProcs = mpiSession.getNProc (); // Get the default communicator and Kokkos Node instance RCP<const Comm<int> > comm = DefaultPlatform::getDefaultPlatform ().getComm (); RCP<Node> node = DefaultPlatform::getDefaultPlatform ().getNode (); // Did the user specify --help at the command line to print help // with command-line arguments? bool printedHelp = false; // Values of command-line arguments. int nx, ny, nz; std::string xmlInputParamsFile; bool verbose, debug; // Set default values of command-line arguments. setCommandLineArgumentDefaults (nx, ny, nz, xmlInputParamsFile, verbose, debug); // Parse and validate command-line arguments. Teuchos::CommandLineProcessor cmdp (false, true); setUpCommandLineArguments (cmdp, nx, ny, nz, xmlInputParamsFile, verbose, debug); bool gpu = false; cmdp.setOption ("gpu", "no-gpu", &gpu, "Run example using GPU node (if supported)"); int ranks_per_node = 1; cmdp.setOption("ranks_per_node", &ranks_per_node, "Number of MPI ranks per node"); int gpu_ranks_per_node = 1; cmdp.setOption("gpu_ranks_per_node", &gpu_ranks_per_node, "Number of MPI ranks per node for GPUs"); int device_offset = 0; cmdp.setOption("device_offset", &device_offset, "Offset for attaching MPI ranks to CUDA devices"); parseCommandLineArguments (cmdp, printedHelp, argc, argv, nx, ny, nz, xmlInputParamsFile, verbose, debug); if (printedHelp) { // The user specified --help at the command line to print help // with command-line arguments. We printed help already, so quit // with a happy return code. return EXIT_SUCCESS; } // Both streams only print on MPI Rank 0. "out" only prints if the // user specified --verbose. RCP<FancyOStream> out = getFancyOStream (rcpFromRef ((myRank == 0 && verbose) ? std::cout : blackHole)); RCP<FancyOStream> err = getFancyOStream (rcpFromRef ((myRank == 0 && debug) ? std::cerr : blackHole)); #ifdef HAVE_MPI *out << "PARALLEL executable" << endl; #else *out << "SERIAL executable" << endl; #endif /**********************************************************************************/ /********************************** GET XML INPUTS ********************************/ /**********************************************************************************/ ParameterList inputList; if (xmlInputParamsFile != "") { *out << "Reading parameters from XML file \"" << xmlInputParamsFile << "\"..." << endl; Teuchos::updateParametersFromXmlFile (xmlInputParamsFile, outArg (inputList)); if (myRank == 0) { inputList.print (*out, 2, true, true); *out << endl; } } // Get Pamgen mesh definition string, either from the input // ParameterList or from our function that makes a cube and fills in // the number of cells along each dimension. std::string meshInput = inputList.get("meshInput", ""); if (meshInput == "") { *out << "Generating mesh input string: nx = " << nx << ", ny = " << ny << ", nz = " << nz << endl; meshInput = makeMeshInput (nx, ny, nz); } // Total application run time { TEUCHOS_FUNC_TIME_MONITOR_DIFF("Total Time", total_time); RCP<sparse_matrix_type> A; RCP<vector_type> B, X_exact, X; { TEUCHOS_FUNC_TIME_MONITOR_DIFF("Total Assembly", total_assembly); makeMatrixAndRightHandSide (A, B, X_exact, X, comm, node, meshInput, out, err, verbose, debug); } const std::vector<MT> norms = exactResidualNorm (A, B, X_exact); // X_exact is the exact solution of the PDE, projected onto the // discrete mesh. It may not necessarily equal the exact solution // of the linear system. *out << "||B - A*X_exact||_2 = " << norms[0] << endl << "||B||_2 = " << norms[1] << endl << "||A||_F = " << norms[2] << endl; // Setup preconditioner std::string prec_type = inputList.get("Preconditioner", "None"); RCP<operator_type> M; { TEUCHOS_FUNC_TIME_MONITOR_DIFF("Total Preconditioner Setup", total_prec); if (prec_type == "MueLu") { if (inputList.isSublist("MueLu")) { ParameterList mueluParams = inputList.sublist("MueLu"); M = MueLu::CreateTpetraPreconditioner<ST,LO,GO,Node>(A,mueluParams); } else { M = MueLu::CreateTpetraPreconditioner<ST,LO,GO,Node>(A); } } } bool converged = false; int numItersPerformed = 0; const MT tol = inputList.get("Convergence Tolerance", STM::squareroot (STM::eps ())); const int maxNumIters = inputList.get("Maximum Iterations", 200); const int num_steps = inputList.get("Number of Time Steps", 1); if (gpu) { TEUCHOS_FUNC_TIME_MONITOR_DIFF("Total GPU Solve", total_solve); solveWithBelosGPU(converged, numItersPerformed, tol, maxNumIters, num_steps, ranks_per_node, gpu_ranks_per_node, device_offset, prec_type, X, A, B, Teuchos::null, M); } else { TEUCHOS_FUNC_TIME_MONITOR_DIFF("Total Solve", total_solve); solveWithBelos (converged, numItersPerformed, tol, maxNumIters, num_steps, X, A, B, Teuchos::null, M); } // Compute ||X-X_exact||_2 MT norm_x = X_exact->norm2(); X_exact->update(-1.0, *X, 1.0); MT norm_error = X_exact->norm2(); *out << endl << "||X-X_exact||_2 / ||X_exact||_2 = " << norm_error / norm_x << endl; } // total time block // Summarize timings // RCP<ParameterList> reportParams = parameterList ("TimeMonitor::report"); // reportParams->set ("Report format", std::string ("YAML")); // reportParams->set ("writeGlobalStats", true); // Teuchos::TimeMonitor::report (*out, reportParams); Teuchos::TimeMonitor::summarize(std::cout); } //try TEUCHOS_STANDARD_CATCH_STATEMENTS(true, std::cerr, success); if (success) return EXIT_SUCCESS; return EXIT_FAILURE; }