//clone a parameterList with new objects void ParameterList::clone(const ParameterList& parameterListR){ std::string key; std::string idAux=parameterListR.getId(); setId(idAux); for (unsigned int it=0; it<parameterListR.size();it++){ Parameter* parameter=parameterListR.get(it,key); switch (parameter->getType()){ case ACTIVE_INT_PARAMETER:{ IntParameter* intParameter= new IntParameter(((IntParameter*)parameter)->getValue()); insertToMap(key,intParameter); } break; case ACTIVE_REAL_PARAMETER:{ RealParameter* realParameter= new RealParameter(((RealParameter*)parameter)->getValue()); insertToMap(key,realParameter); } break; case ACTIVE_STRING_PARAMETER:{ StringParameter* stringParameter= new StringParameter(((StringParameter*)parameter)->getValue()); insertToMap(key,stringParameter); } break; case ACTIVE_BYTES_PARAMETER:{ BytesParameter* bytesParameter= new BytesParameter(((BytesParameter*)parameter)->getValue()); insertToMap(key,bytesParameter); } break; } } }
int main(int argc, char *argv[]) { #ifdef HAVE_MPI MPI_Init(&argc,&argv); Epetra_MpiComm Comm(MPI_COMM_WORLD); #else Epetra_SerialComm Comm; #endif #define ML_SCALING #ifdef ML_SCALING const int ntimers=4; enum {total, probBuild, precBuild, solve}; ml_DblLoc timeVec[ntimers], maxTime[ntimers], minTime[ntimers]; for (int i=0; i<ntimers; i++) timeVec[i].rank = Comm.MyPID(); timeVec[total].value = MPI_Wtime(); #endif int nx; if (argc > 1) nx = (int) strtol(argv[1],NULL,10); else nx = 256; if (nx < 1) nx = 256; // input a nonpositive integer if you want to specify // the XML input file name. nx = nx*(int)sqrt((double)Comm.NumProc()); int ny = nx; printf("nx = %d\nny = %d\n",nx,ny); fflush(stdout); char xmlFile[80]; bool readXML=false; if (argc > 2) {strcpy(xmlFile,argv[2]); readXML = true;} else sprintf(xmlFile,"%s","params.xml"); ParameterList GaleriList; GaleriList.set("nx", nx); GaleriList.set("ny", ny); #ifdef ML_SCALING timeVec[probBuild].value = MPI_Wtime(); #endif Epetra_Map* Map = CreateMap("Cartesian2D", Comm, GaleriList); Epetra_CrsMatrix* A = CreateCrsMatrix("Laplace2D", Map, GaleriList); if (!Comm.MyPID()) printf("nx = %d, ny = %d, mx = %d, my = %d\n",nx,ny,GaleriList.get("mx",-1),GaleriList.get("my",-1)); fflush(stdout); //avoid potential overflow double numMyRows = A->NumMyRows(); double numGlobalRows; Comm.SumAll(&numMyRows,&numGlobalRows,1); if (!Comm.MyPID()) printf("# global rows = %1.0f\n",numGlobalRows); //printf("pid %d: #rows = %d\n",Comm.MyPID(),A->NumMyRows()); fflush(stdout); Epetra_MultiVector *coords = CreateCartesianCoordinates("2D", Map,GaleriList); double *x_coord=0,*y_coord=0,*z_coord=0; double **ttt; if (!coords->ExtractView(&ttt)) { x_coord = ttt[0]; y_coord = ttt[1]; } else { if (!Comm.MyPID()) printf("Error extracting coordinate vectors\n"); MPI_Finalize(); exit(EXIT_FAILURE); } Epetra_Vector LHS(*Map); LHS.Random(); Epetra_Vector RHS(*Map); RHS.PutScalar(0.0); Epetra_LinearProblem Problem(A, &LHS, &RHS); AztecOO solver(Problem); #ifdef ML_SCALING timeVec[probBuild].value = MPI_Wtime() - timeVec[probBuild].value; #endif // =========================== begin of ML part =========================== #ifdef ML_SCALING timeVec[precBuild].value = MPI_Wtime(); #endif ParameterList MLList; if (readXML) { MLList.set("read XML",true); MLList.set("XML input file",xmlFile); } else { cout << "here" << endl; ML_Epetra::SetDefaults("SA",MLList); MLList.set("smoother: type","Chebyshev"); MLList.set("smoother: sweeps",3); MLList.set("coarse: max size",1); } MLList.set("x-coordinates", x_coord); MLList.set("y-coordinates", y_coord); MLList.set("z-coordinates", z_coord); /* RCP<std::vector<int> > m_smootherAztecOptions = rcp(new std::vector<int>(AZ_OPTIONS_SIZE)); RCP<std::vector<double> > m_smootherAztecParams = rcp(new std::vector<double>(AZ_PARAMS_SIZE)); //int m_smootherAztecOptions[AZ_OPTIONS_SIZE]; //double m_smootherAztecParams[AZ_PARAMS_SIZE]; std::string smootherType("Aztec"); AZ_defaults(&(*m_smootherAztecOptions)[0],&(*m_smootherAztecParams)[0]); (*m_smootherAztecOptions)[AZ_precond] = AZ_dom_decomp; (*m_smootherAztecOptions)[AZ_subdomain_solve] = AZ_icc; bool smootherAztecAsSolver = true; */ MLList.set("ML output",10); MLList.set("repartition: enable",1); MLList.set("repartition: max min ratio",1.3); MLList.set("repartition: min per proc",200); MLList.set("repartition: partitioner","Zoltan"); MLList.set("repartition: Zoltan dimensions",2); MLList.set("repartition: put on single proc",1); MLList.set("repartition: Zoltan type","hypergraph"); MLList.set("repartition: estimated iterations",13); /* MLList.set("smoother: Aztec options",m_smootherAztecOptions); MLList.set("smoother: Aztec params",m_smootherAztecParams); MLList.set("smoother: type",smootherType.c_str()); MLList.set("smoother: Aztec as solver", smootherAztecAsSolver); MLList.set("ML print initial list", 0); MLList.set("ML print final list", 0); */ ML_Epetra::MultiLevelPreconditioner* MLPrec = new ML_Epetra::MultiLevelPreconditioner(*A, MLList); // verify unused parameters on process 0 (put -1 to print on all // processes) MLPrec->PrintUnused(0); #ifdef ML_SCALING timeVec[precBuild].value = MPI_Wtime() - timeVec[precBuild].value; #endif // =========================== end of ML part ============================= #ifdef ML_SCALING timeVec[solve].value = MPI_Wtime(); #endif solver.SetPrecOperator(MLPrec); solver.SetAztecOption(AZ_solver, AZ_cg); solver.SetAztecOption(AZ_output, 1); solver.Iterate(500, 1e-12); #ifdef ML_SCALING timeVec[solve].value = MPI_Wtime() - timeVec[solve].value; #endif // destroy the preconditioner delete MLPrec; // compute the real residual double residual; LHS.Norm2(&residual); if( Comm.MyPID()==0 ) { cout << "||b-Ax||_2 = " << residual << endl; } // for testing purposes if (residual > 1e-5) exit(EXIT_FAILURE); delete A; delete Map; delete coords; #ifdef ML_SCALING timeVec[total].value = MPI_Wtime() - timeVec[total].value; //avg double dupTime[ntimers],avgTime[ntimers]; for (int i=0; i<ntimers; i++) dupTime[i] = timeVec[i].value; MPI_Reduce(dupTime,avgTime,ntimers,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD); for (int i=0; i<ntimers; i++) avgTime[i] = avgTime[i]/Comm.NumProc(); //min MPI_Reduce(timeVec,minTime,ntimers,MPI_DOUBLE_INT,MPI_MINLOC,0,MPI_COMM_WORLD); //max MPI_Reduce(timeVec,maxTime,ntimers,MPI_DOUBLE_INT,MPI_MAXLOC,0,MPI_COMM_WORLD); if (Comm.MyPID() == 0) { printf("timing : max (pid) min (pid) avg\n"); printf("Problem build : %2.3e (%d) %2.3e (%d) %2.3e \n", maxTime[probBuild].value,maxTime[probBuild].rank, minTime[probBuild].value,minTime[probBuild].rank, avgTime[probBuild]); printf("Preconditioner build : %2.3e (%d) %2.3e (%d) %2.3e \n", maxTime[precBuild].value,maxTime[precBuild].rank, minTime[precBuild].value,minTime[precBuild].rank, avgTime[precBuild]); printf("Solve : %2.3e (%d) %2.3e (%d) %2.3e \n", maxTime[solve].value,maxTime[solve].rank, minTime[solve].value,minTime[solve].rank, avgTime[solve]); printf("Total : %2.3e (%d) %2.3e (%d) %2.3e \n", maxTime[total].value,maxTime[total].rank, minTime[total].value,minTime[total].rank, avgTime[total]); } #endif #ifdef HAVE_MPI MPI_Finalize(); #endif return(EXIT_SUCCESS); }
int main(int argc, char *argv[]) { #ifdef HAVE_MPI MPI_Init(&argc,&argv); Epetra_MpiComm Comm(MPI_COMM_WORLD); #else Epetra_SerialComm Comm; #endif Teuchos::CommandLineProcessor clp(false); clp.setDocString("This is the canonical ML scaling example"); //Problem std::string optMatrixType = "Laplace2D"; clp.setOption("matrixType", &optMatrixType, "matrix type ('Laplace2D', 'Laplace3D')"); int optNx = 100; clp.setOption("nx", &optNx, "mesh size in x direction"); int optNy = -1; clp.setOption("ny", &optNy, "mesh size in y direction"); int optNz = -1; clp.setOption("nz", &optNz, "mesh size in z direction"); //Smoothers //std::string optSmooType = "Chebshev"; clp.setOption("smooType", &optSmooType, "smoother type ('l1-sgs', 'sgs 'or 'cheby')"); int optSweeps = 3; clp.setOption("sweeps", &optSweeps, "Chebyshev degreee (or SGS sweeps)"); double optAlpha = 7; clp.setOption("alpha", &optAlpha, "Chebyshev eigenvalue ratio (recommend 7 in 2D, 20 in 3D)"); //Coarsening int optMaxCoarseSize = 500; clp.setOption("maxcoarse", &optMaxCoarseSize, "Size of coarsest grid when coarsening should stop"); int optMaxLevels = 10; clp.setOption("maxlevels", &optMaxLevels, "Maximum number of levels"); //Krylov solver double optTol = 1e-12; clp.setOption("tol", &optTol, "stopping tolerance for Krylov method"); int optMaxIts = 500; clp.setOption("maxits", &optMaxIts, "maximum iterations for Krylov method"); //XML file with additional options std::string xmlFile = ""; clp.setOption("xml", &xmlFile, "XML file containing ML options. [OPTIONAL]"); //Debugging int optWriteMatrices = -2; clp.setOption("write", &optWriteMatrices, "write matrices to file (-1 means all; i>=0 means level i)"); switch (clp.parse(argc, argv)) { case Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; break; case Teuchos::CommandLineProcessor::PARSE_ERROR: case Teuchos::CommandLineProcessor::PARSE_UNRECOGNIZED_OPTION: return EXIT_FAILURE; break; case Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL: break; } #ifdef ML_SCALING const int ntimers=4; enum {total, probBuild, precBuild, solve}; ml_DblLoc timeVec[ntimers], maxTime[ntimers], minTime[ntimers]; for (int i=0; i<ntimers; i++) timeVec[i].rank = Comm.MyPID(); timeVec[total].value = MPI_Wtime(); #endif // Creates the linear problem using the Galeri package. // Several matrix examples are supported; please refer to the // Galeri documentation for more details. // Most of the examples using the ML_Epetra::MultiLevelPreconditioner // class are based on Epetra_CrsMatrix. Example // `ml_EpetraVbr.cpp' shows how to define a Epetra_VbrMatrix. // `Laplace2D' is a symmetric matrix; an example of non-symmetric // matrices is `Recirc2D' (advection-diffusion in a box, with // recirculating flow). The grid has optNx x optNy nodes, divided into // mx x my subdomains, each assigned to a different processor. if (optNy == -1) optNy = optNx; if (optNz == -1) optNz = optNx; ParameterList GaleriList; GaleriList.set("nx", optNx); GaleriList.set("ny", optNy); GaleriList.set("nz", optNz); //GaleriList.set("mx", 1); //GaleriList.set("my", Comm.NumProc()); #ifdef ML_SCALING timeVec[probBuild].value = MPI_Wtime(); #endif Epetra_Map* Map; Epetra_CrsMatrix* A; Epetra_MultiVector* Coord; if (optMatrixType == "Laplace2D") { Map = CreateMap("Cartesian2D", Comm, GaleriList); A = CreateCrsMatrix("Laplace2D", Map, GaleriList); Coord = CreateCartesianCoordinates("2D", &(A->Map()), GaleriList); } else if (optMatrixType == "Laplace3D") { Map = CreateMap("Cartesian3D", Comm, GaleriList); A = CreateCrsMatrix("Laplace3D", Map, GaleriList); Coord = CreateCartesianCoordinates("3D", &(A->Map()), GaleriList); } else { throw(std::runtime_error("Bad matrix type")); } //EpetraExt::RowMatrixToMatlabFile("A.m",*A); double *x_coord = (*Coord)[0]; double *y_coord = (*Coord)[1]; double* z_coord=NULL; if (optMatrixType == "Laplace3D") z_coord = (*Coord)[2]; //EpetraExt::MultiVectorToMatrixMarketFile("mlcoords.m",*Coord); if( Comm.MyPID()==0 ) { std::cout << "========================================================" << std::endl; std::cout << " Matrix type: " << optMatrixType << std::endl; if (optMatrixType == "Laplace2D") std::cout << " Problem size: " << optNx*optNy << " (" << optNx << "x" << optNy << ")" << std::endl; else if (optMatrixType == "Laplace3D") std::cout << " Problem size: " << optNx*optNy*optNz << " (" << optNx << "x" << optNy << "x" << optNz << ")" << std::endl; int mx = GaleriList.get("mx", -1); int my = GaleriList.get("my", -1); int mz = GaleriList.get("my", -1); std::cout << " Processor subdomains in x direction: " << mx << std::endl << " Processor subdomains in y direction: " << my << std::endl; if (optMatrixType == "Laplace3D") std::cout << " Processor subdomains in z direction: " << mz << std::endl; std::cout << "========================================================" << std::endl; } // Build a linear system with trivial solution, using a random vector // as starting solution. Epetra_Vector LHS(*Map); LHS.Random(); Epetra_Vector RHS(*Map); RHS.PutScalar(0.0); Epetra_LinearProblem Problem(A, &LHS, &RHS); // As we wish to use AztecOO, we need to construct a solver object // for this problem AztecOO solver(Problem); #ifdef ML_SCALING timeVec[probBuild].value = MPI_Wtime() - timeVec[probBuild].value; #endif // =========================== begin of ML part =========================== #ifdef ML_SCALING timeVec[precBuild].value = MPI_Wtime(); #endif // create a parameter list for ML options ParameterList MLList; // Sets default parameters for classic smoothed aggregation. After this // call, MLList contains the default values for the ML parameters, // as required by typical smoothed aggregation for symmetric systems. // Other sets of parameters are available for non-symmetric systems // ("DD" and "DD-ML"), and for the Maxwell equations ("maxwell"). ML_Epetra::SetDefaults("SA",MLList); // overwrite some parameters. Please refer to the user's guide // for more information // some of the parameters do not differ from their default value, // and they are here reported for the sake of clarity // output level, 0 being silent and 10 verbose MLList.set("ML output", 10); // maximum number of levels MLList.set("max levels",optMaxLevels); // set finest level to 0 MLList.set("increasing or decreasing","increasing"); MLList.set("coarse: max size",optMaxCoarseSize); // use Uncoupled scheme to create the aggregate MLList.set("aggregation: type", "Uncoupled"); // smoother is Chebyshev. Example file // `ml/examples/TwoLevelDD/ml_2level_DD.cpp' shows how to use // AZTEC's preconditioners as smoothers MLList.set("smoother: type","Chebyshev"); MLList.set("smoother: Chebyshev alpha",optAlpha); MLList.set("smoother: sweeps",optSweeps); // use both pre and post smoothing MLList.set("smoother: pre or post", "both"); #ifdef HAVE_ML_AMESOS // solve with serial direct solver KLU MLList.set("coarse: type","Amesos-KLU"); #else // this is for testing purposes only, you should have // a direct solver for the coarse problem (either Amesos, or the SuperLU/ // SuperLU_DIST interface of ML) MLList.set("coarse: type","Jacobi"); #endif MLList.set("repartition: enable",1); MLList.set("repartition: start level",1); MLList.set("repartition: max min ratio",1.1); MLList.set("repartition: min per proc",800); MLList.set("repartition: partitioner","Zoltan"); MLList.set("repartition: put on single proc",1); MLList.set("x-coordinates", x_coord); MLList.set("y-coordinates", y_coord); if (optMatrixType == "Laplace2D") { MLList.set("repartition: Zoltan dimensions",2); } else if (optMatrixType == "Laplace3D") { MLList.set("repartition: Zoltan dimensions",3); MLList.set("z-coordinates", z_coord); } MLList.set("print hierarchy",optWriteMatrices); //MLList.set("aggregation: damping factor",0.); // Read in XML options if (xmlFile != "") ML_Epetra::ReadXML(xmlFile,MLList,Comm); // Creates the preconditioning object. We suggest to use `new' and // `delete' because the destructor contains some calls to MPI (as // required by ML and possibly Amesos). This is an issue only if the // destructor is called **after** MPI_Finalize(). ML_Epetra::MultiLevelPreconditioner* MLPrec = new ML_Epetra::MultiLevelPreconditioner(*A, MLList); // verify unused parameters on process 0 (put -1 to print on all // processes) MLPrec->PrintUnused(0); #ifdef ML_SCALING timeVec[precBuild].value = MPI_Wtime() - timeVec[precBuild].value; #endif // ML allows the user to cheaply recompute the preconditioner. You can // simply uncomment the following line: // // MLPrec->ReComputePreconditioner(); // // It is supposed that the linear system matrix has different values, but // **exactly** the same structure and layout. The code re-built the // hierarchy and re-setup the smoothers and the coarse solver using // already available information on the hierarchy. A particular // care is required to use ReComputePreconditioner() with nonzero // threshold. // =========================== end of ML part ============================= // tell AztecOO to use the ML preconditioner, specify the solver // and the output, then solve with 500 maximum iterations and 1e-12 // of tolerance (see AztecOO's user guide for more details) #ifdef ML_SCALING timeVec[solve].value = MPI_Wtime(); #endif solver.SetPrecOperator(MLPrec); solver.SetAztecOption(AZ_solver, AZ_cg); solver.SetAztecOption(AZ_output, 32); solver.Iterate(optMaxIts, optTol); #ifdef ML_SCALING timeVec[solve].value = MPI_Wtime() - timeVec[solve].value; #endif // destroy the preconditioner delete MLPrec; // compute the real residual double residual; LHS.Norm2(&residual); if( Comm.MyPID()==0 ) { cout << "||b-Ax||_2 = " << residual << endl; } // for testing purposes if (residual > 1e-5) exit(EXIT_FAILURE); delete A; delete Map; #ifdef ML_SCALING timeVec[total].value = MPI_Wtime() - timeVec[total].value; //avg double dupTime[ntimers],avgTime[ntimers]; for (int i=0; i<ntimers; i++) dupTime[i] = timeVec[i].value; MPI_Reduce(dupTime,avgTime,ntimers,MPI_DOUBLE,MPI_SUM,0,MPI_COMM_WORLD); for (int i=0; i<ntimers; i++) avgTime[i] = avgTime[i]/Comm.NumProc(); //min MPI_Reduce(timeVec,minTime,ntimers,MPI_DOUBLE_INT,MPI_MINLOC,0,MPI_COMM_WORLD); //max MPI_Reduce(timeVec,maxTime,ntimers,MPI_DOUBLE_INT,MPI_MAXLOC,0,MPI_COMM_WORLD); if (Comm.MyPID() == 0) { printf("timing : max (pid) min (pid) avg\n"); printf("Problem build : %2.3e (%d) %2.3e (%d) %2.3e \n", maxTime[probBuild].value,maxTime[probBuild].rank, minTime[probBuild].value,minTime[probBuild].rank, avgTime[probBuild]); printf("Preconditioner build : %2.3e (%d) %2.3e (%d) %2.3e \n", maxTime[precBuild].value,maxTime[precBuild].rank, minTime[precBuild].value,minTime[precBuild].rank, avgTime[precBuild]); printf("Solve : %2.3e (%d) %2.3e (%d) %2.3e \n", maxTime[solve].value,maxTime[solve].rank, minTime[solve].value,minTime[solve].rank, avgTime[solve]); printf("Total : %2.3e (%d) %2.3e (%d) %2.3e \n", maxTime[total].value,maxTime[total].rank, minTime[total].value,minTime[total].rank, avgTime[total]); } #endif #ifdef HAVE_MPI MPI_Finalize(); #endif return(EXIT_SUCCESS); }
int main (int argc, char *argv[]) { using namespace TrilinosCouplings; // Yes, this means I'm lazy. using TpetraIntrepidPoissonExample::exactResidualNorm; using TpetraIntrepidPoissonExample::makeMatrixAndRightHandSide; using TpetraIntrepidPoissonExample::solveWithBelos; using TpetraIntrepidPoissonExample::solveWithBelosGPU; using IntrepidPoissonExample::makeMeshInput; using IntrepidPoissonExample::parseCommandLineArguments; using IntrepidPoissonExample::setCommandLineArgumentDefaults; using IntrepidPoissonExample::setMaterialTensorOffDiagonalValue; using IntrepidPoissonExample::setUpCommandLineArguments; using Tpetra::DefaultPlatform; using Teuchos::Comm; using Teuchos::outArg; using Teuchos::ParameterList; using Teuchos::parameterList; using Teuchos::RCP; using Teuchos::rcp; using Teuchos::rcpFromRef; using Teuchos::getFancyOStream; using Teuchos::FancyOStream; using std::endl; // Pull in typedefs from the example's namespace. typedef TpetraIntrepidPoissonExample::ST ST; #ifdef HAVE_TRILINOSCOUPLINGS_MUELU typedef TpetraIntrepidPoissonExample::LO LO; typedef TpetraIntrepidPoissonExample::GO GO; #endif // HAVE_TRILINOSCOUPLINGS_MUELU typedef TpetraIntrepidPoissonExample::Node Node; typedef Teuchos::ScalarTraits<ST> STS; typedef STS::magnitudeType MT; typedef Teuchos::ScalarTraits<MT> STM; typedef TpetraIntrepidPoissonExample::sparse_matrix_type sparse_matrix_type; typedef TpetraIntrepidPoissonExample::vector_type vector_type; typedef TpetraIntrepidPoissonExample::operator_type operator_type; bool success = true; try { Teuchos::oblackholestream blackHole; Teuchos::GlobalMPISession mpiSession (&argc, &argv, &blackHole); const int myRank = mpiSession.getRank (); //const int numProcs = mpiSession.getNProc (); // Get the default communicator and Kokkos Node instance RCP<const Comm<int> > comm = DefaultPlatform::getDefaultPlatform ().getComm (); RCP<Node> node = DefaultPlatform::getDefaultPlatform ().getNode (); // Did the user specify --help at the command line to print help // with command-line arguments? bool printedHelp = false; // Values of command-line arguments. int nx, ny, nz; std::string xmlInputParamsFile; bool verbose, debug; int maxNumItersFromCmdLine = -1; // -1 means "read from XML file" double tolFromCmdLine = -1.0; // -1 means "read from XML file" std::string solverName = "GMRES"; ST materialTensorOffDiagonalValue = 0.0; // Set default values of command-line arguments. setCommandLineArgumentDefaults (nx, ny, nz, xmlInputParamsFile, solverName, verbose, debug); // Parse and validate command-line arguments. Teuchos::CommandLineProcessor cmdp (false, true); setUpCommandLineArguments (cmdp, nx, ny, nz, xmlInputParamsFile, solverName, tolFromCmdLine, maxNumItersFromCmdLine, verbose, debug); cmdp.setOption ("materialTensorOffDiagonalValue", &materialTensorOffDiagonalValue, "Off-diagonal value in " "the material tensor. This controls the iteration count. " "Be careful with this if you use CG, since you can easily " "make the matrix indefinite."); // Additional command-line arguments for GPU experimentation. bool gpu = false; cmdp.setOption ("gpu", "no-gpu", &gpu, "Run example using GPU node (if supported)"); int ranks_per_node = 1; cmdp.setOption ("ranks_per_node", &ranks_per_node, "Number of MPI ranks per node"); int gpu_ranks_per_node = 1; cmdp.setOption ("gpu_ranks_per_node", &gpu_ranks_per_node, "Number of MPI ranks per node for GPUs"); int device_offset = 0; cmdp.setOption ("device_offset", &device_offset, "Offset for attaching MPI ranks to CUDA devices"); // Additional command-line arguments for dumping the generated // matrix or its row Map to output files. // // FIXME (mfh 09 Apr 2014) Need to port these command-line // arguments to the Epetra version. // If matrixFilename is nonempty, dump the matrix to that file // in MatrixMarket format. std::string matrixFilename; cmdp.setOption ("matrixFilename", &matrixFilename, "If nonempty, dump the " "generated matrix to that file in MatrixMarket format."); // If rowMapFilename is nonempty, dump the matrix's row Map to // that file in MatrixMarket format. std::string rowMapFilename; cmdp.setOption ("rowMapFilename", &rowMapFilename, "If nonempty, dump the " "generated matrix's row Map to that file in a format that " "Tpetra::MatrixMarket::Reader can read."); // Option to exit after building A and b (and dumping stuff to // files, if requested). bool exitAfterAssembly = false; cmdp.setOption ("exitAfterAssembly", "dontExitAfterAssembly", &exitAfterAssembly, "If true, exit after building the " "sparse matrix and dense right-hand side vector. If either" " --matrixFilename or --rowMapFilename are nonempty strings" ", dump the matrix resp. row Map to their respective files " "before exiting."); parseCommandLineArguments (cmdp, printedHelp, argc, argv, nx, ny, nz, xmlInputParamsFile, solverName, verbose, debug); if (printedHelp) { // The user specified --help at the command line to print help // with command-line arguments. We printed help already, so quit // with a happy return code. return EXIT_SUCCESS; } setMaterialTensorOffDiagonalValue (materialTensorOffDiagonalValue); // Both streams only print on MPI Rank 0. "out" only prints if the // user specified --verbose. RCP<FancyOStream> out = getFancyOStream (rcpFromRef ((myRank == 0 && verbose) ? std::cout : blackHole)); RCP<FancyOStream> err = getFancyOStream (rcpFromRef ((myRank == 0 && debug) ? std::cerr : blackHole)); #ifdef HAVE_MPI *out << "PARALLEL executable" << endl; #else *out << "SERIAL executable" << endl; #endif /**********************************************************************************/ /********************************** GET XML INPUTS ********************************/ /**********************************************************************************/ ParameterList inputList; if (xmlInputParamsFile != "") { *out << "Reading parameters from XML file \"" << xmlInputParamsFile << "\"..." << endl; Teuchos::updateParametersFromXmlFile (xmlInputParamsFile, outArg (inputList)); if (myRank == 0) { inputList.print (*out, 2, true, true); *out << endl; } } // Get Pamgen mesh definition string, either from the input // ParameterList or from our function that makes a cube and fills in // the number of cells along each dimension. std::string meshInput = inputList.get("meshInput", ""); if (meshInput == "") { *out << "Generating mesh input string: nx = " << nx << ", ny = " << ny << ", nz = " << nz << endl; meshInput = makeMeshInput (nx, ny, nz); } // Total application run time { TEUCHOS_FUNC_TIME_MONITOR_DIFF("Total Time", total_time); RCP<sparse_matrix_type> A; RCP<vector_type> B, X_exact, X; { TEUCHOS_FUNC_TIME_MONITOR_DIFF("Total Assembly", total_assembly); makeMatrixAndRightHandSide (A, B, X_exact, X, comm, node, meshInput, out, err, verbose, debug); } // Optionally dump the matrix and/or its row Map to files. { typedef Tpetra::MatrixMarket::Writer<sparse_matrix_type> writer_type; if (matrixFilename != "") { writer_type::writeSparseFile (matrixFilename, A); } if (rowMapFilename != "") { writer_type::writeMapFile (rowMapFilename, * (A->getRowMap ())); } } if (exitAfterAssembly) { // Users might still be interested in assembly time. Teuchos::TimeMonitor::report (comm.ptr (), std::cout); return EXIT_SUCCESS; } const std::vector<MT> norms = exactResidualNorm (A, B, X_exact); // X_exact is the exact solution of the PDE, projected onto the // discrete mesh. It may not necessarily equal the exact solution // of the linear system. *out << "||B - A*X_exact||_2 = " << norms[0] << endl << "||B||_2 = " << norms[1] << endl << "||A||_F = " << norms[2] << endl; // Setup preconditioner std::string prec_type = inputList.get ("Preconditioner", "None"); RCP<operator_type> M; { TEUCHOS_FUNC_TIME_MONITOR_DIFF("Total Preconditioner Setup", total_prec); if (prec_type == "MueLu") { #ifdef HAVE_TRILINOSCOUPLINGS_MUELU if (inputList.isSublist("MueLu")) { ParameterList mueluParams = inputList.sublist("MueLu"); M = MueLu::CreateTpetraPreconditioner<ST,LO,GO,Node>(A,mueluParams); } else { M = MueLu::CreateTpetraPreconditioner<ST,LO,GO,Node>(A); } #else // NOT HAVE_TRILINOSCOUPLINGS_MUELU TEUCHOS_TEST_FOR_EXCEPTION( prec_type == "MueLu", std::runtime_error, "Tpetra scaling example: " "In order to precondition with MueLu, you must have built Trilinos " "with the MueLu package enabled."); #endif // HAVE_TRILINOSCOUPLINGS_MUELU } } // setup preconditioner // Get the convergence tolerance for each linear solve. // If the user provided a nonnegative value at the command // line, it overrides any value in the input ParameterList. MT tol = STM::squareroot (STM::eps ()); // default value if (tolFromCmdLine < STM::zero ()) { tol = inputList.get ("Convergence Tolerance", tol); } else { tol = tolFromCmdLine; } // Get the maximum number of iterations for each linear solve. // If the user provided a value other than -1 at the command // line, it overrides any value in the input ParameterList. int maxNumIters = 200; // default value if (maxNumItersFromCmdLine == -1) { maxNumIters = inputList.get ("Maximum Iterations", maxNumIters); } else { maxNumIters = maxNumItersFromCmdLine; } // Get the number of "time steps." We imitate a time-dependent // PDE by doing this many linear solves. const int num_steps = inputList.get ("Number of Time Steps", 1); // Do the linear solve(s). bool converged = false; int numItersPerformed = 0; if (gpu) { TEUCHOS_FUNC_TIME_MONITOR_DIFF("Total GPU Solve", total_solve); solveWithBelosGPU (converged, numItersPerformed, tol, maxNumIters, num_steps, ranks_per_node, gpu_ranks_per_node, device_offset, prec_type, X, A, B, Teuchos::null, M); } else { TEUCHOS_FUNC_TIME_MONITOR_DIFF("Total Solve", total_solve); solveWithBelos (converged, numItersPerformed, solverName, tol, maxNumIters, num_steps, X, A, B, Teuchos::null, M); } // Compute ||X-X_exact||_2 const MT norm_x = X_exact->norm2 (); X_exact->update (-1.0, *X, 1.0); const MT norm_error = X_exact->norm2 (); *out << endl << "||X - X_exact||_2 / ||X_exact||_2 = " << norm_error / norm_x << endl; } // total time block // Summarize timings Teuchos::TimeMonitor::report (comm.ptr (), std::cout); } // try TEUCHOS_STANDARD_CATCH_STATEMENTS(true, std::cerr, success); if (success) { return EXIT_SUCCESS; } else { return EXIT_FAILURE; } }
int main (int argc, char *argv[]) { using namespace TrilinosCouplings; // Yes, this means I'm lazy. using TpetraIntrepidPoissonExample::exactResidualNorm; using TpetraIntrepidPoissonExample::makeMatrixAndRightHandSide; using TpetraIntrepidPoissonExample::solveWithBelos; using TpetraIntrepidPoissonExample::solveWithBelosGPU; using IntrepidPoissonExample::makeMeshInput; using IntrepidPoissonExample::setCommandLineArgumentDefaults; using IntrepidPoissonExample::setUpCommandLineArguments; using IntrepidPoissonExample::parseCommandLineArguments; using Tpetra::DefaultPlatform; using Teuchos::Comm; using Teuchos::outArg; using Teuchos::ParameterList; using Teuchos::parameterList; using Teuchos::RCP; using Teuchos::rcp; using Teuchos::rcpFromRef; using Teuchos::getFancyOStream; using Teuchos::FancyOStream; using std::endl; // Pull in typedefs from the example's namespace. typedef TpetraIntrepidPoissonExample::ST ST; typedef TpetraIntrepidPoissonExample::LO LO; typedef TpetraIntrepidPoissonExample::GO GO; typedef TpetraIntrepidPoissonExample::Node Node; typedef Teuchos::ScalarTraits<ST> STS; typedef STS::magnitudeType MT; typedef Teuchos::ScalarTraits<MT> STM; typedef TpetraIntrepidPoissonExample::sparse_matrix_type sparse_matrix_type; typedef TpetraIntrepidPoissonExample::vector_type vector_type; typedef TpetraIntrepidPoissonExample::operator_type operator_type; bool success = true; try { Teuchos::oblackholestream blackHole; Teuchos::GlobalMPISession mpiSession (&argc, &argv, &blackHole); const int myRank = mpiSession.getRank (); //const int numProcs = mpiSession.getNProc (); // Get the default communicator and Kokkos Node instance RCP<const Comm<int> > comm = DefaultPlatform::getDefaultPlatform ().getComm (); RCP<Node> node = DefaultPlatform::getDefaultPlatform ().getNode (); // Did the user specify --help at the command line to print help // with command-line arguments? bool printedHelp = false; // Values of command-line arguments. int nx, ny, nz; std::string xmlInputParamsFile; bool verbose, debug; // Set default values of command-line arguments. setCommandLineArgumentDefaults (nx, ny, nz, xmlInputParamsFile, verbose, debug); // Parse and validate command-line arguments. Teuchos::CommandLineProcessor cmdp (false, true); setUpCommandLineArguments (cmdp, nx, ny, nz, xmlInputParamsFile, verbose, debug); bool gpu = false; cmdp.setOption ("gpu", "no-gpu", &gpu, "Run example using GPU node (if supported)"); int ranks_per_node = 1; cmdp.setOption("ranks_per_node", &ranks_per_node, "Number of MPI ranks per node"); int gpu_ranks_per_node = 1; cmdp.setOption("gpu_ranks_per_node", &gpu_ranks_per_node, "Number of MPI ranks per node for GPUs"); int device_offset = 0; cmdp.setOption("device_offset", &device_offset, "Offset for attaching MPI ranks to CUDA devices"); parseCommandLineArguments (cmdp, printedHelp, argc, argv, nx, ny, nz, xmlInputParamsFile, verbose, debug); if (printedHelp) { // The user specified --help at the command line to print help // with command-line arguments. We printed help already, so quit // with a happy return code. return EXIT_SUCCESS; } // Both streams only print on MPI Rank 0. "out" only prints if the // user specified --verbose. RCP<FancyOStream> out = getFancyOStream (rcpFromRef ((myRank == 0 && verbose) ? std::cout : blackHole)); RCP<FancyOStream> err = getFancyOStream (rcpFromRef ((myRank == 0 && debug) ? std::cerr : blackHole)); #ifdef HAVE_MPI *out << "PARALLEL executable" << endl; #else *out << "SERIAL executable" << endl; #endif /**********************************************************************************/ /********************************** GET XML INPUTS ********************************/ /**********************************************************************************/ ParameterList inputList; if (xmlInputParamsFile != "") { *out << "Reading parameters from XML file \"" << xmlInputParamsFile << "\"..." << endl; Teuchos::updateParametersFromXmlFile (xmlInputParamsFile, outArg (inputList)); if (myRank == 0) { inputList.print (*out, 2, true, true); *out << endl; } } // Get Pamgen mesh definition string, either from the input // ParameterList or from our function that makes a cube and fills in // the number of cells along each dimension. std::string meshInput = inputList.get("meshInput", ""); if (meshInput == "") { *out << "Generating mesh input string: nx = " << nx << ", ny = " << ny << ", nz = " << nz << endl; meshInput = makeMeshInput (nx, ny, nz); } // Total application run time { TEUCHOS_FUNC_TIME_MONITOR_DIFF("Total Time", total_time); RCP<sparse_matrix_type> A; RCP<vector_type> B, X_exact, X; { TEUCHOS_FUNC_TIME_MONITOR_DIFF("Total Assembly", total_assembly); makeMatrixAndRightHandSide (A, B, X_exact, X, comm, node, meshInput, out, err, verbose, debug); } const std::vector<MT> norms = exactResidualNorm (A, B, X_exact); // X_exact is the exact solution of the PDE, projected onto the // discrete mesh. It may not necessarily equal the exact solution // of the linear system. *out << "||B - A*X_exact||_2 = " << norms[0] << endl << "||B||_2 = " << norms[1] << endl << "||A||_F = " << norms[2] << endl; // Setup preconditioner std::string prec_type = inputList.get("Preconditioner", "None"); RCP<operator_type> M; { TEUCHOS_FUNC_TIME_MONITOR_DIFF("Total Preconditioner Setup", total_prec); if (prec_type == "MueLu") { if (inputList.isSublist("MueLu")) { ParameterList mueluParams = inputList.sublist("MueLu"); M = MueLu::CreateTpetraPreconditioner<ST,LO,GO,Node>(A,mueluParams); } else { M = MueLu::CreateTpetraPreconditioner<ST,LO,GO,Node>(A); } } } bool converged = false; int numItersPerformed = 0; const MT tol = inputList.get("Convergence Tolerance", STM::squareroot (STM::eps ())); const int maxNumIters = inputList.get("Maximum Iterations", 200); const int num_steps = inputList.get("Number of Time Steps", 1); if (gpu) { TEUCHOS_FUNC_TIME_MONITOR_DIFF("Total GPU Solve", total_solve); solveWithBelosGPU(converged, numItersPerformed, tol, maxNumIters, num_steps, ranks_per_node, gpu_ranks_per_node, device_offset, prec_type, X, A, B, Teuchos::null, M); } else { TEUCHOS_FUNC_TIME_MONITOR_DIFF("Total Solve", total_solve); solveWithBelos (converged, numItersPerformed, tol, maxNumIters, num_steps, X, A, B, Teuchos::null, M); } // Compute ||X-X_exact||_2 MT norm_x = X_exact->norm2(); X_exact->update(-1.0, *X, 1.0); MT norm_error = X_exact->norm2(); *out << endl << "||X-X_exact||_2 / ||X_exact||_2 = " << norm_error / norm_x << endl; } // total time block // Summarize timings // RCP<ParameterList> reportParams = parameterList ("TimeMonitor::report"); // reportParams->set ("Report format", std::string ("YAML")); // reportParams->set ("writeGlobalStats", true); // Teuchos::TimeMonitor::report (*out, reportParams); Teuchos::TimeMonitor::summarize(std::cout); } //try TEUCHOS_STANDARD_CATCH_STATEMENTS(true, std::cerr, success); if (success) return EXIT_SUCCESS; return EXIT_FAILURE; }
void Krylov<MatrixType>::setParameters (const Teuchos::ParameterList& plist) { using Teuchos::as; using Teuchos::ParameterList; using Teuchos::Exceptions::InvalidParameterName; using Teuchos::Exceptions::InvalidParameterType; // FIXME (mfh 12 Sep 2014) Don't rewrite Belos::SolverFactory!!! Use // that instead. ParameterList params = plist; // Get the current parameters' values. We don't assign to the // instance data directly until we've gotten all the parameters. // This ensures "transactional" semantics, so that if attempting to // get some parameter throws an exception, the class' state doesn't // change. magnitude_type resTol = resTol_; int numIters = numIters_; std::string iterType = iterationType_; int blockSize = BlockSize_; bool zeroStartingSolution = ZeroStartingSolution_; int precType = PreconditionerType_; // // Get the "krylov: iteration type" parameter. // // We prefer std::string (name of Belos solver), but allow int // ("enum" value) for backwards compatibility. // bool gotIterType = false; try { iterType = params.get<std::string> ("krylov: iteration type"); gotIterType = true; } catch (InvalidParameterName) { gotIterType = true; // the parameter is not there, so don't try to get it anymore } catch (InvalidParameterType) { // Perhaps the user specified it as int. } // If it's not string, it has to be int. // We've already checked whether the name exists. if (! gotIterType) { const int iterTypeInt = params.get<int> ("krylov: iteration type"); gotIterType = true; if (iterTypeInt == 1) { iterType = "GMRES"; } else if (iterTypeInt == 2) { iterType = "CG"; } else { TEUCHOS_TEST_FOR_EXCEPTION( true, std::invalid_argument, "Ifpack2::Krylov::setParameters: Invalid " "\"krylov: iteration type\" value " << iterTypeInt << ". Valid int " "values are 1 (GMRES) and 2 (CG). Please prefer setting this " "parameter as a string (the name of the Krylov solver to use)."); } } resTol = params.get ("krylov: residual tolerance", resTol); numIters = params.get ("krylov: number of iterations", numIters); blockSize = params.get ("krylov: block size", blockSize); zeroStartingSolution = params.get ("krylov: zero starting solution", zeroStartingSolution); precType = params.get ("krylov: preconditioner type", precType); // Separate preconditioner parameters into another list // // FIXME (mfh 17 Jan 2014) Inner preconditioner's parameters should // be a sublist, not part of the main list!!! if (PreconditionerType_ == 1) { precParams_.set ("relaxation: sweeps", params.get ("relaxation: sweeps", 1)); precParams_.set ("relaxation: damping factor", params.get ("relaxation: damping factor", (scalar_type) 1.0)); precParams_.set ("relaxation: min diagonal value", params.get ("relaxation: min diagonal value", STS::one ())); precParams_.set ("relaxation: zero starting solution", params.get ("relaxation: zero starting solution", true)); precParams_.set ("relaxation: backward mode", params.get ("relaxation: backward mode", false)); } // FIXME (mfh 17 Jan 2014) AdditiveSchwarz's ParameterList no longer // takes parameters for its subdomain solver! You have to pass them // into a sublist. if (PreconditionerType_ == 2 || PreconditionerType_ == 3) { // FIXME (mfh 17 Jan 2014) should be an integer, given how ILUT // works! Furthermore, this parameter does not mean what you // think it means. precParams_.set ("fact: ilut level-of-fill", params.get ("fact: ilut level-of-fill", (double) 1.0)); precParams_.set ("fact: iluk level-of-fill", params.get ("fact: iluk level-of-fill", (double) 1.0)); // FIXME (mfh 17 Jan 2014) scalar_type or magnitude_type? not // sure, but double is definitely wrong. precParams_.set ("fact: absolute threshold", params.get ("fact: absolute threshold", (double) 0.0)); // FIXME (mfh 17 Jan 2014) scalar_type or magnitude_type? not // sure, but double is definitely wrong. precParams_.set ("fact: relative threshold", params.get("fact: relative threshold", (double) 1.0)); // FIXME (mfh 17 Jan 2014) scalar_type or magnitude_type? not // sure, but double is definitely wrong. precParams_.set ("fact: relax value", params.get ("fact: relax value", (double) 0.0)); } // "Commit" the new values to the instance data. iterationType_ = iterType; numIters_ = numIters; resTol_ = resTol; BlockSize_ = blockSize; ZeroStartingSolution_ = zeroStartingSolution; PreconditionerType_ = precType; }