// Assembles an Epetra_LinearProblem (A, x, b) for the given matrix.
//
//   test_params - not read anywhere in this function.
//   A           - system matrix; its row map is used to lay out x (and b when
//                 b has to be created here).
//   b           - right-hand side.  When NULL, a one-column right-hand side is
//                 manufactured as b = A * (random vector).
//
// The solution vector x handed to the problem always has one column and is
// zero-filled on return.
//
// NOTE(review): x (and b, when created here) are allocated with `new` and are
// not released in this function; confirm who owns them after the returned
// problem goes away.
Teuchos::RCP<Epetra_LinearProblem>
build_problem_mm(Teuchos::ParameterList& test_params, Epetra_CrsMatrix* A, Epetra_MultiVector* b)
{
  const Epetra_Map& rowLayout = A->RowMap();
  Epetra_MultiVector* solution = new Epetra_MultiVector(rowLayout, 1);

  if (b == NULL) {
    std::cout << "creating b = A*random" << std::endl;
    b = new Epetra_MultiVector(rowLayout, 1);
    // Borrow the solution vector as the random operand; it is zeroed below.
    solution->Random();
    A->Apply(*solution, *b);
  }

  // Hand the solver a zero initial guess.
  solution->PutScalar(0);

  return Teuchos::rcp(new Epetra_LinearProblem(A, solution, b));
}
// // Amesos_TestMultiSolver.cpp reads in a matrix in Harwell-Boeing format, // calls one of the sparse direct solvers, using blocked right hand sides // and computes the error and residual. // // TestSolver ignores the Harwell-Boeing right hand sides, creating // random right hand sides instead. // // Amesos_TestMultiSolver can test either A x = b or A^T x = b. // This can be a bit confusing because sparse direct solvers // use compressed column storage - the transpose of Trilinos' // sparse row storage. // // Matrices: // readA - Serial. As read from the file. // transposeA - Serial. The transpose of readA. // serialA - if (transpose) then transposeA else readA // distributedA - readA distributed to all processes // passA - if ( distributed ) then distributedA else serialA // // int Amesos_TestMultiSolver( Epetra_Comm &Comm, char *matrix_file, int numsolves, SparseSolverType SparseSolver, bool transpose, int special, AMESOS_MatrixType matrix_type ) { int iam = Comm.MyPID() ; // int hatever; // if ( iam == 0 ) std::cin >> hatever ; Comm.Barrier(); Epetra_Map * readMap; Epetra_CrsMatrix * readA; Epetra_Vector * readx; Epetra_Vector * readb; Epetra_Vector * readxexact; std::string FileName = matrix_file ; int FN_Size = FileName.size() ; std::string LastFiveBytes = FileName.substr( EPETRA_MAX(0,FN_Size-5), FN_Size ); std::string LastFourBytes = FileName.substr( EPETRA_MAX(0,FN_Size-4), FN_Size ); bool NonContiguousMap = false; if ( LastFiveBytes == ".triU" ) { NonContiguousMap = true; // Call routine to read in unsymmetric Triplet matrix EPETRA_CHK_ERR( Trilinos_Util_ReadTriples2Epetra( matrix_file, false, Comm, readMap, readA, readx, readb, readxexact, NonContiguousMap ) ); } else { if ( LastFiveBytes == ".triS" ) { NonContiguousMap = true; // Call routine to read in symmetric Triplet matrix EPETRA_CHK_ERR( Trilinos_Util_ReadTriples2Epetra( matrix_file, true, Comm, readMap, readA, readx, readb, readxexact, NonContiguousMap ) ); } else { if ( LastFourBytes == ".mtx" 
) { EPETRA_CHK_ERR( Trilinos_Util_ReadMatrixMarket2Epetra( matrix_file, Comm, readMap, readA, readx, readb, readxexact) ); } else { // Call routine to read in HB problem Trilinos_Util_ReadHb2Epetra( matrix_file, Comm, readMap, readA, readx, readb, readxexact) ; } } } Epetra_CrsMatrix transposeA(Copy, *readMap, 0); Epetra_CrsMatrix *serialA ; if ( transpose ) { assert( CrsMatrixTranspose( readA, &transposeA ) == 0 ); serialA = &transposeA ; } else { serialA = readA ; } // Create uniform distributed map Epetra_Map map(readMap->NumGlobalElements(), 0, Comm); Epetra_Map* map_; if( NonContiguousMap ) { // // map gives us NumMyElements and MyFirstElement; // int NumGlobalElements = readMap->NumGlobalElements(); int NumMyElements = map.NumMyElements(); int MyFirstElement = map.MinMyGID(); std::vector<int> MapMap_( NumGlobalElements ); readMap->MyGlobalElements( &MapMap_[0] ) ; Comm.Broadcast( &MapMap_[0], NumGlobalElements, 0 ) ; map_ = new Epetra_Map( NumGlobalElements, NumMyElements, &MapMap_[MyFirstElement], 0, Comm); } else { map_ = new Epetra_Map( map ) ; } // Create Exporter to distribute read-in matrix and vectors Epetra_Export exporter(*readMap, *map_); Epetra_CrsMatrix A(Copy, *map_, 0); Epetra_RowMatrix * passA = 0; Epetra_MultiVector * passx = 0; Epetra_MultiVector * passb = 0; Epetra_MultiVector * passxexact = 0; Epetra_MultiVector * passresid = 0; Epetra_MultiVector * passtmp = 0; Epetra_MultiVector x(*map_,numsolves); Epetra_MultiVector b(*map_,numsolves); Epetra_MultiVector xexact(*map_,numsolves); Epetra_MultiVector resid(*map_,numsolves); Epetra_MultiVector tmp(*map_,numsolves); Epetra_MultiVector serialx(*readMap,numsolves); Epetra_MultiVector serialb(*readMap,numsolves); Epetra_MultiVector serialxexact(*readMap,numsolves); Epetra_MultiVector serialresid(*readMap,numsolves); Epetra_MultiVector serialtmp(*readMap,numsolves); bool distribute_matrix = ( matrix_type == AMESOS_Distributed ) ; if ( distribute_matrix ) { // // Initialize x, b and xexact to the 
values read in from the file // A.Export(*serialA, exporter, Add); Comm.Barrier(); assert(A.FillComplete()==0); Comm.Barrier(); passA = &A; passx = &x; passb = &b; passxexact = &xexact; passresid = &resid; passtmp = &tmp; } else { passA = serialA; passx = &serialx; passb = &serialb; passxexact = &serialxexact; passresid = &serialresid; passtmp = &serialtmp; } passxexact->SetSeed(131) ; passxexact->Random(); passx->SetSeed(11231) ; passx->Random(); passb->PutScalar( 0.0 ); passA->Multiply( transpose, *passxexact, *passb ) ; Epetra_MultiVector CopyB( *passb ) ; double Anorm = passA->NormInf() ; SparseDirectTimingVars::SS_Result.Set_Anorm(Anorm) ; Epetra_LinearProblem Problem( (Epetra_RowMatrix *) passA, (Epetra_MultiVector *) passx, (Epetra_MultiVector *) passb ); double max_resid = 0.0; for ( int j = 0 ; j < special+1 ; j++ ) { Epetra_Time TotalTime( Comm ) ; if ( false ) { #ifdef TEST_UMFPACK unused code } else if ( SparseSolver == UMFPACK ) { UmfpackOO umfpack( (Epetra_RowMatrix *) passA, (Epetra_MultiVector *) passx, (Epetra_MultiVector *) passb ) ; umfpack.SetTrans( transpose ) ; umfpack.Solve() ; #endif #ifdef TEST_SUPERLU } else if ( SparseSolver == SuperLU ) { SuperluserialOO superluserial( (Epetra_RowMatrix *) passA, (Epetra_MultiVector *) passx, (Epetra_MultiVector *) passb ) ; superluserial.SetPermc( SuperLU_permc ) ; superluserial.SetTrans( transpose ) ; superluserial.SetUseDGSSV( special == 0 ) ; superluserial.Solve() ; #endif #ifdef HAVE_AMESOS_SLUD } else if ( SparseSolver == SuperLUdist ) { SuperludistOO superludist( Problem ) ; superludist.SetTrans( transpose ) ; EPETRA_CHK_ERR( superludist.Solve( true ) ) ; #endif #ifdef HAVE_AMESOS_SLUD2 } else if ( SparseSolver == SuperLUdist2 ) { Superludist2_OO superludist2( Problem ) ; superludist2.SetTrans( transpose ) ; EPETRA_CHK_ERR( superludist2.Solve( true ) ) ; #endif #ifdef TEST_SPOOLES } else if ( SparseSolver == SPOOLES ) { SpoolesOO spooles( (Epetra_RowMatrix *) passA, (Epetra_MultiVector *) passx, 
(Epetra_MultiVector *) passb ) ; spooles.SetTrans( transpose ) ; spooles.Solve() ; #endif #ifdef HAVE_AMESOS_DSCPACK } else if ( SparseSolver == DSCPACK ) { Teuchos::ParameterList ParamList ; Amesos_Dscpack dscpack( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( dscpack.SetParameters( ParamList ) ); EPETRA_CHK_ERR( dscpack.Solve( ) ); #endif #ifdef HAVE_AMESOS_UMFPACK } else if ( SparseSolver == UMFPACK ) { Teuchos::ParameterList ParamList ; Amesos_Umfpack umfpack( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( umfpack.SetParameters( ParamList ) ); EPETRA_CHK_ERR( umfpack.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( umfpack.Solve( ) ); #endif #ifdef HAVE_AMESOS_KLU } else if ( SparseSolver == KLU ) { Teuchos::ParameterList ParamList ; Amesos_Klu klu( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( klu.SetParameters( ParamList ) ); EPETRA_CHK_ERR( klu.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( klu.SymbolicFactorization( ) ); EPETRA_CHK_ERR( klu.NumericFactorization( ) ); EPETRA_CHK_ERR( klu.Solve( ) ); #endif #ifdef HAVE_AMESOS_PARAKLETE } else if ( SparseSolver == PARAKLETE ) { Teuchos::ParameterList ParamList ; Amesos_Paraklete paraklete( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( paraklete.SetParameters( ParamList ) ); EPETRA_CHK_ERR( paraklete.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( paraklete.SymbolicFactorization( ) ); EPETRA_CHK_ERR( paraklete.NumericFactorization( ) ); EPETRA_CHK_ERR( paraklete.Solve( ) ); #endif #ifdef HAVE_AMESOS_SLUS } else if ( SparseSolver == SuperLU ) { Epetra_SLU superluserial( &Problem ) ; EPETRA_CHK_ERR( superluserial.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( superluserial.SymbolicFactorization( ) ); EPETRA_CHK_ERR( superluserial.NumericFactorization( ) ); EPETRA_CHK_ERR( superluserial.Solve( ) ); #endif #ifdef HAVE_AMESOS_LAPACK } else if ( SparseSolver == LAPACK ) { Teuchos::ParameterList ParamList ; ParamList.set( "MaxProcs", -3 ); Amesos_Lapack 
lapack( Problem ) ; EPETRA_CHK_ERR( lapack.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( lapack.SymbolicFactorization( ) ); EPETRA_CHK_ERR( lapack.NumericFactorization( ) ); EPETRA_CHK_ERR( lapack.Solve( ) ); #endif #ifdef HAVE_AMESOS_TAUCS } else if ( SparseSolver == TAUCS ) { Teuchos::ParameterList ParamList ; Amesos_Taucs taucs( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( taucs.SetParameters( ParamList ) ); EPETRA_CHK_ERR( taucs.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( taucs.SymbolicFactorization( ) ); EPETRA_CHK_ERR( taucs.NumericFactorization( ) ); EPETRA_CHK_ERR( taucs.Solve( ) ); #endif #ifdef HAVE_AMESOS_PARDISO } else if ( SparseSolver == PARDISO ) { Teuchos::ParameterList ParamList ; Amesos_Pardiso pardiso( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( pardiso.SetParameters( ParamList ) ); EPETRA_CHK_ERR( pardiso.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( pardiso.SymbolicFactorization( ) ); EPETRA_CHK_ERR( pardiso.NumericFactorization( ) ); EPETRA_CHK_ERR( pardiso.Solve( ) ); #endif #ifdef HAVE_AMESOS_PARKLETE } else if ( SparseSolver == PARKLETE ) { Teuchos::ParameterList ParamList ; Amesos_Parklete parklete( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( parklete.SetParameters( ParamList ) ); EPETRA_CHK_ERR( parklete.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( parklete.SymbolicFactorization( ) ); EPETRA_CHK_ERR( parklete.NumericFactorization( ) ); EPETRA_CHK_ERR( parklete.Solve( ) ); #endif #ifdef HAVE_AMESOS_MUMPS } else if ( SparseSolver == MUMPS ) { Teuchos::ParameterList ParamList ; Amesos_Mumps mumps( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( mumps.SetParameters( ParamList ) ); EPETRA_CHK_ERR( mumps.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( mumps.SymbolicFactorization( ) ); EPETRA_CHK_ERR( mumps.NumericFactorization( ) ); EPETRA_CHK_ERR( mumps.Solve( ) ); #endif #ifdef HAVE_AMESOS_SCALAPACK } else if ( SparseSolver == SCALAPACK ) { Teuchos::ParameterList 
ParamList ; Amesos_Scalapack scalapack( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( scalapack.SetParameters( ParamList ) ); EPETRA_CHK_ERR( scalapack.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( scalapack.SymbolicFactorization( ) ); EPETRA_CHK_ERR( scalapack.NumericFactorization( ) ); EPETRA_CHK_ERR( scalapack.Solve( ) ); #endif #ifdef HAVE_AMESOS_SUPERLUDIST } else if ( SparseSolver == SUPERLUDIST ) { Teuchos::ParameterList ParamList ; Amesos_Superludist superludist( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( superludist.SetParameters( ParamList ) ); EPETRA_CHK_ERR( superludist.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( superludist.SymbolicFactorization( ) ); EPETRA_CHK_ERR( superludist.NumericFactorization( ) ); EPETRA_CHK_ERR( superludist.Solve( ) ); #endif #ifdef HAVE_AMESOS_SUPERLU } else if ( SparseSolver == SUPERLU ) { Teuchos::ParameterList ParamList ; Amesos_Superlu superlu( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( superlu.SetParameters( ParamList ) ); EPETRA_CHK_ERR( superlu.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( superlu.SymbolicFactorization( ) ); EPETRA_CHK_ERR( superlu.NumericFactorization( ) ); EPETRA_CHK_ERR( superlu.Solve( ) ); #endif #ifdef TEST_SPOOLESSERIAL } else if ( SparseSolver == SPOOLESSERIAL ) { SpoolesserialOO spoolesserial( (Epetra_RowMatrix *) passA, (Epetra_MultiVector *) passx, (Epetra_MultiVector *) passb ) ; spoolesserial.Solve() ; #endif } else { SparseDirectTimingVars::log_file << "Solver not implemented yet" << std::endl ; std::cerr << "\n\n#################### Requested solver not available (Or not tested with blocked RHS) on this platform #####################\n" << std::endl ; } SparseDirectTimingVars::SS_Result.Set_Total_Time( TotalTime.ElapsedTime() ); // SparseDirectTimingVars::SS_Result.Set_First_Time( 0.0 ); // SparseDirectTimingVars::SS_Result.Set_Middle_Time( 0.0 ); // SparseDirectTimingVars::SS_Result.Set_Last_Time( 0.0 ); // // Compute the error = 
norm(xcomp - xexact ) // std::vector <double> error(numsolves) ; double max_error = 0.0; passresid->Update(1.0, *passx, -1.0, *passxexact, 0.0); passresid->Norm2(&error[0]); for ( int i = 0 ; i< numsolves; i++ ) if ( error[i] > max_error ) max_error = error[i] ; SparseDirectTimingVars::SS_Result.Set_Error(max_error) ; // passxexact->Norm2(&error[0] ) ; // passx->Norm2(&error ) ; // // Compute the residual = norm(Ax - b) // std::vector <double> residual(numsolves) ; passtmp->PutScalar(0.0); passA->Multiply( transpose, *passx, *passtmp); passresid->Update(1.0, *passtmp, -1.0, *passb, 0.0); // passresid->Update(1.0, *passtmp, -1.0, CopyB, 0.0); passresid->Norm2(&residual[0]); for ( int i = 0 ; i< numsolves; i++ ) if ( residual[i] > max_resid ) max_resid = residual[i] ; SparseDirectTimingVars::SS_Result.Set_Residual(max_resid) ; std::vector <double> bnorm(numsolves); passb->Norm2( &bnorm[0] ) ; SparseDirectTimingVars::SS_Result.Set_Bnorm(bnorm[0]) ; std::vector <double> xnorm(numsolves); passx->Norm2( &xnorm[0] ) ; SparseDirectTimingVars::SS_Result.Set_Xnorm(xnorm[0]) ; if ( false && iam == 0 ) { std::cout << " Amesos_TestMutliSolver.cpp " << std::endl ; for ( int i = 0 ; i< numsolves && i < 10 ; i++ ) { std::cout << "i=" << i << " error = " << error[i] << " xnorm = " << xnorm[i] << " residual = " << residual[i] << " bnorm = " << bnorm[i] << std::endl ; } std::cout << std::endl << " max_resid = " << max_resid ; std::cout << " max_error = " << max_error << std::endl ; std::cout << " Get_residual() again = " << SparseDirectTimingVars::SS_Result.Get_Residual() << std::endl ; } } delete readA; delete readx; delete readb; delete readxexact; delete readMap; delete map_; Comm.Barrier(); return 0 ; }
int main(int argc, char *argv[]) { int ierr = 0; double elapsed_time; double total_flops; double MFLOPs; #ifdef EPETRA_MPI // Initialize MPI MPI_Init(&argc,&argv); Epetra_MpiComm comm( MPI_COMM_WORLD ); #else Epetra_SerialComm comm; #endif bool verbose = false; bool summary = false; // Check if we should print verbose results to standard out if (argc>6) if (argv[6][0]=='-' && argv[6][1]=='v') verbose = true; // Check if we should print verbose results to standard out if (argc>6) if (argv[6][0]=='-' && argv[6][1]=='s') summary = true; if(argc < 6) { cerr << "Usage: " << argv[0] << " NumNodesX NumNodesY NumProcX NumProcY NumPoints [-v|-s]" << endl << "where:" << endl << "NumNodesX - Number of mesh nodes in X direction per processor" << endl << "NumNodesY - Number of mesh nodes in Y direction per processor" << endl << "NumProcX - Number of processors to use in X direction" << endl << "NumProcY - Number of processors to use in Y direction" << endl << "NumPoints - Number of points to use in stencil (5, 9 or 25 only)" << endl << "-v|-s - (Optional) Run in verbose mode if -v present or summary mode if -s present" << endl << " NOTES: NumProcX*NumProcY must equal the number of processors used to run the problem." << endl << endl << " Serial example:" << endl << argv[0] << " 16 12 1 1 25 -v" << endl << " Run this program in verbose mode on 1 processor using a 16 X 12 grid with a 25 point stencil."<< endl <<endl << " MPI example:" << endl << "mpirun -np 32 " << argv[0] << " 10 12 4 8 9 -v" << endl << " Run this program in verbose mode on 32 processors putting a 10 X 12 subgrid on each processor using 4 processors "<< endl << " in the X direction and 8 in the Y direction. 
Total grid size is 40 points in X and 96 in Y with a 9 point stencil."<< endl << endl; return(1); } //char tmp; //if (comm.MyPID()==0) cout << "Press any key to continue..."<< endl; //if (comm.MyPID()==0) cin >> tmp; //comm.Barrier(); comm.SetTracebackMode(0); // This should shut down any error traceback reporting if (verbose && comm.MyPID()==0) cout << Epetra_Version() << endl << endl; if (summary && comm.MyPID()==0) { if (comm.NumProc()==1) cout << Epetra_Version() << endl << endl; else cout << endl << endl; // Print two blank line to keep output columns lined up } if (verbose) cout << comm <<endl; // Redefine verbose to only print on PE 0 if (verbose && comm.MyPID()!=0) verbose = false; if (summary && comm.MyPID()!=0) summary = false; int numNodesX = atoi(argv[1]); int numNodesY = atoi(argv[2]); int numProcsX = atoi(argv[3]); int numProcsY = atoi(argv[4]); int numPoints = atoi(argv[5]); if (verbose || (summary && comm.NumProc()==1)) { cout << " Number of local nodes in X direction = " << numNodesX << endl << " Number of local nodes in Y direction = " << numNodesY << endl << " Number of global nodes in X direction = " << numNodesX*numProcsX << endl << " Number of global nodes in Y direction = " << numNodesY*numProcsY << endl << " Number of local nonzero entries = " << numNodesX*numNodesY*numPoints << endl << " Number of global nonzero entries = " << numNodesX*numNodesY*numPoints*numProcsX*numProcsY << endl << " Number of Processors in X direction = " << numProcsX << endl << " Number of Processors in Y direction = " << numProcsY << endl << " Number of Points in stencil = " << numPoints << endl << endl; } // Print blank line to keep output columns lined up if (summary && comm.NumProc()>1) cout << endl << endl << endl << endl << endl << endl << endl << endl<< endl << endl; if (numProcsX*numProcsY!=comm.NumProc()) { cerr << "Number of processors = " << comm.NumProc() << endl << " is not the product of " << numProcsX << " and " << numProcsY << endl << endl; return(1); 
} if (numPoints!=5 && numPoints!=9 && numPoints!=25) { cerr << "Number of points specified = " << numPoints << endl << " is not 5, 9, 25" << endl << endl; return(1); } if (numNodesX*numNodesY<=0) { cerr << "Product of number of nodes is <= zero" << endl << endl; return(1); } Epetra_IntSerialDenseVector Xoff, XLoff, XUoff; Epetra_IntSerialDenseVector Yoff, YLoff, YUoff; if (numPoints==5) { // Generate a 5-point 2D Finite Difference matrix Xoff.Size(5); Yoff.Size(5); Xoff[0] = -1; Xoff[1] = 1; Xoff[2] = 0; Xoff[3] = 0; Xoff[4] = 0; Yoff[0] = 0; Yoff[1] = 0; Yoff[2] = 0; Yoff[3] = -1; Yoff[4] = 1; // Generate a 2-point 2D Lower triangular Finite Difference matrix XLoff.Size(2); YLoff.Size(2); XLoff[0] = -1; XLoff[1] = 0; YLoff[0] = 0; YLoff[1] = -1; // Generate a 3-point 2D upper triangular Finite Difference matrix XUoff.Size(3); YUoff.Size(3); XUoff[0] = 0; XUoff[1] = 1; XUoff[2] = 0; YUoff[0] = 0; YUoff[1] = 0; YUoff[2] = 1; } else if (numPoints==9) { // Generate a 9-point 2D Finite Difference matrix Xoff.Size(9); Yoff.Size(9); Xoff[0] = -1; Xoff[1] = 0; Xoff[2] = 1; Yoff[0] = -1; Yoff[1] = -1; Yoff[2] = -1; Xoff[3] = -1; Xoff[4] = 0; Xoff[5] = 1; Yoff[3] = 0; Yoff[4] = 0; Yoff[5] = 0; Xoff[6] = -1; Xoff[7] = 0; Xoff[8] = 1; Yoff[6] = 1; Yoff[7] = 1; Yoff[8] = 1; // Generate a 5-point lower triangular 2D Finite Difference matrix XLoff.Size(5); YLoff.Size(5); XLoff[0] = -1; XLoff[1] = 0; Xoff[2] = 1; YLoff[0] = -1; YLoff[1] = -1; Yoff[2] = -1; XLoff[3] = -1; XLoff[4] = 0; YLoff[3] = 0; YLoff[4] = 0; // Generate a 4-point upper triangular 2D Finite Difference matrix XUoff.Size(4); YUoff.Size(4); XUoff[0] = 1; YUoff[0] = 0; XUoff[1] = -1; XUoff[2] = 0; XUoff[3] = 1; YUoff[1] = 1; YUoff[2] = 1; YUoff[3] = 1; } else { // Generate a 25-point 2D Finite Difference matrix Xoff.Size(25); Yoff.Size(25); int xi = 0, yi = 0; int xo = -2, yo = -2; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Yoff[yi++] = yo ; Yoff[yi++] = yo ; 
Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; xo = -2, yo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; xo = -2, yo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; xo = -2, yo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; xo = -2, yo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; // Generate a 13-point lower triangular 2D Finite Difference matrix XLoff.Size(13); YLoff.Size(13); xi = 0, yi = 0; xo = -2, yo = -2; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ; xo = -2, yo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ; xo = -2, yo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ; // Generate a 13-point upper triangular 2D Finite Difference matrix XUoff.Size(13); YUoff.Size(13); xi = 0, yi = 0; xo = 0, yo = 0; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ; xo = -2, yo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ; xo = -2, yo++; 
XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ; } Epetra_Map * map; Epetra_Map * mapL; Epetra_Map * mapU; Epetra_CrsMatrix * A; Epetra_CrsMatrix * L; Epetra_CrsMatrix * U; Epetra_MultiVector * b; Epetra_MultiVector * bt; Epetra_MultiVector * xexact; Epetra_MultiVector * bL; Epetra_MultiVector * btL; Epetra_MultiVector * xexactL; Epetra_MultiVector * bU; Epetra_MultiVector * btU; Epetra_MultiVector * xexactU; Epetra_SerialDenseVector resvec(0); //Timings Epetra_Flops flopcounter; Epetra_Time timer(comm); #ifdef EPETRA_VERY_SHORT_PERFTEST int jstop = 1; #elif EPETRA_SHORT_PERFTEST int jstop = 1; #else int jstop = 2; #endif for (int j=0; j<jstop; j++) { for (int k=1; k<17; k++) { #ifdef EPETRA_VERY_SHORT_PERFTEST if (k<3 || (k%4==0 && k<9)) { #elif EPETRA_SHORT_PERFTEST if (k<6 || k%4==0) { #else if (k<7 || k%2==0) { #endif int nrhs=k; if (verbose) cout << "\n*************** Results for " << nrhs << " RHS with "; bool StaticProfile = (j!=0); if (verbose) { if (StaticProfile) cout << " static profile\n"; else cout << " dynamic profile\n"; } GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, numPoints, Xoff.Values(), Yoff.Values(), nrhs, comm, verbose, summary, map, A, b, bt, xexact, StaticProfile, false); #ifdef EPETRA_HAVE_JADMATRIX timer.ResetStartTime(); Epetra_JadMatrix JA(*A); elapsed_time = timer.ElapsedTime(); if (verbose) cout << "Time to create Jagged diagonal matrix = " << elapsed_time << endl; //cout << "A = " << *A << endl; //cout << "JA = " << JA << endl; runJadMatrixTests(&JA, b, bt, xexact, StaticProfile, verbose, summary); #endif runMatrixTests(A, b, bt, xexact, StaticProfile, verbose, summary); delete A; delete b; delete bt; delete xexact; GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, XLoff.Length(), XLoff.Values(), YLoff.Values(), nrhs, comm, verbose, summary, mapL, L, bL, btL, 
xexactL, StaticProfile, true); GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, XUoff.Length(), XUoff.Values(), YUoff.Values(), nrhs, comm, verbose, summary, mapU, U, bU, btU, xexactU, StaticProfile, true); runLUMatrixTests(L, bL, btL, xexactL, U, bU, btU, xexactU, StaticProfile, verbose, summary); delete L; delete bL; delete btL; delete xexactL; delete mapL; delete U; delete bU; delete btU; delete xexactU; delete mapU; Epetra_MultiVector q(*map, nrhs); Epetra_MultiVector z(q); Epetra_MultiVector r(q); delete map; q.SetFlopCounter(flopcounter); z.SetFlopCounter(q); r.SetFlopCounter(q); resvec.Resize(nrhs); flopcounter.ResetFlops(); timer.ResetStartTime(); //10 norms for( int i = 0; i < 10; ++i ) q.Norm2( resvec.Values() ); elapsed_time = timer.ElapsedTime(); total_flops = q.Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "\nTotal MFLOPs for 10 Norm2's= " << MFLOPs << endl; if (summary) { if (comm.NumProc()==1) cout << "Norm2" << '\t'; cout << MFLOPs << endl; } flopcounter.ResetFlops(); timer.ResetStartTime(); //10 dot's for( int i = 0; i < 10; ++i ) q.Dot(z, resvec.Values()); elapsed_time = timer.ElapsedTime(); total_flops = q.Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "Total MFLOPs for 10 Dot's = " << MFLOPs << endl; if (summary) { if (comm.NumProc()==1) cout << "DotProd" << '\t'; cout << MFLOPs << endl; } flopcounter.ResetFlops(); timer.ResetStartTime(); //10 dot's for( int i = 0; i < 10; ++i ) q.Update(1.0, z, 1.0, r, 0.0); elapsed_time = timer.ElapsedTime(); total_flops = q.Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "Total MFLOPs for 10 Updates= " << MFLOPs << endl; if (summary) { if (comm.NumProc()==1) cout << "Update" << '\t'; cout << MFLOPs << endl; } } } } #ifdef EPETRA_MPI MPI_Finalize() ; #endif return ierr ; } // Constructs a 2D PDE finite difference matrix using the list of x and y offsets. 
//
// nx      (In) - number of grid points in x direction
// ny      (In) - number of grid points in y direction
//   The total number of equations will be nx*ny ordered such that the x direction changes
//   most rapidly:
//      First equation is at point (0,0)
//      Second at                  (1,0)
//       ...
//      nx equation at             (nx-1,0)
//      nx+1st equation at         (0,1)
// numPoints (In) - number of points in finite difference stencil
// xoff    (In) - stencil offsets in x direction (of length numPoints)
// yoff    (In) - stencil offsets in y direction (of length numPoints)
//   A standard 5-point finite difference stencil would be described as:
//     numPoints = 5
//     xoff = [-1, 1, 0,  0, 0]
//     yoff = [ 0, 0, 0, -1, 1]
// nrhs         - Number of rhs to generate. (First interface produces vectors, so nrhs is not needed.)
// comm    (In) - an Epetra_Comm object describing the parallel machine (numProcs and my proc ID)
// map    (Out) - Epetra_Map describing distribution of matrix and vectors/multivectors
// A      (Out) - Epetra_CrsMatrix constructed for nx by ny grid using prescribed stencil
//                Off-diagonal values are random between -1 and 0.  If diagonal is part of stencil,
//                diagonal will be slightly diag dominant.
// b      (Out) - Generated RHS.  Values satisfy b = A*xexact
// bt     (Out) - Generated RHS.  Values satisfy bt = A'*xexact
// xexact (Out) - Generated exact solution to Ax = b and A'x = bt
// Note: Caller of this function is responsible for deleting all output objects.
void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, int * xoff, int * yoff, const Epetra_Comm &comm, bool verbose, bool summary, Epetra_Map *& map, Epetra_CrsMatrix *& A, Epetra_Vector *& b, Epetra_Vector *& bt, Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly) { Epetra_MultiVector * b1, * bt1, * xexact1; GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, numPoints, xoff, yoff, 1, comm, verbose, summary, map, A, b1, bt1, xexact1, StaticProfile, MakeLocalOnly); b = dynamic_cast<Epetra_Vector *>(b1); bt = dynamic_cast<Epetra_Vector *>(bt1); xexact = dynamic_cast<Epetra_Vector *>(xexact1); return; } void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, int * xoff, int * yoff, int nrhs, const Epetra_Comm &comm, bool verbose, bool summary, Epetra_Map *& map, Epetra_CrsMatrix *& A, Epetra_MultiVector *& b, Epetra_MultiVector *& bt, Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly) { Epetra_Time timer(comm); // Determine my global IDs long long * myGlobalElements; GenerateMyGlobalElements(numNodesX, numNodesY, numProcsX, numProcsY, comm.MyPID(), myGlobalElements); int numMyEquations = numNodesX*numNodesY; map = new Epetra_Map((long long)-1, numMyEquations, myGlobalElements, 0, comm); // Create map with 2D block partitioning. 
delete [] myGlobalElements; long long numGlobalEquations = map->NumGlobalElements64(); int profile = 0; if (StaticProfile) profile = numPoints; #ifdef EPETRA_HAVE_STATICPROFILE if (MakeLocalOnly) A = new Epetra_CrsMatrix(Copy, *map, *map, profile, StaticProfile); // Construct matrix with rowmap=colmap else A = new Epetra_CrsMatrix(Copy, *map, profile, StaticProfile); // Construct matrix #else if (MakeLocalOnly) A = new Epetra_CrsMatrix(Copy, *map, *map, profile); // Construct matrix with rowmap=colmap else A = new Epetra_CrsMatrix(Copy, *map, profile); // Construct matrix #endif long long * indices = new long long[numPoints]; double * values = new double[numPoints]; double dnumPoints = (double) numPoints; int nx = numNodesX*numProcsX; for (int i=0; i<numMyEquations; i++) { long long rowID = map->GID64(i); int numIndices = 0; for (int j=0; j<numPoints; j++) { long long colID = rowID + xoff[j] + nx*yoff[j]; // Compute column ID based on stencil offsets if (colID>-1 && colID<numGlobalEquations) { indices[numIndices] = colID; double value = - ((double) rand())/ ((double) RAND_MAX); if (colID==rowID) values[numIndices++] = dnumPoints - value; // Make diagonal dominant else values[numIndices++] = value; } } //cout << "Building row " << rowID << endl; A->InsertGlobalValues(rowID, numIndices, values, indices); } delete [] indices; delete [] values; double insertTime = timer.ElapsedTime(); timer.ResetStartTime(); A->FillComplete(false); double fillCompleteTime = timer.ElapsedTime(); if (verbose) cout << "Time to insert matrix values = " << insertTime << endl << "Time to complete fill = " << fillCompleteTime << endl; if (summary) { if (comm.NumProc()==1) cout << "InsertTime" << '\t'; cout << insertTime << endl; if (comm.NumProc()==1) cout << "FillCompleteTime" << '\t'; cout << fillCompleteTime << endl; } if (nrhs<=1) { b = new Epetra_Vector(*map); bt = new Epetra_Vector(*map); xexact = new Epetra_Vector(*map); } else { b = new Epetra_MultiVector(*map, nrhs); bt = new 
Epetra_MultiVector(*map, nrhs); xexact = new Epetra_MultiVector(*map, nrhs); } xexact->Random(); // Fill xexact with random values A->Multiply(false, *xexact, *b); A->Multiply(true, *xexact, *bt); return; }
int main(int argc, char** argv) { #ifdef HAVE_MPI Teuchos::GlobalMPISession mpiSession(&argc, &argv, 0); Epetra_MpiComm Comm(MPI_COMM_WORLD); #else Epetra_SerialComm Comm; #endif bool success = true; string pass = "******"; string fail = "End Result: TEST FAILED"; int myPID = Comm.MyPID(); if(myPID == 0) { cout << "Starting Epetra interface test" << endl; } /*----------------Load a test matrix---------------*/ string matrixFileName = "wathenSmall.mtx"; Epetra_CrsMatrix *A; Epetra_CrsMatrix *AHat; Epetra_MultiVector *b; Epetra_MultiVector *bHat; Epetra_MultiVector *x; int n = 0; //Get Matrix int err = EpetraExt::MatrixMarketFileToCrsMatrix(matrixFileName.c_str(), Comm, A); if(err!=0 && myPID ==0) { cout << "Error reading matrix file, info = " << err << endl; cout << fail << endl; exit(1); } n = A->NumGlobalRows(); //Make b vecotor Epetra_Map vecMap(n,0,Comm); b = new Epetra_MultiVector(vecMap,1,false); b->Random(); x = new Epetra_MultiVector(vecMap,1,false); x->Random(); cout << "Epetra matrices loaded" << endl; /*-----------------have_interface-----------------*/ /*---The have_interface checks is all the parameter list makes sense---*/ Teuchos::RCP <Teuchos::ParameterList> pLUList; string pListFileName = "ShyLU_epetra_interface.xml"; pLUList = Teuchos::getParametersFromXmlFile(pListFileName); /*----------------partitioning_interface--------------*/ /*-----------Will use check the epetra matrix on partition_interface------*/ //Isorropia Test - graph/Parmetis pLUList->set("Partitioning Package","Isorropia"); Teuchos::ParameterList ptemp; ptemp = pLUList->sublist("Isorropia Input"); Teuchos::ParameterList pptemp; pptemp = ptemp.sublist("Zoltan"); pptemp.set("GRAPH_PACKAGE", "Parmetis"); pptemp.set("DEBUG_LEVEL", "1"); ptemp.set("partitioning method", "graph"); ptemp.set("Zoltan", pptemp); pLUList->set("Isorropia Input", ptemp); cout << " \n\n--------------------BIG BREAK --------------\n\n"; Teuchos::writeParameterListToXmlOStream(*pLUList, std::cout); 
ShyLU::PartitionInterface<Epetra_CrsMatrix, Epetra_MultiVector> partI(A, pLUList.get()); partI.partition(); AHat = partI.reorderMatrix(); bHat = partI.reorderVector(b); EpetraExt::RowMatrixToMatlabFile("Epetra_Isorropia_Parmetis.mat", *AHat); cout << "Done with graph - parmetis" << endl; /* //Isorropia Test - Graph/PT-Scotch pLUList->set("Partitioning Package","Isorropia"); ptemp = pLUList->sublist("Isorropia Input"); //Teuchos::ParameterList pptemp; pptemp = ptemp.sublist("Zoltan"); pptemp.set("GRAPH_PACKAGE", "scotch"); pptemp.set("DEBUG_LEVEL", "1"); ptemp.set("partitioning method", "graph"); ptemp.set("Zoltan", pptemp); pLUList->set("Isorropia Input", ptemp); cout << " \n\n--------------------BIG BREAK --------------\n\n"; Teuchos::writeParameterListToXmlOStream(*pLUList, std::cout); PartitionInterface<Epetra_CrsMatrix, Epetra_MultiVector> partI2(A, pLUList.get()); partI2.partition(); AHat = partI2.reorderMatrix(); bHat = partI2.reorderVector(b); cout << "Done with graph - pt-scotch" << endl; */ //Zoltan2 Test #if defined(HAVE_SHYLU_ZOLTAN2) || defined(HAVE_SHYLU_ZOLTAN2) //Isorropia Test - Graph/ParMetis pLUList->set("Partitioning Package","Zoltan2"); ptemp = pLUList->sublist("Zoltan2 Input"); ptemp.set("algorithm", "parmetis"); ptemp.set("debug_level", "detailed_status"); pLUList->set("Zoltan2 Input", ptemp); cout << " \n\n--------------------BIG BREAK --------------\n\n"; Teuchos::writeParameterListToXmlOStream(*pLUList, std::cout); ShyLU::PartitionInterface<Epetra_CrsMatrix, Epetra_MultiVector> partI3(A, pLUList.get()); partI3.partition(); AHat = partI3.reorderMatrix(); bHat = partI3.reorderVector(b); cout << "Done with graph - parmetis" << endl; EpetraExt::RowMatrixToMatlabFile("Epetra_Zoltan2_Parmetis.mat", *AHat); #endif /*----------------------Direct Solver Interfaces----------------*/ //#ifdef HAVE_SHYLU_AMESOS //Amesos - klu pLUList->set("Direct Solver Package", "Amesos"); ptemp = pLUList->sublist("Amesos Input"); pptemp = ptemp.sublist("Amesos_Klu 
Input"); pptemp.set("PrintTiming", true); pptemp.set("PrintStatus", true); ptemp.set("Solver", "Amesos_Klu"); ptemp.set("Amesos_Klu Input", pptemp); pLUList->set("Amesos Input", ptemp); cout << " \n\n--------------------BIG BREAK --------------\n\n"; Teuchos::writeParameterListToXmlOStream(*pLUList, std::cout); ShyLU::DirectSolverInterface<Epetra_CrsMatrix, Epetra_MultiVector> directsolver(A, pLUList.get()); directsolver.factor(); directsolver.solve(b,x); cout << "Done with Amesos-KLU" << endl; //#endif //Amesos2 -klu2 #ifdef HAVE_SHYLU_AMESOS2 pLUList->set("Direct Solver Package", "Amesos2"); ptemp = pLUList->sublist("Amesos2 Input"); //pptemp = ptemp.sublist("Amesos_Klu Input"); pptemp.set("PrintTiming", true); pptemp.set("PrintStatus", true); ptemp.set("Solver", "KLU2"); //ptemp.set("Amesos_Klu Input", pptemp); pLUList->set("Amesos2 Input", ptemp); cout << " \n\n--------------------BIG BREAK --------------\n\n"; Teuchos::writeParameterListToXmlOStream(*pLUList, std::cout); ShyLU::DirectSolverInterface<Epetra_CrsMatrix, Epetra_MultiVector> directsolver2(A, pLUList.get()); directsolver2.factor(); directsolver2.solve(b,x); cout << "Done with Amesos-KLU2" << endl; #endif if(success) { cout << pass << endl; } }