void runJadMatrixTests(Epetra_JadMatrix * A, Epetra_MultiVector * b, Epetra_MultiVector * bt, Epetra_MultiVector * xexact, bool StaticProfile, bool verbose, bool summary) { Epetra_MultiVector z(*b); Epetra_MultiVector r(*b); Epetra_SerialDenseVector resvec(b->NumVectors()); //Timings Epetra_Flops flopcounter; A->SetFlopCounter(flopcounter); Epetra_Time timer(A->Comm()); for (int j=0; j<2; j++) { // j = 0 is notrans, j = 1 is trans bool TransA = (j==1); A->SetUseTranspose(TransA); flopcounter.ResetFlops(); timer.ResetStartTime(); //10 matvecs for( int i = 0; i < 10; ++i ) A->Apply(*xexact, z); // Compute z = A*xexact or z = A'*xexact double elapsed_time = timer.ElapsedTime(); double total_flops = A->Flops(); // Compute residual if (TransA) r.Update(-1.0, z, 1.0, *bt, 0.0); // r = bt - z else r.Update(-1.0, z, 1.0, *b, 0.0); // r = b - z r.Norm2(resvec.Values()); if (verbose) cout << "ResNorm = " << resvec.NormInf() << ": "; double MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "Total MFLOPs for 10 " << " Jagged Diagonal MatVec's with (Trans = " << TransA << ") " << MFLOPs << " (" << elapsed_time << " s)" <<endl; if (summary) { if (A->Comm().NumProc()==1) { if (TransA) cout << "TransMv" << '\t'; else cout << "NoTransMv" << '\t'; } cout << MFLOPs << endl; } } return; }
int main(int argc, char *argv[]) { int ierr = 0, i, j, k; bool debug = false; #ifdef EPETRA_MPI MPI_Init(&argc,&argv); Epetra_MpiComm Comm( MPI_COMM_WORLD ); #else Epetra_SerialComm Comm; #endif bool verbose = false; // Check if we should print results to standard out if (argc>1) if (argv[1][0]=='-' && argv[1][1]=='v') verbose = true; if (verbose && Comm.MyPID()==0) cout << Epetra_Version() << endl << endl; int rank = Comm.MyPID(); // char tmp; // if (rank==0) cout << "Press any key to continue..."<< endl; // if (rank==0) cin >> tmp; // Comm.Barrier(); Comm.SetTracebackMode(0); // This should shut down any error traceback reporting if (verbose) cout << Comm <<endl; // bool verbose1 = verbose; // Redefine verbose to only print on PE 0 if (verbose && rank!=0) verbose = false; int N = 20; int NRHS = 4; double * A = new double[N*N]; double * A1 = new double[N*N]; double * X = new double[(N+1)*NRHS]; double * X1 = new double[(N+1)*NRHS]; int LDX = N+1; int LDX1 = N+1; double * B = new double[N*NRHS]; double * B1 = new double[N*NRHS]; int LDB = N; int LDB1 = N; int LDA = N; int LDA1 = LDA; double OneNorm1; bool Transpose = false; Epetra_SerialDenseSolver solver; Epetra_SerialDenseMatrix * Matrix; for (int kk=0; kk<2; kk++) { for (i=1; i<=N; i++) { GenerateHilbert(A, LDA, i); OneNorm1 = 0.0; for (j=1; j<=i; j++) OneNorm1 += 1.0/((double) j); // 1-Norm = 1 + 1/2 + ...+1/n if (kk==0) { Matrix = new Epetra_SerialDenseMatrix(View, A, LDA, i, i); LDA1 = LDA; } else { Matrix = new Epetra_SerialDenseMatrix(Copy, A, LDA, i, i); LDA1 = i; } GenerateHilbert(A1, LDA1, i); if (kk==1) { solver.FactorWithEquilibration(true); solver.SolveWithTranspose(true); Transpose = true; solver.SolveToRefinedSolution(true); } for (k=0; k<NRHS; k++) for (j=0; j<i; j++) { B[j+k*LDB] = 1.0/((double) (k+3)*(j+3)); B1[j+k*LDB1] = B[j+k*LDB1]; } Epetra_SerialDenseMatrix Epetra_B(View, B, LDB, i, NRHS); Epetra_SerialDenseMatrix Epetra_X(View, X, LDX, i, NRHS); solver.SetMatrix(*Matrix); solver.SetVectors(Epetra_X, Epetra_B); ierr = check(solver, A1, LDA1, i, NRHS, OneNorm1, B1, LDB1, X1, LDX1, Transpose, verbose); assert (ierr>-1); delete Matrix; if (ierr!=0) { if (verbose) cout << "Factorization failed due to bad conditioning. This is normal if RCOND is small." << endl; break; } } } delete [] A; delete [] A1; delete [] X; delete [] X1; delete [] B; delete [] B1; ///////////////////////////////////////////////////////////////////// // Now test norms and scaling functions ///////////////////////////////////////////////////////////////////// Epetra_SerialDenseMatrix D; double ScalarA = 2.0; int DM = 10; int DN = 8; D.Shape(DM, DN); for (j=0; j<DN; j++) for (i=0; i<DM; i++) D[j][i] = (double) (1+i+j*DM) ; //cout << D << endl; double NormInfD_ref = (double)(DM*(DN*(DN+1))/2); double NormOneD_ref = (double)((DM*DN*(DM*DN+1))/2 - (DM*(DN-1)*(DM*(DN-1)+1))/2 ); double NormInfD = D.NormInf(); double NormOneD = D.NormOne(); if (verbose) { cout << " *** Before scaling *** " << endl << " Computed one-norm of test matrix = " << NormOneD << endl << " Expected one-norm = " << NormOneD_ref << endl << " Computed inf-norm of test matrix = " << NormInfD << endl << " Expected inf-norm = " << NormInfD_ref << endl; } D.Scale(ScalarA); // Scale entire D matrix by this value NormInfD = D.NormInf(); NormOneD = D.NormOne(); if (verbose) { cout << " *** After scaling *** " << endl << " Computed one-norm of test matrix = " << NormOneD << endl << " Expected one-norm = " << NormOneD_ref*ScalarA << endl << " Computed inf-norm of test matrix = " << NormInfD << endl << " Expected inf-norm = " << NormInfD_ref*ScalarA << endl; } ///////////////////////////////////////////////////////////////////// // Now test that A.Multiply(false, x, y) produces the same result // as y.Multiply('N','N', 1.0, A, x, 0.0). ///////////////////////////////////////////////////////////////////// N = 10; int M = 10; LDA = N; Epetra_SerialDenseMatrix smallA(N, M, false); Epetra_SerialDenseMatrix x(N, 1, false); Epetra_SerialDenseMatrix y1(N, 1, false); Epetra_SerialDenseMatrix y2(N, 1, false); for(i=0; i<N; ++i) { for(j=0; j<M; ++j) { smallA(i,j) = 1.0*i+2.0*j+1.0; } x(i,0) = 1.0; y1(i,0) = 0.0; y2(i,0) = 0.0; } //quick check of operator== if (x == y1) { if (verbose) cout << "err in Epetra_SerialDenseMatrix::operator==, " << "erroneously returned true." << std::endl; return(-1); } //quick check of operator!= if (x != x) { if (verbose) cout << "err in Epetra_SerialDenseMatrix::operator==, " << "erroneously returned true." << std::endl; return(-1); } int err1 = smallA.Multiply(false, x, y1); int err2 = y2.Multiply('N','N', 1.0, smallA, x, 0.0); if (err1 != 0 || err2 != 0) { if (verbose) cout << "err in Epetra_SerialDenseMatrix::Multiply"<<endl; return(err1+err2); } for(i=0; i<N; ++i) { if (y1(i,0) != y2(i,0)) { if (verbose) cout << "different versions of Multiply don't match."<<endl; return(-99); } } ///////////////////////////////////////////////////////////////////// // Now test for larger system, both correctness and performance. ///////////////////////////////////////////////////////////////////// N = 2000; NRHS = 5; LDA = N; LDB = N; LDX = N; if (verbose) cout << "\n\nComputing factor of an " << N << " x " << N << " general matrix...Please wait.\n\n" << endl; // Define A and X A = new double[LDA*N]; X = new double[LDB*NRHS]; for (j=0; j<N; j++) { for (k=0; k<NRHS; k++) X[j+k*LDX] = 1.0/((double) (j+5+k)); for (i=0; i<N; i++) { if (i==((j+2)%N)) A[i+j*LDA] = 100.0 + i; else A[i+j*LDA] = -11.0/((double) (i+5)*(j+2)); } } // Define Epetra_SerialDenseMatrix object Epetra_SerialDenseMatrix BigMatrix(Copy, A, LDA, N, N); Epetra_SerialDenseMatrix OrigBigMatrix(View, A, LDA, N, N); Epetra_SerialDenseSolver BigSolver; BigSolver.FactorWithEquilibration(true); BigSolver.SetMatrix(BigMatrix); // Time factorization Epetra_Flops counter; BigSolver.SetFlopCounter(counter); Epetra_Time Timer(Comm); double tstart = Timer.ElapsedTime(); ierr = BigSolver.Factor(); if (ierr!=0 && verbose) cout << "Error in factorization = "<<ierr<< endl; assert(ierr==0); double time = Timer.ElapsedTime() - tstart; double FLOPS = counter.Flops(); double MFLOPS = FLOPS/time/1000000.0; if (verbose) cout << "MFLOPS for Factorization = " << MFLOPS << endl; // Define Left hand side and right hand side Epetra_SerialDenseMatrix LHS(View, X, LDX, N, NRHS); Epetra_SerialDenseMatrix RHS; RHS.Shape(N,NRHS); // Allocate RHS // Compute RHS from A and X Epetra_Flops RHS_counter; RHS.SetFlopCounter(RHS_counter); tstart = Timer.ElapsedTime(); RHS.Multiply('N', 'N', 1.0, OrigBigMatrix, LHS, 0.0); time = Timer.ElapsedTime() - tstart; Epetra_SerialDenseMatrix OrigRHS = RHS; FLOPS = RHS_counter.Flops(); MFLOPS = FLOPS/time/1000000.0; if (verbose) cout << "MFLOPS to build RHS (NRHS = " << NRHS <<") = " << MFLOPS << endl; // Set LHS and RHS and solve BigSolver.SetVectors(LHS, RHS); tstart = Timer.ElapsedTime(); ierr = BigSolver.Solve(); if (ierr==1 && verbose) cout << "LAPACK guidelines suggest this matrix might benefit from equilibration." << endl; else if (ierr!=0 && verbose) cout << "Error in solve = "<<ierr<< endl; assert(ierr>=0); time = Timer.ElapsedTime() - tstart; FLOPS = BigSolver.Flops(); MFLOPS = FLOPS/time/1000000.0; if (verbose) cout << "MFLOPS for Solve (NRHS = " << NRHS <<") = " << MFLOPS << endl; double * resid = new double[NRHS]; bool OK = Residual(N, NRHS, A, LDA, BigSolver.Transpose(), BigSolver.X(), BigSolver.LDX(), OrigRHS.A(), OrigRHS.LDA(), resid); if (verbose) { if (!OK) cout << "************* Residual do not meet tolerance *************" << endl; for (i=0; i<NRHS; i++) cout << "Residual[" << i <<"] = "<< resid[i] << endl; cout << endl; } // Solve again using the Epetra_SerialDenseVector class for LHS and RHS Epetra_SerialDenseVector X2; Epetra_SerialDenseVector B2; X2.Size(BigMatrix.N()); B2.Size(BigMatrix.M()); int length = BigMatrix.N(); {for (int kk=0; kk<length; kk++) X2[kk] = ((double ) kk)/ ((double) length);} // Define entries of X2 RHS_counter.ResetFlops(); B2.SetFlopCounter(RHS_counter); tstart = Timer.ElapsedTime(); B2.Multiply('N', 'N', 1.0, OrigBigMatrix, X2, 0.0); // Define B2 = A*X2 time = Timer.ElapsedTime() - tstart; Epetra_SerialDenseVector OrigB2 = B2; FLOPS = RHS_counter.Flops(); MFLOPS = FLOPS/time/1000000.0; if (verbose) cout << "MFLOPS to build single RHS = " << MFLOPS << endl; // Set LHS and RHS and solve BigSolver.SetVectors(X2, B2); tstart = Timer.ElapsedTime(); ierr = BigSolver.Solve(); time = Timer.ElapsedTime() - tstart; if (ierr==1 && verbose) cout << "LAPACK guidelines suggest this matrix might benefit from equilibration." << endl; else if (ierr!=0 && verbose) cout << "Error in solve = "<<ierr<< endl; assert(ierr>=0); FLOPS = counter.Flops(); MFLOPS = FLOPS/time/1000000.0; if (verbose) cout << "MFLOPS to solve single RHS = " << MFLOPS << endl; OK = Residual(N, 1, A, LDA, BigSolver.Transpose(), BigSolver.X(), BigSolver.LDX(), OrigB2.A(), OrigB2.LDA(), resid); if (verbose) { if (!OK) cout << "************* Residual do not meet tolerance *************" << endl; cout << "Residual = "<< resid[0] << endl; } delete [] resid; delete [] A; delete [] X; /////////////////////////////////////////////////// // Now test default constructor and index operators /////////////////////////////////////////////////// N = 5; Epetra_SerialDenseMatrix C; // Implicit call to default constructor, should not need to call destructor C.Shape(5,5); // Make it 5 by 5 double * C1 = new double[N*N]; GenerateHilbert(C1, N, N); // Generate Hilber matrix C1[1+2*N] = 1000.0; // Make matrix nonsymmetric // Fill values of C with Hilbert values for (i=0; i<N; i++) for (j=0; j<N; j++) C(i,j) = C1[i+j*N]; // Test if values are correctly written and read for (i=0; i<N; i++) for (j=0; j<N; j++) { assert(C(i,j) == C1[i+j*N]); assert(C(i,j) == C[j][i]); } if (verbose) cout << "Default constructor and index operator check OK. Values of Hilbert matrix = " << endl << C << endl << "Values should be 1/(i+j+1), except value (1,2) should be 1000" << endl; delete [] C1; // now test sized/shaped constructor Epetra_SerialDenseMatrix shapedMatrix(10, 12); assert(shapedMatrix.M() == 10); assert(shapedMatrix.N() == 12); for(i = 0; i < 10; i++) for(j = 0; j < 12; j++) assert(shapedMatrix(i, j) == 0.0); Epetra_SerialDenseVector sizedVector(20); assert(sizedVector.Length() == 20); for(i = 0; i < 20; i++) assert(sizedVector(i) == 0.0); if (verbose) cout << "Shaped/sized constructors check OK." << endl; // test Copy/View mode in op= and cpy ctr int temperr = 0; temperr = matrixAssignment(verbose, debug); if(verbose && temperr == 0) cout << "Operator = checked OK." << endl; EPETRA_TEST_ERR(temperr, ierr); temperr = matrixCpyCtr(verbose, debug); if(verbose && temperr == 0) cout << "Copy ctr checked OK." << endl; EPETRA_TEST_ERR(temperr, ierr); // Test some vector methods Epetra_SerialDenseVector v1(3); v1[0] = 1.0; v1[1] = 3.0; v1[2] = 2.0; Epetra_SerialDenseVector v2(3); v2[0] = 2.0; v2[1] = 1.0; v2[2] = -2.0; temperr = 0; if (v1.Norm1()!=6.0) temperr++; if (fabs(sqrt(14.0)-v1.Norm2())>1.0e-6) temperr++; if (v1.NormInf()!=3.0) temperr++; if(verbose && temperr == 0) cout << "Vector Norms checked OK." << endl; temperr = 0; if (v1.Dot(v2)!=1.0) temperr++; if(verbose && temperr == 0) cout << "Vector Dot product checked OK." << endl; #ifdef EPETRA_MPI MPI_Finalize() ; #endif /* end main */ return ierr ; }
int main(int argc, char *argv[]) { int ierr = 0, i; #ifdef EPETRA_MPI // Initialize MPI MPI_Init(&argc,&argv); Epetra_MpiComm Comm( MPI_COMM_WORLD ); #else Epetra_SerialComm Comm; #endif int MyPID = Comm.MyPID(); int NumProc = Comm.NumProc(); bool verbose = (MyPID==0); if (verbose) cout << Epetra_Version() << endl << endl; cout << Comm << endl; // Get the number of local equations from the command line if (argc!=2) { if (verbose) cout << "Usage: " << argv[0] << " number_of_equations" << endl; std::exit(1); } long long NumGlobalElements = std::atoi(argv[1]); if (NumGlobalElements < NumProc) { if (verbose) cout << "numGlobalBlocks = " << NumGlobalElements << " cannot be < number of processors = " << NumProc << endl; std::exit(1); } // Construct a Map that puts approximately the same number of // equations on each processor. Epetra_Map Map(NumGlobalElements, 0LL, Comm); // Get update list and number of local equations from newly created Map. int NumMyElements = Map.NumMyElements(); std::vector<long long> MyGlobalElements(NumMyElements); Map.MyGlobalElements(&MyGlobalElements[0]); // Create an integer vector NumNz that is used to build the Petra Matrix. // NumNz[i] is the Number of OFF-DIAGONAL term for the ith global equation // on this processor std::vector<int> NumNz(NumMyElements); // We are building a tridiagonal matrix where each row has (-1 2 -1) // So we need 2 off-diagonal terms (except for the first and last equation) for (i=0; i<NumMyElements; i++) if (MyGlobalElements[i]==0 || MyGlobalElements[i] == NumGlobalElements-1) NumNz[i] = 2; else NumNz[i] = 3; // Create a Epetra_Matrix Epetra_CrsMatrix A(Copy, Map, &NumNz[0]); // Add rows one-at-a-time // Need some vectors to help // Off diagonal Values will always be -1 std::vector<double> Values(2); Values[0] = -1.0; Values[1] = -1.0; std::vector<long long> Indices(2); double two = 2.0; int NumEntries; for (i=0; i<NumMyElements; i++) { if (MyGlobalElements[i]==0) { Indices[0] = 1; NumEntries = 1; } else if (MyGlobalElements[i] == NumGlobalElements-1) { Indices[0] = NumGlobalElements-2; NumEntries = 1; } else { Indices[0] = MyGlobalElements[i]-1; Indices[1] = MyGlobalElements[i]+1; NumEntries = 2; } ierr = A.InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]); assert(ierr==0); // Put in the diagonal entry ierr = A.InsertGlobalValues(MyGlobalElements[i], 1, &two, &MyGlobalElements[i]); assert(ierr==0); } // Finish up ierr = A.FillComplete(); assert(ierr==0); // Create vectors for Power method // variable needed for iteration double lambda = 0.0; int niters = (int) NumGlobalElements*10; double tolerance = 1.0e-2; // Iterate Epetra_Flops counter; A.SetFlopCounter(counter); Epetra_Time timer(Comm); ierr += power_method(A, lambda, niters, tolerance, verbose); double elapsed_time = timer.ElapsedTime(); double total_flops =counter.Flops(); double MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "\n\nTotal MFLOPs for first solve = " << MFLOPs << endl<< endl; // Increase diagonal dominance if (verbose) cout << "\nIncreasing magnitude of first diagonal term, solving again\n\n" << endl; if (A.MyGlobalRow(0)) { int numvals = A.NumGlobalEntries(0); std::vector<double> Rowvals(numvals); std::vector<long long> Rowinds(numvals); A.ExtractGlobalRowCopy(0, numvals, numvals, &Rowvals[0], &Rowinds[0]); // Get A[0,0] for (i=0; i<numvals; i++) if (Rowinds[i] == 0) Rowvals[i] *= 10.0; A.ReplaceGlobalValues(0, numvals, &Rowvals[0], &Rowinds[0]); } // Iterate (again) lambda = 0.0; timer.ResetStartTime(); counter.ResetFlops(); ierr += power_method(A, lambda, niters, tolerance, verbose); elapsed_time = timer.ElapsedTime(); total_flops = counter.Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "\n\nTotal MFLOPs for second solve = " << MFLOPs << endl<< endl; // Release all objects #ifdef EPETRA_MPI MPI_Finalize() ; #endif /* end main */ return ierr ; }
int main(int argc, char *argv[]) { #ifdef EPETRA_MPI MPI_Init(&argc,&argv); Epetra_MpiComm Comm (MPI_COMM_WORLD); #else Epetra_SerialComm Comm; #endif cout << Comm << endl; int MyPID = Comm.MyPID(); bool verbose = false; if (MyPID==0) verbose = true; if(argc < 2 && verbose) { cerr << "Usage: " << argv[0] << " HB_filename [level_fill [level_overlap [absolute_threshold [ relative_threshold]]]]" << endl << "where:" << endl << "HB_filename - filename and path of a Harwell-Boeing data set" << endl << "level_fill - The amount of fill to use for ILU(k) preconditioner (default 0)" << endl << "level_overlap - The amount of overlap used for overlapping Schwarz subdomains (default 0)" << endl << "absolute_threshold - The minimum value to place on the diagonal prior to factorization (default 0.0)" << endl << "relative_threshold - The relative amount to perturb the diagonal prior to factorization (default 1.0)" << endl << endl << "To specify a non-default value for one of these parameters, you must specify all" << endl << " preceding values but not any subsequent parameters. Example:" << endl << "ifpackHbSerialMsr.exe mymatrix.hb 1 - loads mymatrix.hb, uses level fill of one, all other values are defaults" << endl << endl; return(1); } // Uncomment the next three lines to debug in mpi mode //int tmp; //if (MyPID==0) cin >> tmp; //Comm.Barrier(); Epetra_Map * readMap; Epetra_CrsMatrix * readA; Epetra_Vector * readx; Epetra_Vector * readb; Epetra_Vector * readxexact; // Call routine to read in HB problem Trilinos_Util_ReadHb2Epetra(argv[1], Comm, readMap, readA, readx, readb, readxexact); // Create uniform distributed map Epetra_Map map(readMap->NumGlobalElements(), 0, Comm); // Create Exporter to distribute read-in matrix and vectors Epetra_Export exporter(*readMap, map); Epetra_CrsMatrix A(Copy, map, 0); Epetra_Vector x(map); Epetra_Vector b(map); Epetra_Vector xexact(map); Epetra_Time FillTimer(Comm); x.Export(*readx, exporter, Add); b.Export(*readb, exporter, Add); xexact.Export(*readxexact, exporter, Add); Comm.Barrier(); double vectorRedistributeTime = FillTimer.ElapsedTime(); A.Export(*readA, exporter, Add); Comm.Barrier(); double matrixRedistributeTime = FillTimer.ElapsedTime() - vectorRedistributeTime; assert(A.FillComplete()==0); Comm.Barrier(); double fillCompleteTime = FillTimer.ElapsedTime() - matrixRedistributeTime; if (Comm.MyPID()==0) { cout << "\n\n****************************************************" << endl; cout << "\n Vector redistribute time (sec) = " << vectorRedistributeTime<< endl; cout << " Matrix redistribute time (sec) = " << matrixRedistributeTime << endl; cout << " Transform to Local time (sec) = " << fillCompleteTime << endl<< endl; } Epetra_Vector tmp1(*readMap); Epetra_Vector tmp2(map); readA->Multiply(false, *readxexact, tmp1); A.Multiply(false, xexact, tmp2); double residual; tmp1.Norm2(&residual); if (verbose) cout << "Norm of Ax from file = " << residual << endl; tmp2.Norm2(&residual); if (verbose) cout << "Norm of Ax after redistribution = " << residual << endl << endl << endl; //cout << "A from file = " << *readA << endl << endl << endl; //cout << "A after dist = " << A << endl << endl << endl; delete readA; delete readx; delete readb; delete readxexact; delete readMap; Comm.Barrier(); // Construct ILU preconditioner double elapsed_time, total_flops, MFLOPs; Epetra_Time timer(Comm); int LevelFill = 0; if (argc > 2) LevelFill = atoi(argv[2]); if (verbose) cout << "Using Level Fill = " << LevelFill << endl; int Overlap = 0; if (argc > 3) Overlap = atoi(argv[3]); if (verbose) cout << "Using Level Overlap = " << Overlap << endl; double Athresh = 0.0; if (argc > 4) Athresh = atof(argv[4]); if (verbose) cout << "Using Absolute Threshold Value of = " << Athresh << endl; double Rthresh = 1.0; if (argc > 5) Rthresh = atof(argv[5]); if (verbose) cout << "Using Relative Threshold Value of = " << Rthresh << endl; Ifpack_IlukGraph * IlukGraph = 0; Ifpack_CrsRiluk * ILUK = 0; if (LevelFill>-1) { elapsed_time = timer.ElapsedTime(); IlukGraph = new Ifpack_IlukGraph(A.Graph(), LevelFill, Overlap); assert(IlukGraph->ConstructFilledGraph()==0); elapsed_time = timer.ElapsedTime() - elapsed_time; if (verbose) cout << "Time to construct ILUK graph = " << elapsed_time << endl; Epetra_Flops fact_counter; elapsed_time = timer.ElapsedTime(); ILUK = new Ifpack_CrsRiluk(*IlukGraph); ILUK->SetFlopCounter(fact_counter); ILUK->SetAbsoluteThreshold(Athresh); ILUK->SetRelativeThreshold(Rthresh); //assert(ILUK->InitValues()==0); int initerr = ILUK->InitValues(A); if (initerr!=0) cout << Comm << "InitValues error = " << initerr; assert(ILUK->Factor()==0); elapsed_time = timer.ElapsedTime() - elapsed_time; total_flops = ILUK->Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "Time to compute preconditioner values = " << elapsed_time << endl << "MFLOPS for Factorization = " << MFLOPs << endl; //cout << *ILUK << endl; } double Condest; ILUK->Condest(false, Condest); if (verbose) cout << "Condition number estimate for this preconditioner = " << Condest << endl; int Maxiter = 500; double Tolerance = 1.0E-14; Epetra_Vector xcomp(map); Epetra_Vector resid(map); Epetra_Flops counter; A.SetFlopCounter(counter); xcomp.SetFlopCounter(A); b.SetFlopCounter(A); resid.SetFlopCounter(A); ILUK->SetFlopCounter(A); elapsed_time = timer.ElapsedTime(); BiCGSTAB(A, xcomp, b, ILUK, Maxiter, Tolerance, &residual, verbose); elapsed_time = timer.ElapsedTime() - elapsed_time; total_flops = counter.Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "Time to compute solution = " << elapsed_time << endl << "Number of operations in solve = " << total_flops << endl << "MFLOPS for Solve = " << MFLOPs<< endl << endl; resid.Update(1.0, xcomp, -1.0, xexact, 0.0); // resid = xcomp - xexact resid.Norm2(&residual); if (verbose) cout << "Norm of the difference between exact and computed solutions = " << residual << endl; if (ILUK!=0) delete ILUK; if (IlukGraph!=0) delete IlukGraph; #ifdef EPETRA_MPI MPI_Finalize() ; #endif return 0 ; }
int main (int argc, char *argv[]) { // These "using" statements make the code a bit more concise. using std::cout; using std::endl; int ierr = 0, i; // If Trilinos was built with MPI, initialize MPI, otherwise // initialize the serial "communicator" that stands in for MPI. #ifdef EPETRA_MPI MPI_Init (&argc,&argv); Epetra_MpiComm Comm (MPI_COMM_WORLD); #else Epetra_SerialComm Comm; #endif const int MyPID = Comm.MyPID(); const int NumProc = Comm.NumProc(); // We only allow (MPI) Process 0 to write to stdout. const bool verbose = (MyPID == 0); const int NumGlobalElements = 100; if (verbose) cout << Epetra_Version() << endl << endl; // Asking the Epetra_Comm to print itself is a good test for whether // you are running in an MPI environment. However, it will print // something on all MPI processes, so you should remove it for a // large-scale parallel run. cout << Comm << endl; if (NumGlobalElements < NumProc) { if (verbose) cout << "numGlobalBlocks = " << NumGlobalElements << " cannot be < number of processors = " << NumProc << endl; std::exit (EXIT_FAILURE); } // Construct a Map that puts approximately the same number of rows // of the matrix A on each processor. Epetra_Map Map (NumGlobalElements, 0, Comm); // Get update list and number of local equations from newly created Map. int NumMyElements = Map.NumMyElements(); std::vector<int> MyGlobalElements(NumMyElements); Map.MyGlobalElements(&MyGlobalElements[0]); // NumNz[i] is the number of nonzero elements in row i of the sparse // matrix on this MPI process. Epetra_CrsMatrix uses this to figure // out how much space to allocate. std::vector<int> NumNz (NumMyElements); // We are building a tridiagonal matrix where each row contains the // nonzero elements (-1 2 -1). Thus, we need 2 off-diagonal terms, // except for the first and last row of the matrix. for (int i = 0; i < NumMyElements; ++i) if (MyGlobalElements[i] == 0 || MyGlobalElements[i] == NumGlobalElements-1) NumNz[i] = 2; // First or last row else NumNz[i] = 3; // Not the (first or last row) // Create the Epetra_CrsMatrix. Epetra_CrsMatrix A (Copy, Map, &NumNz[0]); // // Add rows to the sparse matrix one at a time. // std::vector<double> Values(2); Values[0] = -1.0; Values[1] = -1.0; std::vector<int> Indices(2); const double two = 2.0; int NumEntries; for (int i = 0; i < NumMyElements; ++i) { if (MyGlobalElements[i] == 0) { // The first row of the matrix. Indices[0] = 1; NumEntries = 1; } else if (MyGlobalElements[i] == NumGlobalElements - 1) { // The last row of the matrix. Indices[0] = NumGlobalElements-2; NumEntries = 1; } else { // Any row of the matrix other than the first or last. Indices[0] = MyGlobalElements[i]-1; Indices[1] = MyGlobalElements[i]+1; NumEntries = 2; } ierr = A.InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0]); assert (ierr==0); // Insert the diagonal entry. ierr = A.InsertGlobalValues(MyGlobalElements[i], 1, &two, &MyGlobalElements[i]); assert(ierr==0); } // Finish up. We can call FillComplete() with no arguments, because // the matrix is square. ierr = A.FillComplete (); assert (ierr==0); // Parameters for the power method. const int niters = NumGlobalElements*10; const double tolerance = 1.0e-2; // // Run the power method. Keep track of the flop count and the total // elapsed time. // Epetra_Flops counter; A.SetFlopCounter(counter); Epetra_Time timer(Comm); double lambda = 0.0; ierr += powerMethod (lambda, A, niters, tolerance, verbose); double elapsedTime = timer.ElapsedTime (); double totalFlops =counter.Flops (); // Mflop/s: Million floating-point arithmetic operations per second. double Mflop_per_s = totalFlops / elapsedTime / 1000000.0; if (verbose) cout << endl << endl << "Total Mflop/s for first solve = " << Mflop_per_s << endl<< endl; // Increase the first (0,0) diagonal entry of the matrix. if (verbose) cout << endl << "Increasing magnitude of first diagonal term, solving again" << endl << endl << endl; if (A.MyGlobalRow (0)) { int numvals = A.NumGlobalEntries (0); std::vector<double> Rowvals (numvals); std::vector<int> Rowinds (numvals); A.ExtractGlobalRowCopy (0, numvals, numvals, &Rowvals[0], &Rowinds[0]); // Get A(0,0) for (int i = 0; i < numvals; ++i) if (Rowinds[i] == 0) Rowvals[i] *= 10.0; A.ReplaceGlobalValues (0, numvals, &Rowvals[0], &Rowinds[0]); } // // Run the power method again. Keep track of the flop count and the // total elapsed time. // lambda = 0.0; timer.ResetStartTime(); counter.ResetFlops(); ierr += powerMethod (lambda, A, niters, tolerance, verbose); elapsedTime = timer.ElapsedTime(); totalFlops = counter.Flops(); Mflop_per_s = totalFlops / elapsedTime / 1000000.0; if (verbose) cout << endl << endl << "Total Mflop/s for second solve = " << Mflop_per_s << endl << endl; #ifdef EPETRA_MPI MPI_Finalize() ; #endif return ierr; }
// ************************************************************* // main program - This benchmark code reads a Harwell-Boeing data // set and finds the minimal eigenvalue of the matrix // using inverse iteration. // ************************************************************* int main(int argc, char *argv[]) { #ifdef EPETRA_MPI MPI_Init(&argc,&argv); Epetra_MpiComm Comm (MPI_COMM_WORLD); #else Epetra_SerialComm Comm; #endif cout << Comm << endl; int MyPID = Comm.MyPID(); bool verbose = false; if (MyPID==0) verbose = true; // Print out detailed results (turn off for best performance) if(argc != 2) { if (verbose) cerr << "Usage: " << argv[0] << " HB_data_file" << endl; exit(1); // Error } // Define pointers that will be set by HB read function Epetra_Map * readMap; Epetra_CrsMatrix * readA; Epetra_Vector * readx; Epetra_Vector * readb; Epetra_Vector * readxexact; // Call function to read in HB problem Trilinos_Util_ReadHb2Epetra(argv[1], Comm, readMap, readA, readx, readb, readxexact); // Not interested in x, b or xexact for an eigenvalue problem delete readx; delete readb; delete readxexact; #ifdef EPETRA_MPI // If running in parallel, we need to distribute matrix across all PEs. // Create uniform distributed map Epetra_Map map(readMap->NumGlobalElements(), 0, Comm); // Create Exporter to distribute read-in matrix and vectors Epetra_Export exporter(*readMap, map); Epetra_CrsMatrix A(Copy, map, 0); A.Export(*readA, exporter, Add); assert(A.FillComplete()==0); delete readA; delete readMap; #else // If not running in parallel, we do not need to distribute the matrix Epetra_CrsMatrix & A = *readA; #endif // Create flop counter to collect all FLOPS Epetra_Flops counter; A.SetFlopCounter(counter); double lambda = 0; // Minimal eigenvalue returned here // Call inverse iteration solver Epetra_Time timer(Comm); invIteration(A, lambda, verbose); double elapsedTime = timer.ElapsedTime(); double totalFlops = counter.Flops(); double MFLOPS = totalFlops/elapsedTime/1000000.0; cout << endl << "*************************************************" << endl << " Approximate smallest eigenvalue = " << lambda << endl << " Total Time = " << elapsedTime << endl << " Total FLOPS = " << totalFlops << endl << " Total MFLOPS = " << MFLOPS << endl << "*************************************************" << endl; // All done #ifdef EPETRA_MPI MPI_Finalize(); #else delete readA; delete readMap; #endif return (0); }
//========================================================================================= void runLUMatrixTests(Epetra_CrsMatrix * L, Epetra_MultiVector * bL, Epetra_MultiVector * btL, Epetra_MultiVector * xexactL, Epetra_CrsMatrix * U, Epetra_MultiVector * bU, Epetra_MultiVector * btU, Epetra_MultiVector * xexactU, bool StaticProfile, bool verbose, bool summary) { if (L->NoDiagonal()) { bL->Update(1.0, *xexactL, 1.0); // Add contribution of a unit diagonal to bL btL->Update(1.0, *xexactL, 1.0); // Add contribution of a unit diagonal to btL } if (U->NoDiagonal()) { bU->Update(1.0, *xexactU, 1.0); // Add contribution of a unit diagonal to bU btU->Update(1.0, *xexactU, 1.0); // Add contribution of a unit diagonal to btU } Epetra_MultiVector z(*bL); Epetra_MultiVector r(*bL); Epetra_SerialDenseVector resvec(bL->NumVectors()); //Timings Epetra_Flops flopcounter; L->SetFlopCounter(flopcounter); U->SetFlopCounter(flopcounter); Epetra_Time timer(L->Comm()); std::string statdyn = "dynamic"; if (StaticProfile) statdyn = "static "; for (int j=0; j<4; j++) { // j = 0/2 is notrans, j = 1/3 is trans bool TransA = (j==1 || j==3); std::string contig = "without"; if (j>1) contig = "with "; if (j==2) { L->OptimizeStorage(); U->OptimizeStorage(); } flopcounter.ResetFlops(); timer.ResetStartTime(); //10 lower solves bool Upper = false; bool UnitDiagonal = L->NoDiagonal(); // If no diagonal, then unit must be used Epetra_MultiVector * b = TransA ? btL : bL; // solve with the appropriate b vector for( int i = 0; i < 10; ++i ) L->Solve(Upper, TransA, UnitDiagonal, *b, z); // Solve Lz = bL or L'z = bLt double elapsed_time = timer.ElapsedTime(); double total_flops = L->Flops(); // Compute residual r.Update(-1.0, z, 1.0, *xexactL, 0.0); // r = bt - z r.Norm2(resvec.Values()); if (resvec.NormInf()>0.000001) { cout << "resvec = " << resvec << endl; cout << "z = " << z << endl; cout << "xexactL = " << *xexactL << endl; cout << "r = " << r << endl; } if (verbose) cout << "ResNorm = " << resvec.NormInf() << ": "; double MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "Total MFLOPs for 10 " << " Lower solves " << statdyn << " Profile (Trans = " << TransA << ") and " << contig << " opt storage = " << MFLOPs << " (" << elapsed_time << " s)" <<endl; if (summary) { if (L->Comm().NumProc()==1) { if (TransA) cout << "TransLSv" << statdyn<< "Prof" << contig << "OptStor" << '\t'; else cout << "NoTransLSv" << statdyn << "Prof" << contig << "OptStor" << '\t'; } cout << MFLOPs << endl; } flopcounter.ResetFlops(); timer.ResetStartTime(); //10 upper solves Upper = true; UnitDiagonal = U->NoDiagonal(); // If no diagonal, then unit must be used b = TransA ? btU : bU; // solve with the appropriate b vector for( int i = 0; i < 10; ++i ) U->Solve(Upper, TransA, UnitDiagonal, *b, z); // Solve Lz = bL or L'z = bLt elapsed_time = timer.ElapsedTime(); total_flops = U->Flops(); // Compute residual r.Update(-1.0, z, 1.0, *xexactU, 0.0); // r = bt - z r.Norm2(resvec.Values()); if (resvec.NormInf()>0.001) { cout << "U = " << *U << endl; //cout << "resvec = " << resvec << endl; cout << "z = " << z << endl; cout << "xexactU = " << *xexactU << endl; //cout << "r = " << r << endl; cout << "b = " << *b << endl; } if (verbose) cout << "ResNorm = " << resvec.NormInf() << ": "; MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "Total MFLOPs for 10 " << " Upper solves " << statdyn << " Profile (Trans = " << TransA << ") and " << contig << " opt storage = " << MFLOPs <<endl; if (summary) { if (L->Comm().NumProc()==1) { if (TransA) cout << "TransUSv" << statdyn<< "Prof" << contig << "OptStor" << '\t'; else cout << "NoTransUSv" << statdyn << "Prof" << contig << "OptStor" << '\t'; } cout << MFLOPs << endl; } } return; }
void runMatrixTests(Epetra_CrsMatrix * A, Epetra_MultiVector * b, Epetra_MultiVector * bt, Epetra_MultiVector * xexact, bool StaticProfile, bool verbose, bool summary) { Epetra_MultiVector z(*b); Epetra_MultiVector r(*b); Epetra_SerialDenseVector resvec(b->NumVectors()); //Timings Epetra_Flops flopcounter; A->SetFlopCounter(flopcounter); Epetra_Time timer(A->Comm()); std::string statdyn = "dynamic"; if (StaticProfile) statdyn = "static "; for (int j=0; j<4; j++) { // j = 0/2 is notrans, j = 1/3 is trans bool TransA = (j==1 || j==3); std::string contig = "without"; if (j>1) contig = "with "; #ifdef EPETRA_SHORT_PERFTEST int kstart = 1; #else int kstart = 0; #endif for (int k=kstart; k<2; k++) { // Loop over old multiply vs. new multiply std::string oldnew = "old"; if (k>0) oldnew = "new"; if (j==2) A->OptimizeStorage(); flopcounter.ResetFlops(); timer.ResetStartTime(); if (k==0) { //10 matvecs #ifndef EPETRA_SHORT_PERFTEST for( int i = 0; i < 10; ++i ) A->Multiply1(TransA, *xexact, z); // Compute z = A*xexact or z = A'*xexact using old Multiply method #endif } else { //10 matvecs for( int i = 0; i < 10; ++i ) A->Multiply(TransA, *xexact, z); // Compute z = A*xexact or z = A'*xexact } double elapsed_time = timer.ElapsedTime(); double total_flops = A->Flops(); // Compute residual if (TransA) r.Update(-1.0, z, 1.0, *bt, 0.0); // r = bt - z else r.Update(-1.0, z, 1.0, *b, 0.0); // r = b - z r.Norm2(resvec.Values()); if (verbose) cout << "ResNorm = " << resvec.NormInf() << ": "; double MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "Total MFLOPs for 10 " << oldnew << " MatVec's with " << statdyn << " Profile (Trans = " << TransA << ") and " << contig << " optimized storage = " << MFLOPs << " (" << elapsed_time << " s)" <<endl; if (summary) { if (A->Comm().NumProc()==1) { if (TransA) cout << "TransMv" << statdyn<< "Prof" << contig << "OptStor" << '\t'; else cout << "NoTransMv" << statdyn << "Prof" << contig << "OptStor" << '\t'; } cout << MFLOPs << endl; } } } return; }
int main(int argc, char *argv[]) { int ierr = 0; double elapsed_time; double total_flops; double MFLOPs; #ifdef EPETRA_MPI // Initialize MPI MPI_Init(&argc,&argv); Epetra_MpiComm comm( MPI_COMM_WORLD ); #else Epetra_SerialComm comm; #endif bool verbose = false; bool summary = false; // Check if we should print verbose results to standard out if (argc>6) if (argv[6][0]=='-' && argv[6][1]=='v') verbose = true; // Check if we should print verbose results to standard out if (argc>6) if (argv[6][0]=='-' && argv[6][1]=='s') summary = true; if(argc < 6) { cerr << "Usage: " << argv[0] << " NumNodesX NumNodesY NumProcX NumProcY NumPoints [-v|-s]" << endl << "where:" << endl << "NumNodesX - Number of mesh nodes in X direction per processor" << endl << "NumNodesY - Number of mesh nodes in Y direction per processor" << endl << "NumProcX - Number of processors to use in X direction" << endl << "NumProcY - Number of processors to use in Y direction" << endl << "NumPoints - Number of points to use in stencil (5, 9 or 25 only)" << endl << "-v|-s - (Optional) Run in verbose mode if -v present or summary mode if -s present" << endl << " NOTES: NumProcX*NumProcY must equal the number of processors used to run the problem." << endl << endl << " Serial example:" << endl << argv[0] << " 16 12 1 1 25 -v" << endl << " Run this program in verbose mode on 1 processor using a 16 X 12 grid with a 25 point stencil."<< endl <<endl << " MPI example:" << endl << "mpirun -np 32 " << argv[0] << " 10 12 4 8 9 -v" << endl << " Run this program in verbose mode on 32 processors putting a 10 X 12 subgrid on each processor using 4 processors "<< endl << " in the X direction and 8 in the Y direction. Total grid size is 40 points in X and 96 in Y with a 9 point stencil."<< endl << endl; return(1); } //char tmp; //if (comm.MyPID()==0) cout << "Press any key to continue..."<< endl; //if (comm.MyPID()==0) cin >> tmp; //comm.Barrier(); comm.SetTracebackMode(0); // This should shut down any error traceback reporting if (verbose && comm.MyPID()==0) cout << Epetra_Version() << endl << endl; if (summary && comm.MyPID()==0) { if (comm.NumProc()==1) cout << Epetra_Version() << endl << endl; else cout << endl << endl; // Print two blank line to keep output columns lined up } if (verbose) cout << comm <<endl; // Redefine verbose to only print on PE 0 if (verbose && comm.MyPID()!=0) verbose = false; if (summary && comm.MyPID()!=0) summary = false; int numNodesX = atoi(argv[1]); int numNodesY = atoi(argv[2]); int numProcsX = atoi(argv[3]); int numProcsY = atoi(argv[4]); int numPoints = atoi(argv[5]); if (verbose || (summary && comm.NumProc()==1)) { cout << " Number of local nodes in X direction = " << numNodesX << endl << " Number of local nodes in Y direction = " << numNodesY << endl << " Number of global nodes in X direction = " << numNodesX*numProcsX << endl << " Number of global nodes in Y direction = " << numNodesY*numProcsY << endl << " Number of local nonzero entries = " << numNodesX*numNodesY*numPoints << endl << " Number of global nonzero entries = " << numNodesX*numNodesY*numPoints*numProcsX*numProcsY << endl << " Number of Processors in X direction = " << numProcsX << endl << " Number of Processors in Y direction = " << numProcsY << endl << " Number of Points in stencil = " << numPoints << endl << endl; } // Print blank line to keep output columns lined up if (summary && comm.NumProc()>1) cout << endl << endl << endl << endl << endl << endl << endl << endl<< endl << endl; if (numProcsX*numProcsY!=comm.NumProc()) { cerr << "Number of processors = " << comm.NumProc() << endl << " is not the product of " << numProcsX << " and " << numProcsY << endl << endl; return(1); } if (numPoints!=5 && numPoints!=9 && numPoints!=25) { cerr << "Number of points specified = " << numPoints << endl << " is not 5, 9, 25" << endl << endl; return(1); } if (numNodesX*numNodesY<=0) { cerr << "Product of number of nodes is <= zero" << endl << endl; return(1); } Epetra_IntSerialDenseVector Xoff, XLoff, XUoff; Epetra_IntSerialDenseVector Yoff, YLoff, YUoff; if (numPoints==5) { // Generate a 5-point 2D Finite Difference matrix Xoff.Size(5); Yoff.Size(5); Xoff[0] = -1; Xoff[1] = 1; Xoff[2] = 0; Xoff[3] = 0; Xoff[4] = 0; Yoff[0] = 0; Yoff[1] = 0; Yoff[2] = 0; Yoff[3] = -1; Yoff[4] = 1; // Generate a 2-point 2D Lower triangular Finite Difference matrix XLoff.Size(2); YLoff.Size(2); XLoff[0] = -1; XLoff[1] = 0; YLoff[0] = 0; YLoff[1] = -1; // Generate a 3-point 2D upper triangular Finite Difference matrix XUoff.Size(3); YUoff.Size(3); XUoff[0] = 0; XUoff[1] = 1; XUoff[2] = 0; YUoff[0] = 0; YUoff[1] = 0; YUoff[2] = 1; } else if (numPoints==9) { // Generate a 9-point 2D Finite Difference matrix Xoff.Size(9); Yoff.Size(9); Xoff[0] = -1; Xoff[1] = 0; Xoff[2] = 1; Yoff[0] = -1; Yoff[1] = -1; Yoff[2] = -1; Xoff[3] = -1; Xoff[4] = 0; Xoff[5] = 1; Yoff[3] = 0; Yoff[4] = 0; Yoff[5] = 0; Xoff[6] = -1; Xoff[7] = 0; Xoff[8] = 1; Yoff[6] = 1; Yoff[7] = 1; Yoff[8] = 1; // Generate a 5-point lower triangular 2D Finite Difference matrix XLoff.Size(5); YLoff.Size(5); XLoff[0] = -1; XLoff[1] = 0; Xoff[2] = 1; YLoff[0] = -1; YLoff[1] = -1; Yoff[2] = -1; XLoff[3] = -1; XLoff[4] = 0; YLoff[3] = 0; YLoff[4] = 0; // Generate a 4-point upper triangular 2D Finite Difference matrix XUoff.Size(4); YUoff.Size(4); XUoff[0] = 1; YUoff[0] = 0; XUoff[1] = -1; XUoff[2] = 0; XUoff[3] = 1; YUoff[1] = 1; YUoff[2] = 1; YUoff[3] = 1; } else { // Generate a 25-point 2D Finite Difference matrix Xoff.Size(25); Yoff.Size(25); int xi = 0, yi = 0; int xo = -2, yo = -2; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; xo = -2, yo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; xo = -2, yo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; xo = -2, yo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; xo = -2, yo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; // Generate a 13-point lower triangular 2D Finite Difference matrix XLoff.Size(13); YLoff.Size(13); xi = 0, yi = 0; xo = -2, yo = -2; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ; xo = -2, yo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ; xo = -2, yo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ; // Generate a 13-point upper triangular 2D Finite Difference matrix XUoff.Size(13); YUoff.Size(13); xi = 0, yi = 0; xo = 0, yo = 0; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ; xo = -2, yo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ; xo = -2, yo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ; } Epetra_Map * map; Epetra_Map * mapL; Epetra_Map * mapU; Epetra_CrsMatrix * A; Epetra_CrsMatrix * L; Epetra_CrsMatrix * U; Epetra_MultiVector * b; Epetra_MultiVector * bt; Epetra_MultiVector * xexact; Epetra_MultiVector * bL; Epetra_MultiVector * btL; Epetra_MultiVector * xexactL; Epetra_MultiVector * bU; Epetra_MultiVector * btU; Epetra_MultiVector * xexactU; Epetra_SerialDenseVector resvec(0); //Timings Epetra_Flops flopcounter; Epetra_Time timer(comm); #ifdef EPETRA_VERY_SHORT_PERFTEST int jstop = 1; #elif EPETRA_SHORT_PERFTEST int jstop = 1; #else int jstop = 2; #endif for (int j=0; j<jstop; j++) { for (int k=1; k<17; k++) { #ifdef EPETRA_VERY_SHORT_PERFTEST if (k<3 || (k%4==0 && k<9)) { #elif EPETRA_SHORT_PERFTEST if (k<6 || k%4==0) { #else if (k<7 || k%2==0) { #endif int nrhs=k; if (verbose) cout << "\n*************** Results for " << nrhs << " RHS with "; bool StaticProfile = (j!=0); if (verbose) { if (StaticProfile) cout << " static profile\n"; else cout << " dynamic profile\n"; } GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, numPoints, Xoff.Values(), Yoff.Values(), nrhs, comm, verbose, summary, map, A, b, bt, xexact, StaticProfile, false); #ifdef EPETRA_HAVE_JADMATRIX timer.ResetStartTime(); Epetra_JadMatrix JA(*A); elapsed_time = timer.ElapsedTime(); if (verbose) cout << "Time to create Jagged diagonal matrix = " << elapsed_time << endl; //cout << "A = " << *A << endl; //cout << "JA = " << JA << endl; runJadMatrixTests(&JA, b, bt, xexact, StaticProfile, verbose, summary); #endif runMatrixTests(A, b, bt, xexact, StaticProfile, verbose, summary); delete A; delete b; delete bt; delete xexact; GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, XLoff.Length(), XLoff.Values(), YLoff.Values(), nrhs, comm, verbose, summary, mapL, L, bL, btL, xexactL, StaticProfile, true); GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, XUoff.Length(), XUoff.Values(), YUoff.Values(), nrhs, comm, verbose, summary, mapU, U, bU, btU, xexactU, StaticProfile, true); runLUMatrixTests(L, bL, btL, xexactL, U, bU, btU, xexactU, StaticProfile, verbose, summary); delete L; delete bL; delete btL; delete xexactL; delete mapL; delete U; delete bU; delete btU; delete xexactU; delete mapU; Epetra_MultiVector q(*map, nrhs); Epetra_MultiVector z(q); Epetra_MultiVector r(q); delete map; q.SetFlopCounter(flopcounter); z.SetFlopCounter(q); r.SetFlopCounter(q); resvec.Resize(nrhs); flopcounter.ResetFlops(); timer.ResetStartTime(); //10 norms for( int i = 0; i < 10; ++i ) q.Norm2( resvec.Values() ); elapsed_time = timer.ElapsedTime(); total_flops = q.Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "\nTotal MFLOPs for 10 Norm2's= " << MFLOPs << endl; if (summary) { if (comm.NumProc()==1) cout << "Norm2" << '\t'; cout << MFLOPs << endl; } flopcounter.ResetFlops(); timer.ResetStartTime(); //10 dot's for( int i = 0; i < 10; ++i ) q.Dot(z, resvec.Values()); elapsed_time = timer.ElapsedTime(); total_flops = q.Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "Total MFLOPs for 10 Dot's = " << MFLOPs << endl; if (summary) { if (comm.NumProc()==1) cout << "DotProd" << '\t'; cout << MFLOPs << endl; } flopcounter.ResetFlops(); timer.ResetStartTime(); //10 dot's for( int i = 0; i < 10; ++i ) q.Update(1.0, z, 1.0, r, 0.0); elapsed_time = timer.ElapsedTime(); total_flops = q.Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "Total MFLOPs for 10 Updates= " << MFLOPs << endl; if (summary) { if (comm.NumProc()==1) cout << "Update" << '\t'; cout << MFLOPs << endl; } } } } #ifdef EPETRA_MPI MPI_Finalize() ; #endif return ierr ; } // Constructs a 2D PDE finite difference matrix using the list of x and y offsets. // // nx (In) - number of grid points in x direction // ny (In) - number of grid points in y direction // The total number of equations will be nx*ny ordered such that the x direction changes // most rapidly: // First equation is at point (0,0) // Second at (1,0) // ... // nx equation at (nx-1,0) // nx+1st equation at (0,1) // numPoints (In) - number of points in finite difference stencil // xoff (In) - stencil offsets in x direction (of length numPoints) // yoff (In) - stencil offsets in y direction (of length numPoints) // A standard 5-point finite difference stencil would be described as: // numPoints = 5 // xoff = [-1, 1, 0, 0, 0] // yoff = [ 0, 0, 0, -1, 1] // nrhs - Number of rhs to generate. (First interface produces vectors, so nrhs is not needed // comm (In) - an Epetra_Comm object describing the parallel machine (numProcs and my proc ID) // map (Out) - Epetra_Map describing distribution of matrix and vectors/multivectors // A (Out) - Epetra_CrsMatrix constructed for nx by ny grid using prescribed stencil // Off-diagonal values are random between 0 and 1. If diagonal is part of stencil, // diagonal will be slightly diag dominant. // b (Out) - Generated RHS. Values satisfy b = A*xexact // bt (Out) - Generated RHS. Values satisfy b = A'*xexact // xexact (Out) - Generated exact solution to Ax = b and b' = A'xexact // Note: Caller of this function is responsible for deleting all output objects. void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, int * xoff, int * yoff, const Epetra_Comm &comm, bool verbose, bool summary, Epetra_Map *& map, Epetra_CrsMatrix *& A, Epetra_Vector *& b, Epetra_Vector *& bt, Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly) { Epetra_MultiVector * b1, * bt1, * xexact1; GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, numPoints, xoff, yoff, 1, comm, verbose, summary, map, A, b1, bt1, xexact1, StaticProfile, MakeLocalOnly); b = dynamic_cast<Epetra_Vector *>(b1); bt = dynamic_cast<Epetra_Vector *>(bt1); xexact = dynamic_cast<Epetra_Vector *>(xexact1); return; } void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, int * xoff, int * yoff, int nrhs, const Epetra_Comm &comm, bool verbose, bool summary, Epetra_Map *& map, Epetra_CrsMatrix *& A, Epetra_MultiVector *& b, Epetra_MultiVector *& bt, Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly) { Epetra_Time timer(comm); // Determine my global IDs long long * myGlobalElements; GenerateMyGlobalElements(numNodesX, numNodesY, numProcsX, numProcsY, comm.MyPID(), myGlobalElements); int numMyEquations = numNodesX*numNodesY; map = new Epetra_Map((long long)-1, numMyEquations, myGlobalElements, 0, comm); // Create map with 2D block partitioning. delete [] myGlobalElements; long long numGlobalEquations = map->NumGlobalElements64(); int profile = 0; if (StaticProfile) profile = numPoints; #ifdef EPETRA_HAVE_STATICPROFILE if (MakeLocalOnly) A = new Epetra_CrsMatrix(Copy, *map, *map, profile, StaticProfile); // Construct matrix with rowmap=colmap else A = new Epetra_CrsMatrix(Copy, *map, profile, StaticProfile); // Construct matrix #else if (MakeLocalOnly) A = new Epetra_CrsMatrix(Copy, *map, *map, profile); // Construct matrix with rowmap=colmap else A = new Epetra_CrsMatrix(Copy, *map, profile); // Construct matrix #endif long long * indices = new long long[numPoints]; double * values = new double[numPoints]; double dnumPoints = (double) numPoints; int nx = numNodesX*numProcsX; for (int i=0; i<numMyEquations; i++) { long long rowID = map->GID64(i); int numIndices = 0; for (int j=0; j<numPoints; j++) { long long colID = rowID + xoff[j] + nx*yoff[j]; // Compute column ID based on stencil offsets if (colID>-1 && colID<numGlobalEquations) { indices[numIndices] = colID; double value = - ((double) rand())/ ((double) RAND_MAX); if (colID==rowID) values[numIndices++] = dnumPoints - value; // Make diagonal dominant else values[numIndices++] = value; } } //cout << "Building row " << rowID << endl; A->InsertGlobalValues(rowID, numIndices, values, indices); } delete [] indices; delete [] values; double insertTime = timer.ElapsedTime(); timer.ResetStartTime(); A->FillComplete(false); double fillCompleteTime = timer.ElapsedTime(); if (verbose) cout << "Time to insert matrix values = " << insertTime << endl << "Time to complete fill = " << fillCompleteTime << endl; if (summary) { if (comm.NumProc()==1) cout << "InsertTime" << '\t'; cout << insertTime << endl; if (comm.NumProc()==1) cout << "FillCompleteTime" << '\t'; cout << fillCompleteTime << endl; } if (nrhs<=1) { b = new Epetra_Vector(*map); bt = new Epetra_Vector(*map); xexact = new Epetra_Vector(*map); } else { b = new Epetra_MultiVector(*map, nrhs); bt = new Epetra_MultiVector(*map, nrhs); xexact = new Epetra_MultiVector(*map, nrhs); } xexact->Random(); // Fill xexact with random values A->Multiply(false, *xexact, *b); A->Multiply(true, *xexact, *bt); return; }
int main(int argc, char *argv[]) { int ierr = 0, i, j, k; #ifdef EPETRA_MPI MPI_Init(&argc,&argv); Epetra_MpiComm Comm( MPI_COMM_WORLD ); #else Epetra_SerialComm Comm; #endif bool verbose = false; // Check if we should print results to standard out if (argc>1) if (argv[1][0]=='-' && argv[1][1]=='v') verbose = true; if(verbose && Comm.MyPID()==0) std::cout << Epetra_Version() << std::endl << std::endl; int rank = Comm.MyPID(); // char tmp; // if (rank==0) std::cout << "Press any key to continue..."<< std::endl; // if (rank==0) cin >> tmp; // Comm.Barrier(); Comm.SetTracebackMode(0); // This should shut down any error traceback reporting if (verbose) std::cout << Comm << std::endl; // bool verbose1 = verbose; // Redefine verbose to only print on PE 0 if (verbose && rank!=0) verbose = false; int N = 20; int NRHS = 4; double * A = new double[N*N]; double * A1 = new double[N*N]; double * X = new double[(N+1)*NRHS]; double * X1 = new double[(N+1)*NRHS]; int LDX = N+1; int LDX1 = N+1; double * B = new double[N*NRHS]; double * B1 = new double[N*NRHS]; int LDB = N; int LDB1 = N; int LDA = N; int LDA1 = LDA; double OneNorm1; bool Upper = false; Epetra_SerialSpdDenseSolver solver; Epetra_SerialSymDenseMatrix * Matrix; for (int kk=0; kk<2; kk++) { for (i=1; i<=N; i++) { GenerateHilbert(A, LDA, i); OneNorm1 = 0.0; for (j=1; j<=i; j++) OneNorm1 += 1.0/((double) j); // 1-Norm = 1 + 1/2 + ...+1/n if (kk==0) { Matrix = new Epetra_SerialSymDenseMatrix(View, A, LDA, i); LDA1 = LDA; } else { Matrix = new Epetra_SerialSymDenseMatrix(Copy, A, LDA, i); LDA1 = i; } GenerateHilbert(A1, LDA1, i); if (kk==1) { solver.FactorWithEquilibration(true); Matrix->SetUpper(); Upper = true; solver.SolveToRefinedSolution(false); } for (k=0; k<NRHS; k++) for (j=0; j<i; j++) { B[j+k*LDB] = 1.0/((double) (k+3)*(j+3)); B1[j+k*LDB1] = B[j+k*LDB1]; } Epetra_SerialDenseMatrix Epetra_B(View, B, LDB, i, NRHS); Epetra_SerialDenseMatrix Epetra_X(View, X, LDX, i, NRHS); solver.SetMatrix(*Matrix); solver.SetVectors(Epetra_X, Epetra_B); ierr = check(solver, A1, LDA1, i, NRHS, OneNorm1, B1, LDB1, X1, LDX1, Upper, verbose); assert (ierr>-1); delete Matrix; if (ierr!=0) { if (verbose) std::cout << "Factorization failed due to bad conditioning. This is normal if SCOND is small." << std::endl; break; } } } delete [] A; delete [] A1; delete [] X; delete [] X1; delete [] B; delete [] B1; ///////////////////////////////////////////////////////////////////// // Now test norms and scaling functions ///////////////////////////////////////////////////////////////////// Epetra_SerialSymDenseMatrix D; double ScalarA = 2.0; int DM = 10; int DN = 10; D.Shape(DM); for (j=0; j<DN; j++) for (i=0; i<DM; i++) D[j][i] = (double) (1+i+j*DM) ; //std::cout << D << std::endl; double NormInfD_ref = (double)(DM*(DN*(DN+1))/2); double NormOneD_ref = NormInfD_ref; double NormInfD = D.NormInf(); double NormOneD = D.NormOne(); if (verbose) { std::cout << " *** Before scaling *** " << std::endl << " Computed one-norm of test matrix = " << NormOneD << std::endl << " Expected one-norm = " << NormOneD_ref << std::endl << " Computed inf-norm of test matrix = " << NormInfD << std::endl << " Expected inf-norm = " << NormInfD_ref << std::endl; } D.Scale(ScalarA); // Scale entire D matrix by this value //std::cout << D << std::endl; NormInfD = D.NormInf(); NormOneD = D.NormOne(); if (verbose) { std::cout << " *** After scaling *** " << std::endl << " Computed one-norm of test matrix = " << NormOneD << std::endl << " Expected one-norm = " << NormOneD_ref*ScalarA << std::endl << " Computed inf-norm of test matrix = " << NormInfD << std::endl << " Expected inf-norm = " << NormInfD_ref*ScalarA << std::endl; } ///////////////////////////////////////////////////////////////////// // Now test for larger system, both correctness and performance. ///////////////////////////////////////////////////////////////////// N = 2000; NRHS = 5; LDA = N; LDB = N; LDX = N; if (verbose) std::cout << "\n\nComputing factor of an " << N << " x " << N << " SPD matrix...Please wait.\n\n" << std::endl; // Define A and X A = new double[LDA*N]; X = new double[LDB*NRHS]; for (j=0; j<N; j++) { for (k=0; k<NRHS; k++) X[j+k*LDX] = 1.0/((double) (j+5+k)); for (i=0; i<N; i++) { if (i==j) A[i+j*LDA] = 100.0 + i; else A[i+j*LDA] = -1.0/((double) (i+10)*(j+10)); } } // Define Epetra_SerialDenseMatrix object Epetra_SerialSymDenseMatrix BigMatrix(Copy, A, LDA, N); Epetra_SerialSymDenseMatrix OrigBigMatrix(View, A, LDA, N); Epetra_SerialSpdDenseSolver BigSolver; BigSolver.FactorWithEquilibration(true); BigSolver.SetMatrix(BigMatrix); // Time factorization Epetra_Flops counter; BigSolver.SetFlopCounter(counter); Epetra_Time Timer(Comm); double tstart = Timer.ElapsedTime(); ierr = BigSolver.Factor(); if (ierr!=0 && verbose) std::cout << "Error in factorization = "<<ierr<< std::endl; assert(ierr==0); double time = Timer.ElapsedTime() - tstart; double FLOPS = counter.Flops(); double MFLOPS = FLOPS/time/1000000.0; if (verbose) std::cout << "MFLOPS for Factorization = " << MFLOPS << std::endl; // Define Left hand side and right hand side Epetra_SerialDenseMatrix LHS(View, X, LDX, N, NRHS); Epetra_SerialDenseMatrix RHS; RHS.Shape(N,NRHS); // Allocate RHS // Compute RHS from A and X Epetra_Flops RHS_counter; RHS.SetFlopCounter(RHS_counter); tstart = Timer.ElapsedTime(); RHS.Multiply('L', 1.0, OrigBigMatrix, LHS, 0.0); // Symmetric Matrix-multiply time = Timer.ElapsedTime() - tstart; Epetra_SerialDenseMatrix OrigRHS = RHS; FLOPS = RHS_counter.Flops(); MFLOPS = FLOPS/time/1000000.0; if (verbose) std::cout << "MFLOPS to build RHS (NRHS = " << NRHS <<") = " << MFLOPS << std::endl; // Set LHS and RHS and solve BigSolver.SetVectors(LHS, RHS); tstart = Timer.ElapsedTime(); ierr = BigSolver.Solve(); if (ierr==1 && verbose) std::cout << "LAPACK guidelines suggest this matrix might benefit from equilibration." << std::endl; else if (ierr!=0 && verbose) std::cout << "Error in solve = "<<ierr<< std::endl; assert(ierr>=0); time = Timer.ElapsedTime() - tstart; FLOPS = BigSolver.Flops(); MFLOPS = FLOPS/time/1000000.0; if (verbose) std::cout << "MFLOPS for Solve (NRHS = " << NRHS <<") = " << MFLOPS << std::endl; double * resid = new double[NRHS]; bool OK = Residual(N, NRHS, A, LDA, BigSolver.X(), BigSolver.LDX(), OrigRHS.A(), OrigRHS.LDA(), resid); if (verbose) { if (!OK) std::cout << "************* Residual do not meet tolerance *************" << std::endl; for (i=0; i<NRHS; i++) std::cout << "Residual[" << i <<"] = "<< resid[i] << std::endl; std::cout << std::endl; } // Solve again using the Epetra_SerialDenseVector class for LHS and RHS Epetra_SerialDenseVector X2; Epetra_SerialDenseVector B2; X2.Size(BigMatrix.N()); B2.Size(BigMatrix.M()); int length = BigMatrix.N(); {for (int kk=0; kk<length; kk++) X2[kk] = ((double ) kk)/ ((double) length);} // Define entries of X2 RHS_counter.ResetFlops(); B2.SetFlopCounter(RHS_counter); tstart = Timer.ElapsedTime(); B2.Multiply('N', 'N', 1.0, OrigBigMatrix, X2, 0.0); // Define B2 = A*X2 time = Timer.ElapsedTime() - tstart; Epetra_SerialDenseVector OrigB2 = B2; FLOPS = RHS_counter.Flops(); MFLOPS = FLOPS/time/1000000.0; if (verbose) std::cout << "MFLOPS to build single RHS = " << MFLOPS << std::endl; // Set LHS and RHS and solve BigSolver.SetVectors(X2, B2); tstart = Timer.ElapsedTime(); ierr = BigSolver.Solve(); time = Timer.ElapsedTime() - tstart; if (ierr==1 && verbose) std::cout << "LAPACK guidelines suggest this matrix might benefit from equilibration." << std::endl; else if (ierr!=0 && verbose) std::cout << "Error in solve = "<<ierr<< std::endl; assert(ierr>=0); FLOPS = counter.Flops(); MFLOPS = FLOPS/time/1000000.0; if (verbose) std::cout << "MFLOPS to solve single RHS = " << MFLOPS << std::endl; OK = Residual(N, 1, A, LDA, BigSolver.X(), BigSolver.LDX(), OrigB2.A(), OrigB2.LDA(), resid); if (verbose) { if (!OK) std::cout << "************* Residual do not meet tolerance *************" << std::endl; std::cout << "Residual = "<< resid[0] << std::endl; } delete [] resid; delete [] A; delete [] X; /////////////////////////////////////////////////// // Now test default constructor and index operators /////////////////////////////////////////////////// N = 5; Epetra_SerialSymDenseMatrix C; // Implicit call to default constructor, should not need to call destructor C.Shape(5); // Make it 5 by 5 double * C1 = new double[N*N]; GenerateHilbert(C1, N, N); // Generate Hilber matrix C1[1+2*N] = 1000.0; // Make matrix nonsymmetric // Fill values of C with Hilbert values for (i=0; i<N; i++) for (j=0; j<N; j++) C(i,j) = C1[i+j*N]; // Test if values are correctly written and read for (i=0; i<N; i++) for (j=0; j<N; j++) { assert(C(i,j) == C1[i+j*N]); assert(C(i,j) == C[j][i]); } if (verbose) std::cout << "Default constructor and index operator check OK. Values of Hilbert matrix = " << std::endl << C << std::endl << "Values should be 1/(i+j+1), except value (1,2) should be 1000" << std::endl; delete [] C1; #ifdef EPETRA_MPI MPI_Finalize() ; #endif /* end main */ return ierr ; }
int main(int argc, char *argv[]) { int ierr = 0, forierr = 0; bool debug = false; #ifdef EPETRA_MPI // Initialize MPI MPI_Init(&argc,&argv); int rank; // My process ID MPI_Comm_rank(MPI_COMM_WORLD, &rank); Epetra_MpiComm Comm( MPI_COMM_WORLD ); #else int rank = 0; Epetra_SerialComm Comm; #endif bool verbose = false; // Check if we should print results to standard out if (argc>1) if (argv[1][0]=='-' && argv[1][1]=='v') verbose = true; int verbose_int = verbose ? 1 : 0; Comm.Broadcast(&verbose_int, 1, 0); verbose = verbose_int==1 ? true : false; // char tmp; // if (rank==0) cout << "Press any key to continue..."<< std::endl; // if (rank==0) cin >> tmp; // Comm.Barrier(); Comm.SetTracebackMode(0); // This should shut down any error traceback reporting int MyPID = Comm.MyPID(); int NumProc = Comm.NumProc(); if(verbose && MyPID==0) cout << Epetra_Version() << std::endl << std::endl; if (verbose) cout << "Processor "<<MyPID<<" of "<< NumProc << " is alive."<<endl; bool verbose1 = verbose; // Redefine verbose to only print on PE 0 if(verbose && rank!=0) verbose = false; int NumMyEquations = 10000; int NumGlobalEquations = (NumMyEquations * NumProc) + EPETRA_MIN(NumProc,3); if(MyPID < 3) NumMyEquations++; // Construct a Map that puts approximately the same Number of equations on each processor Epetra_Map Map(NumGlobalEquations, NumMyEquations, 0, Comm); // Get update list and number of local equations from newly created Map int* MyGlobalElements = new int[Map.NumMyElements()]; Map.MyGlobalElements(MyGlobalElements); // Create an integer vector NumNz that is used to build the Petra Matrix. // NumNz[i] is the Number of OFF-DIAGONAL term for the ith global equation on this processor int* NumNz = new int[NumMyEquations]; // We are building a tridiagonal matrix where each row has (-1 2 -1) // So we need 2 off-diagonal terms (except for the first and last equation) for (int i = 0; i < NumMyEquations; i++) if((MyGlobalElements[i] == 0) || (MyGlobalElements[i] == NumGlobalEquations - 1)) NumNz[i] = 1; else NumNz[i] = 2; // Create a Epetra_Matrix Epetra_CrsMatrix A(Copy, Map, NumNz); EPETRA_TEST_ERR(A.IndicesAreGlobal(),ierr); EPETRA_TEST_ERR(A.IndicesAreLocal(),ierr); // Add rows one-at-a-time // Need some vectors to help // Off diagonal Values will always be -1 double* Values = new double[2]; Values[0] = -1.0; Values[1] = -1.0; int* Indices = new int[2]; double two = 2.0; int NumEntries; forierr = 0; for (int i = 0; i < NumMyEquations; i++) { if(MyGlobalElements[i] == 0) { Indices[0] = 1; NumEntries = 1; } else if (MyGlobalElements[i] == NumGlobalEquations-1) { Indices[0] = NumGlobalEquations-2; NumEntries = 1; } else { Indices[0] = MyGlobalElements[i]-1; Indices[1] = MyGlobalElements[i]+1; NumEntries = 2; } forierr += !(A.InsertGlobalValues(MyGlobalElements[i], NumEntries, Values, Indices)==0); forierr += !(A.InsertGlobalValues(MyGlobalElements[i], 1, &two, MyGlobalElements+i)>0); // Put in the diagonal entry } EPETRA_TEST_ERR(forierr,ierr); int * indexOffsetTmp; int * indicesTmp; double * valuesTmp; // Finish up EPETRA_TEST_ERR(!(A.IndicesAreGlobal()),ierr); EPETRA_TEST_ERR(!(A.ExtractCrsDataPointers(indexOffsetTmp, indicesTmp, valuesTmp)==-1),ierr); // Should fail EPETRA_TEST_ERR(!(A.FillComplete(false)==0),ierr); EPETRA_TEST_ERR(!(A.ExtractCrsDataPointers(indexOffsetTmp, indicesTmp, valuesTmp)==-1),ierr); // Should fail EPETRA_TEST_ERR(!(A.IndicesAreLocal()),ierr); EPETRA_TEST_ERR(A.StorageOptimized(),ierr); A.OptimizeStorage(); EPETRA_TEST_ERR(!(A.StorageOptimized()),ierr); EPETRA_TEST_ERR(!(A.ExtractCrsDataPointers(indexOffsetTmp, indicesTmp, valuesTmp)==0),ierr); // Should succeed const Epetra_CrsGraph & GofA = A.Graph(); EPETRA_TEST_ERR((indicesTmp!=GofA[0] || valuesTmp!=A[0]),ierr); // Extra check to see if operator[] is consistent EPETRA_TEST_ERR(A.UpperTriangular(),ierr); EPETRA_TEST_ERR(A.LowerTriangular(),ierr); int NumMyNonzeros = 3 * NumMyEquations; if(A.LRID(0) >= 0) NumMyNonzeros--; // If I own first global row, then there is one less nonzero if(A.LRID(NumGlobalEquations-1) >= 0) NumMyNonzeros--; // If I own last global row, then there is one less nonzero EPETRA_TEST_ERR(check(A, NumMyEquations, NumGlobalEquations, NumMyNonzeros, 3*NumGlobalEquations-2, MyGlobalElements, verbose),ierr); forierr = 0; for (int i = 0; i < NumMyEquations; i++) forierr += !(A.NumGlobalEntries(MyGlobalElements[i])==NumNz[i]+1); EPETRA_TEST_ERR(forierr,ierr); forierr = 0; for (int i = 0; i < NumMyEquations; i++) forierr += !(A.NumMyEntries(i)==NumNz[i]+1); EPETRA_TEST_ERR(forierr,ierr); if (verbose) cout << "\n\nNumEntries function check OK" << std::endl<< std::endl; EPETRA_TEST_ERR(check_graph_sharing(Comm),ierr); // Create vectors for Power method Epetra_Vector q(Map); Epetra_Vector z(Map); Epetra_Vector resid(Map); // variable needed for iteration double lambda = 0.0; // int niters = 10000; int niters = 200; double tolerance = 1.0e-1; ///////////////////////////////////////////////////////////////////////////////////////////////// // Iterate Epetra_Flops flopcounter; A.SetFlopCounter(flopcounter); q.SetFlopCounter(A); z.SetFlopCounter(A); resid.SetFlopCounter(A); Epetra_Time timer(Comm); EPETRA_TEST_ERR(power_method(false, A, q, z, resid, &lambda, niters, tolerance, verbose),ierr); double elapsed_time = timer.ElapsedTime(); double total_flops = A.Flops() + q.Flops() + z.Flops() + resid.Flops(); double MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "\n\nTotal MFLOPs for first solve = " << MFLOPs << std::endl<< std::endl; ///////////////////////////////////////////////////////////////////////////////////////////////// // Solve transpose problem if (verbose) cout << "\n\nUsing transpose of matrix and solving again (should give same result).\n\n" << std::endl; // Iterate lambda = 0.0; flopcounter.ResetFlops(); timer.ResetStartTime(); EPETRA_TEST_ERR(power_method(true, A, q, z, resid, &lambda, niters, tolerance, verbose),ierr); elapsed_time = timer.ElapsedTime(); total_flops = A.Flops() + q.Flops() + z.Flops() + resid.Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "\n\nTotal MFLOPs for transpose solve = " << MFLOPs << std::endl<< endl; ///////////////////////////////////////////////////////////////////////////////////////////////// // Increase diagonal dominance if (verbose) cout << "\n\nIncreasing the magnitude of first diagonal term and solving again\n\n" << endl; if (A.MyGlobalRow(0)) { int numvals = A.NumGlobalEntries(0); double * Rowvals = new double [numvals]; int * Rowinds = new int [numvals]; A.ExtractGlobalRowCopy(0, numvals, numvals, Rowvals, Rowinds); // Get A[0,0] for (int i=0; i<numvals; i++) if (Rowinds[i] == 0) Rowvals[i] *= 10.0; A.ReplaceGlobalValues(0, numvals, Rowvals, Rowinds); delete [] Rowvals; delete [] Rowinds; } // Iterate (again) lambda = 0.0; flopcounter.ResetFlops(); timer.ResetStartTime(); EPETRA_TEST_ERR(power_method(false, A, q, z, resid, &lambda, niters, tolerance, verbose),ierr); elapsed_time = timer.ElapsedTime(); total_flops = A.Flops() + q.Flops() + z.Flops() + resid.Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "\n\nTotal MFLOPs for second solve = " << MFLOPs << endl<< endl; ///////////////////////////////////////////////////////////////////////////////////////////////// // Solve transpose problem if (verbose) cout << "\n\nUsing transpose of matrix and solving again (should give same result).\n\n" << endl; // Iterate (again) lambda = 0.0; flopcounter.ResetFlops(); timer.ResetStartTime(); EPETRA_TEST_ERR(power_method(true, A, q, z, resid, &lambda, niters, tolerance, verbose),ierr); elapsed_time = timer.ElapsedTime(); total_flops = A.Flops() + q.Flops() + z.Flops() + resid.Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "\n\nTotal MFLOPs for tranpose of second solve = " << MFLOPs << endl<< endl; if (verbose) cout << "\n\n*****Testing constant entry constructor" << endl<< endl; Epetra_CrsMatrix AA(Copy, Map, 5); if (debug) Comm.Barrier(); double dble_one = 1.0; for (int i=0; i< NumMyEquations; i++) AA.InsertGlobalValues(MyGlobalElements[i], 1, &dble_one, MyGlobalElements+i); // Note: All processors will call the following Insert routines, but only the processor // that owns it will actually do anything int One = 1; if (AA.MyGlobalRow(0)) { EPETRA_TEST_ERR(!(AA.InsertGlobalValues(0, 0, &dble_one, &One)==0),ierr); } else EPETRA_TEST_ERR(!(AA.InsertGlobalValues(0, 1, &dble_one, &One)==-1),ierr); EPETRA_TEST_ERR(!(AA.FillComplete(false)==0),ierr); EPETRA_TEST_ERR(AA.StorageOptimized(),ierr); EPETRA_TEST_ERR(!(AA.UpperTriangular()),ierr); EPETRA_TEST_ERR(!(AA.LowerTriangular()),ierr); if (debug) Comm.Barrier(); EPETRA_TEST_ERR(check(AA, NumMyEquations, NumGlobalEquations, NumMyEquations, NumGlobalEquations, MyGlobalElements, verbose),ierr); if (debug) Comm.Barrier(); forierr = 0; for (int i=0; i<NumMyEquations; i++) forierr += !(AA.NumGlobalEntries(MyGlobalElements[i])==1); EPETRA_TEST_ERR(forierr,ierr); if (verbose) cout << "\n\nNumEntries function check OK" << endl<< endl; if (debug) Comm.Barrier(); if (verbose) cout << "\n\n*****Testing copy constructor" << endl<< endl; Epetra_CrsMatrix B(AA); EPETRA_TEST_ERR(check(B, NumMyEquations, NumGlobalEquations, NumMyEquations, NumGlobalEquations, MyGlobalElements, verbose),ierr); forierr = 0; for (int i=0; i<NumMyEquations; i++) forierr += !(B.NumGlobalEntries(MyGlobalElements[i])==1); EPETRA_TEST_ERR(forierr,ierr); if (verbose) cout << "\n\nNumEntries function check OK" << endl<< endl; if (debug) Comm.Barrier(); if (verbose) cout << "\n\n*****Testing local view constructor" << endl<< endl; Epetra_CrsMatrix BV(View, AA.RowMap(), AA.ColMap(), 0); forierr = 0; int* Inds; double* Vals; for (int i = 0; i < NumMyEquations; i++) { forierr += !(AA.ExtractMyRowView(i, NumEntries, Vals, Inds)==0); forierr += !(BV.InsertMyValues(i, NumEntries, Vals, Inds)==0); } BV.FillComplete(false); EPETRA_TEST_ERR(check(BV, NumMyEquations, NumGlobalEquations, NumMyEquations, NumGlobalEquations, MyGlobalElements, verbose),ierr); forierr = 0; for (int i=0; i<NumMyEquations; i++) forierr += !(BV.NumGlobalEntries(MyGlobalElements[i])==1); EPETRA_TEST_ERR(forierr,ierr); if (verbose) cout << "\n\nNumEntries function check OK" << endl<< endl; if (debug) Comm.Barrier(); if (verbose) cout << "\n\n*****Testing post construction modifications" << endl<< endl; EPETRA_TEST_ERR(!(B.InsertGlobalValues(0, 1, &dble_one, &One)==-2),ierr); // Release all objects delete [] NumNz; delete [] Values; delete [] Indices; delete [] MyGlobalElements; if (verbose1) { // Test ostream << operator (if verbose1) // Construct a Map that puts 2 equations on each PE int NumMyElements1 = 2; int NumMyEquations1 = NumMyElements1; int NumGlobalEquations1 = NumMyEquations1*NumProc; Epetra_Map Map1(-1, NumMyElements1, 0, Comm); // Get update list and number of local equations from newly created Map int * MyGlobalElements1 = new int[Map1.NumMyElements()]; Map1.MyGlobalElements(MyGlobalElements1); // Create an integer vector NumNz that is used to build the Petra Matrix. // NumNz[i] is the Number of OFF-DIAGONAL term for the ith global equation on this processor int * NumNz1 = new int[NumMyEquations1]; // We are building a tridiagonal matrix where each row has (-1 2 -1) // So we need 2 off-diagonal terms (except for the first and last equation) for (int i=0; i<NumMyEquations1; i++) if (MyGlobalElements1[i]==0 || MyGlobalElements1[i] == NumGlobalEquations1-1) NumNz1[i] = 1; else NumNz1[i] = 2; // Create a Epetra_Matrix Epetra_CrsMatrix A1(Copy, Map1, NumNz1); // Add rows one-at-a-time // Need some vectors to help // Off diagonal Values will always be -1 double *Values1 = new double[2]; Values1[0] = -1.0; Values1[1] = -1.0; int *Indices1 = new int[2]; double two1 = 2.0; int NumEntries1; forierr = 0; for (int i=0; i<NumMyEquations1; i++) { if (MyGlobalElements1[i]==0) { Indices1[0] = 1; NumEntries1 = 1; } else if (MyGlobalElements1[i] == NumGlobalEquations1-1) { Indices1[0] = NumGlobalEquations1-2; NumEntries1 = 1; } else { Indices1[0] = MyGlobalElements1[i]-1; Indices1[1] = MyGlobalElements1[i]+1; NumEntries1 = 2; } forierr += !(A1.InsertGlobalValues(MyGlobalElements1[i], NumEntries1, Values1, Indices1)==0); forierr += !(A1.InsertGlobalValues(MyGlobalElements1[i], 1, &two1, MyGlobalElements1+i)>0); // Put in the diagonal entry } EPETRA_TEST_ERR(forierr,ierr); delete [] Indices1; delete [] Values1; // Finish up EPETRA_TEST_ERR(!(A1.FillComplete(false)==0),ierr); // Test diagonal extraction function Epetra_Vector checkDiag(Map1); EPETRA_TEST_ERR(!(A1.ExtractDiagonalCopy(checkDiag)==0),ierr); forierr = 0; for (int i=0; i<NumMyEquations1; i++) forierr += !(checkDiag[i]==two1); EPETRA_TEST_ERR(forierr,ierr); // Test diagonal replacement method forierr = 0; for (int i=0; i<NumMyEquations1; i++) checkDiag[i]=two1*two1; EPETRA_TEST_ERR(forierr,ierr); EPETRA_TEST_ERR(!(A1.ReplaceDiagonalValues(checkDiag)==0),ierr); Epetra_Vector checkDiag1(Map1); EPETRA_TEST_ERR(!(A1.ExtractDiagonalCopy(checkDiag1)==0),ierr); forierr = 0; for (int i=0; i<NumMyEquations1; i++) forierr += !(checkDiag[i]==checkDiag1[i]); EPETRA_TEST_ERR(forierr,ierr); if (verbose) cout << "\n\nDiagonal extraction and replacement OK.\n\n" << endl; double orignorm = A1.NormOne(); EPETRA_TEST_ERR(!(A1.Scale(4.0)==0),ierr); EPETRA_TEST_ERR(!(A1.NormOne()!=orignorm),ierr); if (verbose) cout << "\n\nMatrix scale OK.\n\n" << endl; if (verbose) cout << "\n\nPrint out tridiagonal matrix, each part on each processor.\n\n" << endl; cout << A1 << endl; // Release all objects delete [] NumNz1; delete [] MyGlobalElements1; } if (verbose) cout << "\n\n*****Testing LeftScale and RightScale" << endl << endl; int NumMyElements2 = 7; int NumMyRows2 = 1;//This value should not be changed without editing the // code below. Epetra_Map RowMap(-1,NumMyRows2,0,Comm); Epetra_Map ColMap(NumMyElements2,NumMyElements2,0,Comm); // The DomainMap needs to be different from the ColMap for the test to // be meaningful. Epetra_Map DomainMap(NumMyElements2,0,Comm); int NumMyRangeElements2 = 0; // We need to distribute the elements differently for the range map also. if (MyPID % 2 == 0) NumMyRangeElements2 = NumMyRows2*2; //put elements on even number procs if (NumProc % 2 == 1 && MyPID == NumProc-1) NumMyRangeElements2 = NumMyRows2; //If number of procs is odd, put // the last NumMyElements2 elements on the last proc Epetra_Map RangeMap(-1,NumMyRangeElements2,0,Comm); Epetra_CrsMatrix A2(Copy,RowMap,ColMap,NumMyElements2); double * Values2 = new double[NumMyElements2]; int * Indices2 = new int[NumMyElements2]; for (int i=0; i<NumMyElements2; i++) { Values2[i] = i+MyPID; Indices2[i]=i; } A2.InsertMyValues(0,NumMyElements2,Values2,Indices2); A2.FillComplete(DomainMap,RangeMap,false); Epetra_CrsMatrix A2copy(A2); double * RowLeftScaleValues = new double[NumMyRows2]; double * ColRightScaleValues = new double[NumMyElements2]; int RowLoopLength = RowMap.MaxMyGID()-RowMap.MinMyGID()+1; for (int i=0; i<RowLoopLength; i++) RowLeftScaleValues[i] = (i + RowMap.MinMyGID() ) % 2 + 1; // For the column map, all procs own all elements for (int i=0; i<NumMyElements2;i++) ColRightScaleValues[i] = i % 2 + 1; int RangeLoopLength = RangeMap.MaxMyGID()-RangeMap.MinMyGID()+1; double * RangeLeftScaleValues = new double[RangeLoopLength]; int DomainLoopLength = DomainMap.MaxMyGID()-DomainMap.MinMyGID()+1; double * DomainRightScaleValues = new double[DomainLoopLength]; for (int i=0; i<RangeLoopLength; i++) RangeLeftScaleValues[i] = 1.0/((i + RangeMap.MinMyGID() ) % 2 + 1); for (int i=0; i<DomainLoopLength;i++) DomainRightScaleValues[i] = 1.0/((i + DomainMap.MinMyGID() ) % 2 + 1); Epetra_Vector xRow(View,RowMap,RowLeftScaleValues); Epetra_Vector xCol(View,ColMap,ColRightScaleValues); Epetra_Vector xRange(View,RangeMap,RangeLeftScaleValues); Epetra_Vector xDomain(View,DomainMap,DomainRightScaleValues); double A2infNorm = A2.NormInf(); double A2oneNorm = A2.NormOne(); if (verbose1) cout << A2; EPETRA_TEST_ERR(A2.LeftScale(xRow),ierr); double A2infNorm1 = A2.NormInf(); double A2oneNorm1 = A2.NormOne(); bool ScalingBroke = false; if (A2infNorm1>2*A2infNorm||A2infNorm1<A2infNorm) { EPETRA_TEST_ERR(-31,ierr); ScalingBroke = true; } if (A2oneNorm1>2*A2oneNorm||A2oneNorm1<A2oneNorm) { EPETRA_TEST_ERR(-32,ierr); ScalingBroke = true; } if (verbose1) cout << A2; EPETRA_TEST_ERR(A2.RightScale(xCol),ierr); double A2infNorm2 = A2.NormInf(); double A2oneNorm2 = A2.NormOne(); if (A2infNorm2>=2*A2infNorm1||A2infNorm2<=A2infNorm1) { EPETRA_TEST_ERR(-33,ierr); ScalingBroke = true; } if (A2oneNorm2>2*A2oneNorm1||A2oneNorm2<=A2oneNorm1) { EPETRA_TEST_ERR(-34,ierr); ScalingBroke = true; } if (verbose1) cout << A2; EPETRA_TEST_ERR(A2.RightScale(xDomain),ierr); double A2infNorm3 = A2.NormInf(); double A2oneNorm3 = A2.NormOne(); // The last two scaling ops cancel each other out if (A2infNorm3!=A2infNorm1) { EPETRA_TEST_ERR(-35,ierr) ScalingBroke = true; } if (A2oneNorm3!=A2oneNorm1) { EPETRA_TEST_ERR(-36,ierr) ScalingBroke = true; } if (verbose1) cout << A2; EPETRA_TEST_ERR(A2.LeftScale(xRange),ierr); double A2infNorm4 = A2.NormInf(); double A2oneNorm4 = A2.NormOne(); // The 4 scaling ops all cancel out if (A2infNorm4!=A2infNorm) { EPETRA_TEST_ERR(-37,ierr) ScalingBroke = true; } if (A2oneNorm4!=A2oneNorm) { EPETRA_TEST_ERR(-38,ierr) ScalingBroke = true; } // // Now try changing the values underneath and make sure that // telling one process about the change causes NormInf() and // NormOne() to recompute the norm on all processes. // double *values; int num_my_rows = A2.NumMyRows() ; int num_entries; for ( int i=0 ; i< num_my_rows; i++ ) { EPETRA_TEST_ERR( A2.ExtractMyRowView( i, num_entries, values ), ierr ); for ( int j = 0 ; j <num_entries; j++ ) { values[j] *= 2.0; } } if ( MyPID == 0 ) A2.SumIntoGlobalValues( 0, 0, 0, 0 ) ; double A2infNorm5 = A2.NormInf(); double A2oneNorm5 = A2.NormOne(); if (A2infNorm5!=2.0 * A2infNorm4) { EPETRA_TEST_ERR(-39,ierr) ScalingBroke = true; } if (A2oneNorm5!= 2.0 * A2oneNorm4) { EPETRA_TEST_ERR(-40,ierr) ScalingBroke = true; } // // Restore the values underneath // for ( int i=0 ; i< num_my_rows; i++ ) { EPETRA_TEST_ERR( A2.ExtractMyRowView( i, num_entries, values ), ierr ); for ( int j = 0 ; j <num_entries; j++ ) { values[j] /= 2.0; } } if (verbose1) cout << A2; if (ScalingBroke) { if (verbose) cout << endl << "LeftScale and RightScale tests FAILED" << endl << endl; } else { if (verbose) cout << endl << "LeftScale and RightScale tests PASSED" << endl << endl; } Comm.Barrier(); if (verbose) cout << "\n\n*****Testing InvRowMaxs and InvColMaxs" << endl << endl; if (verbose1) cout << A2 << endl; EPETRA_TEST_ERR(A2.InvRowMaxs(xRow),ierr); EPETRA_TEST_ERR(A2.InvRowMaxs(xRange),ierr); if (verbose1) cout << xRow << endl << xRange << endl; if (verbose) cout << "\n\n*****Testing InvRowSums and InvColSums" << endl << endl; bool InvSumsBroke = false; // Works! EPETRA_TEST_ERR(A2.InvRowSums(xRow),ierr); if (verbose1) cout << xRow; EPETRA_TEST_ERR(A2.LeftScale(xRow),ierr); float A2infNormFloat = A2.NormInf(); if (verbose1) cout << A2 << endl; if (fabs(1.0-A2infNormFloat) > 1.e-5) { EPETRA_TEST_ERR(-41,ierr); InvSumsBroke = true; } // Works int expectedcode = 1; if (Comm.NumProc()>1) expectedcode = 0; EPETRA_TEST_ERR(!(A2.InvColSums(xDomain)==expectedcode),ierr); // This matrix has a single row, the first column has a zero, so a warning is issued. if (verbose1) cout << xDomain << endl; EPETRA_TEST_ERR(A2.RightScale(xDomain),ierr); float A2oneNormFloat2 = A2.NormOne(); if (verbose1) cout << A2; if (fabs(1.0-A2oneNormFloat2)>1.e-5) { EPETRA_TEST_ERR(-42,ierr) InvSumsBroke = true; } // Works! EPETRA_TEST_ERR(A2.InvRowSums(xRange),ierr); if (verbose1) cout << xRange; EPETRA_TEST_ERR(A2.LeftScale(xRange),ierr); float A2infNormFloat2 = A2.NormInf(); // We use a float so that rounding error // will not prevent the sum from being 1.0. if (verbose1) cout << A2; if (fabs(1.0-A2infNormFloat2)>1.e-5) { cout << "InfNorm should be = 1, but InfNorm = " << A2infNormFloat2 << endl; EPETRA_TEST_ERR(-43,ierr); InvSumsBroke = true; } // Doesn't work - may not need this test because column ownership is not unique /* EPETRA_TEST_ERR(A2.InvColSums(xCol),ierr); cout << xCol; EPETRA_TEST_ERR(A2.RightScale(xCol),ierr); float A2oneNormFloat = A2.NormOne(); cout << A2; if (fabs(1.0-A2oneNormFloat)>1.e-5) { EPETRA_TEST_ERR(-44,ierr); InvSumsBroke = true; } */ delete [] ColRightScaleValues; delete [] DomainRightScaleValues; if (verbose) cout << "Begin partial sum testing." << endl; // Test with a matrix that has partial sums for a subset of the rows // on multiple processors. (Except for the serial case, of course.) int NumMyRows3 = 2; // Changing this requires further changes below int * myGlobalElements = new int[NumMyRows3]; for (int i=0; i<NumMyRows3; i++) myGlobalElements[i] = MyPID+i; Epetra_Map RowMap3(NumProc*2, NumMyRows3, myGlobalElements, 0, Comm); int NumMyElements3 = 5; Epetra_CrsMatrix A3(Copy, RowMap3, NumMyElements3); double * Values3 = new double[NumMyElements3]; int * Indices3 = new int[NumMyElements3]; for (int i=0; i < NumMyElements3; i++) { Values3[i] = (int) (MyPID + (i+1)); Indices3[i]=i; } for (int i=0; i<NumMyRows3; i++) { A3.InsertGlobalValues(myGlobalElements[i],NumMyElements3,Values3,Indices3); } Epetra_Map RangeMap3(NumProc+1, 0, Comm); Epetra_Map DomainMap3(NumMyElements3, 0, Comm); EPETRA_TEST_ERR(A3.FillComplete(DomainMap3, RangeMap3,false),ierr); if (verbose1) cout << A3; Epetra_Vector xRange3(RangeMap3,false); Epetra_Vector xDomain3(DomainMap3,false); EPETRA_TEST_ERR(A3.InvRowSums(xRange3),ierr); if (verbose1) cout << xRange3; EPETRA_TEST_ERR(A3.LeftScale(xRange3),ierr); float A3infNormFloat = A3.NormInf(); if (verbose1) cout << A3; if (1.0!=A3infNormFloat) { cout << "InfNorm should be = 1, but InfNorm = " << A3infNormFloat <<endl; EPETRA_TEST_ERR(-61,ierr); InvSumsBroke = true; } // we want to take the transpose of our matrix and fill in different values. int NumMyColumns3 = NumMyRows3; Epetra_Map ColMap3cm(RowMap3); Epetra_Map RowMap3cm(A3.ColMap()); Epetra_CrsMatrix A3cm(Copy,RowMap3cm,ColMap3cm,NumProc+1); double *Values3cm = new double[NumMyColumns3]; int * Indices3cm = new int[NumMyColumns3]; for (int i=0; i<NumMyColumns3; i++) { Values3cm[i] = MyPID + i + 1; Indices3cm[i]= i + MyPID; } for (int ii=0; ii<NumMyElements3; ii++) { A3cm.InsertGlobalValues(ii, NumMyColumns3, Values3cm, Indices3cm); } // The DomainMap and the RangeMap from the last test will work fine for // the RangeMap and DomainMap, respectively, but I will make copies to // avaoid confusion when passing what looks like a DomainMap where we // need a RangeMap and vice vera. Epetra_Map RangeMap3cm(DomainMap3); Epetra_Map DomainMap3cm(RangeMap3); EPETRA_TEST_ERR(A3cm.FillComplete(DomainMap3cm,RangeMap3cm),ierr); if (verbose1) cout << A3cm << endl; // Again, we can copy objects from the last example. //Epetra_Vector xRange3cm(xDomain3); //Don't use at this time Epetra_Vector xDomain3cm(DomainMap3cm,false); EPETRA_TEST_ERR(A3cm.InvColSums(xDomain3cm),ierr); if (verbose1) cout << xDomain3cm << endl; EPETRA_TEST_ERR(A3cm.RightScale(xDomain3cm),ierr); float A3cmOneNormFloat = A3cm.NormOne(); if (verbose1) cout << A3cm << endl; if (1.0!=A3cmOneNormFloat) { cout << "OneNorm should be = 1, but OneNorm = " << A3cmOneNormFloat << endl; EPETRA_TEST_ERR(-62,ierr); InvSumsBroke = true; } if (verbose) cout << "End partial sum testing" << endl; if (verbose) cout << "Begin replicated testing" << endl; // We will now view the shared row as a repliated row, rather than one // that has partial sums of its entries on mulitple processors. // We will reuse much of the data used for the partial sum tesitng. Epetra_Vector xRow3(RowMap3,false); Epetra_CrsMatrix A4(Copy, RowMap3, NumMyElements3); for (int ii=0; ii < NumMyElements3; ii++) { Values3[ii] = (int)((ii*.6)+1.0); } for (int ii=0; ii<NumMyRows3; ii++) { A4.InsertGlobalValues(myGlobalElements[ii],NumMyElements3,Values3,Indices3); } EPETRA_TEST_ERR(A4.FillComplete(DomainMap3, RangeMap3,false),ierr); if (verbose1) cout << A4 << endl; // The next two lines should be expanded into a verifiable test. EPETRA_TEST_ERR(A4.InvRowMaxs(xRow3),ierr); EPETRA_TEST_ERR(A4.InvRowMaxs(xRange3),ierr); if (verbose1) cout << xRow3 << xRange3; EPETRA_TEST_ERR(A4.InvRowSums(xRow3),ierr); if (verbose1) cout << xRow3; EPETRA_TEST_ERR(A4.LeftScale(xRow3),ierr); float A4infNormFloat = A4.NormInf(); if (verbose1) cout << A4; if (2.0!=A4infNormFloat && NumProc != 1) { if (verbose1) cout << "InfNorm should be = 2 (because one column is replicated on two processors and NormOne() does not handle replication), but InfNorm = " << A4infNormFloat <<endl; EPETRA_TEST_ERR(-63,ierr); InvSumsBroke = true; } else if (1.0!=A4infNormFloat && NumProc == 1) { if (verbose1) cout << "InfNorm should be = 1, but InfNorm = " << A4infNormFloat <<endl; EPETRA_TEST_ERR(-63,ierr); InvSumsBroke = true; } Epetra_Vector xCol3cm(ColMap3cm,false); Epetra_CrsMatrix A4cm(Copy, RowMap3cm, ColMap3cm, NumProc+1); //Use values from A3cm for (int ii=0; ii<NumMyElements3; ii++) { A4cm.InsertGlobalValues(ii,NumMyColumns3,Values3cm,Indices3cm); } EPETRA_TEST_ERR(A4cm.FillComplete(DomainMap3cm, RangeMap3cm,false),ierr); if (verbose1) cout << A4cm << endl; // The next two lines should be expanded into a verifiable test. EPETRA_TEST_ERR(A4cm.InvColMaxs(xCol3cm),ierr); EPETRA_TEST_ERR(A4cm.InvColMaxs(xDomain3cm),ierr); if (verbose1) cout << xCol3cm << xDomain3cm; EPETRA_TEST_ERR(A4cm.InvColSums(xCol3cm),ierr); if (verbose1) cout << xCol3cm << endl; EPETRA_TEST_ERR(A4cm.RightScale(xCol3cm),ierr); float A4cmOneNormFloat = A4cm.NormOne(); if (verbose1) cout << A4cm << endl; if (2.0!=A4cmOneNormFloat && NumProc != 1) { if (verbose1) cout << "OneNorm should be = 2 (because one column is replicated on two processors and NormOne() does not handle replication), but OneNorm = " << A4cmOneNormFloat << endl; EPETRA_TEST_ERR(-64,ierr); InvSumsBroke = true; } else if (1.0!=A4cmOneNormFloat && NumProc == 1) { if (verbose1) cout << "OneNorm should be = 1, but OneNorm = " << A4infNormFloat <<endl; EPETRA_TEST_ERR(-64,ierr); InvSumsBroke = true; } if (verbose) cout << "End replicated testing" << endl; if (InvSumsBroke) { if (verbose) cout << endl << "InvRowSums tests FAILED" << endl << endl; } else if (verbose) cout << endl << "InvRowSums tests PASSED" << endl << endl; A3cm.PutScalar(2.0); int nnz_A3cm = A3cm.Graph().NumGlobalNonzeros(); double check_frobnorm = sqrt(nnz_A3cm*4.0); double frobnorm = A3cm.NormFrobenius(); bool frobnorm_test_failed = false; if (fabs(check_frobnorm-frobnorm) > 5.e-5) { frobnorm_test_failed = true; } if (frobnorm_test_failed) { if (verbose) std::cout << "Frobenius-norm test FAILED."<<std::endl; EPETRA_TEST_ERR(-65, ierr); } delete [] Values2; delete [] Indices2; delete [] myGlobalElements; delete [] Values3; delete [] Indices3; delete [] Values3cm; delete [] Indices3cm; delete [] RangeLeftScaleValues; delete [] RowLeftScaleValues; #ifdef EPETRA_MPI MPI_Finalize() ; #endif /* end main */ return ierr ; }
int main(int argc, char *argv[]) { #ifdef HAVE_MPI MPI_Init(&argc, &argv); Epetra_MpiComm Comm(MPI_COMM_WORLD); #else Epetra_SerialComm Comm; #endif bool verbose = (Comm.MyPID() == 0); // set global dimension to 5, could be any number int NumGlobalElements = 5; // create a map Epetra_Map Map(NumGlobalElements,0,Comm); // local number of rows int NumMyElements = Map.NumMyElements(); // get update list int * MyGlobalElements = Map.MyGlobalElements( ); // ============= CONSTRUCTION OF THE MATRIX =========================== // Create a Epetra_Matrix Epetra_CrsMatrix A(Copy,Map,3); // Add rows one-at-a-time double *Values = new double[2]; Values[0] = -1.0; Values[1] = -1.0; int *Indices = new int[2]; double two = 2.0; int NumEntries; for( int i=0 ; i<NumMyElements; ++i ) { if (MyGlobalElements[i]==0) { Indices[0] = 1; NumEntries = 1; } else if (MyGlobalElements[i] == NumGlobalElements-1) { Indices[0] = NumGlobalElements-2; NumEntries = 1; } else { Indices[0] = MyGlobalElements[i]-1; Indices[1] = MyGlobalElements[i]+1; NumEntries = 2; } A.InsertGlobalValues(MyGlobalElements[i], NumEntries, Values, Indices); // Put in the diagonal entry A.InsertGlobalValues(MyGlobalElements[i], 1, &two, MyGlobalElements+i); } // Finish up A.FillComplete(); // ================ CONSTRUCTION OF VECTORS ======================= // build up two distributed vectors q and z, and compute // q = A * z Epetra_Vector q(A.RowMap()); Epetra_Vector z(A.RowMap()); // Fill z with 1's z.PutScalar( 1.0 ); // ================ USE OF TIME AND FLOPS ========================= Epetra_Flops counter; A.SetFlopCounter(counter); Epetra_Time timer(Comm); A.Multiply(false, z, q); // Compute q = A*z double elapsed_time = timer.ElapsedTime(); double total_flops =counter.Flops(); if (verbose) cout << "Total ops: " << total_flops << endl; double MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "Total MFLOPs for mat-vec = " << MFLOPs << endl<< endl; double dotProduct; z.SetFlopCounter(counter); timer.ResetStartTime(); z.Dot(q, &dotProduct); total_flops =counter.Flops(); if (verbose) cout << "Total ops: " << total_flops << endl; elapsed_time = timer.ElapsedTime(); if (elapsed_time != 0.0) MFLOPs = (total_flops / elapsed_time) / 1000000.0; else MFLOPs = 0; if (verbose) { cout << "Total MFLOPs for vec-vec = " << MFLOPs << endl<< endl; cout << "q dot z = " << dotProduct << endl; } #ifdef HAVE_MPI MPI_Finalize(); #endif return( 0 ); } /* main */
int main(int argc, char *argv[]) { #ifdef EPETRA_MPI MPI_Init(&argc,&argv); Epetra_MpiComm Comm (MPI_COMM_WORLD); #else Epetra_SerialComm Comm; #endif cout << Comm << endl; int MyPID = Comm.MyPID(); bool verbose = false; bool verbose1 = true; if (MyPID==0) verbose = true; if(argc < 2 && verbose) { cerr << "Usage: " << argv[0] << " HB_filename [level_fill [level_overlap [absolute_threshold [ relative_threshold]]]]" << endl << "where:" << endl << "HB_filename - filename and path of a Harwell-Boeing data set" << endl << "level_fill - The amount of fill to use for ILU(k) preconditioner (default 0)" << endl << "level_overlap - The amount of overlap used for overlapping Schwarz subdomains (default 0)" << endl << "absolute_threshold - The minimum value to place on the diagonal prior to factorization (default 0.0)" << endl << "relative_threshold - The relative amount to perturb the diagonal prior to factorization (default 1.0)" << endl << endl << "To specify a non-default value for one of these parameters, you must specify all" << endl << " preceding values but not any subsequent parameters. Example:" << endl << "ifpackHpcSerialMsr.exe mymatrix.hpc 1 - loads mymatrix.hpc, uses level fill of one, all other values are defaults" << endl << endl; return(1); } // Uncomment the next three lines to debug in mpi mode //int tmp; //if (MyPID==0) cin >> tmp; //Comm.Barrier(); Epetra_Map * readMap; Epetra_CrsMatrix * readA; Epetra_Vector * readx; Epetra_Vector * readb; Epetra_Vector * readxexact; // Call routine to read in HB problem Trilinos_Util_ReadHb2Epetra(argv[1], Comm, readMap, readA, readx, readb, readxexact); // Create uniform distributed map Epetra_Map map(readMap->NumGlobalElements(), 0, Comm); // Create Exporter to distribute read-in matrix and vectors Epetra_Export exporter(*readMap, map); Epetra_CrsMatrix A(Copy, map, 0); Epetra_Vector x(map); Epetra_Vector b(map); Epetra_Vector xexact(map); Epetra_Time FillTimer(Comm); x.Export(*readx, exporter, Add); b.Export(*readb, exporter, Add); xexact.Export(*readxexact, exporter, Add); Comm.Barrier(); double vectorRedistributeTime = FillTimer.ElapsedTime(); A.Export(*readA, exporter, Add); Comm.Barrier(); double matrixRedistributeTime = FillTimer.ElapsedTime() - vectorRedistributeTime; assert(A.FillComplete()==0); Comm.Barrier(); double fillCompleteTime = FillTimer.ElapsedTime() - matrixRedistributeTime; if (Comm.MyPID()==0) { cout << "\n\n****************************************************" << endl; cout << "\n Vector redistribute time (sec) = " << vectorRedistributeTime<< endl; cout << " Matrix redistribute time (sec) = " << matrixRedistributeTime << endl; cout << " Transform to Local time (sec) = " << fillCompleteTime << endl<< endl; } Epetra_Vector tmp1(*readMap); Epetra_Vector tmp2(map); readA->Multiply(false, *readxexact, tmp1); A.Multiply(false, xexact, tmp2); double residual; tmp1.Norm2(&residual); if (verbose) cout << "Norm of Ax from file = " << residual << endl; tmp2.Norm2(&residual); if (verbose) cout << "Norm of Ax after redistribution = " << residual << endl << endl << endl; //cout << "A from file = " << *readA << endl << endl << endl; //cout << "A after dist = " << A << endl << endl << endl; delete readA; delete readx; delete readb; delete readxexact; delete readMap; Comm.Barrier(); bool smallProblem = false; if (A.RowMap().NumGlobalElements()<100) smallProblem = true; if (smallProblem) cout << "Original Matrix = " << endl << A << endl; x.PutScalar(0.0); Epetra_LinearProblem FullProblem(&A, &x, &b); double normb, norma; b.NormInf(&normb); norma = A.NormInf(); if (verbose) cout << "Inf norm of Original Matrix = " << norma << endl << "Inf norm of Original RHS = " << normb << endl; Epetra_Time ReductionTimer(Comm); Epetra_CrsSingletonFilter SingletonFilter; Comm.Barrier(); double reduceInitTime = ReductionTimer.ElapsedTime(); SingletonFilter.Analyze(&A); Comm.Barrier(); double reduceAnalyzeTime = ReductionTimer.ElapsedTime() - reduceInitTime; if (SingletonFilter.SingletonsDetected()) cout << "Singletons found" << endl; else { cout << "Singletons not found" << endl; exit(1); } SingletonFilter.ConstructReducedProblem(&FullProblem); Comm.Barrier(); double reduceConstructTime = ReductionTimer.ElapsedTime() - reduceInitTime; double totalReduceTime = ReductionTimer.ElapsedTime(); if (verbose) cout << "\n\n****************************************************" << endl << " Reduction init time (sec) = " << reduceInitTime<< endl << " Reduction Analyze time (sec) = " << reduceAnalyzeTime << endl << " Construct Reduced Problem time (sec) = " << reduceConstructTime << endl << " Reduction Total time (sec) = " << totalReduceTime << endl<< endl; Statistics(SingletonFilter); Epetra_LinearProblem * ReducedProblem = SingletonFilter.ReducedProblem(); Epetra_CrsMatrix * Ap = dynamic_cast<Epetra_CrsMatrix *>(ReducedProblem->GetMatrix()); Epetra_Vector * bp = (*ReducedProblem->GetRHS())(0); Epetra_Vector * xp = (*ReducedProblem->GetLHS())(0); if (smallProblem) cout << " Reduced Matrix = " << endl << *Ap << endl << " LHS before sol = " << endl << *xp << endl << " RHS = " << endl << *bp << endl; // Construct ILU preconditioner double elapsed_time, total_flops, MFLOPs; Epetra_Time timer(Comm); int LevelFill = 0; if (argc > 2) LevelFill = atoi(argv[2]); if (verbose) cout << "Using Level Fill = " << LevelFill << endl; int Overlap = 0; if (argc > 3) Overlap = atoi(argv[3]); if (verbose) cout << "Using Level Overlap = " << Overlap << endl; double Athresh = 0.0; if (argc > 4) Athresh = atof(argv[4]); if (verbose) cout << "Using Absolute Threshold Value of = " << Athresh << endl; double Rthresh = 1.0; if (argc > 5) Rthresh = atof(argv[5]); if (verbose) cout << "Using Relative Threshold Value of = " << Rthresh << endl; Ifpack_IlukGraph * IlukGraph = 0; Ifpack_CrsRiluk * ILUK = 0; if (LevelFill>-1) { elapsed_time = timer.ElapsedTime(); IlukGraph = new Ifpack_IlukGraph(Ap->Graph(), LevelFill, Overlap); assert(IlukGraph->ConstructFilledGraph()==0); elapsed_time = timer.ElapsedTime() - elapsed_time; if (verbose) cout << "Time to construct ILUK graph = " << elapsed_time << endl; Epetra_Flops fact_counter; elapsed_time = timer.ElapsedTime(); ILUK = new Ifpack_CrsRiluk(*IlukGraph); ILUK->SetFlopCounter(fact_counter); ILUK->SetAbsoluteThreshold(Athresh); ILUK->SetRelativeThreshold(Rthresh); //assert(ILUK->InitValues()==0); int initerr = ILUK->InitValues(*Ap); if (initerr!=0) { cout << endl << Comm << endl << " InitValues error = " << initerr; if (initerr==1) cout << " Zero diagonal found, warning error only"; cout << endl << endl; } assert(ILUK->Factor()==0); elapsed_time = timer.ElapsedTime() - elapsed_time; total_flops = ILUK->Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "Time to compute preconditioner values = " << elapsed_time << endl << "MFLOPS for Factorization = " << MFLOPs << endl; //cout << *ILUK << endl; double Condest; ILUK->Condest(false, Condest); if (verbose) cout << "Condition number estimate for this preconditioner = " << Condest << endl; } int Maxiter = 100; double Tolerance = 1.0E-8; Epetra_Flops counter; Ap->SetFlopCounter(counter); xp->SetFlopCounter(*Ap); bp->SetFlopCounter(*Ap); if (ILUK!=0) ILUK->SetFlopCounter(*Ap); elapsed_time = timer.ElapsedTime(); double normreducedb, normreduceda; bp->NormInf(&normreducedb); normreduceda = Ap->NormInf(); if (verbose) cout << "Inf norm of Reduced Matrix = " << normreduceda << endl << "Inf norm of Reduced RHS = " << normreducedb << endl; BiCGSTAB(*Ap, *xp, *bp, ILUK, Maxiter, Tolerance, &residual, verbose); elapsed_time = timer.ElapsedTime() - elapsed_time; total_flops = counter.Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "Time to compute solution = " << elapsed_time << endl << "Number of operations in solve = " << total_flops << endl << "MFLOPS for Solve = " << MFLOPs<< endl << endl; SingletonFilter.ComputeFullSolution(); if (smallProblem) cout << " Reduced LHS after sol = " << endl << *xp << endl << " Full LHS after sol = " << endl << x << endl << " Full Exact LHS = " << endl << xexact << endl; Epetra_Vector resid(x); resid.Update(1.0, x, -1.0, xexact, 0.0); // resid = xcomp - xexact resid.Norm2(&residual); double normx, normxexact; x.Norm2(&normx); xexact.Norm2(&normxexact); if (verbose) cout << "2-norm of computed solution = " << normx << endl << "2-norm of exact solution = " << normxexact << endl << "2-norm of difference between computed and exact solution = " << residual << endl; if (verbose1 && residual>1.0e-5) { if (verbose) cout << "Difference between computed and exact solution appears large..." << endl << "Computing norm of A times this difference. If this norm is small, then matrix is singular" << endl; Epetra_Vector bdiff(b); assert(A.Multiply(false, resid, bdiff)==0); assert(bdiff.Norm2(&residual)==0); if (verbose) cout << "2-norm of A times difference between computed and exact solution = " << residual << endl; } if (verbose) cout << "********************************************************" << endl << " Solving again with 2*Ax=2*b" << endl << "********************************************************" << endl; A.Scale(1.0); // A = 2*A b.Scale(1.0); // b = 2*b x.PutScalar(0.0); b.NormInf(&normb); norma = A.NormInf(); if (verbose) cout << "Inf norm of Original Matrix = " << norma << endl << "Inf norm of Original RHS = " << normb << endl; double updateReducedProblemTime = ReductionTimer.ElapsedTime(); SingletonFilter.UpdateReducedProblem(&FullProblem); Comm.Barrier(); updateReducedProblemTime = ReductionTimer.ElapsedTime() - updateReducedProblemTime; if (verbose) cout << "\n\n****************************************************" << endl << " Update Reduced Problem time (sec) = " << updateReducedProblemTime<< endl << "****************************************************" << endl; Statistics(SingletonFilter); if (LevelFill>-1) { Epetra_Flops fact_counter; elapsed_time = timer.ElapsedTime(); int initerr = ILUK->InitValues(*Ap); if (initerr!=0) { cout << endl << Comm << endl << " InitValues error = " << initerr; if (initerr==1) cout << " Zero diagonal found, warning error only"; cout << endl << endl; } assert(ILUK->Factor()==0); elapsed_time = timer.ElapsedTime() - elapsed_time; total_flops = ILUK->Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "Time to compute preconditioner values = " << elapsed_time << endl << "MFLOPS for Factorization = " << MFLOPs << endl; double Condest; ILUK->Condest(false, Condest); if (verbose) cout << "Condition number estimate for this preconditioner = " << Condest << endl; } bp->NormInf(&normreducedb); normreduceda = Ap->NormInf(); if (verbose) cout << "Inf norm of Reduced Matrix = " << normreduceda << endl << "Inf norm of Reduced RHS = " << normreducedb << endl; BiCGSTAB(*Ap, *xp, *bp, ILUK, Maxiter, Tolerance, &residual, verbose); elapsed_time = timer.ElapsedTime() - elapsed_time; total_flops = counter.Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "Time to compute solution = " << elapsed_time << endl << "Number of operations in solve = " << total_flops << endl << "MFLOPS for Solve = " << MFLOPs<< endl << endl; SingletonFilter.ComputeFullSolution(); if (smallProblem) cout << " Reduced LHS after sol = " << endl << *xp << endl << " Full LHS after sol = " << endl << x << endl << " Full Exact LHS = " << endl << xexact << endl; resid.Update(1.0, x, -1.0, xexact, 0.0); // resid = xcomp - xexact resid.Norm2(&residual); x.Norm2(&normx); xexact.Norm2(&normxexact); if (verbose) cout << "2-norm of computed solution = " << normx << endl << "2-norm of exact solution = " << normxexact << endl << "2-norm of difference between computed and exact solution = " << residual << endl; if (verbose1 && residual>1.0e-5) { if (verbose) cout << "Difference between computed and exact solution appears large..." << endl << "Computing norm of A times this difference. If this norm is small, then matrix is singular" << endl; Epetra_Vector bdiff(b); assert(A.Multiply(false, resid, bdiff)==0); assert(bdiff.Norm2(&residual)==0); if (verbose) cout << "2-norm of A times difference between computed and exact solution = " << residual << endl; } if (ILUK!=0) delete ILUK; if (IlukGraph!=0) delete IlukGraph; #ifdef EPETRA_MPI MPI_Finalize() ; #endif return 0 ; }