int main(int argc, char *argv[]) { int ierr = 0; double elapsed_time; double total_flops; double MFLOPs; #ifdef EPETRA_MPI // Initialize MPI MPI_Init(&argc,&argv); Epetra_MpiComm comm( MPI_COMM_WORLD ); #else Epetra_SerialComm comm; #endif bool verbose = false; bool summary = false; // Check if we should print verbose results to standard out if (argc>6) if (argv[6][0]=='-' && argv[6][1]=='v') verbose = true; // Check if we should print verbose results to standard out if (argc>6) if (argv[6][0]=='-' && argv[6][1]=='s') summary = true; if(argc < 6) { cerr << "Usage: " << argv[0] << " NumNodesX NumNodesY NumProcX NumProcY NumPoints [-v|-s]" << endl << "where:" << endl << "NumNodesX - Number of mesh nodes in X direction per processor" << endl << "NumNodesY - Number of mesh nodes in Y direction per processor" << endl << "NumProcX - Number of processors to use in X direction" << endl << "NumProcY - Number of processors to use in Y direction" << endl << "NumPoints - Number of points to use in stencil (5, 9 or 25 only)" << endl << "-v|-s - (Optional) Run in verbose mode if -v present or summary mode if -s present" << endl << " NOTES: NumProcX*NumProcY must equal the number of processors used to run the problem." << endl << endl << " Serial example:" << endl << argv[0] << " 16 12 1 1 25 -v" << endl << " Run this program in verbose mode on 1 processor using a 16 X 12 grid with a 25 point stencil."<< endl <<endl << " MPI example:" << endl << "mpirun -np 32 " << argv[0] << " 10 12 4 8 9 -v" << endl << " Run this program in verbose mode on 32 processors putting a 10 X 12 subgrid on each processor using 4 processors "<< endl << " in the X direction and 8 in the Y direction. 
Total grid size is 40 points in X and 96 in Y with a 9 point stencil."<< endl << endl; return(1); } //char tmp; //if (comm.MyPID()==0) cout << "Press any key to continue..."<< endl; //if (comm.MyPID()==0) cin >> tmp; //comm.Barrier(); comm.SetTracebackMode(0); // This should shut down any error traceback reporting if (verbose && comm.MyPID()==0) cout << Epetra_Version() << endl << endl; if (summary && comm.MyPID()==0) { if (comm.NumProc()==1) cout << Epetra_Version() << endl << endl; else cout << endl << endl; // Print two blank line to keep output columns lined up } if (verbose) cout << comm <<endl; // Redefine verbose to only print on PE 0 if (verbose && comm.MyPID()!=0) verbose = false; if (summary && comm.MyPID()!=0) summary = false; int numNodesX = atoi(argv[1]); int numNodesY = atoi(argv[2]); int numProcsX = atoi(argv[3]); int numProcsY = atoi(argv[4]); int numPoints = atoi(argv[5]); if (verbose || (summary && comm.NumProc()==1)) { cout << " Number of local nodes in X direction = " << numNodesX << endl << " Number of local nodes in Y direction = " << numNodesY << endl << " Number of global nodes in X direction = " << numNodesX*numProcsX << endl << " Number of global nodes in Y direction = " << numNodesY*numProcsY << endl << " Number of local nonzero entries = " << numNodesX*numNodesY*numPoints << endl << " Number of global nonzero entries = " << numNodesX*numNodesY*numPoints*numProcsX*numProcsY << endl << " Number of Processors in X direction = " << numProcsX << endl << " Number of Processors in Y direction = " << numProcsY << endl << " Number of Points in stencil = " << numPoints << endl << endl; } // Print blank line to keep output columns lined up if (summary && comm.NumProc()>1) cout << endl << endl << endl << endl << endl << endl << endl << endl<< endl << endl; if (numProcsX*numProcsY!=comm.NumProc()) { cerr << "Number of processors = " << comm.NumProc() << endl << " is not the product of " << numProcsX << " and " << numProcsY << endl << endl; return(1); 
} if (numPoints!=5 && numPoints!=9 && numPoints!=25) { cerr << "Number of points specified = " << numPoints << endl << " is not 5, 9, 25" << endl << endl; return(1); } if (numNodesX*numNodesY<=0) { cerr << "Product of number of nodes is <= zero" << endl << endl; return(1); } Epetra_IntSerialDenseVector Xoff, XLoff, XUoff; Epetra_IntSerialDenseVector Yoff, YLoff, YUoff; if (numPoints==5) { // Generate a 5-point 2D Finite Difference matrix Xoff.Size(5); Yoff.Size(5); Xoff[0] = -1; Xoff[1] = 1; Xoff[2] = 0; Xoff[3] = 0; Xoff[4] = 0; Yoff[0] = 0; Yoff[1] = 0; Yoff[2] = 0; Yoff[3] = -1; Yoff[4] = 1; // Generate a 2-point 2D Lower triangular Finite Difference matrix XLoff.Size(2); YLoff.Size(2); XLoff[0] = -1; XLoff[1] = 0; YLoff[0] = 0; YLoff[1] = -1; // Generate a 3-point 2D upper triangular Finite Difference matrix XUoff.Size(3); YUoff.Size(3); XUoff[0] = 0; XUoff[1] = 1; XUoff[2] = 0; YUoff[0] = 0; YUoff[1] = 0; YUoff[2] = 1; } else if (numPoints==9) { // Generate a 9-point 2D Finite Difference matrix Xoff.Size(9); Yoff.Size(9); Xoff[0] = -1; Xoff[1] = 0; Xoff[2] = 1; Yoff[0] = -1; Yoff[1] = -1; Yoff[2] = -1; Xoff[3] = -1; Xoff[4] = 0; Xoff[5] = 1; Yoff[3] = 0; Yoff[4] = 0; Yoff[5] = 0; Xoff[6] = -1; Xoff[7] = 0; Xoff[8] = 1; Yoff[6] = 1; Yoff[7] = 1; Yoff[8] = 1; // Generate a 5-point lower triangular 2D Finite Difference matrix XLoff.Size(5); YLoff.Size(5); XLoff[0] = -1; XLoff[1] = 0; Xoff[2] = 1; YLoff[0] = -1; YLoff[1] = -1; Yoff[2] = -1; XLoff[3] = -1; XLoff[4] = 0; YLoff[3] = 0; YLoff[4] = 0; // Generate a 4-point upper triangular 2D Finite Difference matrix XUoff.Size(4); YUoff.Size(4); XUoff[0] = 1; YUoff[0] = 0; XUoff[1] = -1; XUoff[2] = 0; XUoff[3] = 1; YUoff[1] = 1; YUoff[2] = 1; YUoff[3] = 1; } else { // Generate a 25-point 2D Finite Difference matrix Xoff.Size(25); Yoff.Size(25); int xi = 0, yi = 0; int xo = -2, yo = -2; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Yoff[yi++] = yo ; Yoff[yi++] = yo ; 
Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; xo = -2, yo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; xo = -2, yo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; xo = -2, yo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; xo = -2, yo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Xoff[xi++] = xo++; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; Yoff[yi++] = yo ; // Generate a 13-point lower triangular 2D Finite Difference matrix XLoff.Size(13); YLoff.Size(13); xi = 0, yi = 0; xo = -2, yo = -2; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ; xo = -2, yo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ; xo = -2, yo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; XLoff[xi++] = xo++; YLoff[yi++] = yo ; YLoff[yi++] = yo ; YLoff[yi++] = yo ; // Generate a 13-point upper triangular 2D Finite Difference matrix XUoff.Size(13); YUoff.Size(13); xi = 0, yi = 0; xo = 0, yo = 0; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ; xo = -2, yo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ; xo = -2, yo++; 
XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; XUoff[xi++] = xo++; YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ; YUoff[yi++] = yo ; } Epetra_Map * map; Epetra_Map * mapL; Epetra_Map * mapU; Epetra_CrsMatrix * A; Epetra_CrsMatrix * L; Epetra_CrsMatrix * U; Epetra_MultiVector * b; Epetra_MultiVector * bt; Epetra_MultiVector * xexact; Epetra_MultiVector * bL; Epetra_MultiVector * btL; Epetra_MultiVector * xexactL; Epetra_MultiVector * bU; Epetra_MultiVector * btU; Epetra_MultiVector * xexactU; Epetra_SerialDenseVector resvec(0); //Timings Epetra_Flops flopcounter; Epetra_Time timer(comm); #ifdef EPETRA_VERY_SHORT_PERFTEST int jstop = 1; #elif EPETRA_SHORT_PERFTEST int jstop = 1; #else int jstop = 2; #endif for (int j=0; j<jstop; j++) { for (int k=1; k<17; k++) { #ifdef EPETRA_VERY_SHORT_PERFTEST if (k<3 || (k%4==0 && k<9)) { #elif EPETRA_SHORT_PERFTEST if (k<6 || k%4==0) { #else if (k<7 || k%2==0) { #endif int nrhs=k; if (verbose) cout << "\n*************** Results for " << nrhs << " RHS with "; bool StaticProfile = (j!=0); if (verbose) { if (StaticProfile) cout << " static profile\n"; else cout << " dynamic profile\n"; } GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, numPoints, Xoff.Values(), Yoff.Values(), nrhs, comm, verbose, summary, map, A, b, bt, xexact, StaticProfile, false); #ifdef EPETRA_HAVE_JADMATRIX timer.ResetStartTime(); Epetra_JadMatrix JA(*A); elapsed_time = timer.ElapsedTime(); if (verbose) cout << "Time to create Jagged diagonal matrix = " << elapsed_time << endl; //cout << "A = " << *A << endl; //cout << "JA = " << JA << endl; runJadMatrixTests(&JA, b, bt, xexact, StaticProfile, verbose, summary); #endif runMatrixTests(A, b, bt, xexact, StaticProfile, verbose, summary); delete A; delete b; delete bt; delete xexact; GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, XLoff.Length(), XLoff.Values(), YLoff.Values(), nrhs, comm, verbose, summary, mapL, L, bL, btL, 
xexactL, StaticProfile, true); GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, XUoff.Length(), XUoff.Values(), YUoff.Values(), nrhs, comm, verbose, summary, mapU, U, bU, btU, xexactU, StaticProfile, true); runLUMatrixTests(L, bL, btL, xexactL, U, bU, btU, xexactU, StaticProfile, verbose, summary); delete L; delete bL; delete btL; delete xexactL; delete mapL; delete U; delete bU; delete btU; delete xexactU; delete mapU; Epetra_MultiVector q(*map, nrhs); Epetra_MultiVector z(q); Epetra_MultiVector r(q); delete map; q.SetFlopCounter(flopcounter); z.SetFlopCounter(q); r.SetFlopCounter(q); resvec.Resize(nrhs); flopcounter.ResetFlops(); timer.ResetStartTime(); //10 norms for( int i = 0; i < 10; ++i ) q.Norm2( resvec.Values() ); elapsed_time = timer.ElapsedTime(); total_flops = q.Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "\nTotal MFLOPs for 10 Norm2's= " << MFLOPs << endl; if (summary) { if (comm.NumProc()==1) cout << "Norm2" << '\t'; cout << MFLOPs << endl; } flopcounter.ResetFlops(); timer.ResetStartTime(); //10 dot's for( int i = 0; i < 10; ++i ) q.Dot(z, resvec.Values()); elapsed_time = timer.ElapsedTime(); total_flops = q.Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "Total MFLOPs for 10 Dot's = " << MFLOPs << endl; if (summary) { if (comm.NumProc()==1) cout << "DotProd" << '\t'; cout << MFLOPs << endl; } flopcounter.ResetFlops(); timer.ResetStartTime(); //10 dot's for( int i = 0; i < 10; ++i ) q.Update(1.0, z, 1.0, r, 0.0); elapsed_time = timer.ElapsedTime(); total_flops = q.Flops(); MFLOPs = total_flops/elapsed_time/1000000.0; if (verbose) cout << "Total MFLOPs for 10 Updates= " << MFLOPs << endl; if (summary) { if (comm.NumProc()==1) cout << "Update" << '\t'; cout << MFLOPs << endl; } } } } #ifdef EPETRA_MPI MPI_Finalize() ; #endif return ierr ; } // Constructs a 2D PDE finite difference matrix using the list of x and y offsets. 
//
// nx      (In) - number of grid points in x direction
// ny      (In) - number of grid points in y direction
//   The total number of equations will be nx*ny ordered such that the x direction changes
//   most rapidly:
//      First equation is at point (0,0)
//      Second at                  (1,0)
//       ...
//      nx-th equation at          (nx-1,0)
//      (nx+1)-st equation at      (0,1)
// numPoints (In) - number of points in finite difference stencil
// xoff    (In) - stencil offsets in x direction (of length numPoints)
// yoff    (In) - stencil offsets in y direction (of length numPoints)
//   A standard 5-point finite difference stencil would be described as:
//     numPoints = 5
//     xoff = [-1, 1, 0,  0, 0]
//     yoff = [ 0, 0, 0, -1, 1]
// nrhs    (In) - Number of rhs to generate. (The first interface produces vectors, so nrhs is not needed there.)
// comm    (In) - an Epetra_Comm object describing the parallel machine (numProcs and my proc ID)
// map    (Out) - Epetra_Map describing distribution of matrix and vectors/multivectors
// A      (Out) - Epetra_CrsMatrix constructed for nx by ny grid using prescribed stencil
//                Off-diagonal values are random between -1 and 0.  If the diagonal is part of the stencil,
//                the matrix will be slightly diagonally dominant.
// b      (Out) - Generated RHS.  Values satisfy b = A*xexact
// bt     (Out) - Generated RHS.  Values satisfy bt = A'*xexact
// xexact (Out) - Generated exact solution to A*x = b and A'*x = bt
// Note: Caller of this function is responsible for deleting all output objects.
void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, int * xoff, int * yoff, const Epetra_Comm &comm, bool verbose, bool summary, Epetra_Map *& map, Epetra_CrsMatrix *& A, Epetra_Vector *& b, Epetra_Vector *& bt, Epetra_Vector *&xexact, bool StaticProfile, bool MakeLocalOnly) { Epetra_MultiVector * b1, * bt1, * xexact1; GenerateCrsProblem(numNodesX, numNodesY, numProcsX, numProcsY, numPoints, xoff, yoff, 1, comm, verbose, summary, map, A, b1, bt1, xexact1, StaticProfile, MakeLocalOnly); b = dynamic_cast<Epetra_Vector *>(b1); bt = dynamic_cast<Epetra_Vector *>(bt1); xexact = dynamic_cast<Epetra_Vector *>(xexact1); return; } void GenerateCrsProblem(int numNodesX, int numNodesY, int numProcsX, int numProcsY, int numPoints, int * xoff, int * yoff, int nrhs, const Epetra_Comm &comm, bool verbose, bool summary, Epetra_Map *& map, Epetra_CrsMatrix *& A, Epetra_MultiVector *& b, Epetra_MultiVector *& bt, Epetra_MultiVector *&xexact, bool StaticProfile, bool MakeLocalOnly) { Epetra_Time timer(comm); // Determine my global IDs long long * myGlobalElements; GenerateMyGlobalElements(numNodesX, numNodesY, numProcsX, numProcsY, comm.MyPID(), myGlobalElements); int numMyEquations = numNodesX*numNodesY; map = new Epetra_Map((long long)-1, numMyEquations, myGlobalElements, 0, comm); // Create map with 2D block partitioning. 
delete [] myGlobalElements; long long numGlobalEquations = map->NumGlobalElements64(); int profile = 0; if (StaticProfile) profile = numPoints; #ifdef EPETRA_HAVE_STATICPROFILE if (MakeLocalOnly) A = new Epetra_CrsMatrix(Copy, *map, *map, profile, StaticProfile); // Construct matrix with rowmap=colmap else A = new Epetra_CrsMatrix(Copy, *map, profile, StaticProfile); // Construct matrix #else if (MakeLocalOnly) A = new Epetra_CrsMatrix(Copy, *map, *map, profile); // Construct matrix with rowmap=colmap else A = new Epetra_CrsMatrix(Copy, *map, profile); // Construct matrix #endif long long * indices = new long long[numPoints]; double * values = new double[numPoints]; double dnumPoints = (double) numPoints; int nx = numNodesX*numProcsX; for (int i=0; i<numMyEquations; i++) { long long rowID = map->GID64(i); int numIndices = 0; for (int j=0; j<numPoints; j++) { long long colID = rowID + xoff[j] + nx*yoff[j]; // Compute column ID based on stencil offsets if (colID>-1 && colID<numGlobalEquations) { indices[numIndices] = colID; double value = - ((double) rand())/ ((double) RAND_MAX); if (colID==rowID) values[numIndices++] = dnumPoints - value; // Make diagonal dominant else values[numIndices++] = value; } } //cout << "Building row " << rowID << endl; A->InsertGlobalValues(rowID, numIndices, values, indices); } delete [] indices; delete [] values; double insertTime = timer.ElapsedTime(); timer.ResetStartTime(); A->FillComplete(false); double fillCompleteTime = timer.ElapsedTime(); if (verbose) cout << "Time to insert matrix values = " << insertTime << endl << "Time to complete fill = " << fillCompleteTime << endl; if (summary) { if (comm.NumProc()==1) cout << "InsertTime" << '\t'; cout << insertTime << endl; if (comm.NumProc()==1) cout << "FillCompleteTime" << '\t'; cout << fillCompleteTime << endl; } if (nrhs<=1) { b = new Epetra_Vector(*map); bt = new Epetra_Vector(*map); xexact = new Epetra_Vector(*map); } else { b = new Epetra_MultiVector(*map, nrhs); bt = new 
Epetra_MultiVector(*map, nrhs); xexact = new Epetra_MultiVector(*map, nrhs); } xexact->Random(); // Fill xexact with random values A->Multiply(false, *xexact, *b); A->Multiply(true, *xexact, *bt); return; }
// Stores 1.0 times the first stored coefficient (coeff_[0]) in v by calling
// Epetra_MultiVector::Scale on v.
void
Stokhos::EpetraMultiVectorOrthogPoly::
computeMean(Epetra_MultiVector& v) const
{
  const double unitScale = 1.0;
  v.Scale(unitScale, *coeff_[0]);
}
int TestMultiLevelPreconditioner(char ProblemType[], Teuchos::ParameterList & MLList, Epetra_LinearProblem & Problem, double & TotalErrorResidual, double & TotalErrorExactSol) { Epetra_MultiVector* lhs = Problem.GetLHS(); Epetra_MultiVector* rhs = Problem.GetRHS(); Epetra_CrsMatrix* A = dynamic_cast<Epetra_CrsMatrix*>(Problem.GetMatrix()); int PID = A->Comm().MyPID(); int numProcs = A->Comm().NumProc(); RCP<const Epetra_RowMatrix> Arcp = Teuchos::rcp(A, false); double n1, n2,nf; // ======================================== // // create a rhs corresponding to lhs or 1's // // ======================================== // lhs->PutScalar(1.0); A->Multiply(false,*lhs,*rhs); lhs->PutScalar(0.0); MLList.set("ML output", 0); RowMatrixToMatlabFile("mat_f.dat",*A); MultiVectorToMatrixMarketFile("lhs_f.dat",*lhs,0,0,false); MultiVectorToMatrixMarketFile("rhs_f.dat",*rhs,0,0,false); Epetra_Time Time(A->Comm()); /* Build the Zoltan list - Group #1 */ ParameterList Zlist1,Sublist1; Sublist1.set("DEBUG_LEVEL","0"); Sublist1.set("NUM_GLOBAL_PARTITIONS","2"); Zlist1.set("Zoltan",Sublist1); /* Start Isorropia's Ninja Magic - Group #1 */ RefCountPtr<Isorropia::Epetra::Partitioner> partitioner1 = Isorropia::Epetra::create_partitioner(Arcp, Zlist1); Isorropia::Epetra::Redistributor rd1(partitioner1); Teuchos::RCP<Epetra_CrsMatrix> ResA1=rd1.redistribute(*A); Teuchos::RCP<Epetra_MultiVector> ResX1=rd1.redistribute(*lhs); Teuchos::RCP<Epetra_MultiVector> ResB1=rd1.redistribute(*rhs); RestrictedCrsMatrixWrapper RW1; RW1.restrict_comm(ResA1); RestrictedMultiVectorWrapper RX1,RB1; RX1.restrict_comm(ResX1); RB1.restrict_comm(ResB1); /* Build the Zoltan list - Group #2 */ ParameterList Zlist2,Sublist2; Sublist2.set("DEBUG_LEVEL","0"); if(PID > 1) Sublist2.set("NUM_LOCAL_PARTITIONS","1"); else Sublist2.set("NUM_LOCAL_PARTITIONS","0"); Zlist2.set("Zoltan",Sublist2); /* Start Isorropia's Ninja Magic - Group #2 */ RefCountPtr<Isorropia::Epetra::Partitioner> partitioner2 = 
Isorropia::Epetra::create_partitioner(Arcp, Zlist2); Isorropia::Epetra::Redistributor rd2(partitioner2); Teuchos::RCP<Epetra_CrsMatrix> ResA2=rd2.redistribute(*A); Teuchos::RCP<Epetra_MultiVector> ResX2=rd2.redistribute(*lhs); Teuchos::RCP<Epetra_MultiVector> ResB2=rd2.redistribute(*rhs); RestrictedCrsMatrixWrapper RW2; RW2.restrict_comm(ResA2); RestrictedMultiVectorWrapper RX2,RB2; RX2.restrict_comm(ResX2); RB2.restrict_comm(ResB2); if(RW1.RestrictedProcIsActive()){ Teuchos::RCP<Epetra_CrsMatrix> SubA1 = RW1.RestrictedMatrix(); Teuchos::RCP<Epetra_MultiVector> SubX1 = RX1.RestrictedMultiVector(); Teuchos::RCP<Epetra_MultiVector> SubB1 = RB1.RestrictedMultiVector(); ML_Epetra::MultiLevelPreconditioner * SubPrec1 = new ML_Epetra::MultiLevelPreconditioner(*SubA1, MLList, true); Epetra_LinearProblem Problem1(&*SubA1,&*SubX1,&*SubB1); AztecOO solver1(Problem1); solver1.SetPrecOperator(SubPrec1); solver1.SetAztecOption(AZ_solver, AZ_gmres); solver1.SetAztecOption(AZ_output, 32); solver1.SetAztecOption(AZ_kspace, 160); solver1.Iterate(1550, 1e-12); delete SubPrec1; } else{ Teuchos::RCP<Epetra_CrsMatrix> SubA2 = RW2.RestrictedMatrix(); Teuchos::RCP<Epetra_MultiVector> SubX2 = RX2.RestrictedMultiVector(); Teuchos::RCP<Epetra_MultiVector> SubB2 = RB2.RestrictedMultiVector(); ML_Epetra::MultiLevelPreconditioner * SubPrec2 = new ML_Epetra::MultiLevelPreconditioner(*SubA2, MLList, true); Epetra_LinearProblem Problem2(&*SubA2,&*SubX2,&*SubB2); AztecOO solver2(Problem2); solver2.SetPrecOperator(SubPrec2); solver2.SetAztecOption(AZ_solver, AZ_gmres); solver2.SetAztecOption(AZ_output, 32); solver2.SetAztecOption(AZ_kspace, 160); solver2.Iterate(1550, 1e-12); delete SubPrec2; } /* Post-processing exports */ Epetra_MultiVector ans1(*lhs), ans2(*lhs); rd1.redistribute_reverse(*ResX1,ans1); rd2.redistribute_reverse(*ResX2,ans2); /* Run on Full Problem */ A->Comm().Barrier(); ML_Epetra::MultiLevelPreconditioner * FullPrec = new ML_Epetra::MultiLevelPreconditioner(*A, MLList, true); 
AztecOO solverF(Problem); solverF.SetPrecOperator(FullPrec); solverF.SetAztecOption(AZ_solver, AZ_gmres); solverF.SetAztecOption(AZ_output, 32); solverF.SetAztecOption(AZ_kspace, 160); solverF.Iterate(1550, 1e-12); delete FullPrec; /* Solution Comparison */ ans1.Update(1.0,*lhs,-1.0); ans2.Update(1.0,*lhs,-1.0); ans1.Norm2(&n1); ans2.Norm2(&n2); if(!PID) { printf("Norm Diff 1 = %6.4e\n",n1); printf("Norm Diff 2 = %6.4e\n",n2); } TotalErrorExactSol += n1 + n2; }
int BlockPCGSolver::Solve(const Epetra_MultiVector &X, Epetra_MultiVector &Y, int blkSize) const { int xrow = X.MyLength(); int xcol = X.NumVectors(); int ycol = Y.NumVectors(); int info = 0; int localVerbose = verbose*(MyComm.MyPID() == 0); double *valX = X.Values(); int NB = 3 + callLAPACK.ILAENV(1, "hetrd", "u", blkSize); int lworkD = (blkSize > NB) ? blkSize*blkSize : NB*blkSize; int wSize = 4*blkSize*xrow + 3*blkSize + 2*blkSize*blkSize + lworkD; bool useY = true; if (ycol % blkSize != 0) { // Allocate an extra block to store the solutions wSize += blkSize*xrow; useY = false; } if (lWorkSpace < wSize) { delete[] workSpace; workSpace = new (std::nothrow) double[wSize]; if (workSpace == 0) { info = -1; return info; } lWorkSpace = wSize; } // if (lWorkSpace < wSize) double *pointer = workSpace; // Array to store the matrix PtKP double *PtKP = pointer; pointer = pointer + blkSize*blkSize; // Array to store coefficient matrices double *coeff = pointer; pointer = pointer + blkSize*blkSize; // Workspace array double *workD = pointer; pointer = pointer + lworkD; // Array to store the eigenvalues of P^t K P double *da = pointer; pointer = pointer + blkSize; // Array to store the norms of right hand sides double *initNorm = pointer; pointer = pointer + blkSize; // Array to store the norms of residuals double *resNorm = pointer; pointer = pointer + blkSize; // Array to store the residuals double *valR = pointer; pointer = pointer + xrow*blkSize; Epetra_MultiVector R(View, X.Map(), valR, xrow, blkSize); // Array to store the preconditioned residuals double *valZ = pointer; pointer = pointer + xrow*blkSize; Epetra_MultiVector Z(View, X.Map(), valZ, xrow, blkSize); // Array to store the search directions double *valP = pointer; pointer = pointer + xrow*blkSize; Epetra_MultiVector P(View, X.Map(), valP, xrow, blkSize); // Array to store the image of the search directions double *valKP = pointer; pointer = pointer + xrow*blkSize; Epetra_MultiVector KP(View, X.Map(), valKP, 
xrow, blkSize); // Pointer to store the solutions double *valSOL = (useY == true) ? Y.Values() : pointer; int iRHS; for (iRHS = 0; iRHS < xcol; iRHS += blkSize) { int numVec = (iRHS + blkSize < xcol) ? blkSize : xcol - iRHS; // Set the initial residuals to the right hand sides if (numVec < blkSize) { R.Random(); } memcpy(valR, valX + iRHS*xrow, numVec*xrow*sizeof(double)); // Set the initial guess to zero valSOL = (useY == true) ? Y.Values() + iRHS*xrow : valSOL; Epetra_MultiVector SOL(View, X.Map(), valSOL, xrow, blkSize); SOL.PutScalar(0.0); int ii = 0; int iter = 0; int nFound = 0; R.Norm2(initNorm); if (localVerbose > 1) { std::cout << std::endl; std::cout << " Vectors " << iRHS << " to " << iRHS + numVec - 1 << std::endl; if (localVerbose > 2) { std::fprintf(stderr,"\n"); for (ii = 0; ii < numVec; ++ii) { std::cout << " ... Initial Residual Norm " << ii << " = " << initNorm[ii] << std::endl; } std::cout << std::endl; } } // Iteration loop for (iter = 1; iter <= iterMax; ++iter) { // Apply the preconditioner if (Prec) Prec->ApplyInverse(R, Z); else Z = R; // Define the new search directions if (iter == 1) { P = Z; } else { // Compute P^t K Z callBLAS.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, blkSize, blkSize, xrow, 1.0, KP.Values(), xrow, Z.Values(), xrow, 0.0, workD, blkSize); MyComm.SumAll(workD, coeff, blkSize*blkSize); // Compute the coefficient (P^t K P)^{-1} P^t K Z callBLAS.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, blkSize, blkSize, blkSize, 1.0, PtKP, blkSize, coeff, blkSize, 0.0, workD, blkSize); for (ii = 0; ii < blkSize; ++ii) callBLAS.SCAL(blkSize, da[ii], workD + ii, blkSize); callBLAS.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, blkSize, blkSize, blkSize, 1.0, PtKP, blkSize, workD, blkSize, 0.0, coeff, blkSize); // Update the search directions // Note: Use KP as a workspace memcpy(KP.Values(), P.Values(), xrow*blkSize*sizeof(double)); callBLAS.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, xrow, blkSize, blkSize, 1.0, KP.Values(), xrow, coeff, blkSize, 0.0, 
P.Values(), xrow); P.Update(1.0, Z, -1.0); } // if (iter == 1) K->Apply(P, KP); // Compute P^t K P callBLAS.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, blkSize, blkSize, xrow, 1.0, P.Values(), xrow, KP.Values(), xrow, 0.0, workD, blkSize); MyComm.SumAll(workD, PtKP, blkSize*blkSize); // Eigenvalue decomposition of P^t K P callLAPACK.SYEV('V', 'U', blkSize, PtKP, blkSize, da, workD, lworkD, &info); if (info) { // Break the loop as spectral decomposition failed break; } // if (info) // Compute the pseudo-inverse of the eigenvalues for (ii = 0; ii < blkSize; ++ii) { TEUCHOS_TEST_FOR_EXCEPTION(da[ii] < 0.0, std::runtime_error, "Negative " "eigenvalue for P^T K P: da[" << ii << "] = " << da[ii] << "."); da[ii] = (da[ii] == 0.0) ? 0.0 : 1.0/da[ii]; } // for (ii = 0; ii < blkSize; ++ii) // Compute P^t R callBLAS.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, blkSize, blkSize, xrow, 1.0, P.Values(), xrow, R.Values(), xrow, 0.0, workD, blkSize); MyComm.SumAll(workD, coeff, blkSize*blkSize); // Compute the coefficient (P^t K P)^{-1} P^t R callBLAS.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, blkSize, blkSize, blkSize, 1.0, PtKP, blkSize, coeff, blkSize, 0.0, workD, blkSize); for (ii = 0; ii < blkSize; ++ii) callBLAS.SCAL(blkSize, da[ii], workD + ii, blkSize); callBLAS.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, blkSize, blkSize, blkSize, 1.0, PtKP, blkSize, workD, blkSize, 0.0, coeff, blkSize); // Update the solutions callBLAS.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, xrow, blkSize, blkSize, 1.0, P.Values(), xrow, coeff, blkSize, 1.0, valSOL, xrow); // Update the residuals callBLAS.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, xrow, blkSize, blkSize, -1.0, KP.Values(), xrow, coeff, blkSize, 1.0, R.Values(), xrow); // Check convergence R.Norm2(resNorm); nFound = 0; for (ii = 0; ii < numVec; ++ii) { if (resNorm[ii] <= tolCG*initNorm[ii]) nFound += 1; } if (localVerbose > 1) { std::cout << " Vectors " << iRHS << " to " << iRHS + numVec - 1; std::cout << " -- Iteration " << iter << " -- " << 
nFound << " converged vectors\n"; if (localVerbose > 2) { std::cout << std::endl; for (ii = 0; ii < numVec; ++ii) { std::cout << " ... "; std::cout.width(5); std::cout << ii << " ... Residual = "; std::cout.precision(2); std::cout.setf(std::ios::scientific, std::ios::floatfield); std::cout << resNorm[ii] << " ... Right Hand Side = " << initNorm[ii] << std::endl; } std::cout << std::endl; } } if (nFound == numVec) { break; } } // for (iter = 1; iter <= maxIter; ++iter) if (useY == false) { // Copy the solutions back into Y memcpy(Y.Values() + xrow*iRHS, valSOL, numVec*xrow*sizeof(double)); } numSolve += nFound; if (nFound == numVec) { minIter = (iter < minIter) ? iter : minIter; maxIter = (iter > maxIter) ? iter : maxIter; sumIter += iter; } } // for (iRHS = 0; iRHS < xcol; iRHS += blkSize) return info; }
int BlockDACG::reSolve(int numEigen, Epetra_MultiVector &Q, double *lambda, int startingEV) { // Computes the smallest eigenvalues and the corresponding eigenvectors // of the generalized eigenvalue problem // // K X = M X Lambda // // using a Block Deflation Accelerated Conjugate Gradient algorithm. // // Note that if M is not specified, then K X = X Lambda is solved. // // Ref: P. Arbenz & R. Lehoucq, "A comparison of algorithms for modal analysis in the // absence of a sparse direct method", SNL, Technical Report SAND2003-1028J // With the notations of this report, the coefficient beta is defined as // diag( H^T_{k} G_{k} ) / diag( H^T_{k-1} G_{k-1} ) // // Input variables: // // numEigen (integer) = Number of eigenmodes requested // // Q (Epetra_MultiVector) = Converged eigenvectors // The number of columns of Q must be equal to numEigen + blockSize. // The rows of Q are distributed across processors. // At exit, the first numEigen columns contain the eigenvectors requested. // // lambda (array of doubles) = Converged eigenvalues // At input, it must be of size numEigen + blockSize. // At exit, the first numEigen locations contain the eigenvalues requested. // // startingEV (integer) = Number of existing converged eigenmodes // // Return information on status of computation // // info >= 0 >> Number of converged eigenpairs at the end of computation // // // Failure due to input arguments // // info = - 1 >> The stiffness matrix K has not been specified. // info = - 2 >> The maps for the matrix K and the matrix M differ. // info = - 3 >> The maps for the matrix K and the preconditioner P differ. // info = - 4 >> The maps for the vectors and the matrix K differ. // info = - 5 >> Q is too small for the number of eigenvalues requested. // info = - 6 >> Q is too small for the computation parameters. 
// // info = - 10 >> Failure during the mass orthonormalization // // info = - 20 >> Error in LAPACK during the local eigensolve // // info = - 30 >> MEMORY // // Check the input parameters if (numEigen <= startingEV) { return startingEV; } int info = myVerify.inputArguments(numEigen, K, M, Prec, Q, numEigen + blockSize); if (info < 0) return info; int myPid = MyComm.MyPID(); // Get the weight for approximating the M-inverse norm Epetra_Vector *vectWeight = 0; if (normWeight) { vectWeight = new Epetra_Vector(View, Q.Map(), normWeight); } int knownEV = startingEV; int localVerbose = verbose*(myPid==0); // Define local block vectors // // MX = Working vectors (storing M*X if M is specified, else pointing to X) // KX = Working vectors (storing K*X) // // R = Residuals // // H = Preconditioned residuals // // P = Search directions // MP = Working vectors (storing M*P if M is specified, else pointing to P) // KP = Working vectors (storing K*P) int xr = Q.MyLength(); Epetra_MultiVector X(View, Q, numEigen, blockSize); X.Random(); int tmp; tmp = (M == 0) ? 5*blockSize*xr : 7*blockSize*xr; double *work1 = new (nothrow) double[tmp]; if (work1 == 0) { if (vectWeight) delete vectWeight; info = -30; return info; } memRequested += sizeof(double)*tmp/(1024.0*1024.0); highMem = (highMem > currentSize()) ? highMem : currentSize(); double *tmpD = work1; Epetra_MultiVector KX(View, Q.Map(), tmpD, xr, blockSize); tmpD = tmpD + xr*blockSize; Epetra_MultiVector MX(View, Q.Map(), (M) ? tmpD : X.Values(), xr, blockSize); tmpD = (M) ? tmpD + xr*blockSize : tmpD; Epetra_MultiVector R(View, Q.Map(), tmpD, xr, blockSize); tmpD = tmpD + xr*blockSize; Epetra_MultiVector H(View, Q.Map(), tmpD, xr, blockSize); tmpD = tmpD + xr*blockSize; Epetra_MultiVector P(View, Q.Map(), tmpD, xr, blockSize); tmpD = tmpD + xr*blockSize; Epetra_MultiVector KP(View, Q.Map(), tmpD, xr, blockSize); tmpD = tmpD + xr*blockSize; Epetra_MultiVector MP(View, Q.Map(), (M) ? 
tmpD : P.Values(), xr, blockSize); // Define arrays // // theta = Store the local eigenvalues (size: 2*blockSize) // normR = Store the norm of residuals (size: blockSize) // // oldHtR = Store the previous H_i^T*R_i (size: blockSize) // currentHtR = Store the current H_i^T*R_i (size: blockSize) // // MM = Local mass matrix (size: 2*blockSize x 2*blockSize) // KK = Local stiffness matrix (size: 2*blockSize x 2*blockSize) // // S = Local eigenvectors (size: 2*blockSize x 2*blockSize) int lwork2; lwork2 = 5*blockSize + 12*blockSize*blockSize; double *work2 = new (nothrow) double[lwork2]; if (work2 == 0) { if (vectWeight) delete vectWeight; delete[] work1; info = -30; return info; } highMem = (highMem > currentSize()) ? highMem : currentSize(); tmpD = work2; double *theta = tmpD; tmpD = tmpD + 2*blockSize; double *normR = tmpD; tmpD = tmpD + blockSize; double *oldHtR = tmpD; tmpD = tmpD + blockSize; double *currentHtR = tmpD; tmpD = tmpD + blockSize; memset(currentHtR, 0, blockSize*sizeof(double)); double *MM = tmpD; tmpD = tmpD + 4*blockSize*blockSize; double *KK = tmpD; tmpD = tmpD + 4*blockSize*blockSize; double *S = tmpD; memRequested += sizeof(double)*lwork2/(1024.0*1024.0); // Define an array to store the residuals history if (localVerbose > 2) { resHistory = new (nothrow) double[maxIterEigenSolve*blockSize]; if (resHistory == 0) { if (vectWeight) delete vectWeight; delete[] work1; delete[] work2; info = -30; return info; } historyCount = 0; } // Miscellaneous definitions bool reStart = false; numRestart = 0; int localSize; int twoBlocks = 2*blockSize; int nFound = blockSize; int i, j; if (localVerbose > 0) { cout << endl; cout << " *|* Problem: "; if (M) cout << "K*Q = M*Q D "; else cout << "K*Q = Q D "; if (Prec) cout << " with preconditioner"; cout << endl; cout << " *|* Algorithm = DACG (block version)" << endl; cout << " *|* Size of blocks = " << blockSize << endl; cout << " *|* Number of requested eigenvalues = " << numEigen << endl; cout.precision(2); 
cout.setf(ios::scientific, ios::floatfield); cout << " *|* Tolerance for convergence = " << tolEigenSolve << endl; cout << " *|* Norm used for convergence: "; if (normWeight) cout << "weighted L2-norm with user-provided weights" << endl; else cout << "L^2-norm" << endl; if (startingEV > 0) cout << " *|* Input converged eigenvectors = " << startingEV << endl; cout << "\n -- Start iterations -- \n"; } timeOuterLoop -= MyWatch.WallTime(); for (outerIter = 1; outerIter <= maxIterEigenSolve; ++outerIter) { highMem = (highMem > currentSize()) ? highMem : currentSize(); if ((outerIter == 1) || (reStart == true)) { reStart = false; localSize = blockSize; if (nFound > 0) { Epetra_MultiVector X2(View, X, blockSize-nFound, nFound); Epetra_MultiVector MX2(View, MX, blockSize-nFound, nFound); Epetra_MultiVector KX2(View, KX, blockSize-nFound, nFound); // Apply the mass matrix to X timeMassOp -= MyWatch.WallTime(); if (M) M->Apply(X2, MX2); timeMassOp += MyWatch.WallTime(); massOp += nFound; if (knownEV > 0) { // Orthonormalize X against the known eigenvectors with Gram-Schmidt // Note: Use R as a temporary work space Epetra_MultiVector copyQ(View, Q, 0, knownEV); timeOrtho -= MyWatch.WallTime(); info = modalTool.massOrthonormalize(X, MX, M, copyQ, nFound, 0, R.Values()); timeOrtho += MyWatch.WallTime(); // Exit the code if the orthogonalization did not succeed if (info < 0) { info = -10; delete[] work1; delete[] work2; if (vectWeight) delete vectWeight; return info; } } // Apply the stiffness matrix to X timeStifOp -= MyWatch.WallTime(); K->Apply(X2, KX2); timeStifOp += MyWatch.WallTime(); stifOp += nFound; } // if (nFound > 0) } // if ((outerIter == 1) || (reStart == true)) else { // Apply the preconditioner on the residuals if (Prec != 0) { timePrecOp -= MyWatch.WallTime(); Prec->ApplyInverse(R, H); timePrecOp += MyWatch.WallTime(); precOp += blockSize; } else { memcpy(H.Values(), R.Values(), xr*blockSize*sizeof(double)); } // Compute the product H^T*R timeSearchP -= 
MyWatch.WallTime(); memcpy(oldHtR, currentHtR, blockSize*sizeof(double)); H.Dot(R, currentHtR); // Define the new search directions if (localSize == blockSize) { P.Scale(-1.0, H); localSize = twoBlocks; } // if (localSize == blockSize) else { bool hasZeroDot = false; for (j = 0; j < blockSize; ++j) { if (oldHtR[j] == 0.0) { hasZeroDot = true; break; } callBLAS.SCAL(xr, currentHtR[j]/oldHtR[j], P.Values() + j*xr); } if (hasZeroDot == true) { // Restart the computation when there is a null dot product if (localVerbose > 0) { cout << endl; cout << " !! Null dot product -- Restart the search space !!\n"; cout << endl; } if (blockSize == 1) { X.Random(); nFound = blockSize; } else { Epetra_MultiVector Xinit(View, X, j, blockSize-j); Xinit.Random(); nFound = blockSize - j; } // if (blockSize == 1) reStart = true; numRestart += 1; info = 0; continue; } callBLAS.AXPY(xr*blockSize, -1.0, H.Values(), P.Values()); } // if (localSize == blockSize) timeSearchP += MyWatch.WallTime(); // Apply the mass matrix on P timeMassOp -= MyWatch.WallTime(); if (M) M->Apply(P, MP); timeMassOp += MyWatch.WallTime(); massOp += blockSize; if (knownEV > 0) { // Orthogonalize P against the known eigenvectors // Note: Use R as a temporary work space Epetra_MultiVector copyQ(View, Q, 0, knownEV); timeOrtho -= MyWatch.WallTime(); modalTool.massOrthonormalize(P, MP, M, copyQ, blockSize, 1, R.Values()); timeOrtho += MyWatch.WallTime(); } // Apply the stiffness matrix to P timeStifOp -= MyWatch.WallTime(); K->Apply(P, KP); timeStifOp += MyWatch.WallTime(); stifOp += blockSize; } // if ((outerIter == 1) || (reStart == true)) // Form "local" mass and stiffness matrices // Note: Use S as a temporary workspace timeLocalProj -= MyWatch.WallTime(); modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, KX.Values(), xr, KK, localSize, S); modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, MX.Values(), xr, MM, localSize, S); if (localSize > blockSize) { 
modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, KP.Values(), xr, KK + blockSize*localSize, localSize, S); modalTool.localProjection(blockSize, blockSize, xr, P.Values(), xr, KP.Values(), xr, KK + blockSize*localSize + blockSize, localSize, S); modalTool.localProjection(blockSize, blockSize, xr, X.Values(), xr, MP.Values(), xr, MM + blockSize*localSize, localSize, S); modalTool.localProjection(blockSize, blockSize, xr, P.Values(), xr, MP.Values(), xr, MM + blockSize*localSize + blockSize, localSize, S); } // if (localSize > blockSize) timeLocalProj += MyWatch.WallTime(); // Perform a spectral decomposition timeLocalSolve -= MyWatch.WallTime(); int nevLocal = localSize; info = modalTool.directSolver(localSize, KK, localSize, MM, localSize, nevLocal, S, localSize, theta, localVerbose, (blockSize == 1) ? 1: 0); timeLocalSolve += MyWatch.WallTime(); if (info < 0) { // Stop when spectral decomposition has a critical failure break; } // Check for restarting if ((theta[0] < 0.0) || (nevLocal < blockSize)) { if (localVerbose > 0) { cout << " Iteration " << outerIter; cout << "- Failure for spectral decomposition - RESTART with new random search\n"; } if (blockSize == 1) { X.Random(); nFound = blockSize; } else { Epetra_MultiVector Xinit(View, X, 1, blockSize-1); Xinit.Random(); nFound = blockSize - 1; } // if (blockSize == 1) reStart = true; numRestart += 1; info = 0; continue; } // if ((theta[0] < 0.0) || (nevLocal < blockSize)) if ((localSize == twoBlocks) && (nevLocal == blockSize)) { for (j = 0; j < nevLocal; ++j) memcpy(S + j*blockSize, S + j*twoBlocks, blockSize*sizeof(double)); localSize = blockSize; } // Check the direction of eigenvectors // Note: This sign check is important for convergence for (j = 0; j < nevLocal; ++j) { double coeff = S[j + j*localSize]; if (coeff < 0.0) callBLAS.SCAL(localSize, -1.0, S + j*localSize); } // Compute the residuals timeResidual -= MyWatch.WallTime(); callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, 
KX.Values(), xr, S, localSize, 0.0, R.Values(), xr); if (localSize == twoBlocks) { callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KP.Values(), xr, S + blockSize, localSize, 1.0, R.Values(), xr); } for (j = 0; j < blockSize; ++j) callBLAS.SCAL(localSize, theta[j], S + j*localSize); callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, -1.0, MX.Values(), xr, S, localSize, 1.0, R.Values(), xr); if (localSize == twoBlocks) { callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, -1.0, MP.Values(), xr, S + blockSize, localSize, 1.0, R.Values(), xr); } for (j = 0; j < blockSize; ++j) callBLAS.SCAL(localSize, 1.0/theta[j], S + j*localSize); timeResidual += MyWatch.WallTime(); // Compute the norms of the residuals timeNorm -= MyWatch.WallTime(); if (vectWeight) R.NormWeighted(*vectWeight, normR); else R.Norm2(normR); // Scale the norms of residuals with the eigenvalues // Count the converged eigenvectors nFound = 0; for (j = 0; j < blockSize; ++j) { normR[j] = (theta[j] == 0.0) ? normR[j] : normR[j]/theta[j]; if (normR[j] < tolEigenSolve) nFound += 1; } timeNorm += MyWatch.WallTime(); // Store the residual history if (localVerbose > 2) { memcpy(resHistory + historyCount*blockSize, normR, blockSize*sizeof(double)); historyCount += 1; } // Print information on current iteration if (localVerbose > 0) { cout << " Iteration " << outerIter << " - Number of converged eigenvectors "; cout << knownEV + nFound << endl; } if (localVerbose > 1) { cout << endl; cout.precision(2); cout.setf(ios::scientific, ios::floatfield); for (i=0; i<blockSize; ++i) { cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i; cout << " = " << normR[i] << endl; } cout << endl; cout.precision(2); for (i=0; i<blockSize; ++i) { cout << " Iteration " << outerIter << " - Ritz eigenvalue " << i; cout.setf((fabs(theta[i]) < 0.01) ? 
ios::scientific : ios::fixed, ios::floatfield); cout << " = " << theta[i] << endl; } cout << endl; } if (nFound == 0) { // Update the spaces // Note: Use H as a temporary work space timeLocalUpdate -= MyWatch.WallTime(); memcpy(H.Values(), X.Values(), xr*blockSize*sizeof(double)); callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr, S, localSize, 0.0, X.Values(), xr); memcpy(H.Values(), KX.Values(), xr*blockSize*sizeof(double)); callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr, S, localSize, 0.0, KX.Values(), xr); if (M) { memcpy(H.Values(), MX.Values(), xr*blockSize*sizeof(double)); callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, H.Values(), xr, S, localSize, 0.0, MX.Values(), xr); } if (localSize == twoBlocks) { callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, P.Values(), xr, S + blockSize, localSize, 1.0, X.Values(), xr); callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, KP.Values(), xr, S + blockSize, localSize, 1.0, KX.Values(), xr); if (M) { callBLAS.GEMM('N', 'N', xr, blockSize, blockSize, 1.0, MP.Values(), xr, S + blockSize, localSize, 1.0, MX.Values(), xr); } } // if (localSize == twoBlocks) timeLocalUpdate += MyWatch.WallTime(); // When required, monitor some orthogonalities if (verbose > 2) { if (knownEV == 0) { accuracyCheck(&X, &MX, &R, 0, (localSize>blockSize) ? &P : 0); } else { Epetra_MultiVector copyQ(View, Q, 0, knownEV); accuracyCheck(&X, &MX, &R, ©Q, (localSize>blockSize) ? 
&P : 0); } } // if (verbose > 2) continue; } // if (nFound == 0) // Order the Ritz eigenvectors by putting the converged vectors at the beginning int firstIndex = blockSize; for (j = 0; j < blockSize; ++j) { if (normR[j] >= tolEigenSolve) { firstIndex = j; break; } } // for (j = 0; j < blockSize; ++j) while (firstIndex < nFound) { for (j = firstIndex; j < blockSize; ++j) { if (normR[j] < tolEigenSolve) { // Swap the j-th and firstIndex-th position callFortran.SWAP(localSize, S + j*localSize, 1, S + firstIndex*localSize, 1); callFortran.SWAP(1, theta + j, 1, theta + firstIndex, 1); callFortran.SWAP(1, normR + j, 1, normR + firstIndex, 1); break; } } // for (j = firstIndex; j < blockSize; ++j) for (j = 0; j < blockSize; ++j) { if (normR[j] >= tolEigenSolve) { firstIndex = j; break; } } // for (j = 0; j < blockSize; ++j) } // while (firstIndex < nFound) // Copy the converged eigenvalues memcpy(lambda + knownEV, theta, nFound*sizeof(double)); // Convergence test if (knownEV + nFound >= numEigen) { callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, X.Values(), xr, S, localSize, 0.0, R.Values(), xr); if (localSize > blockSize) { callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, P.Values(), xr, S + blockSize, localSize, 1.0, R.Values(), xr); } memcpy(Q.Values() + knownEV*xr, R.Values(), nFound*xr*sizeof(double)); knownEV += nFound; if (localVerbose == 1) { cout << endl; cout.precision(2); cout.setf(ios::scientific, ios::floatfield); for (i=0; i<blockSize; ++i) { cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i; cout << " = " << normR[i] << endl; } cout << endl; } break; } // Store the converged eigenvalues and eigenvectors callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, X.Values(), xr, S, localSize, 0.0, Q.Values() + knownEV*xr, xr); if (localSize == twoBlocks) { callBLAS.GEMM('N', 'N', xr, nFound, blockSize, 1.0, P.Values(), xr, S + blockSize, localSize, 1.0, Q.Values() + knownEV*xr, xr); } knownEV += nFound; // Define the restarting 
vectors timeRestart -= MyWatch.WallTime(); int leftOver = (nevLocal < blockSize + nFound) ? nevLocal - nFound : blockSize; double *Snew = S + nFound*localSize; memcpy(H.Values(), X.Values(), blockSize*xr*sizeof(double)); callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, H.Values(), xr, Snew, localSize, 0.0, X.Values(), xr); memcpy(H.Values(), KX.Values(), blockSize*xr*sizeof(double)); callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, H.Values(), xr, Snew, localSize, 0.0, KX.Values(), xr); if (M) { memcpy(H.Values(), MX.Values(), blockSize*xr*sizeof(double)); callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, H.Values(), xr, Snew, localSize, 0.0, MX.Values(), xr); } if (localSize == twoBlocks) { callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, P.Values(), xr, Snew+blockSize, localSize, 1.0, X.Values(), xr); callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, KP.Values(), xr, Snew+blockSize, localSize, 1.0, KX.Values(), xr); if (M) { callBLAS.GEMM('N', 'N', xr, leftOver, blockSize, 1.0, MP.Values(), xr, Snew+blockSize, localSize, 1.0, MX.Values(), xr); } } // if (localSize == twoBlocks) if (nevLocal < blockSize + nFound) { // Put new random vectors at the end of the block Epetra_MultiVector Xtmp(View, X, leftOver, blockSize - leftOver); Xtmp.Random(); } else { nFound = 0; } // if (nevLocal < blockSize + nFound) reStart = true; timeRestart += MyWatch.WallTime(); } // for (outerIter = 1; outerIter <= maxIterEigenSolve; ++outerIter) timeOuterLoop += MyWatch.WallTime(); highMem = (highMem > currentSize()) ? highMem : currentSize(); // Clean memory delete[] work1; delete[] work2; if (vectWeight) delete vectWeight; // Sort the eigenpairs timePostProce -= MyWatch.WallTime(); if ((info == 0) && (knownEV > 0)) { mySort.sortScalars_Vectors(knownEV, lambda, Q.Values(), Q.MyLength()); } timePostProce += MyWatch.WallTime(); return (info == 0) ? knownEV : info; }
//============================================================================== int Ifpack_PointRelaxation:: ApplyInverseSGS_RowMatrix(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const { int NumVectors = X.NumVectors(); int Length = Matrix().MaxNumEntries(); std::vector<int> Indices(Length); std::vector<double> Values(Length); Teuchos::RefCountPtr< Epetra_MultiVector > Y2; if (IsParallel_) { Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) ); } else Y2 = Teuchos::rcp( &Y, false ); double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr; X.ExtractView(&x_ptr); Y.ExtractView(&y_ptr); Y2->ExtractView(&y2_ptr); Diagonal_->ExtractView(&d_ptr); for (int iter = 0 ; iter < NumSweeps_ ; ++iter) { // only one data exchange per sweep if (IsParallel_) IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert)); for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) { int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii]; int NumEntries; int col; double diag = d_ptr[i]; IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries, &Values[0], &Indices[0])); for (int m = 0 ; m < NumVectors ; ++m) { double dtemp = 0.0; for (int k = 0 ; k < NumEntries ; ++k) { col = Indices[k]; dtemp += Values[k] * y2_ptr[m][col]; } y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag; } } for (int ii = NumLocalSmoothingIndices_ - 1 ; ii > -1 ; --ii) { int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii]; int NumEntries; int col; double diag = d_ptr[i]; IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries, &Values[0], &Indices[0])); for (int m = 0 ; m < NumVectors ; ++m) { double dtemp = 0.0; for (int k = 0 ; k < NumEntries ; ++k) { col = Indices[k]; dtemp += Values[k] * y2_ptr[m][col]; } y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag; } } if (IsParallel_) for (int m = 0 ; m < NumVectors ; ++m) for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) { int i = (!LocalSmoothingIndices_)? 
ii : LocalSmoothingIndices_[ii]; y_ptr[m][i] = y2_ptr[m][i]; } } #ifdef IFPACK_FLOPCOUNTERS ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_); #endif return(0); }
//============================================================================== int LinearProblem_CrsSingletonFilter::UpdateReducedProblem(Epetra_LinearProblem * Problem) { int i, j; if (Problem==0) EPETRA_CHK_ERR(-1); // Null problem pointer FullProblem_ = Problem; FullMatrix_ = dynamic_cast<Epetra_RowMatrix *>(Problem->GetMatrix()); if (FullMatrix_==0) EPETRA_CHK_ERR(-2); // Need a RowMatrix if (Problem->GetRHS()==0) EPETRA_CHK_ERR(-3); // Need a RHS if (Problem->GetLHS()==0) EPETRA_CHK_ERR(-4); // Need a LHS if (!HaveReducedProblem_) EPETRA_CHK_ERR(-5); // Must have set up reduced problem // Create pointer to Full RHS, LHS Epetra_MultiVector * FullRHS = FullProblem()->GetRHS(); Epetra_MultiVector * FullLHS = FullProblem()->GetLHS(); int NumVectors = FullLHS->NumVectors(); int NumEntries; int * Indices; double * Values; int NumMyRows = FullMatrix()->NumMyRows(); int ColSingletonCounter = 0; for (i=0; i<NumMyRows; i++) { int curGRID = FullMatrixRowMap().GID(i); if (ReducedMatrixRowMap()->MyGID(curGRID)) { // Check if this row should go into reduced matrix EPETRA_CHK_ERR(GetRowGCIDs(i, NumEntries, Values, Indices)); // Get current row (indices global) int ierr = ReducedMatrix()->ReplaceGlobalValues(curGRID, NumEntries, Values, Indices); // Positive errors will occur because we are submitting col entries that are not part of // reduced system. 
However, because we specified a column map to the ReducedMatrix constructor // these extra column entries will be ignored and we will be politely reminded by a positive // error code if (ierr<0) EPETRA_CHK_ERR(ierr); } // Otherwise if singleton row we explicitly eliminate this row and solve for corresponding X value else { EPETRA_CHK_ERR(GetRow(i, NumEntries, Values, Indices)); // Get current row if (NumEntries==1) { double pivot = Values[0]; if (pivot==0.0) EPETRA_CHK_ERR(-1); // Encountered zero row, unable to continue int indX = Indices[0]; for (j=0; j<NumVectors; j++) (*tempExportX_)[j][indX] = (*FullRHS)[j][i]/pivot; } // Otherwise, this is a singleton column and we will scan for the pivot element needed // for post-solve equations else { j = ColSingletonPivotLIDs_[ColSingletonCounter]; double pivot = Values[j]; if (pivot==0.0) EPETRA_CHK_ERR(-2); // Encountered zero column, unable to continue ColSingletonPivots_[ColSingletonCounter] = pivot; ColSingletonCounter++; } } } assert(ColSingletonCounter==NumMyColSingletons_); // Sanity test // Update Reduced LHS (Puts any initial guess values into reduced system) ReducedLHS_->PutScalar(0.0); // zero out Reduced LHS EPETRA_CHK_ERR(ReducedLHS_->Import(*FullLHS, *Full2ReducedLHSImporter_, Insert)); FullLHS->PutScalar(0.0); // zero out Full LHS since we will inject values as we get them // Construct Reduced RHS // Zero out temp space tempX_->PutScalar(0.0); tempB_->PutScalar(0.0); //Inject known X values into tempX for purpose of computing tempB = FullMatrix*tempX // Also inject into full X since we already know the solution if (FullMatrix()->RowMatrixImporter()!=0) { EPETRA_CHK_ERR(tempX_->Export(*tempExportX_, *FullMatrix()->RowMatrixImporter(), Add)); EPETRA_CHK_ERR(FullLHS->Export(*tempExportX_, *FullMatrix()->RowMatrixImporter(), Add)); } else { tempX_->Update(1.0, *tempExportX_, 0.0); FullLHS->Update(1.0, *tempExportX_, 0.0); } EPETRA_CHK_ERR(FullMatrix()->Multiply(false, *tempX_, *tempB_)); 
EPETRA_CHK_ERR(tempB_->Update(1.0, *FullRHS, -1.0)); // tempB now has influence of already-known X values ReducedRHS_->PutScalar(0.0); EPETRA_CHK_ERR(ReducedRHS_->Import(*tempB_, *Full2ReducedRHSImporter_, Insert)); return(0); }
int ModeLaplace3DQ2::eigenCheck(const Epetra_MultiVector &Q, double *lambda, double *normWeight, bool /*smallest*/) const { using std::cout; using std::ios; int info = 0; int qc = Q.NumVectors(); int myPid = MyComm.MyPID(); cout.precision(2); cout.setf(ios::scientific, ios::floatfield); // Check orthonormality of eigenvectors double tmp = myVerify.errorOrthonormality(&Q, M); if (myPid == 0) cout << " Maximum coefficient in matrix Q^T M Q - I = " << tmp << endl; // Print out norm of residuals myVerify.errorEigenResiduals(Q, lambda, K, M, normWeight); // Check the eigenvalues int numX = (int) ceil(sqrt(Lx*Lx*lambda[qc-1]/M_PI/M_PI)); numX = (numX > 2*nX) ? 2*nX : numX; int numY = (int) ceil(sqrt(Ly*Ly*lambda[qc-1]/M_PI/M_PI)); numY = (numY > 2*nY) ? 2*nY : numY; int numZ = (int) ceil(sqrt(Lz*Lz*lambda[qc-1]/M_PI/M_PI)); numZ = (numZ > 2*nZ) ? 2*nZ : numZ; int newSize = (numX-1)*(numY-1)*(numZ-1); double *discrete = new (std::nothrow) double[2*newSize]; if (discrete == 0) { return -1; } double *continuous = discrete + newSize; double hx = Lx/nX; double hy = Ly/nY; double hz = Lz/nZ; int i, j, k; for (k = 1; k < numZ; ++k) { // Compute the coefficient alphaz double cosk = cos(k*M_PI*hz/2/Lz); double a = cosk*(92.0 - 12.0*cos(k*M_PI*hz/Lz)); double b = 48.0 + 32.0*cos(k*M_PI*hz/Lz); double c = -160.0*cosk; double delta = sqrt(b*b - 4*a*c); double alphaz = ((-b - delta)*0.5/a < 0.0) ? (-b + delta)*0.5/a : (-b - delta)*0.5/a; for (j = 1; j < numY; ++j) { // Compute the coefficient alphay double cosj = cos(j*M_PI*hy/2/Ly); a = cosj*(92.0 - 12.0*cos(j*M_PI*hy/Ly)); b = 48.0 + 32.0*cos(j*M_PI*hy/Ly); c = -160.0*cosj; delta = sqrt(b*b - 4*a*c); double alphay = ((-b - delta)*0.5/a < 0.0) ? 
(-b + delta)*0.5/a : (-b - delta)*0.5/a; for (i = 1; i < numX; ++i) { // Compute the coefficient alphax double cosi = cos(i*M_PI*hx/2/Lx); a = cosi*(92.0 - 12.0*cos(i*M_PI*hx/Lx)); b = 48.0 + 32.0*cos(i*M_PI*hx/Lx); c = -160.0*cosi; delta = sqrt(b*b - 4*a*c); double alphax = ((-b - delta)*0.5/a < 0.0) ? (-b + delta)*0.5/a : (-b - delta)*0.5/a; // Compute the continuous eigenvalue int pos = i-1 + (j-1)*(numX-1) + (k-1)*(numX-1)*(numY-1); continuous[pos] = M_PI*M_PI*(i*i/(Lx*Lx) + j*j/(Ly*Ly) + k*k/(Lz*Lz)); // Compute the discrete eigenvalue discrete[pos] = 240.0*(1.0-alphax*cosi)/((8.0+2*alphax*cosi)*(3.0*hx*hx)); discrete[pos] += 240.0*(1.0-alphay*cosj)/((8.0+2*alphay*cosj)*(3.0*hy*hy)); discrete[pos] += 240.0*(1.0-alphaz*cosk)/((8.0+2*alphaz*cosk)*(3.0*hz*hz)); } } } // Sort the eigenvalues in ascending order mySort.sortScalars(newSize, continuous); int *used = new (std::nothrow) int[newSize]; if (used == 0) { delete[] discrete; return -1; } mySort.sortScalars(newSize, discrete, used); int *index = new (std::nothrow) int[newSize]; if (index == 0) { delete[] discrete; delete[] used; return -1; } for (i=0; i<newSize; ++i) { index[used[i]] = i; } delete[] used; int nMax = myVerify.errorLambda(continuous, discrete, newSize, lambda, qc); // Define the exact discrete eigenvectors int localSize = Map->NumMyElements(); double *vQ = new (std::nothrow) double[(nMax+1)*localSize + nMax]; if (vQ == 0) { delete[] discrete; delete[] index; info = -1; return info; } double *normL2 = vQ + (nMax+1)*localSize; Epetra_MultiVector Qex(View, *Map, vQ, localSize, nMax); if ((myPid == 0) && (nMax > 0)) { cout << endl; cout << " --- Relative discretization errors for exact eigenvectors ---" << endl; cout << endl; cout << " Cont. Values Disc. 
Values Error H^1 norm L^2 norm\n"; } for (k=1; k < numZ; ++k) { for (j=1; j < numY; ++j) { for (i=1; i < numX; ++i) { int pos = i-1 + (j-1)*(numX-1) + (k-1)*(numX-1)*(numY-1); if (index[pos] < nMax) { int ii; for (ii=0; ii<localSize; ++ii) { // Compute the coefficient alphaz double cosk = cos(k*M_PI*hz/2/Lz); double a = cosk*(92.0 - 12.0*cos(k*M_PI*hz/Lz)); double b = 48.0 + 32.0*cos(k*M_PI*hz/Lz); double c = -160.0*cosk; double delta = sqrt(b*b - 4*a*c); double alphaz = ((-b - delta)*0.5/a < 0.0) ? (-b + delta)*0.5/a : (-b - delta)*0.5/a; // Compute the coefficient alphay double cosj = cos(j*M_PI*hy/2/Ly); a = cosj*(92.0 - 12.0*cos(j*M_PI*hy/Ly)); b = 48.0 + 32.0*cos(j*M_PI*hy/Ly); c = -160.0*cosj; delta = sqrt(b*b - 4*a*c); double alphay = ((-b - delta)*0.5/a < 0.0) ? (-b + delta)*0.5/a : (-b - delta)*0.5/a; // Compute the coefficient alphax double cosi = cos(i*M_PI*hx/2/Lx); a = cosi*(92.0 - 12.0*cos(i*M_PI*hx/Lx)); b = 48.0 + 32.0*cos(i*M_PI*hx/Lx); c = -160.0*cosi; delta = sqrt(b*b - 4*a*c); double alphax = ((-b - delta)*0.5/a < 0.0) ? 
(-b + delta)*0.5/a : (-b - delta)*0.5/a; // Get the value for this eigenvector double coeff = sin(i*(M_PI/Lx)*x[ii])*sin(j*(M_PI/Ly)*y[ii])*sin(k*(M_PI/Lz)*z[ii]); if (fabs(x[ii] - floor(x[ii]/hx+0.5)*hx) < 0.25*hx) coeff *= alphax; if (fabs(y[ii] - floor(y[ii]/hy+0.5)*hy) < 0.25*hy) coeff *= alphay; if (fabs(z[ii] - floor(z[ii]/hz+0.5)*hz) < 0.25*hz) coeff *= alphaz; Qex.ReplaceMyValue(ii, index[pos], coeff); } // Normalize Qex against the mass matrix Epetra_MultiVector MQex(View, *Map, vQ + nMax*localSize, localSize, 1); Epetra_MultiVector Qi(View, Qex, index[pos], 1); M->Apply(Qi, MQex); double mnorm = 0.0; Qi.Dot(MQex, &mnorm); Qi.Scale(1.0/sqrt(mnorm)); // Compute the L2 norm Epetra_MultiVector shapeInt(View, *Map, vQ + nMax*localSize, localSize, 1); for (ii=0; ii<localSize; ++ii) { double iX, iY, iZ; if (fabs(x[ii] - floor(x[ii]/hx+0.5)*hx) < 0.25*hx) iX = 2.0*sin(i*(M_PI/Lx)*x[ii])/(hx*hx*i*(M_PI/Lx)*i*(M_PI/Lx)*i*(M_PI/Lx))* sqrt(2.0/Lx)*( 3*hx*i*(M_PI/Lx) - 4*sin(i*(M_PI/Lx)*hx) + cos(i*(M_PI/Lx)*hx)*hx*i*(M_PI/Lx) ); else iX = 8.0*sin(i*(M_PI/Lx)*x[ii])/(hx*hx*i*(M_PI/Lx)*i*(M_PI/Lx)*i*(M_PI/Lx))* sqrt(2.0/Lx)*( 2*sin(i*(M_PI/Lx)*0.5*hx) - cos(i*(M_PI/Lx)*0.5*hx)*hx*i*(M_PI/Lx)); if (fabs(y[ii] - floor(y[ii]/hy+0.5)*hy) < 0.25*hy) iY = 2.0*sin(j*(M_PI/Ly)*y[ii])/(hy*hy*j*(M_PI/Ly)*j*(M_PI/Ly)*j*(M_PI/Ly))* sqrt(2.0/Ly)*( 3*hy*j*(M_PI/Ly) - 4*sin(j*(M_PI/Ly)*hy) + cos(j*(M_PI/Ly)*hy)*hy*j*(M_PI/Ly) ); else iY = 8.0*sin(j*(M_PI/Ly)*y[ii])/(hy*hy*j*(M_PI/Ly)*j*(M_PI/Ly)*j*(M_PI/Ly))* sqrt(2.0/Ly)*( 2*sin(j*(M_PI/Ly)*0.5*hy) - cos(j*(M_PI/Ly)*0.5*hy)*hy*j*(M_PI/Ly)); if (fabs(z[ii] - floor(z[ii]/hz+0.5)*hz) < 0.25*hz) iZ = 2.0*sin(k*(M_PI/Lz)*z[ii])/(hz*hz*k*(M_PI/Lz)*k*(M_PI/Lz)*k*(M_PI/Lz))* sqrt(2.0/Lz)*( 3*hz*k*(M_PI/Lz) - 4*sin(k*(M_PI/Lz)*hz) + cos(k*(M_PI/Lz)*hz)*hz*k*(M_PI/Lz) ); else iZ = 8.0*sin(k*(M_PI/Lz)*z[ii])/(hz*hz*k*(M_PI/Lz)*k*(M_PI/Lz)*k*(M_PI/Lz))* sqrt(2.0/Lz)*( 2*sin(k*(M_PI/Lz)*0.5*hz) - cos(k*(M_PI/Lz)*0.5*hz)*hz*k*(M_PI/Lz)); 
shapeInt.ReplaceMyValue(ii, 0, iX*iY*iZ); } Qi.Dot(shapeInt, normL2 + index[pos]); } // if index[pos] < nMax) } // for (i=1; i < numX; ++i) } // for (j=1; j < numY; ++j) } // for (k=1; k < numZ; ++k) if (myPid == 0) { for (i = 0; i < nMax; ++i) { double normH1 = continuous[i]*(1.0 - 2.0*normL2[i]) + discrete[i]; normL2[i] = 2.0 - 2.0*normL2[i]; normH1+= normL2[i]; // Print out the result if (myPid == 0) { cout << " "; cout.width(4); cout << i+1 << ". "; cout.setf(ios::scientific, ios::floatfield); cout.precision(8); cout << continuous[i] << " " << discrete[i] << " "; cout.precision(3); cout << fabs(discrete[i] - continuous[i])/continuous[i] << " "; cout << sqrt(fabs(normH1)/(continuous[i]+1.0)) << " "; cout << sqrt(fabs(normL2[i])) << endl; } } // for (i = 0; i < nMax; ++i) } // if (myPid == 0) delete[] discrete; delete[] index; // Check the angles between exact discrete eigenvectors and computed myVerify.errorSubspaces(Q, Qex, M); delete[] vQ; return info; }
// Builds the block (multi-point) problem from a single-point Jacobian:
//  - prints the partition layout on global process 0,
//  - creates a block Jacobian with a diagonal-only stencil, one block row per
//    time step held by this time domain,
//  - creates the block solution vector, its overlap (column-map) counterpart
//    and the importer between them,
//  - loads column i of splitMultiVec_ as the initial guess of block i.
LOCA::Epetra::Interface::MultiPoint::
MultiPoint(
       const Teuchos::RCP<LOCA::Epetra::Interface::Required> &iReq_,
       const Teuchos::RCP< NOX::Epetra::Interface::Jacobian> &iJac_,
       const Epetra_MultiVector &splitMultiVec_,
       const Teuchos::RCP<Epetra_RowMatrix> &splitJac_,
       const Teuchos::RCP<EpetraExt::MultiComm> &globalComm_) :
  iReq(iReq_),
  iJac(iJac_),
  splitJac(splitJac_),
  globalComm(globalComm_),
  splitVec(*(splitMultiVec_(0))),
  splitRes(*(splitMultiVec_(0))),
  jacobian(0),
  solution(0),
  solutionOverlap(0),
  overlapImporter(0),
  timeStepsOnTimeDomain(splitMultiVec_.NumVectors()),
  numTimeDomains(globalComm_->NumSubDomains()),
  timeDomain(globalComm_->SubDomainRank()),
  conStep(0),
  rowStencil(0),
  rowIndex(0)
{
  if (globalComm->MyPID()==0) {
    // TODO: pass in globalData and use output stream
    cout << "----------MultiPoint Partition Info------------";
    cout << "\n\tNumProcs = " << globalComm->NumProc();
    cout << "\n\tSpatial Decomposition = " << splitMultiVec_.Comm().NumProc();
    cout << "\n\tNumber of Domains = " << numTimeDomains;
    cout << "\n\tSteps on Domain 0 = " << timeStepsOnTimeDomain;
    cout << "\n\tTotal Number of Steps = " << globalComm->NumTimeSteps();
    cout << "\n-----------------------------------------------" << endl;
  }

  // Construct global block matrix graph from split jacobian and stencil,
  // which is just diagonal in this case
  rowStencil = new std::vector< std::vector<int> >(timeStepsOnTimeDomain);
  rowIndex = new std::vector<int>;
  for (int step = 0; step < timeStepsOnTimeDomain; ++step) {
    std::vector<int>& stencilRow = (*rowStencil)[step];
    stencilRow.push_back(0);  // offset 0: only the diagonal block
    rowIndex->push_back(step + globalComm->FirstTimeStepOnDomain());
  }

  jacobian = new EpetraExt::BlockCrsMatrix(*splitJac, *rowStencil,
                                           *rowIndex, *globalComm);

  // Construct global solution vector, the overlap vector,
  // and importer between them
  solution = new EpetraExt::BlockVector(splitJac->RowMatrixRowMap(),
                                        jacobian->RowMap());
  solutionOverlap = new EpetraExt::BlockVector(splitJac->RowMatrixRowMap(),
                                               jacobian->ColMap());

  overlapImporter = new Epetra_Import(solutionOverlap->Map(), solution->Map());

  // Load initial guess into block solution vector
  for (int step = 0; step < timeStepsOnTimeDomain; ++step) {
    const int blockRow = (*rowIndex)[step];
    solution->LoadBlockValues(*(splitMultiVec_(step)), blockRow);
  }
}
int Stokhos::ApproxSchurComplementPreconditioner:: ApplyInverse(const Epetra_MultiVector& Input, Epetra_MultiVector& Result) const { #ifdef STOKHOS_TEUCHOS_TIME_MONITOR TEUCHOS_FUNC_TIME_MONITOR("Stokhos: Total Approximate Schur Complement Time"); #endif // We have to be careful if Input and Result are the same vector. // If this is the case, the only possible solution is to make a copy const Epetra_MultiVector *input = &Input; bool made_copy = false; if (Input.Values() == Result.Values()) { input = new Epetra_MultiVector(Input); made_copy = true; } // Allocate temporary storage int m = input->NumVectors(); if (rhs_block == Teuchos::null || rhs_block->NumVectors() != m) rhs_block = Teuchos::rcp(new EpetraExt::BlockMultiVector(*base_map, *sg_map, m)); if (tmp == Teuchos::null || tmp->NumVectors() != m*max_num_mat_vec) tmp = Teuchos::rcp(new Epetra_MultiVector(*base_map, m*max_num_mat_vec)); j_ptr.resize(m*max_num_mat_vec); mj_indices.resize(m*max_num_mat_vec); // Extract blocks EpetraExt::BlockMultiVector input_block(View, *base_map, *input); EpetraExt::BlockMultiVector result_block(View, *base_map, Result); result_block.PutScalar(0.0); // Set right-hand-side to input_block rhs_block->Update(1.0, input_block, 0.0); // At level l, linear system has the structure // [ A_{l-1} B_l ][ u_l^{l-1} ] = [ r_l^{l-1} ] // [ C_l D_l ][ u_l^l ] [ r_l^l ] for (int l=P; l>=1; l--) { // Compute D_l^{-1} r_l^l divide_diagonal_block(block_indices[l], block_indices[l+1], *rhs_block, result_block); // Compute r_l^{l-1} = r_l^{l-1} - B_l D_l^{-1} r_l^l multiply_block(upper_block_Cijk[l], -1.0, result_block, *rhs_block); } // Solve A_0 u_0 = r_0 divide_diagonal_block(0, 1, *rhs_block, result_block); for (int l=1; l<=P; l++) { // Compute r_l^l - C_l*u_l^{l-1} multiply_block(lower_block_Cijk[l], -1.0, result_block, *rhs_block); // Compute D_l^{-1} (r_l^l - C_l*u_l^{l-1}) divide_diagonal_block(block_indices[l], block_indices[l+1], *rhs_block, result_block); } if (made_copy) delete input; 
return 0; }
// Fills x_culled from x by importing through this object's importer with
// the Insert combine mode.
//
//   x        - source multivector
//   x_culled - destination multivector, overwritten by the import
void
Albany::SolutionResponseFunction::
cullSolution(const Epetra_MultiVector& x, Epetra_MultiVector& x_culled) const
{
  // The status code of Import() is intentionally not inspected here.
  (void) x_culled.Import(x, *importer, Insert);
}
//==============================================================================
// Applies the Chebyshev polynomial preconditioner: on return Y holds the
// preconditioned vector(s) for right-hand side X.
//
//   X - right-hand side multivector
//   Y - result multivector; when ZeroStartingSolution_ is false its incoming
//       contents are used as the initial guess
//
// Error codes are raised through IFPACK_CHK_ERR: -3 when the preconditioner
// has not been computed, -2 when X and Y have different numbers of vectors.
// Returns 0 otherwise (including the PolyDegree_ == 0 case, where Y is left
// untouched).
//
// Two code paths exist throughout: a point-diagonal path that works on raw
// value arrays, and (only when HAVE_IFPACK_EPETRAEXT is defined and
// UseBlockMode_ is set) a block-diagonal path that goes through
// InvBlockDiagonal_.
int Ifpack_Chebyshev::
ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
{
  if (!IsComputed())
    IFPACK_CHK_ERR(-3);

  // A degree-zero polynomial does nothing.
  if (PolyDegree_ == 0)
    return 0;

  int nVec = X.NumVectors();
  int len = X.MyLength();
  if (nVec != Y.NumVectors())
    IFPACK_CHK_ERR(-2);

  Time_->ResetStartTime();

  // AztecOO gives X and Y pointing to the same memory location,
  // need to create an auxiliary vector, Xcopy.
  // When they do not alias, Xcopy is a non-owning handle to X itself.
  Teuchos::RefCountPtr<const Epetra_MultiVector> Xcopy;
  if (X.Pointers()[0] == Y.Pointers()[0])
    Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) );
  else
    Xcopy = Teuchos::rcp( &X, false );

  // Raw column pointers; xPtr refers to Xcopy (the safe copy), not X.
  double **xPtr = 0, **yPtr = 0;
  Xcopy->ExtractView(&xPtr);
  Y.ExtractView(&yPtr);

#ifdef HAVE_IFPACK_EPETRAEXT
  EpetraExt_PointToBlockDiagPermute* IBD=0;
  if (UseBlockMode_) IBD=&*InvBlockDiagonal_;
#endif

  //--- Do a quick solve when the matrix is identity
  // invDiag stays null in block mode; only the point-diagonal path reads it.
  double *invDiag=0;
  if(!UseBlockMode_) invDiag=InvDiagonal_->Values();
  if ((LambdaMin_ == 1.0) && (LambdaMax_ == LambdaMin_)) {
#ifdef HAVE_IFPACK_EPETRAEXT
    if(UseBlockMode_) IBD->ApplyInverse(*Xcopy,Y);
    else
#endif
    if (nVec == 1) {
      double *yPointer = yPtr[0], *xPointer = xPtr[0];
      for (int i = 0; i < len; ++i)
        yPointer[i] = xPointer[i]*invDiag[i];
    }
    else {
      int i, k;
      for (i = 0; i < len; ++i) {
        double coeff = invDiag[i];
        for (k = 0; k < nVec; ++k)
          yPtr[k][i] = xPtr[k][i] * coeff;
      }
    } // if (nVec == 1)
    return 0;
  } // if ((LambdaMin_ == 1.0) && (LambdaMax_ == LambdaMin_))

  //--- Initialize coefficients
  // Note that delta stores the inverse of ML_Cheby::delta
  double alpha = LambdaMax_ / EigRatio_;
  double beta = 1.1 * LambdaMax_;
  double delta = 2.0 / (beta - alpha);
  double theta = 0.5 * (beta + alpha);
  double s1 = theta * delta;

  //--- Define vectors
  // In ML_Cheby, V corresponds to pAux and W to dk.
  // Both are copy-constructed from X only to obtain matching shape; their
  // contents are overwritten before being read.
  Epetra_MultiVector V(X);
  Epetra_MultiVector W(X);
#ifdef HAVE_IFPACK_EPETRAEXT
  Epetra_MultiVector Temp(X);
#endif

  double *vPointer = V.Values(), *wPointer = W.Values();

  double oneOverTheta = 1.0/theta;
  int i, j, k;

  //--- If solving normal equations, multiply RHS by A^T
  // NOTE(review): the transpose is applied to Y (and Y is overwritten),
  // not to X as the comment above suggests -- confirm the intent.
  if(SolveNormalEquations_){
    Apply_Transpose(Operator_,Y,V);
    Y=V;
  }

  // Do the smoothing when block scaling is turned OFF
  // --- Treat the initial guess
  if (ZeroStartingSolution_ == false) {
    Operator_->Apply(Y, V);
    // Compute W = invDiag * ( X - V )/ Theta
#ifdef HAVE_IFPACK_EPETRAEXT
    if(UseBlockMode_) {
      // NOTE(review): this path reads X directly instead of Xcopy -- confirm
      // that block mode is never used with aliased X and Y.
      Temp.Update(oneOverTheta,X,-oneOverTheta,V,0.0);
      IBD->ApplyInverse(Temp,W);

      // Perform additional matvecs for normal equations
      // CMS: Testing this only in block mode FOR NOW
      if(SolveNormalEquations_){
        IBD->ApplyInverse(W,Temp);
        Apply_Transpose(Operator_,Temp,W);
      }
    }
    else
#endif
    if (nVec == 1) {
      double *xPointer = xPtr[0];
      for (i = 0; i < len; ++i)
        wPointer[i] = invDiag[i] * (xPointer[i] - vPointer[i]) * oneOverTheta;
    }
    else {
      for (i = 0; i < len; ++i) {
        double coeff = invDiag[i]*oneOverTheta;
        // wi/vi walk row i across the columns by stepping len entries at a
        // time; assumes V and W store their columns back to back with
        // stride len -- TODO confirm.
        double *wi = wPointer + i, *vi = vPointer + i;
        for (k = 0; k < nVec; ++k) {
          *wi = (xPtr[k][i] - (*vi)) * coeff;
          wi = wi + len; vi = vi + len;
        }
      }
    } // if (nVec == 1)
    // Update the vector Y
    Y.Update(1.0, W, 1.0);
  }
  else {
    // Compute W = invDiag * X / Theta
#ifdef HAVE_IFPACK_EPETRAEXT
    if(UseBlockMode_) {
      IBD->ApplyInverse(X,W);

      // Perform additional matvecs for normal equations
      // CMS: Testing this only in block mode FOR NOW
      if(SolveNormalEquations_){
        IBD->ApplyInverse(W,Temp);
        Apply_Transpose(Operator_,Temp,W);
      }

      W.Scale(oneOverTheta);
      // Zero starting solution: Y is simply set to W.
      Y.Update(1.0, W, 0.0);
    }
    else
#endif
    if (nVec == 1) {
      double *xPointer = xPtr[0];
      for (i = 0; i < len; ++i){
        wPointer[i] = invDiag[i] * xPointer[i] * oneOverTheta;
      }
      // Y = W for the single column.
      memcpy(yPtr[0], wPointer, len*sizeof(double));
    }
    else {
      for (i = 0; i < len; ++i) {
        double coeff = invDiag[i]*oneOverTheta;
        double *wi = wPointer + i;
        for (k = 0; k < nVec; ++k) {
          *wi = xPtr[k][i] * coeff;
          wi = wi + len;
        }
      }
      // Y = W, one column at a time.
      for (k = 0; k < nVec; ++k)
        memcpy(yPtr[k], wPointer + k*len, len*sizeof(double));
    } // if (nVec == 1)
  } // if (ZeroStartingSolution_ == false)

  //--- Apply the polynomial
  // One step has already been taken above, so PolyDegree_ - 1 remain.
  double rhok = 1.0/s1, rhokp1;
  double dtemp1, dtemp2;
  int degreeMinusOne = PolyDegree_ - 1;
  if (nVec == 1) {
    double *xPointer = xPtr[0];
    for (k = 0; k < degreeMinusOne; ++k) {
      Operator_->Apply(Y, V);
      rhokp1 = 1.0 / (2.0*s1 - rhok);
      dtemp1 = rhokp1 * rhok;
      dtemp2 = 2.0 * rhokp1 * delta;
      rhok = rhokp1;
      // Compute W = dtemp1 * W
      W.Scale(dtemp1);
      // Compute W = W + dtemp2 * invDiag * ( X - V )
#ifdef HAVE_IFPACK_EPETRAEXT
      if(UseBlockMode_) {
        //NTS: We can clobber V since it will be reset in the Apply
        // NOTE(review): X is read here after Y has been updated; if X and Y
        // alias, X no longer holds the right-hand side (Xcopy does).
        V.Update(dtemp2,X,-dtemp2);
        IBD->ApplyInverse(V,Temp);

        // Perform additional matvecs for normal equations
        // CMS: Testing this only in block mode FOR NOW
        if(SolveNormalEquations_){
          IBD->ApplyInverse(V,Temp);
          Apply_Transpose(Operator_,Temp,V);
        }

        W.Update(1.0,Temp,1.0);
      }
      else{
#endif
        for (i = 0; i < len; ++i)
          wPointer[i] += dtemp2* invDiag[i] * (xPointer[i] - vPointer[i]);
#ifdef HAVE_IFPACK_EPETRAEXT
      }
#endif

      // Update the vector Y
      Y.Update(1.0, W, 1.0);
    } // for (k = 0; k < degreeMinusOne; ++k)
  }
  else {
    for (k = 0; k < degreeMinusOne; ++k) {
      Operator_->Apply(Y, V);
      rhokp1 = 1.0 / (2.0*s1 - rhok);
      dtemp1 = rhokp1 * rhok;
      dtemp2 = 2.0 * rhokp1 * delta;
      rhok = rhokp1;
      // Compute W = dtemp1 * W
      W.Scale(dtemp1);
      // Compute W = W + dtemp2 * invDiag * ( X - V )
#ifdef HAVE_IFPACK_EPETRAEXT
      if(UseBlockMode_) {
        //We can clobber V since it will be reset in the Apply
        // NOTE(review): same X-versus-Xcopy concern as the single-vector loop.
        V.Update(dtemp2,X,-dtemp2);
        IBD->ApplyInverse(V,Temp);

        // Perform additional matvecs for normal equations
        // CMS: Testing this only in block mode FOR NOW
        if(SolveNormalEquations_){
          IBD->ApplyInverse(V,Temp);
          Apply_Transpose(Operator_,Temp,V);
        }

        W.Update(1.0,Temp,1.0);
      }
      else{
#endif
        for (i = 0; i < len; ++i) {
          double coeff = invDiag[i]*dtemp2;
          // Same strided row walk as in the initial step.
          double *wi = wPointer + i, *vi = vPointer + i;
          for (j = 0; j < nVec; ++j) {
            *wi += (xPtr[j][i] - (*vi)) * coeff;
            wi = wi + len; vi = vi + len;
          }
        }
#ifdef HAVE_IFPACK_EPETRAEXT
      }
#endif
      // Update the vector Y
      Y.Update(1.0, W, 1.0);
    } // for (k = 0; k < degreeMinusOne; ++k)
  } // if (nVec == 1)

  // Flops are updated in each of the following.
  ++NumApplyInverse_;
  ApplyInverseTime_ += Time_->ElapsedTime();
  return(0);
}
int Amesos_Scalapack::Solve() { if( debug_ == 1 ) std::cout << "Entering `Solve()'" << std::endl; NumSolve_++; Epetra_MultiVector *vecX = Problem_->GetLHS() ; Epetra_MultiVector *vecB = Problem_->GetRHS() ; // // Compute the number of right hands sides // (and check that X and B have the same shape) // int nrhs; if ( vecX == 0 ) { nrhs = 0 ; EPETRA_CHK_ERR( vecB != 0 ) ; } else { nrhs = vecX->NumVectors() ; EPETRA_CHK_ERR( vecB->NumVectors() != nrhs ) ; } Epetra_MultiVector *ScalapackB =0; Epetra_MultiVector *ScalapackX =0; // // Extract Scalapack versions of X and B // double *ScalapackXvalues ; Epetra_RowMatrix *RowMatrixA = dynamic_cast<Epetra_RowMatrix *>(Problem_->GetOperator()); Time_->ResetStartTime(); // track time to broadcast vectors // // Copy B to the scalapack version of B // const Epetra_Map &OriginalMap = RowMatrixA->RowMatrixRowMap(); Epetra_MultiVector *ScalapackXextract = new Epetra_MultiVector( *VectorMap_, nrhs ) ; Epetra_MultiVector *ScalapackBextract = new Epetra_MultiVector( *VectorMap_, nrhs ) ; Epetra_Import ImportToScalapack( *VectorMap_, OriginalMap ); ScalapackBextract->Import( *vecB, ImportToScalapack, Insert ) ; ScalapackB = ScalapackBextract ; ScalapackX = ScalapackXextract ; VecTime_ += Time_->ElapsedTime(); // // Call SCALAPACKs PDGETRS to perform the solve // int DescX[10]; ScalapackX->Scale(1.0, *ScalapackB) ; int ScalapackXlda ; Time_->ResetStartTime(); // tract time to solve // // Setup DescX // if( nrhs > nb_ ) { EPETRA_CHK_ERR( -2 ); } int Ierr[1] ; Ierr[0] = 0 ; const int zero = 0 ; const int one = 1 ; if ( iam_ < nprow_ * npcol_ ) { assert( ScalapackX->ExtractView( &ScalapackXvalues, &ScalapackXlda ) == 0 ) ; if ( false ) std::cout << "Amesos_Scalapack.cpp: " << __LINE__ << " ScalapackXlda = " << ScalapackXlda << " lda_ = " << lda_ << " nprow_ = " << nprow_ << " npcol_ = " << npcol_ << " myprow_ = " << myprow_ << " mypcol_ = " << mypcol_ << " iam_ = " << iam_ << std::endl ; if ( TwoD_distribution_ ) assert( mypcol_ >0 || 
EPETRA_MAX(ScalapackXlda,1) == lda_ ) ; DESCINIT_F77(DescX, &NumGlobalElements_, &nrhs, &nb_, &nb_, &zero, &zero, &ictxt_, &lda_, Ierr ) ; assert( Ierr[0] == 0 ) ; // // For the 1D data distribution, we factor the transposed // matrix, hence we must invert the sense of the transposition // char trans = 'N'; if ( TwoD_distribution_ ) { if ( UseTranspose() ) trans = 'T' ; } else { if ( ! UseTranspose() ) trans = 'T' ; } if ( nprow_ * npcol_ == 1 ) { DGETRS_F77(&trans, &NumGlobalElements_, &nrhs, &DenseA_[0], &lda_, &Ipiv_[0], ScalapackXvalues, &lda_, Ierr ) ; } else { PDGETRS_F77(&trans, &NumGlobalElements_, &nrhs, &DenseA_[0], &one, &one, DescA_, &Ipiv_[0], ScalapackXvalues, &one, &one, DescX, Ierr ) ; } } SolTime_ += Time_->ElapsedTime(); Time_->ResetStartTime(); // track time to broadcast vectors // // Copy X back to the original vector // Epetra_Import ImportFromScalapack( OriginalMap, *VectorMap_ ); vecX->Import( *ScalapackX, ImportFromScalapack, Insert ) ; delete ScalapackBextract ; delete ScalapackXextract ; VecTime_ += Time_->ElapsedTime(); // All processes should return the same error code if ( nprow_ * npcol_ < Comm().NumProc() ) Comm().Broadcast( Ierr, 1, 0 ) ; // MS // compute vector norms if( ComputeVectorNorms_ == true || verbose_ == 2 ) { double NormLHS, NormRHS; for( int i=0 ; i<nrhs ; ++i ) { assert((*vecX)(i)->Norm2(&NormLHS)==0); assert((*vecB)(i)->Norm2(&NormRHS)==0); if( verbose_ && Comm().MyPID() == 0 ) { std::cout << "Amesos_Scalapack : vector " << i << ", ||x|| = " << NormLHS << ", ||b|| = " << NormRHS << std::endl; } } } // MS // compute true residual if( ComputeTrueResidual_ == true || verbose_ == 2 ) { double Norm; Epetra_MultiVector Ax(vecB->Map(),nrhs); for( int i=0 ; i<nrhs ; ++i ) { (Problem_->GetMatrix()->Multiply(UseTranspose(), *((*vecX)(i)), Ax)); (Ax.Update(1.0, *((*vecB)(i)), -1.0)); (Ax.Norm2(&Norm)); if( verbose_ && Comm().MyPID() == 0 ) { std::cout << "Amesos_Scalapack : vector " << i << ", ||Ax - b|| = " << Norm << std::endl; 
} } } return Ierr[0]; }
//=============================================================================
// Solves the linear system held by Problem_ using the stored KLU
// factorization.
//
// When TrustMe_ is false the routine factorizes if needed, optionally
// re-indexes the vectors, gathers the right-hand side into a serial vector
// (unless UseDataInPlace_ is 1), solves on process 0, and sends the solution
// back. When TrustMe_ is true the LHS/RHS of Problem_ are used directly with
// no checks, timing or redistribution.
//
// Errors are raised through AMESOS_CHK_ERR: -13 when re-indexing is requested
// without EpetraExt support, -1 for a null LHS/RHS or an internal
// inconsistency, or whatever NumericFactorization() reports.
// Returns 0 otherwise.
int Amesos_Klu::Solve()
{
  Epetra_MultiVector* vecX = 0 ;
  Epetra_MultiVector* vecB = 0 ;

#ifdef HAVE_AMESOS_EPETRAEXT
  // Hold the re-indexed vectors (if any) for the duration of the solve.
  Teuchos::RCP<Epetra_MultiVector> vecX_rcp;
  Teuchos::RCP<Epetra_MultiVector> vecB_rcp;
#endif

#ifdef Bug_8212
  //  This demonstrates Bug #2812 - Valgrind does not catch this
  //  memory leak
  lose_this_ = (int *) malloc( 300 ) ;

#ifdef Bug_8212_B
  //  This demonstrates Bug #2812 - Valgrind does catch this
  //  use of uninitialized data - but only in TestOptions/TestOptions.exe
  //  not in Test_Basic/amesos_test.exe
  //
  if ( lose_this_[0] == 12834 ) {
    std::cout << __FILE__ << "::" << __LINE__ << " very unlikely to happen " << std::endl ;
  }
#endif
#endif

  if ( !TrustMe_  ) {

    // Non-owning handles to the caller's vectors; replaced further down
    // depending on UseDataInPlace_.
    SerialB_ = Teuchos::rcp(Problem_->GetRHS(),false);
    SerialX_ = Teuchos::rcp(Problem_->GetLHS(),false);

    Epetra_MultiVector* OrigVecX ;
    Epetra_MultiVector* OrigVecB ;

    if (IsNumericFactorizationOK_ == false)
      AMESOS_CHK_ERR(NumericFactorization());

    // Timer 1 accumulates into "Total Amesos overhead time" below.
    ResetTimer(1);

    //
    //  Reindex the LHS and RHS
    //
    OrigVecX = Problem_->GetLHS() ;
    OrigVecB = Problem_->GetRHS() ;

    if ( Reindex_ ) {
#ifdef HAVE_AMESOS_EPETRAEXT
      vecX_rcp = StdIndexDomain_->StandardizeIndex( *OrigVecX ) ;
      vecB_rcp = StdIndexRange_->StandardizeIndex( *OrigVecB ) ;

      vecX = &*vecX_rcp;
      vecB = &*vecB_rcp;
#else
      AMESOS_CHK_ERR( -13 ) ; // Amesos_Klu can't handle non-standard indexing without EpetraExt
#endif
    } else {
      vecX = OrigVecX ;
      vecB = OrigVecB ;
    }

    if ((vecX == 0) || (vecB == 0))
      AMESOS_CHK_ERR(-1); // something wrong in input

    //  Extract Serial versions of X and B

    // Timer 0 is reused for each timed phase (redistribution, solve).
    ResetTimer(0);

    //  Copy B to the serial version of B
    //
    if (UseDataInPlace_ == 1) {
      // In-place: the serial vectors are the (possibly re-indexed) inputs.
#ifdef HAVE_AMESOS_EPETRAEXT
      if(vecX_rcp==Teuchos::null)
        SerialX_ = Teuchos::rcp(vecX,false);
      else
        SerialX_ = vecX_rcp;

      if(vecB_rcp==Teuchos::null)
        SerialB_ = Teuchos::rcp(vecB,false);
      else
        SerialB_ = vecB_rcp;
#else
      SerialB_ = Teuchos::rcp(vecB,false);
      SerialX_ = Teuchos::rcp(vecX,false);
#endif
      NumVectors_ = Problem_->GetRHS()->NumVectors() ;
    } else {
      assert (UseDataInPlace_ == 0);

      // (Re)allocate the serial work vectors only when the number of
      // right-hand sides differs from the previous call.
      if( NumVectors_ != Problem_->GetRHS()->NumVectors() ) {
        NumVectors_ = Problem_->GetRHS()->NumVectors() ;
        SerialXextract_ = rcp( new Epetra_MultiVector(*SerialMap_,NumVectors_));
        SerialBextract_ = rcp (new Epetra_MultiVector(*SerialMap_,NumVectors_));
      }
      if (NumVectors_ != vecB->NumVectors())
        AMESOS_CHK_ERR(-1); // internal error

      //ImportRangeToSerial_ = rcp(new Epetra_Import ( *SerialMap_, vecB->Map() ) );
      //if ( SerialBextract_->Import(*vecB,*ImportRangeToSerial_,Insert) )
      // B is imported with the range importer, or with the domain importer
      // when UseTranspose_ is set.
      Epetra_Import *UseImport;
      if(!UseTranspose_) UseImport=&*ImportRangeToSerial_;
      else UseImport=&*ImportDomainToSerial_;
      if ( SerialBextract_->Import(*vecB,*UseImport,Insert) )
        AMESOS_CHK_ERR( -1 ) ; // internal error

      SerialB_ = Teuchos::rcp(&*SerialBextract_,false) ;
      SerialX_ = Teuchos::rcp(&*SerialXextract_,false) ;
    }

    VecRedistTime_ = AddTime("Total vector redistribution time", VecRedistTime_, 0);

    //  Call KLU to perform the solve

    ResetTimer(0);
    if (MyPID_ == 0) {
      // Both calls write the leading dimension into SerialXlda_; the value
      // left behind is the one reported for X.
      AMESOS_CHK_ERR(SerialB_->ExtractView(&SerialBvalues_,&SerialXlda_ ));
      AMESOS_CHK_ERR(SerialX_->ExtractView(&SerialXBvalues_,&SerialXlda_ ));
      if (SerialXlda_ != NumGlobalElements_)
        AMESOS_CHK_ERR(-1);
    }

    OverheadTime_ = AddTime("Total Amesos overhead time", OverheadTime_, 1);
  }
  else
  {
    // TrustMe_ mode: use the caller's vectors as they are.
    SerialB_ = Teuchos::rcp(Problem_->GetRHS(),false) ;
    SerialX_ = Teuchos::rcp(Problem_->GetLHS(),false) ;
    NumVectors_ = SerialX_->NumVectors();
    if (MyPID_ == 0) {
      AMESOS_CHK_ERR(SerialB_->ExtractView(&SerialBvalues_,&SerialXlda_ ));
      AMESOS_CHK_ERR(SerialX_->ExtractView(&SerialXBvalues_,&SerialXlda_ ));
    }
  }

  // Only process 0 runs the KLU solve.
  if ( MyPID_ == 0) {
    if ( NumVectors_ == 1 ) {
      for ( int i = 0 ; i < NumGlobalElements_ ; i++ )
        SerialXBvalues_[i] = SerialBvalues_[i] ;
    } else {
      SerialX_->Scale(1.0, *SerialB_ ) ;    // X = B (Klu overwrites B with X)
    }
    // Note the inversion: the plain solve is used when UseTranspose() is
    // true and the transposed solve otherwise.
    // NOTE(review): presumably because the row-oriented matrix is handed to
    // KLU as column-oriented storage -- confirm against the factorization.
    if (UseTranspose()) {
      amesos_klu_solve( &*PrivateKluData_->Symbolic_, &*PrivateKluData_->Numeric_,
                        SerialXlda_, NumVectors_, &SerialXBvalues_[0], &*PrivateKluData_->common_ );
    } else {
      amesos_klu_tsolve( &*PrivateKluData_->Symbolic_, &*PrivateKluData_->Numeric_,
                         SerialXlda_, NumVectors_, &SerialXBvalues_[0], &*PrivateKluData_->common_ );
    }
  }

  if ( !TrustMe_ ) {
    SolveTime_ = AddTime("Total solve time", SolveTime_, 0);

    //  Copy X back to the original vector

    ResetTimer(0);
    ResetTimer(1);

    if (UseDataInPlace_ == 0) {
      // Mirror image of the B import above: domain importer normally,
      // range importer when UseTranspose_ is set.
      Epetra_Import *UseImport;
      if(!UseTranspose_) UseImport=&*ImportDomainToSerial_;
      else UseImport=&*ImportRangeToSerial_;
      //        ImportDomainToSerial_ = rcp(new Epetra_Import ( *SerialMap_, vecX->Map() ) );
      vecX->Export( *SerialX_, *UseImport, Insert ) ;

    } // otherwise we are already in place.

    VecRedistTime_ = AddTime("Total vector redistribution time", VecRedistTime_, 0);

#if 0
    //
    //  ComputeTrueResidual causes TestOptions to fail on my linux box
    //  Bug #1417
    if (ComputeTrueResidual_)
      ComputeTrueResidual(*SerialMatrix_, *vecX, *vecB, UseTranspose(), "Amesos_Klu");
#endif

    if (ComputeVectorNorms_)
      ComputeVectorNorms(*vecX, *vecB, "Amesos_Klu");

    OverheadTime_ = AddTime("Total Amesos overhead time", OverheadTime_, 1);

  }
  ++NumSolve_;

  return(0) ;
}
//============================================================================== int Ifpack_PointRelaxation:: ApplyInverseGS_RowMatrix(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const { int NumVectors = X.NumVectors(); int Length = Matrix().MaxNumEntries(); std::vector<int> Indices(Length); std::vector<double> Values(Length); Teuchos::RefCountPtr< Epetra_MultiVector > Y2; if (IsParallel_) Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) ); else Y2 = Teuchos::rcp( &Y, false ); // extract views (for nicer and faster code) double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr; X.ExtractView(&x_ptr); Y.ExtractView(&y_ptr); Y2->ExtractView(&y2_ptr); Diagonal_->ExtractView(&d_ptr); for (int j = 0; j < NumSweeps_ ; j++) { // data exchange is here, once per sweep if (IsParallel_) IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert)); // FIXME: do I really need this code below? if (NumVectors == 1) { double* y0_ptr = y_ptr[0]; double* y20_ptr = y2_ptr[0]; double* x0_ptr = x_ptr[0]; if(!DoBackwardGS_){ /* Forward Mode */ for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) { int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii]; int NumEntries; int col; IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries, &Values[0], &Indices[0])); double dtemp = 0.0; for (int k = 0 ; k < NumEntries ; ++k) { col = Indices[k]; dtemp += Values[k] * y20_ptr[col]; } y20_ptr[i] += DampingFactor_ * d_ptr[i] * (x0_ptr[i] - dtemp); } } else { /* Backward Mode */ for (int ii = NumLocalSmoothingIndices_ - 1 ; ii > -1 ; --ii) { int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii]; int NumEntries; int col; (void) col; // Forestall compiler warning for unused variable. 
IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries, &Values[0], &Indices[0])); double dtemp = 0.0; for (int k = 0 ; k < NumEntries ; ++k) { col = Indices[k]; dtemp += Values[k] * y20_ptr[i]; } y20_ptr[i] += DampingFactor_ * d_ptr[i] * (x0_ptr[i] - dtemp); } } // using Export() sounded quite expensive if (IsParallel_) for (int i = 0 ; i < NumMyRows_ ; ++i) y0_ptr[i] = y20_ptr[i]; } else { if(!DoBackwardGS_){ /* Forward Mode */ for (int i = 0 ; i < NumMyRows_ ; ++i) { int NumEntries; int col; IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries, &Values[0], &Indices[0])); for (int m = 0 ; m < NumVectors ; ++m) { double dtemp = 0.0; for (int k = 0 ; k < NumEntries ; ++k) { col = Indices[k]; dtemp += Values[k] * y2_ptr[m][col]; } y2_ptr[m][i] += DampingFactor_ * d_ptr[i] * (x_ptr[m][i] - dtemp); } } } else { /* Backward Mode */ for (int i = NumMyRows_ - 1 ; i > -1 ; --i) { int NumEntries; int col; IFPACK_CHK_ERR(Matrix_->ExtractMyRowCopy(i, Length,NumEntries, &Values[0], &Indices[0])); for (int m = 0 ; m < NumVectors ; ++m) { double dtemp = 0.0; for (int k = 0 ; k < NumEntries ; ++k) { col = Indices[k]; dtemp += Values[k] * y2_ptr[m][col]; } y2_ptr[m][i] += DampingFactor_ * d_ptr[i] * (x_ptr[m][i] - dtemp); } } } // using Export() sounded quite expensive if (IsParallel_) for (int m = 0 ; m < NumVectors ; ++m) for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) { int i = (!LocalSmoothingIndices_)? ii : LocalSmoothingIndices_[ii]; y_ptr[m][i] = y2_ptr[m][i]; } } } #ifdef IFPACK_FLOPCOUNTERS ApplyInverseFlops_ += NumVectors * (4 * NumGlobalRows_ + 2 * NumGlobalNonzeros_); #endif return(0); } //ApplyInverseGS_RowMatrix()
//============================================================================= int Amesos_Umfpack::Solve() { // if necessary, perform numeric factorization. // This may call SymbolicFactorization() as well. if (!IsNumericFactorizationOK_) AMESOS_CHK_ERR(NumericFactorization()); ResetTimer(1); Epetra_MultiVector* vecX = Problem_->GetLHS(); Epetra_MultiVector* vecB = Problem_->GetRHS(); if ((vecX == 0) || (vecB == 0)) AMESOS_CHK_ERR(-1); int NumVectors = vecX->NumVectors(); if (NumVectors != vecB->NumVectors()) AMESOS_CHK_ERR(-1); Epetra_MultiVector *SerialB, *SerialX; // Extract Serial versions of X and B // double *SerialXvalues ; double *SerialBvalues ; Epetra_MultiVector* SerialXextract = 0; Epetra_MultiVector* SerialBextract = 0; // Copy B to the serial version of B // ResetTimer(0); if (IsLocal_ == 1) { SerialB = vecB ; SerialX = vecX ; } else { assert (IsLocal_ == 0); SerialXextract = new Epetra_MultiVector(SerialMap(),NumVectors); SerialBextract = new Epetra_MultiVector(SerialMap(),NumVectors); SerialBextract->Import(*vecB,Importer(),Insert); SerialB = SerialBextract; SerialX = SerialXextract; } VecRedistTime_ = AddTime("Total vector redistribution time", VecRedistTime_, 0); // Call UMFPACK to perform the solve // Note: UMFPACK uses a Compressed Column Storage instead of compressed row storage, // Hence to compute A X = B, we ask UMFPACK to perform A^T X = B and vice versa OverheadTime_ = AddTime("Total Amesos overhead time", OverheadTime_, 1); ResetTimer(0); int SerialBlda, SerialXlda ; int UmfpackRequest = UseTranspose()?UMFPACK_A:UMFPACK_At ; int status = 0; if ( MyPID_ == 0 ) { int ierr; ierr = SerialB->ExtractView(&SerialBvalues, &SerialBlda); assert (ierr == 0); ierr = SerialX->ExtractView(&SerialXvalues, &SerialXlda); assert (ierr == 0); assert( SerialBlda == NumGlobalElements_ ) ; assert( SerialXlda == NumGlobalElements_ ) ; for ( int j =0 ; j < NumVectors; j++ ) { double *Control = (double *) NULL, *Info = (double *) NULL ; status = umfpack_di_solve 
(UmfpackRequest, &Ap[0], &Ai[0], &Aval[0], &SerialXvalues[j*SerialXlda], &SerialBvalues[j*SerialBlda], Numeric, Control, Info) ; } } if (status) AMESOS_CHK_ERR(status); SolveTime_ = AddTime("Total solve time", SolveTime_, 0); // Copy X back to the original vector ResetTimer(0); ResetTimer(1); if ( IsLocal_ == 0 ) { vecX->Export(*SerialX, Importer(), Insert ) ; if (SerialBextract) delete SerialBextract ; if (SerialXextract) delete SerialXextract ; } VecRedistTime_ = AddTime("Total vector redistribution time", VecRedistTime_, 0); if (ComputeTrueResidual_) { Epetra_RowMatrix* Matrix = dynamic_cast<Epetra_RowMatrix*>(Problem_->GetOperator()); ComputeTrueResidual(*Matrix, *vecX, *vecB, UseTranspose(), "Amesos_Umfpack"); } if (ComputeVectorNorms_) { ComputeVectorNorms(*vecX, *vecB, "Amesos_Umfpack"); } NumSolve_++; OverheadTime_ = AddTime("Total Amesos overhead time", OverheadTime_, 1); // Amesos overhead return(0); }
int Ifpack_PointRelaxation:: ApplyInverseGS_LocalFastCrsMatrix(const Epetra_CrsMatrix* A, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const { int* IndexOffset; int* Indices; double* Values; IFPACK_CHK_ERR(A->ExtractCrsDataPointers(IndexOffset, Indices, Values)); int NumVectors = X.NumVectors(); Teuchos::RefCountPtr< Epetra_MultiVector > Y2; if (IsParallel_) { Y2 = Teuchos::rcp( new Epetra_MultiVector(Importer_->TargetMap(), NumVectors) ); } else Y2 = Teuchos::rcp( &Y, false ); double** y_ptr, ** y2_ptr, ** x_ptr, *d_ptr; X.ExtractView(&x_ptr); Y.ExtractView(&y_ptr); Y2->ExtractView(&y2_ptr); Diagonal_->ExtractView(&d_ptr); for (int iter = 0 ; iter < NumSweeps_ ; ++iter) { // only one data exchange per sweep if (IsParallel_) IFPACK_CHK_ERR(Y2->Import(Y,*Importer_,Insert)); if(!DoBackwardGS_){ /* Forward Mode */ for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) { int i=LocalSmoothingIndices_[ii]; int col; double diag = d_ptr[i]; for (int m = 0 ; m < NumVectors ; ++m) { double dtemp = 0.0; for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) { col = Indices[k]; dtemp += Values[k] * y2_ptr[m][col]; } y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag; } } } else { /* Backward Mode */ for (int ii = NumLocalSmoothingIndices_ - 1 ; ii > -1 ; --ii) { int i=LocalSmoothingIndices_[ii]; int col; double diag = d_ptr[i]; for (int m = 0 ; m < NumVectors ; ++m) { double dtemp = 0.0; for (int k = IndexOffset[i] ; k < IndexOffset[i + 1] ; ++k) { col = Indices[k]; dtemp += Values[k] * y2_ptr[m][col]; } y2_ptr[m][i] += DampingFactor_ * (x_ptr[m][i] - dtemp) * diag; } } } if (IsParallel_) for (int m = 0 ; m < NumVectors ; ++m) for (int ii = 0 ; ii < NumLocalSmoothingIndices_ ; ++ii) { int i=LocalSmoothingIndices_[ii]; y_ptr[m][i] = y2_ptr[m][i]; } } #ifdef IFPACK_FLOPCOUNTERS ApplyInverseFlops_ += NumVectors * (8 * NumGlobalRows_ + 4 * NumGlobalNonzeros_); #endif return(0); } //ApplyInverseGS_LocalFastCrsMatrix()
//============================================================================= int Epetra_FastCrsMatrix::Multiply(bool TransA, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const { // // This function forms the product Y = A * Y or Y = A' * X // if (X.NumVectors()==1 && Y.NumVectors()==1) { double * xp = (double *) X[0]; double * yp = (double *) Y[0]; Epetra_Vector x(View, X.Map(), xp); Epetra_Vector y(View, Y.Map(), yp); return(Multiply(TransA, x, y)); } if (!Filled()) EPETRA_CHK_ERR(-1); // Matrix must be filled. int i, j, k; int * NumEntriesPerRow = NumEntriesPerRow_; int ** Indices = Indices_; double ** Values = Values_; double **Xp = (double**)X.Pointers(); double **Yp = (double**)Y.Pointers(); int NumVectors = X.NumVectors(); int NumMyCols_ = NumMyCols(); // Need to better manage the Import and Export vectors: // - Need accessor functions // - Need to make the NumVector match (use a View to do this) // - Need to look at RightScale and ColSum routines too. if (!TransA) { // If we have a non-trivial importer, we must import elements that are permuted or are on other processors if (Importer()!=0) { if (ImportVector_!=0) { if (ImportVector_->NumVectors()!=NumVectors) { delete ImportVector_; ImportVector_= 0;} } if (ImportVector_==0) ImportVector_ = new Epetra_MultiVector(ColMap(),NumVectors); // Create import vector if needed ImportVector_->Import(X, *Importer(), Insert); Xp = (double**)ImportVector_->Pointers(); } // If we have a non-trivial exporter, we must export elements that are permuted or belong to other processors if (Exporter()!=0) { if (ExportVector_!=0) { if (ExportVector_->NumVectors()!=NumVectors) { delete ExportVector_; ExportVector_= 0;} } if (ExportVector_==0) ExportVector_ = new Epetra_MultiVector(RowMap(),NumVectors); // Create Export vector if needed Yp = (double**)ExportVector_->Pointers(); } // Do actual computation for (i=0; i < NumMyRows_; i++) { int NumEntries = *NumEntriesPerRow++; int * RowIndices = *Indices++; double * RowValues 
= *Values++; for (k=0; k<NumVectors; k++) { double sum = 0.0; for (j=0; j < NumEntries; j++) sum += RowValues[j] * Xp[k][RowIndices[j]]; Yp[k][i] = sum; } } if (Exporter()!=0) Y.Export(*ExportVector_, *Exporter(), Add); // Fill Y with Values from export vector } else { // Transpose operation // If we have a non-trivial exporter, we must import elements that are permuted or are on other processors if (Exporter()!=0) { if (ExportVector_!=0) { if (ExportVector_->NumVectors()!=NumVectors) { delete ExportVector_; ExportVector_= 0;} } if (ExportVector_==0) ExportVector_ = new Epetra_MultiVector(RowMap(),NumVectors); // Create Export vector if needed ExportVector_->Import(X, *Exporter(), Insert); Xp = (double**)ExportVector_->Pointers(); } // If we have a non-trivial importer, we must export elements that are permuted or belong to other processors if (Importer()!=0) { if (ImportVector_!=0) { if (ImportVector_->NumVectors()!=NumVectors) { delete ImportVector_; ImportVector_= 0;} } if (ImportVector_==0) ImportVector_ = new Epetra_MultiVector(ColMap(),NumVectors); // Create import vector if needed Yp = (double**)ImportVector_->Pointers(); } // Do actual computation for (k=0; k<NumVectors; k++) for (i=0; i < NumMyCols_; i++) Yp[k][i] = 0.0; // Initialize y for transpose multiply for (i=0; i < NumMyRows_; i++) { int NumEntries = *NumEntriesPerRow++; int * RowIndices = *Indices++; double * RowValues = *Values++; for (k=0; k<NumVectors; k++) { for (j=0; j < NumEntries; j++) Yp[k][RowIndices[j]] += RowValues[j] * Xp[k][i]; } } if (Importer()!=0) Y.Export(*ImportVector_, *Importer(), Add); // Fill Y with Values from export vector } UpdateFlops(2*NumVectors*NumGlobalNonzeros64()); return(0); }
//============================================================================== int LinearProblem_CrsSingletonFilter::ConstructReducedProblem(Epetra_LinearProblem * Problem) { int i, j; if (HaveReducedProblem_) EPETRA_CHK_ERR(-1); // Setup already done once. Cannot do it again if (Problem==0) EPETRA_CHK_ERR(-2); // Null problem pointer FullProblem_ = Problem; FullMatrix_ = dynamic_cast<Epetra_RowMatrix *>(Problem->GetMatrix()); if (FullMatrix_==0) EPETRA_CHK_ERR(-3); // Need a RowMatrix if (Problem->GetRHS()==0) EPETRA_CHK_ERR(-4); // Need a RHS if (Problem->GetLHS()==0) EPETRA_CHK_ERR(-5); // Need a LHS // Generate reduced row and column maps Epetra_MapColoring & RowMapColors = *RowMapColors_; Epetra_MapColoring & ColMapColors = *ColMapColors_; ReducedMatrixRowMap_ = RowMapColors.GenerateMap(0); ReducedMatrixColMap_ = ColMapColors.GenerateMap(0); // Create domain and range map colorings by exporting map coloring of column and row maps if (FullMatrix()->RowMatrixImporter()!=0) { Epetra_MapColoring DomainMapColors(FullMatrixDomainMap()); EPETRA_CHK_ERR(DomainMapColors.Export(*ColMapColors_, *FullMatrix()->RowMatrixImporter(), AbsMax)); OrigReducedMatrixDomainMap_ = DomainMapColors.GenerateMap(0); } else OrigReducedMatrixDomainMap_ = ReducedMatrixColMap_; if (FullMatrixIsCrsMatrix_) { if (FullCrsMatrix()->Exporter()!=0) { // Non-trivial exporter Epetra_MapColoring RangeMapColors(FullMatrixRangeMap()); EPETRA_CHK_ERR(RangeMapColors.Export(*RowMapColors_, *FullCrsMatrix()->Exporter(), AbsMax)); ReducedMatrixRangeMap_ = RangeMapColors.GenerateMap(0); } else ReducedMatrixRangeMap_ = ReducedMatrixRowMap_; } else ReducedMatrixRangeMap_ = ReducedMatrixRowMap_; // Check to see if the reduced system domain and range maps are the same. 
// If not, we need to remap entries of the LHS multivector so that they are distributed // conformally with the rows of the reduced matrix and the RHS multivector SymmetricElimination_ = ReducedMatrixRangeMap_->SameAs(*OrigReducedMatrixDomainMap_); if (!SymmetricElimination_) ConstructRedistributeExporter(OrigReducedMatrixDomainMap_, ReducedMatrixRangeMap_, RedistributeDomainExporter_, ReducedMatrixDomainMap_); else { ReducedMatrixDomainMap_ = OrigReducedMatrixDomainMap_; OrigReducedMatrixDomainMap_ = 0; RedistributeDomainExporter_ = 0; } // Create pointer to Full RHS, LHS Epetra_MultiVector * FullRHS = FullProblem()->GetRHS(); Epetra_MultiVector * FullLHS = FullProblem()->GetLHS(); int NumVectors = FullLHS->NumVectors(); // Create importers // cout << "RedDomainMap\n"; // cout << *ReducedMatrixDomainMap(); // cout << "FullDomainMap\n"; // cout << FullMatrixDomainMap(); Full2ReducedLHSImporter_ = new Epetra_Import(*ReducedMatrixDomainMap(), FullMatrixDomainMap()); // cout << "RedRowMap\n"; // cout << *ReducedMatrixRowMap(); // cout << "FullRHSMap\n"; // cout << FullRHS->Map(); Full2ReducedRHSImporter_ = new Epetra_Import(*ReducedMatrixRowMap(), FullRHS->Map()); // Construct Reduced Matrix ReducedMatrix_ = new Epetra_CrsMatrix(Copy, *ReducedMatrixRowMap(), *ReducedMatrixColMap(), 0); // Create storage for temporary X values due to explicit elimination of rows tempExportX_ = new Epetra_MultiVector(FullMatrixColMap(), NumVectors); int NumEntries; int * Indices; double * Values; int NumMyRows = FullMatrix()->NumMyRows(); int ColSingletonCounter = 0; for (i=0; i<NumMyRows; i++) { int curGRID = FullMatrixRowMap().GID(i); if (ReducedMatrixRowMap()->MyGID(curGRID)) { // Check if this row should go into reduced matrix EPETRA_CHK_ERR(GetRowGCIDs(i, NumEntries, Values, Indices)); // Get current row (Indices are global) int ierr = ReducedMatrix()->InsertGlobalValues(curGRID, NumEntries, Values, Indices); // Insert into reduce matrix // Positive errors will occur because we are 
submitting col entries that are not part of // reduced system. However, because we specified a column map to the ReducedMatrix constructor // these extra column entries will be ignored and we will be politely reminded by a positive // error code if (ierr<0) EPETRA_CHK_ERR(ierr); } else { EPETRA_CHK_ERR(GetRow(i, NumEntries, Values, Indices)); // Get current row if (NumEntries==1) { double pivot = Values[0]; if (pivot==0.0) EPETRA_CHK_ERR(-1); // Encountered zero row, unable to continue int indX = Indices[0]; for (j=0; j<NumVectors; j++) (*tempExportX_)[j][indX] = (*FullRHS)[j][i]/pivot; } // Otherwise, this is a singleton column and we will scan for the pivot element needed // for post-solve equations else { int targetCol = ColSingletonColLIDs_[ColSingletonCounter]; for (j=0; j<NumEntries; j++) { if (Indices[j]==targetCol) { double pivot = Values[j]; if (pivot==0.0) EPETRA_CHK_ERR(-2); // Encountered zero column, unable to continue ColSingletonPivotLIDs_[ColSingletonCounter] = j; // Save for later use ColSingletonPivots_[ColSingletonCounter] = pivot; ColSingletonCounter++; break; } } } } } // Now convert to local indexing. We have constructed things so that the domain and range of the // matrix will have the same map. 
If the reduced matrix domain and range maps were not the same, the // differences were addressed in the ConstructRedistributeExporter() method EPETRA_CHK_ERR(ReducedMatrix()->FillComplete(*ReducedMatrixDomainMap(), *ReducedMatrixRangeMap())); // Construct Reduced LHS (Puts any initial guess values into reduced system) ReducedLHS_ = new Epetra_MultiVector(*ReducedMatrixDomainMap(), NumVectors); EPETRA_CHK_ERR(ReducedLHS_->Import(*FullLHS, *Full2ReducedLHSImporter_, Insert)); FullLHS->PutScalar(0.0); // zero out Full LHS since we will inject values as we get them // Construct Reduced RHS // First compute influence of already-known values of X on RHS tempX_ = new Epetra_MultiVector(FullMatrixDomainMap(), NumVectors); tempB_ = new Epetra_MultiVector(FullRHS->Map(), NumVectors); //Inject known X values into tempX for purpose of computing tempB = FullMatrix*tempX // Also inject into full X since we already know the solution if (FullMatrix()->RowMatrixImporter()!=0) { EPETRA_CHK_ERR(tempX_->Export(*tempExportX_, *FullMatrix()->RowMatrixImporter(), Add)); EPETRA_CHK_ERR(FullLHS->Export(*tempExportX_, *FullMatrix()->RowMatrixImporter(), Add)); } else { tempX_->Update(1.0, *tempExportX_, 0.0); FullLHS->Update(1.0, *tempExportX_, 0.0); } EPETRA_CHK_ERR(FullMatrix()->Multiply(false, *tempX_, *tempB_)); EPETRA_CHK_ERR(tempB_->Update(1.0, *FullRHS, -1.0)); // tempB now has influence of already-known X values ReducedRHS_ = new Epetra_MultiVector(*ReducedMatrixRowMap(), FullRHS->NumVectors()); EPETRA_CHK_ERR(ReducedRHS_->Import(*tempB_, *Full2ReducedRHSImporter_, Insert)); // Finally construct Reduced Linear Problem ReducedProblem_ = new Epetra_LinearProblem(ReducedMatrix_, ReducedLHS_, ReducedRHS_); double fn = FullMatrix()->NumGlobalRows(); double fnnz = FullMatrix()->NumGlobalNonzeros(); double rn = ReducedMatrix()->NumGlobalRows(); double rnnz = ReducedMatrix()->NumGlobalNonzeros(); RatioOfDimensions_ = rn/fn; RatioOfNonzeros_ = rnnz/fnnz; HaveReducedProblem_ = true; 
return(0); }
//============================================================================= int Epetra_FastCrsMatrix::Solve(bool Upper, bool Trans, bool UnitDiagonal, const Epetra_MultiVector& X, Epetra_MultiVector& Y) const { // // This function find Y such that LY = X or UY = X or the transpose cases. // if (X.NumVectors()==1 && Y.NumVectors()==1) { double * xp = (double *) X[0]; double * yp = (double *) Y[0]; Epetra_Vector x(View, X.Map(), xp); Epetra_Vector y(View, Y.Map(), yp); return(Solve(Upper, Trans, UnitDiagonal, x, y)); } if (!Filled()) EPETRA_CHK_ERR(-1); // Matrix must be filled. if ((Upper) && (!UpperTriangular())) EPETRA_CHK_ERR(-2); if ((!Upper) && (!LowerTriangular())) EPETRA_CHK_ERR(-3); if ((!UnitDiagonal) && (NoDiagonal())) EPETRA_CHK_ERR(-4); // If matrix has no diagonal, we must use UnitDiagonal if ((!UnitDiagonal) && (NumMyDiagonals()<NumMyRows_)) EPETRA_CHK_ERR(-5); // Need each row to have a diagonal int i, j, j0, k; int * NumEntriesPerRow = NumEntriesPerRow_; int ** Indices = Indices_; double ** Values = Values_; double diag; // If upper, point to last row if ((Upper && !Trans) || (!Upper && Trans)) { NumEntriesPerRow += NumMyRows_-1; Indices += NumMyRows_-1; Values += NumMyRows_-1; } double **Xp = (double**)X.Pointers(); double **Yp = (double**)Y.Pointers(); int NumVectors = X.NumVectors(); if (!Trans) { if (Upper) { j0 = 1; if (NoDiagonal()) j0--; // Include first term if no diagonal for (i=NumMyRows_-1; i >=0; i--) { int NumEntries = *NumEntriesPerRow--; int * RowIndices = *Indices--; double * RowValues = *Values--; if (!UnitDiagonal) diag = 1.0/RowValues[0]; // Take inverse of diagonal once for later use for (k=0; k<NumVectors; k++) { double sum = 0.0; for (j=j0; j < NumEntries; j++) sum += RowValues[j] * Yp[k][RowIndices[j]]; if (UnitDiagonal) Yp[k][i] = Xp[k][i] - sum; else Yp[k][i] = (Xp[k][i] - sum)*diag; } } } else { j0 = 1; if (NoDiagonal()) j0--; // Include first term if no diagonal for (i=0; i < NumMyRows_; i++) { int NumEntries = 
*NumEntriesPerRow++ - j0; int * RowIndices = *Indices++; double * RowValues = *Values++; if (!UnitDiagonal) diag = 1.0/RowValues[NumEntries]; // Take inverse of diagonal once for later use for (k=0; k<NumVectors; k++) { double sum = 0.0; for (j=0; j < NumEntries; j++) sum += RowValues[j] * Yp[k][RowIndices[j]]; if (UnitDiagonal) Yp[k][i] = Xp[k][i] - sum; else Yp[k][i] = (Xp[k][i] - sum)*diag; } } } } // *********** Transpose case ******************************* else { for (k=0; k<NumVectors; k++) if (Yp[k]!=Xp[k]) for (i=0; i < NumMyRows_; i++) Yp[k][i] = Xp[k][i]; // Initialize y for transpose multiply if (Upper) { j0 = 1; if (NoDiagonal()) j0--; // Include first term if no diagonal for (i=0; i < NumMyRows_; i++) { int NumEntries = *NumEntriesPerRow++; int * RowIndices = *Indices++; double * RowValues = *Values++; if (!UnitDiagonal) diag = 1.0/RowValues[j0]; // Take inverse of diagonal once for later use for (k=0; k<NumVectors; k++) { if (!UnitDiagonal) Yp[k][i] = Yp[k][i]*diag; for (j=j0; j < NumEntries; j++) Yp[k][RowIndices[j]] -= RowValues[j] * Yp[k][i]; } } } else { j0 = 1; if (NoDiagonal()) j0--; // Include first term if no diagonal for (i=NumMyRows_-1; i>=0; i--) { int NumEntries = *NumEntriesPerRow-- - j0; int * RowIndices = *Indices--; double * RowValues = *Values--; for (k=0; k<NumVectors; k++) { if (!UnitDiagonal) Yp[k][i] = Yp[k][i]/Xp[k][i]; for (j=0; j < NumEntries; j++) Yp[k][RowIndices[j]] -= RowValues[j] * Yp[k][i]; } } } } UpdateFlops(2*NumVectors*NumGlobalNonzeros64()); return(0); }
int BlockPCGSolver::Solve(const Epetra_MultiVector &X, Epetra_MultiVector &Y) const { int info = 0; int localVerbose = verbose*(MyComm.MyPID() == 0); int xr = X.MyLength(); int wSize = 3*xr; if (lWorkSpace < wSize) { if (workSpace) delete[] workSpace; workSpace = new (std::nothrow) double[wSize]; if (workSpace == 0) { info = -1; return info; } lWorkSpace = wSize; } // if (lWorkSpace < wSize) double *pointer = workSpace; Epetra_Vector r(View, X.Map(), pointer); pointer = pointer + xr; Epetra_Vector p(View, X.Map(), pointer); pointer = pointer + xr; // Note: Kp and z uses the same memory space Epetra_Vector Kp(View, X.Map(), pointer); Epetra_Vector z(View, X.Map(), pointer); double tmp; double initNorm = 0.0, rNorm = 0.0, newRZ = 0.0, oldRZ = 0.0, alpha = 0.0; double tolSquare = tolCG*tolCG; memcpy(r.Values(), X.Values(), xr*sizeof(double)); tmp = callBLAS.DOT(xr, r.Values(), 1, r.Values(), 1); MyComm.SumAll(&tmp, &initNorm, 1); Y.PutScalar(0.0); if (localVerbose > 1) { std::cout << std::endl; std::cout << " --- PCG Iterations --- " << std::endl; } int iter; for (iter = 1; iter <= iterMax; ++iter) { if (Prec) { Prec->ApplyInverse(r, z); } else { memcpy(z.Values(), r.Values(), xr*sizeof(double)); } if (iter == 1) { tmp = callBLAS.DOT(xr, r.Values(), 1, z.Values(), 1); MyComm.SumAll(&tmp, &newRZ, 1); memcpy(p.Values(), z.Values(), xr*sizeof(double)); } else { oldRZ = newRZ; tmp = callBLAS.DOT(xr, r.Values(), 1, z.Values(), 1); MyComm.SumAll(&tmp, &newRZ, 1); p.Update(1.0, z, newRZ/oldRZ); } K->Apply(p, Kp); tmp = callBLAS.DOT(xr, p.Values(), 1, Kp.Values(), 1); MyComm.SumAll(&tmp, &alpha, 1); alpha = newRZ/alpha; TEUCHOS_TEST_FOR_EXCEPTION(alpha <= 0.0, std::runtime_error, " !!! 
Non-positive value for p^TKp (" << alpha << ") !!!"); callBLAS.AXPY(xr, alpha, p.Values(), 1, Y.Values(), 1); alpha *= -1.0; callBLAS.AXPY(xr, alpha, Kp.Values(), 1, r.Values(), 1); // Check convergence tmp = callBLAS.DOT(xr, r.Values(), 1, r.Values(), 1); MyComm.SumAll(&tmp, &rNorm, 1); if (localVerbose > 1) { std::cout << " Iter. " << iter; std::cout.precision(4); std::cout.setf(std::ios::scientific, std::ios::floatfield); std::cout << " Residual reduction " << std::sqrt(rNorm/initNorm) << std::endl; } if (rNorm <= tolSquare*initNorm) break; } // for (iter = 1; iter <= iterMax; ++iter) if (localVerbose == 1) { std::cout << std::endl; std::cout << " --- End of PCG solve ---" << std::endl; std::cout << " Iter. " << iter; std::cout.precision(4); std::cout.setf(std::ios::scientific, std::ios::floatfield); std::cout << " Residual reduction " << std::sqrt(rNorm/initNorm) << std::endl; std::cout << std::endl; } if (localVerbose > 1) { std::cout << std::endl; } numSolve += 1; minIter = (iter < minIter) ? iter : minIter; maxIter = (iter > maxIter) ? iter : maxIter; sumIter += iter; return info; }
// ================================================ ====== ==== ==== == = //! Apply the preconditioner to an Epetra_MultiVector X, puts the result in Y int ML_Epetra::FaceMatrixFreePreconditioner::ApplyInverse(const Epetra_MultiVector& B_, Epetra_MultiVector& X) const{ const Epetra_MultiVector *B; Epetra_MultiVector *Bcopy=0; /* Sanity Checks */ int NumVectors=B_.NumVectors(); if (!B_.Map().SameAs(*FaceDomainMap_)) ML_CHK_ERR(-1); if (NumVectors != X.NumVectors()) ML_CHK_ERR(-1); Epetra_MultiVector r_edge(*FaceDomainMap_,NumVectors,false); Epetra_MultiVector e_edge(*FaceDomainMap_,NumVectors,false); Epetra_MultiVector e_node(*CoarseMap_,NumVectors,false); Epetra_MultiVector r_node(*CoarseMap_,NumVectors,false); /* Deal with the B==X case */ if (B_.Pointers()[0] == X.Pointers()[0]){ Bcopy=new Epetra_MultiVector(B_); B=Bcopy; X.PutScalar(0.0); } else B=&B_; for(int i=0;i<num_cycles;i++){ /* Pre-smoothing */ #ifdef HAVE_ML_IFPACK if(Smoother_) ML_CHK_ERR(Smoother_->ApplyInverse(*B,X)); #endif if(MaxLevels > 0){ if(i != 0 #ifdef HAVE_ML_IFPACK || Smoother_ #endif ){ /* Calculate Residual (r_e = b - (S+M+Addon) * x) */ ML_CHK_ERR(Operator_->Apply(X,r_edge)); ML_CHK_ERR(r_edge.Update(1.0,*B,-1.0)); /* Xfer to coarse grid (r_n = P' * r_e) */ ML_CHK_ERR(Prolongator_->Multiply(true,r_edge,r_node)); } else{ /* Xfer to coarse grid (r_n = P' * r_e) */ ML_CHK_ERR(Prolongator_->Multiply(true,*B,r_node)); } /* AMG on coarse grid (e_n = (CoarseMatrix)^{-1} r_n) */ ML_CHK_ERR(CoarsePC->ApplyInverse(r_node,e_node)); /* Xfer back to fine grid (e_e = P * e_n) */ ML_CHK_ERR(Prolongator_->Multiply(false,e_node,e_edge)); /* Add in correction (x = x + e_e) */ ML_CHK_ERR(X.Update(1.0,e_edge,1.0)); }/*end if*/ /* Post-Smoothing */ #ifdef HAVE_ML_IFPACK if(Smoother_) ML_CHK_ERR(Smoother_->ApplyInverse(*B,X)); #endif }/*end for*/ /* Cleanup */ if(Bcopy) delete Bcopy; return 0; }/*end ApplyInverse*/
// ================================================ ====== ==== ==== == = //! Implicitly applies in the inverse in an additive format int ML_Epetra::RefMaxwellPreconditioner::ApplyInverse_Implicit_Additive(const Epetra_MultiVector& B, Epetra_MultiVector& X) const { #ifdef ML_TIMING double t_time,t_diff; StartTimer(&t_time); #endif int NumVectors=B.NumVectors(); Epetra_MultiVector TempE1(X.Map(),NumVectors,false); Epetra_MultiVector TempE2(X.Map(),NumVectors,true); Epetra_MultiVector TempN1(*NodeMap_,NumVectors,false); Epetra_MultiVector TempN2(*NodeMap_,NumVectors,true); Epetra_MultiVector Resid(B.Map(),NumVectors); /* Pre-Smoothing */ #ifdef HAVE_ML_IFPACK if(IfSmoother) {ML_CHK_ERR(IfSmoother->ApplyInverse(B,X));} else #endif if(PreEdgeSmoother) ML_CHK_ERR(PreEdgeSmoother->ApplyInverse(B,X)); /* Build Residual */ ML_CHK_ERR(SM_Matrix_->Multiply(false,X,TempE1)); ML_CHK_ERR(Resid.Update(-1.0,TempE1,1.0,B,0.0)); if(!HasOnlyDirichletNodes){ ML_CHK_ERR(D0_Matrix_->Multiply(true,Resid,TempN1)); } /* Precondition (1,1) block (additive)*/ ML_CHK_ERR(EdgePC->ApplyInverse(Resid,TempE2)); /* Precondition (2,2) block (additive)*/ if(!HasOnlyDirichletNodes){ ML_CHK_ERR(NodePC->ApplyInverse(TempN1,TempN2)); /* EXPERIMENTAL: Local Nodal Stuff, if active */ if(use_local_nodal_solver){ const Epetra_Map& LocalMap=LocalNodalMatrix->DomainMap(); Epetra_MultiVector TempNL1(LocalMap,NumVectors,true); Epetra_MultiVector TempNL2(LocalMap,NumVectors,true); Epetra_MultiVector TempN3(*NodeMap_,NumVectors,true); NodesToLocalNodes->Multiply(true,TempN1,TempNL1); LocalNodalSolver->ApplyInverse(TempNL1,TempNL2); NodesToLocalNodes->Multiply(false,TempNL2,TempN3); TempN2.Update(1.0,TempN3,1.0); }/*end if*/ D0_Matrix_->Multiply(false,TempN2,TempE1); }/*end if*/ /* Update solution */ if(HasOnlyDirichletNodes) X.Update(1.0,TempE2,1.0); else X.Update(1.0,TempE1,1.0,TempE2,1.0); /* Post-Smoothing */ #ifdef HAVE_ML_IFPACK if(IfSmoother) {ML_CHK_ERR(IfSmoother->ApplyInverse(B,X));} else #endif 
if(PostEdgeSmoother) ML_CHK_ERR(PostEdgeSmoother->ApplyInverse(B,X)); #ifdef ML_TIMING StopTimer(&t_time,&t_diff); /* Output */ ML_Comm *comm_; ML_Comm_Create(&comm_); this->ApplicationTime_+= t_diff; ML_Comm_Destroy(&comm_); #endif return 0; }
int Ifpack_SORa::ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const{ if(!IsComputed_) return -1; Time_.ResetStartTime(); bool initial_guess_is_zero=false; const int lclNumRows = W_->NumMyRows(); const int NumVectors = X.NumVectors(); Epetra_MultiVector Temp(A_->RowMatrixRowMap(),NumVectors); double omega=GetOmega(); // need to create an auxiliary vector, Xcopy Teuchos::RCP<const Epetra_MultiVector> Xcopy; if (X.Pointers()[0] == Y.Pointers()[0]){ Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) ); // Since the user didn't give us anything better, our initial guess is zero. Y.Scale(0.0); initial_guess_is_zero=true; } else Xcopy = Teuchos::rcp( &X, false ); Teuchos::RCP< Epetra_MultiVector > T2; // Note: Assuming that the matrix has an importer. Ifpack_PointRelaxation doesn't do this, but given that // I have a CrsMatrix, I'm probably OK. // Note: This is the lazy man's version sacrificing a few extra flops for avoiding if statements to determine // if things are on or off processor. // Note: T2 must be zero'd out if (IsParallel_ && W_->Importer()) T2 = Teuchos::rcp( new Epetra_MultiVector(W_->Importer()->TargetMap(),NumVectors,true)); else T2 = Teuchos::rcp( new Epetra_MultiVector(A_->RowMatrixRowMap(),NumVectors,true)); // Pointer grabs int* rowptr,*colind; double *values; double **t_ptr,** y_ptr, ** t2_ptr, **x_ptr,*d_ptr; T2->ExtractView(&t2_ptr); Y.ExtractView(&y_ptr); Temp.ExtractView(&t_ptr); Xcopy->ExtractView(&x_ptr); Wdiag_->ExtractView(&d_ptr); IFPACK_CHK_ERR(W_->ExtractCrsDataPointers(rowptr,colind,values)); for(int i=0; i<NumSweeps_; i++){ // Calculate b-Ax if(!initial_guess_is_zero || i > 0) { A_->Apply(Y,Temp); Temp.Update(1.0,*Xcopy,-1.0); } else Temp.Update(1.0,*Xcopy,0.0); // Note: The off-processor entries of T2 never get touched (they're always zero) and the other entries are updated // in this sweep before they are used, so we don't need to reset T2 to zero here. 
// Do backsolve & update // x = x + W^{-1} (b - A x) for(int j=0; j<lclNumRows; j++){ double diag=d_ptr[j]; for (int m=0 ; m<NumVectors; m++) { double dtmp=0.0; // Note: Since the diagonal is in the matrix, we need to zero that entry of T2 here to make sure it doesn't contribute. t2_ptr[m][j]=0.0; for(int k=rowptr[j];k<rowptr[j+1];k++){ dtmp+= values[k]*t2_ptr[m][colind[k]]; } // Yes, we need to update both of these. t2_ptr[m][j] = (t_ptr[m][j]- dtmp)/diag; y_ptr[m][j] += omega*t2_ptr[m][j]; } } } // Counter update NumApplyInverse_++; ApplyInverseTime_ += Time_.ElapsedTime(); return 0; }
// Apply the preconditioner w/ RHS B and get result X int ML_Epetra::LevelWrap::ApplyInverse(const Epetra_MultiVector& B, Epetra_MultiVector& X_) const{ #ifdef ML_TIMING double t_time,t_diff; StartTimer(&t_time); #endif // Sanity Checks if (!B.Map().SameAs(OperatorDomainMap())) return -1; if (!X_.Map().SameAs(OperatorRangeMap())) return -1; if (!X_.Map().SameAs(B.Map())) return -1; if (B.NumVectors() != X_.NumVectors()) return -1; // Build new work vector X Epetra_MultiVector X(X_.Map(),X_.NumVectors(),true); Epetra_MultiVector tmp0(X_.Map(),X_.NumVectors(),true); Epetra_MultiVector tmp1(P0_->DomainMap(),X_.NumVectors(),true); Epetra_MultiVector tmp2(P0_->DomainMap(),X_.NumVectors(),true); // Pre Smoother if(pre_or_post==ML_BOTH || pre_or_post==ML_PRESMOOTHER){ Smoother_->ApplyInverse(B,X); } // Form coarse residual A0_->Apply(X,tmp0); tmp0.Update(1.0,B,-1.0); if(use_pt_) P0_->Multiply(true,tmp0,tmp1); else R0_->Multiply(false,tmp0,tmp1); // Solve coarse problem A1prec_->ApplyInverse(tmp1,tmp2); // Update solution P0_->Multiply(false,tmp2,tmp0); X.Update(1.0,tmp0,1.0); // Post Smoother if(pre_or_post==ML_BOTH || pre_or_post==ML_PRESMOOTHER){ Smoother_->ApplyInverse(B,X); } // Copy to output X_=X; #ifdef ML_TIMING StopTimer(&t_time,&t_diff); /* Output */ ML_Comm *comm_; ML_Comm_Create(&comm_); ApplicationTime_+= t_diff; if(FirstApplication_){ FirstApplication_=false; FirstApplicationTime_=ApplicationTime_; }/*end if*/ ML_Comm_Destroy(&comm_); #endif return 0; }
/* Computes the approximate Schur complement for the wide separator.
 *
 * G is split row by row: entries whose column is also a local row of G go
 * into a purely local matrix (localG); all remaining entries are inserted
 * into the result Sbar right away.  A ShyLU_Local_Schur_Operator built on
 * localG, R and C is then probed with unit vectors, 16 columns at a time
 * (plus one smaller final batch).  From every probed column the diagonal
 * entry and the entries whose ratio to the column's MaxValue() exceeds
 * config->relative_threshold are added to Sbar.
 *
 * Returns Sbar after FillComplete().
 */
Teuchos::RCP<Epetra_CrsMatrix> computeApproxWideSchur(shylu_config *config,
    shylu_symbolic *ssym,   // symbolic structure
    Epetra_CrsMatrix *G, Epetra_CrsMatrix *R,
    Epetra_LinearProblem *LP, Amesos_BaseSolver *solver,
    Ifpack_Preconditioner *ifSolver, Epetra_CrsMatrix *C,
    Epetra_Map *localDRowMap)
{
    const double dropTol = config->relative_threshold;

    // Row map of G and a serial (process-local) map over the same GIDs.
    Epetra_Map GrMap = G->RowMap();
    int *g_rows = GrMap.MyGlobalElements();
    const int numLocalRows = GrMap.NumMyElements();

    Epetra_SerialComm LComm;
    Epetra_Map G_localRMap (-1, numLocalRows, g_rows, 0, LComm);

    // Row buffers are sized for the widest row of C, R and G.
    int maxentries = max(C->MaxNumEntries(), R->MaxNumEntries());
    maxentries = max(maxentries, G->MaxNumEntries());

    double *rowVals     = new double[maxentries];   // extracted row of G
    double *localVals   = new double[maxentries];   // part kept in localG
    double *remoteVals  = new double[maxentries];   // part sent to Sbar
    int *rowCols    = new int[maxentries];
    int *localCols  = new int[maxentries];
    int *remoteCols = new int[maxentries];

    // Sbar - Approximate Schur complement
    Teuchos::RCP<Epetra_CrsMatrix> Sbar = Teuchos::rcp(new Epetra_CrsMatrix(
                                            Copy, GrMap, numLocalRows));

    // Include only the block diagonal elements of G in localG
    Epetra_CrsMatrix localG(Copy, G_localRMap, G->MaxNumEntries(), false);

    for (int r = 0; r < numLocalRows; r++)
    {
        const int gid = g_rows[r];
        int rowLen;
        G->ExtractGlobalRowCopy(gid, maxentries, rowLen, rowVals, rowCols);

        int nLocal = 0;
        int nRemote = 0;
        for (int e = 0; e < rowLen; e++)
        {
            const bool columnIsLocalRow = (G->LRID(rowCols[e]) != -1);
            if (columnIsLocalRow)
            {
                localVals[nLocal] = rowVals[e];
                localCols[nLocal] = rowCols[e];
                nLocal++;
            }
            else
            {
                // Add it to Sbar immediately
                remoteVals[nRemote] = rowVals[e];
                remoteCols[nRemote] = rowCols[e];
                nRemote++;
            }
        }
        localG.InsertGlobalValues(gid, nLocal, localVals, localCols);
        Sbar->InsertGlobalValues(gid, nRemote, remoteVals, remoteCols);
    }
    localG.FillComplete();

    // Number of columns probed per Apply
    int nvectors = 16;
    ShyLU_Local_Schur_Operator probeop(config, ssym, &localG, R, LP, solver,
                                        ifSolver, C, localDRowMap, nvectors);

#ifdef DUMP_MATRICES
    // (no matrices are dumped at this point)
#endif

    // **************** Two collectives here *********************
#ifdef TIMING_OUTPUT
    Teuchos::Time ftime("setup time");
    ftime.start();
#endif
#ifdef TIMING_OUTPUT
    Teuchos::Time app_time("setup time");
#endif

    // At most one entry per probed column is kept for a given row, so
    // nvectors slots are enough.
    // TODO: Currently the size of the two arrays can be one, Even if we switch
    // the loop below the size of the array required is nvectors. Fix it
    double *keepVals = new double[nvectors];
    int *keepCols = new int[nvectors];
#ifdef SHYLU_DEBUG
    // Only needed (and only declared) for the debug statistics.
    int dropped = 0;
#endif // SHYLU_DEBUG
    double *colMax = new double[nvectors];

#ifdef TIMING_OUTPUT
    ftime.start();
#endif

    const int numFullBatches = numLocalRows / nvectors;
    const int fullBatchEnd = numFullBatches * nvectors;

    Epetra_MultiVector probevec (G_localRMap, nvectors);
    Epetra_MultiVector Scol (G_localRMap, nvectors);
    probevec.PutScalar(0.0);

    // First column of the batch being probed; also tells the remainder
    // section below where the full batches stopped.
    int base = 0;
    for ( ; base < fullBatchEnd; base += nvectors)
    {
        // Set the probevec to find block columns of S.
        // TODO: Can do better than this, just need to go to the column map
        // of C, there might be null columns in C
        for (int k = 0; k < nvectors; k++)
        {
            probevec.ReplaceGlobalValue(g_rows[base + k], k, 1.0);
        }

#ifdef TIMING_OUTPUT
        app_time.start();
#endif
        probeop.Apply(probevec, Scol);
#ifdef TIMING_OUTPUT
        app_time.stop();
#endif

        // Reset the probevec to all zeros.
        for (int k = 0; k < nvectors; k++)
        {
            probevec.ReplaceGlobalValue(g_rows[base + k], k, 0.0);
        }

        Scol.MaxValue(colMax);
        for (int j = 0; j < numLocalRows; j++)
        {
            int nKeep = 0;
            for (int k = 0; k < nvectors; k++)
            {
                const int colGid = g_rows[base + k];
                const double *column = Scol[k];

                if ((colGid == g_rows[j]) ||
                    (abs(column[j]/colMax[k]) > dropTol))
                {
                    // diagonal entry or large entry.
                    keepVals[nKeep] = column[j];
                    keepCols[nKeep] = colGid;
                    nKeep++;
                }
#ifdef SHYLU_DEBUG
                else if (column[j] != 0.0)
                {
                    dropped++;
                }
#endif // SHYLU_DEBUG
            }
            Sbar->InsertGlobalValues(g_rows[j], nKeep, keepVals, keepCols);
        }
    }

    // Remaining columns (fewer than a full batch)
    if (base < numLocalRows)
    {
        nvectors = numLocalRows - base;
        probeop.ResetTempVectors(nvectors);
        Epetra_MultiVector probevec1 (G_localRMap, nvectors);
        Epetra_MultiVector Scol1 (G_localRMap, nvectors);

        probevec1.PutScalar(0.0);
        // TODO: Can do better than this, just need to go to the column map
        // of C, there might be null columns in C
        for (int k = 0; k < nvectors; k++)
        {
            probevec1.ReplaceGlobalValue(g_rows[base + k], k, 1.0);
        }

#ifdef TIMING_OUTPUT
        app_time.start();
#endif
        probeop.Apply(probevec1, Scol1);
#ifdef TIMING_OUTPUT
        app_time.stop();
#endif

        Scol1.MaxValue(colMax);
        for (int j = 0; j < numLocalRows; j++)
        {
            int nKeep = 0;
            for (int k = 0; k < nvectors; k++)
            {
                const int colGid = g_rows[base + k];
                const double *column = Scol1[k];

                if ((colGid == g_rows[j]) ||
                    (abs(column[j]/colMax[k]) > dropTol))
                {
                    // diagonal entry or large entry.
                    keepVals[nKeep] = column[j];
                    keepCols[nKeep] = colGid;
                    nKeep++;
                }
#ifdef SHYLU_DEBUG
                else if (column[j] != 0.0)
                {
                    dropped++;
                }
#endif // SHYLU_DEBUG
            }
            Sbar->InsertGlobalValues(g_rows[j], nKeep, keepVals, keepCols);
        }
    }

#ifdef TIMING_OUTPUT
    ftime.stop();
    cout << "Time in finding and dropping entries" << ftime.totalElapsedTime() << endl;
    ftime.reset();
    cout << "Time in Apply of probing" << app_time.totalElapsedTime() << endl;
    probeop.PrintTimingInfo();
#endif

    Sbar->FillComplete();

#ifdef DUMP_MATRICES
    Epetra_Map defMap2(-1, numLocalRows, 0, C->Comm());
    EpetraExt::ViewTransform<Epetra_CrsMatrix> * ReIdx_MatTrans2 =
                        new EpetraExt::CrsMatrix_Reindex( defMap2 );
    Epetra_CrsMatrix t2S = (*ReIdx_MatTrans2)( *Sbar );
    ReIdx_MatTrans2->fwd();
    EpetraExt::RowMatrixToMatlabFile("Schur.mat", t2S);
#endif

#ifdef SHYLU_DEBUG
    cout << "#dropped entries" << dropped << endl;
#endif

    // Release the scratch buffers
    delete[] keepVals;
    delete[] keepCols;
    delete[] rowVals;
    delete[] rowCols;
    delete[] localVals;
    delete[] localCols;
    delete[] remoteVals;
    delete[] remoteCols;
    delete[] colMax;

    return Sbar;
}
// // Amesos_TestMultiSolver.cpp reads in a matrix in Harwell-Boeing format, // calls one of the sparse direct solvers, using blocked right hand sides // and computes the error and residual. // // TestSolver ignores the Harwell-Boeing right hand sides, creating // random right hand sides instead. // // Amesos_TestMultiSolver can test either A x = b or A^T x = b. // This can be a bit confusing because sparse direct solvers // use compressed column storage - the transpose of Trilinos' // sparse row storage. // // Matrices: // readA - Serial. As read from the file. // transposeA - Serial. The transpose of readA. // serialA - if (transpose) then transposeA else readA // distributedA - readA distributed to all processes // passA - if ( distributed ) then distributedA else serialA // // int Amesos_TestMultiSolver( Epetra_Comm &Comm, char *matrix_file, int numsolves, SparseSolverType SparseSolver, bool transpose, int special, AMESOS_MatrixType matrix_type ) { int iam = Comm.MyPID() ; // int hatever; // if ( iam == 0 ) std::cin >> hatever ; Comm.Barrier(); Epetra_Map * readMap; Epetra_CrsMatrix * readA; Epetra_Vector * readx; Epetra_Vector * readb; Epetra_Vector * readxexact; std::string FileName = matrix_file ; int FN_Size = FileName.size() ; std::string LastFiveBytes = FileName.substr( EPETRA_MAX(0,FN_Size-5), FN_Size ); std::string LastFourBytes = FileName.substr( EPETRA_MAX(0,FN_Size-4), FN_Size ); bool NonContiguousMap = false; if ( LastFiveBytes == ".triU" ) { NonContiguousMap = true; // Call routine to read in unsymmetric Triplet matrix EPETRA_CHK_ERR( Trilinos_Util_ReadTriples2Epetra( matrix_file, false, Comm, readMap, readA, readx, readb, readxexact, NonContiguousMap ) ); } else { if ( LastFiveBytes == ".triS" ) { NonContiguousMap = true; // Call routine to read in symmetric Triplet matrix EPETRA_CHK_ERR( Trilinos_Util_ReadTriples2Epetra( matrix_file, true, Comm, readMap, readA, readx, readb, readxexact, NonContiguousMap ) ); } else { if ( LastFourBytes == ".mtx" 
) { EPETRA_CHK_ERR( Trilinos_Util_ReadMatrixMarket2Epetra( matrix_file, Comm, readMap, readA, readx, readb, readxexact) ); } else { // Call routine to read in HB problem Trilinos_Util_ReadHb2Epetra( matrix_file, Comm, readMap, readA, readx, readb, readxexact) ; } } } Epetra_CrsMatrix transposeA(Copy, *readMap, 0); Epetra_CrsMatrix *serialA ; if ( transpose ) { assert( CrsMatrixTranspose( readA, &transposeA ) == 0 ); serialA = &transposeA ; } else { serialA = readA ; } // Create uniform distributed map Epetra_Map map(readMap->NumGlobalElements(), 0, Comm); Epetra_Map* map_; if( NonContiguousMap ) { // // map gives us NumMyElements and MyFirstElement; // int NumGlobalElements = readMap->NumGlobalElements(); int NumMyElements = map.NumMyElements(); int MyFirstElement = map.MinMyGID(); std::vector<int> MapMap_( NumGlobalElements ); readMap->MyGlobalElements( &MapMap_[0] ) ; Comm.Broadcast( &MapMap_[0], NumGlobalElements, 0 ) ; map_ = new Epetra_Map( NumGlobalElements, NumMyElements, &MapMap_[MyFirstElement], 0, Comm); } else { map_ = new Epetra_Map( map ) ; } // Create Exporter to distribute read-in matrix and vectors Epetra_Export exporter(*readMap, *map_); Epetra_CrsMatrix A(Copy, *map_, 0); Epetra_RowMatrix * passA = 0; Epetra_MultiVector * passx = 0; Epetra_MultiVector * passb = 0; Epetra_MultiVector * passxexact = 0; Epetra_MultiVector * passresid = 0; Epetra_MultiVector * passtmp = 0; Epetra_MultiVector x(*map_,numsolves); Epetra_MultiVector b(*map_,numsolves); Epetra_MultiVector xexact(*map_,numsolves); Epetra_MultiVector resid(*map_,numsolves); Epetra_MultiVector tmp(*map_,numsolves); Epetra_MultiVector serialx(*readMap,numsolves); Epetra_MultiVector serialb(*readMap,numsolves); Epetra_MultiVector serialxexact(*readMap,numsolves); Epetra_MultiVector serialresid(*readMap,numsolves); Epetra_MultiVector serialtmp(*readMap,numsolves); bool distribute_matrix = ( matrix_type == AMESOS_Distributed ) ; if ( distribute_matrix ) { // // Initialize x, b and xexact to the 
values read in from the file // A.Export(*serialA, exporter, Add); Comm.Barrier(); assert(A.FillComplete()==0); Comm.Barrier(); passA = &A; passx = &x; passb = &b; passxexact = &xexact; passresid = &resid; passtmp = &tmp; } else { passA = serialA; passx = &serialx; passb = &serialb; passxexact = &serialxexact; passresid = &serialresid; passtmp = &serialtmp; } passxexact->SetSeed(131) ; passxexact->Random(); passx->SetSeed(11231) ; passx->Random(); passb->PutScalar( 0.0 ); passA->Multiply( transpose, *passxexact, *passb ) ; Epetra_MultiVector CopyB( *passb ) ; double Anorm = passA->NormInf() ; SparseDirectTimingVars::SS_Result.Set_Anorm(Anorm) ; Epetra_LinearProblem Problem( (Epetra_RowMatrix *) passA, (Epetra_MultiVector *) passx, (Epetra_MultiVector *) passb ); double max_resid = 0.0; for ( int j = 0 ; j < special+1 ; j++ ) { Epetra_Time TotalTime( Comm ) ; if ( false ) { #ifdef TEST_UMFPACK unused code } else if ( SparseSolver == UMFPACK ) { UmfpackOO umfpack( (Epetra_RowMatrix *) passA, (Epetra_MultiVector *) passx, (Epetra_MultiVector *) passb ) ; umfpack.SetTrans( transpose ) ; umfpack.Solve() ; #endif #ifdef TEST_SUPERLU } else if ( SparseSolver == SuperLU ) { SuperluserialOO superluserial( (Epetra_RowMatrix *) passA, (Epetra_MultiVector *) passx, (Epetra_MultiVector *) passb ) ; superluserial.SetPermc( SuperLU_permc ) ; superluserial.SetTrans( transpose ) ; superluserial.SetUseDGSSV( special == 0 ) ; superluserial.Solve() ; #endif #ifdef HAVE_AMESOS_SLUD } else if ( SparseSolver == SuperLUdist ) { SuperludistOO superludist( Problem ) ; superludist.SetTrans( transpose ) ; EPETRA_CHK_ERR( superludist.Solve( true ) ) ; #endif #ifdef HAVE_AMESOS_SLUD2 } else if ( SparseSolver == SuperLUdist2 ) { Superludist2_OO superludist2( Problem ) ; superludist2.SetTrans( transpose ) ; EPETRA_CHK_ERR( superludist2.Solve( true ) ) ; #endif #ifdef TEST_SPOOLES } else if ( SparseSolver == SPOOLES ) { SpoolesOO spooles( (Epetra_RowMatrix *) passA, (Epetra_MultiVector *) passx, 
(Epetra_MultiVector *) passb ) ; spooles.SetTrans( transpose ) ; spooles.Solve() ; #endif #ifdef HAVE_AMESOS_DSCPACK } else if ( SparseSolver == DSCPACK ) { Teuchos::ParameterList ParamList ; Amesos_Dscpack dscpack( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( dscpack.SetParameters( ParamList ) ); EPETRA_CHK_ERR( dscpack.Solve( ) ); #endif #ifdef HAVE_AMESOS_UMFPACK } else if ( SparseSolver == UMFPACK ) { Teuchos::ParameterList ParamList ; Amesos_Umfpack umfpack( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( umfpack.SetParameters( ParamList ) ); EPETRA_CHK_ERR( umfpack.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( umfpack.Solve( ) ); #endif #ifdef HAVE_AMESOS_KLU } else if ( SparseSolver == KLU ) { Teuchos::ParameterList ParamList ; Amesos_Klu klu( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( klu.SetParameters( ParamList ) ); EPETRA_CHK_ERR( klu.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( klu.SymbolicFactorization( ) ); EPETRA_CHK_ERR( klu.NumericFactorization( ) ); EPETRA_CHK_ERR( klu.Solve( ) ); #endif #ifdef HAVE_AMESOS_PARAKLETE } else if ( SparseSolver == PARAKLETE ) { Teuchos::ParameterList ParamList ; Amesos_Paraklete paraklete( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( paraklete.SetParameters( ParamList ) ); EPETRA_CHK_ERR( paraklete.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( paraklete.SymbolicFactorization( ) ); EPETRA_CHK_ERR( paraklete.NumericFactorization( ) ); EPETRA_CHK_ERR( paraklete.Solve( ) ); #endif #ifdef HAVE_AMESOS_SLUS } else if ( SparseSolver == SuperLU ) { Epetra_SLU superluserial( &Problem ) ; EPETRA_CHK_ERR( superluserial.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( superluserial.SymbolicFactorization( ) ); EPETRA_CHK_ERR( superluserial.NumericFactorization( ) ); EPETRA_CHK_ERR( superluserial.Solve( ) ); #endif #ifdef HAVE_AMESOS_LAPACK } else if ( SparseSolver == LAPACK ) { Teuchos::ParameterList ParamList ; ParamList.set( "MaxProcs", -3 ); Amesos_Lapack 
lapack( Problem ) ; EPETRA_CHK_ERR( lapack.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( lapack.SymbolicFactorization( ) ); EPETRA_CHK_ERR( lapack.NumericFactorization( ) ); EPETRA_CHK_ERR( lapack.Solve( ) ); #endif #ifdef HAVE_AMESOS_TAUCS } else if ( SparseSolver == TAUCS ) { Teuchos::ParameterList ParamList ; Amesos_Taucs taucs( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( taucs.SetParameters( ParamList ) ); EPETRA_CHK_ERR( taucs.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( taucs.SymbolicFactorization( ) ); EPETRA_CHK_ERR( taucs.NumericFactorization( ) ); EPETRA_CHK_ERR( taucs.Solve( ) ); #endif #ifdef HAVE_AMESOS_PARDISO } else if ( SparseSolver == PARDISO ) { Teuchos::ParameterList ParamList ; Amesos_Pardiso pardiso( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( pardiso.SetParameters( ParamList ) ); EPETRA_CHK_ERR( pardiso.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( pardiso.SymbolicFactorization( ) ); EPETRA_CHK_ERR( pardiso.NumericFactorization( ) ); EPETRA_CHK_ERR( pardiso.Solve( ) ); #endif #ifdef HAVE_AMESOS_PARKLETE } else if ( SparseSolver == PARKLETE ) { Teuchos::ParameterList ParamList ; Amesos_Parklete parklete( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( parklete.SetParameters( ParamList ) ); EPETRA_CHK_ERR( parklete.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( parklete.SymbolicFactorization( ) ); EPETRA_CHK_ERR( parklete.NumericFactorization( ) ); EPETRA_CHK_ERR( parklete.Solve( ) ); #endif #ifdef HAVE_AMESOS_MUMPS } else if ( SparseSolver == MUMPS ) { Teuchos::ParameterList ParamList ; Amesos_Mumps mumps( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( mumps.SetParameters( ParamList ) ); EPETRA_CHK_ERR( mumps.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( mumps.SymbolicFactorization( ) ); EPETRA_CHK_ERR( mumps.NumericFactorization( ) ); EPETRA_CHK_ERR( mumps.Solve( ) ); #endif #ifdef HAVE_AMESOS_SCALAPACK } else if ( SparseSolver == SCALAPACK ) { Teuchos::ParameterList 
ParamList ; Amesos_Scalapack scalapack( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( scalapack.SetParameters( ParamList ) ); EPETRA_CHK_ERR( scalapack.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( scalapack.SymbolicFactorization( ) ); EPETRA_CHK_ERR( scalapack.NumericFactorization( ) ); EPETRA_CHK_ERR( scalapack.Solve( ) ); #endif #ifdef HAVE_AMESOS_SUPERLUDIST } else if ( SparseSolver == SUPERLUDIST ) { Teuchos::ParameterList ParamList ; Amesos_Superludist superludist( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( superludist.SetParameters( ParamList ) ); EPETRA_CHK_ERR( superludist.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( superludist.SymbolicFactorization( ) ); EPETRA_CHK_ERR( superludist.NumericFactorization( ) ); EPETRA_CHK_ERR( superludist.Solve( ) ); #endif #ifdef HAVE_AMESOS_SUPERLU } else if ( SparseSolver == SUPERLU ) { Teuchos::ParameterList ParamList ; Amesos_Superlu superlu( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( superlu.SetParameters( ParamList ) ); EPETRA_CHK_ERR( superlu.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( superlu.SymbolicFactorization( ) ); EPETRA_CHK_ERR( superlu.NumericFactorization( ) ); EPETRA_CHK_ERR( superlu.Solve( ) ); #endif #ifdef TEST_SPOOLESSERIAL } else if ( SparseSolver == SPOOLESSERIAL ) { SpoolesserialOO spoolesserial( (Epetra_RowMatrix *) passA, (Epetra_MultiVector *) passx, (Epetra_MultiVector *) passb ) ; spoolesserial.Solve() ; #endif } else { SparseDirectTimingVars::log_file << "Solver not implemented yet" << std::endl ; std::cerr << "\n\n#################### Requested solver not available (Or not tested with blocked RHS) on this platform #####################\n" << std::endl ; } SparseDirectTimingVars::SS_Result.Set_Total_Time( TotalTime.ElapsedTime() ); // SparseDirectTimingVars::SS_Result.Set_First_Time( 0.0 ); // SparseDirectTimingVars::SS_Result.Set_Middle_Time( 0.0 ); // SparseDirectTimingVars::SS_Result.Set_Last_Time( 0.0 ); // // Compute the error = 
norm(xcomp - xexact ) // std::vector <double> error(numsolves) ; double max_error = 0.0; passresid->Update(1.0, *passx, -1.0, *passxexact, 0.0); passresid->Norm2(&error[0]); for ( int i = 0 ; i< numsolves; i++ ) if ( error[i] > max_error ) max_error = error[i] ; SparseDirectTimingVars::SS_Result.Set_Error(max_error) ; // passxexact->Norm2(&error[0] ) ; // passx->Norm2(&error ) ; // // Compute the residual = norm(Ax - b) // std::vector <double> residual(numsolves) ; passtmp->PutScalar(0.0); passA->Multiply( transpose, *passx, *passtmp); passresid->Update(1.0, *passtmp, -1.0, *passb, 0.0); // passresid->Update(1.0, *passtmp, -1.0, CopyB, 0.0); passresid->Norm2(&residual[0]); for ( int i = 0 ; i< numsolves; i++ ) if ( residual[i] > max_resid ) max_resid = residual[i] ; SparseDirectTimingVars::SS_Result.Set_Residual(max_resid) ; std::vector <double> bnorm(numsolves); passb->Norm2( &bnorm[0] ) ; SparseDirectTimingVars::SS_Result.Set_Bnorm(bnorm[0]) ; std::vector <double> xnorm(numsolves); passx->Norm2( &xnorm[0] ) ; SparseDirectTimingVars::SS_Result.Set_Xnorm(xnorm[0]) ; if ( false && iam == 0 ) { std::cout << " Amesos_TestMutliSolver.cpp " << std::endl ; for ( int i = 0 ; i< numsolves && i < 10 ; i++ ) { std::cout << "i=" << i << " error = " << error[i] << " xnorm = " << xnorm[i] << " residual = " << residual[i] << " bnorm = " << bnorm[i] << std::endl ; } std::cout << std::endl << " max_resid = " << max_resid ; std::cout << " max_error = " << max_error << std::endl ; std::cout << " Get_residual() again = " << SparseDirectTimingVars::SS_Result.Get_Residual() << std::endl ; } } delete readA; delete readx; delete readb; delete readxexact; delete readMap; delete map_; Comm.Barrier(); return 0 ; }
int Davidson::reSolve(int numEigen, Epetra_MultiVector &Q, double *lambda, int startingEV) { // Computes the smallest eigenvalues and the corresponding eigenvectors // of the generalized eigenvalue problem // // K X = M X Lambda // // using a generalized Davidson algorithm // // Note that if M is not specified, then K X = X Lambda is solved. // // Input variables: // // numEigen (integer) = Number of eigenmodes requested // // Q (Epetra_MultiVector) = Converged eigenvectors // The number of columns of Q must be at least numEigen + blockSize. // The rows of Q are distributed across processors. // At exit, the first numEigen columns contain the eigenvectors requested. // // lambda (array of doubles) = Converged eigenvalues // At input, it must be of size numEigen + blockSize. // At exit, the first numEigen locations contain the eigenvalues requested. // // startingEV (integer) = Number of existing converged eigenvectors // We assume that the user has check the eigenvectors and // their M-orthonormality. // // Return information on status of computation // // info >= 0 >> Number of converged eigenpairs at the end of computation // // // Failure due to input arguments // // info = - 1 >> The stiffness matrix K has not been specified. // info = - 2 >> The maps for the matrix K and the matrix M differ. // info = - 3 >> The maps for the matrix K and the preconditioner P differ. // info = - 4 >> The maps for the vectors and the matrix K differ. // info = - 5 >> Q is too small for the number of eigenvalues requested. // info = - 6 >> Q is too small for the computation parameters. // // info = - 8 >> The number of blocks is too small for the number of eigenvalues. 
// // info = - 10 >> Failure during the mass orthonormalization // // info = - 30 >> MEMORY // // Check the input parameters if (numEigen <= startingEV) { return startingEV; } int info = myVerify.inputArguments(numEigen, K, M, Prec, Q, minimumSpaceDimension(numEigen)); if (info < 0) return info; int myPid = MyComm.MyPID(); if (numBlock*blockSize < numEigen) { if (myPid == 0) { cerr << endl; cerr << " !!! The space dimension (# of blocks x size of blocks) must be greater than "; cerr << " the number of eigenvalues !!!\n"; cerr << " Number of blocks = " << numBlock << endl; cerr << " Size of blocks = " << blockSize << endl; cerr << " Number of eigenvalues = " << numEigen << endl; cerr << endl; } return -8; } // Get the weight for approximating the M-inverse norm Epetra_Vector *vectWeight = 0; if (normWeight) { vectWeight = new Epetra_Vector(View, Q.Map(), normWeight); } int knownEV = startingEV; int localVerbose = verbose*(myPid==0); // Define local block vectors // // MX = Working vectors (storing M*X if M is specified, else pointing to X) // KX = Working vectors (storing K*X) // // R = Residuals int xr = Q.MyLength(); int dimSearch = blockSize*numBlock; Epetra_MultiVector X(View, Q, 0, dimSearch + blockSize); if (knownEV > 0) { Epetra_MultiVector copyX(View, Q, knownEV, blockSize); copyX.Random(); } else { X.Random(); } int tmp; tmp = (M == 0) ? 2*blockSize*xr : 3*blockSize*xr; double *work1 = new (nothrow) double[tmp]; if (work1 == 0) { if (vectWeight) delete vectWeight; info = -30; return info; } memRequested += sizeof(double)*tmp/(1024.0*1024.0); highMem = (highMem > currentSize()) ? highMem : currentSize(); double *tmpD = work1; Epetra_MultiVector KX(View, Q.Map(), tmpD, xr, blockSize); tmpD = tmpD + xr*blockSize; Epetra_MultiVector MX(View, Q.Map(), (M) ? tmpD : X.Values(), xr, blockSize); tmpD = (M) ? 
tmpD + xr*blockSize : tmpD; Epetra_MultiVector R(View, Q.Map(), tmpD, xr, blockSize); // Define arrays // // theta = Store the local eigenvalues (size: dimSearch) // normR = Store the norm of residuals (size: blockSize) // // KK = Local stiffness matrix (size: dimSearch x dimSearch) // // S = Local eigenvectors (size: dimSearch x dimSearch) // // tmpKK = Local workspace (size: blockSize x blockSize) int lwork2 = blockSize + dimSearch + 2*dimSearch*dimSearch + blockSize*blockSize; double *work2 = new (nothrow) double[lwork2]; if (work2 == 0) { if (vectWeight) delete vectWeight; delete[] work1; info = -30; return info; } memRequested += sizeof(double)*lwork2/(1024.0*1024.0); highMem = (highMem > currentSize()) ? highMem : currentSize(); tmpD = work2; double *theta = tmpD; tmpD = tmpD + dimSearch; double *normR = tmpD; tmpD = tmpD + blockSize; double *KK = tmpD; tmpD = tmpD + dimSearch*dimSearch; memset(KK, 0, dimSearch*dimSearch*sizeof(double)); double *S = tmpD; tmpD = tmpD + dimSearch*dimSearch; double *tmpKK = tmpD; // Define an array to store the residuals history if (localVerbose > 2) { resHistory = new (nothrow) double[maxIterEigenSolve*blockSize]; spaceSizeHistory = new (nothrow) int[maxIterEigenSolve]; if ((resHistory == 0) || (spaceSizeHistory == 0)) { if (vectWeight) delete vectWeight; delete[] work1; delete[] work2; info = -30; return info; } historyCount = 0; } // Miscellaneous definitions bool reStart = false; numRestart = 0; bool criticalExit = false; int bStart = 0; int offSet = 0; numBlock = (dimSearch/blockSize) - (knownEV/blockSize); int nFound = blockSize; int i, j; if (localVerbose > 0) { cout << endl; cout << " *|* Problem: "; if (M) cout << "K*Q = M*Q D "; else cout << "K*Q = Q D "; if (Prec) cout << " with preconditioner"; cout << endl; cout << " *|* Algorithm = Davidson algorithm (block version)" << endl; cout << " *|* Size of blocks = " << blockSize << endl; cout << " *|* Largest size of search space = " << numBlock*blockSize << endl; cout << 
" *|* Number of requested eigenvalues = " << numEigen << endl; cout.precision(2); cout.setf(ios::scientific, ios::floatfield); cout << " *|* Tolerance for convergence = " << tolEigenSolve << endl; cout << " *|* Norm used for convergence: "; if (vectWeight) cout << "weighted L2-norm with user-provided weights" << endl; else cout << "L^2-norm" << endl; if (startingEV > 0) cout << " *|* Input converged eigenvectors = " << startingEV << endl; cout << "\n -- Start iterations -- \n"; } int maxBlock = (dimSearch/blockSize) - (knownEV/blockSize); timeOuterLoop -= MyWatch.WallTime(); outerIter = 0; while (outerIter <= maxIterEigenSolve) { highMem = (highMem > currentSize()) ? highMem : currentSize(); int nb; for (nb = bStart; nb < maxBlock; ++nb) { outerIter += 1; if (outerIter > maxIterEigenSolve) break; int localSize = nb*blockSize; Epetra_MultiVector Xcurrent(View, X, localSize + knownEV, blockSize); timeMassOp -= MyWatch.WallTime(); if (M) M->Apply(Xcurrent, MX); timeMassOp += MyWatch.WallTime(); massOp += blockSize; // Orthonormalize X against the known eigenvectors and the previous vectors // Note: Use R as a temporary work space timeOrtho -= MyWatch.WallTime(); if (nb == bStart) { if (nFound > 0) { if (knownEV == 0) { info = modalTool.massOrthonormalize(Xcurrent, MX, M, Q, nFound, 2, R.Values()); } else { Epetra_MultiVector copyQ(View, X, 0, knownEV + localSize); info = modalTool.massOrthonormalize(Xcurrent, MX, M, copyQ, nFound, 0, R.Values()); } } nFound = 0; } else { Epetra_MultiVector copyQ(View, X, 0, knownEV + localSize); info = modalTool.massOrthonormalize(Xcurrent, MX, M, copyQ, blockSize, 0, R.Values()); } timeOrtho += MyWatch.WallTime(); // Exit the code when the number of vectors exceeds the space dimension if (info < 0) { delete[] work1; delete[] work2; if (vectWeight) delete vectWeight; return -10; } timeStifOp -= MyWatch.WallTime(); K->Apply(Xcurrent, KX); timeStifOp += MyWatch.WallTime(); stifOp += blockSize; // Check the orthogonality properties of X 
if (verbose > 2) { if (knownEV + localSize == 0) accuracyCheck(&Xcurrent, &MX, 0); else { Epetra_MultiVector copyQ(View, X, 0, knownEV + localSize); accuracyCheck(&Xcurrent, &MX, ©Q); } if (localVerbose > 0) cout << endl; } // if (verbose > 2) // Define the local stiffness matrix // Note: S is used as a workspace timeLocalProj -= MyWatch.WallTime(); for (j = 0; j <= nb; ++j) { callBLAS.GEMM('T', 'N', blockSize, blockSize, xr, 1.0, X.Values()+(knownEV+j*blockSize)*xr, xr, KX.Values(), xr, 0.0, tmpKK, blockSize); MyComm.SumAll(tmpKK, S, blockSize*blockSize); int iC; for (iC = 0; iC < blockSize; ++iC) { double *Kpointer = KK + localSize*dimSearch + j*blockSize + iC*dimSearch; memcpy(Kpointer, S + iC*blockSize, blockSize*sizeof(double)); } } timeLocalProj += MyWatch.WallTime(); // Perform a spectral decomposition timeLocalSolve -= MyWatch.WallTime(); int nevLocal = localSize + blockSize; info = modalTool.directSolver(localSize+blockSize, KK, dimSearch, 0, 0, nevLocal, S, dimSearch, theta, localVerbose, 10); timeLocalSolve += MyWatch.WallTime(); if (info != 0) { // Stop as spectral decomposition has a critical failure if (info < 0) { criticalExit = true; break; } // Restart as spectral decomposition failed if (localVerbose > 0) { cout << " Iteration " << outerIter; cout << "- Failure for spectral decomposition - RESTART with new random search\n"; } reStart = true; numRestart += 1; timeRestart -= MyWatch.WallTime(); Epetra_MultiVector Xinit(View, X, knownEV, blockSize); Xinit.Random(); timeRestart += MyWatch.WallTime(); nFound = blockSize; bStart = 0; break; } // if (info != 0) // Update the search space // Note: Use KX as a workspace timeLocalUpdate -= MyWatch.WallTime(); callBLAS.GEMM('N', 'N', xr, blockSize, localSize+blockSize, 1.0, X.Values()+knownEV*xr, xr, S, dimSearch, 0.0, KX.Values(), xr); timeLocalUpdate += MyWatch.WallTime(); // Apply the mass matrix for the next block timeMassOp -= MyWatch.WallTime(); if (M) M->Apply(KX, MX); timeMassOp += 
MyWatch.WallTime(); massOp += blockSize; // Apply the stiffness matrix for the next block timeStifOp -= MyWatch.WallTime(); K->Apply(KX, R); timeStifOp += MyWatch.WallTime(); stifOp += blockSize; // Form the residuals timeResidual -= MyWatch.WallTime(); if (M) { for (j = 0; j < blockSize; ++j) { callBLAS.AXPY(xr, -theta[j], MX.Values() + j*xr, R.Values() + j*xr); } } else { // Note KX contains the updated block for (j = 0; j < blockSize; ++j) { callBLAS.AXPY(xr, -theta[j], KX.Values() + j*xr, R.Values() + j*xr); } } timeResidual += MyWatch.WallTime(); residual += blockSize; // Compute the norm of residuals timeNorm -= MyWatch.WallTime(); if (vectWeight) { R.NormWeighted(*vectWeight, normR); } else { R.Norm2(normR); } // Scale the norms of residuals with the eigenvalues // Count the number of converged eigenvectors nFound = 0; for (j = 0; j < blockSize; ++j) { normR[j] = (theta[j] == 0.0) ? normR[j] : normR[j]/theta[j]; if (normR[j] < tolEigenSolve) nFound += 1; } // for (j = 0; j < blockSize; ++j) timeNorm += MyWatch.WallTime(); // Store the residual history if (localVerbose > 2) { memcpy(resHistory + historyCount*blockSize, normR, blockSize*sizeof(double)); spaceSizeHistory[historyCount] = localSize + blockSize; historyCount += 1; } maxSpaceSize = (maxSpaceSize > localSize+blockSize) ? maxSpaceSize : localSize+blockSize; sumSpaceSize += localSize + blockSize; // Print information on current iteration if (localVerbose > 0) { cout << " Iteration " << outerIter << " - Number of converged eigenvectors "; cout << knownEV + nFound << endl; } // if (localVerbose > 0) if (localVerbose > 1) { cout << endl; cout.precision(2); cout.setf(ios::scientific, ios::floatfield); for (i=0; i<blockSize; ++i) { cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i; cout << " = " << normR[i] << endl; } cout << endl; cout.precision(2); for (i=0; i<nevLocal; ++i) { cout << " Iteration " << outerIter << " - Ritz eigenvalue " << i; cout.setf((fabs(theta[i]) < 0.01) ? 
ios::scientific : ios::fixed, ios::floatfield); cout << " = " << theta[i] << endl; } cout << endl; } // Exit the loop to treat the converged eigenvectors if (nFound > 0) { nb += 1; offSet = 0; break; } // Apply the preconditioner on the residuals // Note: Use KX as a workspace if (maxBlock == 1) { if (Prec) { timePrecOp -= MyWatch.WallTime(); Prec->ApplyInverse(R, Xcurrent); timePrecOp += MyWatch.WallTime(); precOp += blockSize; } else { memcpy(Xcurrent.Values(), R.Values(), blockSize*xr*sizeof(double)); } timeRestart -= MyWatch.WallTime(); Xcurrent.Update(1.0, KX, -1.0); timeRestart += MyWatch.WallTime(); break; } // if (maxBlock == 1) if (nb == maxBlock - 1) { nb += 1; break; } Epetra_MultiVector Xnext(View, X, knownEV+localSize+blockSize, blockSize); if (Prec) { timePrecOp -= MyWatch.WallTime(); Prec->ApplyInverse(R, Xnext); timePrecOp += MyWatch.WallTime(); precOp += blockSize; } else { memcpy(Xnext.Values(), R.Values(), blockSize*xr*sizeof(double)); } } // for (nb = bStart; nb < maxBlock; ++nb) if (outerIter > maxIterEigenSolve) break; if (reStart == true) { reStart = false; continue; } if (criticalExit == true) break; // Store the final converged eigenvectors if (knownEV + nFound >= numEigen) { for (j = 0; j < blockSize; ++j) { if (normR[j] < tolEigenSolve) { memcpy(X.Values() + knownEV*xr, KX.Values() + j*xr, xr*sizeof(double)); lambda[knownEV] = theta[j]; knownEV += 1; } } if (localVerbose == 1) { cout << endl; cout.precision(2); cout.setf(ios::scientific, ios::floatfield); for (i=0; i<blockSize; ++i) { cout << " Iteration " << outerIter << " - Scaled Norm of Residual " << i; cout << " = " << normR[i] << endl; } cout << endl; } break; } // if (knownEV + nFound >= numEigen) // Treat the particular case of 1 block if (maxBlock == 1) { if (nFound > 0) { double *Xpointer = X.Values() + (knownEV+nFound)*xr; nFound = 0; for (j = 0; j < blockSize; ++j) { if (normR[j] < tolEigenSolve) { memcpy(X.Values() + knownEV*xr, KX.Values() + j*xr, xr*sizeof(double)); 
lambda[knownEV] = theta[j]; knownEV += 1; nFound += 1; } else { memcpy(Xpointer + (j-nFound)*xr, KX.Values() + j*xr, xr*sizeof(double)); } } Epetra_MultiVector Xnext(View, X, knownEV + blockSize - nFound, nFound); Xnext.Random(); } else { nFound = blockSize; } continue; } // Define the restarting block when maxBlock > 1 if (nFound > 0) { int firstIndex = blockSize; for (j = 0; j < blockSize; ++j) { if (normR[j] >= tolEigenSolve) { firstIndex = j; break; } } // for (j = 0; j < blockSize; ++j) while (firstIndex < nFound) { for (j = firstIndex; j < blockSize; ++j) { if (normR[j] < tolEigenSolve) { // Swap the j-th and firstIndex-th position callFortran.SWAP(nb*blockSize, S + j*dimSearch, 1, S + firstIndex*dimSearch, 1); callFortran.SWAP(1, theta + j, 1, theta + firstIndex, 1); callFortran.SWAP(1, normR + j, 1, normR + firstIndex, 1); break; } } // for (j = firstIndex; j < blockSize; ++j) for (j = 0; j < blockSize; ++j) { if (normR[j] >= tolEigenSolve) { firstIndex = j; break; } } // for (j = 0; j < blockSize; ++j) } // while (firstIndex < nFound) // Copy the converged eigenvalues memcpy(lambda + knownEV, theta, nFound*sizeof(double)); } // if (nFound > 0) // Define the restarting size bStart = ((nb - offSet) > 2) ? (nb - offSet)/2 : 0; // Define the restarting space and local stiffness timeRestart -= MyWatch.WallTime(); memset(KK, 0, nb*blockSize*dimSearch*sizeof(double)); for (j = 0; j < bStart*blockSize; ++j) { KK[j + j*dimSearch] = theta[j + nFound]; } // Form the restarting space int oldCol = nb*blockSize; int newCol = nFound + (bStart+1)*blockSize; newCol = (newCol > oldCol) ? 
oldCol : newCol; callFortran.GEQRF(oldCol, newCol, S, dimSearch, theta, R.Values(), xr*blockSize, &info); callFortran.ORMQR('R', 'N', xr, oldCol, newCol, S, dimSearch, theta, X.Values()+knownEV*xr, xr, R.Values(), blockSize*xr, &info); timeRestart += MyWatch.WallTime(); if (nFound == 0) offSet += 1; knownEV += nFound; maxBlock = (dimSearch/blockSize) - (knownEV/blockSize); // Put random vectors if the Rayleigh Ritz vectors are not enough newCol = nFound + (bStart+1)*blockSize; if (newCol > oldCol) { Epetra_MultiVector Xnext(View, X, knownEV+blockSize-nFound, nFound); Xnext.Random(); continue; } nFound = 0; } // while (outerIter <= maxIterEigenSolve) timeOuterLoop += MyWatch.WallTime(); highMem = (highMem > currentSize()) ? highMem : currentSize(); // Clean memory delete[] work1; delete[] work2; if (vectWeight) delete vectWeight; // Sort the eigenpairs timePostProce -= MyWatch.WallTime(); if ((info == 0) && (knownEV > 0)) { mySort.sortScalars_Vectors(knownEV, lambda, Q.Values(), Q.MyLength()); } timePostProce += MyWatch.WallTime(); return (info == 0) ? knownEV : info; }
int main(int argc, char *argv[]) { #ifdef HAVE_MPI Teuchos::GlobalMPISession mpiSession(&argc, &argv, 0); Epetra_MpiComm Comm(MPI_COMM_WORLD); #else Epetra_SerialComm Comm; #endif int nProcs, myPID ; Teuchos::ParameterList pLUList ; // ParaLU parameters Teuchos::ParameterList isoList ; // Isorropia parameters Teuchos::ParameterList shyLUList ; // shyLU parameters Teuchos::ParameterList ifpackList ; // shyLU parameters string ipFileName = "ShyLU.xml"; // TODO : Accept as i/p nProcs = mpiSession.getNProc(); myPID = Comm.MyPID(); if (myPID == 0) { cout <<"Parallel execution: nProcs="<< nProcs << endl; } // =================== Read input xml file ============================= Teuchos::updateParametersFromXmlFile(ipFileName, &pLUList); isoList = pLUList.sublist("Isorropia Input"); shyLUList = pLUList.sublist("ShyLU Input"); shyLUList.set("Outer Solver Library", "AztecOO"); // Get matrix market file name string MMFileName = Teuchos::getParameter<string>(pLUList, "mm_file"); string prec_type = Teuchos::getParameter<string>(pLUList, "preconditioner"); int maxiters = Teuchos::getParameter<int>(pLUList, "Outer Solver MaxIters"); double tol = Teuchos::getParameter<double>(pLUList, "Outer Solver Tolerance"); string rhsFileName = pLUList.get<string>("rhs_file", ""); if (myPID == 0) { cout << "Input :" << endl; cout << "ParaLU params " << endl; pLUList.print(std::cout, 2, true, true); cout << "Matrix market file name: " << MMFileName << endl; } // ==================== Read input Matrix ============================== Epetra_CrsMatrix *A; Epetra_MultiVector *b1; int err = EpetraExt::MatrixMarketFileToCrsMatrix(MMFileName.c_str(), Comm, A); //EpetraExt::MatlabFileToCrsMatrix(MMFileName.c_str(), Comm, A); //assert(err != 0); //cout <<"Done reading the matrix"<< endl; int n = A->NumGlobalRows(); //cout <<"n="<< n << endl; // Create input vectors Epetra_Map vecMap(n, 0, Comm); if (rhsFileName != "") { err = EpetraExt::MatrixMarketFileToMultiVector(rhsFileName.c_str(), vecMap, b1); } 
else { b1 = new Epetra_MultiVector(vecMap, 1, false); b1->PutScalar(1.0); } Epetra_MultiVector x(vecMap, 1); //cout << "Created the vectors" << endl; // Partition the matrix with hypergraph partitioning and redisstribute Isorropia::Epetra::Partitioner *partitioner = new Isorropia::Epetra::Partitioner(A, isoList, false); partitioner->partition(); Isorropia::Epetra::Redistributor rd(partitioner); Epetra_CrsMatrix *newA; Epetra_MultiVector *newX, *newB; rd.redistribute(*A, newA); delete A; A = newA; rd.redistribute(x, newX); rd.redistribute(*b1, newB); Epetra_LinearProblem problem(A, newX, newB); AztecOO solver(problem); ifpackList ; Ifpack_Preconditioner *prec; ML_Epetra::MultiLevelPreconditioner *MLprec; if (prec_type.compare("ShyLU") == 0) { prec = new Ifpack_ShyLU(A); prec->SetParameters(shyLUList); prec->Initialize(); prec->Compute(); //(dynamic_cast<Ifpack_ShyLU *>(prec))->JustTryIt(); //cout << " Going to set it in solver" << endl ; solver.SetPrecOperator(prec); //cout << " Done setting the solver" << endl ; } else if (prec_type.compare("ILU") == 0) { ifpackList.set( "fact: level-of-fill", 1 ); prec = new Ifpack_ILU(A); prec->SetParameters(ifpackList); prec->Initialize(); prec->Compute(); solver.SetPrecOperator(prec); } else if (prec_type.compare("ILUT") == 0) { ifpackList.set( "fact: ilut level-of-fill", 2 ); ifpackList.set( "fact: drop tolerance", 1e-8); prec = new Ifpack_ILUT(A); prec->SetParameters(ifpackList); prec->Initialize(); prec->Compute(); solver.SetPrecOperator(prec); } else if (prec_type.compare("ML") == 0) { Teuchos::ParameterList mlList; // TODO : Take it from i/p MLprec = new ML_Epetra::MultiLevelPreconditioner(*A, mlList, true); solver.SetPrecOperator(MLprec); } solver.SetAztecOption(AZ_solver, AZ_gmres); solver.SetMatrixName(333); //solver.SetAztecOption(AZ_output, 1); //solver.SetAztecOption(AZ_conv, AZ_Anorm); //cout << "Going to iterate for the global problem" << endl; solver.Iterate(maxiters, tol); // compute ||Ax - b|| double Norm; 
Epetra_MultiVector Ax(vecMap, 1); Epetra_MultiVector *newAx; rd.redistribute(Ax, newAx); A->Multiply(false, *newX, *newAx); newAx->Update(1.0, *newB, -1.0); newAx->Norm2(&Norm); double ANorm = A->NormOne(); cout << "|Ax-b |/|A| = " << Norm/ANorm << endl; delete newAx; if (prec_type.compare("ML") == 0) { delete MLprec; } else { delete prec; } delete b1; delete newX; delete newB; delete A; delete partitioner; }
int ShyLU_Probing_Operator::Apply(const Epetra_MultiVector &X, Epetra_MultiVector &Y) const { #ifdef TIMING_OUTPUT apply_time_->start(); #endif int nvectors = X.NumVectors(); bool local = (C_->Comm().NumProc() == 1); int err; //cout << "No of colors after probing" << nvectors << endl; #ifdef TIMING_OUTPUT matvec_time_->start(); #endif err = G_->Multiply(false, X, *temp2); assert(err == 0); if (!local) err = C_->Multiply(false, X, *temp); else { // localize X double *values; int mylda; X.ExtractView(&values, &mylda); Epetra_SerialComm LComm; // Use Serial Comm for the local blocks. Epetra_Map SerialMap(X.Map().NumMyElements(), X.Map().NumMyElements(), X.Map().MyGlobalElements(), 0, LComm); Epetra_MultiVector Xl(View, SerialMap, values, mylda, X.NumVectors()); err = C_->Multiply(false, Xl, *temp); } assert(err == 0); #ifdef TIMING_OUTPUT matvec_time_->stop(); #endif int nrows = C_->RowMap().NumMyElements(); #ifdef DEBUG cout << "DEBUG MODE" << endl; assert(nrows == localDRowMap_->NumGlobalElements()); int gids[nrows], gids1[nrows]; C_->RowMap().MyGlobalElements(gids); localDRowMap_->MyGlobalElements(gids1); for (int i = 0; i < nrows; i++) { assert(gids[i] == gids1[i]); } #endif #ifdef TIMING_OUTPUT localize_time_->start(); #endif //int err; int lda; double *values; if (!local) { err = temp->ExtractView(&values, &lda); assert (err == 0); // copy to local vector //TODO: OMP parallel assert(lda == nrows); //#pragma omp parallel for shared(nvectors, nrows, values) for (int v = 0; v < nvectors; v++) { for (int i = 0; i < nrows; i++) { err = ltemp->ReplaceMyValue(i, v, values[i+v*lda]); assert (err == 0); } } } #ifdef TIMING_OUTPUT localize_time_->stop(); trisolve_time_->start(); #endif if (!local) { LP_->SetRHS(ltemp.getRawPtr()); } else { //LP_->SetRHS(temp.getRawPtr()); } //LP_->SetLHS(localX.getRawPtr()); //TODO: Why not just in Reset(). Check the distr path. 
ssym_->OrigLP->SetLHS(localX.getRawPtr()); ssym_->OrigLP->SetRHS(temp.getRawPtr()); ssym_->ReIdx_LP->fwd(); solver_->Solve(); #ifdef TIMING_OUTPUT trisolve_time_->stop(); dist_time_->start(); #endif if (!local) { err = localX->ExtractView(&values, &lda); assert (err == 0); //Copy back to dist vector //TODO: OMP parallel //#pragma omp parallel for for (int v = 0; v < nvectors; v++) { for (int i = 0; i < nrows; i++) { err = temp->ReplaceMyValue(i, v, values[i+v*lda]); assert (err == 0); } } } #ifdef TIMING_OUTPUT dist_time_->stop(); matvec2_time_->start(); #endif if (!local) { R_->Multiply(false, *temp, Y); } else { // Should Y be localY in Multiply and then exported to Y ?? TODO: // Use view mode ? double *values; int mylda; Y.ExtractView(&values, &mylda); Epetra_SerialComm LComm; // Use Serial Comm for the local blocks. Epetra_Map SerialMap(Y.Map().NumMyElements(), Y.Map().NumMyElements(), Y.Map().MyGlobalElements(), 0, LComm); Epetra_MultiVector Yl(View, SerialMap, values, mylda, Y.NumVectors()); R_->Multiply(false, *localX, Yl); } #ifdef TIMING_OUTPUT matvec2_time_->stop(); update_time_->start(); #endif err = Y.Update(1.0, *temp2, -1.0); //cout << Y.MyLength() << " " << temp2.MyLength() << endl; assert(err == 0); #ifdef TIMING_OUTPUT update_time_->stop(); apply_time_->stop(); #endif cntApply++; return 0; }