int power_method(bool TransA, Epetra_CrsMatrix& A,
                 Epetra_Vector& q, Epetra_Vector& z, Epetra_Vector& resid,
                 double* lambda, int niters, double tolerance, bool verbose)
{
  // Fill z with random numbers.
  z.Random();

  // Variables needed for the iteration.  Initialize residual so the
  // convergence test below is well defined even if it is read before
  // the first residual computation.
  double normz;
  double residual = 0.0;
  int ierr = 1;

  for (int iter = 0; iter < niters; iter++) {
    z.Norm2(&normz);          // Compute 2-norm of z
    q.Scale(1.0/normz, z);
    // NOTE: the reported segfault happened on the Multiply call below,
    // most likely because q and z were not constructed on A's domain
    // and range maps; powerMethod() below avoids the problem by
    // building the vectors from OperatorDomainMap() and
    // OperatorRangeMap().
    A.Multiply(TransA, q, z); // Compute z = A*q
    q.Dot(z, lambda);         // Approximate maximum eigenvalue

    if (iter % 100 == 0 || iter + 1 == niters) {
      resid.Update(1.0, z, -(*lambda), q, 0.0); // Compute A*q - lambda*q
      resid.Norm2(&residual);
      if (verbose)
        cout << "Iter = " << iter << "  Lambda = " << *lambda
             << "  Residual of A*q - lambda*q = " << residual << endl;
    }
    if (residual < tolerance) {
      ierr = 0;
      break;
    }
  }
  return(ierr);
}
int powerMethod (double& lambda, Epetra_CrsMatrix& A,
                 const int niters, const double tolerance, const bool verbose)
{
  // In the power iteration, z = A*q.  Thus, q must be in the domain
  // of A, and z must be in the range of A.  The residual vector is of
  // course in the range of A.
  Epetra_Vector q (A.OperatorDomainMap ());
  Epetra_Vector z (A.OperatorRangeMap ());
  Epetra_Vector resid (A.OperatorRangeMap ());

  // If A is counting floating-point operations, let the vectors
  // contribute to the same counter.
  Epetra_Flops* counter = A.GetFlopCounter ();
  if (counter != 0) {
    q.SetFlopCounter (A);
    z.SetFlopCounter (A);
    resid.SetFlopCounter (A);
  }

  // Initialize the starting vector z with random data.
  z.Random ();

  double normz;
  double residual = 0.0; // initialized so the convergence test is well defined
  int ierr = 1;

  for (int iter = 0; iter < niters; ++iter) {
    z.Norm2 (&normz);         // normz  := ||z||_2
    q.Scale (1.0/normz, z);   // q      := z / normz
    A.Multiply (false, q, z); // z      := A * q
    q.Dot (z, &lambda);       // lambda := dot (q, z)

    // Compute the residual vector and display status output every
    // 100 iterations, or if we have reached the maximum number of
    // iterations.
    if (iter % 100 == 0 || iter + 1 == niters) {
      resid.Update (1.0, z, -lambda, q, 0.0); // resid := A*q - lambda*q
      resid.Norm2 (&residual);                // residual := ||resid||_2
      if (verbose)
        cout << "Iter = " << iter << "  Lambda = " << lambda
             << "  Residual of A*q - lambda*q = " << residual << endl;
    }
    if (residual < tolerance) { // We've converged!
      ierr = 0;
      break;
    }
  }
  return ierr;
}
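A minimal driver for powerMethod() might look like the following sketch. It assumes a square Epetra_CrsMatrix A that has already been filled (with FillComplete() called) and an Epetra_Comm named comm; those names and the parameter values are illustrative, not part of the original code.

// Sketch: call powerMethod() on an already-assembled matrix A.
double lambda = 0.0;              // will hold the eigenvalue estimate
const int niters = 500;           // maximum number of iterations (illustrative)
const double tolerance = 1.0e-8;  // convergence tolerance (illustrative)
const bool verbose = (comm.MyPID () == 0); // only Proc 0 prints
int ierr = powerMethod (lambda, A, niters, tolerance, verbose);
if (ierr != 0 && verbose)
  cout << "powerMethod did not converge within " << niters
       << " iterations." << endl;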
int TestOneMatrix (std::string HBname, std::string MMname, std::string TRIname,
                   Epetra_Comm& Comm, bool verbose)
{
  if (Comm.MyPID() != 0) verbose = false;

  Epetra_Map* readMap = 0;

  Epetra_CrsMatrix* HbA = 0;
  Epetra_Vector* Hbx = 0;
  Epetra_Vector* Hbb = 0;
  Epetra_Vector* Hbxexact = 0;

  Epetra_CrsMatrix* TriplesA = 0;
  Epetra_Vector* Triplesx = 0;
  Epetra_Vector* Triplesb = 0;
  Epetra_Vector* Triplesxexact = 0;

  Epetra_CrsMatrix* MatrixMarketA = 0;
  Epetra_Vector* MatrixMarketx = 0;
  Epetra_Vector* MatrixMarketb = 0;
  Epetra_Vector* MatrixMarketxexact = 0;

  int TRI_Size = TRIname.size();
  std::string LastFiveBytes = TRIname.substr(EPETRA_MAX(0, TRI_Size - 5), TRI_Size);

  if (LastFiveBytes == ".TimD") {
    // Call routine to read in a file with a Tim Davis header and zero-based indexing
    EPETRA_CHK_ERR(Trilinos_Util_ReadTriples2Epetra64(&TRIname[0], false, Comm,
                                                      readMap, TriplesA, Triplesx,
                                                      Triplesb, Triplesxexact,
                                                      false, true, true));
    delete readMap;
  } else if (LastFiveBytes == ".triU") {
    // Call routine to read in unsymmetric Triplet matrix
    EPETRA_CHK_ERR(Trilinos_Util_ReadTriples2Epetra64(&TRIname[0], false, Comm,
                                                      readMap, TriplesA, Triplesx,
                                                      Triplesb, Triplesxexact,
                                                      false, false));
    delete readMap;
  } else if (LastFiveBytes == ".triS") {
    // Call routine to read in symmetric Triplet matrix
    EPETRA_CHK_ERR(Trilinos_Util_ReadTriples2Epetra64(&TRIname[0], true, Comm,
                                                      readMap, TriplesA, Triplesx,
                                                      Triplesb, Triplesxexact,
                                                      false, false));
    delete readMap;
  } else {
    assert(false);
  }

  EPETRA_CHK_ERR(Trilinos_Util_ReadMatrixMarket2Epetra64(&MMname[0], Comm, readMap,
                                                         MatrixMarketA, MatrixMarketx,
                                                         MatrixMarketb,
                                                         MatrixMarketxexact));
  delete readMap;

  // Call routine to read in HB problem
  Trilinos_Util_ReadHb2Epetra64(&HBname[0], Comm, readMap, HbA, Hbx, Hbb, Hbxexact);

#if 0
  std::cout << " HbA ";
  HbA->Print(std::cout);
  std::cout << std::endl;
  std::cout << " MatrixMarketA ";
  MatrixMarketA->Print(std::cout);
  std::cout << std::endl;
  std::cout << " TriplesA ";
  TriplesA->Print(std::cout);
  std::cout << std::endl;
#endif

  int TripleErr = 0;
  int MMerr = 0;
  for (int i = 0; i < 10; i++) {
    double resid_Hb_Triples;
    double resid_Hb_Matrix_Market;
    double norm_A;

    Hbx->Random();
    //
    // Set the output vectors to different values:
    //
    Triplesb->PutScalar(1.1);
    Hbb->PutScalar(1.2);
    MatrixMarketb->PutScalar(1.3);

    HbA->Multiply(false, *Hbx, *Hbb);
    norm_A = HbA->NormOne();

    TriplesA->Multiply(false, *Hbx, *Triplesb);
    Triplesb->Update(1.0, *Hbb, -1.0);

    MatrixMarketA->Multiply(false, *Hbx, *MatrixMarketb);
    MatrixMarketb->Update(1.0, *Hbb, -1.0);

    Triplesb->Norm1(&resid_Hb_Triples);
    MatrixMarketb->Norm1(&resid_Hb_Matrix_Market);

    TripleErr += (resid_Hb_Triples > 1e-11 * norm_A);
    MMerr     += (resid_Hb_Matrix_Market > 1e-11 * norm_A);

    if (verbose && resid_Hb_Triples > 1e-11 * norm_A)
      std::cout << " resid_Hb_Triples = " << resid_Hb_Triples
                << " norm_A = " << norm_A << std::endl;
    if (verbose && resid_Hb_Matrix_Market > 1e-11 * norm_A)
      std::cout << " resid_Hb_Matrix_Market = " << resid_Hb_Matrix_Market
                << " norm_A = " << norm_A << std::endl;
  }

  if (verbose) {
    if (TripleErr)
      std::cout << " Error in reading " << HBname << " or " << TRIname << std::endl;
    if (MMerr)
      std::cout << " Error in reading " << HBname << " or " << MMname << std::endl;
  }

  delete HbA;
  delete Hbx;
  delete Hbb;
  delete Hbxexact;
  delete TriplesA;
  delete Triplesx;
  delete Triplesb;
  delete Triplesxexact;
  delete MatrixMarketA;
  delete MatrixMarketx;
  delete MatrixMarketb;
  delete MatrixMarketxexact;
  delete readMap;

  return TripleErr + MMerr;
}
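A hypothetical call to TestOneMatrix() follows. The routine expects the same matrix stored in Harwell-Boeing, Matrix Market, and triplet formats, and the triplet file name must end in .TimD, .triU, or .triS (anything else hits the assert). The file names below are placeholders, not files shipped with this test.

// Sketch: compare three on-disk representations of the same matrix.
int errors = TestOneMatrix ("bcsstk01.rsa",   // Harwell-Boeing file (placeholder)
                            "bcsstk01.mtx",   // Matrix Market file (placeholder)
                            "bcsstk01.triU",  // unsymmetric triplet file (placeholder)
                            Comm, verbose);
if (errors != 0 && verbose)
  std::cout << "TestOneMatrix reported " << errors << " mismatch(es)" << std::endl;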
// ================================================ ====== ==== ==== == =
// The tentative null space is an input argument because the user has to
// remember to allocate and fill it, and then to delete it after calling
// this method.
int ML_Epetra::MultiLevelPreconditioner::
ComputeAdaptivePreconditioner(int TentativeNullSpaceSize,
                              double* TentativeNullSpace)
{
  if ((TentativeNullSpaceSize == 0) || (TentativeNullSpace == 0))
    ML_CHK_ERR(-1);

  // ================================== //
  // get parameters from the input list //
  // ================================== //

  // maximum number of relaxation sweeps
  int MaxSweeps = List_.get("adaptive: max sweeps", 10);
  // number of vectors to be added to the tentative null space
  int NumAdaptiveVectors = List_.get("adaptive: num vectors", 1);

  if (verbose_) {
    std::cout << PrintMsg_ << "*** Adaptive Smoother Aggregation setup ***" << std::endl;
    std::cout << PrintMsg_ << "    Maximum relaxation sweeps     = " << MaxSweeps << std::endl;
    std::cout << PrintMsg_ << "    Additional vectors to compute = " << NumAdaptiveVectors << std::endl;
  }

  // ==================================================== //
  // compute the preconditioner, set null space from user //
  // (who will have to delete the array TentativeNullSpace) //
  // ==================================================== //

  double* NewNullSpace = 0;
  double* OldNullSpace = TentativeNullSpace;
  int OldNullSpaceSize = TentativeNullSpaceSize;

  // need some work otherwise matvec() with Epetra_Vbr fails.
  // Also, don't differentiate between range and domain here
  // as ML will not work if range != domain
  const Epetra_VbrMatrix* VbrA = NULL;
  VbrA = dynamic_cast<const Epetra_VbrMatrix*>(RowMatrix_);

  Epetra_Vector* LHS = 0;
  Epetra_Vector* RHS = 0;

  if (VbrA != 0) {
    LHS = new Epetra_Vector(VbrA->DomainMap());
    RHS = new Epetra_Vector(VbrA->DomainMap());
  } else {
    LHS = new Epetra_Vector(RowMatrix_->OperatorDomainMap());
    RHS = new Epetra_Vector(RowMatrix_->OperatorDomainMap());
  }

  // destroy what we may already have
  if (IsComputePreconditionerOK_ == true) {
    DestroyPreconditioner();
  }

  // build the preconditioner for the first time
  List_.set("null space: type", "pre-computed");
  List_.set("null space: dimension", OldNullSpaceSize);
  List_.set("null space: vectors", OldNullSpace);
  ComputePreconditioner();

  // ========================= //
  // add one vector at a time  //
  // ========================= //

  for (int istep = 0; istep < NumAdaptiveVectors; ++istep) {

    if (verbose_) {
      std::cout << PrintMsg_ << "\tAdaptation step " << istep << std::endl;
      std::cout << PrintMsg_ << "\t---------------" << std::endl;
    }

    // ==================== //
    // look for "bad" modes //
    // ==================== //

    // note: should an error occur, ML_CHK_ERR will return,
    // and LHS and RHS will *not* be delete'd (--> memory leak).
    // Anyway, this means that something wrong happened in the code
    // and should be fixed by the user.
    LHS->Random();
    double Norm2;

    for (int i = 0; i < MaxSweeps; ++i) {
      // RHS = (I - ML^{-1} A) LHS
      ML_CHK_ERR(RowMatrix_->Multiply(false, *LHS, *RHS));
      // FIXME: can do something slightly better here
      ML_CHK_ERR(ApplyInverse(*RHS, *RHS));
      ML_CHK_ERR(LHS->Update(-1.0, *RHS, 1.0));
      LHS->Norm2(&Norm2);

      if (verbose_) {
        std::cout << PrintMsg_ << "\titer " << i << ", ||x||_2 = ";
        std::cout << Norm2 << std::endl;
      }
    }

    // scaling vectors
    double NormInf;
    LHS->NormInf(&NormInf);
    LHS->Scale(1.0 / NormInf);

    // ========================================================= //
    // copy tentative and computed null space into NewNullSpace, //
    // which now becomes the standard null space                 //
    // ========================================================= //

    int NewNullSpaceSize = OldNullSpaceSize + 1;
    NewNullSpace = new double[NumMyRows() * NewNullSpaceSize];
    assert (NewNullSpace != 0);
    int itmp = OldNullSpaceSize * NumMyRows();
    for (int i = 0; i < itmp; ++i) {
      NewNullSpace[i] = OldNullSpace[i];
    }

    for (int j = 0; j < NumMyRows(); ++j) {
      NewNullSpace[itmp + j] = (*LHS)[j];
    }

    // =============== //
    // visualize modes //
    // =============== //

    if (List_.get("adaptive: visualize", false)) {

      double* x_coord = List_.get("viz: x-coordinates", (double*)0);
      double* y_coord = List_.get("viz: y-coordinates", (double*)0);
      double* z_coord = List_.get("viz: z-coordinates", (double*)0);
      assert (x_coord != 0);

      std::vector<double> plot_me(NumMyRows() / NumPDEEqns_);
      ML_Aggregate_Viz_Stats info;
      info.Amatrix = &(ml_->Amat[LevelID_[0]]);
      info.x = x_coord;
      info.y = y_coord;
      info.z = z_coord;
      info.Nlocal = NumMyRows() / NumPDEEqns_;
      info.Naggregates = 1;
      ML_Operator_AmalgamateAndDropWeak(&(ml_->Amat[LevelID_[0]]),
                                        NumPDEEqns_, 0.0);

      for (int ieqn = 0; ieqn < NumPDEEqns_; ++ieqn) {
        for (int j = 0; j < NumMyRows(); j += NumPDEEqns_) {
          plot_me[j / NumPDEEqns_] = (*LHS)[j + ieqn];
        }
        char FileName[80];
        sprintf(FileName, "nullspace-mode%d-eq%d.xyz", istep, ieqn);
        if (verbose_)
          std::cout << PrintMsg_ << "writing file " << FileName
                    << "..." << std::endl;
        ML_Aggregate_VisualizeXYZ(info, FileName,
                                  ml_->comm, &plot_me[0]);
      }
      ML_Operator_UnAmalgamateAndDropWeak(&(ml_->Amat[LevelID_[0]]),
                                          NumPDEEqns_, 0.0);
    }

    // Destroy the old preconditioner
    DestroyPreconditioner();

    // ==================================================== //
    // build the new preconditioner with the new null space //
    // ==================================================== //

    List_.set("null space: type", "pre-computed");
    List_.set("null space: dimension", NewNullSpaceSize);
    List_.set("null space: vectors", NewNullSpace);
    ML_CHK_ERR(ComputePreconditioner());

    // The previous step's null space was allocated with new[], so it
    // must be released with delete[].  The user's TentativeNullSpace
    // (istep == 0) is never freed here.
    if (istep && (istep != NumAdaptiveVectors))
      delete[] OldNullSpace;

    OldNullSpace = NewNullSpace;
    OldNullSpaceSize = NewNullSpaceSize;
  }

  // keep track of this pointer; it will be delete'd later
  NullSpaceToFree_ = NewNullSpace;

  delete LHS;
  delete RHS;

  return(0);
}
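The calling contract described in the comment at the top of ComputeAdaptivePreconditioner() (the caller allocates, fills, and later deletes the tentative null space) looks roughly like the sketch below. Here Prec is a hypothetical, already-constructed MultiLevelPreconditioner, and a single constant vector is the simplest tentative null space for a scalar PDE.

// Sketch: user-managed tentative null space for the adaptive setup.
int n = Prec->OperatorDomainMap ().NumMyElements ();
double* tentative = new double[n];
for (int i = 0; i < n; ++i)
  tentative[i] = 1.0;          // one constant vector
Prec->ComputeAdaptivePreconditioner (1, tentative);
delete [] tentative;           // the caller still owns this array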
void exampleRoutine (const Epetra_Comm& comm, std::ostream& out)
{
  using std::endl;

  // Print out the Epetra software version.
  if (comm.MyPID () == 0) {
    out << Epetra_Version () << endl << endl;
  }

  // The type of global indices.  You could just set this to int,
  // but we want the example to work for Epetra64 as well.
#ifdef EPETRA_NO_32BIT_GLOBAL_INDICES
  // Epetra was compiled only with 64-bit global index support, so use
  // 64-bit global indices.
  typedef long long global_ordinal_type;
#else
  // Epetra was compiled with 32-bit global index support.  If
  // EPETRA_NO_64BIT_GLOBAL_INDICES is defined, it does not also
  // support 64-bit indices.
  typedef int global_ordinal_type;
#endif // EPETRA_NO_32BIT_GLOBAL_INDICES

  //////////////////////////////////////////////////////////////////////
  // Create some Epetra_Map objects
  //////////////////////////////////////////////////////////////////////

  //
  // Epetra has local and global Maps.  Local maps describe objects
  // that are replicated over all participating MPI processes.  Global
  // maps describe distributed objects.  You can do imports and
  // exports between local and global maps; this is how you would turn
  // locally replicated objects into distributed objects and vice
  // versa.
  //

  // The total (global, i.e., over all MPI processes) number of
  // entries in the Map.  This has the same type as that of global
  // indices, so it can represent very large values if Epetra was
  // built with 64-bit global index support.
  //
  // For this example, we scale the global number of entries in the
  // Map with the number of MPI processes.  That way, you can run this
  // example with any number of MPI processes and every process will
  // still have a positive number of entries.
  const global_ordinal_type numGlobalEntries = comm.NumProc () * 5;

  // Epetra can index the entries of a Map starting with 0 (C style),
  // 1 (Fortran style), or any base you want.  1-based indexing is
  // handy when interfacing with Fortran.  We choose 0-based indexing
  // here.  This also has the same type as that of global indices.
  const global_ordinal_type indexBase = 0;

  // Construct a Map that puts the same number of equations on each
  // (MPI) process.  The Epetra_Comm is passed in by value, but that's
  // OK, because Epetra_Comm has shallow copy semantics.  (Its copy
  // constructor and assignment operator do not call MPI_Comm_dup;
  // they just pass along the MPI_Comm.)
  Epetra_Map contigMap (numGlobalEntries, indexBase, comm);

  // contigMap is contiguous by construction.
  if (! contigMap.LinearMap ()) {
    throw std::logic_error ("The supposedly contiguous Map isn't contiguous.");
  }

  // Let's create a second Map.  It will have the same number of
  // global entries per process, but will distribute them differently,
  // in round-robin (1-D cyclic) fashion instead of contiguously.

  // We'll use the version of the Map constructor that takes, on each
  // MPI process, a list of the global indices in the Map belonging to
  // that process.  You can use this constructor to construct an
  // overlapping (also called "not 1-to-1") Map, in which one or more
  // entries are owned by multiple processes.  We don't do that here;
  // we make a nonoverlapping (also called "1-to-1") Map.
  const int numGblIndsPerProc = 5;
  global_ordinal_type* gblIndList = new global_ordinal_type [numGblIndsPerProc];

  const int numProcs = comm.NumProc ();
  const int myRank = comm.MyPID ();
  for (int k = 0; k < numGblIndsPerProc; ++k) {
    gblIndList[k] = myRank + k*numProcs;
  }

  Epetra_Map cyclicMap (numGlobalEntries, numGblIndsPerProc,
                        gblIndList, indexBase, comm);
  // The above constructor makes a deep copy of the input index list,
  // so it's safe to deallocate that list after this constructor
  // completes.
  if (gblIndList != NULL) {
    delete [] gblIndList;
    gblIndList = NULL;
  }

  // If there's more than one MPI process in the communicator, then
  // cyclicMap is definitely NOT contiguous.
  if (comm.NumProc () > 1 && cyclicMap.LinearMap ()) {
    throw std::logic_error ("The cyclic Map claims to be contiguous.");
  }

  // contigMap and cyclicMap should always be compatible.  However, if
  // the communicator contains more than 1 process, then contigMap and
  // cyclicMap are NOT the same.
  // if (! contigMap.isCompatible (*cyclicMap)) {
  //   throw std::logic_error ("contigMap should be compatible with cyclicMap, "
  //                           "but it's not.");
  // }
  if (comm.NumProc () > 1 && contigMap.SameAs (cyclicMap)) {
    throw std::logic_error ("contigMap should not be the same as cyclicMap.");
  }

  //////////////////////////////////////////////////////////////////////
  // We have maps now, so we can create vectors.
  //////////////////////////////////////////////////////////////////////

  // Create an Epetra_Vector with the contiguous Map we created above.
  // This version of the constructor will fill the vector with zeros.
  // The Vector constructor takes a Map by value, but that's OK,
  // because Epetra_Map has shallow copy semantics.  It uses reference
  // counting internally to avoid copying data unnecessarily.
  Epetra_Vector x (contigMap);

  // The copy constructor performs a deep copy.
  // x and y have the same Map.
  Epetra_Vector y (x);

  // Create a Vector with the 1-D cyclic Map.  Calling the constructor
  // with false for the second argument leaves the data uninitialized,
  // so that you can fill it later without paying the cost of
  // initially filling it with zeros.
  Epetra_Vector z (cyclicMap, false);

  // Set the entries of z to (pseudo)random numbers.  Please don't
  // consider this a good parallel pseudorandom number generator.
  (void) z.Random ();

  // Set the entries of x to all ones.
  (void) x.PutScalar (1.0);

  // Define some constants for use below.
  const double alpha = 3.14159;
  const double beta = 2.71828;
  const double gamma = -10.0;

  // x = beta*x + alpha*z
  //
  // This is a legal operation!  Even though the Maps of x and z are
  // not the same, their Maps are compatible.  Whether it makes sense
  // or not depends on your application.
  (void) x.Update (alpha, z, beta);

  (void) y.PutScalar (42.0); // Set all entries of y to 42.0
  // y = gamma*y + alpha*x + beta*z
  y.Update (alpha, x, beta, z, gamma);

  // Compute the 2-norm of y.  Epetra vectors hold double entries, so
  // their norms are always of type double as well.
  double theNorm = 0.0;
  (void) y.Norm2 (&theNorm);

  // Print the norm of y on Proc 0.
  out << "Norm of y: " << theNorm << endl;
}
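exampleRoutine() takes the communicator as an argument, so a driver only has to set up MPI (or a serial communicator) around it. Below is a minimal sketch of such a main(), following the usual Epetra convention of switching on EPETRA_MPI; it assumes exampleRoutine() is visible in the same translation unit.

#include <iostream>
#ifdef EPETRA_MPI
#  include <mpi.h>
#  include <Epetra_MpiComm.h>
#else
#  include <Epetra_SerialComm.h>
#endif

int main (int argc, char* argv[])
{
#ifdef EPETRA_MPI
  MPI_Init (&argc, &argv);
  Epetra_MpiComm comm (MPI_COMM_WORLD);
#else
  Epetra_SerialComm comm;
#endif

  // Run the Map and Vector tutorial shown above.
  exampleRoutine (comm, std::cout);

#ifdef EPETRA_MPI
  MPI_Finalize ();
#endif
  return 0;
}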
int main(int argc, char *argv[])
{
#ifdef EPETRA_MPI
  MPI_Init(&argc, &argv);
  Epetra_MpiComm Comm(MPI_COMM_WORLD);
#else
  Epetra_SerialComm Comm;
#endif

  Epetra_Time Time(Comm);

  // This example reads an MSR matrix from a file using the Aztec
  // utilities, then builds the linear problem by hand.

  // create Aztec stuff
  int proc_config[AZ_PROC_SIZE], options[AZ_OPTIONS_SIZE];

#ifdef ML_MPI
  /* get number of processors and the name of this processor */
  AZ_set_proc_config(proc_config, MPI_COMM_WORLD);
  int proc   = proc_config[AZ_node];
  int nprocs = proc_config[AZ_N_procs];
#else
  AZ_set_proc_config(proc_config, AZ_NOT_MPI);
  int proc   = 0;
  int nprocs = 1;
#endif

  // read in the matrix size
  FILE* fp = fopen("ExampleMatrices/cantilever2D/data_matrix.txt", "r");
  if (fp == NULL) {
    cout << "cannot open ExampleMatrices/cantilever2D/data_matrix.txt" << endl;
#ifdef EPETRA_MPI
    MPI_Finalize();
#endif
    return(1);
  }
  int leng;
  fscanf(fp, "%d", &leng);
  fclose(fp); // AZ_input_msr_matrix() reopens the file itself below

  int num_PDE_eqns = 2;
  int N_grid_pts = leng / num_PDE_eqns;

  // make a linear distribution of the matrix respecting the block size
  int leng1 = leng / nprocs;
  int leng2 = leng - leng1 * nprocs;

  if (proc >= leng2) {
    leng2 += proc * leng1;
  } else {
    leng1++;
    leng2 = proc * leng1;
  }
  int N_update = leng1;
  int* update = new int[N_update + 1];
  int i;
  double* val = NULL;
  int* bindx = NULL;
  for (i = 0; i < N_update; i++) update[i] = i + leng2;

  // create the Epetra_CrsMatrix
  Epetra_Map* StandardMap = new Epetra_Map(leng, N_update, update, 0, Comm);
  Epetra_CrsMatrix* A = new Epetra_CrsMatrix(Copy, *StandardMap, 1);

  AZ_input_msr_matrix("ExampleMatrices/cantilever2D/data_matrix.txt",
                      update, &val, &bindx, N_update, proc_config);

  // copy the local MSR rows into the Epetra matrix.  Loop over the
  // N_update locally owned rows only; update[] and the MSR arrays
  // have local, not global, length.
  for (i = 0; i < N_update; i++) {
    int row = update[i];
    // diagonal entry
    A->SumIntoGlobalValues(row, 1, &(val[i]), &row);
    // off-diagonal entries
    A->SumIntoGlobalValues(row, bindx[i+1] - bindx[i],
                           &(val[bindx[i]]), &(bindx[bindx[i]]));
  }

  A->FillComplete(); // called TransformToLocal() in older Epetra releases

  // create solution and right-hand side (MultiVectors are fine as well)
  Epetra_Vector* LHS = new Epetra_Vector(A->OperatorDomainMap());
  Epetra_Vector* RHS = new Epetra_Vector(A->OperatorRangeMap());
  LHS->Random();
  RHS->Random();

  // build the Epetra linear problem
  Epetra_LinearProblem Problem(A, LHS, RHS);

  // construct a solver object for this problem
  AztecOO solver(Problem);

  // =========================== begin of ML part ===========================

  // create a parameter list for ML options
  ParameterList MLList;

  // set defaults for classic smoothed aggregation
  ML_Epetra::SetDefaults("SA", MLList);
  MLList.set("aggregation: damping factor", 0.0);

  // number of relaxation sweeps
  MLList.set("adaptive: max sweeps", 10);
  // number of additional null space vectors to compute
  MLList.set("adaptive: num vectors", 2);

#if 1
  ML_Epetra::MultiLevelPreconditioner* MLPrec =
    new ML_Epetra::MultiLevelPreconditioner(dynamic_cast<Epetra_RowMatrix&>(*A),
                                            MLList, false);

  // Need to allocate and fill the null space (here the default one for
  // a 2-equation PDE: two constant vectors, interleaved by equation).
  // This vector is no longer needed after the call to
  // ComputeAdaptivePreconditioner().
  int NullSpaceSize = 2;
  vector<double> NullSpace(NullSpaceSize * A->NumMyRows());
  // first null space vector: (1, 0, 1, 0, ...)
  for (i = 0; i < A->NumMyRows(); ++i) {
    NullSpace[i] = 1.0;
    ++i;
    NullSpace[i] = 0.0;
  }
  // second null space vector: (0, 1, 0, 1, ...)
  for (i = A->NumMyRows(); i < 2 * A->NumMyRows(); ++i) {
    NullSpace[i] = 0.0;
    ++i;
    NullSpace[i] = 1.0;
  }

  MLPrec->ComputeAdaptivePreconditioner(NullSpaceSize, &NullSpace[0]);
#else
  ML_Epetra::MultiLevelPreconditioner* MLPrec =
    new ML_Epetra::MultiLevelPreconditioner(dynamic_cast<Epetra_RowMatrix&>(*A),
                                            MLList);
#endif

  // tell AztecOO to use this preconditioner, then solve
  solver.SetPrecOperator(MLPrec);

  // =========================== end of ML part =============================

  solver.SetAztecOption(AZ_solver, AZ_gmres);
  solver.SetAztecOption(AZ_output, 32);

  // solve with at most 1550 iterations and a tolerance of 1e-5
  solver.Iterate(1550, 1e-5);

  delete MLPrec;

  // compute the true residual ||b - A x||_2.  (No exact solution is
  // available in this driver, so only the residual is reported.)
  double residual;
  Epetra_Vector Ax(RHS->Map());
  A->Multiply(false, *LHS, Ax);
  Ax.Update(1.0, *RHS, -1.0); // Ax := b - A*x
  Ax.Norm2(&residual);

  if (Comm.MyPID() == 0) {
    cout << "||b-Ax||_2 = " << residual << endl;
    cout << "Total Time = " << Time.ElapsedTime() << endl;
  }

  delete LHS;
  delete RHS;
  delete A;
  delete StandardMap;
  delete [] update;

#ifdef EPETRA_MPI
  MPI_Finalize();
#endif

  return(0);
}