void Iterative_Inverse_Operator::operator () (const Epetra_MultiVector &b, Epetra_MultiVector &x)
{
  // Initialize the solution to zero
  x.PutScalar( 0.0 );

  // Reset the solver, problem, and status test for the next solve (HKT)
  pProb->setProblem( Teuchos::rcp(&x, false), Teuchos::rcp(&b, false) );

  timer.start();
  Belos::ReturnType ret = pBelos->solve();
  timer.stop();

  int pid = pComm->MyPID();

  if (pid == 0 && print) {
    if (ret == Belos::Converged) {
      std::cout << std::endl << "pid[" << pid << "] Block GMRES converged" << std::endl;
      std::cout << "Solution time: " << timer.totalElapsedTime() << std::endl;
    }
    else
      std::cout << std::endl << "pid[" << pid << "] Block GMRES did not converge" << std::endl;
  }
}
void Stokhos::EpetraMultiVectorOrthogPoly::
computeStandardDeviation(Epetra_MultiVector& v) const
{
  const Teuchos::Array<double>& nrm2 = this->basis_->norm_squared();
  v.PutScalar(0.0);
  // Accumulate the variance, skipping the i=0 (mean) term:
  // Var = sum_{i>=1} c_i^2 <psi_i^2>
  for (int i=1; i<this->size(); i++)
    v.Multiply(nrm2[i], *coeff_[i], *coeff_[i], 1.0);
  for (int j=0; j<v.NumVectors(); j++)
    for (int i=0; i<v.MyLength(); i++)
      v[j][i] = std::sqrt(v[j][i]);
}
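// A minimal standalone sketch of the same formula for a scalar polynomial
// chaos expansion: Var = sum_{i>=1} c_i^2 <psi_i^2>, then a square root.
// The coefficients and norms below are made-up stand-ins with no Trilinos
// dependency; they only illustrate the computation performed above.
#include <cmath>
#include <cstdio>
#include <vector>

double pce_std_dev(const std::vector<double>& coeff,
                   const std::vector<double>& norm_squared) {
  double var = 0.0;
  for (std::size_t i = 1; i < coeff.size(); ++i)  // skip the mean term i = 0
    var += coeff[i] * coeff[i] * norm_squared[i];
  return std::sqrt(var);
}

int main() {
  // Hypothetical 3-term expansion in a normalized basis (<psi_i^2> = 1).
  std::vector<double> c = {2.0, 0.5, 0.25};
  std::vector<double> nrm2 = {1.0, 1.0, 1.0};
  std::printf("std dev = %g\n", pce_std_dev(c, nrm2)); // sqrt(0.25 + 0.0625)
  return 0;
}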
int Piro::Epetra::MatrixFreeOperator::Apply
    (const Epetra_MultiVector& V, Epetra_MultiVector& Y) const
{
  TEUCHOS_TEST_FOR_EXCEPTION(!baseIsSet, std::logic_error,
     " Piro::Epetra::MatrixFreeOperator must have Base values set before Apply");

  // Compute Wv = y by perturbing x.
  // Note: V(0) returns a raw pointer into V, so wrap the first column in
  // view/copy vectors instead.
  Teuchos::RCP<Epetra_Vector> v = Teuchos::rcp(new Epetra_Vector(View, V, 0));
  Teuchos::RCP<Epetra_Vector> y = Teuchos::rcp(new Epetra_Vector(Copy, Y, 0));
  Teuchos::RCP<const Epetra_Vector> xBase = modelInArgs.get_x();
  Teuchos::RCP<const Epetra_Vector> xdotBase;
  if (haveXdot) xdotBase = modelInArgs.get_x_dot();

  double vectorNorm;
  v->Norm2(&vectorNorm);

  // Any operator times the zero vector is the zero vector.
  if (vectorNorm == 0.0) {
    Y.PutScalar(0.0);
    return 0;
  }

  double eta = lambda * (lambda + solutionNorm/vectorNorm);

  xPert->Update(1.0, *xBase, eta, *v, 0.0);
  if (haveXdot)
    xdotPert->Update(1.0, *xdotBase, eta, *v, 0.0);

  EpetraExt::ModelEvaluator::OutArgs modelOutArgs = model->createOutArgs();

  modelInArgs.set_x(xPert);
  if (haveXdot) modelInArgs.set_x_dot(xdotPert);

  // Alert the model that this is a perturbed calculation, in case it
  // does something different.
  EpetraExt::ModelEvaluator::Evaluation<Epetra_Vector> fPertEval(fPert,
    EpetraExt::ModelEvaluator::EVAL_TYPE_APPROX_DERIV);
  modelOutArgs.set_f(fPertEval);

  model->evalModel(modelInArgs, modelOutArgs);

  modelInArgs.set_x(xBase);
  if (haveXdot) modelInArgs.set_x_dot(xdotBase);
  modelOutArgs.set_f(fBase);

  // First-order finite difference: y = (f(x + eta*v) - f(x)) / eta
  y->Update(1.0, *fPert, -1.0, *fBase, 0.0);
  y->Scale(1.0/eta);

  Teuchos::RCP<Epetra_Vector> Y0 = Teuchos::rcp(new Epetra_Vector(View, Y, 0));
  *Y0 = *y; // copy in

  return 0;
}
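// The Apply above is a matrix-free directional derivative: it evaluates the
// model at a perturbed state and forms Wv ~ (f(x + eta*v) - f(x))/eta.
// A self-contained sketch of the same finite-difference pattern with the
// same eta heuristic; the residual f and the lambda default here are
// illustrative stand-ins, not Piro's actual model or parameters.
#include <cmath>
#include <cstdio>
#include <vector>

using Vec = std::vector<double>;

// Hypothetical residual f(x); stands in for the ModelEvaluator.
Vec f(const Vec& x) { return {x[0]*x[0] - x[1], x[0] + x[1]*x[1]}; }

double norm2(const Vec& v) {
  double s = 0.0; for (double vi : v) s += vi*vi; return std::sqrt(s);
}

// Jv ~ (f(x + eta*v) - f(x)) / eta with eta = lambda*(lambda + ||x||/||v||).
Vec jac_vec(const Vec& x, const Vec& v, double lambda = 1.0e-6) {
  double eta = lambda * (lambda + norm2(x)/norm2(v));
  Vec xp(x), fx = f(x), out(x.size());
  for (std::size_t i = 0; i < x.size(); ++i) xp[i] += eta * v[i];
  Vec fp = f(xp);
  for (std::size_t i = 0; i < x.size(); ++i) out[i] = (fp[i] - fx[i]) / eta;
  return out;
}

int main() {
  Vec Jv = jac_vec({1.0, 2.0}, {1.0, 0.0});
  std::printf("Jv = (%g, %g)\n", Jv[0], Jv[1]); // exact: (2, 1)
  return 0;
}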
//=========================================================================
// Returns the result of a Epetra_Operator inverse applied to an Epetra_MultiVector X in Y.
int EpetraExt_PointToBlockDiagPermute::ApplyInverse(const Epetra_MultiVector& X,
                                                    Epetra_MultiVector& Y) const
{
  // Stuff borrowed from Epetra_CrsMatrix
  int NumVectors = X.NumVectors();
  if (NumVectors!=Y.NumVectors()) {
    EPETRA_CHK_ERR(-2); // Need same number of vectors in each MV
  }

  const Epetra_MultiVector *Xp=&X;
  Epetra_MultiVector *Yp=&Y;

  // Allocate temp workspace if X==Y and there are no imports or exports
  Epetra_MultiVector * Xcopy = 0;
  if (&X==&Y && Importer_==0 && Exporter_==0) {
    Xcopy = new Epetra_MultiVector(X);
    Xp=Xcopy;
  }

  UpdateImportVector(NumVectors); // Make sure Import and Export Vectors are compatible
  UpdateExportVector(NumVectors);

  // If we have a non-trivial importer, we must import elements that are
  // permuted or are on other processors
  if (Importer_){
    EPETRA_CHK_ERR(ImportVector_->Import(X, *Importer_, Insert));
    Xp=ImportVector_;
  }

  // If we have a non-trivial exporter, we must export elements that are
  // permuted or belong to other processors
  if (Exporter_) {
    Yp=ExportVector_;
  }

  // Do the matvec
  BDMat_->ApplyInverse(*Xp,*Yp);

  // Export if needed
  if (Exporter_) {
    Y.PutScalar(0.0); // Make sure target is zero
    Y.Export(*ExportVector_, *Exporter_, Add); // Fill Y with Values from export vector
  }

  // Cleanup
  if(Xcopy) {
    delete Xcopy;
    EPETRA_CHK_ERR(1); // Return positive code to alert the user about needing extra copy of X
    return 1;
  }

  return 0;
}
int EpetraSamplingOperator::Apply(const Epetra_MultiVector &X, Epetra_MultiVector &Y) const
{
  TEUCHOS_ASSERT(map_.PointSameAs(X.Map()) && map_.PointSameAs(Y.Map()));
  TEUCHOS_ASSERT(X.NumVectors() == Y.NumVectors());

  Y.PutScalar(0.0);

  for (int iVec = 0; iVec < X.NumVectors(); ++iVec) {
    const ArrayView<const double> sourceVec(X[iVec], X.MyLength());
    const ArrayView<double> targetVec(Y[iVec], Y.MyLength());
    for (Array<GlobalIndex>::const_iterator it = sampleLIDs_.begin(),
         it_end = sampleLIDs_.end(); it != it_end; ++it) {
      targetVec[*it] = sourceVec[*it];
    }
  }

  return 0;
}
int AmesosGenOp::Apply (const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
{
  if (problem_ == NULL) {
    throw std::logic_error ("AmesosGenOp::Apply: problem_ is NULL");
  }
  if (massMtx_.is_null ()) {
    throw std::logic_error ("AmesosGenOp::Apply: massMtx_ is null");
  }
  if (solver_.is_null ()) {
    throw std::logic_error ("AmesosGenOp::Apply: solver_ is null");
  }

  if (! useTranspose_) {
    // Storage for M*X
    Epetra_MultiVector MX (X.Map (), X.NumVectors ());

    // Apply M*X
    massMtx_->Apply (X, MX);
    Y.PutScalar (0.0);

    // Set the LHS and RHS
    problem_->SetRHS (&MX);
    problem_->SetLHS (&Y);

    // Solve the linear system A*Y = MX
    solver_->Solve ();
  }
  else { // apply the transposed operator
    // Storage for A^{-T}*X
    Epetra_MultiVector ATX (X.Map (), X.NumVectors ());
    Epetra_MultiVector tmpX = const_cast<Epetra_MultiVector&> (X);

    // Set the LHS and RHS
    problem_->SetRHS (&tmpX);
    problem_->SetLHS (&ATX);

    // Solve the linear system A^T * ATX = X
    solver_->Solve ();

    // Apply M*ATX
    massMtx_->Apply (ATX, Y);
  }

  return 0; // the method completed correctly
}
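// AmesosGenOp wraps a direct solve so that an eigensolver sees the operator
// A^{-1}M (or M A^{-T} for the transpose), i.e. a spectral transformation of
// the pencil A x = lambda M x. A toy sketch of how an iterative eigensolver
// consumes such an Apply-only operator: plain power iteration on an abstract
// std::function, with a diagonal stand-in for A^{-1}M (everything here is
// illustrative, not the Anasazi interface).
#include <cmath>
#include <cstdio>
#include <functional>
#include <vector>

using Vec = std::vector<double>;
using Op  = std::function<Vec(const Vec&)>;

// Each iterate applies y = op(x) and renormalizes; for a symmetric positive
// operator the norm growth converges to the dominant eigenvalue.
double power_iteration(const Op& op, Vec x, int iters = 100) {
  double lambda = 0.0;
  for (int k = 0; k < iters; ++k) {
    Vec y = op(x);
    double nrm = 0.0;
    for (double yi : y) nrm += yi*yi;
    nrm = std::sqrt(nrm);
    for (std::size_t i = 0; i < y.size(); ++i) x[i] = y[i]/nrm;
    lambda = nrm;
  }
  return lambda;
}

int main() {
  // Toy stand-in for A^{-1}M: the diagonal operator diag(3, 1, 0.5).
  Op apply = [](const Vec& x) { return Vec{3*x[0], x[1], 0.5*x[2]}; };
  std::printf("dominant eigenvalue ~ %g\n",
              power_iteration(apply, {1.0, 1.0, 1.0})); // ~3
  return 0;
}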
Teuchos::RCP<Epetra_LinearProblem>
build_problem_mm(Teuchos::ParameterList& test_params, Epetra_CrsMatrix* A, Epetra_MultiVector* b)
{
  const Epetra_Map& rowmap = A->RowMap();

  Epetra_MultiVector* x = new Epetra_MultiVector(rowmap, 1);
  if (b == NULL) {
    std::cout << "creating b = A*random" << std::endl;
    b = new Epetra_MultiVector(rowmap, 1);
    x->Random();
    A->Apply(*x, *b);
  }
  x->PutScalar(0);

  Teuchos::RCP<Epetra_LinearProblem> problem =
    Teuchos::rcp(new Epetra_LinearProblem(A,x,b));
  return problem;
}
int AmesosBucklingOp::Apply(const Epetra_MultiVector& X, Epetra_MultiVector& Y ) const
{
  // Storage for A*X
  Epetra_MultiVector AX(X.Map(),X.NumVectors());

  // Apply A*X
  stiffMtx_->Apply(X, AX);
  Y.PutScalar(0.0);

  // Set the LHS and RHS
  problem_->SetRHS(&AX);
  problem_->SetLHS(&Y);

  // Solve the linear system (A-sigma*M)*Y = AX
  solver_->Solve();

  return 0;
}
//==============================================================================
int Ifpack_Polynomial::
ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
{
  if (!IsComputed())
    IFPACK_CHK_ERR(-3);

  if (PolyDegree_ == 0)
    return 0;

  int nVec = X.NumVectors();
  if (nVec != Y.NumVectors())
    IFPACK_CHK_ERR(-2);

  Time_->ResetStartTime();

  Epetra_MultiVector Xcopy(X);
  if(ZeroStartingSolution_==true) {
    Y.PutScalar(0.0);
  }

  // mfh 20 Mar 2014: IBD never gets used, so I'm commenting out the
  // following lines of code in order to forestall build warnings.
// #ifdef HAVE_IFPACK_EPETRAEXT
//   EpetraExt_PointToBlockDiagPermute* IBD=0;
//   if (UseBlockMode_) IBD=&*InvBlockDiagonal_;
// #endif

  Y.Update(-coeff_[1], Xcopy, 1.0);
  for (int ii = 2; ii < static_cast<int> (coeff_.size ()); ++ii) {
    const Epetra_MultiVector V(Xcopy);
    Operator_->Apply(V,Xcopy);
    Xcopy.Multiply(1.0, *InvDiagonal_, Xcopy, 0.0);

    // Update Y
    Y.Update(-coeff_[ii], Xcopy, 1.0);
  }

  // Flops are updated in each of the following.
  ++NumApplyInverse_;
  ApplyInverseTime_ += Time_->ElapsedTime();
  return(0);
}
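// Ifpack_Polynomial applies a matrix polynomial with one operator apply and
// one diagonal scaling per term, keeping a running power of the scaled
// operator. A standalone sketch of that accumulate-one-power-per-term
// pattern on a small dense matrix; poly_apply and its coefficients are
// illustrative, not the Ifpack recurrence verbatim.
#include <cstdio>
#include <vector>

using Vec = std::vector<double>;
using Mat = std::vector<Vec>;

Vec matvec(const Mat& A, const Vec& x) {
  Vec y(x.size(), 0.0);
  for (std::size_t i = 0; i < A.size(); ++i)
    for (std::size_t j = 0; j < x.size(); ++j)
      y[i] += A[i][j]*x[j];
  return y;
}

// Accumulate y = sum_k c[k] * A^k * x with one matvec per term.
Vec poly_apply(const Mat& A, const Vec& c, const Vec& x) {
  Vec y(x.size(), 0.0), p = x;            // p = A^0 x
  for (std::size_t k = 0; k < c.size(); ++k) {
    for (std::size_t i = 0; i < y.size(); ++i) y[i] += c[k]*p[i];
    p = matvec(A, p);                     // p = A^{k+1} x
  }
  return y;
}

int main() {
  Mat A = {{2, 0}, {0, 3}};
  Vec y = poly_apply(A, {1.0, -0.5}, {1.0, 1.0}); // (I - 0.5 A) x
  std::printf("y = (%g, %g)\n", y[0], y[1]);      // (0, -0.5)
  return 0;
}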
//==============================================================================
// Note that Ifpack_PointRelaxation and Jacobi is much faster than
// Ifpack_AdditiveSchwarz<Ifpack_PointRelaxation> (because of the
// way the matrix-vector product is performed).
//
// Another ML-related observation is that the starting solution (in Y)
// is NOT supposed to be zero. This may slow down the application of just
// one sweep of Jacobi.
//
int Ifpack_PointRelaxation::
ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
{
  if (!IsComputed())
    IFPACK_CHK_ERR(-3);

  if (X.NumVectors() != Y.NumVectors())
    IFPACK_CHK_ERR(-2);

  Time_->ResetStartTime();

  // AztecOO gives X and Y pointing to the same memory location,
  // need to create an auxiliary vector, Xcopy
  Teuchos::RefCountPtr< const Epetra_MultiVector > Xcopy;
  if (X.Pointers()[0] == Y.Pointers()[0])
    Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) );
  else
    Xcopy = Teuchos::rcp( &X, false );

  if (ZeroStartingSolution_)
    Y.PutScalar(0.0);

  // Flops are updated in each of the following.
  switch (PrecType_) {
  case IFPACK_JACOBI:
    IFPACK_CHK_ERR(ApplyInverseJacobi(*Xcopy,Y));
    break;
  case IFPACK_GS:
    IFPACK_CHK_ERR(ApplyInverseGS(*Xcopy,Y));
    break;
  case IFPACK_SGS:
    IFPACK_CHK_ERR(ApplyInverseSGS(*Xcopy,Y));
    break;
  default:
    IFPACK_CHK_ERR(-1); // something wrong
  }

  ++NumApplyInverse_;
  ApplyInverseTime_ += Time_->ElapsedTime();
  return(0);
}
//==============================================================================
int Ifpack_PointRelaxation::
ApplyInverseSGS(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
{
  if (ZeroStartingSolution_)
    Y.PutScalar(0.0);

  const Epetra_CrsMatrix* CrsMatrix = dynamic_cast<const Epetra_CrsMatrix*>(&*Matrix_);
  // try to pick the best option; performance may be improved
  // if several sweeps are used.
  if (CrsMatrix != 0)
  {
    if (CrsMatrix->StorageOptimized() && LocalSmoothingIndices_)
      return(ApplyInverseSGS_LocalFastCrsMatrix(CrsMatrix, X, Y));
    else if (CrsMatrix->StorageOptimized())
      return(ApplyInverseSGS_FastCrsMatrix(CrsMatrix, X, Y));
    else
      return(ApplyInverseSGS_CrsMatrix(CrsMatrix, X, Y));
  }
  else
    return(ApplyInverseSGS_RowMatrix(X, Y));
}
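// The dispatch above selects increasingly specialized symmetric Gauss-Seidel
// kernels. For reference, a minimal standalone sketch of one symmetric sweep
// (forward then backward) on a CSR matrix; the Ifpack kernels additionally
// handle damping, multiple sweeps, and off-processor columns.
#include <cstdio>
#include <vector>

struct Csr {
  std::vector<int> ptr, col;
  std::vector<double> val;
};

void sgs_sweep(const Csr& A, const std::vector<double>& b,
               std::vector<double>& x) {
  const int n = static_cast<int>(b.size());
  for (int pass = 0; pass < 2; ++pass) {
    for (int k = 0; k < n; ++k) {
      int i = (pass == 0) ? k : n - 1 - k;   // forward, then backward
      double diag = 0.0, sum = b[i];
      for (int p = A.ptr[i]; p < A.ptr[i+1]; ++p) {
        if (A.col[p] == i) diag = A.val[p];
        else               sum -= A.val[p]*x[A.col[p]];
      }
      x[i] = sum/diag;
    }
  }
}

int main() {
  // 1D Laplacian, 3 unknowns: tridiag(-1, 2, -1), b = (1, 0, 1).
  Csr A{{0, 2, 5, 7}, {0, 1, 0, 1, 2, 1, 2},
        {2, -1, -1, 2, -1, -1, 2}};
  std::vector<double> b{1, 0, 1}, x(3, 0.0);
  for (int s = 0; s < 25; ++s) sgs_sweep(A, b, x);
  std::printf("x = (%g, %g, %g)\n", x[0], x[1], x[2]); // -> (1, 1, 1)
  return 0;
}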
int Stokhos::ProductEpetraOperator::
Apply(const Epetra_MultiVector& Input, Epetra_MultiVector& Result) const
{
  if (useTranspose) {
    EpetraExt::BlockMultiVector sg_input(View, *range_base_map, Input);
    Epetra_MultiVector tmp(Result.Map(), Result.NumVectors());
    Result.PutScalar(0.0);
    for (int i=0; i<coeff_.size(); i++) {
      coeff_[i]->Apply(*(sg_input.GetBlock(i)), tmp);
      Result.Update(1.0, tmp, 1.0);
    }
  }
  else {
    EpetraExt::BlockMultiVector sg_result(View, *range_base_map, Result);
    for (int i=0; i<coeff_.size(); i++)
      coeff_[i]->Apply(Input, *(sg_result.GetBlock(i)));
  }

  return 0;
}
//==============================================================================
int Ifpack_DropFilter::
Multiply(bool TransA, const Epetra_MultiVector& X,
         Epetra_MultiVector& Y) const
{
  // NOTE: I suppose that the matrix has been localized,
  // hence all maps are trivial.
  int NumVectors = X.NumVectors();
  if (NumVectors != Y.NumVectors())
    IFPACK_CHK_ERR(-1);

  Y.PutScalar(0.0);

  std::vector<int> Indices(MaxNumEntries_);
  std::vector<double> Values(MaxNumEntries_);

  for (int i = 0 ; i < NumRows_ ; ++i) {
    int Nnz;
    ExtractMyRowCopy(i,MaxNumEntries_,Nnz,
                     &Values[0], &Indices[0]);
    if (!TransA) {
      // no transpose first
      for (int j = 0 ; j < NumVectors ; ++j)
        for (int k = 0 ; k < Nnz ; ++k)
          Y[j][i] += Values[k] * X[j][Indices[k]];
    }
    else {
      // transpose here
      for (int j = 0 ; j < NumVectors ; ++j)
        for (int k = 0 ; k < Nnz ; ++k)
          Y[j][Indices[k]] += Values[k] * X[j][i];
    }
  }

  return(0);
}
//==============================================================================
int Ifpack_Krylov::
ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const
{
  if (!IsComputed())
    IFPACK_CHK_ERR(-3);

  if (Iterations_ == 0)
    return 0;

  int nVec = X.NumVectors();
  if (nVec != Y.NumVectors())
    IFPACK_CHK_ERR(-2);

  Time_->ResetStartTime();

  // AztecOO gives X and Y pointing to the same memory location,
  // need to create an auxiliary vector, Xcopy
  Teuchos::RCP<Epetra_MultiVector> Xcopy = Teuchos::rcp( new Epetra_MultiVector(X) );
  if(ZeroStartingSolution_==true) {
    Y.PutScalar(0.0);
  }

#ifdef HAVE_IFPACK_AZTECOO
  AztecSolver_ -> SetLHS(&Y);
  AztecSolver_ -> SetRHS(&*Xcopy);
  AztecSolver_ -> Iterate(Iterations_,Tolerance_);
#else
  std::cout << "You need to configure IFPACK with support for AztecOO" << std::endl;
  std::cout << "to use this preconditioner. This may require --enable-aztecoo" << std::endl;
  std::cout << "in your configure script." << std::endl;
  IFPACK_CHK_ERR(-1);
#endif

  // Flops are updated in each of the following.
  ++NumApplyInverse_;
  ApplyInverseTime_ += Time_->ElapsedTime();
  return(0);
}
//==============================================================================
int Ifpack_SparsityFilter::
Multiply(bool TransA, const Epetra_MultiVector& X,
         Epetra_MultiVector& Y) const
{
  int NumVectors = X.NumVectors();
  if (NumVectors != Y.NumVectors())
    IFPACK_CHK_ERR(-1);

  Y.PutScalar(0.0);

  std::vector<int> Indices(MaxNumEntries_);
  std::vector<double> Values(MaxNumEntries_);

  for (int i = 0 ; i < A_->NumMyRows() ; ++i) {
    int Nnz;
    ExtractMyRowCopy(i,MaxNumEntries_,Nnz,
                     &Values[0], &Indices[0]);
    if (!TransA) {
      // no transpose first
      for (int j = 0 ; j < NumVectors ; ++j)
        for (int k = 0 ; k < Nnz ; ++k)
          Y[j][i] += Values[k] * X[j][Indices[k]];
    }
    else {
      // transpose here
      for (int j = 0 ; j < NumVectors ; ++j)
        for (int k = 0 ; k < Nnz ; ++k)
          Y[j][Indices[k]] += Values[k] * X[j][i];
    }
  }

  return(0);
}
virtual int Multiply(bool TransA, const Epetra_MultiVector& X,
                     Epetra_MultiVector& Y) const
{
  // Note: TransA is ignored here; only the non-transposed product is formed.
  Epetra_MultiVector Xtmp(RowMatrixColMap(), X.NumVectors());
  Xtmp.Import(X, *RowMatrixImporter(), Insert);

  std::vector<int> Indices(MaxNumEntries());
  std::vector<double> Values(MaxNumEntries());

  Y.PutScalar(0.0);

  for (int i = 0 ; i < NumMyRows() ; ++i) {
    int NumEntries;
    // use the inlined function
    getrow(i, MaxNumEntries(), NumEntries, &Values[0], &Indices[0]);
    for (int j = 0 ; j < NumEntries ; ++j)
      for (int k = 0 ; k < Y.NumVectors() ; ++k)
        Y[k][i] += Values[j] * Xtmp[k][Indices[j]];
  }

  return(0);
}
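// The Multiply() methods above share one row-wise kernel: extract a row
// once, then gather into y[i] for the normal product or scatter into
// y[Indices[k]] for the transpose. A self-contained sketch of that dual
// pattern on a tiny row-major sparse structure (types here are illustrative).
#include <cstdio>
#include <vector>

struct Row { std::vector<int> idx; std::vector<double> val; };

std::vector<double> multiply(const std::vector<Row>& A,
                             const std::vector<double>& x,
                             bool trans) {
  std::vector<double> y(x.size(), 0.0);
  for (std::size_t i = 0; i < A.size(); ++i)
    for (std::size_t k = 0; k < A[i].idx.size(); ++k) {
      if (!trans) y[i]           += A[i].val[k]*x[A[i].idx[k]]; // gather
      else        y[A[i].idx[k]] += A[i].val[k]*x[i];           // scatter
    }
  return y;
}

int main() {
  // A = [1 2; 0 3]
  std::vector<Row> A = {{{0, 1}, {1, 2}}, {{1}, {3}}};
  std::vector<double> x = {1, 1};
  auto y = multiply(A, x, false), yt = multiply(A, x, true);
  std::printf("A x = (%g, %g), A^T x = (%g, %g)\n", y[0], y[1], yt[0], yt[1]);
  // A x = (3, 3), A^T x = (1, 5)
  return 0;
}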
//-----------------------------------------------------------------------------
// Function      : N_LAS_AmesosGenOp::Apply()
// Purpose       : Applies the operator inv(A)*B*X = Y
// Special Notes :
// Scope         : Public
// Creator       : Heidi Thornquist, SNL
// Creation Date : 06/04/12
//-----------------------------------------------------------------------------
int N_LAS_AmesosGenOp::Apply(const Epetra_MultiVector& X, Epetra_MultiVector& Y ) const
{
  if (!useTranspose_) {
    // Storage for B*X
    Epetra_MultiVector BX(X.Map(),X.NumVectors());

    // Apply B*X
    B_->Apply(X, BX);
    Y.PutScalar(0.0);

    // Set the LHS and RHS
    problem_->SetRHS(&BX);
    problem_->SetLHS(&Y);

    // Solve the linear system A*Y = BX
    solver_->Solve();
  }
  else {
    // Storage for A^{-T}*X
    Epetra_MultiVector ATX(X.Map(),X.NumVectors());
    Epetra_MultiVector tmpX = const_cast<Epetra_MultiVector&>(X);

    // Set the LHS and RHS
    problem_->SetRHS(&tmpX);
    problem_->SetLHS(&ATX);

    // Solve the linear system A^T * ATX = X
    solver_->Solve();

    // Apply B*ATX
    B_->Apply(ATX, Y);
  }

  return 0;
}
int BlockPCGSolver::Solve(const Epetra_MultiVector &X, Epetra_MultiVector &Y) const
{
  int info = 0;
  int localVerbose = verbose*(MyComm.MyPID() == 0);

  int xr = X.MyLength();

  int wSize = 3*xr;

  if (lWorkSpace < wSize) {
    if (workSpace)
      delete[] workSpace;
    workSpace = new (std::nothrow) double[wSize];
    if (workSpace == 0) {
      info = -1;
      return info;
    }
    lWorkSpace = wSize;
  } // if (lWorkSpace < wSize)

  double *pointer = workSpace;

  Epetra_Vector r(View, X.Map(), pointer);
  pointer = pointer + xr;

  Epetra_Vector p(View, X.Map(), pointer);
  pointer = pointer + xr;

  // Note: Kp and z use the same memory space
  Epetra_Vector Kp(View, X.Map(), pointer);
  Epetra_Vector z(View, X.Map(), pointer);

  double tmp;
  double initNorm = 0.0, rNorm = 0.0, newRZ = 0.0, oldRZ = 0.0, alpha = 0.0;
  double tolSquare = tolCG*tolCG;

  memcpy(r.Values(), X.Values(), xr*sizeof(double));
  tmp = callBLAS.DOT(xr, r.Values(), 1, r.Values(), 1);
  MyComm.SumAll(&tmp, &initNorm, 1);

  Y.PutScalar(0.0);

  if (localVerbose > 1) {
    std::cout << std::endl;
    std::cout << " --- PCG Iterations --- " << std::endl;
  }

  int iter;
  for (iter = 1; iter <= iterMax; ++iter) {

    // Preconditioner application: z = M^{-1} r
    if (Prec) {
      Prec->ApplyInverse(r, z);
    }
    else {
      memcpy(z.Values(), r.Values(), xr*sizeof(double));
    }

    if (iter == 1) {
      tmp = callBLAS.DOT(xr, r.Values(), 1, z.Values(), 1);
      MyComm.SumAll(&tmp, &newRZ, 1);
      memcpy(p.Values(), z.Values(), xr*sizeof(double));
    }
    else {
      oldRZ = newRZ;
      tmp = callBLAS.DOT(xr, r.Values(), 1, z.Values(), 1);
      MyComm.SumAll(&tmp, &newRZ, 1);
      p.Update(1.0, z, newRZ/oldRZ);
    }

    K->Apply(p, Kp);

    tmp = callBLAS.DOT(xr, p.Values(), 1, Kp.Values(), 1);
    MyComm.SumAll(&tmp, &alpha, 1);
    alpha = newRZ/alpha;

    TEUCHOS_TEST_FOR_EXCEPTION(alpha <= 0.0, std::runtime_error,
      " !!! Non-positive value for p^TKp (" << alpha << ") !!!");

    callBLAS.AXPY(xr, alpha, p.Values(), 1, Y.Values(), 1);

    alpha *= -1.0;
    callBLAS.AXPY(xr, alpha, Kp.Values(), 1, r.Values(), 1);

    // Check convergence
    tmp = callBLAS.DOT(xr, r.Values(), 1, r.Values(), 1);
    MyComm.SumAll(&tmp, &rNorm, 1);

    if (localVerbose > 1) {
      std::cout << " Iter. " << iter;
      std::cout.precision(4);
      std::cout.setf(std::ios::scientific, std::ios::floatfield);
      std::cout << " Residual reduction " << std::sqrt(rNorm/initNorm) << std::endl;
    }

    if (rNorm <= tolSquare*initNorm)
      break;

  } // for (iter = 1; iter <= iterMax; ++iter)

  if (localVerbose == 1) {
    std::cout << std::endl;
    std::cout << " --- End of PCG solve ---" << std::endl;
    std::cout << " Iter. " << iter;
    std::cout.precision(4);
    std::cout.setf(std::ios::scientific, std::ios::floatfield);
    std::cout << " Residual reduction " << std::sqrt(rNorm/initNorm) << std::endl;
    std::cout << std::endl;
  }

  if (localVerbose > 1) {
    std::cout << std::endl;
  }

  numSolve += 1;

  minIter = (iter < minIter) ? iter : minIter;
  maxIter = (iter > maxIter) ? iter : maxIter;
  sumIter += iter;

  return info;
}
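// BlockPCGSolver is textbook preconditioned CG: z = M^{-1}r,
// beta = (r,z)_new/(r,z)_old, alpha = (r,z)/(p,Kp), with convergence tested
// on ||r||^2 against tol^2*||r_0||^2 (hence tolSquare). A compact
// unpreconditioned sketch of the same recurrences on a small SPD system;
// replacing z = r with z = M^{-1}r recovers the preconditioned version.
#include <cmath>
#include <cstdio>
#include <vector>

using Vec = std::vector<double>;

double dot(const Vec& a, const Vec& b) {
  double s = 0.0;
  for (std::size_t i = 0; i < a.size(); ++i) s += a[i]*b[i];
  return s;
}

void cg(const std::vector<Vec>& A, const Vec& b, Vec& x,
        int iterMax, double tol) {
  const std::size_t n = b.size();
  Vec r = b, p = b, Ap(n);                  // assumes x = 0 initially
  double rz = dot(r, r), initNorm = rz;
  for (int iter = 1; iter <= iterMax; ++iter) {
    for (std::size_t i = 0; i < n; ++i) Ap[i] = dot(A[i], p);
    double alpha = rz/dot(p, Ap);
    for (std::size_t i = 0; i < n; ++i) { x[i] += alpha*p[i]; r[i] -= alpha*Ap[i]; }
    double rzNew = dot(r, r);
    if (rzNew <= tol*tol*initNorm) break;   // same squared-tolerance test
    double beta = rzNew/rz;
    rz = rzNew;
    for (std::size_t i = 0; i < n; ++i) p[i] = r[i] + beta*p[i];
  }
}

int main() {
  std::vector<Vec> A = {{4, 1}, {1, 3}};    // SPD
  Vec b = {1, 2}, x = {0, 0};
  cg(A, b, x, 50, 1e-12);
  std::printf("x = (%g, %g)\n", x[0], x[1]); // ~ (1/11, 7/11)
  return 0;
}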
//==============================================================================
int LinearProblem_CrsSingletonFilter::UpdateReducedProblem(Epetra_LinearProblem * Problem)
{
  int i, j;

  if (Problem==0) EPETRA_CHK_ERR(-1); // Null problem pointer

  FullProblem_ = Problem;
  FullMatrix_ = dynamic_cast<Epetra_RowMatrix *>(Problem->GetMatrix());
  if (FullMatrix_==0) EPETRA_CHK_ERR(-2); // Need a RowMatrix
  if (Problem->GetRHS()==0) EPETRA_CHK_ERR(-3); // Need a RHS
  if (Problem->GetLHS()==0) EPETRA_CHK_ERR(-4); // Need a LHS
  if (!HaveReducedProblem_) EPETRA_CHK_ERR(-5); // Must have set up reduced problem

  // Create pointer to Full RHS, LHS
  Epetra_MultiVector * FullRHS = FullProblem()->GetRHS();
  Epetra_MultiVector * FullLHS = FullProblem()->GetLHS();
  int NumVectors = FullLHS->NumVectors();

  int NumEntries;
  int * Indices;
  double * Values;
  int NumMyRows = FullMatrix()->NumMyRows();
  int ColSingletonCounter = 0;
  for (i=0; i<NumMyRows; i++) {
    int curGRID = FullMatrixRowMap().GID(i);
    if (ReducedMatrixRowMap()->MyGID(curGRID)) { // Check if this row should go into reduced matrix
      EPETRA_CHK_ERR(GetRowGCIDs(i, NumEntries, Values, Indices)); // Get current row (indices global)
      int ierr = ReducedMatrix()->ReplaceGlobalValues(curGRID, NumEntries,
                                                      Values, Indices);
      // Positive errors will occur because we are submitting col entries that are not part of
      // reduced system.  However, because we specified a column map to the ReducedMatrix constructor
      // these extra column entries will be ignored and we will be politely reminded by a positive
      // error code
      if (ierr<0) EPETRA_CHK_ERR(ierr);
    }
    // Otherwise if singleton row we explicitly eliminate this row and solve for corresponding X value
    else {
      EPETRA_CHK_ERR(GetRow(i, NumEntries, Values, Indices)); // Get current row
      if (NumEntries==1) {
        double pivot = Values[0];
        if (pivot==0.0) EPETRA_CHK_ERR(-1); // Encountered zero row, unable to continue
        int indX = Indices[0];
        for (j=0; j<NumVectors; j++)
          (*tempExportX_)[j][indX] = (*FullRHS)[j][i]/pivot;
      }
      // Otherwise, this is a singleton column and we will scan for the pivot element needed
      // for post-solve equations
      else {
        j = ColSingletonPivotLIDs_[ColSingletonCounter];
        double pivot = Values[j];
        if (pivot==0.0) EPETRA_CHK_ERR(-2); // Encountered zero column, unable to continue
        ColSingletonPivots_[ColSingletonCounter] = pivot;
        ColSingletonCounter++;
      }
    }
  }

  assert(ColSingletonCounter==NumMyColSingletons_); // Sanity test

  // Update Reduced LHS (Puts any initial guess values into reduced system)
  ReducedLHS_->PutScalar(0.0); // zero out Reduced LHS
  EPETRA_CHK_ERR(ReducedLHS_->Import(*FullLHS, *Full2ReducedLHSImporter_, Insert));
  FullLHS->PutScalar(0.0); // zero out Full LHS since we will inject values as we get them

  // Construct Reduced RHS

  // Zero out temp space
  tempX_->PutScalar(0.0);
  tempB_->PutScalar(0.0);

  // Inject known X values into tempX for purpose of computing tempB = FullMatrix*tempX
  // Also inject into full X since we already know the solution
  if (FullMatrix()->RowMatrixImporter()!=0) {
    EPETRA_CHK_ERR(tempX_->Export(*tempExportX_, *FullMatrix()->RowMatrixImporter(), Add));
    EPETRA_CHK_ERR(FullLHS->Export(*tempExportX_, *FullMatrix()->RowMatrixImporter(), Add));
  }
  else {
    tempX_->Update(1.0, *tempExportX_, 0.0);
    FullLHS->Update(1.0, *tempExportX_, 0.0);
  }

  EPETRA_CHK_ERR(FullMatrix()->Multiply(false, *tempX_, *tempB_));

  EPETRA_CHK_ERR(tempB_->Update(1.0, *FullRHS, -1.0)); // tempB now has influence of already-known X values

  ReducedRHS_->PutScalar(0.0);
  EPETRA_CHK_ERR(ReducedRHS_->Import(*tempB_, *Full2ReducedRHSImporter_, Insert));
  return(0);
}
//==============================================================================
int LinearProblem_CrsSingletonFilter::ConstructReducedProblem(Epetra_LinearProblem * Problem)
{
  int i, j;
  if (HaveReducedProblem_) EPETRA_CHK_ERR(-1); // Setup already done once.  Cannot do it again
  if (Problem==0) EPETRA_CHK_ERR(-2); // Null problem pointer

  FullProblem_ = Problem;
  FullMatrix_ = dynamic_cast<Epetra_RowMatrix *>(Problem->GetMatrix());
  if (FullMatrix_==0) EPETRA_CHK_ERR(-3); // Need a RowMatrix
  if (Problem->GetRHS()==0) EPETRA_CHK_ERR(-4); // Need a RHS
  if (Problem->GetLHS()==0) EPETRA_CHK_ERR(-5); // Need a LHS

  // Generate reduced row and column maps
  Epetra_MapColoring & RowMapColors = *RowMapColors_;
  Epetra_MapColoring & ColMapColors = *ColMapColors_;

  ReducedMatrixRowMap_ = RowMapColors.GenerateMap(0);
  ReducedMatrixColMap_ = ColMapColors.GenerateMap(0);

  // Create domain and range map colorings by exporting map coloring of column and row maps
  if (FullMatrix()->RowMatrixImporter()!=0) {
    Epetra_MapColoring DomainMapColors(FullMatrixDomainMap());
    EPETRA_CHK_ERR(DomainMapColors.Export(*ColMapColors_, *FullMatrix()->RowMatrixImporter(), AbsMax));
    OrigReducedMatrixDomainMap_ = DomainMapColors.GenerateMap(0);
  }
  else
    OrigReducedMatrixDomainMap_ = ReducedMatrixColMap_;

  if (FullMatrixIsCrsMatrix_) {
    if (FullCrsMatrix()->Exporter()!=0) { // Non-trivial exporter
      Epetra_MapColoring RangeMapColors(FullMatrixRangeMap());
      EPETRA_CHK_ERR(RangeMapColors.Export(*RowMapColors_, *FullCrsMatrix()->Exporter(), AbsMax));
      ReducedMatrixRangeMap_ = RangeMapColors.GenerateMap(0);
    }
    else
      ReducedMatrixRangeMap_ = ReducedMatrixRowMap_;
  }
  else
    ReducedMatrixRangeMap_ = ReducedMatrixRowMap_;

  // Check to see if the reduced system domain and range maps are the same.
  // If not, we need to remap entries of the LHS multivector so that they are distributed
  // conformally with the rows of the reduced matrix and the RHS multivector
  SymmetricElimination_ = ReducedMatrixRangeMap_->SameAs(*OrigReducedMatrixDomainMap_);
  if (!SymmetricElimination_)
    ConstructRedistributeExporter(OrigReducedMatrixDomainMap_, ReducedMatrixRangeMap_,
                                  RedistributeDomainExporter_, ReducedMatrixDomainMap_);
  else {
    ReducedMatrixDomainMap_ = OrigReducedMatrixDomainMap_;
    OrigReducedMatrixDomainMap_ = 0;
    RedistributeDomainExporter_ = 0;
  }

  // Create pointer to Full RHS, LHS
  Epetra_MultiVector * FullRHS = FullProblem()->GetRHS();
  Epetra_MultiVector * FullLHS = FullProblem()->GetLHS();
  int NumVectors = FullLHS->NumVectors();

  // Create importers
  Full2ReducedLHSImporter_ = new Epetra_Import(*ReducedMatrixDomainMap(), FullMatrixDomainMap());
  Full2ReducedRHSImporter_ = new Epetra_Import(*ReducedMatrixRowMap(), FullRHS->Map());

  // Construct Reduced Matrix
  ReducedMatrix_ = new Epetra_CrsMatrix(Copy, *ReducedMatrixRowMap(), *ReducedMatrixColMap(), 0);

  // Create storage for temporary X values due to explicit elimination of rows
  tempExportX_ = new Epetra_MultiVector(FullMatrixColMap(), NumVectors);

  int NumEntries;
  int * Indices;
  double * Values;
  int NumMyRows = FullMatrix()->NumMyRows();
  int ColSingletonCounter = 0;
  for (i=0; i<NumMyRows; i++) {
    int curGRID = FullMatrixRowMap().GID(i);
    if (ReducedMatrixRowMap()->MyGID(curGRID)) { // Check if this row should go into reduced matrix
      EPETRA_CHK_ERR(GetRowGCIDs(i, NumEntries, Values, Indices)); // Get current row (Indices are global)
      int ierr = ReducedMatrix()->InsertGlobalValues(curGRID, NumEntries,
                                                     Values, Indices); // Insert into reduced matrix
      // Positive errors will occur because we are submitting col entries that are not part of
      // reduced system.  However, because we specified a column map to the ReducedMatrix constructor
      // these extra column entries will be ignored and we will be politely reminded by a positive
      // error code
      if (ierr<0) EPETRA_CHK_ERR(ierr);
    }
    else {
      EPETRA_CHK_ERR(GetRow(i, NumEntries, Values, Indices)); // Get current row
      if (NumEntries==1) {
        double pivot = Values[0];
        if (pivot==0.0) EPETRA_CHK_ERR(-1); // Encountered zero row, unable to continue
        int indX = Indices[0];
        for (j=0; j<NumVectors; j++)
          (*tempExportX_)[j][indX] = (*FullRHS)[j][i]/pivot;
      }
      // Otherwise, this is a singleton column and we will scan for the pivot element needed
      // for post-solve equations
      else {
        int targetCol = ColSingletonColLIDs_[ColSingletonCounter];
        for (j=0; j<NumEntries; j++) {
          if (Indices[j]==targetCol) {
            double pivot = Values[j];
            if (pivot==0.0) EPETRA_CHK_ERR(-2); // Encountered zero column, unable to continue
            ColSingletonPivotLIDs_[ColSingletonCounter] = j; // Save for later use
            ColSingletonPivots_[ColSingletonCounter] = pivot;
            ColSingletonCounter++;
            break;
          }
        }
      }
    }
  }

  // Now convert to local indexing.  We have constructed things so that the domain and range of the
  // matrix will have the same map.  If the reduced matrix domain and range maps were not the same, the
  // differences were addressed in the ConstructRedistributeExporter() method
  EPETRA_CHK_ERR(ReducedMatrix()->FillComplete(*ReducedMatrixDomainMap(), *ReducedMatrixRangeMap()));

  // Construct Reduced LHS (Puts any initial guess values into reduced system)
  ReducedLHS_ = new Epetra_MultiVector(*ReducedMatrixDomainMap(), NumVectors);
  EPETRA_CHK_ERR(ReducedLHS_->Import(*FullLHS, *Full2ReducedLHSImporter_, Insert));
  FullLHS->PutScalar(0.0); // zero out Full LHS since we will inject values as we get them

  // Construct Reduced RHS

  // First compute influence of already-known values of X on RHS
  tempX_ = new Epetra_MultiVector(FullMatrixDomainMap(), NumVectors);
  tempB_ = new Epetra_MultiVector(FullRHS->Map(), NumVectors);

  // Inject known X values into tempX for purpose of computing tempB = FullMatrix*tempX
  // Also inject into full X since we already know the solution
  if (FullMatrix()->RowMatrixImporter()!=0) {
    EPETRA_CHK_ERR(tempX_->Export(*tempExportX_, *FullMatrix()->RowMatrixImporter(), Add));
    EPETRA_CHK_ERR(FullLHS->Export(*tempExportX_, *FullMatrix()->RowMatrixImporter(), Add));
  }
  else {
    tempX_->Update(1.0, *tempExportX_, 0.0);
    FullLHS->Update(1.0, *tempExportX_, 0.0);
  }

  EPETRA_CHK_ERR(FullMatrix()->Multiply(false, *tempX_, *tempB_));

  EPETRA_CHK_ERR(tempB_->Update(1.0, *FullRHS, -1.0)); // tempB now has influence of already-known X values

  ReducedRHS_ = new Epetra_MultiVector(*ReducedMatrixRowMap(), FullRHS->NumVectors());
  EPETRA_CHK_ERR(ReducedRHS_->Import(*tempB_, *Full2ReducedRHSImporter_, Insert));

  // Finally construct Reduced Linear Problem
  ReducedProblem_ = new Epetra_LinearProblem(ReducedMatrix_, ReducedLHS_, ReducedRHS_);

  double fn = FullMatrix()->NumGlobalRows();
  double fnnz = FullMatrix()->NumGlobalNonzeros();
  double rn = ReducedMatrix()->NumGlobalRows();
  double rnnz = ReducedMatrix()->NumGlobalNonzeros();

  RatioOfDimensions_ = rn/fn;
  RatioOfNonzeros_ = rnnz/fnnz;
  HaveReducedProblem_ = true;

  return(0);
}
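// Both routines above implement singleton filtering: a row with a single
// nonzero determines its unknown outright (x = b_i/pivot), and the known
// values are then moved to the right-hand side (tempB = FullMatrix*tempX,
// subtracted from FullRHS) before the reduced system is solved. A tiny dense
// illustration of the same elimination, with a made-up 3x3 system:
#include <cstdio>

int main() {
  // Row 1 is a singleton: 2*x1 = 4, so x1 = 2.
  double A[3][3] = {{4, 1, 1},
                    {0, 2, 0},
                    {1, 1, 3}};
  double b[3] = {8, 4, 9};
  double x1 = b[1]/A[1][1];

  // Reduced 2x2 system in (x0, x2): move the known column to the RHS.
  double Ar[2][2] = {{A[0][0], A[0][2]}, {A[2][0], A[2][2]}};
  double br[2] = {b[0] - A[0][1]*x1, b[2] - A[2][1]*x1};

  // Solve the 2x2 system by Cramer's rule.
  double det = Ar[0][0]*Ar[1][1] - Ar[0][1]*Ar[1][0];
  double x0 = (br[0]*Ar[1][1] - Ar[0][1]*br[1])/det;
  double x2 = (Ar[0][0]*br[1] - br[0]*Ar[1][0])/det;
  std::printf("x = (%g, %g, %g)\n", x0, x1, x2); // (1, 2, 2)
  return 0;
}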
/* Computes the approximate Schur complement for the wide separator */
Teuchos::RCP<Epetra_CrsMatrix> computeApproxWideSchur(shylu_config *config,
    shylu_symbolic *ssym,   // symbolic structure
    Epetra_CrsMatrix *G, Epetra_CrsMatrix *R,
    Epetra_LinearProblem *LP, Amesos_BaseSolver *solver,
    Ifpack_Preconditioner *ifSolver, Epetra_CrsMatrix *C,
    Epetra_Map *localDRowMap)
{
  int i;
  double relative_thres = config->relative_threshold;

  // Need to create local G (block diagonal portion), R, C.
  // Get row map of G.
  Epetra_Map GrMap = G->RowMap();
  int *g_rows = GrMap.MyGlobalElements();
  int g_localElems = GrMap.NumMyElements();

  Epetra_SerialComm LComm;
  Epetra_Map G_localRMap(-1, g_localElems, g_rows, 0, LComm);

  int nentries1, gid;
  // maxentries is the maximum over all three possible matrices, as the
  // arrays are reused between the three
  int maxentries = std::max(C->MaxNumEntries(), R->MaxNumEntries());
  maxentries = std::max(maxentries, G->MaxNumEntries());

  double *values1 = new double[maxentries];
  double *values2 = new double[maxentries];
  double *values3 = new double[maxentries];
  int *indices1 = new int[maxentries];
  int *indices2 = new int[maxentries];
  int *indices3 = new int[maxentries];

  // Sbar - Approximate Schur complement
  Teuchos::RCP<Epetra_CrsMatrix> Sbar = Teuchos::rcp(new Epetra_CrsMatrix(
                                            Copy, GrMap, g_localElems));

  // Include only the block diagonal elements of G in localG
  Epetra_CrsMatrix localG(Copy, G_localRMap, G->MaxNumEntries(), false);
  int cnt, scnt;
  for (i = 0; i < g_localElems ; i++) {
    gid = g_rows[i];
    G->ExtractGlobalRowCopy(gid, maxentries, nentries1, values1, indices1);

    cnt = 0;
    scnt = 0;
    for (int j = 0 ; j < nentries1 ; j++) {
      if (G->LRID(indices1[j]) != -1) {
        values2[cnt] = values1[j];
        indices2[cnt++] = indices1[j];
      }
      else {
        // Add it to Sbar immediately
        values3[scnt] = values1[j];
        indices3[scnt++] = indices1[j];
      }
    }

    localG.InsertGlobalValues(gid, cnt, values2, indices2);
    Sbar->InsertGlobalValues(gid, scnt, values3, indices3);
  }
  localG.FillComplete();

  int nvectors = 16;
  ShyLU_Local_Schur_Operator probeop(config, ssym, &localG, R, LP, solver,
                                     ifSolver, C, localDRowMap, nvectors);

  // **************** Two collectives here *********************
#ifdef TIMING_OUTPUT
  Teuchos::Time ftime("setup time");
  ftime.start();
#endif
#ifdef TIMING_OUTPUT
  Teuchos::Time app_time("apply time");
#endif

  int nentries;
  // size > maxentries as there could be fill
  // TODO: Currently the size of the two arrays can be one. Even if we switch
  // the loop below, the size of the array required is nvectors. Fix it.
  double *values = new double[nvectors];
  int *indices = new int[nvectors];
  double *vecvalues;
#ifdef SHYLU_DEBUG
  // mfh 25 May 2015: Don't declare this variable if it's not used.
  // It's only used if SHYLU_DEBUG is defined.
  int dropped = 0;
#endif // SHYLU_DEBUG
  double *maxvalue = new double[nvectors];
#ifdef TIMING_OUTPUT
  ftime.start();
#endif
  int findex = g_localElems / nvectors ;

  int cindex;
  Epetra_MultiVector probevec(G_localRMap, nvectors);
  Epetra_MultiVector Scol(G_localRMap, nvectors);
  probevec.PutScalar(0.0);
  for (i = 0 ; i < findex*nvectors ; i+=nvectors) {
    // Set the probevec to find block columns of S.
    for (int k = 0; k < nvectors; k++) {
      cindex = k+i;
      // TODO: Can do better than this, just need to go to the column map
      // of C, there might be null columns in C
      probevec.ReplaceGlobalValue(g_rows[cindex], k, 1.0);
    }

#ifdef TIMING_OUTPUT
    app_time.start();
#endif
    probeop.Apply(probevec, Scol);
#ifdef TIMING_OUTPUT
    app_time.stop();
#endif

    // Reset the probevec to all zeros.
    for (int k = 0; k < nvectors; k++) {
      cindex = k+i;
      probevec.ReplaceGlobalValue(g_rows[cindex], k, 0.0);
    }

    Scol.MaxValue(maxvalue);
    nentries = 0;
    for (int j = 0 ; j < g_localElems ; j++) {
      for (int k = 0; k < nvectors; k++) {
        cindex = k+i;
        vecvalues = Scol[k];
        if ((g_rows[cindex] == g_rows[j]) ||
            (std::abs(vecvalues[j]/maxvalue[k]) > relative_thres)) {
          // diagonal entry or large entry
          values[nentries] = vecvalues[j];
          indices[nentries++] = g_rows[cindex];
        }
#ifdef SHYLU_DEBUG
        else if (vecvalues[j] != 0.0) {
          dropped++;
        }
#endif // SHYLU_DEBUG
      }
      Sbar->InsertGlobalValues(g_rows[j], nentries, values, indices);
      nentries = 0;
    }
  }

  // Handle the remaining columns when g_localElems is not a multiple
  // of nvectors.
  if (i < g_localElems) {
    nvectors = g_localElems - i;
    probeop.ResetTempVectors(nvectors);
    Epetra_MultiVector probevec1(G_localRMap, nvectors);
    Epetra_MultiVector Scol1(G_localRMap, nvectors);

    probevec1.PutScalar(0.0);
    for (int k = 0; k < nvectors; k++) {
      cindex = k+i;
      // TODO: Can do better than this, just need to go to the column map
      // of C, there might be null columns in C
      probevec1.ReplaceGlobalValue(g_rows[cindex], k, 1.0);
    }

#ifdef TIMING_OUTPUT
    app_time.start();
#endif
    probeop.Apply(probevec1, Scol1);
#ifdef TIMING_OUTPUT
    app_time.stop();
#endif

    Scol1.MaxValue(maxvalue);
    nentries = 0;
    for (int j = 0 ; j < g_localElems ; j++) {
      for (int k = 0; k < nvectors; k++) {
        cindex = k+i;
        vecvalues = Scol1[k];
        if ((g_rows[cindex] == g_rows[j]) ||
            (std::abs(vecvalues[j]/maxvalue[k]) > relative_thres)) {
          // diagonal entry or large entry
          values[nentries] = vecvalues[j];
          indices[nentries++] = g_rows[cindex];
        }
#ifdef SHYLU_DEBUG
        else if (vecvalues[j] != 0.0) {
          dropped++;
        }
#endif // SHYLU_DEBUG
      }
      Sbar->InsertGlobalValues(g_rows[j], nentries, values, indices);
      nentries = 0;
    }
  }

#ifdef TIMING_OUTPUT
  ftime.stop();
  cout << "Time in finding and dropping entries " << ftime.totalElapsedTime() << endl;
  ftime.reset();
  cout << "Time in Apply of probing " << app_time.totalElapsedTime() << endl;
  probeop.PrintTimingInfo();
#endif
  Sbar->FillComplete();

#ifdef DUMP_MATRICES
  Epetra_Map defMap2(-1, g_localElems, 0, C->Comm());
  EpetraExt::ViewTransform<Epetra_CrsMatrix> * ReIdx_MatTrans2 =
                          new EpetraExt::CrsMatrix_Reindex( defMap2 );
  Epetra_CrsMatrix t2S = (*ReIdx_MatTrans2)( *Sbar );
  ReIdx_MatTrans2->fwd();
  EpetraExt::RowMatrixToMatlabFile("Schur.mat", t2S);
#endif

#ifdef SHYLU_DEBUG
  cout << "#dropped entries " << dropped << endl;
#endif
  delete[] values;
  delete[] indices;
  delete[] values1;
  delete[] indices1;
  delete[] values2;
  delete[] indices2;
  delete[] values3;
  delete[] indices3;
  delete[] maxvalue;

  return Sbar;
}
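// The Sbar construction above is probing: apply the implicitly defined Schur
// operator to blocks of unit vectors, read off columns of S, and keep only
// diagonal or relatively large entries. A minimal sketch of the idea with a
// single probe vector per column; the real code probes nvectors columns per
// Apply and drops entries below relative_thres.
#include <cstdio>
#include <vector>

using Vec = std::vector<double>;

int main() {
  const int n = 4;
  // Implicit operator: tridiagonal stencil y_i = 2 x_i - x_{i-1} - x_{i+1}.
  auto apply = [n](const Vec& x) {
    Vec y(n, 0.0);
    for (int i = 0; i < n; ++i) {
      y[i] = 2*x[i];
      if (i > 0)   y[i] -= x[i-1];
      if (i < n-1) y[i] -= x[i+1];
    }
    return y;
  };

  for (int j = 0; j < n; ++j) {
    Vec e(n, 0.0);
    e[j] = 1.0;                 // probe vector for column j
    Vec col = apply(e);         // column j of the implicit matrix
    std::printf("col %d:", j);
    for (int i = 0; i < n; ++i)
      if (col[i] != 0.0) std::printf(" (%d,%d)=%g", i, j, col[i]);
    std::printf("\n");
  }
  return 0;
}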
int Stokhos::MatrixFreeOperator::
Apply(const Epetra_MultiVector& Input, Epetra_MultiVector& Result) const
{
#ifdef STOKHOS_TEUCHOS_TIME_MONITOR
  TEUCHOS_FUNC_TIME_MONITOR("Stokhos: SG Operator Apply()");
#endif

  // Note for transpose:
  // The stochastic matrix is symmetric, however the matrix blocks may not
  // be.  So the algorithm here is the same whether we are using the transpose
  // or not.  We just apply the transpose of the blocks in the case of
  // applying the global transpose, and make sure the imported Input
  // vectors use the right map.

  // We have to be careful if Input and Result are the same vector.
  // If this is the case, the only possible solution is to make a copy.
  const Epetra_MultiVector *input = &Input;
  bool made_copy = false;
  if (Input.Values() == Result.Values() && !is_stoch_parallel) {
    input = new Epetra_MultiVector(Input);
    made_copy = true;
  }

  // Initialize
  Result.PutScalar(0.0);

  const Epetra_Map* input_base_map = domain_base_map.get();
  const Epetra_Map* result_base_map = range_base_map.get();
  if (useTranspose == true) {
    input_base_map = range_base_map.get();
    result_base_map = domain_base_map.get();
  }

  // Allocate temporary storage
  int m = Input.NumVectors();
  if (useTranspose == false &&
      (tmp == Teuchos::null || tmp->NumVectors() != m*max_num_mat_vec))
    tmp = Teuchos::rcp(new Epetra_MultiVector(*result_base_map,
                                              m*max_num_mat_vec));
  else if (useTranspose == true &&
           (tmp_trans == Teuchos::null ||
            tmp_trans->NumVectors() != m*max_num_mat_vec))
    tmp_trans = Teuchos::rcp(new Epetra_MultiVector(*result_base_map,
                                                    m*max_num_mat_vec));
  Epetra_MultiVector *tmp_result;
  if (useTranspose == false)
    tmp_result = tmp.get();
  else
    tmp_result = tmp_trans.get();

  // Map input into column map
  const Epetra_MultiVector *tmp_col;
  if (!is_stoch_parallel)
    tmp_col = input;
  else {
    if (useTranspose == false) {
      if (input_col == Teuchos::null || input_col->NumVectors() != m)
        input_col = Teuchos::rcp(new Epetra_MultiVector(*global_col_map, m));
      input_col->Import(*input, *col_importer, Insert);
      tmp_col = input_col.get();
    }
    else {
      if (input_col_trans == Teuchos::null ||
          input_col_trans->NumVectors() != m)
        input_col_trans = Teuchos::rcp(new Epetra_MultiVector(
                                         *global_col_map_trans, m));
      input_col_trans->Import(*input, *col_importer_trans, Insert);
      tmp_col = input_col_trans.get();
    }
  }

  // Extract blocks
  EpetraExt::BlockMultiVector sg_input(View, *input_base_map, *tmp_col);
  EpetraExt::BlockMultiVector sg_result(View, *result_base_map, Result);
  for (int i=0; i<input_block.size(); i++)
    input_block[i] = sg_input.GetBlock(i);
  for (int i=0; i<result_block.size(); i++)
    result_block[i] = sg_result.GetBlock(i);

  // Apply block SG operator via
  //   w_i = \sum_{j=0}^P \sum_{k=0}^L J_k v_j < \psi_i \psi_j \psi_k > / <\psi_i^2>
  // for i=0,...,P where P = expansion_size, L = num_blocks, v_j is the jth
  // input block, w_i is the ith result block, and J_k is the kth block
  // operator.  k_begin and k_end are initialized in the constructor.
  const Teuchos::Array<double>& norms = sg_basis->norm_squared();
  for (Cijk_type::k_iterator k_it=k_begin; k_it!=k_end; ++k_it) {
    int k = index(k_it);
    Cijk_type::kj_iterator j_begin = Cijk->j_begin(k_it);
    Cijk_type::kj_iterator j_end = Cijk->j_end(k_it);
    int nj = Cijk->num_j(k_it);
    if (nj > 0) {
      Teuchos::Array<double*> j_ptr(nj*m);
      Teuchos::Array<int> mj_indices(nj*m);
      int l = 0;
      for (Cijk_type::kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
        int j = index(j_it);
        for (int mm=0; mm<m; mm++) {
          j_ptr[l*m+mm] = (*input_block[j])[mm];
          mj_indices[l*m+mm] = l*m+mm;
        }
        l++;
      }
      Epetra_MultiVector input_tmp(View, *input_base_map, &j_ptr[0], nj*m);
      Epetra_MultiVector result_tmp(View, *tmp_result, &mj_indices[0], nj*m);
      if (use_block_apply) {
        (*block_ops)[k].Apply(input_tmp, result_tmp);
      }
      else {
        for (int jj=0; jj<nj*m; jj++)
          (*block_ops)[k].Apply(*(input_tmp(jj)), *(result_tmp(jj)));
      }
      l = 0;
      for (Cijk_type::kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
        int j = index(j_it);
        for (Cijk_type::kji_iterator i_it = Cijk->i_begin(j_it);
             i_it != Cijk->i_end(j_it); ++i_it) {
          int i = index(i_it);
          double c = value(i_it);
          if (scale_op) {
            int i_gid;
            if (useTranspose)
              i_gid = epetraCijk->GCID(j);
            else
              i_gid = epetraCijk->GRID(i);
            c /= norms[i_gid];
          }
          for (int mm=0; mm<m; mm++)
            (*result_block[i])(mm)->Update(c, *result_tmp(l*m+mm), 1.0);
        }
        l++;
      }
    }
  }

  // Destroy blocks
  for (int i=0; i<input_block.size(); i++)
    input_block[i] = Teuchos::null;
  for (int i=0; i<result_block.size(); i++)
    result_block[i] = Teuchos::null;

  if (made_copy)
    delete input;

  return 0;
}
int TestMultiLevelPreconditioner(char ProblemType[],
                                 Teuchos::ParameterList & MLList,
                                 Epetra_LinearProblem & Problem,
                                 double & TotalErrorResidual,
                                 double & TotalErrorExactSol)
{
  Epetra_MultiVector* lhs = Problem.GetLHS();
  Epetra_MultiVector* rhs = Problem.GetRHS();
  Epetra_CrsMatrix* A = dynamic_cast<Epetra_CrsMatrix*>(Problem.GetMatrix());
  int PID = A->Comm().MyPID();
  int numProcs = A->Comm().NumProc();
  RCP<const Epetra_RowMatrix> Arcp = Teuchos::rcp(A, false);
  double n1, n2;

  // ======================================== //
  // create a rhs corresponding to lhs of 1's //
  // ======================================== //
  lhs->PutScalar(1.0);
  A->Multiply(false,*lhs,*rhs);
  lhs->PutScalar(0.0);

  MLList.set("ML output", 0);

  RowMatrixToMatlabFile("mat_f.dat",*A);
  MultiVectorToMatrixMarketFile("lhs_f.dat",*lhs,0,0,false);
  MultiVectorToMatrixMarketFile("rhs_f.dat",*rhs,0,0,false);

  Epetra_Time Time(A->Comm());

  /* Build the Zoltan list - Group #1 */
  ParameterList Zlist1,Sublist1;
  Sublist1.set("DEBUG_LEVEL","0");
  Sublist1.set("NUM_GLOBAL_PARTITIONS","2");
  Zlist1.set("Zoltan",Sublist1);

  /* Start Isorropia's Ninja Magic - Group #1 */
  RefCountPtr<Isorropia::Epetra::Partitioner> partitioner1 =
    Isorropia::Epetra::create_partitioner(Arcp, Zlist1);
  Isorropia::Epetra::Redistributor rd1(partitioner1);
  Teuchos::RCP<Epetra_CrsMatrix> ResA1=rd1.redistribute(*A);
  Teuchos::RCP<Epetra_MultiVector> ResX1=rd1.redistribute(*lhs);
  Teuchos::RCP<Epetra_MultiVector> ResB1=rd1.redistribute(*rhs);

  RestrictedCrsMatrixWrapper RW1;
  RW1.restrict_comm(ResA1);
  RestrictedMultiVectorWrapper RX1,RB1;
  RX1.restrict_comm(ResX1);
  RB1.restrict_comm(ResB1);

  /* Build the Zoltan list - Group #2 */
  ParameterList Zlist2,Sublist2;
  Sublist2.set("DEBUG_LEVEL","0");
  if(PID > 1) Sublist2.set("NUM_LOCAL_PARTITIONS","1");
  else Sublist2.set("NUM_LOCAL_PARTITIONS","0");
  Zlist2.set("Zoltan",Sublist2);

  /* Start Isorropia's Ninja Magic - Group #2 */
  RefCountPtr<Isorropia::Epetra::Partitioner> partitioner2 =
    Isorropia::Epetra::create_partitioner(Arcp, Zlist2);
  Isorropia::Epetra::Redistributor rd2(partitioner2);
  Teuchos::RCP<Epetra_CrsMatrix> ResA2=rd2.redistribute(*A);
  Teuchos::RCP<Epetra_MultiVector> ResX2=rd2.redistribute(*lhs);
  Teuchos::RCP<Epetra_MultiVector> ResB2=rd2.redistribute(*rhs);

  RestrictedCrsMatrixWrapper RW2;
  RW2.restrict_comm(ResA2);
  RestrictedMultiVectorWrapper RX2,RB2;
  RX2.restrict_comm(ResX2);
  RB2.restrict_comm(ResB2);

  if(RW1.RestrictedProcIsActive()){
    Teuchos::RCP<Epetra_CrsMatrix> SubA1 = RW1.RestrictedMatrix();
    Teuchos::RCP<Epetra_MultiVector> SubX1 = RX1.RestrictedMultiVector();
    Teuchos::RCP<Epetra_MultiVector> SubB1 = RB1.RestrictedMultiVector();
    ML_Epetra::MultiLevelPreconditioner * SubPrec1 =
      new ML_Epetra::MultiLevelPreconditioner(*SubA1, MLList, true);

    Epetra_LinearProblem Problem1(&*SubA1,&*SubX1,&*SubB1);
    AztecOO solver1(Problem1);
    solver1.SetPrecOperator(SubPrec1);
    solver1.SetAztecOption(AZ_solver, AZ_gmres);
    solver1.SetAztecOption(AZ_output, 32);
    solver1.SetAztecOption(AZ_kspace, 160);
    solver1.Iterate(1550, 1e-12);
    delete SubPrec1;
  }
  else{
    Teuchos::RCP<Epetra_CrsMatrix> SubA2 = RW2.RestrictedMatrix();
    Teuchos::RCP<Epetra_MultiVector> SubX2 = RX2.RestrictedMultiVector();
    Teuchos::RCP<Epetra_MultiVector> SubB2 = RB2.RestrictedMultiVector();
    ML_Epetra::MultiLevelPreconditioner * SubPrec2 =
      new ML_Epetra::MultiLevelPreconditioner(*SubA2, MLList, true);

    Epetra_LinearProblem Problem2(&*SubA2,&*SubX2,&*SubB2);
    AztecOO solver2(Problem2);
    solver2.SetPrecOperator(SubPrec2);
    solver2.SetAztecOption(AZ_solver, AZ_gmres);
    solver2.SetAztecOption(AZ_output, 32);
    solver2.SetAztecOption(AZ_kspace, 160);
    solver2.Iterate(1550, 1e-12);
    delete SubPrec2;
  }

  /* Post-processing exports */
  Epetra_MultiVector ans1(*lhs), ans2(*lhs);
  rd1.redistribute_reverse(*ResX1,ans1);
  rd2.redistribute_reverse(*ResX2,ans2);

  /* Run on Full Problem */
  A->Comm().Barrier();
  ML_Epetra::MultiLevelPreconditioner * FullPrec =
    new ML_Epetra::MultiLevelPreconditioner(*A, MLList, true);
  AztecOO solverF(Problem);
  solverF.SetPrecOperator(FullPrec);
  solverF.SetAztecOption(AZ_solver, AZ_gmres);
  solverF.SetAztecOption(AZ_output, 32);
  solverF.SetAztecOption(AZ_kspace, 160);
  solverF.Iterate(1550, 1e-12);
  delete FullPrec;

  /* Solution Comparison */
  ans1.Update(1.0,*lhs,-1.0);
  ans2.Update(1.0,*lhs,-1.0);
  ans1.Norm2(&n1);
  ans2.Norm2(&n2);
  if(!PID) {
    printf("Norm Diff 1 = %6.4e\n",n1);
    printf("Norm Diff 2 = %6.4e\n",n2);
  }
  TotalErrorExactSol += n1 + n2;

  return 0;
}
int TestMultiLevelPreconditioner(char ProblemType[],
                                 Teuchos::ParameterList & MLList,
                                 Epetra_LinearProblem & Problem,
                                 double & TotalErrorResidual,
                                 double & TotalErrorExactSol)
{
  Epetra_MultiVector* lhs = Problem.GetLHS();
  Epetra_MultiVector* rhs = Problem.GetRHS();
  Epetra_RowMatrix* A = Problem.GetMatrix();

  // ======================================== //
  // create a rhs corresponding to lhs of 1's //
  // ======================================== //
  lhs->PutScalar(1.0);
  A->Multiply(false,*lhs,*rhs);
  lhs->PutScalar(0.0);

  Epetra_Time Time(A->Comm());
  Epetra_MultiVector lhs2(*lhs);
  Epetra_MultiVector rhs2(*rhs);

  // =================== //
  // call ML and AztecOO //
  // =================== //
  AztecOO solver(Problem);
  MLList.set("ML output", 0);

  ML_set_random_seed(24601);
  ML_Epetra::MultiLevelPreconditioner * MLPrec =
    new ML_Epetra::MultiLevelPreconditioner(*A, MLList, true);

  // tell AztecOO to use this preconditioner, then solve
  solver.SetPrecOperator(MLPrec);
  solver.SetAztecOption(AZ_solver, AZ_gmres);
  solver.SetAztecOption(AZ_output, 32);
  solver.SetAztecOption(AZ_kspace, 160);
  solver.Iterate(1550, 1e-12);
  delete MLPrec;

  // ================================= //
  // call ML and AztecOO a second time //
  // ================================= //
  Epetra_LinearProblem Problem2(A,&lhs2,&rhs2);
  AztecOO solver2(Problem2);
  ML_set_random_seed(24601);
  ML_Epetra::MultiLevelPreconditioner * MLPrec2 =
    new ML_Epetra::MultiLevelPreconditioner(*A, MLList, true);

  // tell AztecOO to use this preconditioner, then solve
  solver2.SetPrecOperator(MLPrec2);
  solver2.SetAztecOption(AZ_solver, AZ_gmres);
  solver2.SetAztecOption(AZ_output, 32);
  solver2.SetAztecOption(AZ_kspace, 160);
  solver2.Iterate(1550, 1e-12);
  delete MLPrec2;

  // ============================================ //
  // compute difference between the two solutions //
  // ============================================ //
  double d = 0.0, d_tot = 0.0;
  for( int i=0 ; i<lhs->Map().NumMyElements() ; ++i )
    d += ((*lhs)[0][i] - lhs2[0][i]) * ((*lhs)[0][i] - lhs2[0][i]);

  A->Comm().SumAll(&d,&d_tot,1);

  std::string msg = ProblemType;
  if (A->Comm().MyPID() == 0) {
    std::cout << msg << "......Using " << A->Comm().NumProc() << " processes" << std::endl;
    std::cout << msg << "......||x_1 - x_2||_2 = " << std::sqrt(d_tot) << std::endl;
    std::cout << msg << "......Total Time = " << Time.ElapsedTime() << std::endl;
  }
  TotalErrorExactSol += std::sqrt(d_tot);

  return( solver.NumIters() );
}
//! Initialize vector
static void init(Epetra_MultiVector& vec, double val) {
  vec.PutScalar(val);
}
// ============================================================================ int ML_Epetra::MatrixFreePreconditioner:: Compute(const Epetra_CrsGraph& Graph, Epetra_MultiVector& NullSpace) { Epetra_Time TotalTime(Comm()); const int NullSpaceDim = NullSpace.NumVectors(); // get parameters from the list std::string PrecType = List_.get("prec: type", "hybrid"); std::string SmootherType = List_.get("smoother: type", "Jacobi"); std::string ColoringType = List_.get("coloring: type", "JONES_PLASSMAN"); int PolynomialDegree = List_.get("smoother: degree", 3); std::string DiagonalColoringType = List_.get("diagonal coloring: type", "JONES_PLASSMAN"); int MaximumIterations = List_.get("eigen-analysis: max iters", 10); std::string EigenType_ = List_.get("eigen-analysis: type", "cg"); double boost = List_.get("eigen-analysis: boost for lambda max", 1.0); int OutputLevel = List_.get("ML output", -47); if (OutputLevel == -47) OutputLevel = List_.get("output", 10); omega_ = List_.get("smoother: damping", omega_); ML_Set_PrintLevel(OutputLevel); bool LowMemory = List_.get("low memory", true); double AllocationFactor = List_.get("AP allocation factor", 0.5); verbose_ = (MyPID() == 0 && ML_Get_PrintLevel() > 5); // ================ // // check parameters // // ================ // if (PrecType == "presmoother only") PrecType_ = ML_MFP_PRESMOOTHER_ONLY; else if (PrecType == "hybrid") PrecType_ = ML_MFP_HYBRID; else if (PrecType == "additive") PrecType_ = ML_MFP_ADDITIVE; else ML_CHK_ERR(-3); // not recognized if (SmootherType == "none") SmootherType_ = ML_MFP_NONE; else if (SmootherType == "Jacobi") SmootherType_ = ML_MFP_JACOBI; else if (SmootherType == "block Jacobi") SmootherType_ = ML_MFP_BLOCK_JACOBI; else if (SmootherType == "Chebyshev") SmootherType_ = ML_MFP_CHEBY; else ML_CHK_ERR(-4); // not recognized if (AllocationFactor <= 0.0) ML_CHK_ERR(-1); // should be positive // =============================== // // basic checkings and some output // // =============================== // int OperatorDomainPoints = Operator_.OperatorDomainMap().NumGlobalPoints(); int OperatorRangePoints = Operator_.OperatorRangeMap().NumGlobalPoints(); int GraphBlockRows = Graph.NumGlobalBlockRows(); int GraphNnz = Graph.NumGlobalNonzeros(); NumPDEEqns_ = OperatorRangePoints / GraphBlockRows; NumMyBlockRows_ = Graph.NumMyBlockRows(); if (OperatorDomainPoints != OperatorRangePoints) ML_CHK_ERR(-1); // only square matrices if (OperatorRangePoints % NumPDEEqns_ != 0) ML_CHK_ERR(-2); // num PDEs seems not constant if (verbose_) { ML_print_line("=",78); std::cout << "*** " << std::endl; std::cout << "*** ML_Epetra::MatrixFreePreconditioner" << std::endl; std::cout << "***" << std::endl; std::cout << "Number of rows and columns = " << OperatorDomainPoints << std::endl; std::cout << "Number of rows per processor = " << OperatorDomainPoints / Comm().NumProc() << " (on average)" << std::endl; std::cout << "Number of rows in the graph = " << GraphBlockRows << std::endl; std::cout << "Number of nonzeros in the graph = " << GraphNnz << std::endl; std::cout << "Processors used in computation = " << Comm().NumProc() << std::endl; std::cout << "Number of PDE equations = " << NumPDEEqns_ << std::endl; std::cout << "Null space dimension = " << NullSpaceDim << std::endl; std::cout << "Preconditioner type = " << PrecType << std::endl; std::cout << "Smoother type = " << SmootherType << std::endl; std::cout << "Coloring type = " << ColoringType << std::endl; std::cout << "Allocation factor = " << AllocationFactor << std::endl; std::cout << 
"Number of V-cycles for C = " << List_.sublist("ML list").get("cycle applications", 1) << std::endl; std::cout << std::endl; } ResetStartTime(); // ==================================== // // compute the inverse of the diagonal, // // control that no elements are zero. // // ==================================== // for (int i = 0; i < InvPointDiagonal_->MyLength(); ++i) if ((*InvPointDiagonal_)[i] != 0.0) (*InvPointDiagonal_)[i] = 1.0 / (*InvPointDiagonal_)[i]; // ========================================================= // // Setup the smoother. I need to extract the block diagonal // // only if block Jacobi is used. For Chebyshev, I scale with // // the point diagonal only. In this latter case, I need to // // compute lambda_max of the scaled operator. // // ========================================================= // // probes for the block diagonal of the matrix. if (SmootherType_ == ML_MFP_JACOBI || SmootherType_ == ML_MFP_NONE) { // do-nothing here } else if (SmootherType_ == ML_MFP_BLOCK_JACOBI) { if (verbose_); std::cout << "Diagonal coloring type = " << DiagonalColoringType << std::endl; ML_CHK_ERR(GetBlockDiagonal(Graph, DiagonalColoringType)); AddAndResetStartTime("block diagonal construction", true); } else if (SmootherType_ == ML_MFP_CHEBY) { double lambda_min = 0.0; double lambda_max = 0.0; Teuchos::ParameterList IFPACKList; if (EigenType_ == "power-method") { ML_CHK_ERR(Ifpack_Chebyshev::PowerMethod(Operator_, *InvPointDiagonal_, MaximumIterations, lambda_max)); } else if(EigenType_ == "cg") { ML_CHK_ERR(Ifpack_Chebyshev::CG(Operator_, *InvPointDiagonal_, MaximumIterations, lambda_min, lambda_max)); } else ML_CHK_ERR(-1); // not recognized if (verbose_) { std::cout << "Using Chebyshev smoother of degree " << PolynomialDegree << std::endl; std::cout << "Estimating eigenvalues using " << EigenType_ << std::endl; std::cout << "lambda_min = " << lambda_min << ", "; std::cout << "lambda_max = " << lambda_max << std::endl; } IFPACKList.set("chebyshev: min eigenvalue", lambda_min); IFPACKList.set("chebyshev: max eigenvalue", boost * lambda_max); // FIXME: this allocates a new std::vector inside IFPACKList.set("chebyshev: operator inv diagonal", InvPointDiagonal_.get()); IFPACKList.set("chebyshev: degree", PolynomialDegree); PreSmoother_ = rcp(new Ifpack_Chebyshev((Epetra_Operator*)(&Operator_))); if (PreSmoother_.get() == 0) ML_CHK_ERR(-1); // memory error? IFPACKList.set("chebyshev: zero starting solution", true); ML_CHK_ERR(PreSmoother_->SetParameters(IFPACKList)); ML_CHK_ERR(PreSmoother_->Initialize()); ML_CHK_ERR(PreSmoother_->Compute()); PostSmoother_ = rcp(new Ifpack_Chebyshev((Epetra_Operator*)(&Operator_))); if (PostSmoother_.get() == 0) ML_CHK_ERR(-1); // memory error? IFPACKList.set("chebyshev: zero starting solution", false); ML_CHK_ERR(PostSmoother_->SetParameters(IFPACKList)); ML_CHK_ERR(PostSmoother_->Initialize()); ML_CHK_ERR(PostSmoother_->Compute()); } // ========================================================= // // building P and R for block graph. This is done by working // // on the Graph_ object. Support is provided for local // // aggregation schemes only so that all is basically local. // // Then, build the block graph coarse problem. 
// // ========================================================= // // ML wrapper for Graph_ ML_Operator* Graph_ML = ML_Operator_Create(Comm_ML()); ML_Operator_WrapEpetraCrsGraph(const_cast<Epetra_CrsGraph*>(&Graph), Graph_ML); ML_Aggregate* BlockAggr_ML = 0; ML_Operator* BlockPtent_ML = 0, *BlockRtent_ML = 0,* CoarseGraph_ML = 0; if (verbose_) std::cout << std::endl; ML_CHK_ERR(Coarsen(Graph_ML, &BlockAggr_ML, &BlockPtent_ML, &BlockRtent_ML, &CoarseGraph_ML)); if (verbose_) std::cout << std::endl; Epetra_CrsMatrix* GraphCoarse; ML_CHK_ERR(ML_Operator2EpetraCrsMatrix(CoarseGraph_ML, GraphCoarse)); // used later to estimate the entries in AP ML_Operator* CoarseAP_ML = ML_Operator_Create(Comm_ML()); ML_2matmult(Graph_ML, BlockPtent_ML, CoarseAP_ML, ML_CSR_MATRIX); int AP_MaxNnzRow, itmp = CoarseAP_ML->max_nz_per_row; Comm().MaxAll(&itmp, &AP_MaxNnzRow, 1); ML_Operator_Destroy(&CoarseAP_ML); int NumAggregates = BlockPtent_ML->invec_leng; ML_Operator_Destroy(&BlockRtent_ML); ML_Operator_Destroy(&CoarseGraph_ML); AddAndResetStartTime("construction of block C, R, and P", true); if (verbose_) std::cout << std::endl; // ================================================== // // coloring of block graph: // // - color of block row `i' is given by `ColorMap[i]' // // - number of colors is ColorMap.NumColors(). // // ================================================== // ResetStartTime(); CrsGraph_MapColoring* MapColoringTransform; if (ColoringType == "JONES_PLASSMAN") MapColoringTransform = new CrsGraph_MapColoring (CrsGraph_MapColoring::JONES_PLASSMAN, 0, false, 0); else if (ColoringType == "PSEUDO_PARALLEL") MapColoringTransform = new CrsGraph_MapColoring (CrsGraph_MapColoring::PSEUDO_PARALLEL, 0, false, 0); else if (ColoringType == "GREEDY") MapColoringTransform = new CrsGraph_MapColoring (CrsGraph_MapColoring::GREEDY, 0, false, 0); else if (ColoringType == "LUBY") MapColoringTransform = new CrsGraph_MapColoring (CrsGraph_MapColoring::LUBY, 0, false, 0); else ML_CHK_ERR(-1); Epetra_MapColoring* ColorMap = &(*MapColoringTransform)(const_cast<Epetra_CrsGraph&>(GraphCoarse->Graph())); // move the information from ColorMap to std::vector Colors const int NumColors = ColorMap->MaxNumColors(); RefCountPtr<Epetra_IntSerialDenseVector> Colors = rcp(new Epetra_IntSerialDenseVector(GraphCoarse->Graph().NumMyRows())); for (int i = 0; i < GraphCoarse->Graph().NumMyRows(); ++i) (*Colors)[i] = (*ColorMap)[i]; delete MapColoringTransform; delete ColorMap; ColorMap = 0; delete GraphCoarse; AddAndResetStartTime("coarse graph coloring", true); if (verbose_) std::cout << std::endl; // get some other information about the aggregates, to be used // in the QR factorization of the null space. NodesOfAggregate // contains the local ID of block rows contained in each aggregate. // FIXME: make it faster std::vector< std::vector<int> > NodesOfAggregate(NumAggregates); for (int i = 0; i < Graph.NumMyBlockRows(); ++i) { int AID = BlockAggr_ML->aggr_info[0][i]; NodesOfAggregate[AID].push_back(i); } int MaxAggrSize = 0; for (int i = 0; i < NumAggregates; ++i) { const int& MySize = NodesOfAggregate[i].size(); if (MySize > MaxAggrSize) MaxAggrSize = MySize; } // collect aggregate information, and mark all nodes that are // connected with each aggregate. These nodes will have a possible // nonzero entry after the matrix-matrix product between the Operator_ // and the tentative prolongator. 
std::vector<std::vector<int> > aggregates(NumAggregates); std::vector<int>::iterator iter; for (int i = 0; i < NumAggregates; ++i) aggregates[i].reserve(MaxAggrSize); for (int i = 0; i < Graph.NumMyBlockRows(); ++i) { int AID = BlockAggr_ML->aggr_info[0][i]; int NumEntries; int* Indices; Graph.ExtractMyRowView(i, NumEntries, Indices); for (int k = 0; k < NumEntries; ++k) { // FIXME: use hash?? const int GCID = Graph.ColMap().GID(Indices[k]); iter = std::find(aggregates[AID].begin(), aggregates[AID].end(), GCID); if (iter == aggregates[AID].end()) aggregates[AID].push_back(GCID); } } int* BlockNodeList = Graph.ColMap().MyGlobalElements(); // finally get rid of the ML_Aggregate structure. ML_Aggregate_Destroy(&BlockAggr_ML); const Epetra_Map& FineMap = Operator_.OperatorDomainMap(); Epetra_Map CoarseMap(-1, NumAggregates * NullSpaceDim, 0, Comm()); RefCountPtr<Epetra_Map> BlockNodeListMap = rcp(new Epetra_Map(-1, Graph.ColMap().NumMyElements(), BlockNodeList, 0, Comm())); std::vector<int> NodeList(Graph.ColMap().NumMyElements() * NumPDEEqns_); for (int i = 0; i < Graph.ColMap().NumMyElements(); ++i) for (int m = 0; m < NumPDEEqns_; ++m) NodeList[i * NumPDEEqns_ + m] = BlockNodeList[i] * NumPDEEqns_ + m; RefCountPtr<Epetra_Map> NodeListMap = rcp(new Epetra_Map(-1, (int) NodeList.size(), &NodeList[0], 0, Comm())); AddAndResetStartTime("data structures", true); // ====================== // // process the null space // // ====================== // // CHECKME Epetra_MultiVector NewNullSpace(CoarseMap, NullSpaceDim); NewNullSpace.PutScalar(0.0); if (NullSpaceDim == 1) { double* ns_ptr = NullSpace.Values(); for (int AID = 0; AID < NumAggregates; ++AID) { double dtemp = 0.0; for (int j = 0; j < (int) (NodesOfAggregate[AID].size()); j++) for (int m = 0; m < NumPDEEqns_; ++m) { const int pos = NodesOfAggregate[AID][j] * NumPDEEqns_ + m; dtemp += (ns_ptr[pos] * ns_ptr[pos]); } dtemp = std::sqrt(dtemp); NewNullSpace[0][AID] = dtemp; dtemp = 1.0 / dtemp; for (int j = 0; j < (int) (NodesOfAggregate[AID].size()); j++) for (int m = 0; m < NumPDEEqns_; ++m) ns_ptr[NodesOfAggregate[AID][j] * NumPDEEqns_ + m] *= dtemp; } } else { // FIXME std::vector<double> qr_ptr(MaxAggrSize * NumPDEEqns_ * MaxAggrSize * NumPDEEqns_); std::vector<double> tmp_ptr(MaxAggrSize * NumPDEEqns_ * NullSpaceDim); std::vector<double> work(NullSpaceDim); int info; for (int AID = 0; AID < NumAggregates; ++AID) { int MySize = (int) NodesOfAggregate[AID].size(); int MyFullSize = MySize * NumPDEEqns_; int lwork = NullSpaceDim; for (int k = 0; k < NullSpaceDim; ++k) for (int j = 0; j < MySize; ++j) for (int m = 0; m < NumPDEEqns_; ++m) qr_ptr[k * MyFullSize + j * NumPDEEqns_ + m] = NullSpace[k][NodesOfAggregate[AID][j] * NumPDEEqns_ + m]; DGEQRF_F77(&MyFullSize, (int*)&NullSpaceDim, &qr_ptr[0], &MyFullSize, &tmp_ptr[0], &work[0], &lwork, &info); ML_CHK_ERR(info); // DGEQRF returned a nonzero info if (work[0] > lwork) work.resize((int) work[0]); // the upper triangle of qr_ptr is now R, so copy that into the // new nullspace for (int j = 0; j < NullSpaceDim; j++) for (int k = j; k < NullSpaceDim; k++) NewNullSpace[k][AID * NullSpaceDim + j] = qr_ptr[j + MyFullSize * k]; // to get this block of P, need to run qr_ptr through another LAPACK // function: DORGQR_F77(&MyFullSize, (int*)&NullSpaceDim, (int*)&NullSpaceDim, &qr_ptr[0], &MyFullSize, &tmp_ptr[0], &work[0], &lwork, &info); ML_CHK_ERR(info); // DORGQR returned a nonzero info if (work[0] > lwork) work.resize((int) work[0]); // insert the Q block into the null space for (int k = 0; k < NullSpaceDim; ++k) for (int j = 0; j <
MySize; ++j) for (int m = 0; m < NumPDEEqns_; ++m) { int LRID = NodesOfAggregate[AID][j] * NumPDEEqns_ + m; double& val = qr_ptr[k * MyFullSize + j * NumPDEEqns_ + m]; NullSpace[k][LRID] = val; } } } AddAndResetStartTime("null space setup", true); if (verbose_) std::cout << "Number of colors on processor " << Comm().MyPID() << " = " << NumColors << std::endl; if (verbose_) std::cout << "Maximum number of colors = " << NumColors << std::endl; RefCountPtr<Epetra_FECrsMatrix> AP; // try to get a good estimate of the nonzeros per row. // This is a compromise between efficiency -- that is, reducing // the number of memory allocations -- and memory usage -- that is, // overestimating can actually kill the code. Basically, this is // all junk due to our dear friend, the Cray XT3. AP = rcp(new Epetra_FECrsMatrix(Copy, FineMap, (int) (AllocationFactor * AP_MaxNnzRow * NullSpaceDim))); if (AP.get() == 0) throw(-1); if (!LowMemory) { // ================================================= // // allocate one big chunk of memory, and use View // // to create Epetra_MultiVectors. Note that // // NumColors * NullSpaceDim can indeed be quite a // // large value. To reduce the memory consumption, // // both ColoredAP and ExtColoredAP use the same // // memory array. // // ================================================= // Epetra_MultiVector* ColoredP; std::vector<double> ColoredAP_ptr; try { ColoredP = new Epetra_MultiVector(FineMap, NumColors * NullSpaceDim); ColoredAP_ptr.resize(NumColors * NullSpaceDim * NodeListMap->NumMyPoints()); } catch (std::exception& rhs) { catch_message("the allocation of ColoredP", rhs.what(), __FILE__, __LINE__); ML_CHK_ERR(-1); } catch (...) { catch_message("the allocation of ColoredP", "", __FILE__, __LINE__); ML_CHK_ERR(-1); } int ColoredAP_LDA = NodeListMap->NumMyPoints(); ColoredP->PutScalar(0.0); for (int i = 0; i < BlockPtent_ML->outvec_leng; ++i) { int allocated = 1; int NumEntries; int Indices; double Values; int ierr = ML_Operator_Getrow(BlockPtent_ML, 1, &i, allocated, &Indices, &Values, &NumEntries); if (ierr < 0) ML_CHK_ERR(-1); assert (NumEntries == 1); // this is the block P const int Color = (*Colors)[Indices] - 1; for (int k = 0; k < NumPDEEqns_; ++k) for (int j = 0; j < NullSpaceDim; ++j) (*ColoredP)[(Color * NullSpaceDim + j)][i * NumPDEEqns_ + k] = NullSpace[j][i * NumPDEEqns_ + k]; } ML_Operator_Destroy(&BlockPtent_ML); Epetra_MultiVector ColoredAP(View, Operator_.OperatorRangeMap(), &ColoredAP_ptr[0], ColoredAP_LDA, NumColors * NullSpaceDim); // move ColoredAP into ColoredP. This should not be required, // but I prefer to skip strange games with View pointers Operator_.Apply(*ColoredP, ColoredAP); *ColoredP = ColoredAP; // FIXME: only if NumProc > 1 Epetra_MultiVector ExtColoredAP(View, *NodeListMap, &ColoredAP_ptr[0], ColoredAP_LDA, NumColors * NullSpaceDim); try { Epetra_Import Importer(*NodeListMap, Operator_.OperatorRangeMap()); ExtColoredAP.Import(*ColoredP, Importer, Insert); } catch (std::exception& rhs) { catch_message("importing of ExtColoredAP", rhs.what(), __FILE__, __LINE__); ML_CHK_ERR(-1); } catch (...)
{ catch_message("importing of ExtColoredAP", "", __FILE__, __LINE__); ML_CHK_ERR(-1); } delete ColoredP; AddAndResetStartTime("computation of AP", true); // populate the actual AP operator, skip some controls to make it faster for (int i = 0; i < NumAggregates; ++i) { for (int j = 0; j < (int) (aggregates[i].size()); ++j) { int GRID = aggregates[i][j]; int LRID = BlockNodeListMap->LID(GRID); // this is the block ID //assert (LRID != -1); int GCID = CoarseMap.GID(i * NullSpaceDim); //assert (GCID != -1); int color = (*Colors)[i] - 1; for (int k = 0; k < NumPDEEqns_; ++k) for (int j = 0; j < NullSpaceDim; ++j) { double val = ExtColoredAP[color * NullSpaceDim + j][LRID * NumPDEEqns_ + k]; if (val != 0.0) { int GRID2 = GRID * NumPDEEqns_ + k; int GCID2 = GCID + j; AP->InsertGlobalValues(1, &GRID2, 1, &GCID2, &val); //if (ierr < 0) ML_CHK_ERR(ierr); } } } } } else { // =============================================================== // // apply the operator one color at-a-time. This requires NumColors // // cycles over BlockPtent. However, the memory requirements are // // drastically reduced. As for low-memory == false, both ColoredAP // // and ExtColoredAP point to the same memory location. // // =============================================================== // if (verbose_) std::cout << "Using low-memory computation for AP" << std::endl; Epetra_MultiVector ColoredP(FineMap, NullSpaceDim); std::vector<double> ColoredAP_ptr; try { ColoredAP_ptr.resize(NullSpaceDim * NodeListMap->NumMyPoints()); } catch (std::exception& rhs) { catch_message("resizing of ColoredAP_pt", rhs.what(), __FILE__, __LINE__); ML_CHK_ERR(-1); } catch (...) { catch_message("resizing of ColoredAP_pt", "", __FILE__, __LINE__); ML_CHK_ERR(-1); } Epetra_MultiVector ColoredAP(View, Operator_.OperatorRangeMap(), &ColoredAP_ptr[0], NodeListMap->NumMyPoints(), NullSpaceDim); Epetra_MultiVector ExtColoredAP(View, *NodeListMap, &ColoredAP_ptr[0], NodeListMap->NumMyPoints(), NullSpaceDim); Epetra_Import Importer(*NodeListMap, Operator_.OperatorRangeMap()); for (int ic = 0; ic < NumColors; ++ic) { if (ML_Get_PrintLevel() > 8 && Comm().MyPID() == 0) { if (ic % 20 == 0) std::cout << "Processing color " << flush; std::cout << ic << " " << flush; if (ic % 20 == 19 || ic == NumColors - 1) std::cout << std::endl; if (ic == NumColors - 1) std::cout << std::endl; } ColoredP.PutScalar(0.0); for (int i = 0; i < BlockPtent_ML->outvec_leng; ++i) { int allocated = 1; int NumEntries; int Indices; double Values; int ierr = ML_Operator_Getrow(BlockPtent_ML, 1 ,&i, allocated, &Indices,&Values,&NumEntries); if (ierr < 0 || // something strange in getrow NumEntries != 1) // this is the block P ML_CHK_ERR(-1); const int& Color = (*Colors)[Indices] - 1; if (Color != ic) continue; // skip this color for this cycle for (int k = 0; k < NumPDEEqns_; ++k) for (int j = 0; j < NullSpaceDim; ++j) ColoredP[j][i * NumPDEEqns_ + k] = NullSpace[j][i * NumPDEEqns_ + k]; } Operator_.Apply(ColoredP, ColoredAP); ColoredP = ColoredAP; // just to be safe ExtColoredAP.Import(ColoredP, Importer, Insert); // populate the actual AP operator, skip some controls to make it faster std::vector<int> InsertCols(NullSpaceDim * NumPDEEqns_); std::vector<double> InsertValues(NullSpaceDim * NumPDEEqns_); for (int i = 0; i < NumAggregates; ++i) { for (int j = 0; j < (int) (aggregates[i].size()); ++j) { int GRID = aggregates[i][j]; int LRID = BlockNodeListMap->LID(GRID); // this is the block ID //assert (LRID != -1); int GCID = CoarseMap.GID(i * NullSpaceDim); //assert (GCID != -1); int 
color = (*Colors)[i] - 1; if (color != ic) continue; for (int k = 0; k < NumPDEEqns_; ++k) { int count = 0; int GRID2 = GRID * NumPDEEqns_ + k; for (int j = 0; j < NullSpaceDim; ++j) { double val = ExtColoredAP[j][LRID * NumPDEEqns_ + k]; if (val != 0.0) { InsertCols[count] = GCID + j; InsertValues[count] = val; ++count; } } AP->InsertGlobalValues(1, &GRID2, count, &InsertCols[0], &InsertValues[0]); } } } } ML_Operator_Destroy(&BlockPtent_ML); } aggregates.resize(0); BlockNodeListMap = Teuchos::null; NodeListMap = Teuchos::null; Colors = Teuchos::null; AP->GlobalAssemble(false); AP->FillComplete(CoarseMap, FineMap); #if 0 try { AP->OptimizeStorage(); } catch(...) { // a memory error was reported, typically ReportError. // We just continue with fingers crossed. } #endif AddAndResetStartTime("computation of the final AP", true); ML_Operator* AP_ML = ML_Operator_Create(Comm_ML()); ML_Operator_WrapEpetraMatrix(AP.get(), AP_ML); // ======== // // create R // // ======== // std::vector<int> REntries(NumAggregates * NullSpaceDim); for (int AID = 0; AID < NumAggregates; ++AID) { for (int m = 0; m < NullSpaceDim; ++m) REntries[AID * NullSpaceDim + m] = NodesOfAggregate[AID].size() * NumPDEEqns_; } R_ = rcp(new Epetra_CrsMatrix(Copy, CoarseMap, &REntries[0], true)); REntries.resize(0); for (int AID = 0; AID < NumAggregates; ++AID) { const int& MySize = NodesOfAggregate[AID].size(); // FIXME: make it faster for (int j = 0; j < MySize; ++j) for (int m = 0; m < NumPDEEqns_; ++m) for (int k = 0; k < NullSpaceDim; ++k) { int LCID = NodesOfAggregate[AID][j] * NumPDEEqns_ + m; int GCID = FineMap.GID(LCID); assert (GCID != -1); double& val = NullSpace[k][LCID]; int GRID = CoarseMap.GID(AID * NullSpaceDim + k); int ierr = R_->InsertGlobalValues(GRID, 1, &val, &GCID); if (ierr < 0) ML_CHK_ERR(-1); } } NodesOfAggregate.resize(0); R_->FillComplete(FineMap, CoarseMap); #if 0 try { R_->OptimizeStorage(); } catch(...) { // a memory error was reported, typically ReportError. // We just continue with fingers crossed. } #endif ML_Operator* R_ML = ML_Operator_Create(Comm_ML()); ML_Operator_WrapEpetraMatrix(R_.get(), R_ML); AddAndResetStartTime("computation of R", true); // ======== // // Create C // // ======== // C_ML_ = ML_Operator_Create(Comm_ML()); ML_2matmult(R_ML, AP_ML, C_ML_, ML_MSR_MATRIX); ML_Operator_Destroy(&AP_ML); ML_Operator_Destroy(&R_ML); AP = Teuchos::null; C_ = rcp(new ML_Epetra::RowMatrix(C_ML_, &Comm(), false)); assert (R_->OperatorRangeMap().SameAs(C_->OperatorDomainMap())); TotalTime.ResetStartTime(); AddAndResetStartTime("computation of C", true); if (verbose_) { std::cout << "Matrix-free preconditioner built. Now building solver for C..." << std::endl; } Teuchos::ParameterList& sublist = List_.sublist("ML list"); sublist.set("PDE equations", NullSpaceDim); sublist.set("null space: type", "pre-computed"); sublist.set("null space: dimension", NewNullSpace.NumVectors()); sublist.set("null space: vectors", NewNullSpace.Values()); MLP_ = rcp(new MultiLevelPreconditioner(*C_, sublist, true)); assert (MLP_.get() != 0); IsComputed_ = true; AddAndResetStartTime("computation of the preconditioner for C", true); if (verbose_) { std::cout << std::endl; std::cout << "Total CPU time for construction (all included) = "; std::cout << TotalCPUTime() << std::endl; ML_print_line("=",78); } return(0); }
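// ------------------------------------------------------------------------ //
// Illustrative sketch (not part of the original sources). The AP           //
// computation above relies on graph coloring to probe the product A*P      //
// with few operator applies: columns of P with the same color have         //
// structurally disjoint images, so they can share one probe vector, and    //
// the result can be unscrambled afterwards through the sparsity pattern.   //
// The helper below shows the idea in isolation; the names RowsOfColumn     //
// and the 1-based Colors convention are assumptions of this sketch, P is   //
// taken as a plain multivector whose map matches A's domain map, and the   //
// usual Epetra headers plus <vector> are assumed to be in scope.           //
// ------------------------------------------------------------------------ //
int ProbeAPByColor(const Epetra_Operator& A, const Epetra_MultiVector& P,
                   const Epetra_IntSerialDenseVector& Colors, // 1-based color per column of P
                   int NumColors,
                   const std::vector<std::vector<int> >& RowsOfColumn, // local rows reachable from column j
                   Epetra_MultiVector& AP) // same layout as P, filled on exit
{
  Epetra_MultiVector probe(P.Map(), 1);
  Epetra_MultiVector result(A.OperatorRangeMap(), 1);
  AP.PutScalar(0.0);
  for (int ic = 0; ic < NumColors; ++ic) {
    probe.PutScalar(0.0);
    // sum all columns of this color into a single probe vector
    for (int j = 0; j < P.NumVectors(); ++j)
      if (Colors[j] - 1 == ic)
        probe(0)->Update(1.0, *P(j), 1.0);
    EPETRA_CHK_ERR(A.Apply(probe, result));
    // unscramble: entries reachable from column j belong to column j only
    for (int j = 0; j < P.NumVectors(); ++j)
      if (Colors[j] - 1 == ic)
        for (size_t r = 0; r < RowsOfColumn[j].size(); ++r)
          AP[j][RowsOfColumn[j][r]] = result[0][RowsOfColumn[j][r]];
  }
  return 0;
}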
// // Amesos_TestMultiSolver.cpp reads in a matrix in Harwell-Boeing format, // calls one of the sparse direct solvers, using blocked right hand sides // and computes the error and residual. // // Amesos_TestMultiSolver ignores the Harwell-Boeing right hand sides, // creating random right hand sides instead. // // Amesos_TestMultiSolver can test either A x = b or A^T x = b. // This can be a bit confusing because sparse direct solvers // use compressed column storage - the transpose of Trilinos' // sparse row storage. // // Matrices: // readA - Serial. As read from the file. // transposeA - Serial. The transpose of readA. // serialA - if (transpose) then transposeA else readA // distributedA - readA distributed to all processes // passA - if ( distributed ) then distributedA else serialA // // int Amesos_TestMultiSolver( Epetra_Comm &Comm, char *matrix_file, int numsolves, SparseSolverType SparseSolver, bool transpose, int special, AMESOS_MatrixType matrix_type ) { int iam = Comm.MyPID() ; // int hatever; // if ( iam == 0 ) std::cin >> hatever ; Comm.Barrier(); Epetra_Map * readMap; Epetra_CrsMatrix * readA; Epetra_Vector * readx; Epetra_Vector * readb; Epetra_Vector * readxexact; std::string FileName = matrix_file ; int FN_Size = FileName.size() ; std::string LastFiveBytes = FileName.substr( EPETRA_MAX(0,FN_Size-5), FN_Size ); std::string LastFourBytes = FileName.substr( EPETRA_MAX(0,FN_Size-4), FN_Size ); bool NonContiguousMap = false; if ( LastFiveBytes == ".triU" ) { NonContiguousMap = true; // Call routine to read in unsymmetric Triplet matrix EPETRA_CHK_ERR( Trilinos_Util_ReadTriples2Epetra( matrix_file, false, Comm, readMap, readA, readx, readb, readxexact, NonContiguousMap ) ); } else { if ( LastFiveBytes == ".triS" ) { NonContiguousMap = true; // Call routine to read in symmetric Triplet matrix EPETRA_CHK_ERR( Trilinos_Util_ReadTriples2Epetra( matrix_file, true, Comm, readMap, readA, readx, readb, readxexact, NonContiguousMap ) ); } else { if ( LastFourBytes == ".mtx" ) { EPETRA_CHK_ERR( Trilinos_Util_ReadMatrixMarket2Epetra( matrix_file, Comm, readMap, readA, readx, readb, readxexact) ); } else { // Call routine to read in HB problem Trilinos_Util_ReadHb2Epetra( matrix_file, Comm, readMap, readA, readx, readb, readxexact) ; } } } Epetra_CrsMatrix transposeA(Copy, *readMap, 0); Epetra_CrsMatrix *serialA ; if ( transpose ) { assert( CrsMatrixTranspose( readA, &transposeA ) == 0 ); serialA = &transposeA ; } else { serialA = readA ; } // Create uniformly distributed map Epetra_Map map(readMap->NumGlobalElements(), 0, Comm); Epetra_Map* map_; if( NonContiguousMap ) { // // map gives us NumMyElements and MyFirstElement; // int NumGlobalElements = readMap->NumGlobalElements(); int NumMyElements = map.NumMyElements(); int MyFirstElement = map.MinMyGID(); std::vector<int> MapMap_( NumGlobalElements ); readMap->MyGlobalElements( &MapMap_[0] ) ; Comm.Broadcast( &MapMap_[0], NumGlobalElements, 0 ) ; map_ = new Epetra_Map( NumGlobalElements, NumMyElements, &MapMap_[MyFirstElement], 0, Comm); } else { map_ = new Epetra_Map( map ) ; } // Create Exporter to distribute read-in matrix and vectors Epetra_Export exporter(*readMap, *map_); Epetra_CrsMatrix A(Copy, *map_, 0); Epetra_RowMatrix * passA = 0; Epetra_MultiVector * passx = 0; Epetra_MultiVector * passb = 0; Epetra_MultiVector * passxexact = 0; Epetra_MultiVector * passresid = 0; Epetra_MultiVector * passtmp = 0; Epetra_MultiVector x(*map_,numsolves); Epetra_MultiVector b(*map_,numsolves); Epetra_MultiVector xexact(*map_,numsolves); Epetra_MultiVector
resid(*map_,numsolves); Epetra_MultiVector tmp(*map_,numsolves); Epetra_MultiVector serialx(*readMap,numsolves); Epetra_MultiVector serialb(*readMap,numsolves); Epetra_MultiVector serialxexact(*readMap,numsolves); Epetra_MultiVector serialresid(*readMap,numsolves); Epetra_MultiVector serialtmp(*readMap,numsolves); bool distribute_matrix = ( matrix_type == AMESOS_Distributed ) ; if ( distribute_matrix ) { // // Distribute the matrix; the pass* pointers then refer to the distributed objects // A.Export(*serialA, exporter, Add); Comm.Barrier(); assert(A.FillComplete()==0); Comm.Barrier(); passA = &A; passx = &x; passb = &b; passxexact = &xexact; passresid = &resid; passtmp = &tmp; } else { passA = serialA; passx = &serialx; passb = &serialb; passxexact = &serialxexact; passresid = &serialresid; passtmp = &serialtmp; } passxexact->SetSeed(131) ; passxexact->Random(); passx->SetSeed(11231) ; passx->Random(); passb->PutScalar( 0.0 ); passA->Multiply( transpose, *passxexact, *passb ) ; Epetra_MultiVector CopyB( *passb ) ; double Anorm = passA->NormInf() ; SparseDirectTimingVars::SS_Result.Set_Anorm(Anorm) ; Epetra_LinearProblem Problem( (Epetra_RowMatrix *) passA, (Epetra_MultiVector *) passx, (Epetra_MultiVector *) passb ); double max_resid = 0.0; for ( int j = 0 ; j < special+1 ; j++ ) { Epetra_Time TotalTime( Comm ) ; if ( false ) { #ifdef TEST_UMFPACK // unused code } else if ( SparseSolver == UMFPACK ) { UmfpackOO umfpack( (Epetra_RowMatrix *) passA, (Epetra_MultiVector *) passx, (Epetra_MultiVector *) passb ) ; umfpack.SetTrans( transpose ) ; umfpack.Solve() ; #endif #ifdef TEST_SUPERLU } else if ( SparseSolver == SuperLU ) { SuperluserialOO superluserial( (Epetra_RowMatrix *) passA, (Epetra_MultiVector *) passx, (Epetra_MultiVector *) passb ) ; superluserial.SetPermc( SuperLU_permc ) ; superluserial.SetTrans( transpose ) ; superluserial.SetUseDGSSV( special == 0 ) ; superluserial.Solve() ; #endif #ifdef HAVE_AMESOS_SLUD } else if ( SparseSolver == SuperLUdist ) { SuperludistOO superludist( Problem ) ; superludist.SetTrans( transpose ) ; EPETRA_CHK_ERR( superludist.Solve( true ) ) ; #endif #ifdef HAVE_AMESOS_SLUD2 } else if ( SparseSolver == SuperLUdist2 ) { Superludist2_OO superludist2( Problem ) ; superludist2.SetTrans( transpose ) ; EPETRA_CHK_ERR( superludist2.Solve( true ) ) ; #endif #ifdef TEST_SPOOLES } else if ( SparseSolver == SPOOLES ) { SpoolesOO spooles( (Epetra_RowMatrix *) passA, (Epetra_MultiVector *) passx, (Epetra_MultiVector *) passb ) ; spooles.SetTrans( transpose ) ; spooles.Solve() ; #endif #ifdef HAVE_AMESOS_DSCPACK } else if ( SparseSolver == DSCPACK ) { Teuchos::ParameterList ParamList ; Amesos_Dscpack dscpack( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( dscpack.SetParameters( ParamList ) ); EPETRA_CHK_ERR( dscpack.Solve( ) ); #endif #ifdef HAVE_AMESOS_UMFPACK } else if ( SparseSolver == UMFPACK ) { Teuchos::ParameterList ParamList ; Amesos_Umfpack umfpack( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( umfpack.SetParameters( ParamList ) ); EPETRA_CHK_ERR( umfpack.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( umfpack.Solve( ) ); #endif #ifdef HAVE_AMESOS_KLU } else if ( SparseSolver == KLU ) { Teuchos::ParameterList ParamList ; Amesos_Klu klu( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( klu.SetParameters( ParamList ) ); EPETRA_CHK_ERR( klu.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( klu.SymbolicFactorization( ) ); EPETRA_CHK_ERR( klu.NumericFactorization( ) ); EPETRA_CHK_ERR( klu.Solve( ) ); #endif #ifdef HAVE_AMESOS_PARAKLETE } else
if ( SparseSolver == PARAKLETE ) { Teuchos::ParameterList ParamList ; Amesos_Paraklete paraklete( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( paraklete.SetParameters( ParamList ) ); EPETRA_CHK_ERR( paraklete.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( paraklete.SymbolicFactorization( ) ); EPETRA_CHK_ERR( paraklete.NumericFactorization( ) ); EPETRA_CHK_ERR( paraklete.Solve( ) ); #endif #ifdef HAVE_AMESOS_SLUS } else if ( SparseSolver == SuperLU ) { Epetra_SLU superluserial( &Problem ) ; EPETRA_CHK_ERR( superluserial.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( superluserial.SymbolicFactorization( ) ); EPETRA_CHK_ERR( superluserial.NumericFactorization( ) ); EPETRA_CHK_ERR( superluserial.Solve( ) ); #endif #ifdef HAVE_AMESOS_LAPACK } else if ( SparseSolver == LAPACK ) { Teuchos::ParameterList ParamList ; ParamList.set( "MaxProcs", -3 ); Amesos_Lapack lapack( Problem ) ; EPETRA_CHK_ERR( lapack.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( lapack.SymbolicFactorization( ) ); EPETRA_CHK_ERR( lapack.NumericFactorization( ) ); EPETRA_CHK_ERR( lapack.Solve( ) ); #endif #ifdef HAVE_AMESOS_TAUCS } else if ( SparseSolver == TAUCS ) { Teuchos::ParameterList ParamList ; Amesos_Taucs taucs( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( taucs.SetParameters( ParamList ) ); EPETRA_CHK_ERR( taucs.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( taucs.SymbolicFactorization( ) ); EPETRA_CHK_ERR( taucs.NumericFactorization( ) ); EPETRA_CHK_ERR( taucs.Solve( ) ); #endif #ifdef HAVE_AMESOS_PARDISO } else if ( SparseSolver == PARDISO ) { Teuchos::ParameterList ParamList ; Amesos_Pardiso pardiso( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( pardiso.SetParameters( ParamList ) ); EPETRA_CHK_ERR( pardiso.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( pardiso.SymbolicFactorization( ) ); EPETRA_CHK_ERR( pardiso.NumericFactorization( ) ); EPETRA_CHK_ERR( pardiso.Solve( ) ); #endif #ifdef HAVE_AMESOS_MUMPS } else if ( SparseSolver == MUMPS ) { Teuchos::ParameterList ParamList ; Amesos_Mumps mumps( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( mumps.SetParameters( ParamList ) ); EPETRA_CHK_ERR( mumps.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( mumps.SymbolicFactorization( ) ); EPETRA_CHK_ERR( mumps.NumericFactorization( ) ); EPETRA_CHK_ERR( mumps.Solve( ) ); #endif #ifdef HAVE_AMESOS_SCALAPACK } else if ( SparseSolver == SCALAPACK ) { Teuchos::ParameterList ParamList ; Amesos_Scalapack scalapack( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( scalapack.SetParameters( ParamList ) ); EPETRA_CHK_ERR( scalapack.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( scalapack.SymbolicFactorization( ) ); EPETRA_CHK_ERR( scalapack.NumericFactorization( ) ); EPETRA_CHK_ERR( scalapack.Solve( ) ); #endif #ifdef HAVE_AMESOS_SUPERLUDIST } else if ( SparseSolver == SUPERLUDIST ) { Teuchos::ParameterList ParamList ; Amesos_Superludist superludist( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( superludist.SetParameters( ParamList ) ); EPETRA_CHK_ERR( superludist.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR(
superludist.SymbolicFactorization( ) ); EPETRA_CHK_ERR( superludist.NumericFactorization( ) ); EPETRA_CHK_ERR( superludist.Solve( ) ); #endif #ifdef HAVE_AMESOS_SUPERLU } else if ( SparseSolver == SUPERLU ) { Teuchos::ParameterList ParamList ; Amesos_Superlu superlu( Problem ) ; ParamList.set( "MaxProcs", -3 ); EPETRA_CHK_ERR( superlu.SetParameters( ParamList ) ); EPETRA_CHK_ERR( superlu.SetUseTranspose( transpose ) ); EPETRA_CHK_ERR( superlu.SymbolicFactorization( ) ); EPETRA_CHK_ERR( superlu.NumericFactorization( ) ); EPETRA_CHK_ERR( superlu.Solve( ) ); #endif #ifdef TEST_SPOOLESSERIAL } else if ( SparseSolver == SPOOLESSERIAL ) { SpoolesserialOO spoolesserial( (Epetra_RowMatrix *) passA, (Epetra_MultiVector *) passx, (Epetra_MultiVector *) passb ) ; spoolesserial.Solve() ; #endif } else { SparseDirectTimingVars::log_file << "Solver not implemented yet" << std::endl ; std::cerr << "\n\n#################### Requested solver not available (Or not tested with blocked RHS) on this platform #####################\n" << std::endl ; } SparseDirectTimingVars::SS_Result.Set_Total_Time( TotalTime.ElapsedTime() ); // SparseDirectTimingVars::SS_Result.Set_First_Time( 0.0 ); // SparseDirectTimingVars::SS_Result.Set_Middle_Time( 0.0 ); // SparseDirectTimingVars::SS_Result.Set_Last_Time( 0.0 ); // // Compute the error = norm(xcomp - xexact ) // std::vector <double> error(numsolves) ; double max_error = 0.0; passresid->Update(1.0, *passx, -1.0, *passxexact, 0.0); passresid->Norm2(&error[0]); for ( int i = 0 ; i< numsolves; i++ ) if ( error[i] > max_error ) max_error = error[i] ; SparseDirectTimingVars::SS_Result.Set_Error(max_error) ; // passxexact->Norm2(&error[0] ) ; // passx->Norm2(&error ) ; // // Compute the residual = norm(Ax - b) // std::vector <double> residual(numsolves) ; passtmp->PutScalar(0.0); passA->Multiply( transpose, *passx, *passtmp); passresid->Update(1.0, *passtmp, -1.0, *passb, 0.0); // passresid->Update(1.0, *passtmp, -1.0, CopyB, 0.0); passresid->Norm2(&residual[0]); for ( int i = 0 ; i< numsolves; i++ ) if ( residual[i] > max_resid ) max_resid = residual[i] ; SparseDirectTimingVars::SS_Result.Set_Residual(max_resid) ; std::vector <double> bnorm(numsolves); passb->Norm2( &bnorm[0] ) ; SparseDirectTimingVars::SS_Result.Set_Bnorm(bnorm[0]) ; std::vector <double> xnorm(numsolves); passx->Norm2( &xnorm[0] ) ; SparseDirectTimingVars::SS_Result.Set_Xnorm(xnorm[0]) ; if ( false && iam == 0 ) { std::cout << " Amesos_TestMultiSolver.cpp " << std::endl ; for ( int i = 0 ; i< numsolves && i < 10 ; i++ ) { std::cout << "i=" << i << " error = " << error[i] << " xnorm = " << xnorm[i] << " residual = " << residual[i] << " bnorm = " << bnorm[i] << std::endl ; } std::cout << std::endl << " max_resid = " << max_resid ; std::cout << " max_error = " << max_error << std::endl ; std::cout << " Get_residual() again = " << SparseDirectTimingVars::SS_Result.Get_Residual() << std::endl ; } } delete readA; delete readx; delete readb; delete readxexact; delete readMap; delete map_; Comm.Barrier(); return 0 ; }
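// ------------------------------------------------------------------------ //
// Illustrative sketch (not part of the original sources). Every #ifdef     //
// branch above repeats the same Amesos calling sequence. With the generic  //
// Amesos factory the pattern collapses to the few lines below; SolverName  //
// is one of the usual factory strings such as "Amesos_Klu", and Query()    //
// reports whether that solver was compiled into this build. SolveOnce is   //
// a name invented for this sketch.                                         //
// ------------------------------------------------------------------------ //
int SolveOnce( Epetra_LinearProblem &Problem, const char *SolverName, bool transpose )
{
  Amesos Factory ;
  if ( ! Factory.Query( SolverName ) ) return -1 ;   // solver not available in this build
  Teuchos::RCP<Amesos_BaseSolver> Solver = Teuchos::rcp( Factory.Create( SolverName, Problem ) ) ;
  Teuchos::ParameterList ParamList ;
  ParamList.set( "MaxProcs", -3 );
  EPETRA_CHK_ERR( Solver->SetParameters( ParamList ) );
  EPETRA_CHK_ERR( Solver->SetUseTranspose( transpose ) );
  EPETRA_CHK_ERR( Solver->SymbolicFactorization( ) );
  EPETRA_CHK_ERR( Solver->NumericFactorization( ) );
  EPETRA_CHK_ERR( Solver->Solve( ) );
  return 0 ;
}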
int Stokhos::KLMatrixFreeOperator:: Apply(const Epetra_MultiVector& Input, Epetra_MultiVector& Result) const { // We have to be careful if Input and Result are the same vector. // If this is the case, the only possible solution is to make a copy const Epetra_MultiVector *input = &Input; bool made_copy = false; if (Input.Values() == Result.Values() && !is_stoch_parallel) { input = new Epetra_MultiVector(Input); made_copy = true; } // Initialize Result.PutScalar(0.0); const Epetra_Map* input_base_map = domain_base_map.get(); const Epetra_Map* result_base_map = range_base_map.get(); if (useTranspose == true) { input_base_map = range_base_map.get(); result_base_map = domain_base_map.get(); } // Allocate temporary storage int m = Input.NumVectors(); if (useTranspose == false && (tmp == Teuchos::null || tmp->NumVectors() != m*max_num_mat_vec)) tmp = Teuchos::rcp(new Epetra_MultiVector(*result_base_map, m*max_num_mat_vec)); else if (useTranspose == true && (tmp_trans == Teuchos::null || tmp_trans->NumVectors() != m*max_num_mat_vec)) tmp_trans = Teuchos::rcp(new Epetra_MultiVector(*result_base_map, m*max_num_mat_vec)); Epetra_MultiVector *tmp_result; if (useTranspose == false) tmp_result = tmp.get(); else tmp_result = tmp_trans.get(); // Map input into column map const Epetra_MultiVector *tmp_col; if (!is_stoch_parallel) tmp_col = input; else { if (useTranspose == false) { if (input_col == Teuchos::null || input_col->NumVectors() != m) input_col = Teuchos::rcp(new Epetra_MultiVector(*global_col_map, m)); input_col->Import(*input, *col_importer, Insert); tmp_col = input_col.get(); } else { if (input_col_trans == Teuchos::null || input_col_trans->NumVectors() != m) input_col_trans = Teuchos::rcp(new Epetra_MultiVector(*global_col_map_trans, m)); input_col_trans->Import(*input, *col_importer_trans, Insert); tmp_col = input_col_trans.get(); } } // Extract blocks EpetraExt::BlockMultiVector sg_input(View, *input_base_map, *tmp_col); EpetraExt::BlockMultiVector sg_result(View, *result_base_map, Result); for (int i=0; i<input_block.size(); i++) input_block[i] = sg_input.GetBlock(i); for (int i=0; i<result_block.size(); i++) result_block[i] = sg_result.GetBlock(i); int N = result_block[0]->MyLength(); const Teuchos::Array<double>& norms = sg_basis->norm_squared(); int d = sg_basis->dimension(); Teuchos::Array<double> zero(d), one(d); for(int j = 0; j<d; j++) { zero[j] = 0.0; one[j] = 1.0; } Teuchos::Array< double > phi_0(expansion_size), phi_1(expansion_size); sg_basis->evaluateBases(zero, phi_0); sg_basis->evaluateBases(one, phi_1); // k_begin and k_end are initialized in the constructor for (Cijk_type::k_iterator k_it=k_begin; k_it!=k_end; ++k_it) { Cijk_type::kj_iterator j_begin = Cijk->j_begin(k_it); Cijk_type::kj_iterator j_end = Cijk->j_end(k_it); int k = index(k_it); int nj = Cijk->num_j(k_it); if (nj > 0) { Teuchos::Array<double*> j_ptr(nj*m); Teuchos::Array<int> mj_indices(nj*m); int l = 0; for (Cijk_type::kj_iterator j_it = j_begin; j_it != j_end; ++j_it) { int j = index(j_it); for (int mm=0; mm<m; mm++) { j_ptr[l*m+mm] = input_block[j]->Values()+mm*N; mj_indices[l*m+mm] = l*m+mm; } l++; } Epetra_MultiVector input_tmp(View, *input_base_map, &j_ptr[0], nj*m); Epetra_MultiVector result_tmp(View, *tmp_result, &mj_indices[0], nj*m); (*block_ops)[k].Apply(input_tmp, result_tmp); l = 0; for (Cijk_type::kj_iterator j_it = j_begin; j_it != j_end; ++j_it) { int j = index(j_it); int j_gid = epetraCijk->GCID(j); for (Cijk_type::kji_iterator i_it = Cijk->i_begin(j_it); i_it != Cijk->i_end(j_it); ++i_it) { 
int i = index(i_it); int i_gid = epetraCijk->GRID(i); double c = value(i_it); if (k == 0) c /= phi_0[0]; else { c /= phi_1[k]; if (i_gid == j_gid) c -= phi_0[k]/(phi_1[k]*phi_0[0])*norms[i_gid]; } if (scale_op) { if (useTranspose) c /= norms[j_gid]; else c /= norms[i_gid]; } for (int mm=0; mm<m; mm++) (*result_block[i])(mm)->Update(c, *result_tmp(l*m+mm), 1.0); } l++; } } } // Destroy blocks for (int i=0; i<input_block.size(); i++) input_block[i] = Teuchos::null; for (int i=0; i<result_block.size(); i++) result_block[i] = Teuchos::null; if (made_copy) delete input; return 0; }
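// ------------------------------------------------------------------------ //
// Illustrative sketch (not part of the original sources). Stripped of the  //
// KL-specific rescaling and the parallel block machinery, the loop above   //
// computes the stochastic Galerkin product                                 //
//   Result_i = (1/<psi_i^2>) * sum_k sum_j c_ijk * A_k * Input_j ,         //
// applying each deterministic block A_k once per j and reusing that        //
// product for every i with c_ijk != 0. The toy serial version below, with  //
// dense row-major blocks and a full c[k][j][i] tensor (both hypothetical   //
// data layouts; <vector> assumed included), makes the index structure      //
// explicit.                                                                //
// ------------------------------------------------------------------------ //
void ToySGMatVec(const std::vector<std::vector<std::vector<double> > >& c, // c[k][j][i]
                 const std::vector<std::vector<double> >& A,   // A[k]: n*n entries, row-major
                 const std::vector<std::vector<double> >& in,  // in[j]: length n
                 const std::vector<double>& norms,             // <psi_i^2>
                 std::vector<std::vector<double> >& out)       // out[i]: length n, zeroed on entry
{
  const int n = (int) in[0].size();
  std::vector<double> tmp(n);
  for (std::size_t k = 0; k < A.size(); ++k)
    for (std::size_t j = 0; j < in.size(); ++j) {
      for (int r = 0; r < n; ++r) {           // tmp = A_k * in_j, computed once per (k,j)
        double s = 0.0;
        for (int q = 0; q < n; ++q) s += A[k][r * n + q] * in[j][q];
        tmp[r] = s;
      }
      for (std::size_t i = 0; i < out.size(); ++i) {
        const double cijk = c[k][j][i];
        if (cijk == 0.0) continue;            // Cijk is sparse; skip structural zeros
        for (int r = 0; r < n; ++r) out[i][r] += cijk * tmp[r] / norms[i];
      }
    }
}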
int TestMultiLevelPreconditioner(char ProblemType[], Teuchos::ParameterList & MLList, Epetra_LinearProblem & Problem, double & TotalErrorResidual, double & TotalErrorExactSol, bool cg=false) { Epetra_MultiVector* lhs = Problem.GetLHS(); Epetra_MultiVector* rhs = Problem.GetRHS(); Epetra_RowMatrix* A = Problem.GetMatrix(); // ============================================ // // create a RHS corresponding to an LHS of ones // // ============================================ // lhs->PutScalar(1.0); A->Multiply(false,*lhs,*rhs); lhs->PutScalar(0.0); Epetra_Time Time(A->Comm()); // =================== // // call ML and AztecOO // // =================== // AztecOO solver(Problem); MLList.set("ML output", 10); ML_Epetra::MultiLevelPreconditioner * MLPrec = new ML_Epetra::MultiLevelPreconditioner(*A, MLList, true); // tell AztecOO to use this preconditioner, then solve solver.SetPrecOperator(MLPrec); if(cg) solver.SetAztecOption(AZ_solver, AZ_cg); else solver.SetAztecOption(AZ_solver, AZ_gmres); solver.SetAztecOption(AZ_output, 32); solver.SetAztecOption(AZ_kspace, 160); solver.Iterate(1550, 1e-12); delete MLPrec; // ==================================================== // // compute difference between exact solution and ML one // // ==================================================== // double d = 0.0, d_tot = 0.0; for( int i=0 ; i<lhs->Map().NumMyElements() ; ++i ) d += ((*lhs)[0][i] - 1.0) * ((*lhs)[0][i] - 1.0); A->Comm().SumAll(&d,&d_tot,1); // ================== // // compute ||Ax - b|| // // ================== // double Norm; Epetra_Vector Ax(rhs->Map()); A->Multiply(false, *lhs, Ax); Ax.Update(1.0, *rhs, -1.0); Ax.Norm2(&Norm); std::string msg = ProblemType; if (A->Comm().MyPID() == 0) { std::cout << msg << "......Using " << A->Comm().NumProc() << " processes" << std::endl; std::cout << msg << "......||A x - b||_2 = " << Norm << std::endl; std::cout << msg << "......||x_exact - x||_2 = " << std::sqrt(d_tot) << std::endl; std::cout << msg << "......Total Time = " << Time.ElapsedTime() << std::endl; } TotalErrorExactSol += std::sqrt(d_tot); TotalErrorResidual += Norm; return( solver.NumIters() ); }
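// ------------------------------------------------------------------------ //
// Illustrative sketch (not part of the original sources). A typical driver //
// call of TestMultiLevelPreconditioner(): seed the list with the standard  //
// smoothed-aggregation defaults via ML_Epetra::SetDefaults(), override a   //
// couple of options, and accumulate the error counters. The option values  //
// and the RunSmoothedAggregationTest name are only examples.               //
// ------------------------------------------------------------------------ //
int RunSmoothedAggregationTest(Epetra_LinearProblem& Problem, double& TotalErrorResidual, double& TotalErrorExactSol)
{
  Teuchos::ParameterList MLList;
  ML_Epetra::SetDefaults("SA", MLList);                 // smoothed-aggregation defaults
  MLList.set("smoother: type", "symmetric Gauss-Seidel");
  MLList.set("max levels", 5);
  // returns the AztecOO iteration count, handy for regression checks
  return TestMultiLevelPreconditioner((char*)"SA", MLList, Problem, TotalErrorResidual, TotalErrorExactSol, true);
}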
// ================================================================= // //! Apply the preconditioner to an Epetra_MultiVector B_, putting the result in X. int ML_Epetra::FaceMatrixFreePreconditioner::ApplyInverse(const Epetra_MultiVector& B_, Epetra_MultiVector& X) const{ const Epetra_MultiVector *B; Epetra_MultiVector *Bcopy=0; /* Sanity Checks */ int NumVectors=B_.NumVectors(); if (!B_.Map().SameAs(*FaceDomainMap_)) ML_CHK_ERR(-1); if (NumVectors != X.NumVectors()) ML_CHK_ERR(-1); Epetra_MultiVector r_edge(*FaceDomainMap_,NumVectors,false); Epetra_MultiVector e_edge(*FaceDomainMap_,NumVectors,false); Epetra_MultiVector e_node(*CoarseMap_,NumVectors,false); Epetra_MultiVector r_node(*CoarseMap_,NumVectors,false); /* Deal with the B==X case */ if (B_.Pointers()[0] == X.Pointers()[0]){ Bcopy=new Epetra_MultiVector(B_); B=Bcopy; X.PutScalar(0.0); } else B=&B_; for(int i=0;i<num_cycles;i++){ /* Pre-smoothing */ #ifdef HAVE_ML_IFPACK if(Smoother_) ML_CHK_ERR(Smoother_->ApplyInverse(*B,X)); #endif if(MaxLevels > 0){ if(i != 0 #ifdef HAVE_ML_IFPACK || Smoother_ #endif ){ /* Calculate Residual (r_e = b - (S+M+Addon) * x) */ ML_CHK_ERR(Operator_->Apply(X,r_edge)); ML_CHK_ERR(r_edge.Update(1.0,*B,-1.0)); /* Xfer to coarse grid (r_n = P' * r_e) */ ML_CHK_ERR(Prolongator_->Multiply(true,r_edge,r_node)); } else{ /* Xfer to coarse grid (r_n = P' * r_e) */ ML_CHK_ERR(Prolongator_->Multiply(true,*B,r_node)); } /* AMG on coarse grid (e_n = (CoarseMatrix)^{-1} r_n) */ ML_CHK_ERR(CoarsePC->ApplyInverse(r_node,e_node)); /* Xfer back to fine grid (e_e = P * e_n) */ ML_CHK_ERR(Prolongator_->Multiply(false,e_node,e_edge)); /* Add in correction (x = x + e_e) */ ML_CHK_ERR(X.Update(1.0,e_edge,1.0)); }/*end if*/ /* Post-Smoothing */ #ifdef HAVE_ML_IFPACK if(Smoother_) ML_CHK_ERR(Smoother_->ApplyInverse(*B,X)); #endif }/*end for*/ /* Cleanup */ if(Bcopy) delete Bcopy; return 0; }/*end ApplyInverse*/
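// ------------------------------------------------------------------------ //
// Illustrative sketch (not part of the original sources). Because the      //
// class above is an Epetra_Operator whose ApplyInverse() runs the          //
// two-level cycle, it plugs into AztecOO like any other preconditioner.    //
// Constructing the preconditioner itself takes the usual edge/face         //
// matrices and a parameter list, which are assumed to exist already; the   //
// function name and option values below are examples only.                 //
// ------------------------------------------------------------------------ //
int SolveWithFacePreconditioner(Epetra_LinearProblem& Problem, ML_Epetra::FaceMatrixFreePreconditioner& FacePrec)
{
  AztecOO solver(Problem);
  solver.SetPrecOperator(&FacePrec);        // AztecOO will call FacePrec.ApplyInverse()
  solver.SetAztecOption(AZ_solver, AZ_cg);
  solver.SetAztecOption(AZ_output, 32);
  return solver.Iterate(500, 1e-10);        // returns 0 on convergence
}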