void DefaultMultiVectorLinearOpWithSolve<Scalar>::applyImpl(
  const EOpTransp M_trans,
  const MultiVectorBase<Scalar> &XX,
  const Ptr<MultiVectorBase<Scalar> > &YY,
  const Scalar alpha,
  const Scalar beta
  ) const
{
  // Apply the underlying LOWS operator once per column of the product
  // multi-vectors: each column of XX/YY is itself a wrapped multi-vector
  // (a DefaultMultiVectorProductVector).
  using Teuchos::dyn_cast;
  typedef DefaultMultiVectorProductVector<Scalar> MVPV;
  const Ordinal nCols = XX.domain()->dim();
  for (Ordinal j = 0; j < nCols; ++j) {
    // Peel off column j of the input and output product vectors.
    const RCP<const VectorBase<Scalar> > xj = XX.col(j);
    const RCP<VectorBase<Scalar> > yj = YY->col(j);
    // Unwrap the embedded multi-vectors (the columns must be product vectors).
    const RCP<const MultiVectorBase<Scalar> > X_mv =
      dyn_cast<const MVPV>(*xj).getMultiVector().assert_not_null();
    const RCP<MultiVectorBase<Scalar> > Y_mv =
      dyn_cast<MVPV>(*yj).getNonconstMultiVector().assert_not_null();
    // Y_mv = beta*Y_mv + alpha*op(lows)*X_mv
    Thyra::apply(*lows_.getConstObj(), M_trans, *X_mv, Y_mv.ptr(), alpha, beta);
  }
}
void LinearOpScalarProd<Scalar>::scalarProdsImpl(
  const MultiVectorBase<Scalar>& X, const MultiVectorBase<Scalar>& Y,
  const ArrayView<Scalar> &scalarProds_out ) const
{
  // Operator-weighted scalar products: first form opY = op_*Y, then take
  // the column-wise Euclidean dot products of X against opY.
  const Teuchos::RCP<MultiVectorBase<Scalar> > opY =
    createMembers(Y.range(), Y.domain()->dim());
  Thyra::apply(*op_, NOTRANS, Y, opY.ptr());
  dots(X, *opY, scalarProds_out);
}
void DefaultColumnwiseMultiVector<Scalar>::applyImpl(
  const EOpTransp M_trans,
  const MultiVectorBase<Scalar> &X,
  const Ptr<MultiVectorBase<Scalar> > &Y,
  const Scalar alpha,
  const Scalar beta
  ) const
{
  // Column-by-column implementation of Y = beta*Y + alpha*op(M)*X, where M
  // is this multi-vector.  Each output column y_j is updated independently
  // from the corresponding input column x_j.
#ifdef TEUCHOS_DEBUG
  THYRA_ASSERT_LINEAR_OP_MULTIVEC_APPLY_SPACES(
    "MultiVectorBase<Scalar>::apply()", *this, M_trans, X, &*Y);
#endif
  const Ordinal nc = this->domain()->dim(); // number of columns in M
  const Ordinal m = X.domain()->dim();      // number of RHS columns
  for (Ordinal col_j = 0; col_j < m; ++col_j) {
    const RCP<const VectorBase<Scalar> > x_j = X.col(col_j);
    const RCP<VectorBase<Scalar> > y_j = Y->col(col_j);
    // y_j *= beta
    Vt_S(y_j.ptr(), beta);
    // y_j += alpha*op(M)*x_j
    if(M_trans == NOTRANS) {
      //
      // y_j += alpha*M*x_j = alpha*M.col(0)*x_j(0) + ... + alpha*M.col(nc-1)*x_j(nc-1)
      //
      // Extract an explicit (locally detached) view of x_j
      RTOpPack::ConstSubVectorView<Scalar> x_sub_vec;
      x_j->acquireDetachedView(Range1D(), &x_sub_vec);
      // Accumulate the scaled columns of M into y_j
      for (Ordinal j = 0; j < nc; ++j )
        Vp_StV( y_j.ptr(), Scalar(alpha*x_sub_vec(j)), *this->col(j) );
      // Release the view of x (no changes to commit; x is const)
      x_j->releaseDetachedView(&x_sub_vec);
    }
    else {
      //
      //                        [ alpha*dot(M.col(0),x_j)    ]
      // y_j += alpha*M^T*x_j = [ alpha*dot(M.col(1),x_j)    ]
      //                        [ ...                        ]
      //                        [ alpha*dot(M.col(nc-1),x_j) ]
      //
      // NOTE(review): every non-NOTRANS mode (TRANS, CONJ, CONJTRANS) falls
      // into this branch — confirm the conjugation semantics of dot() are
      // what is intended for complex Scalar.
      //
      // Extract an explicit (mutable) view of y_j
      RTOpPack::SubVectorView<Scalar> y_sub_vec;
      y_j->acquireDetachedView(Range1D(), &y_sub_vec);
      // Add the dot product of each column of M with x_j into y_j
      for (Ordinal j = 0; j < nc; ++j )
        y_sub_vec(j) += alpha*dot(*this->col(j), *x_j);
      // Commit the modified explicit view of y back to y_j
      y_j->commitDetachedView(&y_sub_vec);
    }
  }
}
Teuchos::RCP<const Epetra_MultiVector> Thyra::get_Epetra_MultiVector(
  const Epetra_Map &map, const MultiVectorBase<double> &mv )
{
  // Convert a Thyra multi-vector into a (read-only) Epetra_MultiVector.
  // Fast path: if mv is SPMD-based, wrap its local data directly as an
  // Epetra "View" (no copy); otherwise fall back to the general RCP-based
  // conversion overload.
  using Teuchos::rcpWithEmbeddedObj;
  using Teuchos::rcpFromRef;
  using Teuchos::outArg;
  ArrayRCP<const double> mvData;
  Ordinal mvLeadingDim = -1;
  const SpmdMultiVectorBase<double> *mvSpmdMv = 0;
  const SpmdVectorBase<double> *mvSpmdV = 0;
  // Try the multi-vector interface first, then the single-vector interface.
  if ((mvSpmdMv = dynamic_cast<const SpmdMultiVectorBase<double>*>(&mv))) {
    mvSpmdMv->getLocalData(outArg(mvData), outArg(mvLeadingDim));
  }
  else if ((mvSpmdV = dynamic_cast<const SpmdVectorBase<double>*>(&mv))) {
    mvSpmdV->getLocalData(outArg(mvData));
    // A single vector is one column; its leading dimension is the local size.
    mvLeadingDim = mvSpmdV->spmdSpace()->localSubDim();
  }
  if (nonnull(mvData)) {
    // const_cast is needed because Epetra's View constructor takes double*
    // even for read-only use; the returned RCP is still const.
    return rcpWithEmbeddedObj(
      new Epetra_MultiVector(
        ::View,map, const_cast<double*>(mvData.getRawPtr()), mvLeadingDim,
        mv.domain()->dim()
        ),
      mvData // embedded so the Thyra view outlives the Epetra view
      );
  }
  // Not SPMD-based: delegate to the general conversion function.
  return ::Thyra::get_Epetra_MultiVector(map, rcpFromRef(mv));
}
THYRA_DEPRECATED SolveStatus<Scalar> solveTranspose(
  const LinearOpWithSolveBase<Scalar> &A,
  const EConj conj,
  const MultiVectorBase<Scalar> &B,
  MultiVectorBase<Scalar> *X,
  const SolveCriteria<Scalar> *solveCriteria = NULL
  )
{
  // Deprecated shim: forward a single-block transpose solve to the
  // block-oriented solveTranspose() member and return its one status.
  SolveCriteria<Scalar> defaultCriteria;
  BlockSolveCriteria<Scalar> blockCriteria[1];
  SolveStatus<Scalar> blockStatus[1];
  // One block spanning all RHS columns, with the caller's criteria if given.
  blockCriteria[0] = BlockSolveCriteria<Scalar>(
    solveCriteria ? *solveCriteria : defaultCriteria,
    B.domain()->dim());
  A.solveTranspose(conj, B, X, 1, blockCriteria, blockStatus);
  return blockStatus[0];
}
Teuchos::Array<typename Teuchos::ScalarTraits<Scalar>::magnitudeType>
Thyra::norms_inf( const MultiVectorBase<Scalar>& V )
{
  // Convenience overload: compute the column-wise infinity norms of V and
  // return them by value in a freshly sized array.
  typedef typename ScalarTraits<Scalar>::magnitudeType ScalarMag;
  const Ordinal numCols = V.domain()->dim();
  Array<ScalarMag> result(numCols);
  Thyra::norms_inf<Scalar>(V, result());
  return result;
}
void DefaultMultipliedLinearOp<Scalar>::applyImpl(
  const EOpTransp M_trans,
  const MultiVectorBase<Scalar> &X,
  const Ptr<MultiVectorBase<Scalar> > &Y,
  const Scalar alpha,
  const Scalar beta
  ) const
{
  // Apply the product operator M = Op[0]*Op[1]*...*Op[nOps-1] (or its
  // transpose) by chaining the constituent operators through temporary
  // multi-vectors, finishing with the alpha/beta update into Y.
  using Teuchos::rcpFromPtr;
  using Teuchos::rcpFromRef;
#ifdef TEUCHOS_DEBUG
  THYRA_ASSERT_LINEAR_OP_MULTIVEC_APPLY_SPACES(
    "DefaultMultipliedLinearOp<Scalar>::apply(...)", *this, M_trans, X, &*Y );
#endif // TEUCHOS_DEBUG
  const int nOps = Ops_.size();
  const Ordinal m = X.domain()->dim(); // number of RHS columns
  if( real_trans(M_trans)==NOTRANS ) {
    //
    // Y = alpha * M * X + beta*Y
    // =>
    // Y = alpha * op(Op[0]) * op(Op[1]) * ... * op(Op[numOps-1]) * X + beta*Y
    //
    // Apply right-to-left (k = nOps-1 down to 0).
    RCP<MultiVectorBase<Scalar> > T_kp1, T_k; // Temporary propagated between loops
    for( int k = nOps-1; k >= 0; --k ) {
      RCP<MultiVectorBase<Scalar> > Y_k;
      RCP<const MultiVectorBase<Scalar> > X_k;
      // The last (left-most) operator writes into Y; earlier ones into a temp.
      if(k==0) Y_k = rcpFromPtr(Y); else Y_k = T_k = createMembers(getOp(k)->range(), m);
      // The first (right-most) operator reads X; later ones read the prior temp.
      if(k==nOps-1) X_k = rcpFromRef(X); else X_k = T_kp1;
      // Only the final application carries the alpha/beta scaling.
      if( k > 0 )
        Thyra::apply(*getOp(k), M_trans, *X_k, Y_k.ptr());
      else
        Thyra::apply(*getOp(k), M_trans, *X_k, Y_k.ptr(), alpha, beta);
      T_kp1 = T_k; // keep this temp alive as the next iteration's input
    }
  }
  else {
    //
    // Y = alpha * M' * X + beta*Y
    // =>
    // Y = alpha * Op[numOps-1]' * Op[1]' * ... * Op[0]' * X + beta * Y
    //
    // Transposed product reverses the application order (k = 0 up to nOps-1).
    RCP<MultiVectorBase<Scalar> > T_km1, T_k; // Temporary propagated between loops
    for( int k = 0; k <= nOps-1; ++k ) {
      RCP<MultiVectorBase<Scalar> > Y_k;
      RCP<const MultiVectorBase<Scalar> > X_k;
      // The last application (k == nOps-1) writes into Y with alpha/beta.
      if(k==nOps-1) Y_k = rcpFromPtr(Y); else Y_k = T_k = createMembers(getOp(k)->domain(), m);
      if(k==0) X_k = rcpFromRef(X); else X_k = T_km1;
      if( k < nOps-1 )
        Thyra::apply(*getOp(k), M_trans, *X_k, Y_k.ptr());
      else
        Thyra::apply(*getOp(k), M_trans, *X_k, Y_k.ptr(), alpha, beta);
      T_km1 = T_k; // propagate the temp to the next iteration
    }
  }
}
SolveStatus<Scalar> DefaultMultiVectorLinearOpWithSolve<Scalar>::solveImpl(
  const EOpTransp transp,
  const MultiVectorBase<Scalar> &BB,
  const Ptr<MultiVectorBase<Scalar> > &XX,
  const Ptr<const SolveCriteria<Scalar> > solveCriteria
  ) const
{
  // Solve column-by-column: each column of the product multi-vectors BB/XX
  // wraps a multi-vector that is handed to the underlying LOWS object; the
  // per-column solve statuses are accumulated into one overall status.
  using Teuchos::dyn_cast;
  using Teuchos::outArg;
  using Teuchos::inOutArg;
  typedef DefaultMultiVectorProductVector<Scalar> MVPV;
  const Ordinal numCols = BB.domain()->dim();
  SolveStatus<Scalar> overallSolveStatus;
  accumulateSolveStatusInit(outArg(overallSolveStatus));
  for (Ordinal col_j = 0; col_j < numCols; ++col_j) {
    const RCP<const VectorBase<Scalar> > b = BB.col(col_j);
    const RCP<VectorBase<Scalar> > x = XX->col(col_j);
    // Unwrap the embedded multi-vectors (the columns must be product vectors).
    RCP<const MultiVectorBase<Scalar> >
      B = dyn_cast<const MVPV>(*b).getMultiVector().assert_not_null();
    RCP<MultiVectorBase<Scalar> >
      X = dyn_cast<MVPV>(*x).getNonconstMultiVector().assert_not_null();
    // Delegate the actual solve to the wrapped LOWS object.
    const SolveStatus<Scalar> solveStatus =
      Thyra::solve(*lows_.getConstObj(), transp, *B, X.ptr(), solveCriteria);
    // Merge this column's status into the running overall status.
    accumulateSolveStatus(
      SolveCriteria<Scalar>(), // Never used
      solveStatus, inOutArg(overallSolveStatus) );
  }
  return overallSolveStatus;
}
void DefaultDiagonalLinearOp<Scalar>::applyImpl(
  const EOpTransp M_trans,
  const MultiVectorBase<Scalar> &X,
  const Ptr<MultiVectorBase<Scalar> > &Y,
  const Scalar alpha,
  const Scalar beta
  ) const
{
  // Y = beta*Y + alpha*op(D)*X where D = diag(diag_).  Because D is
  // diagonal, op(D) differs from D only by conjugation (for complex Scalar).
  typedef Teuchos::ScalarTraits<Scalar> ST;
#ifdef TEUCHOS_DEBUG
  THYRA_ASSERT_LINEAR_OP_MULTIVEC_APPLY_SPACES(
    "DefaultDiagonalLinearOp<Scalar>::apply(...)",*this, M_trans, X, &*Y );
#endif // TEUCHOS_DEBUG
  // Y = beta * Y (skipped when beta == 1)
  if( beta != ST::one() ) scale<Scalar>(beta, Y);
  // Y += alpha *op(M) * X, one column at a time
  const Ordinal m = X.domain()->dim();
  for (Ordinal col_j = 0; col_j < m; ++col_j) {
    const RCP<const VectorBase<Scalar> > x = X.col(col_j);
    const RCP<VectorBase<Scalar> > y = Y->col(col_j);
    if (ST::isComplex) {
      if ( M_trans==NOTRANS || M_trans==TRANS ) {
        // Transposing a diagonal operator does not change its entries.
        ele_wise_prod( alpha, *diag_.getConstObj(), *x, y.ptr() );
      }
      else {
        // Remaining modes (CONJ, CONJTRANS): use the conjugated diagonal.
        ele_wise_conj_prod( alpha, *diag_.getConstObj(), *x, y.ptr() );
      }
    }
    else {
      // Real Scalar: all transpose/conjugate modes coincide.
      ele_wise_prod( alpha, *diag_.getConstObj(), *x, y.ptr() );
    }
  }
}
void doExplicitMultiVectorAdjoint( const MultiVectorBase<Scalar>& mvIn, MultiVectorBase<Scalar>* mvTransOut ) { typedef Teuchos::ScalarTraits<Scalar> ST; #ifdef TEUCHOS_DEBUG TEST_FOR_EXCEPT(0==mvTransOut); THYRA_ASSERT_VEC_SPACES("doExplicitMultiVectorAdjoint(...)", *mvIn.domain(), *mvTransOut->range() ); THYRA_ASSERT_VEC_SPACES("doExplicitMultiVectorAdjoint(...)", *mvIn.range(), *mvTransOut->domain() ); #endif ConstDetachedMultiVectorView<Scalar> dMvIn(mvIn); DetachedMultiVectorView<Scalar> dMvTransOut(*mvTransOut); const int m = dMvIn.subDim(); const int n = dMvIn.numSubCols(); for ( int j = 0; j < n; ++j ) { for ( int i = 0; i < m; ++i ) { dMvTransOut(j,i) = ST::conjugate(dMvIn(i,j)); } } }
void Thyra::reductions( const MultiVectorBase<Scalar>& V, const NormOp &op,
  const ArrayView<typename ScalarTraits<Scalar>::magnitudeType> &norms )
{
  // Apply the reduction operator op to every column of V in one pass and
  // extract the resulting scalar reduction value into norms[kc].
  using Teuchos::tuple; using Teuchos::ptrInArg; using Teuchos::null;
  const int m = V.domain()->dim();
  // One reduction target per column; op_targs holds non-owning Ptr views
  // into the RCP-owned targets.
  Array<RCP<RTOpPack::ReductTarget> > rcp_op_targs(m);
  Array<Ptr<RTOpPack::ReductTarget> > op_targs(m);
  for( int kc = 0; kc < m; ++kc ) {
    rcp_op_targs[kc] = op.reduct_obj_create();
    op_targs[kc] = rcp_op_targs[kc].ptr();
  }
  // Pure reduction: no target (output) multi-vectors are passed.
  applyOp<Scalar>(op, tuple(ptrInArg(V)),
    ArrayView<Ptr<MultiVectorBase<Scalar> > >(null),
    op_targs );
  // Extract each column's reduced scalar value.
  for( int kc = 0; kc < m; ++kc ) {
    norms[kc] = op(*op_targs[kc]);
  }
}
void SpmdMultiVectorSerializer<Scalar>::serialize(
  const MultiVectorBase<Scalar>& mv, std::ostream& out ) const
{
  // Write this process's local rows of mv to out, either column-wise in
  // binary or row-wise as text, preceded by a "localSubDim numCols" header.
  // Only SPMD-based vector spaces are supported.
  Teuchos::RCP<const SpmdVectorSpaceBase<Scalar> >
    mpi_vec_spc = Teuchos::rcp_dynamic_cast<const SpmdVectorSpaceBase<Scalar> >(mv.range());
  std::ios::fmtflags fmt(out.flags()); // save stream state; restored at exit
  out.precision(std::numeric_limits<Scalar>::digits10+4);
  if( mpi_vec_spc.get() ) {
    // This is a mpi-based vector space so let's just write the local
    // multi-vector elements (row-by-row).
    const Ordinal
      localOffset = mpi_vec_spc->localOffset(),
      localSubDim = mpi_vec_spc->localSubDim();
    const Range1D localRng( localOffset, localOffset+localSubDim-1 );
    // Explicit view of just this process's rows (all columns).
    ConstDetachedMultiVectorView<Scalar> local_mv(mv,localRng,Range1D());
    // Header: local row count and column count
    out << localSubDim << " " << local_mv.numSubCols() << std::endl;
    if( binaryMode() ) {
      // Write column-wise for better cache performance
      for( Ordinal j = 0; j < local_mv.numSubCols(); ++j )
        out.write( reinterpret_cast<const char*>(&local_mv(0,j)), sizeof(Scalar)*localSubDim );
    }
    else {
      // Write row-wise for better readability
      for( Ordinal i = 0; i < localSubDim; ++i ) {
        out << " " << i; // leading local row index
        for( Ordinal j = 0; j < local_mv.numSubCols(); ++j ) {
          out << " " << local_mv(i,j);
        }
        out << std::endl;
      }
    }
  }
  else {
    // This is a serial (or locally replicated) vector space so
    // just write all of the multi-vector elements here.
    TEUCHOS_TEST_FOR_EXCEPTION(
      true, std::logic_error, "Does not handle non-SPMD spaces yet" );
  }
  out.flags(fmt); // restore the saved format flags
}
SolveStatus<Scalar> BelosLinearOpWithSolve<Scalar>::solveImpl(
  const EOpTransp M_trans,
  const MultiVectorBase<Scalar> &B,
  const Ptr<MultiVectorBase<Scalar> > &X,
  const Ptr<const SolveCriteria<Scalar> > solveCriteria
  ) const
{
  // Solve op(fwdOp)*X = B with the configured Belos iterative solver:
  // translate the Thyra solve criteria into Belos parameters, adjust the
  // block size so orthogonalization cannot fail, run the solve, and map the
  // Belos return code back into a Thyra SolveStatus.
  TEUCHOS_FUNC_TIME_MONITOR("BelosLOWS");

  using Teuchos::rcp;
  using Teuchos::rcpFromRef;
  using Teuchos::rcpFromPtr;
  using Teuchos::FancyOStream;
  using Teuchos::OSTab;
  using Teuchos::describe;
  typedef Teuchos::ScalarTraits<Scalar> ST;
  typedef typename ST::magnitudeType ScalarMag;

  Teuchos::Time totalTimer(""), timer("");
  totalTimer.start(true);

  assertSolveSupports(*this, M_trans, solveCriteria);
  // 2010/08/22: rabartl: Bug 4915 ToDo: Move the above into the NIV function
  // solve(...).

  const int numRhs = B.domain()->dim();
  const int numEquations = B.range()->dim();

  const RCP<FancyOStream> out = this->getOStream();
  const Teuchos::EVerbosityLevel verbLevel = this->getVerbLevel();
  OSTab tab = this->getOSTab();
  if (out.get() && static_cast<int>(verbLevel) > static_cast<int>(Teuchos::VERB_LOW)) {
    *out << "\nStarting iterations with Belos:\n";
    OSTab tab2(out);
    *out << "Using forward operator = " << describe(*fwdOpSrc_->getOp(),verbLevel);
    *out << "Using iterative solver = " << describe(*iterativeSolver_,verbLevel);
    *out << "With #Eqns="<<numEquations<<", #RHSs="<<numRhs<<" ...\n";
  }

  //
  // Set RHS and LHS
  //

  bool ret = lp_->setProblem( rcpFromPtr(X), rcpFromRef(B) );
  // Fixed: use the supported TEUCHOS_-prefixed macro instead of the
  // deprecated TEST_FOR_EXCEPTION (consistent with the rest of this file).
  TEUCHOS_TEST_FOR_EXCEPTION(
    ret == false, CatastrophicSolveFailure
    ,"Error, the Belos::LinearProblem could not be set for the current solve!"
    );

  //
  // Set the solution criteria
  //

  const RCP<Teuchos::ParameterList> tmpPL = Teuchos::parameterList();

  SolveMeasureType solveMeasureType;
  RCP<GeneralSolveCriteriaBelosStatusTest<Scalar> > generalSolveCriteriaBelosStatusTest;
  if (nonnull(solveCriteria)) {
    solveMeasureType = solveCriteria->solveMeasureType;
    const ScalarMag requestedTol = solveCriteria->requestedTol;
    if (solveMeasureType.useDefault()) {
      tmpPL->set("Convergence Tolerance", defaultTol_);
    }
    else if (solveMeasureType(SOLVE_MEASURE_NORM_RESIDUAL, SOLVE_MEASURE_NORM_RHS)) {
      if (requestedTol != SolveCriteria<Scalar>::unspecifiedTolerance()) {
        tmpPL->set("Convergence Tolerance", requestedTol);
      }
      else {
        tmpPL->set("Convergence Tolerance", defaultTol_);
      }
      tmpPL->set("Explicit Residual Scaling", "Norm of RHS");
    }
    else if (solveMeasureType(SOLVE_MEASURE_NORM_RESIDUAL, SOLVE_MEASURE_NORM_INIT_RESIDUAL)) {
      if (requestedTol != SolveCriteria<Scalar>::unspecifiedTolerance()) {
        tmpPL->set("Convergence Tolerance", requestedTol);
      }
      else {
        tmpPL->set("Convergence Tolerance", defaultTol_);
      }
      tmpPL->set("Explicit Residual Scaling", "Norm of Initial Residual");
    }
    else {
      // Set the most generic (and inefficient) solve criteria
      generalSolveCriteriaBelosStatusTest = createGeneralSolveCriteriaBelosStatusTest(
        *solveCriteria, convergenceTestFrequency_);
      // Set the verbosity level (one level down)
      generalSolveCriteriaBelosStatusTest->setOStream(out);
      generalSolveCriteriaBelosStatusTest->setVerbLevel(incrVerbLevel(verbLevel, -1));
      // Set the default convergence tolerance to always converged to allow
      // the above status test to control things.
      tmpPL->set("Convergence Tolerance", 1.0);
    }
  }
  else {
    // No solveCriteria was even passed in!
    tmpPL->set("Convergence Tolerance", defaultTol_);
  }

  //
  // Reset the blocksize if we adding more vectors than half the number of equations,
  // orthogonalization will fail on the first iteration!
  //

  RCP<const Teuchos::ParameterList> solverParams = iterativeSolver_->getCurrentParameters();
  const int currBlockSize = Teuchos::getParameter<int>(*solverParams, "Block Size");
  bool isNumBlocks = false;
  int currNumBlocks = 0;
  if (Teuchos::isParameterType<int>(*solverParams, "Num Blocks")) {
    currNumBlocks = Teuchos::getParameter<int>(*solverParams, "Num Blocks");
    isNumBlocks = true;
  }
  const int newBlockSize = TEUCHOS_MIN(currBlockSize,numEquations/2);
  if (nonnull(out)
    && static_cast<int>(verbLevel) > static_cast<int>(Teuchos::VERB_NONE)
    && newBlockSize != currBlockSize)
  {
    *out << "\nAdjusted block size = " << newBlockSize << "\n";
  }
  //
  tmpPL->set("Block Size",newBlockSize);

  //
  // Set the number of Krylov blocks if we are using a GMRES solver, or a solver
  // that recognizes "Num Blocks". Otherwise the solver will throw an error!
  //

  if (isNumBlocks) {
    // Preserve the total Krylov subspace size under the new block size.
    const int Krylov_length = (currNumBlocks*currBlockSize)/newBlockSize;
    tmpPL->set("Num Blocks",Krylov_length);

    if (newBlockSize != currBlockSize) {
      if (out.get() && static_cast<int>(verbLevel) > static_cast<int>(Teuchos::VERB_NONE))
        *out << "\nAdjusted max number of Krylov basis blocks = " << Krylov_length << "\n";
    }
  }

  //
  // Solve the linear system
  //

  Belos::ReturnType belosSolveStatus;
  {
    // Send solver output to a black hole unless verbosity is enabled.
    RCP<std::ostream> outUsed =
      ( static_cast<int>(verbLevel) > static_cast<int>(Teuchos::VERB_NONE)
        ? out
        : rcp(new FancyOStream(rcp(new Teuchos::oblackholestream())))
        );
    Teuchos::OSTab tab(outUsed,1,"BELOS");
    tmpPL->set("Output Stream", outUsed);
    iterativeSolver_->setParameters(tmpPL);
    if (nonnull(generalSolveCriteriaBelosStatusTest)) {
      iterativeSolver_->setUserConvStatusTest(generalSolveCriteriaBelosStatusTest);
    }
    belosSolveStatus = iterativeSolver_->solve();
  }

  //
  // Report the solve status
  //

  totalTimer.stop();

  SolveStatus<Scalar> solveStatus;

  switch (belosSolveStatus) {
    case Belos::Unconverged: {
      solveStatus.solveStatus = SOLVE_STATUS_UNCONVERGED;
      break;
    }
    case Belos::Converged: {
      solveStatus.solveStatus = SOLVE_STATUS_CONVERGED;
      if (nonnull(generalSolveCriteriaBelosStatusTest)) {
        // Report the worst (largest) achieved tolerance over all RHSs.
        const ArrayView<const ScalarMag> achievedTol =
          generalSolveCriteriaBelosStatusTest->achievedTol();
        solveStatus.achievedTol = ST::zero();
        for (Ordinal i = 0; i < achievedTol.size(); ++i) {
          solveStatus.achievedTol = std::max(solveStatus.achievedTol, achievedTol[i]);
        }
      }
      else {
        // We converged, so the requested tolerance is a valid upper bound.
        solveStatus.achievedTol = tmpPL->get("Convergence Tolerance", defaultTol_);
      }
      break;
    }
    TEUCHOS_SWITCH_DEFAULT_DEBUG_ASSERT();
  }

  std::ostringstream ossmessage;
  ossmessage
    << "The Belos solver of type \""<<iterativeSolver_->description()
    <<"\" returned a solve status of \""<< toString(solveStatus.solveStatus) << "\""
    << " in " << iterativeSolver_->getNumIters() << " iterations"
    << " with total CPU time of " << totalTimer.totalElapsedTime() << " sec" ;
  if (out.get() && static_cast<int>(verbLevel) >=static_cast<int>(Teuchos::VERB_LOW))
    *out << "\n" << ossmessage.str() << "\n";

  solveStatus.message = ossmessage.str();

  if (out.get() && static_cast<int>(verbLevel) >= static_cast<int>(Teuchos::VERB_LOW))
    *out << "\nTotal solve time in Belos = "<<totalTimer.totalElapsedTime()<<" sec\n";

  return solveStatus;
}
void VectorDefaultBase<Scalar>::applyImpl(
  const EOpTransp M_trans,
  const MultiVectorBase<Scalar> &X,
  const Ptr<MultiVectorBase<Scalar> > &Y,
  const Scalar alpha,
  const Scalar beta
  ) const
{
  // Treat this vector as a linear operator M = m (a one-column "matrix"):
  // NOTRANS applies a scaled copy of m, (CONJ)TRANS reduces to a scalar
  // product.  Applied independently to each column of X/Y.
  typedef Teuchos::ScalarTraits<Scalar> ST;
  // Validate input
#ifdef TEUCHOS_DEBUG
  THYRA_ASSERT_LINEAR_OP_MULTIVEC_APPLY_SPACES(
    "VectorDefaultBase<Scalar>::apply()", *this, M_trans, X, &*Y);
#endif
  const Ordinal numCols = X.domain()->dim();
  for (Ordinal col_j = 0; col_j < numCols; ++col_j) {
    // Get single column vectors
    const RCP<const VectorBase<Scalar> > x = X.col(col_j);
    const RCP<VectorBase<Scalar> > y = Y->col(col_j);
    // Here M = m (where m is a column vector)
    if( M_trans == NOTRANS || (M_trans == CONJ && !ST::isComplex) ) {
      // y = beta*y + alpha*m*x  (x is a scalar since m has one column!)
#ifdef THYRA_VECTOR_VERBOSE_TO_ERROR_OUT
      THYRA_VECTOR_VERBOSE_OUT_STATEMENT;
      *dbgout << "\nThyra::VectorDefaultBase<"
              <<Teuchos::ScalarTraits<Scalar>::name()
              <<">::apply(...) : y = beta*y + alpha*m*x (x is a scalar!)\n";
#endif
      Vt_S( y.ptr(), beta );
      Vp_StV( y.ptr(), Scalar(alpha*get_ele(*x,0)), *this );
    }
    else if( M_trans == CONJTRANS || (M_trans == TRANS && !ST::isComplex) ) {
      // y = beta*y + alpha*m'*x  (y is a scalar since m' has one row!)
#ifdef THYRA_VECTOR_VERBOSE_TO_ERROR_OUT
      THYRA_VECTOR_VERBOSE_OUT_STATEMENT;
      *dbgout << "\nThyra::VectorDefaultBase<"
              <<Teuchos::ScalarTraits<Scalar>::name()
              <<">::apply(...) : y = beta*y + alpha*m'*x (y is a scalar!)\n";
#endif
      // Avoid reading y when beta == 0 (y may be uninitialized).
      Scalar y_inout;
      if( beta == ST::zero() ) {
        y_inout = ST::zero();
      }
      else {
        y_inout = beta*get_ele(*y,0);
      }
      // Optionally enable RTOp dump output around the scalar product.
#if defined(THYRA_VECTOR_VERBOSE_TO_ERROR_OUT) && defined(RTOPPACK_SPMD_APPLY_OP_DUMP)
      RTOpPack::show_spmd_apply_op_dump = true;
#endif
#if defined(THYRA_VECTOR_VERBOSE_TO_ERROR_OUT) && defined(RTOPPACK_RTOPT_HELPER_DUMP_OUTPUT)
      RTOpPack::rtop_helpers_dump_all = true;
#endif
      y_inout += alpha * this->space()->scalarProd(*this, *x);
#if defined(THYRA_VECTOR_VERBOSE_TO_ERROR_OUT) && defined(RTOPPACK_SPMD_APPLY_OP_DUMP)
      RTOpPack::show_spmd_apply_op_dump = false;
#endif
#if defined(THYRA_VECTOR_VERBOSE_TO_ERROR_OUT) && defined(RTOPPACK_RTOPT_HELPER_DUMP_OUTPUT)
      RTOpPack::rtop_helpers_dump_all = false;
#endif
      set_ele(0, y_inout, y.ptr());
#ifdef THYRA_VECTOR_VERBOSE_TO_ERROR_OUT
      *dbgout << "\nThyra::VectorDefaultBase<"<<ST::name()<<">::apply(...) : y_inout = "
              << y_inout << "\n";
#endif
    }
    else {
      // Remaining modes (complex TRANS or CONJ) are not implemented here.
      TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
        "VectorBase<"<<ST::name()<<">::apply(M_trans,...): Error, M_trans="
        <<toString(M_trans)<<" not supported!"
        );
    }
  }
}
SolveStatus<Scalar> BelosLinearOpWithSolve<Scalar>::solveImpl(
  const EOpTransp M_trans,
  const MultiVectorBase<Scalar> &B,
  const Ptr<MultiVectorBase<Scalar> > &X,
  const Ptr<const SolveCriteria<Scalar> > solveCriteria
  ) const
{
  // Solve op(fwdOp)*X = B with the configured Belos iterative solver:
  // translate the Thyra solve criteria into Belos parameters, run the
  // solve, and map the Belos return code (plus the achieved tolerance and
  // iteration count) back into a Thyra SolveStatus.
  THYRA_FUNC_TIME_MONITOR("Stratimikos: BelosLOWS");

  using Teuchos::rcp;
  using Teuchos::rcpFromRef;
  using Teuchos::rcpFromPtr;
  using Teuchos::FancyOStream;
  using Teuchos::OSTab;
  using Teuchos::ParameterList;
  using Teuchos::parameterList;
  using Teuchos::describe;
  typedef Teuchos::ScalarTraits<Scalar> ST;
  typedef typename ST::magnitudeType ScalarMag;

  Teuchos::Time totalTimer(""), timer("");
  totalTimer.start(true);

  assertSolveSupports(*this, M_trans, solveCriteria);
  // 2010/08/22: rabartl: Bug 4915 ToDo: Move the above into the NIV function
  // solve(...).

  const RCP<FancyOStream> out = this->getOStream();
  const Teuchos::EVerbosityLevel verbLevel = this->getVerbLevel();
  OSTab tab = this->getOSTab();
  if (out.get() && static_cast<int>(verbLevel) > static_cast<int>(Teuchos::VERB_LOW)) {
    *out << "\nStarting iterations with Belos:\n";
    OSTab tab2(out);
    *out << "Using forward operator = " << describe(*fwdOpSrc_->getOp(),verbLevel);
    *out << "Using iterative solver = " << describe(*iterativeSolver_,verbLevel);
    *out << "With #Eqns="<<B.range()->dim()<<", #RHSs="<<B.domain()->dim()<<" ...\n";
  }

  //
  // Set RHS and LHS
  //

  bool ret = lp_->setProblem( rcpFromPtr(X), rcpFromRef(B) );
  TEUCHOS_TEST_FOR_EXCEPTION(
    ret == false, CatastrophicSolveFailure
    ,"Error, the Belos::LinearProblem could not be set for the current solve!"
    );

  //
  // Set the solution criteria
  //

  // Parameter list for the current solve.
  const RCP<ParameterList> tmpPL = Teuchos::parameterList();

  // The solver's valid parameter list.
  RCP<const ParameterList> validPL = iterativeSolver_->getValidParameters();

  SolveMeasureType solveMeasureType;
  RCP<GeneralSolveCriteriaBelosStatusTest<Scalar> > generalSolveCriteriaBelosStatusTest;
  if (nonnull(solveCriteria)) {
    solveMeasureType = solveCriteria->solveMeasureType;
    const ScalarMag requestedTol = solveCriteria->requestedTol;
    if (solveMeasureType.useDefault()) {
      tmpPL->set("Convergence Tolerance", defaultTol_);
    }
    else if (solveMeasureType(SOLVE_MEASURE_NORM_RESIDUAL, SOLVE_MEASURE_NORM_RHS)) {
      if (requestedTol != SolveCriteria<Scalar>::unspecifiedTolerance()) {
        tmpPL->set("Convergence Tolerance", requestedTol);
      }
      else {
        tmpPL->set("Convergence Tolerance", defaultTol_);
      }
      setResidualScalingType (tmpPL, validPL, "Norm of RHS");
    }
    else if (solveMeasureType(SOLVE_MEASURE_NORM_RESIDUAL, SOLVE_MEASURE_NORM_INIT_RESIDUAL)) {
      if (requestedTol != SolveCriteria<Scalar>::unspecifiedTolerance()) {
        tmpPL->set("Convergence Tolerance", requestedTol);
      }
      else {
        tmpPL->set("Convergence Tolerance", defaultTol_);
      }
      setResidualScalingType (tmpPL, validPL, "Norm of Initial Residual");
    }
    else {
      // Set the most generic (and inefficient) solve criteria
      generalSolveCriteriaBelosStatusTest = createGeneralSolveCriteriaBelosStatusTest(
        *solveCriteria, convergenceTestFrequency_);
      // Set the verbosity level (one level down)
      generalSolveCriteriaBelosStatusTest->setOStream(out);
      generalSolveCriteriaBelosStatusTest->setVerbLevel(incrVerbLevel(verbLevel, -1));
      // Set the default convergence tolerance to always converged to allow
      // the above status test to control things.
      tmpPL->set("Convergence Tolerance", 1.0);
    }
    // maximum iterations
    if (nonnull(solveCriteria->extraParameters)) {
      if (Teuchos::isParameterType<int>(*solveCriteria->extraParameters,"Maximum Iterations")) {
        tmpPL->set("Maximum Iterations",
          Teuchos::get<int>(*solveCriteria->extraParameters,"Maximum Iterations"));
      }
    }
  }
  else {
    // No solveCriteria was even passed in!
    tmpPL->set("Convergence Tolerance", defaultTol_);
  }

  //
  // Solve the linear system
  //

  Belos::ReturnType belosSolveStatus;
  {
    // Send solver output to a black hole unless verbosity is enabled.
    RCP<std::ostream> outUsed =
      ( static_cast<int>(verbLevel) > static_cast<int>(Teuchos::VERB_LOW)
        ? out
        : rcp(new FancyOStream(rcp(new Teuchos::oblackholestream())))
        );
    Teuchos::OSTab tab1(outUsed,1,"BELOS");
    tmpPL->set("Output Stream", outUsed);
    iterativeSolver_->setParameters(tmpPL);
    if (nonnull(generalSolveCriteriaBelosStatusTest)) {
      iterativeSolver_->setUserConvStatusTest(generalSolveCriteriaBelosStatusTest);
    }
    belosSolveStatus = iterativeSolver_->solve();
  }

  //
  // Report the solve status
  //

  totalTimer.stop();

  SolveStatus<Scalar> solveStatus;

  switch (belosSolveStatus) {
    case Belos::Unconverged: {
      solveStatus.solveStatus = SOLVE_STATUS_UNCONVERGED;
      // Set achievedTol even if the solver did not converge.  This is
      // helpful for things like nonlinear solvers, which might be
      // able to use a partially converged result, and which would
      // like to know the achieved convergence tolerance for use in
      // computing bounds.  It's also helpful for estimating whether a
      // small increase in the maximum iteration count might be
      // helpful next time.
      try {
        // Some solvers might not have implemented achievedTol().
        // The default implementation throws std::runtime_error.
        solveStatus.achievedTol = iterativeSolver_->achievedTol();
      } catch (std::runtime_error&) {
        // Do nothing; use the default value of achievedTol.
      }
      break;
    }
    case Belos::Converged: {
      solveStatus.solveStatus = SOLVE_STATUS_CONVERGED;
      if (nonnull(generalSolveCriteriaBelosStatusTest)) {
        // The user set a custom status test.  This means that we
        // should ask the custom status test itself, rather than the
        // Belos solver, what the final achieved convergence tolerance
        // was.
        const ArrayView<const ScalarMag> achievedTol =
          generalSolveCriteriaBelosStatusTest->achievedTol();
        solveStatus.achievedTol = ST::zero();
        for (Ordinal i = 0; i < achievedTol.size(); ++i) {
          solveStatus.achievedTol = std::max(solveStatus.achievedTol, achievedTol[i]);
        }
      }
      else {
        try {
          // Some solvers might not have implemented achievedTol().
          // The default implementation throws std::runtime_error.
          solveStatus.achievedTol = iterativeSolver_->achievedTol();
        } catch (std::runtime_error&) {
          // Use the default convergence tolerance.  This is a correct
          // upper bound, since we did actually converge.
          solveStatus.achievedTol = tmpPL->get("Convergence Tolerance", defaultTol_);
        }
      }
      break;
    }
    TEUCHOS_SWITCH_DEFAULT_DEBUG_ASSERT();
  }

  std::ostringstream ossmessage;
  ossmessage
    << "The Belos solver of type \""<<iterativeSolver_->description()
    <<"\" returned a solve status of \""<< toString(solveStatus.solveStatus) << "\""
    << " in " << iterativeSolver_->getNumIters() << " iterations"
    << " with total CPU time of " << totalTimer.totalElapsedTime() << " sec" ;
  if (out.get() && static_cast<int>(verbLevel) > static_cast<int>(Teuchos::VERB_NONE))
    *out << "\n" << ossmessage.str() << "\n";

  solveStatus.message = ossmessage.str();

  // Dump the getNumIters() and the achieved convergence tolerance
  // into solveStatus.extraParameters, as the "Belos/Iteration Count"
  // resp. "Belos/Achieved Tolerance" parameters.
  // (Fixed: removed the stray '\' line-continuation characters that glued
  // the trailing comments onto the two set() statement lines.)
  if (solveStatus.extraParameters.is_null()) {
    solveStatus.extraParameters = parameterList ();
  }
  solveStatus.extraParameters->set ("Belos/Iteration Count",
    iterativeSolver_->getNumIters());
  // package independent version of the same
  solveStatus.extraParameters->set ("Iteration Count",
    iterativeSolver_->getNumIters());
  // NOTE (mfh 13 Dec 2011) Though the most commonly used Belos
  // solvers do implement achievedTol(), some Belos solvers currently
  // do not.  In the latter case, if the solver did not converge, the
  // reported achievedTol() value may just be the default "invalid"
  // value -1, and if the solver did converge, the reported value will
  // just be the convergence tolerance (a correct upper bound).
  solveStatus.extraParameters->set ("Belos/Achieved Tolerance",
    solveStatus.achievedTol);

  // This information is in the previous line, which is printed anytime the verbosity
  // is not set to Teuchos::VERB_NONE, so I'm commenting this out for now.
  // if (out.get() && static_cast<int>(verbLevel) > static_cast<int>(Teuchos::VERB_NONE))
  //   *out << "\nTotal solve time in Belos = "<<totalTimer.totalElapsedTime()<<" sec\n";

  return solveStatus;
}
bool SpmdMultiVectorSerializer<Scalar>::isCompatible(
  const MultiVectorBase<Scalar> &mv
  ) const
{
  // A multi-vector is serializable by this class only if its range space
  // is SPMD-based.
  const SpmdVectorSpaceBase<Scalar>* const spmdSpace =
    dynamic_cast<const SpmdVectorSpaceBase<Scalar>*>(&*mv.range());
  return spmdSpace != 0;
}
// Euclidean (dot-product based) apply for an SPMD (distributed-memory)
// multi-vector M:  Y = beta*Y + alpha*op(M)*X.
//
// NOTRANS: Y is distributed like M's range and X is locally replicated; the
// product is purely local (no communication).  TRANS: X is distributed and Y
// is locally replicated; each process computes its local contribution and a
// global sum-reduction combines them.
void SpmdMultiVectorBase<Scalar>::euclideanApply(
  const EOpTransp M_trans,
  const MultiVectorBase<Scalar> &X,
  const Ptr<MultiVectorBase<Scalar> > &Y,
  const Scalar alpha,
  const Scalar beta
  ) const
{
  typedef Teuchos::ScalarTraits<Scalar> ST;
  using Teuchos::Workspace;
  // Workspace store used for the temporary local buffers allocated below.
  Teuchos::WorkspaceStore* wss = Teuchos::get_default_workspace_store().get();
#ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
  Teuchos::Time timerTotal("dummy",true);
  Teuchos::Time timer("dummy");
#endif
  //
  // This function performs one of two operations.
  //
  // The first operation (M_trans == NOTRANS) is:
  //
  //     Y = beta * Y + alpha * M * X
  //
  // where Y and M have compatible (distributed?) range vector
  // spaces and X is a locally replicated serial multi-vector. This
  // operation does not require any global communication.
  //
  // The second operation (M_trans == TRANS) is:
  //
  //     Y = beta * Y + alpha * M' * X
  //
  // where M and X have compatible (distributed?) range vector spaces
  // and Y is a locally replicated serial multi-vector. This operation
  // requires a local reduction.
  //

  //
  // Get spaces and validate compatibility
  //

  // Get the SpmdVectorSpace
  const SpmdVectorSpaceBase<Scalar> &spmdSpc = *this->spmdSpace();

  // Get the Spmd communicator
  const RCP<const Teuchos::Comm<Ordinal> > comm = spmdSpc.getComm();
#ifdef TEUCHOS_DEBUG
  const VectorSpaceBase<Scalar>
    &Y_range = *Y->range(),
    &X_range = *X.range();
  // std::cout << "SpmdMultiVectorBase<Scalar>::apply(...): comm = " << comm << std::endl;
  TEUCHOS_TEST_FOR_EXCEPTION(
    ( globalDim_ > localSubDim_ ) && comm.get()==NULL, std::logic_error
    ,"SpmdMultiVectorBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
    );
  // ToDo: Write a good general validation function that I can call that will replace
  // all of these TEUCHOS_TEST_FOR_EXCEPTION(...) uses
  TEUCHOS_TEST_FOR_EXCEPTION(
    real_trans(M_trans)==NOTRANS && !spmdSpc.isCompatible(Y_range), Exceptions::IncompatibleVectorSpaces
    ,"SpmdMultiVectorBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
    );
  TEUCHOS_TEST_FOR_EXCEPTION(
    real_trans(M_trans)==TRANS && !spmdSpc.isCompatible(X_range), Exceptions::IncompatibleVectorSpaces
    ,"SpmdMultiVectorBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
    );
#endif

  //
  // Get explicit (local) views of Y, M and X
  //
#ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
  timer.start();
#endif
  // For NOTRANS the rows of Y viewed here are this process's block of the
  // distributed range; for TRANS, Y is locally replicated so view all of it.
  // NOTE: Y_local is a read/write detached view; its destructor commits the
  // local values back into Y at the end of this function.
  DetachedMultiVectorView<Scalar>
    Y_local(
      *Y,
      real_trans(M_trans)==NOTRANS ? Range1D(localOffset_,localOffset_+localSubDim_-1) : Range1D(),
      Range1D()
      );
  ConstDetachedMultiVectorView<Scalar>
    M_local(
      *this,
      Range1D(localOffset_,localOffset_+localSubDim_-1),
      Range1D()
      );
  // Symmetric to Y_local: for TRANS it is X that is distributed, so only this
  // process's block of X is viewed.
  ConstDetachedMultiVectorView<Scalar>
    X_local(
      X
      ,real_trans(M_trans)==NOTRANS ? Range1D() : Range1D(localOffset_,localOffset_+localSubDim_-1)
      ,Range1D()
      );
#ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
  timer.stop();
  std::cout << "\nSpmdMultiVectorBase<Scalar>::apply(...): Time for getting view = " << timer.totalElapsedTime() << " seconds\n";
#endif
#ifdef TEUCHOS_DEBUG
  TEUCHOS_TEST_FOR_EXCEPTION(
    real_trans(M_trans)==NOTRANS && ( M_local.numSubCols() != X_local.subDim() || X_local.numSubCols() != Y_local.numSubCols() )
    , Exceptions::IncompatibleVectorSpaces
    ,"SpmdMultiVectorBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
    );
  TEUCHOS_TEST_FOR_EXCEPTION(
    real_trans(M_trans)==TRANS && ( M_local.subDim() != X_local.subDim() || X_local.numSubCols() != Y_local.numSubCols() )
    , Exceptions::IncompatibleVectorSpaces
    ,"SpmdMultiVectorBase<Scalar>::apply(...MultiVectorBase<Scalar>...): Error!"
    );
#endif

  //
  // If nonlocal (i.e. M_trans==TRANS) then create temporary storage
  // for:
  //
  //     Y_local_tmp = alpha * M(local) * X(local) : on nonroot processes
  //
  // or
  //
  //     Y_local_tmp = beta*Y_local + alpha * M(local) * X(local) : on root process (localOffset_==0)
  //
  // and set
  //
  //     localBeta = ( localOffset_ == 0 ? beta : 0.0 )
  //
  // Above, we choose localBeta such that we will only perform
  // Y_local = beta * Y_local + ... on one process (the root
  // process where localOffset_==0). Then, when we add up Y_local
  // on all of the processors and we will get the correct result.
  //
  // If strictly local (i.e. M_trans == NOTRANS) then set:
  //
  //     Y_local_tmp = Y_local
  //     localBeta = beta
  //
#ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
  timer.start();
#endif
  Workspace<Scalar> Y_local_tmp_store(wss, Y_local.subDim()*Y_local.numSubCols(), false);
  RTOpPack::SubMultiVectorView<Scalar> Y_local_tmp;
  Scalar localBeta;
  if( real_trans(M_trans) == TRANS && globalDim_ > localSubDim_ ) {
    // Nonlocal
    Y_local_tmp.initialize(
      0, Y_local.subDim(), 0, Y_local.numSubCols(),
      Teuchos::arcpFromArrayView(Y_local_tmp_store()),
      Y_local.subDim() // leadingDim == subDim (columns are adjacent)
      );
    if( localOffset_ == 0 ) {
      // Root process: Must copy Y_local into Y_local_tmp
      for( int j = 0; j < Y_local.numSubCols(); ++j ) {
        Scalar *Y_local_j = Y_local.values() + Y_local.leadingDim()*j;
        std::copy( Y_local_j, Y_local_j + Y_local.subDim(),
          Y_local_tmp.values() + Y_local_tmp.leadingDim()*j );
      }
      localBeta = beta;
    }
    else {
      // Not the root process
      localBeta = 0.0;
    }
  }
  else {
    // Local
    Y_local_tmp = Y_local.smv(); // Shallow copy only!
    localBeta = beta;
  }
#ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
  timer.stop();
  std::cout << "\nSpmdMultiVectorBase<Scalar>::apply(...): Time for setting up Y_local_tmp and localBeta = " << timer.totalElapsedTime() << " seconds\n";
#endif

  //
  // Perform the local multiplication:
  //
  //     Y(local) = localBeta * Y(local) + alpha * op(M(local)) * X(local)
  //
  // or in BLAS lingo:
  //
  //     C = beta * C + alpha * op(A) * op(B)
  //
#ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
  timer.start();
#endif
  // Map the Thyra transpose enum onto the Teuchos BLAS enum.  CONJTRANS is
  // only meaningful for complex scalars; for real scalars only the real
  // transpose (via real_trans) matters.
  Teuchos::ETransp t_transp;
  if(ST::isComplex) {
    switch(M_trans) {
      case NOTRANS: t_transp = Teuchos::NO_TRANS; break;
      case TRANS: t_transp = Teuchos::TRANS; break;
      case CONJTRANS: t_transp = Teuchos::CONJ_TRANS; break;
      default: TEUCHOS_TEST_FOR_EXCEPT(true);
    }
  }
  else {
    switch(real_trans(M_trans)) {
      case NOTRANS: t_transp = Teuchos::NO_TRANS; break;
      case TRANS: t_transp = Teuchos::TRANS; break;
      default: TEUCHOS_TEST_FOR_EXCEPT(true);
    }
  }
  if (M_local.numSubCols() > 0) {
    // AGS: Added std::max on ld? below, following what is done in
    // Epetra_MultiVector Multiply use of GEMM. Allows for 0 length.
    blas_.GEMM(
      t_transp // TRANSA
      ,Teuchos::NO_TRANS // TRANSB
      ,Y_local.subDim() // M
      ,Y_local.numSubCols() // N
      ,real_trans(M_trans)==NOTRANS ? M_local.numSubCols() : M_local.subDim() // K
      ,alpha // ALPHA
      ,const_cast<Scalar*>(M_local.values()) // A
      ,std::max((int) M_local.leadingDim(),1) // LDA
      ,const_cast<Scalar*>(X_local.values()) // B
      ,std::max((int) X_local.leadingDim(),1) // LDB
      ,localBeta // BETA
      ,Y_local_tmp.values().get() // C
      ,std::max((int) Y_local_tmp.leadingDim(),1) // LDC
      );
  }
  else {
    // Zero columns in M: the product contributes nothing, so explicitly zero
    // the temporary target.  NOTE(review): this zeroes rather than scaling by
    // localBeta — confirm that is the intent for the zero-column case.
    std::fill( Y_local_tmp.values().begin(), Y_local_tmp.values().end(),
      ST::zero() );
  }
#ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
  timer.stop();
  std::cout << "\nSpmdMultiVectorBase<Scalar>::apply(...): Time for GEMM = " << timer.totalElapsedTime() << " seconds\n";
#endif
  if( comm.get() ) {
    //
    // Perform the global reduction of Y_local_tmp back into Y_local
    //
    if( real_trans(M_trans)==TRANS && globalDim_ > localSubDim_ ) {
      // Contiguous buffer for final reduction
      Workspace<Scalar> Y_local_final_buff(wss,Y_local.subDim()*Y_local.numSubCols(),false);
      // Perform the reduction
      Teuchos::reduceAll<Ordinal,Scalar>(
        *comm,Teuchos::REDUCE_SUM,Y_local_final_buff.size(),Y_local_tmp.values().get(),
        &Y_local_final_buff[0]
        );
      // Load Y_local_final_buff back into Y_local (column by column, honoring
      // Y_local's leading dimension).
      const Scalar *Y_local_final_buff_ptr = &Y_local_final_buff[0];
      for( int j = 0; j < Y_local.numSubCols(); ++j ) {
        Scalar *Y_local_ptr = Y_local.values() + Y_local.leadingDim()*j;
        for( int i = 0; i < Y_local.subDim(); ++i ) {
          (*Y_local_ptr++) = (*Y_local_final_buff_ptr++);
        }
      }
    }
  }
  else {
    // When you get here the view Y_local will be committed back to Y
    // in the destructor to Y_local
  }
#ifdef THYRA_SPMD_MULTI_VECTOR_BASE_PRINT_TIMES
  timer.stop();
  std::cout << "\nSpmdMultiVectorBase<Scalar>::apply(...): Total time = " << timerTotal.totalElapsedTime() << " seconds\n";
#endif
}
// Solve op(M)*X = B with AztecOO and return the aggregate solve status.
// AztecOO handles only a single RHS at a time, so each column of B/X gets
// its own Iterate() call; iteration counts are accumulated and the worst
// achieved tolerance over all columns is reported.
SolveStatus<double> AztecOOLinearOpWithSolve::solveImpl(
  const EOpTransp M_trans,
  const MultiVectorBase<double> &B,
  const Ptr<MultiVectorBase<double> > &X,
  const Ptr<const SolveCriteria<double> > solveCriteria
  ) const
{
  using Teuchos::rcp;
  using Teuchos::rcpFromRef;
  using Teuchos::rcpFromPtr;
  using Teuchos::OSTab;
  typedef SolveCriteria<double> SC;
  typedef SolveStatus<double> SS;

  THYRA_FUNC_TIME_MONITOR("Stratimikos: AztecOOLOWS");
  Teuchos::Time totalTimer(""), timer("");
  totalTimer.start(true);

  RCP<Teuchos::FancyOStream> out = this->getOStream();
  Teuchos::EVerbosityLevel verbLevel = this->getVerbLevel();
  OSTab tab = this->getOSTab();
  if (out.get() && static_cast<int>(verbLevel) > static_cast<int>(Teuchos::VERB_NONE))
    *out << "\nSolving block system using AztecOO ...\n\n";

  //
  // Validate input
  //
  TEUCHOS_ASSERT(this->solveSupportsImpl(M_trans));
  SolveMeasureType solveMeasureType;
  if (nonnull(solveCriteria)) {
    solveMeasureType = solveCriteria->solveMeasureType;
    assertSupportsSolveMeasureType(*this, M_trans, solveMeasureType);
  }

  //
  // Get the transpose argument
  //
  const EOpTransp aztecOpTransp = real_trans(M_trans);

  //
  // Get the solver, operator, and preconditioner that we will use
  //
  // Separate AztecOO solver objects are kept for the forward and adjoint
  // systems; pick the one matching the requested transpose mode.
  RCP<AztecOO>
    aztecSolver = ( aztecOpTransp == NOTRANS ? aztecFwdSolver_ : aztecAdjSolver_ );
  const Epetra_Operator
    *aztecOp = aztecSolver->GetUserOperator();

  //
  // Get the op(...) range and domain maps
  //
  const Epetra_Map
    &opRangeMap = aztecOp->OperatorRangeMap(),
    &opDomainMap = aztecOp->OperatorDomainMap();

  //
  // Get the convergence criteria
  //
  // Start from the configured defaults, then let any caller-supplied
  // SolveCriteria override the tolerance and iteration limit.
  double tol = ( aztecOpTransp==NOTRANS ? fwdDefaultTol() : adjDefaultTol() );
  int maxIterations = ( aztecOpTransp==NOTRANS ? fwdDefaultMaxIterations() : adjDefaultMaxIterations() );
  bool isDefaultSolveCriteria = true;
  if (nonnull(solveCriteria)) {
    if ( solveCriteria->requestedTol != SC::unspecifiedTolerance() ) {
      tol = solveCriteria->requestedTol;
      isDefaultSolveCriteria = false;
    }
    if (nonnull(solveCriteria->extraParameters)) {
      maxIterations = solveCriteria->extraParameters->get("Maximum Iterations",maxIterations);
    }
  }

  //
  // Get Epetra_MultiVector views of B and X
  //
  RCP<const Epetra_MultiVector> epetra_B;
  RCP<Epetra_MultiVector> epetra_X;

  // If the Aztec operator is an EpetraOperatorWrapper, B and X are general
  // Thyra objects and must be copied column by column below; otherwise true
  // Epetra views can be taken up front.
  const EpetraOperatorWrapper* opWrapper =
    dynamic_cast<const EpetraOperatorWrapper*>(aztecOp);

  if (opWrapper == 0) {
    epetra_B = get_Epetra_MultiVector(opRangeMap, rcpFromRef(B));
    epetra_X = get_Epetra_MultiVector(opDomainMap, rcpFromPtr(X));
  }

  //
  // Use AztecOO to solve each RHS one at a time (which is all that I can do anyway)
  //

  int totalIterations = 0;
  SolveStatus<double> solveStatus;
  solveStatus.solveStatus = SOLVE_STATUS_CONVERGED;
  solveStatus.achievedTol = -1.0;

  /* Get the number of columns in the multivector. We use Thyra
   * functions rather than Epetra functions to do this, as we
   * might not yet have created an Epetra multivector. - KL */
  //const int m = epetra_B->NumVectors();
  const int m = B.domain()->dim();

  for( int j = 0; j < m; ++j ) {

    THYRA_FUNC_TIME_MONITOR_DIFF("Stratimikos: AztecOOLOWS:SingleSolve", SingleSolve);

    //
    // Get Epetra_Vector views of B(:,j) and X(:,j)
    // How this is done will depend on whether we have a true Epetra operator
    // or we are wrapping a general Thyra operator in an Epetra operator.
    //

    // We need to declare epetra_x_j as non-const because when we have a phony
    // Epetra operator we'll have to copy a thyra vector into it.
    RCP<Epetra_Vector> epetra_b_j;
    RCP<Epetra_Vector> epetra_x_j;

    if (opWrapper == 0) {
      epetra_b_j = rcpFromRef(*const_cast<Epetra_Vector*>((*epetra_B)(j)));
      epetra_x_j = rcpFromRef(*(*epetra_X)(j));
    }
    else {
      if (is_null(epetra_b_j)) {
        epetra_b_j = rcp(new Epetra_Vector(opRangeMap));
        epetra_x_j = rcp(new Epetra_Vector(opDomainMap));
      }
      opWrapper->copyThyraIntoEpetra(*B.col(j), *epetra_b_j);
      opWrapper->copyThyraIntoEpetra(*X->col(j), *epetra_x_j);
    }

    //
    // Set the RHS and LHS
    //
    aztecSolver->SetRHS(&*epetra_b_j);
    aztecSolver->SetLHS(&*epetra_x_j);

    //
    // Solve the linear system
    //
    timer.start(true);
    {
      // Scoped object that configures Aztec output/convergence options for
      // this solve and restores them on destruction.
      SetAztecSolveState
        setAztecSolveState(aztecSolver,out,verbLevel,solveMeasureType);
      aztecSolver->Iterate( maxIterations, tol );
      // NOTE: We ignore the returned status but get it below
    }
    timer.stop();

    //
    // Scale the solution
    // (Originally, this was at the end of the loop after all columns had been
    // processed. It's moved here because we need to do it before copying the
    // solution back into a Thyra vector. - KL
    //
    if (aztecSolverScalar_ != 1.0)
      epetra_x_j->Scale(1.0/aztecSolverScalar_);

    //
    // If necessary, convert the solution back to a non-epetra vector
    //
    if (opWrapper != 0) {
      opWrapper->copyEpetraIntoThyra(*epetra_x_j, X->col(j).ptr());
    }

    //
    // Set the return solve status
    //
    const int iterations = aztecSolver->NumIters();
    const double achievedTol = aztecSolver->ScaledResidual();
    // Decode Aztec's termination reason (AZ_why entry of the status array).
    const double *AZ_status = aztecSolver->GetAztecStatus();
    std::ostringstream oss;
    bool converged = false;
    if (AZ_status[AZ_why]==AZ_normal) { oss << "Aztec returned AZ_normal."; converged = true; }
    else if (AZ_status[AZ_why]==AZ_param) oss << "Aztec returned AZ_param.";
    else if (AZ_status[AZ_why]==AZ_breakdown) oss << "Aztec returned AZ_breakdown.";
    else if (AZ_status[AZ_why]==AZ_loss) oss << "Aztec returned AZ_loss.";
    else if (AZ_status[AZ_why]==AZ_ill_cond) oss << "Aztec returned AZ_ill_cond.";
    else if (AZ_status[AZ_why]==AZ_maxits) oss << "Aztec returned AZ_maxits.";
    else oss << "Aztec returned an unknown status?";
    oss << " Iterations = " << iterations << ".";
    oss << " Achieved Tolerance = " << achievedTol << ".";
    oss << " Total time = " << timer.totalElapsedTime() << " sec.";
    if (out.get() && static_cast<int>(verbLevel) > static_cast<int>(Teuchos::VERB_NONE) && outputEveryRhs())
      Teuchos::OSTab(out).o() << "j="<<j<<": " << oss.str() << "\n";

    solveStatus.achievedTol = TEUCHOS_MAX(solveStatus.achievedTol, achievedTol);
    // Note, achieveTol may actually be greater than tol due to ill conditioning and roundoff!
    totalIterations += iterations;
    solveStatus.message = oss.str();
    // Overall status is the conjunction over all columns: one unconverged
    // column makes the whole solve unconverged.
    if ( isDefaultSolveCriteria ) {
      switch(solveStatus.solveStatus) {
        case SOLVE_STATUS_UNKNOWN:
          // Leave overall unknown!
          break;
        case SOLVE_STATUS_CONVERGED:
          solveStatus.solveStatus = ( converged ? SOLVE_STATUS_CONVERGED : SOLVE_STATUS_UNCONVERGED );
          break;
        case SOLVE_STATUS_UNCONVERGED:
          // Leave overall unconverged!
          break;
        default:
          TEUCHOS_TEST_FOR_EXCEPT(true); // Should never get here!
      }
    }
  }

  aztecSolver->UnsetLHSRHS();

  //
  // Release the Epetra_MultiVector views of X and B
  //
  epetra_X = Teuchos::null;
  epetra_B = Teuchos::null;

  //
  // Update the overall solve criteria
  //
  totalTimer.stop();
  SolveStatus<double> overallSolveStatus;
  if (isDefaultSolveCriteria) {
    overallSolveStatus.solveStatus = SOLVE_STATUS_UNKNOWN;
    overallSolveStatus.achievedTol = SS::unknownTolerance();
  }
  else {
    overallSolveStatus.solveStatus = solveStatus.solveStatus;
    overallSolveStatus.achievedTol = solveStatus.achievedTol;
  }
  std::ostringstream oss;
  oss
    << "AztecOO solver "
    << ( overallSolveStatus.solveStatus==SOLVE_STATUS_CONVERGED ? "converged" : "unconverged" )
    << " on m = "<<m<<" RHSs using " << totalIterations << " cumulative iterations"
    << " for an average of " << (totalIterations/m) << " iterations/RHS and"
    << " total CPU time of "<<totalTimer.totalElapsedTime()<<" sec.";
  overallSolveStatus.message = oss.str();

  // Added these statistics following what was done for Belos
  if (overallSolveStatus.extraParameters.is_null()) {
    overallSolveStatus.extraParameters = Teuchos::parameterList ();
  }
  overallSolveStatus.extraParameters->set ("AztecOO/Iteration Count",
    totalIterations);
  // package independent version of the same
  overallSolveStatus.extraParameters->set ("Iteration Count",
    totalIterations);
  overallSolveStatus.extraParameters->set ("AztecOO/Achieved Tolerance",
    overallSolveStatus.achievedTol);

  //
  // Report the overall time
  //
  if (out.get() && static_cast<int>(verbLevel) >= static_cast<int>(Teuchos::VERB_LOW))
    *out
      << "\nTotal solve time = "<<totalTimer.totalElapsedTime()<<" sec\n";

  return overallSolveStatus;
}
void EpetraLinearOp::applyImpl( const EOpTransp M_trans, const MultiVectorBase<double> &X_in, const Ptr<MultiVectorBase<double> > &Y_inout, const double alpha, const double beta ) const { THYRA_FUNC_TIME_MONITOR("Thyra::EpetraLinearOp::euclideanApply"); const EOpTransp real_M_trans = real_trans(M_trans); #ifdef TEUCHOS_DEBUG TEUCHOS_TEST_FOR_EXCEPT(!isFullyInitialized_); THYRA_ASSERT_LINEAR_OP_MULTIVEC_APPLY_SPACES( "EpetraLinearOp::euclideanApply(...)", *this, M_trans, X_in, Y_inout ); TEUCHOS_TEST_FOR_EXCEPTION( real_M_trans==TRANS && adjointSupport_==EPETRA_OP_ADJOINT_UNSUPPORTED, Exceptions::OpNotSupported, "EpetraLinearOp::apply(...): *this was informed that adjoints " "are not supported when initialized." ); #endif const RCP<const VectorSpaceBase<double> > XY_domain = X_in.domain(); const int numCols = XY_domain->dim(); // // Get Epetra_MultiVector objects for the arguments // // 2007/08/18: rabartl: Note: After profiling, I found that calling the more // general functions get_Epetra_MultiVector(...) was too slow. These // functions must ensure that memory is being remembered efficiently and the // use of extra data with the RCP and other things is slow. // RCP<const Epetra_MultiVector> X; RCP<Epetra_MultiVector> Y; { THYRA_FUNC_TIME_MONITOR_DIFF( "Thyra::EpetraLinearOp::euclideanApply: Convert MultiVectors", MultiVectors); // X X = get_Epetra_MultiVector( real_M_trans==NOTRANS ? getDomainMap() : getRangeMap(), X_in ); // Y if( beta == 0 ) { Y = get_Epetra_MultiVector( real_M_trans==NOTRANS ? getRangeMap() : getDomainMap(), *Y_inout ); } } // // Set the operator mode // /* We need to save the transpose state here, and then reset it after * application. The reason for this is that if we later apply the * operator outside Thyra (in Aztec, for instance), it will remember * the transpose flag set here. */ bool oldState = op_->UseTranspose(); op_->SetUseTranspose( real_trans(trans_trans(opTrans_,M_trans)) == NOTRANS ? 
false : true ); // // Perform the apply operation // { THYRA_FUNC_TIME_MONITOR_DIFF( "Thyra::EpetraLinearOp::euclideanApply: Apply", Apply); if( beta == 0.0 ) { // Y = M * X if( applyAs_ == EPETRA_OP_APPLY_APPLY ) { THYRA_FUNC_TIME_MONITOR_DIFF( "Thyra::EpetraLinearOp::euclideanApply: Apply(beta==0): Apply", ApplyApply); op_->Apply( *X, *Y ); } else if( applyAs_ == EPETRA_OP_APPLY_APPLY_INVERSE ) { THYRA_FUNC_TIME_MONITOR_DIFF( "Thyra::EpetraLinearOp::euclideanApply: Apply(beta==0): ApplyInverse", ApplyApplyInverse); op_->ApplyInverse( *X, *Y ); } else { #ifdef TEUCHOS_DEBUG TEUCHOS_TEST_FOR_EXCEPT(true); #endif } // Y = alpha * Y if( alpha != 1.0 ) { THYRA_FUNC_TIME_MONITOR_DIFF( "Thyra::EpetraLinearOp::euclideanApply: Apply(beta==0): Scale Y", Scale); Y->Scale(alpha); } } else { // beta != 0.0 // Y_inout = beta * Y_inout if(beta != 0.0) { THYRA_FUNC_TIME_MONITOR_DIFF( "Thyra::EpetraLinearOp::euclideanApply: Apply(beta!=0): Scale Y", Scale); scale( beta, Y_inout ); } else { THYRA_FUNC_TIME_MONITOR_DIFF( "Thyra::EpetraLinearOp::euclideanApply: Apply(beta!=0): Y=0", Apply2); assign( Y_inout, 0.0 ); } // T = M * X Epetra_MultiVector T(op_->OperatorRangeMap(), numCols, false); // NOTE: Above, op_->OperatorRange() will be right for either // non-transpose or transpose because we have already set the // UseTranspose flag correctly. 
if( applyAs_ == EPETRA_OP_APPLY_APPLY ) { THYRA_FUNC_TIME_MONITOR_DIFF( "Thyra::EpetraLinearOp::euclideanApply: Apply(beta!=0): Apply", Apply2); op_->Apply( *X, T ); } else if( applyAs_ == EPETRA_OP_APPLY_APPLY_INVERSE ) { THYRA_FUNC_TIME_MONITOR_DIFF( "Thyra::EpetraLinearOp::euclideanApply: Apply(beta!=0): ApplyInverse", ApplyInverse); op_->ApplyInverse( *X, T ); } else { #ifdef TEUCHOS_DEBUG TEUCHOS_TEST_FOR_EXCEPT(true); #endif } // Y_inout += alpha * T { THYRA_FUNC_TIME_MONITOR_DIFF( "Thyra::EpetraLinearOp::euclideanApply: Apply(beta!=0): Update Y", Update); update( alpha, *create_MultiVector( Teuchos::rcp(&Teuchos::getConst(T),false), Y_inout->range(), XY_domain ), Y_inout ); } } } // Reset the transpose state op_->SetUseTranspose(oldState); // 2009/04/14: ToDo: This will not reset the transpose flag correctly if an // exception is thrown! }