/// Applies the wrapped Thyra operator to Tpetra multivectors.
///
/// X and Y are copied into freshly allocated Thyra multivectors through
/// mapStrategy_, thyraOp_ is applied to them, and the Thyra result is copied
/// back into Y.  alpha and beta are forwarded unchanged to the Thyra apply
/// call.  The mode argument is not consulted: the operator is always applied
/// with Thyra::NOTRANS, and when useTranspose_ is set the call fails through
/// TEUCHOS_ASSERT(false).
void TpetraOperatorWrapper::apply(const Tpetra::MultiVector<ST,LO,GO,NT>& X,
                                  Tpetra::MultiVector<ST,LO,GO,NT>& Y,
                                  Teuchos::ETransp mode,
                                  ST alpha,
                                  ST beta) const
{
  // Transposed application is not implemented.
  if (useTranspose_) {
    TEUCHOS_ASSERT(false);
    return;
  }

  // Allocate the Thyra work vectors; both get as many columns as X.
  RCP<Thyra::MultiVectorBase<ST> > thyraX =
    Thyra::createMembers(thyraOp_->domain(), X.getNumVectors());
  RCP<Thyra::MultiVectorBase<ST> > thyraY =
    Thyra::createMembers(thyraOp_->range(), X.getNumVectors());

  Thyra::assign(thyraX.ptr(), 0.0);
  Thyra::assign(thyraY.ptr(), 0.0);

  // Copy the Tpetra input and the current output values into Thyra storage.
  mapStrategy_->copyTpetraIntoThyra(X, thyraX.ptr());
  mapStrategy_->copyTpetraIntoThyra(Y, thyraY.ptr());

  // If this matrix isn't block square, this probably won't work!
  // Perform the matrix-vector multiplication.
  thyraOp_->apply(Thyra::NOTRANS, *thyraX, thyraY.ptr(), alpha, beta);

  // Copy the Thyra result back into the Tpetra output.
  mapStrategy_->copyThyraIntoTpetra(thyraY, Y);
}
/// Local sparse matrix / multivector product for the overlapping matrix.
///
/// Y is zeroed and then filled with op(M) * X, where M consists of the rows
/// of A_ (local rows 0 .. NumMyRowsA_-1) followed by the rows of ExtMatrix_
/// (local rows NumMyRowsA_ .. NumMyRowsA_+NumMyRowsB_-1), and op is selected
/// by mode (NO_TRANS, TRANS, anything else is treated as CONJ_TRANS).
///
/// alpha and beta are accepted for interface compatibility but are not
/// used by this implementation.
///
/// \throws std::runtime_error if X and Y have different numbers of columns.
void OverlappingRowMatrix<MatrixType>::applyTempl(const Tpetra::MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,Node> &X,
                                                  Tpetra::MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
                                                  Teuchos::ETransp mode,
                                                  RangeScalar alpha,
                                                  RangeScalar beta) const
{
  // Note: This isn't AztecOO compliant.  But neither was Ifpack's version.
  TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
    "Ifpack2::OverlappingRowMatrix::apply ERROR: X.getNumVectors() != Y.getNumVectors().");

  const RangeScalar zero = Teuchos::ScalarTraits<RangeScalar>::zero();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<const DomainScalar> > x_ptr = X.get2dView();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<RangeScalar> >        y_ptr = Y.get2dViewNonConst();
  Y.putScalar(zero);
  const size_t NumVectors = Y.getNumVectors();

  // Rows that come from the original matrix A_.
  for (size_t i = 0; i < NumMyRowsA_; ++i) {
    size_t Nnz;
    A_->getLocalRowCopy(i, Indices_(), Values_(), Nnz);
    if (mode == Teuchos::NO_TRANS) {
      for (size_t j = 0; j < Nnz; ++j)
        for (size_t k = 0; k < NumVectors; ++k)
          y_ptr[k][i] += static_cast<RangeScalar>(Values_[j]) * static_cast<RangeScalar>(x_ptr[k][Indices_[j]]);
    }
    else if (mode == Teuchos::TRANS) {
      for (size_t j = 0; j < Nnz; ++j)
        for (size_t k = 0; k < NumVectors; ++k)
          y_ptr[k][Indices_[j]] += static_cast<RangeScalar>(Values_[j]) * static_cast<RangeScalar>(x_ptr[k][i]);
    }
    else { // mode == Teuchos::CONJ_TRANS
      for (size_t j = 0; j < Nnz; ++j)
        for (size_t k = 0; k < NumVectors; ++k)
          y_ptr[k][Indices_[j]] += Teuchos::ScalarTraits<RangeScalar>::conjugate(static_cast<RangeScalar>(Values_[j])) * static_cast<RangeScalar>(x_ptr[k][i]);
    }
  }

  // Rows that come from the overlap matrix ExtMatrix_.  Its local row i is
  // row NumMyRowsA_+i of the overlapping matrix; its column indices are used
  // unshifted (exactly as the NO_TRANS branch reads X).
  for (size_t i = 0; i < NumMyRowsB_; ++i) {
    const size_t row = NumMyRowsA_ + i;
    size_t Nnz;
    ExtMatrix_->getLocalRowCopy(i, Indices_(), Values_(), Nnz);
    if (mode == Teuchos::NO_TRANS) {
      for (size_t j = 0; j < Nnz; ++j)
        for (size_t k = 0; k < NumVectors; ++k)
          y_ptr[k][row] += static_cast<RangeScalar>(Values_[j]) * static_cast<RangeScalar>(x_ptr[k][Indices_[j]]);
    }
    else if (mode == Teuchos::TRANS) {
      // Transpose: entry (row, col) contributes to y[col] from x[row].  The
      // row offset therefore belongs on the X index, not on the column index.
      for (size_t j = 0; j < Nnz; ++j)
        for (size_t k = 0; k < NumVectors; ++k)
          y_ptr[k][Indices_[j]] += static_cast<RangeScalar>(Values_[j]) * static_cast<RangeScalar>(x_ptr[k][row]);
    }
    else { // mode == Teuchos::CONJ_TRANS
      for (size_t j = 0; j < Nnz; ++j)
        for (size_t k = 0; k < NumVectors; ++k)
          y_ptr[k][Indices_[j]] += Teuchos::ScalarTraits<RangeScalar>::conjugate(static_cast<RangeScalar>(Values_[j])) * static_cast<RangeScalar>(x_ptr[k][row]);
    }
  }
}
/// Applies the matrix held by impl_ to X, storing the result in Y.
///
/// \param X    [in]  Input multivector.
/// \param Y    [out] Output multivector; must have as many columns as X.
/// \param mode [in]  Transpose mode forwarded to the matrix's apply().
///
/// \throws std::runtime_error if X and Y have different numbers of columns,
///   or if impl_ currently holds no matrix.
void Chebyshev<MatrixType>::
applyMat (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
          Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
          Teuchos::ETransp mode) const
{
  TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
    "Ifpack2::Chebyshev::applyMat(): X.getNumVectors() != Y.getNumVectors().");

  // Fail with a clear message instead of dereferencing a null handle.
  TEUCHOS_TEST_FOR_EXCEPTION(impl_.getMatrix ().is_null (), std::runtime_error,
    "Ifpack2::Chebyshev::applyMat(): The matrix is null.");

  impl_.getMatrix ()->apply (X, Y, mode);
}
/// Applies the block relaxation preconditioner to X, storing the result in Y.
///
/// The sweep type is selected by PrecType_ (JACOBI, GS or SGS).  When X and
/// Y share the same local storage, X is copied first so the sweeps read an
/// unmodified right-hand side.  If ZeroStartingSolution_ is set, Y is zeroed
/// before sweeping.  NumApply_ and ApplyTime_ are updated on success.
///
/// alpha and beta are accepted for interface compatibility but are not used
/// by this implementation.
///
/// \throws std::runtime_error if compute() has not been called, if X and Y
///   have different numbers of columns, if mode is not Teuchos::NO_TRANS, or
///   if PrecType_ holds an unknown value.
void TomBlockRelaxation<MatrixType,ContainerType>::apply(
    const Tpetra::MultiVector<typename MatrixType::scalar_type,
                              typename MatrixType::local_ordinal_type,
                              typename MatrixType::global_ordinal_type,
                              typename MatrixType::node_type>& X,
    Tpetra::MultiVector<typename MatrixType::scalar_type,
                        typename MatrixType::local_ordinal_type,
                        typename MatrixType::global_ordinal_type,
                        typename MatrixType::node_type>& Y,
    Teuchos::ETransp mode,
    Scalar alpha,
    Scalar beta) const
{
  TEUCHOS_TEST_FOR_EXCEPTION(isComputed() == false, std::runtime_error,
    "Ifpack2::TomBlockRelaxation::apply ERROR: isComputed() must be true prior to calling apply.");

  TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
    "Ifpack2::TomBlockRelaxation::apply ERROR: X.getNumVectors() != Y.getNumVectors().");

  TEUCHOS_TEST_FOR_EXCEPTION(mode != Teuchos::NO_TRANS, std::runtime_error,
    "Ifpack2::TomBlockRelaxation::apply ERROR: transpose modes not supported.");

  // Restart the timer from zero so totalElapsedTime() below covers only
  // this call.
  Time_->start(true);

  // If X and Y are pointing to the same memory location,
  // we need to create an auxiliary vector, Xcopy.
  Teuchos::RCP< const Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > Xcopy;
  if (X.getLocalMV().getValues() == Y.getLocalMV().getValues())
    Xcopy = Teuchos::rcp( new Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>(X) );
  else
    Xcopy = Teuchos::rcp( &X, false );  // non-owning handle to the caller's X

  if (ZeroStartingSolution_)
    Y.putScalar(0.0);

  // Flops are updated in each of the following.
  switch (PrecType_) {
  case Ifpack2::JACOBI:
    ApplyInverseJacobi(*Xcopy,Y);
    break;
  case Ifpack2::GS:
    ApplyInverseGS(*Xcopy,Y);
    break;
  case Ifpack2::SGS:
    ApplyInverseSGS(*Xcopy,Y);
    break;
  default:
    throw std::runtime_error("Ifpack2::TomBlockRelaxation::apply internal logic error.");
  }

  ++NumApply_;
  Time_->stop();
  ApplyTime_ += Time_->totalElapsedTime();
}
/// Scatters the entries of originalX into reorderedY using perm_:
/// for every column, local entry i of originalX is written to local entry
/// perm_[i] of reorderedY (converted to RangeScalar).
///
/// \throws std::runtime_error if the two multivectors have different
///   numbers of columns.
void ReorderFilter<MatrixType>::permuteOriginalToReorderedTempl(const Tpetra::MultiVector<DomainScalar,local_ordinal_type,global_ordinal_type,node_type> &originalX,
                                                                Tpetra::MultiVector<RangeScalar,local_ordinal_type,global_ordinal_type,node_type> &reorderedY) const
{
  TEUCHOS_TEST_FOR_EXCEPTION(originalX.getNumVectors() != reorderedY.getNumVectors(), std::runtime_error,
    "Ifpack2::ReorderFilter::permuteOriginalToReordered ERROR: X.getNumVectors() != Y.getNumVectors().");

  Teuchos::ArrayRCP<Teuchos::ArrayRCP<const DomainScalar> > srcCols = originalX.get2dView();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<RangeScalar> >        dstCols = reorderedY.get2dViewNonConst();

  const size_t numCols = originalX.getNumVectors();
  const size_t numRows = originalX.getLocalLength();

  for (size_t col = 0; col < numCols; ++col) {
    for (local_ordinal_type row = 0; static_cast<size_t>(row) < numRows; ++row) {
      dstCols[col][perm_[row]] = static_cast<RangeScalar>(srcCols[col][row]);
    }
  }
}
/// Scatters the entries of reorderedX into originalY using reverseperm_:
/// for every column, local entry i of reorderedX is written to local entry
/// reverseperm_[i] of originalY (converted to RangeScalar).
///
/// \throws std::runtime_error if the two multivectors have different
///   numbers of columns.
void ReorderFilter<MatrixType>::permuteReorderedToOriginalTempl(const Tpetra::MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,Node> &reorderedX,
                                                                Tpetra::MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node> &originalY) const
{
  TEUCHOS_TEST_FOR_EXCEPTION(reorderedX.getNumVectors() != originalY.getNumVectors(), std::runtime_error,
    "Ifpack2::ReorderFilter::permuteReorderedToOriginal ERROR: X.getNumVectors() != Y.getNumVectors().");

  Teuchos::ArrayRCP<Teuchos::ArrayRCP<const DomainScalar> > srcCols = reorderedX.get2dView();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<RangeScalar> >        dstCols = originalY.get2dViewNonConst();

  const size_t numCols = reorderedX.getNumVectors();
  const size_t numRows = reorderedX.getLocalLength();

  for (size_t col = 0; col < numCols; ++col) {
    for (LocalOrdinal row = 0; static_cast<size_t>(row) < numRows; ++row) {
      dstCols[col][reverseperm_[row]] = static_cast<RangeScalar>(srcCols[col][row]);
    }
  }
}
/// One Hiptmair smoothing pass on Y for right-hand side X.
///
/// Stages, each updating Y in place:
///   1. (preOrPost_ is "pre" or "both")  relax on the primary space with
///      ifpack2_prec1_ applied to the residual X - A_*Y;
///   2. (always) transfer the residual with P_ in TRANS mode, relax with
///      ifpack2_prec2_, map the correction back with P_ and add it to Y;
///   3. (preOrPost_ is "post" or "both") relax on the primary space again.
///
/// Four temporary multivectors are allocated on every call.
void Hiptmair<MatrixType>::
applyHiptmairSmoother(const Tpetra::MultiVector<typename MatrixType::scalar_type,
                                                typename MatrixType::local_ordinal_type,
                                                typename MatrixType::global_ordinal_type,
                                                typename MatrixType::node_type>& X,
                      Tpetra::MultiVector<typename MatrixType::scalar_type,
                                          typename MatrixType::local_ordinal_type,
                                          typename MatrixType::global_ordinal_type,
                                          typename MatrixType::node_type>& Y) const
{
  typedef Tpetra::MultiVector<scalar_type, local_ordinal_type,
                              global_ordinal_type, node_type> MV;
  const scalar_type ZERO = STS::zero ();
  const scalar_type ONE = STS::one ();

  // Work vectors: residual/correction on the primary space (row Map of A_)
  // and on the auxiliary space (row Map of PtAP_).
  MV primaryResidual (A_->getRowMap (), X.getNumVectors ());
  MV primaryCorrection (A_->getRowMap (), X.getNumVectors ());
  MV auxResidual (PtAP_->getRowMap (), X.getNumVectors ());
  MV auxCorrection (PtAP_->getRowMap (), X.getNumVectors ());

  const bool smoothBefore = (preOrPost_ == "pre" || preOrPost_ == "both");
  const bool smoothAfter = (preOrPost_ == "post" || preOrPost_ == "both");

  if (smoothBefore) {
    // Initial relaxation on the primary space.
    A_->apply (Y, primaryResidual);
    primaryResidual.update (ONE, X, -ONE);   // residual := X - A*Y
    primaryCorrection.putScalar (ZERO);
    ifpack2_prec1_->apply (primaryResidual, primaryCorrection);
    Y.update (ONE, primaryCorrection, ONE);
  }

  // Project to the auxiliary space and smooth there.
  A_->apply (Y, primaryResidual);
  primaryResidual.update (ONE, X, -ONE);
  P_->apply (primaryResidual, auxResidual, Teuchos::TRANS);
  auxCorrection.putScalar (ZERO);
  ifpack2_prec2_->apply (auxResidual, auxCorrection);
  P_->apply (auxCorrection, primaryCorrection, Teuchos::NO_TRANS);
  Y.update (ONE, primaryCorrection, ONE);

  if (smoothAfter) {
    // Smooth again on the primary space.
    A_->apply (Y, primaryResidual);
    primaryResidual.update (ONE, X, -ONE);
    primaryCorrection.putScalar (ZERO);
    ifpack2_prec1_->apply (primaryResidual, primaryCorrection);
    Y.update (ONE, primaryCorrection, ONE);
  }
}
/// Forwards the application to the wrapped operator A_, passing X, Y, mode,
/// coefAx and coefY through unchanged.
///
/// \throws std::runtime_error if X and Y have different numbers of columns.
void BorderedOperator<Scalar, LocalOrdinal, GlobalOrdinal, Node >::apply(
    const Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node >& X,
    Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node >& Y,
    Teuchos::ETransp mode,
    Scalar coefAx,
    Scalar coefY ) const
{
  // A check that the operator supports transpose application
  // (A_->hasTransposeApply()) was written here but is disabled, so any mode
  // is handed to A_ as-is.

  TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
    "Ifpack2::BorderedOperator::apply() ERROR: X.getNumVectors() != Y.getNumVectors().");

  A_->apply(X, Y, mode, coefAx, coefY );
}
/// Local sparse matrix / multivector product using this filter's rows.
///
/// Y is zeroed and then accumulated row by row from getLocalRowCopy():
/// NO_TRANS adds A(i,j)*x(j) to y(i); TRANS adds A(i,j)*x(i) to y(j); any
/// other mode is treated as CONJ_TRANS and uses the conjugated value.
///
/// alpha and beta are accepted for interface compatibility but are not used.
///
/// \throws std::runtime_error if X and Y have different numbers of columns.
void ReorderFilter<MatrixType>::
apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &X,
       Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &Y,
       Teuchos::ETransp mode,
       scalar_type alpha,
       scalar_type beta) const
{
  typedef Teuchos::ScalarTraits<scalar_type> STS;

  // Note: This isn't AztecOO compliant.  But neither was Ifpack's version.
  // Note: The localized maps mean the matvec is trivial (and has no import).
  TEUCHOS_TEST_FOR_EXCEPTION(
    X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
    "Ifpack2::ReorderFilter::apply: X.getNumVectors() != Y.getNumVectors().");

  Teuchos::ArrayRCP<Teuchos::ArrayRCP<const scalar_type> > x_ptr = X.get2dView();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<scalar_type> >       y_ptr = Y.get2dViewNonConst();

  Y.putScalar (STS::zero ());

  const size_t numCols = Y.getNumVectors ();
  const size_t numLocalRows = A_->getNodeNumRows ();

  for (size_t row = 0; row < numLocalRows; ++row) {
    size_t numEntries;
    // Use this class's getrow to make the code below simpler.
    getLocalRowCopy (row, Indices_ (), Values_ (), numEntries);

    switch (mode) {
    case Teuchos::NO_TRANS:
      for (size_t e = 0; e < numEntries; ++e) {
        for (size_t c = 0; c < numCols; ++c) {
          y_ptr[c][row] += Values_[e] * x_ptr[c][Indices_[e]];
        }
      }
      break;

    case Teuchos::TRANS:
      for (size_t e = 0; e < numEntries; ++e) {
        for (size_t c = 0; c < numCols; ++c) {
          y_ptr[c][Indices_[e]] += Values_[e] * x_ptr[c][row];
        }
      }
      break;

    default: // mode == Teuchos::CONJ_TRANS
      for (size_t e = 0; e < numEntries; ++e) {
        for (size_t c = 0; c < numCols; ++c) {
          y_ptr[c][Indices_[e]] += STS::conjugate(Values_[e]) * x_ptr[c][row];
        }
      }
      break;
    }
  }
}
/// Applies every non-empty block container once to (X, Y), scaled by
/// DampingFactor_ and accumulated into Y (the last coefficient passed to
/// the container is one).
///
/// With OverlapLevel_ == 0 the containers' apply() is used; otherwise their
/// weightedApply() with the weights *W_.  ApplyFlops_ is advanced per block
/// with the estimate carried over from Ifpack.
void TomBlockRelaxation<MatrixType,ContainerType>::DoJacobi(const Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& X,
                                                            Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Y) const
{
  const size_t numCols = X.getNumVectors();
  const Scalar unit = Teuchos::ScalarTraits<Scalar>::one();
  const bool overlapping = (OverlapLevel_ != 0);

  // Note: Flop counts copied naively from Ifpack.
  for (LocalOrdinal blk = 0; blk < NumLocalBlocks_; ++blk) {
    // It may happen that a partition is empty.
    if (Containers_[blk]->getNumRows() == 0)
      continue;

    if (overlapping) {
      // Overlapping Jacobi.
      Containers_[blk]->weightedApply(X, Y, *W_, Teuchos::NO_TRANS, DampingFactor_, unit);
      // NOTE: do not count (for simplicity) the flops due to overlapping rows.
      ApplyFlops_ += numCols * 4 * NumGlobalRows_;
    }
    else {
      // Non-overlapping Jacobi.
      Containers_[blk]->apply(X, Y, Teuchos::NO_TRANS, DampingFactor_, unit);
      ApplyFlops_ += numCols * 2 * NumGlobalRows_;
    }
  }
}
/// Builds the right-hand side of the reduced (singleton-free) system.
///
/// For each reduced row i (original row InvReorder_[i]) the reduced RHS
/// starts as the original RHS entry, and then every matrix entry of that
/// row whose column is marked -1 in Reorder_ has its contribution
/// value * LHS(column) subtracted.
///
/// The column count is taken from LHS and used for all three multivectors.
void SingletonFilter<MatrixType>::CreateReducedRHSTempl(const Tpetra::MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,Node>& LHS,
                                                        const Tpetra::MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node>& RHS,
                                                        Tpetra::MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node>& ReducedRHS)
{
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<const RangeScalar > > rhsCols = RHS.get2dView();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<const DomainScalar> > lhsCols = LHS.get2dView();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<RangeScalar> >        reducedCols = ReducedRHS.get2dViewNonConst();

  const size_t numCols = LHS.getNumVectors();

  // Pass 1: gather the original RHS entries into reduced ordering.
  for (size_t i = 0; i < NumRows_; ++i) {
    for (size_t c = 0; c < numCols; ++c) {
      reducedCols[c][i] = rhsCols[c][InvReorder_[i]];
    }
  }

  // Pass 2: move the contributions of the eliminated columns to the RHS.
  for (size_t i = 0; i < NumRows_; ++i) {
    const LocalOrdinal origRow = InvReorder_[i];
    size_t numEntries;
    A_->getLocalRowCopy(origRow, Indices_(), Values_(), numEntries);

    for (size_t e = 0; e < numEntries; ++e) {
      if (Reorder_[Indices_[e]] != -1)
        continue;   // column is kept in the reduced system
      for (size_t c = 0; c < numCols; ++c) {
        reducedCols[c][i] -= static_cast<RangeScalar>(Values_[e]) * static_cast<RangeScalar>(lhsCols[c][Indices_[e]]);
      }
    }
  }
}
/// Applies the matrix A_ (not the preconditioner) to X, storing the result
/// in Y; mode is forwarded to A_'s apply().
///
/// \throws std::runtime_error if isComputed() is false or if X and Y have
///   different numbers of columns.
void TomBlockRelaxation<MatrixType,ContainerType>::applyMat(
    const Tpetra::MultiVector<typename MatrixType::scalar_type,
                              typename MatrixType::local_ordinal_type,
                              typename MatrixType::global_ordinal_type,
                              typename MatrixType::node_type>& X,
    Tpetra::MultiVector<typename MatrixType::scalar_type,
                        typename MatrixType::local_ordinal_type,
                        typename MatrixType::global_ordinal_type,
                        typename MatrixType::node_type>& Y,
    Teuchos::ETransp mode) const
{
  // Precondition: the preconditioner has been computed.
  TEUCHOS_TEST_FOR_EXCEPTION(isComputed() == false, std::runtime_error,
    "Ifpack2::TomBlockRelaxation::applyMat() ERROR: isComputed() must be true prior to calling applyMat().");

  // Precondition: matching column counts.
  TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
    "Ifpack2::TomBlockRelaxation::applyMat() ERROR: X.getNumVectors() != Y.getNumVectors().");

  A_->apply(X, Y, mode);
}
/// Applies the matrix currently held by impl_ to X, storing the result in
/// Y; mode is forwarded to the matrix's apply().
///
/// \throws std::invalid_argument if X and Y have different numbers of
///   columns.
/// \throws std::runtime_error if impl_ holds no matrix.
void Chebyshev<MatrixType>::
applyMat (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
          Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
          Teuchos::ETransp mode) const
{
  // Column counts must agree before anything else is looked at.
  TEUCHOS_TEST_FOR_EXCEPTION(
    X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
    "Ifpack2::Chebyshev::applyMat: X.getNumVectors() != Y.getNumVectors().");

  // Fetch the matrix once and make sure there is one.
  Teuchos::RCP<const row_matrix_type> A = impl_.getMatrix ();
  TEUCHOS_TEST_FOR_EXCEPTION(
    A.is_null (), std::runtime_error,
    "Ifpack2::Chebyshev::applyMat: The input "
    "matrix A is null. Please call setMatrix() with a nonnull input matrix "
    "before calling this method.");

  A->apply (X, Y, mode);
}
/// Applies the Chebyshev preconditioner: the arguments are validated and
/// then handed to applyImpl (X, Y, mode, alpha, beta).
///
/// On success NumApply_ is incremented and ApplyTime_ is advanced by the
/// time spent in this call only.
///
/// \throws std::runtime_error if compute() has not been called, or if X and
///   Y have different numbers of columns.  In HAVE_TEUCHOS_DEBUG builds it
///   is also thrown when the Map of X (resp. Y) is not the same as the
///   domain (resp. range) Map.
void Chebyshev<MatrixType>::
apply (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
       Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
       Teuchos::ETransp mode,
       scalar_type alpha,
       scalar_type beta) const
{
  // The TimeMonitor below does not reset Time_, so totalElapsedTime() is
  // cumulative over every earlier use of the timer.  Remember the total at
  // entry so that only this call's share is added to ApplyTime_; adding the
  // full total each time would count earlier calls again.
  const double elapsedAtEntry = Time_->totalElapsedTime ();

  {
    Teuchos::TimeMonitor timeMon (*Time_);

    // compute() calls initialize() if it hasn't already been called.
    // Thus, we only need to check isComputed().
    TEUCHOS_TEST_FOR_EXCEPTION(! isComputed(), std::runtime_error,
      "Ifpack2::Chebyshev::apply(): You must call the compute() method before "
      "you may call apply().");
    TEUCHOS_TEST_FOR_EXCEPTION(
      X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
      "Ifpack2::Chebyshev::apply(): X and Y must have the same number of "
      "columns. X.getNumVectors() = " << X.getNumVectors() << " != "
      << "Y.getNumVectors() = " << Y.getNumVectors() << ".");

#ifdef HAVE_TEUCHOS_DEBUG
    {
      // The relation 'isSameAs' is transitive.  It's also a collective,
      // so we don't have to do a "shared" test for exception (i.e., a
      // global reduction on the test value).
      TEUCHOS_TEST_FOR_EXCEPTION(
        ! X.getMap ()->isSameAs (*getDomainMap ()), std::runtime_error,
        "Ifpack2::Chebyshev: The domain Map of the matrix must be the same as "
        "the Map of the input vector(s) X.");
      TEUCHOS_TEST_FOR_EXCEPTION(
        ! Y.getMap ()->isSameAs (*getRangeMap ()), std::runtime_error,
        "Ifpack2::Chebyshev: The range Map of the matrix must be the same as "
        "the Map of the output vector(s) Y.");
    }
#endif // HAVE_TEUCHOS_DEBUG

    applyImpl (X, Y, mode, alpha, beta);
  }

  ++NumApply_;
  ApplyTime_ += Time_->totalElapsedTime () - elapsedAtEntry;
}
/// Copies the reduced solution back into the full solution vector: for each
/// reduced row i and each column, ReducedLHS(i) is written (converted to
/// RangeScalar) to LHS(InvReorder_[i]).  Other entries of LHS are left
/// untouched.
void SingletonFilter<MatrixType>::UpdateLHSTempl(const Tpetra::MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,Node>& ReducedLHS,
                                                 Tpetra::MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node>& LHS)
{
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<RangeScalar> >        fullCols = LHS.get2dViewNonConst();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<const DomainScalar> > reducedCols = ReducedLHS.get2dView();

  const size_t numCols = LHS.getNumVectors();

  for (size_t i = 0; i < NumRows_; ++i) {
    for (size_t c = 0; c < numCols; ++c) {
      fullCols[c][InvReorder_[i]] = static_cast<RangeScalar>(reducedCols[c][i]);
    }
  }
}
/// Local sparse matrix / multivector product using the filtered rows.
///
/// Y is zeroed and then accumulated row by row from this class's
/// getLocalRowCopy(): NO_TRANS adds A(i,j)*x(j) to y(i); TRANS adds
/// A(i,j)*x(i) to y(j); any other mode is treated as CONJ_TRANS and uses the
/// conjugated value.
///
/// alpha and beta are accepted for interface compatibility but are not used.
///
/// \throws std::runtime_error if X and Y have different numbers of columns.
void SingletonFilter<MatrixType>::apply(const Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X,
                                        Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
                                        Teuchos::ETransp mode,
                                        Scalar alpha,
                                        Scalar beta) const
{
  // Note: This isn't AztecOO compliant.  But neither was Ifpack's version.
  TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
    "Ifpack2::SingletonFilter::apply ERROR: X.getNumVectors() != Y.getNumVectors().");

  Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> > x_ptr = X.get2dView();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> >       y_ptr = Y.get2dViewNonConst();

  Y.putScalar(Teuchos::ScalarTraits<Scalar>::zero());
  const size_t numCols = Y.getNumVectors();

  for (size_t row = 0; row < NumRows_; ++row) {
    size_t numEntries;
    // Use this class's getrow to make the code below simpler.
    getLocalRowCopy(row, Indices_(), Values_(), numEntries);

    switch (mode) {
    case Teuchos::NO_TRANS:
      for (size_t e = 0; e < numEntries; ++e)
        for (size_t c = 0; c < numCols; ++c)
          y_ptr[c][row] += Values_[e] * x_ptr[c][Indices_[e]];
      break;

    case Teuchos::TRANS:
      for (size_t e = 0; e < numEntries; ++e)
        for (size_t c = 0; c < numCols; ++c)
          y_ptr[c][Indices_[e]] += Values_[e] * x_ptr[c][row];
      break;

    default: // mode == Teuchos::CONJ_TRANS
      for (size_t e = 0; e < numEntries; ++e)
        for (size_t c = 0; c < numCols; ++c)
          y_ptr[c][Indices_[e]] += Teuchos::ScalarTraits<Scalar>::conjugate(Values_[e]) * x_ptr[c][row];
      break;
    }
  }
}
/// Produces the multivectors (Xout, Yout) to be used by the solve/multiply
/// routines.
///
/// Without overlap, Xout and Yout are non-owning handles to Xin and Yin
/// (Yin's constness is cast away for Yout).  With overlap, the cached work
/// vectors OverlapX_/OverlapY_ are (re)allocated when missing or when their
/// column count differs from Xin's, Xin is imported into OverlapX_, and
/// Xout/Yout are pointed at the work vectors instead.
///
/// \throws std::runtime_error if Xin and Yin have different numbers of
///   columns.
void RILUK<MatrixType>::generateXY(Teuchos::ETransp mode,
    const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Xin,
    const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Yin,
    Teuchos::RCP<const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> >& Xout,
    Teuchos::RCP<Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> >& Yout) const
{
  typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;

  // Generate an X and Y suitable for performing Solve() and Multiply() methods
  TEUCHOS_TEST_FOR_EXCEPTION(Xin.getNumVectors()!=Yin.getNumVectors(), std::runtime_error,
    "Ifpack2::RILUK::GenerateXY ERROR: X and Y not the same size");

  // Start from non-owning handles to the caller's vectors.
  Xout = Teuchos::rcp (&Xin, false);
  Yout = Teuchos::rcp (const_cast<MV*> (&Yin), false);

  if (!isOverlapped_) {
    return; // Nothing more to do
  }

  // Make sure the number of vectors in the cached work vectors is the same
  // as before; otherwise drop them so they get reallocated below.
  if (! OverlapX_.is_null () && OverlapX_->getNumVectors () != Xin.getNumVectors ()) {
    OverlapX_ = Teuchos::null;
    OverlapY_ = Teuchos::null;
  }

  // Need to allocate space for overlap X and Y.
  if (OverlapX_.is_null ()) {
    OverlapX_ = Teuchos::rcp (new MV (U_->getColMap (), Xout->getNumVectors ()));
    OverlapY_ = Teuchos::rcp (new MV (L_->getRowMap (), Yout->getNumVectors ()));
  }

  // Import X values for the solve; the communication object depends on mode.
  if (mode == Teuchos::NO_TRANS) {
    OverlapX_->doImport (*Xout, *U_->getGraph ()->getImporter (), Tpetra::INSERT);
  }
  else {
    OverlapX_->doImport (*Xout, *L_->getGraph ()->getExporter (), Tpetra::INSERT);
  }

  // Set Xout and Yout to point to the overlap space.
  Xout = OverlapX_;
  Yout = OverlapY_;
}
/// Applies the Chebyshev preconditioner: the arguments are validated and
/// then handed to applyImpl (X, Y, mode, alpha, beta).
///
/// Timing uses the TimeMonitor counter named "Ifpack2::Chebyshev::apply",
/// created on first use.  On success NumApply_ is incremented and
/// ApplyTime_ is set to that counter's running total.
///
/// \throws std::runtime_error if compute() has not been called, or if X and
///   Y have different numbers of columns.
void Chebyshev<MatrixType>::
apply (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
       Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
       Teuchos::ETransp mode,
       scalar_type alpha,
       scalar_type beta) const
{
  // Find the shared counter for this method, registering it if necessary.
  const std::string counterLabel ("Ifpack2::Chebyshev::apply");
  Teuchos::RCP<Teuchos::Time> applyCounter =
    Teuchos::TimeMonitor::lookupCounter (counterLabel);
  if (applyCounter.is_null ()) {
    applyCounter = Teuchos::TimeMonitor::getNewCounter (counterLabel);
  }

  // Everything inside this scope is timed.
  {
    Teuchos::TimeMonitor scopedTiming (*applyCounter);

    // compute() calls initialize() if it hasn't already been called.
    // Thus, we only need to check isComputed().
    TEUCHOS_TEST_FOR_EXCEPTION(
      ! isComputed (), std::runtime_error,
      "Ifpack2::Chebyshev::apply(): You must call the compute() method before "
      "you may call apply().");
    TEUCHOS_TEST_FOR_EXCEPTION(
      X.getNumVectors () != Y.getNumVectors (), std::runtime_error,
      "Ifpack2::Chebyshev::apply(): X and Y must have the same number of "
      "columns. X.getNumVectors() = " << X.getNumVectors() << " != "
      << "Y.getNumVectors() = " << Y.getNumVectors() << ".");

    applyImpl (X, Y, mode, alpha, beta);
  }

  ++NumApply_;

  // totalElapsedTime() returns the total time over all uses of the counter,
  // so the value is assigned rather than accumulated.
  ApplyTime_ = applyCounter->totalElapsedTime ();
}
/// Solves the singleton rows directly: for each row index taken from
/// SingletonIndex_, every stored entry on the diagonal of that row sets
/// LHS(row) = RHS(row) / entry for all columns.  No check is made that the
/// diagonal entry is nonzero.
void SingletonFilter<MatrixType>::SolveSingletonsTempl(const Tpetra::MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,Node>& RHS,
                                                       Tpetra::MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node>& LHS)
{
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<const DomainScalar> > rhsCols = RHS.get2dView();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<RangeScalar> >        lhsCols = LHS.get2dViewNonConst();

  const size_t numCols = LHS.getNumVectors();

  for (size_t s = 0; s < NumSingletons_; ++s) {
    const LocalOrdinal row = SingletonIndex_[s];

    // Fetch the row to get the diagonal value for the singleton.
    size_t numEntries;
    A_->getLocalRowCopy(row, Indices_(), Values_(), numEntries);

    for (size_t e = 0; e < numEntries; ++e) {
      if (Indices_[e] != row)
        continue;   // off-diagonal entry
      for (size_t c = 0; c < numCols; ++c) {
        lhsCols[c][row] = static_cast<RangeScalar>(rhsCols[c][row]) / static_cast<RangeScalar>(Values_[e]);
      }
    }
  }
}
/// Entrywise product with twice the stored vector x0_:
/// for every column k and local entry i, Y(i,k) = 2 * x0_(i) * X(i,k).
///
/// mode, alpha and beta are part of the operator interface but are not used
/// here; Y is overwritten regardless of their values.
void apply( const Tpetra::MultiVector<double,int,int> & X,
            Tpetra::MultiVector<double,int,int> & Y,
            Teuchos::ETransp mode = Teuchos::NO_TRANS,
            double alpha = Teuchos::ScalarTraits<double>::one(),
            double beta = Teuchos::ScalarTraits<double>::zero() ) const
{
  const size_t numCols = Y.getNumVectors();

  for (size_t col = 0; col < numCols; ++col) {
    const auto inCol = X.getData(col);
    const auto baseVals = x0_.getData();
    auto outCol = Y.getDataNonConst(col);

    // The local length is taken from the output column.
    const size_t localLen = static_cast<size_t>(outCol.size());
    for (size_t row = 0; row < localLen; ++row) {
      outCol[row] = 2 * baseVals[row] * inCol[row];
    }
  }
}
/// Applies the identity operator: Y := beta*Y + alpha*X, with X first
/// brought into Y's Map through export_ when an Export object is present.
///
/// The transpose mode is ignored.  numApply_ is incremented on success.
///
/// \throws std::runtime_error if isComputed() is false.
void IdentitySolver<MatrixType>::
apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
       Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
       Teuchos::ETransp /*mode*/,
       scalar_type alpha,
       scalar_type beta) const
{
  typedef Teuchos::ScalarTraits<scalar_type> STS;
  typedef Tpetra::MultiVector<scalar_type, local_ordinal_type,
                              global_ordinal_type, node_type> MV;

  TEUCHOS_TEST_FOR_EXCEPTION(
    ! isComputed (), std::runtime_error,
    "Ifpack2::IdentitySolver::apply: If compute() has not yet been called, "
    "or if you have changed the matrix via setMatrix(), "
    "you must call compute() before you may call this method.");

  // "Identity solver" does what it says: it's the identity operator.
  // We have to Export if the domain and range Maps are not the same.
  // Otherwise, this operator would be a permutation, not the identity.
  if (export_.is_null ()) {
    // No redistribution needed.
    Y.update (alpha, X, beta);
  }
  else if (alpha == STS::one () && beta == STS::zero ()) {
    // The common case: a plain Export straight into Y.
    Y.doExport (X, *export_, Tpetra::REPLACE);
  }
  else {
    // We know that the domain and range Maps are compatible.  First bring X
    // into the range Map via Export, then compute in place in Y.
    MV X_tmp (Y.getMap (), Y.getNumVectors ());
    X_tmp.doExport (X, *export_, Tpetra::REPLACE);
    Y.update (alpha, X_tmp, beta);
  }

  ++numApply_;
}
/// Runs the block Jacobi sweeps for right-hand side X, updating Y in place.
///
/// When ZeroStartingSolution_ is set, the first sweep is applied to X
/// directly (no matrix-vector product) and counts as sweep 0.  Every
/// remaining sweep forms X - A*Y with applyMat() and passes that to
/// DoJacobi().  ApplyFlops_ is advanced for each residual computation.
void TomBlockRelaxation<MatrixType,ContainerType>::ApplyInverseJacobi(
    const Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& X,
    Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Y) const
{
  const size_t numCols = X.getNumVectors();

  // Work vector for A*Y and then the residual X - A*Y.
  Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> residual( Y.getMap(), numCols );

  int sweep = 0;
  if (ZeroStartingSolution_) {
    // Initial matvec not needed.
    DoJacobi(X, Y);
    sweep = 1;
  }

  while (sweep < NumSweeps_) {
    applyMat(Y, residual);
    residual.update(1.0, X, -1.0);
    DoJacobi(residual, Y);

    // Flops for matrix apply & update
    ApplyFlops_ += numCols * (2 * NumGlobalNonzeros_ + 2 * NumGlobalRows_);
    ++sweep;
  }
}
void PrecPrecomputed::applyTempl( const Tpetra::MultiVector<DomainScalar, Ordinal, Ordinal, Node>& X, Tpetra::MultiVector<RangeScalar, Ordinal, Ordinal, Node>& Z, Teuchos::ETransp mode, RangeScalar alpha, RangeScalar beta) const { using Teuchos::RCP; using Teuchos::rcp; typedef Tpetra::MultiVector<DomainScalar, Ordinal, Ordinal, Node> MV; Teuchos::Time timer ("ILUT::apply"); { // Timer scope for timing apply() Teuchos::TimeMonitor timeMon (timer, true); TEUCHOS_TEST_FOR_EXCEPTION( ! isComputed (), std::runtime_error, "Ifpack2::ILUT::apply: You must call compute() to compute the incomplete " "factorization, before calling apply()."); TEUCHOS_TEST_FOR_EXCEPTION( X.getNumVectors() != Z.getNumVectors(), std::runtime_error, "Ifpack2::ILUT::apply: X and Y must have the same number of columns. " "X has " << X.getNumVectors () << " columns, but Y has " << Z.getNumVectors () << " columns."); TEUCHOS_TEST_FOR_EXCEPTION( beta != STS::zero (), std::logic_error, "Ifpack2::ILUT::apply: This method does not currently work when beta != 0."); // If X and Y are pointing to the same memory location, // we need to create an auxiliary vector, Xcopy RCP<const MV> Xcopy; if (X.getLocalMV ().getValues () == Z.getLocalMV ().getValues ()) { Xcopy = rcp (new MV (X)); } else { Xcopy = rcpFromRef (X); //ZACH always taken } //if (mode == Teuchos::NO_TRANS) { // Solve L U Y = X /* L_->template localSolve<RangeScalar,DomainScalar> (*Xcopy, Y, Teuchos::NO_TRANS); U_->template localSolve<RangeScalar,RangeScalar> (Y, Y, Teuchos::NO_TRANS); */ //dump(X, "##X"); //dump(*Xcopy, "##Xcopy"); //ZACH the problem is here: This actually needs to be a global triangular solve L_->template localSolve<RangeScalar,DomainScalar> (*Xcopy, *Y_, Teuchos::NO_TRANS); //dump(*Y_, "##Y"); U_->template localSolve<RangeScalar,RangeScalar> (*Y_, Z, Teuchos::NO_TRANS); //U_->template localSolve<RangeScalar,RangeScalar> (*Xcopy, Z, Teuchos::NO_TRANS); //dump(Z, "##Z"); //U_->template localSolve<RangeScalar,DomainScalar> 
(*Xcopy, Y, Teuchos::NO_TRANS); //U_->template localSolve<RangeScalar,RangeScalar> (Y, Y, Teuchos::TRANS); //} //else { // Solve U^* L^* Y = X /* U_->template localSolve<RangeScalar,DomainScalar> (*Xcopy, Y, mode); L_->template localSolve<RangeScalar,RangeScalar> (Y, Y, mode); */ //} if (alpha != STS::one ()) { Z.scale (alpha); } } ++NumApply_; ApplyTime_ += timer.totalElapsedTime (); }
/// Applies the Hiptmair smoother to X, storing the result in Y.
///
/// When X and Y share the same host storage, X is deep-copied first so the
/// smoother reads an unmodified right-hand side.  If ZeroStartingSolution_
/// is set, Y is zeroed before smoothing.  NumApply_ and ApplyTime_ are
/// updated on success.
///
/// \throws std::runtime_error if compute() has not been called.
/// \throws std::invalid_argument if X and Y have different numbers of
///   columns.
/// \throws std::logic_error for the unimplemented cases alpha != 1,
///   beta != 0 and mode != Teuchos::NO_TRANS.
void Hiptmair<MatrixType>::
apply (const Tpetra::MultiVector<typename MatrixType::scalar_type,
                                 typename MatrixType::local_ordinal_type,
                                 typename MatrixType::global_ordinal_type,
                                 typename MatrixType::node_type>& X,
       Tpetra::MultiVector<typename MatrixType::scalar_type,
                           typename MatrixType::local_ordinal_type,
                           typename MatrixType::global_ordinal_type,
                           typename MatrixType::node_type>& Y,
       Teuchos::ETransp mode,
       typename MatrixType::scalar_type alpha,
       typename MatrixType::scalar_type beta) const
{
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcpFromRef;
  typedef Tpetra::MultiVector<scalar_type, local_ordinal_type,
                              global_ordinal_type, node_type> MV;

  TEUCHOS_TEST_FOR_EXCEPTION(
    ! isComputed (), std::runtime_error,
    "Ifpack2::Hiptmair::apply: You must call compute() before you may call apply().");
  TEUCHOS_TEST_FOR_EXCEPTION(
    X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
    "Ifpack2::Hiptmair::apply: The MultiVector inputs X and Y do not have the "
    "same number of columns. X.getNumVectors() = " << X.getNumVectors ()
    << " != Y.getNumVectors() = " << Y.getNumVectors () << ".");

  // Catch unimplemented cases: alpha != 1, beta != 0, mode != NO_TRANS.
  TEUCHOS_TEST_FOR_EXCEPTION(
    alpha != STS::one (), std::logic_error,
    "Ifpack2::Hiptmair::apply: alpha != 1 has not been implemented.");
  TEUCHOS_TEST_FOR_EXCEPTION(
    beta != STS::zero (), std::logic_error,
    "Ifpack2::Hiptmair::apply: beta != 0 has not been implemented.");
  TEUCHOS_TEST_FOR_EXCEPTION(
    mode != Teuchos::NO_TRANS, std::logic_error,
    "Ifpack2::Hiptmair::apply: mode != Teuchos::NO_TRANS has not been implemented.");

  // A fresh timer per call, so its total is exactly this call's time.
  Teuchos::Time timer ("apply");
  { // The body of code to time
    Teuchos::TimeMonitor timeMon (timer);

    // If X and Y are pointing to the same memory location,
    // we need to create an auxiliary vector, Xcopy.
    RCP<const MV> Xcopy;
    {
      auto X_lcl_host = X.template getLocalView<Kokkos::HostSpace> ();
      auto Y_lcl_host = Y.template getLocalView<Kokkos::HostSpace> ();
      if (X_lcl_host.ptr_on_device () == Y_lcl_host.ptr_on_device ()) {
        Xcopy = rcp (new MV (X, Teuchos::Copy));
      } else {
        Xcopy = rcpFromRef (X);
      }
    }

    RCP<MV> Ycopy = rcpFromRef (Y);
    if (ZeroStartingSolution_) {
      Ycopy->putScalar (STS::zero ());
    }

    // Apply Hiptmair smoothing.
    applyHiptmairSmoother (*Xcopy, *Ycopy);
  }

  ++NumApply_;
  ApplyTime_ += timer.totalElapsedTime ();
}
size_t findUniqueGids( Tpetra::MultiVector<gno_t, lno_t, gno_t> &keys, Tpetra::Vector<gno_t, lno_t, gno_t> &gids ) { // Input: Tpetra MultiVector of keys; key length = numVectors() // May contain duplicate keys within a processor. // May contain duplicate keys across processors. // Input: Empty Tpetra Vector with same map for holding the results // Output: Filled gids vector, containing unique global numbers for // each unique key. Global numbers are in range [0,#UniqueKeys). size_t num_keys = keys.getLocalLength(); size_t num_entries = keys.getNumVectors(); #ifdef HAVE_ZOLTAN2_MPI MPI_Comm mpicomm = Teuchos::getRawMpiComm(*(keys.getMap()->getComm())); #else // Zoltan's siMPI will be used here { int flag; MPI_Initialized(&flag); if (!flag) { int narg = 0; char **argv = NULL; MPI_Init(&narg, &argv); } } MPI_Comm mpicomm = MPI_COMM_WORLD; // Will get MPI_COMM_WORLD from siMPI #endif int num_gid = sizeof(gno_t)/sizeof(ZOLTAN_ID_TYPE) * num_entries; int num_user = sizeof(gno_t); // Buffer the keys for Zoltan_DD Teuchos::ArrayRCP<const gno_t> *tmpKeyVecs = new Teuchos::ArrayRCP<const gno_t>[num_entries]; for (size_t v = 0; v < num_entries; v++) tmpKeyVecs[v] = keys.getData(v); ZOLTAN_ID_PTR ddkeys = new ZOLTAN_ID_TYPE[num_gid * num_keys]; size_t idx = 0; for (size_t i = 0; i < num_keys; i++) { for (size_t v = 0; v < num_entries; v++) { ZOLTAN_ID_PTR ddkey = &(ddkeys[idx]); TPL_Traits<ZOLTAN_ID_PTR,gno_t>::ASSIGN(ddkey, tmpKeyVecs[v][i]); idx += TPL_Traits<ZOLTAN_ID_PTR,gno_t>::NUM_ID; } } delete [] tmpKeyVecs; // Allocate memory for the result char *ddnewgids = new char[num_user * num_keys]; // Compute the new GIDs size_t nUnique = findUniqueGidsCommon<gno_t>(num_keys, num_gid, ddkeys, ddnewgids, mpicomm); // Copy the result into the output vector gno_t *result = (gno_t *)ddnewgids; for (size_t i = 0; i < num_keys; i++) gids.replaceLocalValue(i, result[i]); // Clean up delete [] ddkeys; delete [] ddnewgids; return nUnique; }
// Computes Y := beta * Y + alpha * diag(D) * M^{-1} * diag(D) * X on the
// permuted subset of local rows owned by this container, where M^{-1} is
// whatever applyImpl() applies.
//
// X, Y   input / in-out multivectors; must have the same number of columns.
// D      diagonal scaling, gathered onto the container's rows.
// mode   forwarded unchanged to applyImpl().
// alpha, beta   scaling factors as in the formula above.
//
// Throws std::runtime_error if compute() has not been called or if X and Y
// have different numbers of columns; throws std::logic_error if an internal
// temporary does not have numRows_ rows.
void DenseContainer<MatrixType,LocalScalarType>::
weightedApply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
               Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
               const Tpetra::Vector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& D,
               Teuchos::ETransp mode,
               scalar_type alpha,
               scalar_type beta) const
{
  using Teuchos::ArrayView;
  using Teuchos::RCP;
  using Teuchos::rcp;
  typedef Teuchos::ScalarTraits<scalar_type> STS;

  // The local operator template parameter might have a different
  // Scalar type than MatrixType.  This means that we might have to
  // convert X and Y to the Tpetra::MultiVector specialization that
  // the local operator wants.  This class' X_ and Y_ internal fields
  // are of the right type for the local operator, so we can use those
  // as targets.

  // Tpetra::MultiVector specialization corresponding to the global operator.
  typedef Tpetra::MultiVector<scalar_type,
    local_ordinal_type, global_ordinal_type, node_type> global_mv_type;
  // Tpetra::Vector specialization corresponding to the local
  // operator.  We will use this for the subset permutation of the
  // diagonal scaling D.
  typedef Tpetra::Vector<local_scalar_type,
    local_ordinal_type, global_ordinal_type, node_type> local_vec_type;

  const char prefix[] = "Ifpack2::DenseContainer::weightedApply: ";
  TEUCHOS_TEST_FOR_EXCEPTION(
    ! IsComputed_, std::runtime_error, prefix << "You must have called the "
    "compute() method before you may call this method.  You may call "
    "weightedApply() as many times as you want after calling compute() once, "
    "but you must have called compute() at least once first.");
  const size_t numVecs = X.getNumVectors ();
  // Report Y's own column count (the message used to print X's twice).
  TEUCHOS_TEST_FOR_EXCEPTION(
    numVecs != Y.getNumVectors (), std::runtime_error,
    prefix << "X and Y have different numbers of vectors (columns). X has "
    << X.getNumVectors () << ", but Y has " << Y.getNumVectors () << ".");

  if (numVecs == 0) {
    return; // done! nothing to do
  }

  // The local operator works on a permuted subset of the local parts
  // of X and Y.  The subset and permutation are defined by the index
  // array returned by getLocalRows().  For now, we always use
  // temporary storage (X_ and Y_) to hold the permuted versions.
  // numRows_ here gives the number of rows in the row Map of the
  // local operator.
  //
  // FIXME (mfh 20 Aug 2013) There might be an implicit assumption
  // here that the row Map and the range Map of that operator are
  // the same.
  //
  // FIXME (mfh 20 Aug 2013) This "local permutation" functionality
  // really belongs in Tpetra.

  // (Re)allocate the cached temporary if it does not exist yet or was
  // sized for a different number of columns by an earlier call.
  if (X_.is_null () || X_->getNumVectors () != numVecs) {
    X_ = rcp (new local_mv_type (localMap_, numVecs));
  }
  RCP<local_mv_type> X_local = X_;
  TEUCHOS_TEST_FOR_EXCEPTION(
    X_local->getLocalLength () != numRows_, std::logic_error, prefix <<
    "X_local has length " << X_local->getLocalLength () << ", which does "
    "not match numRows_ = " << numRows_ << ". Please report this bug to "
    "the Ifpack2 developers.");
  ArrayView<const local_ordinal_type> localRows = this->getLocalRows ();

  Details::MultiVectorLocalGatherScatter<global_mv_type, local_mv_type> mvgs;
  mvgs.gather (*X_local, X, localRows);

  // We must gather the output multivector Y even on input to
  // applyImpl(), since the local operator might use it as an initial
  // guess for a linear solve.  We have no way of knowing whether it
  // does or does not.

  if (Y_.is_null () || Y_->getNumVectors () != numVecs) {
    Y_ = rcp (new local_mv_type (localMap_, numVecs));
  }
  RCP<local_mv_type> Y_local = Y_;
  // Report Y_local's own length (the message used to print X_local's).
  TEUCHOS_TEST_FOR_EXCEPTION(
    Y_local->getLocalLength () != numRows_, std::logic_error, prefix <<
    "Y_local has length " << Y_local->getLocalLength () << ", which does "
    "not match numRows_ = " << numRows_ << ". Please report this bug to "
    "the Ifpack2 developers.");
  mvgs.gather (*Y_local, Y, localRows);

  // Apply the diagonal scaling D to the input X.  It's our choice
  // whether the result has the original input Map of X, or the
  // permuted subset Map of X_local.  We choose the latter, to save
  // memory and computation.  Thus, we do the following:
  //
  // 1. Gather D into a temporary vector D_local.
  // 2. Create a temporary X_scaled to hold diag(D_local) * X_local.
  // 3. Compute X_scaled := diag(D_local) * X_local.

  local_vec_type D_local (localMap_);
  TEUCHOS_TEST_FOR_EXCEPTION(
    D_local.getLocalLength () != numRows_, std::logic_error, prefix <<
    "D_local has length " << D_local.getLocalLength () << ", which does "
    "not match numRows_ = " << numRows_ << ". Please report this bug to "
    "the Ifpack2 developers.");
  mvgs.gather (D_local, D, localRows);
  local_mv_type X_scaled (localMap_, numVecs);
  X_scaled.elementWiseMultiply (STS::one (), D_local, *X_local, STS::zero ());

  // Y_temp will hold the result of M^{-1}*X_scaled.  If beta == 0, we
  // can write the result of the local apply directly to Y_local, so
  // Y_temp may alias Y_local.  Otherwise, if beta != 0, we need
  // temporary storage for M^{-1}*X_scaled, so Y_temp must be
  // different than Y_local.
  RCP<local_mv_type> Y_temp;
  if (beta == STS::zero ()) {
    Y_temp = Y_local;
  } else {
    Y_temp = rcp (new local_mv_type (localMap_, numVecs));
  }

  // Apply the local operator: Y_temp := M^{-1} * X_scaled
  this->applyImpl (X_scaled, *Y_temp, mode, STS::one (), STS::zero ());
  // Y_local := beta * Y_local + alpha * diag(D_local) * Y_temp.
  //
  // Note that we still use the permuted subset scaling D_local here,
  // because Y_temp has the same permuted subset Map.  That's good, in
  // fact, because it's a subset: less data to read and multiply.
  Y_local->elementWiseMultiply (alpha, D_local, *Y_temp, beta);

  // Copy the permuted subset output vector Y_local into the original
  // output multivector Y.
  mvgs.scatter (Y, *Y_local, localRows);
}
// Computes Y := beta * Y + alpha * M^{-1} * X on the permuted subset of
// local rows owned by this container, where M^{-1} is whatever applyImpl()
// applies.
//
// X, Y   input / in-out multivectors; must have the same number of columns.
// mode   forwarded unchanged to applyImpl().
// alpha, beta   converted to local_scalar_type and forwarded to applyImpl().
//
// Throws std::runtime_error if compute() has not been called or if X and Y
// have different numbers of columns; throws std::logic_error if an internal
// temporary does not have numRows_ rows.
void DenseContainer<MatrixType, LocalScalarType>::
apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
       Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
       Teuchos::ETransp mode,
       scalar_type alpha,
       scalar_type beta) const
{
  using Teuchos::ArrayView;
  using Teuchos::as;
  using Teuchos::RCP;
  using Teuchos::rcp;

  // The local operator might have a different Scalar type than
  // MatrixType.  This means that we might have to convert X and Y to
  // the Tpetra::MultiVector specialization that the local operator
  // wants.  This class' X_ and Y_ internal fields are of the right
  // type for the local operator, so we can use those as targets.

  // Tpetra::MultiVector specialization corresponding to the global operator.
  typedef Tpetra::MultiVector<scalar_type,
    local_ordinal_type, global_ordinal_type, node_type> global_mv_type;

  // Name this method (not weightedApply) in every diagnostic below.
  const char prefix[] = "Ifpack2::DenseContainer::apply: ";
  TEUCHOS_TEST_FOR_EXCEPTION(
    ! IsComputed_, std::runtime_error, prefix << "You must have called the "
    "compute() method before you may call this method.  You may call "
    "apply() as many times as you want after calling compute() once, "
    "but you must have called compute() at least once first.");
  const size_t numVecs = X.getNumVectors ();
  // Report Y's own column count (the message used to print X's twice).
  TEUCHOS_TEST_FOR_EXCEPTION(
    numVecs != Y.getNumVectors (), std::runtime_error,
    prefix << "X and Y have different numbers of vectors (columns). X has "
    << X.getNumVectors () << ", but Y has " << Y.getNumVectors () << ".");

  if (numVecs == 0) {
    return; // done! nothing to do
  }

  // The local operator works on a permuted subset of the local parts
  // of X and Y.  The subset and permutation are defined by the index
  // array returned by getLocalRows().  For now, we always use
  // temporary storage (X_ and Y_) to hold the permuted versions.
  // numRows_ here gives the number of rows in the row Map of the
  // local Inverse_ operator.
  //
  // FIXME (mfh 20 Aug 2013) There might be an implicit assumption
  // here that the row Map and the range Map of that operator are
  // the same.
  //
  // FIXME (mfh 20 Aug 2013) This "local permutation" functionality
  // really belongs in Tpetra.

  // (Re)allocate the cached temporary if it does not exist yet or was
  // sized for a different number of columns by an earlier call.
  if (X_.is_null () || X_->getNumVectors () != numVecs) {
    X_ = rcp (new local_mv_type (localMap_, numVecs));
  }
  RCP<local_mv_type> X_local = X_;
  TEUCHOS_TEST_FOR_EXCEPTION(
    X_local->getLocalLength () != numRows_, std::logic_error, prefix <<
    "X_local has length " << X_local->getLocalLength () << ", which does "
    "not match numRows_ = " << numRows_ << ". Please report this bug to "
    "the Ifpack2 developers.");
  ArrayView<const local_ordinal_type> localRows = this->getLocalRows ();

  Details::MultiVectorLocalGatherScatter<global_mv_type, local_mv_type> mvgs;
  mvgs.gather (*X_local, X, localRows);

  // We must gather the contents of the output multivector Y even on
  // input to applyImpl(), since the inverse operator might use it as
  // an initial guess for a linear solve.  We have no way of knowing
  // whether it does or does not.

  if (Y_.is_null () || Y_->getNumVectors () != numVecs) {
    Y_ = rcp (new local_mv_type (localMap_, numVecs));
  }
  RCP<local_mv_type> Y_local = Y_;
  // Report Y_local's own length (the message used to print X_local's).
  TEUCHOS_TEST_FOR_EXCEPTION(
    Y_local->getLocalLength () != numRows_, std::logic_error, prefix <<
    "Y_local has length " << Y_local->getLocalLength () << ", which does "
    "not match numRows_ = " << numRows_ << ". Please report this bug to "
    "the Ifpack2 developers.");
  mvgs.gather (*Y_local, Y, localRows);

  // Apply the local operator:
  // Y_local := beta*Y_local + alpha*M^{-1}*X_local
  this->applyImpl (*X_local, *Y_local, mode, as<local_scalar_type> (alpha),
                   as<local_scalar_type> (beta));

  // Scatter the permuted subset output vector Y_local back into the
  // original output multivector Y.
  mvgs.scatter (Y, *Y_local, localRows);
}
// Applies the preconditioner: hands X to solver_ as the right-hand side and
// Y as the solution vector, runs solver_->solve(), then scales the result
// by alpha.
//
// X      input multivector (copied first if it shares storage with Y).
// Y      output multivector; same number of columns as X.
// mode   NOTE(review): not consulted anywhere in this method.
// alpha  scaling applied to the computed solution.
// beta   must be zero; other values are not implemented.
//
// Throws std::runtime_error if compute() has not been called or if the
// column counts differ; throws std::logic_error if beta != 0.
// On success, bumps NumApply_ and refreshes ApplyTime_.
void SupportGraph<MatrixType>::
apply (const Tpetra::MultiVector<scalar_type,
                                 local_ordinal_type,
                                 global_ordinal_type,
                                 node_type>& X,
       Tpetra::MultiVector<scalar_type,
                           local_ordinal_type,
                           global_ordinal_type,
                           node_type>& Y,
       Teuchos::ETransp mode,
       scalar_type alpha,
       scalar_type beta) const
{
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcpFromRef;
  using Teuchos::Time;
  using Teuchos::TimeMonitor;
  typedef Tpetra::MultiVector<scalar_type, local_ordinal_type,
                              global_ordinal_type, node_type> MV;

  // Create a timer for this method, if it doesn't exist already.
  // TimeMonitor::getNewCounter registers the timer, so that
  // TimeMonitor's class methods like summarize() will report the
  // total time spent in successful calls to this method.
  const std::string timerName ("Ifpack2::SupportGraph::apply");
  RCP<Time> timer = TimeMonitor::lookupCounter(timerName);
  if (timer.is_null()) {
    timer = TimeMonitor::getNewCounter(timerName);
  }

  { // Start timing here.
    Teuchos::TimeMonitor timeMon (*timer);

    TEUCHOS_TEST_FOR_EXCEPTION(
      ! isComputed(), std::runtime_error,
      "Ifpack2::SupportGraph::apply: You must call compute() to compute the "
      "incomplete factorization, before calling apply().");

    TEUCHOS_TEST_FOR_EXCEPTION(
      X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
      "Ifpack2::SupportGraph::apply: X and Y must have the same number of "
      "columns. X has " << X.getNumVectors() << " columns, but Y has "
      << Y.getNumVectors() << " columns.");

    TEUCHOS_TEST_FOR_EXCEPTION(
      beta != STS::zero(), std::logic_error,
      "Ifpack2::SupportGraph::apply: This method does not currently work when "
      "beta != 0.");

    // If X and Y are pointing to the same memory location,
    // we need to create an auxiliary vector, Xcopy
    RCP<const MV> Xcopy;
    if (X.getLocalMV().getValues() == Y.getLocalMV().getValues()) {
      Xcopy = rcp (new MV(X));
    }
    else {
      Xcopy = rcpFromRef(X);
    }

    RCP<MV> Ycopy = rcpFromRef(Y);

    solver_->setB(Xcopy);
    solver_->setX(Ycopy);
    solver_->solve ();

    // Scale the computed solution.  This has to happen after the solve:
    // scaling Y beforehand only rescales its stale input contents and
    // leaves alpha with no effect on the result.
    if (alpha != STS::one()) {
      Y.scale(alpha);
    }
  } // Stop timing here.

  ++NumApply_;

  // timer->totalElapsedTime() returns the total time over all timer
  // calls.  Thus, we use = instead of +=.
  ApplyTime_ = timer->totalElapsedTime();
}
// Computes Y := beta * Y + alpha * op(M) * X, where M is the overlapping
// matrix whose first A_->getNodeNumRows() local rows come from A_ and whose
// remaining local rows come from ExtMatrix_, and op is selected by mode
// (NO_TRANS, TRANS, or CONJ_TRANS).
//
// Column indices returned by getLocalRowCopy() index the column-space
// vector directly; row i of ExtMatrix_ is local row numMyRowsA + i of M.
//
// Throws std::runtime_error if X and Y have different numbers of columns.
//
// NOTE(review): X and Y are not checked for aliasing; Y is initialized
// before X is read, so passing the same vector for both is not supported.
void OverlappingRowMatrix<MatrixType>::
apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &X,
       Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &Y,
       Teuchos::ETransp mode,
       scalar_type alpha,
       scalar_type beta) const
{
  using Teuchos::ArrayRCP;
  using Teuchos::as;
  typedef scalar_type RangeScalar;
  typedef scalar_type DomainScalar;
  typedef Teuchos::ScalarTraits<RangeScalar> STRS;

  TEUCHOS_TEST_FOR_EXCEPTION(
    X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
    "Ifpack2::OverlappingRowMatrix::apply: The input X and the output Y must "
    "have the same number of columns.  X.getNumVectors() = "
    << X.getNumVectors() << " != Y.getNumVectors() = " << Y.getNumVectors()
    << ".");

  // FIXME (mfh 13 July 2013) This would be a good candidate for a
  // Kokkos local parallel operator implementation.  That would
  // obviate the need for getting views of the data and make the code
  // below a lot simpler.

  // Y := beta * Y.  For beta == 0 overwrite instead of scaling, so that
  // NaN/Inf entries in the incoming Y cannot leak into the result.
  if (beta == STRS::zero ()) {
    Y.putScalar (STRS::zero ());
  }
  else if (beta != STRS::one ()) {
    Y.scale (beta);
  }

  ArrayRCP<ArrayRCP<const DomainScalar> > x_ptr = X.get2dView();
  ArrayRCP<ArrayRCP<RangeScalar> >        y_ptr = Y.get2dViewNonConst();

  const size_t NumVectors = Y.getNumVectors();
  const bool transposed = (mode != Teuchos::NO_TRANS);
  const bool conjugated = transposed && (mode != Teuchos::TRANS);

  // Rows that come from the original matrix A_.
  const size_t numMyRowsA = A_->getNodeNumRows ();
  for (size_t i = 0; i < numMyRowsA; ++i) {
    size_t Nnz;
    // Use this class's getrow to make the below code simpler
    A_->getLocalRowCopy (i, Indices_ (), Values_ (), Nnz);
    for (size_t j = 0; j < Nnz; ++j) {
      const size_t col = static_cast<size_t> (Indices_[j]);
      RangeScalar a_ij = as<RangeScalar> (Values_[j]);
      if (conjugated) {
        a_ij = STRS::conjugate (a_ij);
      }
      const RangeScalar coeff = alpha * a_ij;
      // (Conjugate) transpose swaps the roles of row and column index.
      const size_t yIdx = transposed ? col : i;
      const size_t xIdx = transposed ? i : col;
      for (size_t k = 0; k < NumVectors; ++k) {
        y_ptr[k][yIdx] += coeff * as<RangeScalar> (x_ptr[k][xIdx]);
      }
    }
  }

  // Rows that come from the overlap matrix ExtMatrix_.
  const size_t numMyRowsB = ExtMatrix_->getNodeNumRows ();
  for (size_t i = 0; i < numMyRowsB; ++i) {
    size_t Nnz;
    // Use this class's getrow to make the below code simpler
    ExtMatrix_->getLocalRowCopy (i, Indices_ (), Values_ (), Nnz);
    // Local row index of this entry within the overlapping matrix.
    const size_t row = numMyRowsA + i;
    for (size_t j = 0; j < Nnz; ++j) {
      const size_t col = static_cast<size_t> (Indices_[j]);
      RangeScalar a_ij = as<RangeScalar> (Values_[j]);
      if (conjugated) {
        a_ij = STRS::conjugate (a_ij);
      }
      const RangeScalar coeff = alpha * a_ij;
      // In the transposed case the row offset belongs to the *input*
      // index: Y[col] += conj?(M[row][col]) * X[row].  Adding the offset
      // to the output index would skip the overlap part of X and write
      // to the wrong rows of Y.
      const size_t yIdx = transposed ? col : row;
      const size_t xIdx = transposed ? row : col;
      for (size_t k = 0; k < NumVectors; ++k) {
        y_ptr[k][yIdx] += coeff * as<RangeScalar> (x_ptr[k][xIdx]);
      }
    }
  }
}
// Runs one forward sweep followed by one reverse sweep over the local
// blocks (presumably symmetric block Gauss-Seidel, going by the name).
//
// X      right-hand side; Xcopy is reset to it before the reverse sweep.
// Xcopy  working right-hand side, updated in place row by row.
// Y      current iterate; holds the result on return.
//
// For every non-empty block, each of its rows in Xcopy has the product of
// the corresponding row of A_ with the working iterate subtracted from it,
// after which the block's container is applied with DampingFactor_.
// ApplyFlops_ is increased once per processed block.
void TomBlockRelaxation<MatrixType,ContainerType>::DoSGS(const Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& X,
                                                        Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Xcopy,
                                                        Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Y) const
{
  typedef Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> mv_type;
  typedef Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> > view2d_type;

  Scalar unity = Teuchos::ScalarTraits<Scalar>::one();
  int rowCapacity = A_->getNodeMaxNumRowEntries();
  int numCols = X.getNumVectors();

  // Scratch storage for one matrix row at a time.
  Teuchos::Array<Scalar> rowVals;
  Teuchos::Array<LocalOrdinal> rowInds;
  rowVals.resize(rowCapacity);
  rowInds.resize(rowCapacity);

  // an additional vector is needed by parallel computations
  // (note that applications through Ifpack2_AdditiveSchwarz
  // are always seen as serial)
  Teuchos::RCP<mv_type> work;
  if (!IsParallel_) {
    // Serial: operate on Y itself through a non-owning handle.
    work = Teuchos::rcp( &Y, false );
  }
  else {
    work = Teuchos::rcp( new mv_type(Importer_->getTargetMap(), numCols) );
  }

  // The view of X is acquired as before but never read below.
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> > rhsView = X.get2dView();
  view2d_type rhsCopyView = Xcopy.get2dViewNonConst();
  view2d_type outView     = Y.get2dViewNonConst();
  view2d_type workView    = work->get2dViewNonConst();

  // data exchange is here, once per sweep
  if (IsParallel_) {
    work->doImport(Y,*Importer_,Tpetra::INSERT);
  }

  // Pass 0 walks the blocks first to last (forward sweep); pass 1 walks
  // them last to first (reverse sweep) after resetting Xcopy to X.
  for (int pass = 0; pass < 2; ++pass) {
    const bool reverse = (pass == 1);
    if (reverse) {
      Xcopy = X;
    }

    LocalOrdinal blk = reverse ? NumLocalBlocks_-1 : 0;
    while (reverse ? (blk >= 0) : (blk < NumLocalBlocks_)) {
      // may happen that a partition is empty
      if (Containers_[blk]->getNumRows() != 0) {
        // update from previous block
        for (size_t r = 0 ; r < Containers_[blk]->getNumRows() ; ++r) {
          const LocalOrdinal row = Containers_[blk]->ID(r);
          size_t entryCount;
          A_->getLocalRowCopy(row,rowInds(),rowVals(),entryCount);

          for (size_t e = 0 ; e < entryCount ; ++e) {
            const LocalOrdinal col = rowInds[e];
            for (int v = 0 ; v < numCols ; ++v) {
              rhsCopyView[v][row] -= rowVals[e] * workView[v][col];
            }
          }
        }

        // solve with this block
        // Note: this relies on X/Y and the work vector having the same
        // ordering for on-proc unknowns.
        // Note: Add flop counts for inverse apply
        Containers_[blk]->apply(Xcopy,*work,Teuchos::NO_TRANS,DampingFactor_,unity);

        // operations for all getrow's
        ApplyFlops_ += numCols * (2 * NumGlobalNonzeros_ + 2 * NumGlobalRows_);
      }

      if (reverse) {
        blk--;
      }
      else {
        blk++;
      }
    }
  }

  // Attention: this is delicate... Not all combinations
  // of the work vector and Y will always work (though for ML it should be ok)
  if (IsParallel_) {
    for (int v = 0 ; v < numCols ; ++v) {
      for (size_t row = 0 ; row < NumMyRows_ ; ++row) {
        outView[v][row] = workView[v][row];
      }
    }
  }
}