// Assembles the right-hand side of the reduced system.
//
// Step 1: reduced row i is seeded with the RHS entry of the original row
//         InvReorder_[i].
// Step 2: for every stored entry of that original row whose column is marked
//         -1 in Reorder_ (presumably the columns that were eliminated from
//         the reduced system), value * LHS[column] is subtracted from the
//         reduced right-hand side.
//
// LHS        - values read at the columns marked -1 in Reorder_.
// RHS        - right-hand side, indexed by original local row.
// ReducedRHS - output; only local entries [0, NumRows_) are written.
//
// The number of columns processed is LHS.getNumVectors(); RHS and ReducedRHS
// are indexed with the same column count without any check.
void SingletonFilter<MatrixType>::CreateReducedRHSTempl(const Tpetra::MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,Node>& LHS, const Tpetra::MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node>& RHS, Tpetra::MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node>& ReducedRHS)
{
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<const RangeScalar > > rhsCols     = RHS.get2dView();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<const DomainScalar> > lhsCols     = LHS.get2dView();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<RangeScalar> >        reducedCols = ReducedRHS.get2dViewNonConst();

  const size_t numCols = LHS.getNumVectors();

  // Step 1: gather the right-hand side into the reduced row numbering.
  for (size_t col = 0; col < numCols; ++col) {
    for (size_t row = 0; row < NumRows_; ++row) {
      reducedCols[col][row] = rhsCols[col][InvReorder_[row]];
    }
  }

  // Step 2: move the contributions of the columns marked -1 to the right.
  for (size_t row = 0; row < NumRows_; ++row) {
    const LocalOrdinal origRow = InvReorder_[row];
    size_t numEntries;
    A_->getLocalRowCopy(origRow, Indices_(), Values_(), numEntries);

    for (size_t e = 0; e < numEntries; ++e) {
      if (Reorder_[Indices_[e]] != -1) {
        continue;  // column is kept in the reduced system
      }
      for (size_t col = 0; col < numCols; ++col) {
        reducedCols[col][row] -=
          (RangeScalar)Values_[e] * (RangeScalar)lhsCols[col][Indices_[e]];
      }
    }
  }
}
// Computes Y = Op(M) * X, where M is the overlapping matrix whose local rows
// are the NumMyRowsA_ rows of A_ followed by the NumMyRowsB_ rows of
// ExtMatrix_, and Op is selected by `mode`:
//   Teuchos::NO_TRANS   - M
//   Teuchos::TRANS      - transpose of M
//   anything else       - conjugate transpose of M (Teuchos::CONJ_TRANS)
//
// X     - input multivector; entries are converted to RangeScalar.
// Y     - output multivector; it is zeroed first and then accumulated into.
// alpha - ignored (see note below).
// beta  - ignored (see note below).
//
// Throws std::runtime_error if X and Y have different numbers of columns.
//
// Local row r of M is row r of A_ for r < NumMyRowsA_, and row
// r - NumMyRowsA_ of ExtMatrix_ otherwise.  Column indices returned by both
// getLocalRowCopy calls are used directly as indices into X (NO_TRANS) or
// into Y (transposed modes).
void OverlappingRowMatrix<MatrixType>::applyTempl(const Tpetra::MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,Node> &X, Tpetra::MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node> &Y, Teuchos::ETransp mode, RangeScalar alpha, RangeScalar beta) const
{
  // Note: This isn't AztecOO compliant.  But neither was Ifpack's version.
  // In particular alpha and beta are not applied: Y is always overwritten
  // with the plain product.
  TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
    "Ifpack2::OverlappingRowMatrix::apply ERROR: X.getNumVectors() != Y.getNumVectors().");

  const RangeScalar zero = Teuchos::ScalarTraits<RangeScalar>::zero();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<const DomainScalar> > x_ptr = X.get2dView();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<RangeScalar> >        y_ptr = Y.get2dViewNonConst();

  Y.putScalar(zero);
  const size_t NumVectors = Y.getNumVectors();

  // Part 1: rows that come from A_ (overlapping row index == i).
  for (size_t i = 0; i < NumMyRowsA_; ++i) {
    size_t Nnz;
    A_->getLocalRowCopy(i, Indices_(), Values_(), Nnz);

    if (mode == Teuchos::NO_TRANS) {
      for (size_t j = 0; j < Nnz; ++j)
        for (size_t k = 0; k < NumVectors; ++k)
          y_ptr[k][i] +=
            static_cast<RangeScalar>(Values_[j]) * static_cast<RangeScalar>(x_ptr[k][Indices_[j]]);
    }
    else if (mode == Teuchos::TRANS) {
      for (size_t j = 0; j < Nnz; ++j)
        for (size_t k = 0; k < NumVectors; ++k)
          y_ptr[k][Indices_[j]] +=
            static_cast<RangeScalar>(Values_[j]) * static_cast<RangeScalar>(x_ptr[k][i]);
    }
    else { // mode == Teuchos::CONJ_TRANS
      for (size_t j = 0; j < Nnz; ++j)
        for (size_t k = 0; k < NumVectors; ++k)
          y_ptr[k][Indices_[j]] +=
            Teuchos::ScalarTraits<RangeScalar>::conjugate(static_cast<RangeScalar>(Values_[j])) *
            static_cast<RangeScalar>(x_ptr[k][i]);
    }
  }

  // Part 2: rows that come from ExtMatrix_ (overlapping row index is
  // NumMyRowsA_ + i).
  for (size_t i = 0; i < NumMyRowsB_; ++i) {
    const size_t row = NumMyRowsA_ + i;
    size_t Nnz;
    ExtMatrix_->getLocalRowCopy(i, Indices_(), Values_(), Nnz);

    if (mode == Teuchos::NO_TRANS) {
      for (size_t j = 0; j < Nnz; ++j)
        for (size_t k = 0; k < NumVectors; ++k)
          y_ptr[k][row] +=
            static_cast<RangeScalar>(Values_[j]) * static_cast<RangeScalar>(x_ptr[k][Indices_[j]]);
    }
    else if (mode == Teuchos::TRANS) {
      // Transpose: entry (row, col) contributes to y[col] from x[row].  The
      // NumMyRowsA_ offset belongs to the row index (as in the NO_TRANS
      // branch above), not to the column index.
      for (size_t j = 0; j < Nnz; ++j)
        for (size_t k = 0; k < NumVectors; ++k)
          y_ptr[k][Indices_[j]] +=
            static_cast<RangeScalar>(Values_[j]) * static_cast<RangeScalar>(x_ptr[k][row]);
    }
    else { // mode == Teuchos::CONJ_TRANS
      // Same indexing as TRANS, with the matrix value conjugated.
      for (size_t j = 0; j < Nnz; ++j)
        for (size_t k = 0; k < NumVectors; ++k)
          y_ptr[k][Indices_[j]] +=
            Teuchos::ScalarTraits<RangeScalar>::conjugate(static_cast<RangeScalar>(Values_[j])) *
            static_cast<RangeScalar>(x_ptr[k][row]);
    }
  }
}
// Copies the solution of the reduced system back into the full-size vector.
//
// For every reduced row i in [0, NumRows_) and every column of LHS, the entry
// ReducedLHS[i] is converted to RangeScalar and stored at the original row
// position InvReorder_[i] of LHS.  All other entries of LHS are left as they
// are.
//
// ReducedLHS - solution in the reduced row numbering (read only).
// LHS        - full-size vector that receives the scattered values.
void SingletonFilter<MatrixType>::UpdateLHSTempl(const Tpetra::MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,Node>& ReducedLHS, Tpetra::MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node>& LHS)
{
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<RangeScalar> >        fullCols    = LHS.get2dViewNonConst();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<const DomainScalar> > reducedCols = ReducedLHS.get2dView();

  const size_t numCols = LHS.getNumVectors();

  for (size_t row = 0; row < NumRows_; ++row) {
    for (size_t col = 0; col < numCols; ++col) {
      fullCols[col][InvReorder_[row]] = (RangeScalar)reducedCols[col][row];
    }
  }
}
// Scatters a multivector from the reordered numbering to the original one.
//
// For every column k and every local index i of reorderedX, the entry
// reorderedX[k][i] is converted to RangeScalar and written to
// originalY[k][reverseperm_[i]].  Entries of originalY that are not addressed
// through reverseperm_ are not modified.
//
// Throws std::runtime_error if the two multivectors have different numbers of
// columns.
void ReorderFilter<MatrixType>::permuteReorderedToOriginalTempl(const Tpetra::MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,Node> &reorderedX, Tpetra::MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node> &originalY) const
{
  TEUCHOS_TEST_FOR_EXCEPTION(reorderedX.getNumVectors() != originalY.getNumVectors(), std::runtime_error,
    "Ifpack2::ReorderFilter::permuteReorderedToOriginal ERROR: X.getNumVectors() != Y.getNumVectors().");

  Teuchos::ArrayRCP<Teuchos::ArrayRCP<const DomainScalar> > src = reorderedX.get2dView();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<RangeScalar> >        dst = originalY.get2dViewNonConst();

  const size_t numCols  = reorderedX.getNumVectors();
  const size_t localLen = reorderedX.getLocalLength();

  for (size_t col = 0; col < numCols; ++col) {
    for (LocalOrdinal idx = 0; (size_t)idx < localLen; ++idx) {
      dst[col][reverseperm_[idx]] = (RangeScalar)src[col][idx];
    }
  }
}
// Scatters a multivector from the original numbering to the reordered one.
//
// For every column k and every local index i of originalX, the entry
// originalX[k][i] is converted to RangeScalar and written to
// reorderedY[k][perm_[i]].  Entries of reorderedY that are not addressed
// through perm_ are not modified.
//
// Throws std::runtime_error if the two multivectors have different numbers of
// columns.
void ReorderFilter<MatrixType>::permuteOriginalToReorderedTempl(const Tpetra::MultiVector<DomainScalar,local_ordinal_type,global_ordinal_type,node_type> &originalX, Tpetra::MultiVector<RangeScalar,local_ordinal_type,global_ordinal_type,node_type> &reorderedY) const
{
  TEUCHOS_TEST_FOR_EXCEPTION(originalX.getNumVectors() != reorderedY.getNumVectors(), std::runtime_error,
    "Ifpack2::ReorderFilter::permuteOriginalToReordered ERROR: X.getNumVectors() != Y.getNumVectors().");

  Teuchos::ArrayRCP<Teuchos::ArrayRCP<const DomainScalar> > src = originalX.get2dView();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<RangeScalar> >        dst = reorderedY.get2dViewNonConst();

  const size_t numCols  = originalX.getNumVectors();
  const size_t localLen = originalX.getLocalLength();

  for (size_t col = 0; col < numCols; ++col) {
    for (local_ordinal_type idx = 0; (size_t)idx < localLen; ++idx) {
      dst[col][perm_[idx]] = (RangeScalar)src[col][idx];
    }
  }
}
// Computes Y = Op(M) * X using this filter's own getLocalRowCopy, where Op is
// the identity (Teuchos::NO_TRANS), the transpose (Teuchos::TRANS) or, for any
// other mode value, the conjugate transpose (Teuchos::CONJ_TRANS).
//
// X     - input multivector.
// Y     - output multivector; zeroed first, then accumulated into.
// alpha - not used by this implementation.
// beta  - not used by this implementation.
//
// Throws std::runtime_error if X and Y have different numbers of columns.
void ReorderFilter<MatrixType>::
apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &X,
       Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &Y,
       Teuchos::ETransp mode,
       scalar_type alpha,
       scalar_type beta) const
{
  typedef Teuchos::ScalarTraits<scalar_type> STS;

  // Note: This isn't AztecOO compliant.  But neither was Ifpack's version.
  // Note: The localized maps mean the matvec is trivial (and has no import).
  TEUCHOS_TEST_FOR_EXCEPTION(
    X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
    "Ifpack2::ReorderFilter::apply: X.getNumVectors() != Y.getNumVectors().");

  const scalar_type zero = STS::zero ();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<const scalar_type> > xCols = X.get2dView();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<scalar_type> >       yCols = Y.get2dViewNonConst();

  Y.putScalar (zero);
  const size_t numCols = Y.getNumVectors ();
  const size_t numRows = A_->getNodeNumRows ();

  for (size_t row = 0; row < numRows; ++row) {
    size_t numEntries;
    // Rows are fetched through this class's own getLocalRowCopy.
    getLocalRowCopy (row, Indices_ (), Values_ (), numEntries);

    switch (mode) {
    case Teuchos::NO_TRANS:
      // y[row] += M(row,c) * x[c]
      for (size_t e = 0; e < numEntries; ++e) {
        for (size_t c = 0; c < numCols; ++c) {
          yCols[c][row] += Values_[e] * xCols[c][Indices_[e]];
        }
      }
      break;

    case Teuchos::TRANS:
      // y[c] += M(row,c) * x[row]
      for (size_t e = 0; e < numEntries; ++e) {
        for (size_t c = 0; c < numCols; ++c) {
          yCols[c][Indices_[e]] += Values_[e] * xCols[c][row];
        }
      }
      break;

    default: // mode == Teuchos::CONJ_TRANS
      // y[c] += conj(M(row,c)) * x[row]
      for (size_t e = 0; e < numEntries; ++e) {
        for (size_t c = 0; c < numCols; ++c) {
          yCols[c][Indices_[e]] += STS::conjugate(Values_[e]) * xCols[c][row];
        }
      }
      break;
    }
  }
}
// Solves the singleton rows directly.
//
// For each of the NumSingletons_ rows listed in SingletonIndex_, the row of
// A_ is fetched and every stored entry whose column index equals the row
// index (the diagonal) is used to set
//     LHS[row] = RHS[row] / diagonal
// for all columns of LHS.  If a row stores no diagonal entry, LHS[row] is not
// modified; if it stores several, the last one determines the result.
//
// RHS - right-hand side (read only), converted to RangeScalar.
// LHS - receives the singleton solutions; other entries are untouched.
void SingletonFilter<MatrixType>::SolveSingletonsTempl(const Tpetra::MultiVector<DomainScalar,LocalOrdinal,GlobalOrdinal,Node>& RHS, Tpetra::MultiVector<RangeScalar,LocalOrdinal,GlobalOrdinal,Node>& LHS)
{
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<const DomainScalar> > rhsCols = RHS.get2dView();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<RangeScalar> >        lhsCols = LHS.get2dViewNonConst();

  const size_t numCols = LHS.getNumVectors();

  for (size_t s = 0; s < NumSingletons_; ++s) {
    const LocalOrdinal row = SingletonIndex_[s];

    // Fetch the row so the diagonal value of the singleton can be located.
    size_t numEntries;
    A_->getLocalRowCopy(row, Indices_(), Values_(), numEntries);

    for (size_t e = 0; e < numEntries; ++e) {
      if (Indices_[e] != row) {
        continue;  // not the diagonal entry
      }
      for (size_t col = 0; col < numCols; ++col) {
        lhsCols[col][row] = (RangeScalar)rhsCols[col][row] / (RangeScalar)Values_[e];
      }
    }
  }
}
// Computes Y = Op(M) * X over the NumRows_ rows exposed by this filter's own
// getLocalRowCopy.  Op is the identity for Teuchos::NO_TRANS, the transpose
// for Teuchos::TRANS and the conjugate transpose for any other mode value
// (Teuchos::CONJ_TRANS).
//
// X     - input multivector.
// Y     - output multivector; zeroed first, then accumulated into.
// alpha - not used by this implementation.
// beta  - not used by this implementation.
//
// Throws std::runtime_error if X and Y have different numbers of columns.
void SingletonFilter<MatrixType>::apply(const Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &X, Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y, Teuchos::ETransp mode, Scalar alpha, Scalar beta) const
{
  // Note: This isn't AztecOO compliant.  But neither was Ifpack's version.
  TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
    "Ifpack2::SingletonFilter::apply ERROR: X.getNumVectors() != Y.getNumVectors().");

  Scalar zero = Teuchos::ScalarTraits<Scalar>::zero();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> > xCols = X.get2dView();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> >       yCols = Y.get2dViewNonConst();

  Y.putScalar(zero);
  const size_t numCols = Y.getNumVectors();

  for (size_t row = 0; row < NumRows_; ++row) {
    size_t numEntries;
    // Rows are fetched through this class's own getLocalRowCopy.
    getLocalRowCopy(row, Indices_(), Values_(), numEntries);

    switch (mode) {
    case Teuchos::NO_TRANS:
      // y[row] += M(row,c) * x[c]
      for (size_t e = 0; e < numEntries; ++e) {
        for (size_t c = 0; c < numCols; ++c) {
          yCols[c][row] += Values_[e] * xCols[c][Indices_[e]];
        }
      }
      break;

    case Teuchos::TRANS:
      // y[c] += M(row,c) * x[row]
      for (size_t e = 0; e < numEntries; ++e) {
        for (size_t c = 0; c < numCols; ++c) {
          yCols[c][Indices_[e]] += Values_[e] * xCols[c][row];
        }
      }
      break;

    default: // mode == Teuchos::CONJ_TRANS
      // y[c] += conj(M(row,c)) * x[row]
      for (size_t e = 0; e < numEntries; ++e) {
        for (size_t c = 0; c < numCols; ++c) {
          yCols[c][Indices_[e]] +=
            Teuchos::ScalarTraits<Scalar>::conjugate(Values_[e]) * xCols[c][row];
        }
      }
      break;
    }
  }
}
// One symmetric block sweep: a forward pass over the local blocks
// 0 .. NumLocalBlocks_-1 followed by a backward pass in the opposite order.
//
// For each non-empty block, and for each row LID that the block's container
// reports through ID(j), every stored entry (LID, col) of A_ subtracts
// value * Y2[col] from Xcopy[LID].  The container's apply() is then called
// with (Xcopy, Y2, NO_TRANS, DampingFactor_, one).
//
// X     - original right-hand side; Xcopy is reset to it before the backward
//         pass.
// Xcopy - working copy of the right-hand side, modified in place.
// Y     - current iterate; updated in place in the serial case, and
//         overwritten from Y2 (first NumMyRows_ entries per column) at the end
//         in the parallel case.
//
// ApplyFlops_ is incremented once per processed block in each pass.
void TomBlockRelaxation<MatrixType,ContainerType>::DoSGS(const Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& X, Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Xcopy, Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Y) const
{
  typedef Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> multivector_t;

  Scalar one = Teuchos::ScalarTraits<Scalar>::one();
  int maxRowEntries = A_->getNodeMaxNumRowEntries();
  int numVecs = X.getNumVectors();

  // Scratch storage for one matrix row.
  Teuchos::Array<Scalar> rowValues;
  Teuchos::Array<LocalOrdinal> rowIndices;
  rowValues.resize(maxRowEntries);
  rowIndices.resize(maxRowEntries);

  // An additional vector is needed by parallel computations
  // (note that applications through Ifpack2_AdditiveSchwarz
  // are always seen as serial).  In serial, Y2 is a non-owning alias of Y.
  Teuchos::RCP<multivector_t> Y2;
  if (IsParallel_) {
    Y2 = Teuchos::rcp( new multivector_t(Importer_->getTargetMap(), numVecs) );
  }
  else {
    Y2 = Teuchos::rcp( &Y, false );
  }

  Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> > x_view     = X.get2dView();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> >       xcopy_view = Xcopy.get2dViewNonConst();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> >       y_view     = Y.get2dViewNonConst();
  Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> >       y2_view    = Y2->get2dViewNonConst();

  // Data exchange is here, once per sweep.
  if (IsParallel_) {
    Y2->doImport(Y, *Importer_, Tpetra::INSERT);
  }

  // ---------------------------------------------------------------- forward
  for (LocalOrdinal blk = 0; blk < NumLocalBlocks_; blk++) {
    // It may happen that a partition is empty.
    if (Containers_[blk]->getNumRows() == 0) {
      continue;
    }

    // Update the right-hand side of this block with the current iterate.
    for (size_t j = 0; j < Containers_[blk]->getNumRows(); j++) {
      const LocalOrdinal LID = Containers_[blk]->ID(j);
      size_t numEntries;
      A_->getLocalRowCopy(LID, rowIndices(), rowValues(), numEntries);

      for (size_t e = 0; e < numEntries; e++) {
        const LocalOrdinal col = rowIndices[e];
        for (int v = 0; v < numVecs; v++) {
          xcopy_view[v][LID] -= rowValues[e] * y2_view[v][col];
        }
      }
    }

    // Solve with this block.
    // Note: this relies on X/Y and Y2 having the same ordering for on-proc
    // unknowns.
    // Note: Add flop counts for inverse apply.
    Containers_[blk]->apply(Xcopy, *Y2, Teuchos::NO_TRANS, DampingFactor_, one);

    // Operations for all getrow's.
    ApplyFlops_ += numVecs * (2 * NumGlobalNonzeros_ + 2 * NumGlobalRows_);
  } // end forward sweep

  // ---------------------------------------------------------------- reverse
  Xcopy = X;
  for (LocalOrdinal blk = NumLocalBlocks_ - 1; blk >= 0; blk--) {
    // It may happen that a partition is empty.
    if (Containers_[blk]->getNumRows() == 0) {
      continue;
    }

    // Update the right-hand side of this block with the current iterate.
    for (size_t j = 0; j < Containers_[blk]->getNumRows(); j++) {
      const LocalOrdinal LID = Containers_[blk]->ID(j);
      size_t numEntries;
      A_->getLocalRowCopy(LID, rowIndices(), rowValues(), numEntries);

      for (size_t e = 0; e < numEntries; e++) {
        const LocalOrdinal col = rowIndices[e];
        for (int v = 0; v < numVecs; v++) {
          xcopy_view[v][LID] -= rowValues[e] * y2_view[v][col];
        }
      }
    }

    // Solve with this block (same ordering assumption as in the forward pass).
    // Note: Add flop counts for inverse apply.
    Containers_[blk]->apply(Xcopy, *Y2, Teuchos::NO_TRANS, DampingFactor_, one);

    // Operations for all getrow's.
    ApplyFlops_ += numVecs * (2 * NumGlobalNonzeros_ + 2 * NumGlobalRows_);
  } // end reverse sweep

  // Attention: this is delicate... Not all combinations
  // of Y2 and Y will always work (though for ML it should be ok).
  if (IsParallel_) {
    for (int v = 0; v < numVecs; ++v) {
      for (size_t row = 0; row < NumMyRows_; ++row) {
        y_view[v][row] = y2_view[v][row];
      }
    }
  }
}
// Computes Y = Op(M) * X, where M is the overlapping matrix whose local rows
// are the rows of A_ followed by the rows of ExtMatrix_, and Op is selected
// by `mode`:
//   Teuchos::NO_TRANS   - M
//   Teuchos::TRANS      - transpose of M
//   anything else       - conjugate transpose of M (Teuchos::CONJ_TRANS)
//
// X     - input multivector.
// Y     - output multivector; it is zeroed first and then accumulated into.
// alpha - not used by this implementation.
// beta  - not used by this implementation.
//
// Throws std::runtime_error if X and Y have different numbers of columns.
//
// Local row r of M is row r of A_ for r < A_->getNodeNumRows(), and row
// r - A_->getNodeNumRows() of ExtMatrix_ otherwise.  Column indices returned
// by both getLocalRowCopy calls are used directly as indices into X
// (NO_TRANS) or into Y (transposed modes).
void OverlappingRowMatrix<MatrixType>::
apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &X,
       Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &Y,
       Teuchos::ETransp mode,
       scalar_type alpha,
       scalar_type beta) const
{
  using Teuchos::ArrayRCP;
  using Teuchos::as;
  typedef scalar_type RangeScalar;
  typedef scalar_type DomainScalar;
  typedef Teuchos::ScalarTraits<RangeScalar> STRS;

  TEUCHOS_TEST_FOR_EXCEPTION(
    X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
    "Ifpack2::OverlappingRowMatrix::apply: The input X and the output Y must "
    "have the same number of columns. X.getNumVectors() = "
    << X.getNumVectors() << " != Y.getNumVectors() = "
    << Y.getNumVectors() << ".");

  // FIXME (mfh 13 July 2013) This would be a good candidate for a
  // Kokkos local parallel operator implementation.  That would
  // obviate the need for getting views of the data and make the code
  // below a lot simpler.

  const RangeScalar zero = STRS::zero ();
  ArrayRCP<ArrayRCP<const DomainScalar> > x_ptr = X.get2dView();
  ArrayRCP<ArrayRCP<RangeScalar> >        y_ptr = Y.get2dViewNonConst();

  Y.putScalar(zero);
  const size_t NumVectors = Y.getNumVectors();

  // Part 1: rows that come from A_ (overlapping row index == i).
  const size_t numMyRowsA = A_->getNodeNumRows ();
  for (size_t i = 0; i < numMyRowsA; ++i) {
    size_t Nnz;
    A_->getLocalRowCopy (i, Indices_ (), Values_ (), Nnz);

    if (mode == Teuchos::NO_TRANS) {
      for (size_t j = 0; j < Nnz; ++j)
        for (size_t k = 0; k < NumVectors; ++k)
          y_ptr[k][i] +=
            as<RangeScalar> (Values_[j]) * as<RangeScalar> (x_ptr[k][Indices_[j]]);
    }
    else if (mode == Teuchos::TRANS) {
      for (size_t j = 0; j < Nnz; ++j)
        for (size_t k = 0; k < NumVectors; ++k)
          y_ptr[k][Indices_[j]] +=
            as<RangeScalar> (Values_[j]) * as<RangeScalar> (x_ptr[k][i]);
    }
    else { // mode == Teuchos::CONJ_TRANS
      for (size_t j = 0; j < Nnz; ++j)
        for (size_t k = 0; k < NumVectors; ++k)
          y_ptr[k][Indices_[j]] +=
            STRS::conjugate (as<RangeScalar> (Values_[j])) *
            as<RangeScalar> (x_ptr[k][i]);
    }
  }

  // Part 2: rows that come from ExtMatrix_ (overlapping row index is
  // numMyRowsA + i).
  const size_t numMyRowsB = ExtMatrix_->getNodeNumRows ();
  for (size_t i = 0; i < numMyRowsB; ++i) {
    const size_t row = numMyRowsA + i;
    size_t Nnz;
    ExtMatrix_->getLocalRowCopy (i, Indices_ (), Values_ (), Nnz);

    if (mode == Teuchos::NO_TRANS) {
      for (size_t j = 0; j < Nnz; ++j)
        for (size_t k = 0; k < NumVectors; ++k)
          y_ptr[k][row] +=
            as<RangeScalar> (Values_[j]) * as<RangeScalar> (x_ptr[k][Indices_[j]]);
    }
    else if (mode == Teuchos::TRANS) {
      // Transpose: entry (row, col) contributes to y[col] from x[row].  The
      // numMyRowsA offset belongs to the row index (as in the NO_TRANS
      // branch above), not to the column index.
      for (size_t j = 0; j < Nnz; ++j)
        for (size_t k = 0; k < NumVectors; ++k)
          y_ptr[k][Indices_[j]] +=
            as<RangeScalar> (Values_[j]) * as<RangeScalar> (x_ptr[k][row]);
    }
    else { // mode == Teuchos::CONJ_TRANS
      // Same indexing as TRANS, with the matrix value conjugated.
      for (size_t j = 0; j < Nnz; ++j)
        for (size_t k = 0; k < NumVectors; ++k)
          y_ptr[k][Indices_[j]] +=
            STRS::conjugate (as<RangeScalar> (Values_[j])) *
            as<RangeScalar> (x_ptr[k][row]);
    }
  }
}