void
LocalSparseTriangularSolver<MatrixType>::
localApply (const MV& X,
            MV& Y,
            const Teuchos::ETransp mode,
            const scalar_type& alpha,
            const scalar_type& beta) const
{
  using Teuchos::RCP;
  typedef scalar_type ST;
  typedef Teuchos::ScalarTraits<ST> STS;

  if (beta == STS::zero ()) {
    if (alpha == STS::zero ()) {
      Y.putScalar (STS::zero ()); // Y := 0 * Y (ignore contents of Y)
    }
    else { // alpha != 0
      A_crs_->template localSolve<ST, ST> (X, Y, mode);
      if (alpha != STS::one ()) {
        Y.scale (alpha);
      }
    }
  }
  else { // beta != 0
    if (alpha == STS::zero ()) {
      Y.scale (beta); // Y := beta * Y
    }
    else { // alpha != 0
      MV Y_tmp (Y, Teuchos::Copy);
      A_crs_->template localSolve<ST, ST> (X, Y_tmp, mode); // Y_tmp := M * X
      Y.update (alpha, Y_tmp, beta); // Y := beta * Y + alpha * Y_tmp
    }
  }
}
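// The sketch below is NOT part of the source above; it is a minimal, hedged
// usage example showing how localApply's alpha/beta branches are typically
// reached through the public apply() interface.  It assumes a Trilinos build
// with Ifpack2 and Tpetra enabled, and that A_tri is an already
// fill-complete, locally triangular Tpetra::CrsMatrix<double>.  The function
// and variable names here are illustrative only.
#include "Ifpack2_LocalSparseTriangularSolver.hpp"
#include "Tpetra_CrsMatrix.hpp"
#include "Tpetra_MultiVector.hpp"

void
exampleLocalTriangularSolve (const Teuchos::RCP<const Tpetra::CrsMatrix<double> >& A_tri,
                             const Tpetra::MultiVector<double>& X,
                             Tpetra::MultiVector<double>& Y)
{
  Ifpack2::LocalSparseTriangularSolver<Tpetra::RowMatrix<double> > solver (A_tri);
  solver.initialize ();
  solver.compute ();
  // Exercises the "beta != 0, alpha != 0" branch of localApply above:
  // Y := 2.0 * (A_tri \ X) + 0.5 * Y.
  solver.apply (X, Y, Teuchos::NO_TRANS, 2.0, 0.5);
}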
//! Do the transpose or conjugate transpose solve.
void
applyTranspose (const MV& X_in, MV& Y_in, const Teuchos::ETransp mode) const
{
  typedef Teuchos::ScalarTraits<Scalar> ST;
  using Teuchos::null;

  TEUCHOS_TEST_FOR_EXCEPTION
    (mode != Teuchos::TRANS && mode != Teuchos::CONJ_TRANS, std::logic_error,
     "Tpetra::CrsMatrixSolveOp::applyTranspose: mode is neither TRANS nor "
     "CONJ_TRANS. Should never get here! Please report this bug to the "
     "Tpetra developers.");

  const size_t numVectors = X_in.getNumVectors ();
  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Node> > importer =
    matrix_->getGraph ()->getImporter ();
  Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Node> > exporter =
    matrix_->getGraph ()->getExporter ();
  Teuchos::RCP<const MV> X;

  // It is OK if X and Y reference the same data, because we can
  // perform a triangular solve in situ.  However, we require that
  // column access to each is strided.

  // Set up import/export temporary multivectors.
  if (importer != null) {
    if (importMV_ != null && importMV_->getNumVectors () != numVectors) {
      importMV_ = null;
    }
    if (importMV_ == null) {
      importMV_ = Teuchos::rcp (new MV (matrix_->getColMap (), numVectors));
    }
  }
  if (exporter != null) {
    if (exportMV_ != null && exportMV_->getNumVectors () != numVectors) {
      exportMV_ = null;
    }
    if (exportMV_ == null) {
      exportMV_ = Teuchos::rcp (new MV (matrix_->getRowMap (), numVectors));
    }
  }

  // solve(TRANS): DomainMap -> RangeMap
  // lclMatSolve_(TRANS): ColMap -> RowMap
  // importer: DomainMap -> ColMap
  // exporter: RowMap -> RangeMap
  //
  // solve = importer o lclMatSolve_ o exporter
  //   DomainMap -> ColMap -> RowMap -> RangeMap
  //
  // If we have a nontrivial importer, we must import elements that
  // are permuted or are on other processes.
  if (importer != null) {
    importMV_->doImport (X_in, *importer, INSERT);
    X = importMV_;
  }
  else if (! X_in.isConstantStride ()) {
    // We can't handle a nonconstant stride right now, so make a copy of X_in.
    X = Teuchos::rcp (new MV (X_in));
  }
  else {
    // Just temporary, so this non-owning RCP is okay.
    X = Teuchos::rcpFromRef (X_in);
  }

  // If we have a nontrivial exporter, we must export elements that
  // are permuted or belong to other processes.  We compute the
  // solution into the to-be-exported MV, then export it.
  if (exporter != null) {
    // mode is TRANS or CONJ_TRANS; this was checked above.
    matrix_->template localSolve<Scalar, Scalar> (*X, *exportMV_, mode);
    // Make sure the target is zero: necessary because we are adding.
    Y_in.putScalar (ST::zero ());
    Y_in.doExport (*exportMV_, *exporter, ADD);
  }
  // Otherwise, solve into Y.
  else {
    if (! Y_in.isConstantStride ()) {
      // Solve into a strided copy of Y, then copy the result back into Y_in.
      MV Y (Y_in);
      matrix_->template localSolve<Scalar, Scalar> (*X, Y, mode);
      Tpetra::deep_copy (Y_in, Y);
    }
    else {
      matrix_->template localSolve<Scalar, Scalar> (*X, Y_in, mode);
    }
  }
}
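// The helper below is NOT part of the source; it is a small generic sketch
// of the caching pattern that both applyTranspose (above) and
// applyNonTranspose (below) use for importMV_ and exportMV_: keep a scratch
// MultiVector across calls and reallocate it only when the number of
// right-hand sides changes.  The name ensureScratchMV is hypothetical.
#include "Teuchos_RCP.hpp"

template<class MV, class MapType>
void
ensureScratchMV (Teuchos::RCP<MV>& scratch,
                 const Teuchos::RCP<const MapType>& map,
                 const size_t numVectors)
{
  if (! scratch.is_null () && scratch->getNumVectors () != numVectors) {
    scratch = Teuchos::null; // wrong number of columns; discard and rebuild
  }
  if (scratch.is_null ()) {
    scratch = Teuchos::rcp (new MV (map, numVectors));
  }
}
// With such a helper, each "set up import/export temporary multivectors"
// block could collapse to a single call, e.g.
//   if (importer != null) ensureScratchMV (importMV_, matrix_->getColMap (), numVectors);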
//! Do the non-transpose solve.
void
applyNonTranspose (const MV& X_in, MV& Y_in) const
{
  using Teuchos::NO_TRANS;
  using Teuchos::null;
  typedef Teuchos::ScalarTraits<Scalar> ST;

  // Solve U X = Y or L X = Y.
  // X belongs to the domain Map, while Y belongs to the range Map.

  const size_t numVectors = X_in.getNumVectors ();
  Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Node> > importer =
    matrix_->getGraph ()->getImporter ();
  Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Node> > exporter =
    matrix_->getGraph ()->getExporter ();
  Teuchos::RCP<const MV> X;

  // It is OK if X and Y reference the same data, because we can
  // perform a triangular solve in situ.  However, we require that
  // column access to each is strided.

  // Set up import/export temporary multivectors.
  if (importer != null) {
    if (importMV_ != null && importMV_->getNumVectors () != numVectors) {
      importMV_ = null;
    }
    if (importMV_ == null) {
      importMV_ = Teuchos::rcp (new MV (matrix_->getColMap (), numVectors));
    }
  }
  if (exporter != null) {
    if (exportMV_ != null && exportMV_->getNumVectors () != numVectors) {
      exportMV_ = null;
    }
    if (exportMV_ == null) {
      exportMV_ = Teuchos::rcp (new MV (matrix_->getRowMap (), numVectors));
    }
  }

  // solve(NO_TRANS): RangeMap -> DomainMap
  // lclMatSolve_: RowMap -> ColMap
  // importer: DomainMap -> ColMap
  // exporter: RowMap -> RangeMap
  //
  // solve = reverse(exporter) o lclMatSolve_ o reverse(importer)
  //   RangeMap -> RowMap -> ColMap -> DomainMap
  //
  // If we have a nontrivial exporter, we must import (reverse-mode)
  // elements that are permuted or are on other processes.
  if (exporter != null) {
    exportMV_->doImport (X_in, *exporter, INSERT);
    X = exportMV_;
  }
  else if (! X_in.isConstantStride ()) {
    // We can't handle a nonconstant stride right now, so make a copy of X_in.
    X = Teuchos::rcp (new MV (X_in));
  }
  else {
    // Just temporary, so this non-owning RCP is okay.
    X = Teuchos::rcpFromRef (X_in);
  }

  // If we have a nontrivial importer, we must export (reverse-mode)
  // elements that are permuted or belong to other processes.  We compute
  // the solution into the to-be-exported MV.
  if (importer != null) {
    matrix_->template localSolve<Scalar, Scalar> (*X, *importMV_, NO_TRANS);
    // Make sure the target is zero: necessary because we are adding.
    Y_in.putScalar (ST::zero ());
    Y_in.doExport (*importMV_, *importer, ADD);
  }
  // Otherwise, solve into Y.
  else {
    // We can't solve into a nonconstant-stride multivector, so solve
    // into a strided copy of Y and copy the result back into Y_in.
    if (! Y_in.isConstantStride ()) {
      MV Y (Y_in);
      matrix_->template localSolve<Scalar, Scalar> (*X, Y, NO_TRANS);
      Tpetra::deep_copy (Y_in, Y);
    }
    else {
      matrix_->template localSolve<Scalar, Scalar> (*X, Y_in, NO_TRANS);
    }
  }
}
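// The sketch below is NOT part of the source; it shows, under stated
// assumptions, how the two solve paths above are typically reached through
// the wrapper's Operator interface.  It assumes a Tpetra version that still
// provides Tpetra::CrsMatrixSolveOp and that A is a fill-complete,
// triangular Tpetra::CrsMatrix<double>; names are illustrative only.
#include "Tpetra_CrsMatrixSolveOp.hpp"
#include "Tpetra_CrsMatrix.hpp"
#include "Tpetra_MultiVector.hpp"

void
exampleSolveOp (const Teuchos::RCP<const Tpetra::CrsMatrix<double> >& A,
                const Tpetra::MultiVector<double>& B,
                Tpetra::MultiVector<double>& X)
{
  // apply() dispatches to applyNonTranspose or applyTranspose above.
  Tpetra::CrsMatrixSolveOp<double> solveOp (A);
  solveOp.apply (B, X);                      // X := A \ B
  solveOp.apply (B, X, Teuchos::CONJ_TRANS); // X := A^H \ B
}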
void
Chebyshev<MatrixType>::
applyImpl (const MV& X,
           MV& Y,
           Teuchos::ETransp mode,
           scalar_type alpha,
           scalar_type beta) const
{
  using Teuchos::ArrayRCP;
  using Teuchos::as;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcp_const_cast;
  using Teuchos::rcpFromRef;

  const scalar_type zero = STS::zero ();
  const scalar_type one = STS::one ();

  // Y = beta*Y + alpha*M*X.

  // If alpha == 0, then we don't need to do Chebyshev at all.
  if (alpha == zero) {
    if (beta == zero) { // Obey Sparse BLAS rules; avoid 0*NaN.
      Y.putScalar (zero);
    }
    else {
      Y.scale (beta);
    }
    return;
  }

  // If beta != 0, then we need to keep a copy of the initial value of
  // Y, so that we can add beta times it to the Chebyshev result at the
  // end.  Usually this method is called with beta == 0, so we don't
  // have to worry about caching Y_orig.
  RCP<MV> Y_orig;
  if (beta != zero) {
    Y_orig = rcp (new MV (Y));
  }

  // If X and Y point to the same memory location, we need to use a
  // copy of X (X_copy) as the input MV.  Otherwise, just let X_copy
  // point to X.
  //
  // This is hopefully an uncommon use case, so we don't bother to
  // optimize for it by caching X_copy.
  RCP<const MV> X_copy;
  bool copiedInput = false;
  if (X.getLocalMV ().getValues () == Y.getLocalMV ().getValues ()) {
    X_copy = rcp (new MV (X));
    copiedInput = true;
  }
  else {
    X_copy = rcpFromRef (X);
  }

  // If alpha != 1, fold alpha into (a copy of) X.
  //
  // This is an uncommon use case, so we don't bother to optimize for
  // it by caching X_copy.  However, we do check whether we've already
  // copied X above, to avoid a second copy.
  if (alpha != one) {
    RCP<MV> X_copy_nonConst = rcp_const_cast<MV> (X_copy);
    if (! copiedInput) {
      X_copy_nonConst = rcp (new MV (X));
      copiedInput = true;
    }
    X_copy_nonConst->scale (alpha);
    X_copy = rcp_const_cast<const MV> (X_copy_nonConst);
  }

  impl_.apply (*X_copy, Y);

  if (beta != zero) {
    Y.update (beta, *Y_orig, one); // Y = beta * Y_orig + 1 * Y
  }
}
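// The sketch below is NOT part of the source; it is a minimal, hedged usage
// example of reaching applyImpl through Ifpack2::Chebyshev's public apply().
// It assumes Ifpack2 and Tpetra are available, that A is a fill-complete
// Tpetra::CrsMatrix<double>, and that the "chebyshev: degree" parameter name
// matches your Ifpack2 version.  Names here are illustrative only.
#include "Ifpack2_Chebyshev.hpp"
#include "Teuchos_ParameterList.hpp"
#include "Tpetra_CrsMatrix.hpp"
#include "Tpetra_MultiVector.hpp"

void
exampleChebyshevApply (const Teuchos::RCP<const Tpetra::CrsMatrix<double> >& A,
                       const Tpetra::MultiVector<double>& X,
                       Tpetra::MultiVector<double>& Y)
{
  Ifpack2::Chebyshev<Tpetra::RowMatrix<double> > cheby (A);
  Teuchos::ParameterList params;
  params.set ("chebyshev: degree", 3); // polynomial degree of the smoother
  cheby.setParameters (params);
  cheby.initialize ();
  cheby.compute (); // estimates eigenvalue bounds if none were supplied
  // Exercises the beta != 0 path of applyImpl above:
  // Y := 1.5 * M(X) + 0.25 * Y, where M is the Chebyshev operator.
  cheby.apply (X, Y, Teuchos::NO_TRANS, 1.5, 0.25);
}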