/// \brief Return a nonconstant view of the input MultiVector. /// /// TSQR represents the local (to each MPI process) part of a /// multivector as a KokkosClassic::MultiVector (KMV), which gives a /// nonconstant view of the original multivector's data. This /// class method tells TSQR how to get the KMV from the input /// multivector. The KMV is not a persistent view of the data; /// its scope is contained within the scope of the multivector. /// /// \warning TSQR does not currently support multivectors with /// nonconstant stride. This method will raise an exception /// if A has nonconstant stride. static KokkosClassic::MultiVector<scalar_type, node_type> getNonConstView (MV& A) { // FIXME (mfh 25 Oct 2010) We should be able to run TSQR even if // storage of A uses nonconstant stride internally. We would // have to copy and pack into a matrix with constant stride, and // then unpack on exit. For now we choose just to raise an // exception. TEUCHOS_TEST_FOR_EXCEPTION(! A.ConstantStride(), std::invalid_argument, "TSQR does not currently support Epetra_MultiVector " "inputs that do not have constant stride."); const int numRows = A.MyLength(); const int numCols = A.NumVectors(); const int stride = A.Stride(); // A_ptr does _not_ own the data. TSQR only operates within the // scope of the multivector objects on which it operates, so it // doesn't need ownership of the data. Teuchos::ArrayRCP<double> A_ptr (A.Values(), 0, numRows*stride, false); typedef KokkosClassic::MultiVector<scalar_type, node_type> KMV; // KMV objects want a Kokkos Node instance. Epetra objects // don't have a Kokkos Node, so we make a temporary node just // for the KMV. // // KokkosClassic::SerialNode wants an empty ParameterList. Teuchos::ParameterList plist; Teuchos::RCP<node_type> node (new node_type (plist)); KMV A_kmv (node); A_kmv.initializeValues (numRows, numCols, A_ptr, stride); return A_kmv; }
/// \brief Compute QR factorization [Q,R] = qr(A,0). /// /// \param A [in/out] On input: the multivector to factor. /// Overwritten with garbage on output. /// /// \param Q [out] On output: the (explicitly stored) Q factor in /// the QR factorization of the (input) multivector A. /// /// \param R [out] On output: the R factor in the QR factorization /// of the (input) multivector A. /// /// \param forceNonnegativeDiagonal [in] If true, then (if /// necessary) do extra work (modifying both the Q and R /// factors) in order to force the R factor to have a /// nonnegative diagonal. /// /// \warning Currently, this method only works if A and Q have the /// same communicator and row distribution ("Map," in Petra /// terms) as those of the multivector given to this adapter /// instance's constructor. Otherwise, the result of this /// method is undefined. void factorExplicit (MV& A, MV& Q, dense_matrix_type& R, const bool forceNonnegativeDiagonal=false) { TEUCHOS_TEST_FOR_EXCEPTION (! A.isConstantStride (), std::invalid_argument, "TsqrAdaptor::" "factorExplicit: Input MultiVector A must have constant stride."); TEUCHOS_TEST_FOR_EXCEPTION (! Q.isConstantStride (), std::invalid_argument, "TsqrAdaptor::" "factorExplicit: Input MultiVector Q must have constant stride."); prepareTsqr (Q); // Finish initializing TSQR. // FIXME (mfh 16 Jan 2016) Currently, TSQR is a host-only // implementation. 
A.template sync<Kokkos::HostSpace> (); A.template modify<Kokkos::HostSpace> (); Q.template sync<Kokkos::HostSpace> (); Q.template modify<Kokkos::HostSpace> (); auto A_view = A.template getLocalView<Kokkos::HostSpace> (); auto Q_view = Q.template getLocalView<Kokkos::HostSpace> (); scalar_type* const A_ptr = reinterpret_cast<scalar_type*> (A_view.ptr_on_device ()); scalar_type* const Q_ptr = reinterpret_cast<scalar_type*> (Q_view.ptr_on_device ()); const bool contiguousCacheBlocks = false; tsqr_->factorExplicitRaw (A_view.dimension_0 (), A_view.dimension_1 (), A_ptr, A.getStride (), Q_ptr, Q.getStride (), R.values (), R.stride (), contiguousCacheBlocks, forceNonnegativeDiagonal); }
/// \brief Rank-revealing decomposition /// /// Using the R factor and explicit Q factor from /// factorExplicit(), compute the singular value decomposition /// (SVD) of R: \f$R = U \Sigma V^*\f$. If R is full rank (with /// respect to the given relative tolerance \c tol), do not modify /// Q or R. Otherwise, compute \f$Q := Q \cdot U\f$ and \f$R := /// \Sigma V^*\f$ in place. If R was modified, then it may not /// necessarily be upper triangular on output. /// /// \param Q [in/out] On input: explicit Q factor computed by /// factorExplicit(). (Must be an orthogonal resp. unitary /// matrix.) On output: If R is of full numerical rank with /// respect to the tolerance tol, Q is unmodified. Otherwise, Q /// is updated so that the first \c rank columns of Q are a /// basis for the column space of A (the original matrix whose /// QR factorization was computed by factorExplicit()). The /// remaining columns of Q are a basis for the null space of A. /// /// \param R [in/out] On input: N by N upper triangular matrix /// with leading dimension LDR >= N. On output: if input is /// full rank, R is unchanged on output. Otherwise, if \f$R = U /// \Sigma V^*\f$ is the SVD of R, on output R is overwritten /// with \f$\Sigma \cdot V^*\f$. This is also an N by N matrix, /// but it may not necessarily be upper triangular. /// /// \param tol [in] Relative tolerance for computing the numerical /// rank of the matrix R. /// /// \return Rank \f$r\f$ of R: \f$ 0 \leq r \leq N\f$. int revealRank (MV& Q, dense_matrix_type& R, const magnitude_type& tol) { TEUCHOS_TEST_FOR_EXCEPTION (! Q.isConstantStride (), std::invalid_argument, "TsqrAdaptor::" "revealRank: Input MultiVector Q must have constant stride."); prepareTsqr (Q); // Finish initializing TSQR. // FIXME (mfh 18 Oct 2010) Check Teuchos::Comm<int> object in Q // to make sure it is the same communicator as the one we are // using in our dist_tsqr_type implementation. 
Q.template sync<Kokkos::HostSpace> (); Q.template modify<Kokkos::HostSpace> (); auto Q_view = Q.template getLocalView<Kokkos::HostSpace> (); scalar_type* const Q_ptr = reinterpret_cast<scalar_type*> (Q_view.ptr_on_device ()); const bool contiguousCacheBlocks = false; return tsqr_->revealRankRaw (Q_view.dimension_0 (), Q_view.dimension_1 (), Q_ptr, Q.getStride (), R.values (), R.stride (), tol, contiguousCacheBlocks); }
// Compute Y := beta * Y + alpha * (result of the local triangular
// solve with right-hand side X), special-casing zero alpha / beta so
// that the ignored operand is never read.
//
// X     [in]     Input multivector handed to A_crs_->localSolve.
// Y     [in/out] Result; its prior contents are used only if beta != 0.
// mode  [in]     Transpose mode forwarded to localSolve.
// alpha [in]     Scaling of the solve result.
// beta  [in]     Scaling of the prior contents of Y.
void LocalSparseTriangularSolver<MatrixType>::
localApply (const MV& X,
            MV& Y,
            const Teuchos::ETransp mode,
            const scalar_type& alpha,
            const scalar_type& beta) const
{
  typedef scalar_type ST;
  typedef Teuchos::ScalarTraits<ST> STS;

  if (alpha == STS::zero ()) {
    // No solve needed; only the beta * Y term remains.
    if (beta == STS::zero ()) {
      Y.putScalar (STS::zero ()); // Y := 0 * Y (ignore contents of Y)
    }
    else {
      Y.scale (beta); // Y := beta * Y
    }
    return;
  }

  if (beta == STS::zero ()) {
    // Solve directly into Y, then apply alpha if it is not one.
    A_crs_->template localSolve<ST, ST> (X, Y, mode);
    if (alpha != STS::one ()) {
      Y.scale (alpha);
    }
    return;
  }

  // General case (alpha != 0 and beta != 0): solve into a copy of Y
  // so that the original Y is still available for the update.
  MV Y_solve (Y, Teuchos::Copy);
  A_crs_->template localSolve<ST, ST> (X, Y_solve, mode); // Y_solve := M * X
  Y.update (alpha, Y_solve, beta); // Y := beta * Y + alpha * Y_solve
}
/// \brief Extract A's underlying KokkosClassic::MultiVector instance. /// /// TSQR represents the local (to each MPI process) part of a /// multivector as a KokkosClassic::MultiVector (KMV), which gives a /// nonconstant view of the original multivector's data. This /// class method tells TSQR how to get the KMV from the input /// multivector. The KMV is not a persistent view of the data; /// its scope is contained within the scope of the multivector. /// /// \warning TSQR does not currently support multivectors with /// nonconstant stride. If A has nonconstant stride, this /// method will throw an exception. static KokkosClassic::MultiVector<scalar_type, node_type> getNonConstView (MV& A) { // FIXME (mfh 25 Oct 2010) We should be able to run TSQR even if // storage of A uses nonconstant stride internally. We would // have to copy and pack into a matrix with constant stride, and // then unpack on exit. For now we choose just to raise an // exception. TEUCHOS_TEST_FOR_EXCEPTION(! A.isConstantStride(), std::invalid_argument, "TSQR does not currently support Tpetra::MultiVector " "inputs that do not have constant stride."); return A.getLocalMVNonConst(); }
// Compute \f$\alpha A^\top \text{this}\f$ void innerProducts(const Real alpha, const MV &A, Teuchos::SerialDenseMatrix<int,Real> &B) const { // TEUCHOS_TEST_FOR_EXCEPTION( this->dimensionMismatch(A), // std::invalid_argument, // "Error: MultiVectors must have the same dimensions."); for(int i=0;i<A.getNumberOfVectors();++i) { for(int j=0;j<numVectors_;++j) { B(i,j) = alpha*mvec_[j]->dot(*A.getVector(i)); } } }
// Apply the operator: overwrite Y with the element-wise product of
// *densityOnDensityMatrix_ and X (scalarAB = 1, scalarThis = 0).
//
// The mode, alpha and beta arguments are not used by this
// implementation.
//
// Throws (via TEUCHOS_TEST_FOR_EXCEPT) if X and Y have different
// numbers of vectors; with KDEBUG defined, also if the maps of X / Y
// differ from the domain / range map.
void
dft_HardSphereA22_Tpetra_Operator<Scalar,MatrixType>::
apply
(const MV& X,
 MV& Y,
 Teuchos::ETransp mode,
 Scalar alpha,
 Scalar beta) const
{
  TEUCHOS_TEST_FOR_EXCEPT(Y.getNumVectors()!=X.getNumVectors());
#ifdef KDEBUG
  TEUCHOS_TEST_FOR_EXCEPT(!X.getMap()->isSameAs(*getDomainMap()));
  TEUCHOS_TEST_FOR_EXCEPT(!Y.getMap()->isSameAs(*getRangeMap()));
#endif

  const Scalar scaleProduct = STS::one();
  const Scalar scaleY = STS::zero();
  Y.elementWiseMultiply(scaleProduct, *densityOnDensityMatrix_, X, scaleY);
}
// Set some of the vectors in this MultiVector equal to corresponding // vectors in another MultiVector void set(const MV &A, const std::vector<int> &index) { // TEUCHOS_TEST_FOR_EXCEPTION( this->dimensionMismatch(A), // std::invalid_argument, // "Error: MultiVectors must have the same dimensions."); int n = index.size(); for(int i=0;i<n;++i) { int k = index[i]; if(k<numVectors_ && i<A.getNumberOfVectors()) { mvec_[k]->set(*A.getVector(i)); } } }
//! Compute rank-revealing decomposition using results of factorExplicit(). int revealRank (MV& Q, dense_matrix_type& R, const magnitude_type& tol) { return Q.revealRank (R, tol); }
// Plugin entry point: construct the MV object and, if it reports no
// error, run it.  Always returns 0; the 'par' argument is not used.
int plugin_exec( PluginParam *par )
{
	MV epg;

	// Nothing to run if construction reported an error.
	if ( epg.error() != errorCodeNone )
		return 0;

	// Stop enigma passing rogue keypress to
	// the EPG on startup
	KeyCatcher kc;
	showExecHide( &kc );

	epg.run();
	return 0;
}
// Apply the inverse operator.  Our algorithm is:
//   Y = D \ X
// implemented as the element-wise product of
// *densityOnDensityInverse_ and X (scalarAB = 1, scalarThis = 0).
//
// Throws (via TEUCHOS_TEST_FOR_EXCEPT) if X and Y have different
// numbers of vectors; with KDEBUG defined, also if the maps of X / Y
// differ from the domain / range map.
void
dft_HardSphereA22_Tpetra_Operator<Scalar,MatrixType>::
applyInverse
(const MV& X,
 MV& Y) const
{
  TEUCHOS_TEST_FOR_EXCEPT(Y.getNumVectors()!=X.getNumVectors());
#ifdef KDEBUG
  TEUCHOS_TEST_FOR_EXCEPT(!X.getMap()->isSameAs(*getDomainMap()));
  TEUCHOS_TEST_FOR_EXCEPT(!Y.getMap()->isSameAs(*getRangeMap()));
#endif

  const Scalar scaleProduct = STS::one();
  const Scalar scaleY = STS::zero();
  Y.elementWiseMultiply(scaleProduct, *densityOnDensityInverse_, X, scaleY);
}
//! Compute QR factorization A = QR, using TSQR. void factorExplicit (MV& A, MV& Q, dense_matrix_type& R, const bool forceNonnegativeDiagonal=false) { A.factorExplicit (Q, R, forceNonnegativeDiagonal); }
void dump(const MV& v, const std::string& name) { std::cout << name << std::endl; Teuchos::ArrayRCP<const Scalar> view = v.get1dView(); for (Teuchos::ArrayRCP<const Scalar>::iterator it = view.begin(); it != view.end(); ++it) { std::cout << *it << std::endl; } std::cout << std::endl; }
// Set the MultiVector equal to another MultiVector void set(const MV &A) { // TEUCHOS_TEST_FOR_EXCEPTION( this->dimensionMismatch(A), // std::invalid_argument, // "Error: MultiVectors must have the same dimensions."); for(int i=0;i<numVectors_;++i) { mvec_[i]->set(*(A.getVector(i))); } }
// Compute dot products of pairs of vectors void dots(const MV &A, std::vector<Real> &b) const { TEUCHOS_TEST_FOR_EXCEPTION( this->dimensionMismatch(A), std::invalid_argument, "Error: MultiVectors must have the same dimensions."); for(int i=0;i<numVectors_;++i) { b[i] = mvec_[i]->dot(*A.getVector(i)); } }
/// \brief Extract A's underlying KokkosClassic::MultiVector instance. /// /// TSQR represents the local (to each MPI process) part of a /// multivector as a KokkosClassic::MultiVector (KMV), which gives a /// nonconstant view of the original multivector's data. This /// class method tells TSQR how to get the KMV from the input /// multivector. The KMV is not a persistent view of the data; /// its scope is contained within the scope of the multivector. /// /// \warning TSQR does not currently support multivectors with /// nonconstant stride. If A has nonconstant stride, this /// method will throw an exception. static KokkosClassic::MultiVector<scalar_type, node_type> getNonConstView (MV& A) { // FIXME (mfh 25 Oct 2010) We should be able to run TSQR even if // storage of A uses nonconstant stride internally. We would // have to copy and pack into a matrix with constant stride, and // then unpack on exit. For now we choose just to raise an // exception. TEUCHOS_TEST_FOR_EXCEPTION(! A.isConstantStride(), std::invalid_argument, "TSQR does not currently support Tpetra::MultiVector " "inputs that do not have constant stride."); typedef typename Teuchos::ArrayRCP<mp_scalar_type>::size_type size_type; typedef typename MV::dual_view_type view_type; typedef typename view_type::t_dev::array_type flat_array_type; // Create new Kokkos::MultiVector reinterpreting the data as a longer // array of the base scalar type // Create new ArrayRCP holding data view_type pce_mv = A.getDualView(); flat_array_type flat_mv = pce_mv.d_view; const size_t num_rows = flat_mv.dimension_0(); const size_t num_cols = flat_mv.dimension_1(); const size_t size = num_rows * num_cols; ArrayRCP<scalar_type> vals = Teuchos::arcp(flat_mv.ptr_on_device(), size_type(0), size, false); // Create new MultiVector // Owing to the above comment, we don't need to worry about // non-constant stride size_t strides[2]; flat_mv.stride(strides); const size_t stride = strides[0]; KokkosClassic::MultiVector<scalar_type, 
node_type> mv(A.getMap()->getNode()); mv.initializeValues(num_rows, num_cols, vals, stride); return mv; }
/// \brief Finish internode TSQR initialization. /// /// \param mv [in] A valid Tpetra::MultiVector instance whose /// communicator wrapper we will use to prepare TSQR. /// /// \note It's OK to call this method more than once; it is idempotent. void prepareDistTsqr (const MV& mv) { using Teuchos::RCP; using Teuchos::rcp_implicit_cast; typedef TSQR::TeuchosMessenger<scalar_type> mess_type; typedef TSQR::MessengerBase<scalar_type> base_mess_type; RCP<const Teuchos::Comm<int> > comm = mv.getMap()->getComm(); RCP<mess_type> mess (new mess_type (comm)); RCP<base_mess_type> messBase = rcp_implicit_cast<base_mess_type> (mess); distTsqr_->init (messBase); }
// Generic BLAS level 3 matrix multiplication // \f$\text{this}\leftarrow \alpha A B+\beta\text{this}\f$ void gemm(const Real alpha, const MV& A, const Teuchos::SerialDenseMatrix<int,Real> &B, const Real beta) { // Scale this by beta this->scale(beta); for(int i=0;i<B.numRows();++i) { for(int j=0;j<B.numCols();++j) { mvec_[j]->axpy(alpha*B(i,j),*A.getVector(i)); } } }
/// \brief Finish internode TSQR initialization. /// /// \param mv [in] A multivector, from which to extract the /// Epetra_Comm communicator wrapper to use to initialize TSQR. /// /// \note It's OK to call this method more than once; it is idempotent. void prepareDistTsqr (const MV& mv) { using Teuchos::RCP; using Teuchos::rcp; using TSQR::Epetra::makeTsqrMessenger; typedef TSQR::MessengerBase<scalar_type> base_mess_type; // If mv falls out of scope, its Epetra_Comm may become invalid. // Thus, we clone the input Epetra_Comm, so that the messenger // owns the object. RCP<const Epetra_Comm> comm = rcp (mv.Comm().Clone()); RCP<base_mess_type> messBase = makeTsqrMessenger<scalar_type> (comm); distTsqr_->init (messBase); }
// Apply the inverse of the operator by a block forward sweep.
//
// Y starts as a copy of X.  The first block of Y is scaled by the
// first block of invDiagonal_.  Then, for each following block,
// matrixOperator_[i] is applied to the current Y, the result is
// subtracted from that block of Y, and the block is scaled by the
// matching block of invDiagonal_.
//
// Throws (via TEUCHOS_TEST_FOR_EXCEPT) if X and Y have different
// numbers of vectors; with KDEBUG defined, also if the maps of X / Y
// differ from the domain / range map.
void
dft_PolyA11_Tpetra_Operator<Scalar,MatrixType>::
applyInverse
(const MV& X,
 MV& Y) const
{
  TEUCHOS_TEST_FOR_EXCEPT(Y.getNumVectors()!=X.getNumVectors());
#ifdef KDEBUG
  TEUCHOS_TEST_FOR_EXCEPT(!X.getMap()->isSameAs(*getDomainMap()));
  TEUCHOS_TEST_FOR_EXCEPT(!Y.getMap()->isSameAs(*getRangeMap()));
  printf("\n\n\n\ndft_PolyA11_Tpetra_Operator::applyInverse()\n\n\n\n");
#endif

  const Scalar one = STS::one();
  const Scalar zero = STS::zero();
  const size_t numVecs = Y.getNumVectors();
  const size_t blockSize = ownedMap_->getNodeNumElements();

  // Scratch space for one block's worth of operator output.
  RCP<MV > blockTmp = rcp(new MV(ownedMap_,numVecs));

  Y=X; // We can safely do this

  // First block: Y_0 := invDiag_0 .* Y_0.
  RCP<MV > blockY = Y.offsetViewNonConst(ownedMap_, 0);
  RCP<VEC> blockInvDiag =
    invDiagonal_->offsetViewNonConst(ownedMap_, 0)->getVectorNonConst(0);
  blockY->elementWiseMultiply(one, *blockInvDiag, *blockY, zero);

  // Remaining blocks; loop index i addresses block i+1.
  for (LocalOrdinal i=OTLO::zero(); i< numBlocks_-1; i++) {
    const size_t rowOffset = (i+1)*blockSize;

    // Update views of Y and the inverse diagonal for this block.
    blockY = Y.offsetViewNonConst(ownedMap_, rowOffset);
    blockInvDiag =
      invDiagonal_->offsetViewNonConst(ownedMap_, rowOffset)->getVectorNonConst(0);

    // Apply the block lower triangular part to the current Y.
    matrixOperator_[i]->apply(Y, *blockTmp);

    // blockY still holds this block of X (from the initial copy
    // Y = X), so this forms X_block - blockTmp.
    blockY->update(-one, *blockTmp, one);

    // Scale by this block's inverse diagonal.
    blockY->elementWiseMultiply(one, *blockInvDiag, *blockY, zero);
  }
} //end applyInverse
// Apply the operator to X, storing the result in Y.
//
// For each i in [0, numBlocks_-1), matrixOperator_[i] is applied to X
// and written into block i+1 of Y (the off-diagonal contribution).
// Afterwards the element-wise product of *diagonal_ and X is added to
// all of Y.
//
// The mode, alpha and beta arguments are not used by this
// implementation.
// NOTE(review): block 0 of Y is not assigned before the diagonal term
// is added to it -- confirm callers pass a zeroed Y if that matters.
//
// Throws (via TEUCHOS_TEST_FOR_EXCEPT) if X and Y have different
// numbers of vectors; with KDEBUG defined, also if the maps of X / Y
// differ from the domain / range map.
void
dft_PolyA11_Tpetra_Operator<Scalar,MatrixType>::
apply
(const MV& X,
 MV& Y,
 Teuchos::ETransp mode,
 Scalar alpha,
 Scalar beta) const
{
  TEUCHOS_TEST_FOR_EXCEPT(Y.getNumVectors()!=X.getNumVectors());
#ifdef KDEBUG
  TEUCHOS_TEST_FOR_EXCEPT(!X.getMap()->isSameAs(*getDomainMap()));
  TEUCHOS_TEST_FOR_EXCEPT(!Y.getMap()->isSameAs(*getRangeMap()));
#endif

  const size_t blockSize = ownedMap_->getNodeNumElements();

  RCP<MV > blockY = Y.offsetViewNonConst(ownedMap_, 0);
  for (LocalOrdinal i=OTLO::zero(); i< numBlocks_-1; i++) {
    const size_t rowOffset = (i+1)*blockSize;
    blockY = Y.offsetViewNonConst(ownedMap_, rowOffset);
    // This gives a result that is off-diagonal-matrix*X
    matrixOperator_[i]->apply(X, *blockY);
  }

  // Add diagonal contribution: Y := Y + diagonal .* X.
  const Scalar one = STS::one();
  Y.elementWiseMultiply(one, *diagonal_, X, one);
} //end Apply
// Reader: fetch the current version handle from x and copy the value
// it refers to into 'result'.
void thread1(MV & x, T & result)
{
  MV::Version current = x.get();
  result = *current;
}
// Compute Y := alpha Op X + beta Y. // // We ignore the cases alpha != 1 and beta != 0 for simplicity. void apply (const MV& X, MV& Y, Teuchos::ETransp mode = Teuchos::NO_TRANS, scalar_type alpha = Teuchos::ScalarTraits<scalar_type>::one (), scalar_type beta = Teuchos::ScalarTraits<scalar_type>::zero ()) const { using Teuchos::RCP; using Teuchos::rcp; using std::cout; using std::endl; typedef Teuchos::ScalarTraits<scalar_type> STS; RCP<const Teuchos::Comm<int> > comm = opMap_->getComm (); const int myRank = comm->getRank (); const int numProcs = comm->getSize (); if (myRank == 0) { cout << "MyOp::apply" << endl; } // We're writing the Operator subclass, so we are responsible for // error handling. You can decide how much error checking you // want to do. Just remember that checking things like Map // sameness or compatibility are expensive. TEUCHOS_TEST_FOR_EXCEPTION( X.getNumVectors () != Y.getNumVectors (), std::invalid_argument, "X and Y do not have the same numbers of vectors (columns)."); // Let's make sure alpha is 1 and beta is 0... // This will throw an exception if that is not the case. TEUCHOS_TEST_FOR_EXCEPTION( alpha != STS::one() || beta != STS::zero(), std::logic_error, "MyOp::apply was given alpha != 1 or beta != 0. " "These cases are not implemented."); // Get the number of vectors (columns) in X (and Y). const size_t numVecs = X.getNumVectors (); // Make a temporary multivector for holding the redistributed // data. You could also create this in the constructor and reuse // it across different apply() calls, but you would need to be // careful to reallocate if it has a different number of vectors // than X. The number of vectors in X can vary across different // apply() calls. RCP<MV> redistData = rcp (new MV (redistMap_, numVecs)); // Redistribute the data. // This will do all the necessary communication for you. // All processes now own enough data to do the matvec. 
redistData->doImport (X, *importer_, Tpetra::INSERT); // Get the number of local rows in X, on the calling process. const local_ordinal_type nlocRows = static_cast<local_ordinal_type> (X.getLocalLength ()); // Perform the matvec with the data we now locally own. // // For each column... for (size_t c = 0; c < numVecs; ++c) { // Get a view of the desired column Teuchos::ArrayRCP<scalar_type> colView = redistData->getDataNonConst (c); local_ordinal_type offset; // Y[0,c] = -colView[0] + 2*colView[1] - colView[2] (using local indices) if (myRank > 0) { Y.replaceLocalValue (0, c, -colView[0] + 2*colView[1] - colView[2]); offset = 0; } // Y[0,c] = 2*colView[1] - colView[2] (using local indices) else { Y.replaceLocalValue (0, c, 2*colView[0] - colView[1]); offset = 1; } // Y[r,c] = -colView[r-offset] + 2*colView[r+1-offset] - colView[r+2-offset] for (local_ordinal_type r = 1; r < nlocRows - 1; ++r) { const scalar_type newVal = -colView[r-offset] + 2*colView[r+1-offset] - colView[r+2-offset]; Y.replaceLocalValue (r, c, newVal); } // Y[nlocRows-1,c] = -colView[nlocRows-1-offset] + 2*colView[nlocRows-offset] // - colView[nlocRows+1-offset] if (myRank < numProcs - 1) { const scalar_type newVal = -colView[nlocRows-1-offset] + 2*colView[nlocRows-offset] - colView[nlocRows+1-offset]; Y.replaceLocalValue (nlocRows-1, c, newVal); } // Y[nlocRows-1,c] = -colView[nlocRows-1-offset] + 2*colView[nlocRows-offset] else { const scalar_type newVal = -colView[nlocRows-1-offset] + 2*colView[nlocRows-offset]; Y.replaceLocalValue (nlocRows-1, c, newVal); } } }
/// \brief Solve AX=B for X with Chebyshev iteration with left /// diagonal scaling, imitating ML's implementation. /// /// \pre A must be real-valued and symmetric positive definite. /// \pre numIters >= 0 /// \pre eigRatio >= 1 /// \pre 0 < lambdaMax /// \pre All entries of D_inv are positive. /// /// \param A [in] The matrix A in the linear system to solve. /// \param B [in] Right-hand side(s) in the linear system to solve. /// \param X [in] Initial guess(es) for the linear system to solve. /// \param numIters [in] Number of Chebyshev iterations. /// \param lambdaMax [in] Estimate of max eigenvalue of D_inv*A. /// \param lambdaMin [in] Estimate of min eigenvalue of D_inv*A. We /// only use this to determine if A is the identity matrix. /// \param eigRatio [in] Estimate of max / min eigenvalue ratio of /// D_inv*A. We use this along with lambdaMax to compute the /// Chebyshev coefficients. This need not be the same as /// lambdaMax/lambdaMin. /// \param D_inv [in] Vector of diagonal entries of A. It must have /// the same distribution as b. void mlApplyImpl (const MAT& A, const MV& B, MV& X, const int numIters, const ST lambdaMax, const ST lambdaMin, const ST eigRatio, const V& D_inv) { const ST zero = Teuchos::as<ST> (0); const ST one = Teuchos::as<ST> (1); const ST two = Teuchos::as<ST> (2); MV pAux (B.getMap (), B.getNumVectors ()); // Result of A*X MV dk (B.getMap (), B.getNumVectors ()); // Solution update MV R (B.getMap (), B.getNumVectors ()); // Not in original ML; need for B - pAux ST beta = Teuchos::as<ST> (1.1) * lambdaMax; ST alpha = lambdaMax / eigRatio; ST delta = (beta - alpha) / two; ST theta = (beta + alpha) / two; ST s1 = theta / delta; ST rhok = one / s1; // Diagonal: ML replaces entries containing 0 with 1. We // shouldn't have any entries like that in typical test problems, // so it's OK not to do that here. // The (scaled) matrix is the identity: set X = D_inv * B. (If A // is the identity, then certainly D_inv is too. 
D_inv comes from // A, so if D_inv * A is the identity, then we still need to apply // the "preconditioner" D_inv to B as well, to get X.) if (lambdaMin == one && lambdaMin == lambdaMax) { solve (X, D_inv, B); return; } // The next bit of code is a direct translation of code from ML's // ML_Cheby function, in the "normal point scaling" section, which // is in lines 7365-7392 of ml_smoother.c. if (! zeroStartingSolution_) { // dk = (1/theta) * D_inv * (B - (A*X)) A.apply (X, pAux); // pAux = A * X R = B; R.update (-one, pAux, one); // R = B - pAux dk.elementWiseMultiply (one/theta, D_inv, R, zero); // dk = (1/theta)*D_inv*R X.update (one, dk, one); // X = X + dk } else { dk.elementWiseMultiply (one/theta, D_inv, B, zero); // dk = (1/theta)*D_inv*B X = dk; } ST rhokp1, dtemp1, dtemp2; for (int k = 0; k < numIters-1; ++k) { A.apply (X, pAux); rhokp1 = one / (two*s1 - rhok); dtemp1 = rhokp1*rhok; dtemp2 = two*rhokp1/delta; rhok = rhokp1; R = B; R.update (-one, pAux, one); // R = B - pAux // dk = dtemp1 * dk + dtemp2 * D_inv * (B - pAux) dk.elementWiseMultiply (dtemp2, D_inv, B, dtemp1); X.update (one, dk, one); // X = X + dk } }
//! Do the transpose or conjugate transpose solve. void applyTranspose (const MV& X_in, MV& Y_in, const Teuchos::ETransp mode) const { typedef Teuchos::ScalarTraits<Scalar> ST; using Teuchos::null; TEUCHOS_TEST_FOR_EXCEPTION (mode != Teuchos::TRANS && mode != Teuchos::CONJ_TRANS, std::logic_error, "Tpetra::CrsMatrixSolveOp::applyTranspose: mode is neither TRANS nor " "CONJ_TRANS. Should never get here! Please report this bug to the " "Tpetra developers."); const size_t numVectors = X_in.getNumVectors(); Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Node> > importer = matrix_->getGraph ()->getImporter (); Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Node> > exporter = matrix_->getGraph ()->getExporter (); Teuchos::RCP<const MV> X; // it is okay if X and Y reference the same data, because we can // perform a triangular solve in-situ. however, we require that // column access to each is strided. // set up import/export temporary multivectors if (importer != null) { if (importMV_ != null && importMV_->getNumVectors() != numVectors) { importMV_ = null; } if (importMV_ == null) { importMV_ = Teuchos::rcp( new MV(matrix_->getColMap(),numVectors) ); } } if (exporter != null) { if (exportMV_ != null && exportMV_->getNumVectors() != numVectors) { exportMV_ = null; } if (exportMV_ == null) { exportMV_ = Teuchos::rcp( new MV(matrix_->getRowMap(),numVectors) ); } } // solve(TRANS): DomainMap -> RangeMap // lclMatSolve_(TRANS): ColMap -> RowMap // importer: DomainMap -> ColMap // exporter: RowMap -> RangeMap // // solve = importer o lclMatSolve_ o exporter // Domainmap -> ColMap -> RowMap -> RangeMap // // If we have a non-trivial importer, we must import elements that // are permuted or are on other processes. 
if (importer != null) { importMV_->doImport(X_in,*importer,INSERT); X = importMV_; } else if (X_in.isConstantStride() == false) { // cannot handle non-constant stride right now // generate a copy of X_in X = Teuchos::rcp(new MV(X_in)); } else { // just temporary, so this non-owning RCP is okay X = Teuchos::rcpFromRef (X_in); } // If we have a non-trivial exporter, we must export elements that // are permuted or belong to other processes. We will compute // solution into the to-be-exported MV; get a view. if (exporter != null) { matrix_->template localSolve<Scalar, Scalar> (*X, *exportMV_, Teuchos::CONJ_TRANS); // Make sure target is zero: necessary because we are adding Y_in.putScalar(ST::zero()); Y_in.doExport(*importMV_, *importer, ADD); } // otherwise, solve into Y else { if (Y_in.isConstantStride() == false) { // generate a strided copy of Y MV Y(Y_in); matrix_->template localSolve<Scalar, Scalar> (*X, Y, Teuchos::CONJ_TRANS); Y_in = Y; } else { matrix_->template localSolve<Scalar, Scalar> (*X, Y_in, Teuchos::CONJ_TRANS); } } }
//! Do the non-transpose solve. void applyNonTranspose (const MV& X_in, MV& Y_in) const { using Teuchos::NO_TRANS; using Teuchos::null; typedef Teuchos::ScalarTraits<Scalar> ST; // Solve U X = Y or L X = Y // X belongs to domain map, while Y belongs to range map const size_t numVectors = X_in.getNumVectors(); Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Node> > importer = matrix_->getGraph ()->getImporter (); Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Node> > exporter = matrix_->getGraph ()->getExporter (); Teuchos::RCP<const MV> X; // it is okay if X and Y reference the same data, because we can // perform a triangular solve in-situ. however, we require that // column access to each is strided. // set up import/export temporary multivectors if (importer != null) { if (importMV_ != null && importMV_->getNumVectors () != numVectors) { importMV_ = null; } if (importMV_ == null) { importMV_ = Teuchos::rcp (new MV (matrix_->getColMap (), numVectors)); } } if (exporter != null) { if (exportMV_ != null && exportMV_->getNumVectors () != numVectors) { exportMV_ = null; } if (exportMV_ == null) { exportMV_ = Teuchos::rcp (new MV (matrix_->getRowMap (), numVectors)); } } // solve(NO_TRANS): RangeMap -> DomainMap // lclMatSolve_: RowMap -> ColMap // importer: DomainMap -> ColMap // exporter: RowMap -> RangeMap // // solve = reverse(exporter) o lclMatSolve_ o reverse(importer) // RangeMap -> RowMap -> ColMap -> DomainMap // // If we have a non-trivial exporter, we must import elements that // are permuted or are on other processors if (exporter != null) { exportMV_->doImport (X_in, *exporter, INSERT); X = exportMV_; } else if (! 
X_in.isConstantStride ()) { // cannot handle non-constant stride right now // generate a copy of X_in X = Teuchos::rcp (new MV (X_in)); } else { // just temporary, so this non-owning RCP is okay X = Teuchos::rcpFromRef (X_in); } // If we have a non-trivial importer, we must export elements that // are permuted or belong to other processes. We will compute // solution into the to-be-exported MV. if (importer != null) { matrix_->template localSolve<Scalar, Scalar> (*X, *importMV_, NO_TRANS); // Make sure target is zero: necessary because we are adding. Y_in.putScalar (ST::zero ()); Y_in.doExport (*importMV_, *importer, ADD); } // otherwise, solve into Y else { // can't solve into non-strided multivector if (! Y_in.isConstantStride ()) { // generate a strided copy of Y MV Y (Y_in); matrix_->template localSolve<Scalar, Scalar> (*X, Y, NO_TRANS); Tpetra::deep_copy (Y_in, Y); } else { matrix_->template localSolve<Scalar, Scalar> (*X, Y_in, NO_TRANS); } } }
typename MV::size_type num_non_zeros (const MV &mv) { return mv.non_zeros (); }
// Compute Y = beta*Y + alpha*M*X, where M*X is whatever
// impl_.apply (X, Y) produces.
//
// X     [in]     Input multivector.  It may share storage with Y; in
//                that case a copy of X is used as the input.
// Y     [in/out] Result; its prior contents matter only if beta != 0.
// mode  [in]     Not used by this method.
// alpha [in]     Scaling of M*X, folded into (a copy of) X.
// beta  [in]     Scaling of the prior contents of Y.
void
Chebyshev<MatrixType>::
applyImpl (const MV& X,
           MV& Y,
           Teuchos::ETransp mode,
           scalar_type alpha,
           scalar_type beta) const
{
  using Teuchos::RCP;
  using Teuchos::rcp;

  const scalar_type zero = STS::zero();
  const scalar_type one = STS::one();

  // If alpha == 0, then we don't need to do Chebyshev at all.
  if (alpha == zero) {
    if (beta == zero) { // Obey Sparse BLAS rules; avoid 0*NaN.
      Y.putScalar (zero);
    }
    else {
      Y.scale (beta);
    }
    return;
  }

  // If beta != 0, keep a copy of the initial value of Y, so that
  // beta times it can be added to the Chebyshev result at the end.
  // Usually this method is called with beta == 0, so we don't bother
  // caching this copy.
  RCP<MV> Y_initial;
  if (beta != zero) {
    Y_initial = rcp (new MV (Y));
  }

  // The input needs its own copy in two cases: X and Y point to the
  // same memory location, or alpha != 1 (alpha gets folded into the
  // copy).  Both are hopefully uncommon, so the copy is not cached;
  // at most one copy is made even when both cases apply.
  const bool sameStorage =
    (X.getLocalMV().getValues() == Y.getLocalMV().getValues());
  const bool mustScale = (alpha != one);

  RCP<MV> X_owned;
  if (sameStorage || mustScale) {
    X_owned = rcp (new MV (X));
    if (mustScale) {
      X_owned->scale (alpha);
    }
  }
  const MV& X_input = (sameStorage || mustScale) ? *X_owned : X;

  impl_.apply (X_input, Y);

  if (beta != zero) {
    Y.update (beta, *Y_initial, one); // Y = beta * Y_initial + 1 * Y
  }
}
// Writer: build a new shared T from 'result' and install it in x.
void thread2(MV & x, const char * result)
{
  x.set(
    std::make_shared<T>(result)
  );
}
/// \brief Finish intranode TSQR initialization. /// /// \note It's OK to call this method more than once; it is idempotent. void prepareNodeTsqr (const MV& mv) { node_tsqr_factory_type::prepareNodeTsqr (nodeTsqr_, mv.getMap()->getNode()); }