// Apply the (diagonal) hard-sphere A22 block: Y = D .* X, where D is the
// density-on-density diagonal stored in densityOnDensityMatrix_.
//
// X     [in]  Input multivector.
// Y     [out] Result; it is overwritten (the previous contents are scaled by zero).
// mode, alpha, beta are accepted for interface compatibility but are not
// used anywhere in this body.
void
dft_HardSphereA22_Tpetra_Operator<Scalar,MatrixType>::
apply
(const MV& X, MV& Y, Teuchos::ETransp mode, Scalar alpha, Scalar beta) const
{
  // Input and output must carry the same number of columns.
  TEUCHOS_TEST_FOR_EXCEPT(Y.getNumVectors()!=X.getNumVectors());
#ifdef KDEBUG
  // Debug-only layout checks against the operator's domain/range maps.
  TEUCHOS_TEST_FOR_EXCEPT(!X.getMap()->isSameAs(*getDomainMap()));
  TEUCHOS_TEST_FOR_EXCEPT(!Y.getMap()->isSameAs(*getRangeMap()));
#endif

  const Scalar scaleProduct = STS::one();
  const Scalar scaleY = STS::zero();

  // Y = scaleY * Y + scaleProduct * (D .* X)
  Y.elementWiseMultiply(scaleProduct, *densityOnDensityMatrix_, X, scaleY);
}
// Apply the inverse of the (diagonal) hard-sphere A22 block.
//
// Algorithm: Y = D \ X, carried out as an elementwise product of X with the
// stored vector densityOnDensityInverse_.
//
// X [in]  Right-hand side multivector.
// Y [out] Result; it is overwritten (the previous contents are scaled by zero).
void
dft_HardSphereA22_Tpetra_Operator<Scalar,MatrixType>::
applyInverse
(const MV& X, MV& Y) const
{
  // Input and output must carry the same number of columns.
  TEUCHOS_TEST_FOR_EXCEPT(Y.getNumVectors()!=X.getNumVectors());
#ifdef KDEBUG
  // Debug-only layout checks against the operator's domain/range maps.
  TEUCHOS_TEST_FOR_EXCEPT(!X.getMap()->isSameAs(*getDomainMap()));
  TEUCHOS_TEST_FOR_EXCEPT(!Y.getMap()->isSameAs(*getRangeMap()));
#endif

  const Scalar scaleProduct = STS::one();
  const Scalar scaleY = STS::zero();

  // Y = scaleY * Y + scaleProduct * (D^{-1} .* X)
  Y.elementWiseMultiply(scaleProduct, *densityOnDensityInverse_, X, scaleY);
}
// Apply the inverse of the A11 block by block forward substitution.
//
// Y is first set to a copy of X.  Block 0 of Y is scaled by block 0 of
// invDiagonal_.  Then, for every following block k = i+1, the product
// matrixOperator_[i] * Y is subtracted from that block of Y and the block
// is scaled by the matching block of invDiagonal_.
//
// X [in]  Right-hand side multivector.
// Y [out] Solution multivector (overwritten).
void
dft_PolyA11_Tpetra_Operator<Scalar,MatrixType>::
applyInverse
(const MV& X, MV& Y) const
{
  TEUCHOS_TEST_FOR_EXCEPT(Y.getNumVectors()!=X.getNumVectors());
#ifdef KDEBUG
  TEUCHOS_TEST_FOR_EXCEPT(!X.getMap()->isSameAs(*getDomainMap()));
  TEUCHOS_TEST_FOR_EXCEPT(!Y.getMap()->isSameAs(*getRangeMap()));
  printf("\n\n\n\ndft_PolyA11_Tpetra_Operator::applyInverse()\n\n\n\n");
#endif

  const Scalar plusOne = STS::one();
  const Scalar nothing = STS::zero();
  const size_t numCols = Y.getNumVectors();
  const size_t blockSize = ownedMap_->getNodeNumElements();

  // Scratch storage for one block-sized operator product.
  RCP<MV > blockProduct = rcp(new MV(ownedMap_, numCols));

  // Start from Y = X; every block is then corrected and scaled in place.
  Y = X;

  // Leading block: only the diagonal scaling is needed.
  RCP<MV > yBlock = Y.offsetViewNonConst(ownedMap_, 0);
  RCP<VEC> invDiagBlock =
    invDiagonal_->offsetViewNonConst(ownedMap_, 0)->getVectorNonConst(0);
  yBlock->elementWiseMultiply(plusOne, *invDiagBlock, *yBlock, nothing);

  // Remaining blocks; matrixOperator_[i] belongs to block i+1.
  for (LocalOrdinal i = OTLO::zero(); i < numBlocks_-1; ++i) {
    // Re-point the views at block i+1 of Y and of the inverse diagonal.
    yBlock = Y.offsetViewNonConst(ownedMap_, (i+1)*blockSize);
    invDiagBlock =
      invDiagonal_->offsetViewNonConst(ownedMap_, (i+1)*blockSize)->getVectorNonConst(0);

    // Lower-triangular contribution, computed from the current contents of Y.
    matrixOperator_[i]->apply(Y, *blockProduct);

    // yBlock still holds the matching block of X (from Y = X), so this
    // forms X_block - blockProduct.
    yBlock->update(-plusOne, *blockProduct, plusOne);

    // Scale by this block's inverse diagonal.
    yBlock->elementWiseMultiply(plusOne, *invDiagBlock, *yBlock, nothing);
  }
} //end applyInverse
/// \brief Finish internode TSQR initialization. /// /// \param mv [in] A valid Tpetra::MultiVector instance whose /// communicator wrapper we will use to prepare TSQR. /// /// \note It's OK to call this method more than once; it is idempotent. void prepareDistTsqr (const MV& mv) { using Teuchos::RCP; using Teuchos::rcp_implicit_cast; typedef TSQR::TeuchosMessenger<scalar_type> mess_type; typedef TSQR::MessengerBase<scalar_type> base_mess_type; RCP<const Teuchos::Comm<int> > comm = mv.getMap()->getComm(); RCP<mess_type> mess (new mess_type (comm)); RCP<base_mess_type> messBase = rcp_implicit_cast<base_mess_type> (mess); distTsqr_->init (messBase); }
// Apply the A11 block: off-diagonal operator products are written into
// blocks 1..numBlocks_-1 of Y, then the diagonal contribution
// diagonal_ .* X is added to all of Y.
//
// X     [in]     Input multivector.
// Y     [in/out] Result multivector.
// mode, alpha, beta are accepted for interface compatibility but are not
// used anywhere in this body.
//
// NOTE(review): the loop below only writes blocks at offsets
// (i+1)*blockSize, so the first block of Y is never overwritten before the
// diagonal term is accumulated onto it with a scaling of one.  Confirm that
// callers pass a Y whose first block is zero, or that this is intended.
void
dft_PolyA11_Tpetra_Operator<Scalar,MatrixType>::
apply
(const MV& X, MV& Y, Teuchos::ETransp mode, Scalar alpha, Scalar beta) const
{
  TEUCHOS_TEST_FOR_EXCEPT(Y.getNumVectors()!=X.getNumVectors());
#ifdef KDEBUG
  TEUCHOS_TEST_FOR_EXCEPT(!X.getMap()->isSameAs(*getDomainMap()));
  TEUCHOS_TEST_FOR_EXCEPT(!Y.getMap()->isSameAs(*getRangeMap()));
#endif

  const size_t blockSize = ownedMap_->getNodeNumElements();

  // View onto one block of Y; it starts at block 0 and is re-pointed below.
  RCP<MV > yBlock = Y.offsetViewNonConst(ownedMap_, 0);

  for (LocalOrdinal i = OTLO::zero(); i < numBlocks_-1; ++i) {
    yBlock = Y.offsetViewNonConst(ownedMap_, (i+1)*blockSize);
    // Block i+1 of Y receives (off-diagonal matrix i) * X.
    matrixOperator_[i]->apply(X, *yBlock);
  }

  // Y = Y + diagonal_ .* X
  const Scalar unit = STS::one();
  Y.elementWiseMultiply(unit, *diagonal_, X, unit);
} //end Apply
/// \brief Extract A's underlying KokkosClassic::MultiVector instance. /// /// TSQR represents the local (to each MPI process) part of a /// multivector as a KokkosClassic::MultiVector (KMV), which gives a /// nonconstant view of the original multivector's data. This /// class method tells TSQR how to get the KMV from the input /// multivector. The KMV is not a persistent view of the data; /// its scope is contained within the scope of the multivector. /// /// \warning TSQR does not currently support multivectors with /// nonconstant stride. If A has nonconstant stride, this /// method will throw an exception. static KokkosClassic::MultiVector<scalar_type, node_type> getNonConstView (MV& A) { // FIXME (mfh 25 Oct 2010) We should be able to run TSQR even if // storage of A uses nonconstant stride internally. We would // have to copy and pack into a matrix with constant stride, and // then unpack on exit. For now we choose just to raise an // exception. TEUCHOS_TEST_FOR_EXCEPTION(! A.isConstantStride(), std::invalid_argument, "TSQR does not currently support Tpetra::MultiVector " "inputs that do not have constant stride."); typedef typename Teuchos::ArrayRCP<mp_scalar_type>::size_type size_type; typedef typename MV::dual_view_type view_type; typedef typename view_type::t_dev::array_type flat_array_type; // Create new Kokkos::MultiVector reinterpreting the data as a longer // array of the base scalar type // Create new ArrayRCP holding data view_type pce_mv = A.getDualView(); flat_array_type flat_mv = pce_mv.d_view; const size_t num_rows = flat_mv.dimension_0(); const size_t num_cols = flat_mv.dimension_1(); const size_t size = num_rows * num_cols; ArrayRCP<scalar_type> vals = Teuchos::arcp(flat_mv.ptr_on_device(), size_type(0), size, false); // Create new MultiVector // Owing to the above comment, we don't need to worry about // non-constant stride size_t strides[2]; flat_mv.stride(strides); const size_t stride = strides[0]; KokkosClassic::MultiVector<scalar_type, 
node_type> mv(A.getMap()->getNode()); mv.initializeValues(num_rows, num_cols, vals, stride); return mv; }
/// \brief Solve AX=B for X with Chebyshev iteration with left /// diagonal scaling, imitating ML's implementation. /// /// \pre A must be real-valued and symmetric positive definite. /// \pre numIters >= 0 /// \pre eigRatio >= 1 /// \pre 0 < lambdaMax /// \pre All entries of D_inv are positive. /// /// \param A [in] The matrix A in the linear system to solve. /// \param B [in] Right-hand side(s) in the linear system to solve. /// \param X [in] Initial guess(es) for the linear system to solve. /// \param numIters [in] Number of Chebyshev iterations. /// \param lambdaMax [in] Estimate of max eigenvalue of D_inv*A. /// \param lambdaMin [in] Estimate of min eigenvalue of D_inv*A. We /// only use this to determine if A is the identity matrix. /// \param eigRatio [in] Estimate of max / min eigenvalue ratio of /// D_inv*A. We use this along with lambdaMax to compute the /// Chebyshev coefficients. This need not be the same as /// lambdaMax/lambdaMin. /// \param D_inv [in] Vector of diagonal entries of A. It must have /// the same distribution as b. void mlApplyImpl (const MAT& A, const MV& B, MV& X, const int numIters, const ST lambdaMax, const ST lambdaMin, const ST eigRatio, const V& D_inv) { const ST zero = Teuchos::as<ST> (0); const ST one = Teuchos::as<ST> (1); const ST two = Teuchos::as<ST> (2); MV pAux (B.getMap (), B.getNumVectors ()); // Result of A*X MV dk (B.getMap (), B.getNumVectors ()); // Solution update MV R (B.getMap (), B.getNumVectors ()); // Not in original ML; need for B - pAux ST beta = Teuchos::as<ST> (1.1) * lambdaMax; ST alpha = lambdaMax / eigRatio; ST delta = (beta - alpha) / two; ST theta = (beta + alpha) / two; ST s1 = theta / delta; ST rhok = one / s1; // Diagonal: ML replaces entries containing 0 with 1. We // shouldn't have any entries like that in typical test problems, // so it's OK not to do that here. // The (scaled) matrix is the identity: set X = D_inv * B. (If A // is the identity, then certainly D_inv is too. 
D_inv comes from // A, so if D_inv * A is the identity, then we still need to apply // the "preconditioner" D_inv to B as well, to get X.) if (lambdaMin == one && lambdaMin == lambdaMax) { solve (X, D_inv, B); return; } // The next bit of code is a direct translation of code from ML's // ML_Cheby function, in the "normal point scaling" section, which // is in lines 7365-7392 of ml_smoother.c. if (! zeroStartingSolution_) { // dk = (1/theta) * D_inv * (B - (A*X)) A.apply (X, pAux); // pAux = A * X R = B; R.update (-one, pAux, one); // R = B - pAux dk.elementWiseMultiply (one/theta, D_inv, R, zero); // dk = (1/theta)*D_inv*R X.update (one, dk, one); // X = X + dk } else { dk.elementWiseMultiply (one/theta, D_inv, B, zero); // dk = (1/theta)*D_inv*B X = dk; } ST rhokp1, dtemp1, dtemp2; for (int k = 0; k < numIters-1; ++k) { A.apply (X, pAux); rhokp1 = one / (two*s1 - rhok); dtemp1 = rhokp1*rhok; dtemp2 = two*rhokp1/delta; rhok = rhokp1; R = B; R.update (-one, pAux, one); // R = B - pAux // dk = dtemp1 * dk + dtemp2 * D_inv * (B - pAux) dk.elementWiseMultiply (dtemp2, D_inv, B, dtemp1); X.update (one, dk, one); // X = X + dk } }
/// \brief Finish intranode TSQR initialization. /// /// \note It's OK to call this method more than once; it is idempotent. void prepareNodeTsqr (const MV& mv) { node_tsqr_factory_type::prepareNodeTsqr (nodeTsqr_, mv.getMap()->getNode()); }
// Apply the 2x2 block operator A22 to X, writing the result into Y.
//
// With F (Cms-on-Density) in the NE quadrant (F_location_ == 1):
//     | Dcc  F   | | Xcms     |
//     | Ddc  Ddd | | Xdensity |
// otherwise F sits in the SW quadrant:
//     | Ddd  Ddc | | Xdensity |
//     | F    Dcc | | Xcms     |
// The Ddc (Density-on-Cms) product is only applied when hasDensityOnCms_
// is set.  tmpCmsVec_ is used as scratch space for the Dcc product.
//
// mode, alpha, beta are accepted for interface compatibility but are not
// used anywhere in this body.
void
dft_PolyA22_Tpetra_Operator<Scalar,MatrixType>::
apply
(const MV& X, MV& Y, Teuchos::ETransp mode, Scalar alpha, Scalar beta) const
{
  TEUCHOS_TEST_FOR_EXCEPT(Y.getNumVectors()!=X.getNumVectors());
#ifdef KDEBUG
  TEUCHOS_TEST_FOR_EXCEPT(!X.getMap()->isSameAs(*getDomainMap()));
  TEUCHOS_TEST_FOR_EXCEPT(!Y.getMap()->isSameAs(*getRangeMap()));
#endif

  const Scalar unit = STS::one();
  const Scalar none = STS::zero();

  // The diagonal density term accumulates onto the Ddc product when that
  // product exists, and overwrites the density rows otherwise.
  const Scalar densityRowBeta = hasDensityOnCms_ ? unit : none;

  if (F_location_ == 1) {
    // F in NE: Cms rows come first, density rows follow.
    const size_t cmsCount = cmsMap_->getNodeNumElements();

    RCP<MV> yCms = Y.offsetViewNonConst(cmsMap_, 0);
    RCP<MV> yDensity = Y.offsetViewNonConst(densityMap_, cmsCount);
    RCP<const MV> xCms = X.offsetView(cmsMap_, 0);
    RCP<const MV> xDensity = X.offsetView(densityMap_, cmsCount);

    // Cms block row: yCms = F*xDensity + Dcc*xCms
    cmsOnDensityMatrixOp_->apply(*xDensity, *yCms);
    cmsOnCmsMatrixOp_->apply(*xCms, *tmpCmsVec_);
    yCms->update(unit, *tmpCmsVec_, unit);

    // Density block row: yDensity = [Ddc*xCms +] Ddd .* xDensity
    if (hasDensityOnCms_) {
      densityOnCmsMatrixOp_->apply(*xCms, *yDensity);
    }
    yDensity->elementWiseMultiply(unit, *densityOnDensityMatrix_, *xDensity, densityRowBeta);
  }
  else {
    // F in SW: density rows come first, Cms rows follow.
    const size_t densityCount = densityMap_->getNodeNumElements();

    RCP<MV> yDensity = Y.offsetViewNonConst(densityMap_, 0);
    RCP<MV> yCms = Y.offsetViewNonConst(cmsMap_, densityCount);
    RCP<const MV> xDensity = X.offsetView(densityMap_, 0);
    RCP<const MV> xCms = X.offsetView(cmsMap_, densityCount);

    // Density block row: yDensity = [Ddc*xCms +] Ddd .* xDensity
    if (hasDensityOnCms_) {
      densityOnCmsMatrixOp_->apply(*xCms, *yDensity);
    }
    yDensity->elementWiseMultiply(unit, *densityOnDensityMatrix_, *xDensity, densityRowBeta);

    // Cms block row: yCms = F*xDensity + Dcc*xCms
    cmsOnDensityMatrixOp_->apply(*xDensity, *yCms);
    cmsOnCmsMatrixOp_->apply(*xCms, *tmpCmsVec_);
    yCms->update(unit, *tmpCmsVec_, unit);
  }
} //end Apply
// Apply an approximate inverse of the A22 block.
//
// The true A22 block has one of two layouts:
//
//   F in the NE quadrant:        F in the SW quadrant:
//     | Dcc  F   |                 | Ddd  Ddc |
//     | Ddc  Ddd |                 | F    Dcc |
//
// where
//   Dcc is Cms on Cms (diagonal),
//   F   is Cms on Density (fairly dense),
//   Ddc is Density on Cms (diagonal with small coefficient values),
//   Ddd is Density on Density (diagonal).
//
// For applyInverse only, Ddc is replaced with a zero matrix, which makes
// the block triangular.  The solve is then, in either layout:
//   Ydensity = Ddd \ Xdensity
//   Ycms     = Dcc \ (Xcms - F*Ydensity)
// tmpCmsVec_ is used as scratch space for the Cms right-hand side.
void
dft_PolyA22_Tpetra_Operator<Scalar,MatrixType>::
applyInverse
(const MV& X, MV& Y) const
{
  TEUCHOS_TEST_FOR_EXCEPT(Y.getNumVectors()!=X.getNumVectors());
#ifdef KDEBUG
  TEUCHOS_TEST_FOR_EXCEPT(!X.getMap()->isSameAs(*getDomainMap()));
  TEUCHOS_TEST_FOR_EXCEPT(!Y.getMap()->isSameAs(*getRangeMap()));
  printf("\n\n\n\ndft_PolyA22_Tpetra_Operator::applyInverse()\n\n\n\n");
#endif

  const Scalar unit = STS::one();
  const Scalar none = STS::zero();

  if (F_location_ == 1) {
    // F in NE: Cms rows come first, density rows follow.
    const size_t cmsCount = cmsMap_->getNodeNumElements();

    RCP<MV > yCms = Y.offsetViewNonConst(cmsMap_, 0);
    RCP<MV > yDensity = Y.offsetViewNonConst(densityMap_, cmsCount);
    RCP<const MV > xCms = X.offsetView(cmsMap_, 0);
    RCP<const MV > xDensity = X.offsetView(densityMap_, cmsCount);

    // Density rows: yDensity = Ddd \ xDensity
    yDensity->elementWiseMultiply(unit, *densityOnDensityInverse_, *xDensity, none);

    // Cms rows: tmp = F*yDensity, then tmp = xCms - tmp, then yCms = Dcc \ tmp
    cmsOnDensityMatrixOp_->apply(*yDensity, *tmpCmsVec_);
    tmpCmsVec_->update(unit, *xCms, -unit);
    cmsOnCmsInverseOp_->apply(*tmpCmsVec_, *yCms);
  }
  else {
    // F in SW: density rows come first, Cms rows follow.
    const size_t densityCount = densityMap_->getNodeNumElements();

    RCP<MV > yDensity = Y.offsetViewNonConst(densityMap_, 0);
    RCP<MV > yCms = Y.offsetViewNonConst(cmsMap_, densityCount);
    RCP<const MV > xDensity = X.offsetView(densityMap_, 0);
    RCP<const MV > xCms = X.offsetView(cmsMap_, densityCount);

    // Density rows: yDensity = Ddd \ xDensity
    yDensity->elementWiseMultiply(unit, *densityOnDensityInverse_, *xDensity, none);

    // Cms rows: tmp = F*yDensity, then tmp = xCms - tmp, then yCms = Dcc \ tmp
    cmsOnDensityMatrixOp_->apply(*yDensity, *tmpCmsVec_);
    tmpCmsVec_->update(unit, *xCms, -unit);
    cmsOnCmsInverseOp_->apply(*tmpCmsVec_, *yCms);
  }
} //end applyInverse