template<class MatrixType>
void
LocalSparseTriangularSolver<MatrixType>::
localApply (const MV& X,
            MV& Y,
            const Teuchos::ETransp mode,
            const scalar_type& alpha,
            const scalar_type& beta) const
{
  using Teuchos::RCP;
  typedef scalar_type ST;
  typedef Teuchos::ScalarTraits<ST> STS;

  if (beta == STS::zero ()) {
    if (alpha == STS::zero ()) {
      Y.putScalar (STS::zero ()); // Y := 0 * Y (ignore contents of Y)
    }
    else { // alpha != 0
      A_crs_->template localSolve<ST, ST> (X, Y, mode);
      if (alpha != STS::one ()) {
        Y.scale (alpha);
      }
    }
  }
  else { // beta != 0
    if (alpha == STS::zero ()) {
      Y.scale (beta); // Y := beta * Y
    }
    else { // alpha != 0
      MV Y_tmp (Y, Teuchos::Copy);
      A_crs_->template localSolve<ST, ST> (X, Y_tmp, mode); // Y_tmp := M * X
      Y.update (alpha, Y_tmp, beta); // Y := beta * Y + alpha * Y_tmp
    }
  }
}
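// The case analysis above implements the BLAS-style update
//
//   Y := beta * Y + alpha * M * X,
//
// where M * X denotes the local triangular solve.  The special cases
// are not just optimizations: following Sparse BLAS conventions, a
// zero coefficient must ignore its term entirely, so beta == 0
// overwrites Y (avoiding 0*NaN if Y holds uninitialized or Inf/NaN
// entries) and alpha == 0 skips the solve altogether.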
/// \brief Solve AX=B for X with Chebyshev iteration with left
///   diagonal scaling, imitating ML's implementation.
///
/// \pre A must be real-valued and symmetric positive definite.
/// \pre numIters >= 0
/// \pre eigRatio >= 1
/// \pre 0 < lambdaMax
/// \pre All entries of D_inv are positive.
///
/// \param A [in] The matrix A in the linear system to solve.
/// \param B [in] Right-hand side(s) in the linear system to solve.
/// \param X [in/out] On input: initial guess(es) for the linear
///   system to solve.  On output: the computed approximate
///   solution(s).
/// \param numIters [in] Number of Chebyshev iterations.
/// \param lambdaMax [in] Estimate of max eigenvalue of D_inv*A.
/// \param lambdaMin [in] Estimate of min eigenvalue of D_inv*A.  We
///   only use this to determine if A is the identity matrix.
/// \param eigRatio [in] Estimate of max / min eigenvalue ratio of
///   D_inv*A.  We use this along with lambdaMax to compute the
///   Chebyshev coefficients.  This need not be the same as
///   lambdaMax/lambdaMin.
/// \param D_inv [in] Vector of the inverses of the diagonal entries
///   of A.  It must have the same distribution as B.
void
mlApplyImpl (const MAT& A,
             const MV& B,
             MV& X,
             const int numIters,
             const ST lambdaMax,
             const ST lambdaMin,
             const ST eigRatio,
             const V& D_inv)
{
  const ST zero = Teuchos::as<ST> (0);
  const ST one = Teuchos::as<ST> (1);
  const ST two = Teuchos::as<ST> (2);

  MV pAux (B.getMap (), B.getNumVectors ()); // Result of A*X
  MV dk (B.getMap (), B.getNumVectors ());   // Solution update
  MV R (B.getMap (), B.getNumVectors ());    // Not in original ML; needed for B - pAux

  ST beta = Teuchos::as<ST> (1.1) * lambdaMax;
  ST alpha = lambdaMax / eigRatio;

  ST delta = (beta - alpha) / two;
  ST theta = (beta + alpha) / two;
  ST s1 = theta / delta;
  ST rhok = one / s1;

  // Diagonal: ML replaces entries containing 0 with 1.  We
  // shouldn't have any entries like that in typical test problems,
  // so it's OK not to do that here.

  // The (scaled) matrix is the identity: set X = D_inv * B.  (If A
  // is the identity, then certainly D_inv is too.  D_inv comes from
  // A, so if D_inv * A is the identity, then we still need to apply
  // the "preconditioner" D_inv to B as well, to get X.)
  if (lambdaMin == one && lambdaMin == lambdaMax) {
    solve (X, D_inv, B);
    return;
  }

  // The next bit of code is a direct translation of code from ML's
  // ML_Cheby function, in the "normal point scaling" section, which
  // is in lines 7365-7392 of ml_smoother.c.
  if (! zeroStartingSolution_) {
    // dk = (1/theta) * D_inv * (B - (A*X))
    A.apply (X, pAux); // pAux = A * X
    R = B;
    R.update (-one, pAux, one); // R = B - pAux
    dk.elementWiseMultiply (one/theta, D_inv, R, zero); // dk = (1/theta)*D_inv*R
    X.update (one, dk, one); // X = X + dk
  }
  else {
    dk.elementWiseMultiply (one/theta, D_inv, B, zero); // dk = (1/theta)*D_inv*B
    X = dk;
  }

  ST rhokp1, dtemp1, dtemp2;
  for (int k = 0; k < numIters-1; ++k) {
    A.apply (X, pAux);
    rhokp1 = one / (two*s1 - rhok);
    dtemp1 = rhokp1*rhok;
    dtemp2 = two*rhokp1/delta;
    rhok = rhokp1;

    R = B;
    R.update (-one, pAux, one); // R = B - pAux

    // dk = dtemp1 * dk + dtemp2 * D_inv * (B - pAux)
    dk.elementWiseMultiply (dtemp2, D_inv, R, dtemp1);
    X.update (one, dk, one); // X = X + dk
  }
}
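// For reference, the loop above implements the standard three-term
// Chebyshev recurrence for the diagonally scaled operator D_inv * A
// on the interval [alpha, beta].  (This is a transcription of the
// code in its own notation, not part of the original ML comments.)
// With
//
//   delta = (beta - alpha) / 2,   theta = (beta + alpha) / 2,
//   s1    = theta / delta,        rho_0 = 1 / s1,
//
// each pass through the loop (k = 0, ..., numIters - 2, since the
// first step happens before the loop) computes
//
//   rho_{k+1} = 1 / (2*s1 - rho_k)
//   d_{k+1}   = rho_{k+1} * rho_k * d_k
//               + (2 * rho_{k+1} / delta) * D_inv * (B - A * X_k)
//   X_{k+1}   = X_k + d_{k+1}
//
// where dtemp1 = rho_{k+1} * rho_k and dtemp2 = 2 * rho_{k+1} / delta
// are the loop temporaries.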
template<class MatrixType>
void
Chebyshev<MatrixType>::
applyImpl (const MV& X,
           MV& Y,
           Teuchos::ETransp mode,
           scalar_type alpha,
           scalar_type beta) const
{
  using Teuchos::ArrayRCP;
  using Teuchos::as;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcp_const_cast;
  using Teuchos::rcpFromRef;

  const scalar_type zero = STS::zero ();
  const scalar_type one = STS::one ();

  // Y = beta*Y + alpha*M*X.

  // If alpha == 0, then we don't need to do Chebyshev at all.
  if (alpha == zero) {
    if (beta == zero) { // Obey Sparse BLAS rules; avoid 0*NaN.
      Y.putScalar (zero);
    }
    else {
      Y.scale (beta);
    }
    return;
  }

  // If beta != 0, then we need to keep a copy of the initial value of
  // Y, so that we can add beta*Y_orig to the Chebyshev result at the
  // end.  Usually this method is called with beta == 0, so we don't
  // have to worry about caching Y_orig.
  RCP<MV> Y_orig;
  if (beta != zero) {
    Y_orig = rcp (new MV (Y));
  }

  // If X and Y point to the same memory location, we need to use a
  // copy of X (X_copy) as the input MV.  Otherwise, just let X_copy
  // point to X.
  //
  // This is hopefully an uncommon use case, so we don't bother to
  // optimize for it by caching X_copy.
  RCP<const MV> X_copy;
  bool copiedInput = false;
  if (X.getLocalMV ().getValues () == Y.getLocalMV ().getValues ()) {
    X_copy = rcp (new MV (X));
    copiedInput = true;
  }
  else {
    X_copy = rcpFromRef (X);
  }

  // If alpha != 1, fold alpha into (a copy of) X.
  //
  // This is an uncommon use case, so we don't bother to optimize for
  // it by caching X_copy.  However, we do check whether we've already
  // copied X above, to avoid a second copy.
  if (alpha != one) {
    RCP<MV> X_copy_nonConst = rcp_const_cast<MV> (X_copy);
    if (! copiedInput) {
      X_copy_nonConst = rcp (new MV (X));
      copiedInput = true;
    }
    X_copy_nonConst->scale (alpha);
    X_copy = rcp_const_cast<const MV> (X_copy_nonConst);
  }

  impl_.apply (*X_copy, Y);

  if (beta != zero) {
    Y.update (beta, *Y_orig, one); // Y = beta * Y_orig + 1 * Y
  }
}
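// A minimal usage sketch of the public interface that reaches
// applyImpl.  This is illustrative only: `A` (an RCP to a
// fill-complete Tpetra matrix), `X`, `Y`, and `lambdaMaxEstimate` are
// assumed to exist with compatible Maps, and row_matrix_type /
// scalar_type stand for the usual Ifpack2 typedefs.
//
//   Ifpack2::Chebyshev<row_matrix_type> prec (A);
//   Teuchos::ParameterList params;
//   params.set ("chebyshev: degree", 3);
//   params.set ("chebyshev: max eigenvalue", lambdaMaxEstimate);
//   prec.setParameters (params);
//   prec.initialize ();
//   prec.compute ();
//
//   // Y := 0.5 * Y + 2 * M * X, exercising the alpha/beta handling
//   // above (alpha != 1 folds into a copy of X; beta != 0 saves the
//   // original Y and adds beta times it back at the end).
//   prec.apply (X, Y, Teuchos::NO_TRANS,
//               Teuchos::as<scalar_type> (2.0),
//               Teuchos::as<scalar_type> (0.5));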