void
LocalSparseTriangularSolver<MatrixType>::
localApply (const MV& X,
            MV& Y,
            const Teuchos::ETransp mode,
            const scalar_type& alpha,
            const scalar_type& beta) const
{
  using Teuchos::RCP;
  typedef scalar_type ST;
  typedef Teuchos::ScalarTraits<ST> STS;
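
  // Compute Y := beta*Y + alpha*(T \ X), where "T \ X" denotes the
  // local triangular solve with the stored matrix A_crs_.  Handle
  // alpha == 0 and beta == 0 as special cases, following Sparse BLAS
  // rules, so that 0*NaN or 0*Inf never contaminates the result.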

  if (beta == STS::zero ()) {
    if (alpha == STS::zero ()) {
      Y.putScalar (STS::zero ()); // Y := 0 * Y (ignore contents of Y)
    }
    else { // alpha != 0
      A_crs_->template localSolve<ST, ST> (X, Y, mode);
      if (alpha != STS::one ()) {
        Y.scale (alpha);
      }
    }
  }
  else { // beta != 0
    if (alpha == STS::zero ()) {
      Y.scale (beta); // Y := beta * Y
    }
    else { // alpha != 0
      MV Y_tmp (Y, Teuchos::Copy);
      A_crs_->template localSolve<ST, ST> (X, Y_tmp, mode); // Y_tmp := M * X
      Y.update (alpha, Y_tmp, beta); // Y := beta * Y + alpha * Y_tmp
    }
  }
}
          /// \brief Solve AX=B for X with Chebyshev iteration with left
          ///   diagonal scaling, imitating ML's implementation.
          ///
          /// \pre A must be real-valued and symmetric positive definite.
          /// \pre numIters >= 0
          /// \pre eigRatio >= 1
          /// \pre 0 < lambdaMax
          /// \pre All entries of D_inv are positive.
          ///
          /// \param A [in] The matrix A in the linear system to solve.
          /// \param B [in] Right-hand side(s) in the linear system to solve.
          /// \param X [in/out] On input: initial guess(es) for the linear
          ///   system to solve; on output: the computed approximate solution.
          /// \param numIters [in] Number of Chebyshev iterations.
          /// \param lambdaMax [in] Estimate of max eigenvalue of D_inv*A.
          /// \param lambdaMin [in] Estimate of min eigenvalue of D_inv*A.  We
          ///   only use this to determine if A is the identity matrix.
          /// \param eigRatio [in] Estimate of max / min eigenvalue ratio of
          ///   D_inv*A.  We use this along with lambdaMax to compute the
          ///   Chebyshev coefficients.  This need not be the same as
          ///   lambdaMax/lambdaMin.
          /// \param D_inv [in] Vector of inverse diagonal entries of A.  It
          ///   must have the same distribution as B.
          void
          mlApplyImpl (const MAT& A,
                       const MV& B,
                       MV& X,
                       const int numIters,
                       const ST lambdaMax,
                       const ST lambdaMin,
                       const ST eigRatio,
                       const V& D_inv)
          {
            const ST zero = Teuchos::as<ST> (0);
            const ST one = Teuchos::as<ST> (1);
            const ST two = Teuchos::as<ST> (2);

            MV pAux (B.getMap (), B.getNumVectors ()); // Result of A*X
            MV dk (B.getMap (), B.getNumVectors ()); // Solution update
            MV R (B.getMap (), B.getNumVectors ()); // Not in original ML; need for B - pAux

            ST beta = Teuchos::as<ST> (1.1) * lambdaMax;
            ST alpha = lambdaMax / eigRatio;

            ST delta = (beta - alpha) / two;
            ST theta = (beta + alpha) / two;
            ST s1 = theta / delta;
            ST rhok = one / s1;

            // Diagonal: ML replaces entries containing 0 with 1.  We
            // shouldn't have any entries like that in typical test problems,
            // so it's OK not to do that here.

            // The (scaled) matrix is the identity: set X = D_inv * B.  (If A
            // is the identity, then certainly D_inv is too.  D_inv comes from
            // A, so if D_inv * A is the identity, then we still need to apply
            // the "preconditioner" D_inv to B as well, to get X.)
            if (lambdaMin == one && lambdaMin == lambdaMax) {
              solve (X, D_inv, B);
              return;
            }

            // The next bit of code is a direct translation of code from ML's
            // ML_Cheby function, in the "normal point scaling" section, which
            // is in lines 7365-7392 of ml_smoother.c.

            if (! zeroStartingSolution_) {
              // dk = (1/theta) * D_inv * (B - (A*X))
              A.apply (X, pAux); // pAux = A * X
              R = B;
              R.update (-one, pAux, one); // R = B - pAux
              dk.elementWiseMultiply (one/theta, D_inv, R, zero); // dk = (1/theta)*D_inv*R
              X.update (one, dk, one); // X = X + dk
            } else {
              dk.elementWiseMultiply (one/theta, D_inv, B, zero); // dk = (1/theta)*D_inv*B
              X = dk;
            }
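
            // Remaining numIters-1 iterations: the standard Chebyshev
            // three-term recurrence.  With the residual r_k = B - A*x_k:
            //   rho_{k+1} = 1 / (2*s1 - rho_k)
            //   d_{k+1}   = rho_{k+1}*rho_k * d_k
            //               + (2*rho_{k+1}/delta) * D_inv * r_k (elementwise)
            //   x_{k+1}   = x_k + d_{k+1}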

            ST rhokp1, dtemp1, dtemp2;
            for (int k = 0; k < numIters-1; ++k) {
              A.apply (X, pAux);
              rhokp1 = one / (two*s1 - rhok);
              dtemp1 = rhokp1*rhok;
              dtemp2 = two*rhokp1/delta;
              rhok = rhokp1;

              R = B;
              R.update (-one, pAux, one); // R = B - pAux
              // dk = dtemp1 * dk + dtemp2 * D_inv * (B - pAux)
              dk.elementWiseMultiply (dtemp2, D_inv, R, dtemp1);
              X.update (one, dk, one); // X = X + dk
            }
          }
void
Chebyshev<MatrixType>::
applyImpl (const MV& X,
           MV& Y,
           Teuchos::ETransp mode,
           scalar_type alpha,
           scalar_type beta) const
{
  using Teuchos::ArrayRCP;
  using Teuchos::as;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcp_const_cast;
  using Teuchos::rcpFromRef;

  const scalar_type zero = STS::zero();
  const scalar_type one = STS::one();

  // Y = beta*Y + alpha*M*X.

  // If alpha == 0, then we don't need to do Chebyshev at all.
  if (alpha == zero) {
    if (beta == zero) { // Obey Sparse BLAS rules; avoid 0*NaN.
      Y.putScalar (zero);
    }
    else {
      Y.scale (beta);
    }
    return;
  }

  // If beta != 0, then we need to keep a copy of the initial value of
  // Y, so that we can add beta times it to the Chebyshev result at the
  // end.  Usually this method is called with beta == 0, so we don't
  // have to worry about caching Y_orig.
  RCP<MV> Y_orig;
  if (beta != zero) {
    Y_orig = rcp (new MV (Y));
  }

  // If X and Y point to the same memory location, we need to use a
  // copy of X (X_copy) as the input MV.  Otherwise, just let X_copy
  // point to X.
  //
  // This is hopefully an uncommon use case, so we don't bother to
  // optimize for it by caching X_copy.
  RCP<const MV> X_copy;
  bool copiedInput = false;
  if (X.getLocalMV().getValues() == Y.getLocalMV().getValues()) {
    X_copy = rcp (new MV (X));
    copiedInput = true;
  }
  else {
    X_copy = rcpFromRef (X);
  }
  
  // If alpha != 1, fold alpha into (a copy of) X.
  //
  // This is an uncommon use case, so we don't bother to optimize for
  // it by caching X_copy.  However, we do check whether we've already
  // copied X above, to avoid a second copy.
  if (alpha != one) {
    RCP<MV> X_copy_nonConst = rcp_const_cast<MV> (X_copy);
    if (! copiedInput) {
      X_copy_nonConst = rcp (new MV (X));
      copiedInput = true;
    }
    X_copy_nonConst->scale (alpha);
    X_copy = rcp_const_cast<const MV> (X_copy_nonConst);
  }

  impl_.apply (*X_copy, Y);

  if (beta != zero) {
    Y.update (beta, *Y_orig, one); // Y = beta * Y_orig + 1 * Y
  }
}
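
// A minimal usage sketch (not part of the listing above) showing how an
// Ifpack2::Chebyshev preconditioner is typically set up and applied.
// The setParameters/initialize/compute/apply sequence and the
// "chebyshev: degree" parameter key follow the usual Ifpack2
// preconditioner interface; the function name and the default Tpetra
// template parameters here are illustrative assumptions, not code taken
// from this file.
#include <Ifpack2_Chebyshev.hpp>
#include <Tpetra_MultiVector.hpp>
#include <Tpetra_RowMatrix.hpp>
#include <Teuchos_ParameterList.hpp>
#include <Teuchos_RCP.hpp>

void
exampleChebyshevApply (const Teuchos::RCP<const Tpetra::RowMatrix<> >& A)
{
  typedef Tpetra::MultiVector<> mv_type;

  // Wrap A in a Chebyshev preconditioner and set it up.
  Ifpack2::Chebyshev<Tpetra::RowMatrix<> > prec (A);

  Teuchos::ParameterList params;
  params.set ("chebyshev: degree", 3); // number of Chebyshev iterations
  prec.setParameters (params);
  prec.initialize ();
  prec.compute (); // estimates the max eigenvalue if one was not supplied

  mv_type B (A->getRangeMap (), 1);
  mv_type X (A->getDomainMap (), 1);
  B.putScalar (1.0);
  X.putScalar (1.0);

  // X := 0.5*X + 2.0*M*B, exercising the alpha/beta handling in
  // applyImpl above (M denotes the Chebyshev approximation to inv(A)).
  prec.apply (B, X, Teuchos::NO_TRANS, 2.0, 0.5);
}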