// Applies the diagonal A22 block to X: every entry of Y is overwritten with the
// product of the matching entries of densityOnDensityMatrix_ and X.
// The mode, alpha and beta arguments are accepted but not consulted here.
// Raises (via TEUCHOS_TEST_FOR_EXCEPT) if X and Y differ in column count.
void
dft_HardSphereA22_Tpetra_Operator<Scalar,MatrixType>::
apply
(const MV& X, MV& Y, Teuchos::ETransp mode, Scalar alpha, Scalar beta) const
{
    // Input and output must hold the same number of vectors.
    TEUCHOS_TEST_FOR_EXCEPT(Y.getNumVectors()!=X.getNumVectors());
#ifdef KDEBUG
    // Debug-only map compatibility checks.
    TEUCHOS_TEST_FOR_EXCEPT(!X.getMap()->isSameAs(*getDomainMap()));
    TEUCHOS_TEST_FOR_EXCEPT(!Y.getMap()->isSameAs(*getRangeMap()));
#endif

    const Scalar productWeight = STS::one();
    const Scalar existingWeight = STS::zero(); // discard whatever Y held before

    // Y = 1 * (D .* X) + 0 * Y
    Y.elementWiseMultiply(productWeight, *densityOnDensityMatrix_, X, existingWeight);
}
// Applies the inverse of the diagonal A22 block, Y = D \ X, as an entry-wise
// product of densityOnDensityInverse_ with X.  Y is overwritten.
// Raises (via TEUCHOS_TEST_FOR_EXCEPT) if X and Y differ in column count.
void
dft_HardSphereA22_Tpetra_Operator<Scalar,MatrixType>::
applyInverse
(const MV& X, MV& Y) const
{
    // Input and output must hold the same number of vectors.
    TEUCHOS_TEST_FOR_EXCEPT(Y.getNumVectors()!=X.getNumVectors());
#ifdef KDEBUG
    // Debug-only map compatibility checks.
    TEUCHOS_TEST_FOR_EXCEPT(!X.getMap()->isSameAs(*getDomainMap()));
    TEUCHOS_TEST_FOR_EXCEPT(!Y.getMap()->isSameAs(*getRangeMap()));
#endif

    const Scalar productWeight = STS::one();
    const Scalar existingWeight = STS::zero(); // discard whatever Y held before

    // Y = 1 * (Dinv .* X) + 0 * Y
    Y.elementWiseMultiply(productWeight, *densityOnDensityInverse_, X, existingWeight);
}
// Solves with the block lower-triangular A11 operator by block forward
// substitution.  Y starts as a copy of X; block 0 is scaled by its slice of
// invDiagonal_, and each later block has the product of its off-diagonal
// operator with the current Y subtracted before being scaled by its own slice.
// Raises (via TEUCHOS_TEST_FOR_EXCEPT) if X and Y differ in column count.
void
dft_PolyA11_Tpetra_Operator<Scalar,MatrixType>::
applyInverse
(const MV& X, MV& Y) const
{
    TEUCHOS_TEST_FOR_EXCEPT(Y.getNumVectors()!=X.getNumVectors());
#ifdef KDEBUG
    TEUCHOS_TEST_FOR_EXCEPT(!X.getMap()->isSameAs(*getDomainMap()));
    TEUCHOS_TEST_FOR_EXCEPT(!Y.getMap()->isSameAs(*getRangeMap()));
    printf("\n\n\n\ndft_PolyA11_Tpetra_Operator::applyInverse()\n\n\n\n");
#endif

    const Scalar unit = STS::one();
    const Scalar nil = STS::zero();

    // Local length of one block row, and scratch space for one block product.
    const size_t blockLength = ownedMap_->getNodeNumElements();
    RCP<MV > lowerProduct = rcp(new MV(ownedMap_, Y.getNumVectors()));

    // Seed the solution with the right-hand side; blocks are then updated in place.
    Y=X;

    // Block 0: no coupling to earlier blocks, only the diagonal scaling.
    size_t blockStart = 0;
    RCP<MV > yBlock = Y.offsetViewNonConst(ownedMap_, blockStart);
    RCP<VEC> invDiagBlock =
        invDiagonal_->offsetViewNonConst(ownedMap_, blockStart)->getVectorNonConst(0);
    yBlock->elementWiseMultiply(unit, *invDiagBlock, *yBlock, nil);

    // Remaining blocks: matrixOperator_[blk] belongs to block row blk+1.
    for (LocalOrdinal blk=OTLO::zero(); blk< numBlocks_-1; ++blk)
    {
        // Advance the views of Y and of the inverse diagonal to the next block row.
        blockStart += blockLength;
        yBlock = Y.offsetViewNonConst(ownedMap_, blockStart);
        invDiagBlock =
            invDiagonal_->offsetViewNonConst(ownedMap_, blockStart)->getVectorNonConst(0);

        // Off-diagonal contribution of this block row applied to the current Y.
        matrixOperator_[blk]->apply(Y, *lowerProduct);
        // yBlock still holds this block of X, so this forms X_block - product.
        yBlock->update(-unit, *lowerProduct, unit);
        // Scale by this block row's slice of the inverse diagonal.
        yBlock->elementWiseMultiply(unit, *invDiagBlock, *yBlock, nil);
    }
} //end applyInverse
// Applies the block lower-triangular A11 operator: Y = A11 * X.
//
// Block rows 1..numBlocks_-1 receive the product of their off-diagonal
// operator (matrixOperator_[i]) with X; the entry-wise product of diagonal_
// with X is then added to all of Y.
// The mode, alpha and beta arguments are accepted but not consulted here.
// Raises (via TEUCHOS_TEST_FOR_EXCEPT) if X and Y differ in column count.
void
dft_PolyA11_Tpetra_Operator<Scalar,MatrixType>::
apply
(const MV& X, MV& Y, Teuchos::ETransp mode, Scalar alpha, Scalar beta) const
{

    TEUCHOS_TEST_FOR_EXCEPT(Y.getNumVectors()!=X.getNumVectors());
#ifdef KDEBUG
    TEUCHOS_TEST_FOR_EXCEPT(!X.getMap()->isSameAs(*getDomainMap()));
    TEUCHOS_TEST_FOR_EXCEPT(!Y.getMap()->isSameAs(*getRangeMap()));
#endif

    size_t numMyElements = ownedMap_->getNodeNumElements();

    // Block row 0 has no off-diagonal operator, so the loop below never
    // overwrites it.  Clear it explicitly; otherwise the accumulating diagonal
    // product at the end would add to whatever Y held on entry.
    RCP<MV > curY = Y.offsetViewNonConst(ownedMap_, 0);
    curY->putScalar(STS::zero());

    for (LocalOrdinal i=OTLO::zero(); i< numBlocks_-1; i++) {
        // matrixOperator_[i] belongs to block row i+1.
        curY = Y.offsetViewNonConst(ownedMap_, (i+1)*numMyElements);
        matrixOperator_[i]->apply(X, *curY); // This gives a result that is off-diagonal-matrix*X
    }

    Y.elementWiseMultiply(STS::one(),*diagonal_, X, STS::one()); // Add diagonal contribution

} //end Apply
예제 #5
0
  // Compute Y := alpha Op X + beta Y.
  //
  // We ignore the cases alpha != 1 and beta != 0 for simplicity.
  void
  apply (const MV& X,
         MV& Y,
         Teuchos::ETransp mode = Teuchos::NO_TRANS,
         scalar_type alpha = Teuchos::ScalarTraits<scalar_type>::one (),
         scalar_type beta = Teuchos::ScalarTraits<scalar_type>::zero ()) const
  {
    using Teuchos::RCP;
    using Teuchos::rcp;
    using std::cout;
    using std::endl;
    typedef Teuchos::ScalarTraits<scalar_type> STS;

    RCP<const Teuchos::Comm<int> > comm = opMap_->getComm ();
    const int myRank = comm->getRank ();
    const int numProcs = comm->getSize ();

    if (myRank == 0) {
      cout << "MyOp::apply" << endl;
    }

    // We're writing the Operator subclass, so we are responsible for
    // error handling.  You can decide how much error checking you
    // want to do.  Just remember that checking things like Map
    // sameness or compatibility are expensive.
    TEUCHOS_TEST_FOR_EXCEPTION(
      X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
      "X and Y do not have the same numbers of vectors (columns).");

    // Let's make sure alpha is 1 and beta is 0...
    // This will throw an exception if that is not the case.
    TEUCHOS_TEST_FOR_EXCEPTION(
      alpha != STS::one() || beta != STS::zero(), std::logic_error,
      "MyOp::apply was given alpha != 1 or beta != 0. "
      "These cases are not implemented.");

    // Get the number of vectors (columns) in X (and Y).
    const size_t numVecs = X.getNumVectors ();

    // Make a temporary multivector for holding the redistributed
    // data.  You could also create this in the constructor and reuse
    // it across different apply() calls, but you would need to be
    // careful to reallocate if it has a different number of vectors
    // than X.  The number of vectors in X can vary across different
    // apply() calls.
    RCP<MV> redistData = rcp (new MV (redistMap_, numVecs));

    // Redistribute the data.
    // This will do all the necessary communication for you.
    // All processes now own enough data to do the matvec.
    redistData->doImport (X, *importer_, Tpetra::INSERT);

    // Get the number of local rows in X, on the calling process.
    const local_ordinal_type nlocRows =
      static_cast<local_ordinal_type> (X.getLocalLength ());

    // Perform the matvec with the data we now locally own.
    //
    // For each column...
    for (size_t c = 0; c < numVecs; ++c) {
      // Get a view of the desired column
      Teuchos::ArrayRCP<scalar_type> colView = redistData->getDataNonConst (c);

      local_ordinal_type offset;
      // Y[0,c] = -colView[0] + 2*colView[1] - colView[2] (using local indices)
      if (myRank > 0) {
        Y.replaceLocalValue (0, c, -colView[0] + 2*colView[1] - colView[2]);
        offset = 0;
      }
      // Y[0,c] = 2*colView[1] - colView[2] (using local indices)
      else {
        Y.replaceLocalValue (0, c, 2*colView[0] - colView[1]);
        offset = 1;
      }

      // Y[r,c] = -colView[r-offset] + 2*colView[r+1-offset] - colView[r+2-offset]
      for (local_ordinal_type r = 1; r < nlocRows - 1; ++r) {
        const scalar_type newVal =
          -colView[r-offset] + 2*colView[r+1-offset] - colView[r+2-offset];
        Y.replaceLocalValue (r, c, newVal);
      }
      // Y[nlocRows-1,c] = -colView[nlocRows-1-offset] + 2*colView[nlocRows-offset]
      //                   - colView[nlocRows+1-offset]
      if (myRank < numProcs - 1) {
        const scalar_type newVal =
          -colView[nlocRows-1-offset] + 2*colView[nlocRows-offset]
          - colView[nlocRows+1-offset];
        Y.replaceLocalValue (nlocRows-1, c, newVal);
      }
      // Y[nlocRows-1,c] = -colView[nlocRows-1-offset] + 2*colView[nlocRows-offset]
      else {
        const scalar_type newVal =
          -colView[nlocRows-1-offset] + 2*colView[nlocRows-offset];
        Y.replaceLocalValue (nlocRows-1, c, newVal);
      }
    }
  }
예제 #6
0
  /// \brief Solve AX=B for X with Chebyshev iteration with left
  ///   diagonal scaling, imitating ML's implementation.
  ///
  /// \pre A must be real-valued and symmetric positive definite.
  /// \pre numIters >= 0
  /// \pre eigRatio >= 1
  /// \pre 0 < lambdaMax
  /// \pre All entries of D_inv are positive.
  ///
  /// \param A [in] The matrix A in the linear system to solve.
  /// \param B [in] Right-hand side(s) in the linear system to solve.
  /// \param X [in/out] On input: initial guess(es), read only when
  ///   zeroStartingSolution_ is false.  On output: the updated
  ///   approximate solution(s).
  /// \param numIters [in] Number of Chebyshev iterations.  The initial
  ///   update counts as the first one; the loop runs numIters-1 more.
  /// \param lambdaMax [in] Estimate of max eigenvalue of D_inv*A.
  /// \param lambdaMin [in] Estimate of min eigenvalue of D_inv*A.  We
  ///   only use this to determine if A is the identity matrix.
  /// \param eigRatio [in] Estimate of max / min eigenvalue ratio of
  ///   D_inv*A.  We use this along with lambdaMax to compute the
  ///   Chebyshev coefficients.  This need not be the same as
  ///   lambdaMax/lambdaMin.
  /// \param D_inv [in] Diagonal scaling vector, multiplied entry-wise
  ///   into the residual (presumably the inverses of A's diagonal
  ///   entries -- confirm against the caller).  It must have the same
  ///   distribution as B.
  void
  mlApplyImpl (const MAT& A,
               const MV& B,
               MV& X,
               const int numIters,
               const ST lambdaMax,
               const ST lambdaMin,
               const ST eigRatio,
               const V& D_inv)
  {
    const ST zero = Teuchos::as<ST> (0);
    const ST one = Teuchos::as<ST> (1);
    const ST two = Teuchos::as<ST> (2);

    MV pAux (B.getMap (), B.getNumVectors ()); // Result of A*X
    MV dk (B.getMap (), B.getNumVectors ()); // Solution update
    MV R (B.getMap (), B.getNumVectors ()); // Not in original ML; need for B - pAux

    // Interval [alpha, beta] on which the Chebyshev polynomial is built.
    ST beta = Teuchos::as<ST> (1.1) * lambdaMax;
    ST alpha = lambdaMax / eigRatio;

    ST delta = (beta - alpha) / two;
    ST theta = (beta + alpha) / two;
    ST s1 = theta / delta;
    ST rhok = one / s1;

    // Diagonal: ML replaces entries containing 0 with 1.  We
    // shouldn't have any entries like that in typical test problems,
    // so it's OK not to do that here.

    // The (scaled) matrix is the identity: set X = D_inv * B.  (If A
    // is the identity, then certainly D_inv is too.  D_inv comes from
    // A, so if D_inv * A is the identity, then we still need to apply
    // the "preconditioner" D_inv to B as well, to get X.)
    if (lambdaMin == one && lambdaMin == lambdaMax) {
      solve (X, D_inv, B);
      return;
    }

    // The next bit of code is a direct translation of code from ML's
    // ML_Cheby function, in the "normal point scaling" section, which
    // is in lines 7365-7392 of ml_smoother.c.

    if (! zeroStartingSolution_) {
      // dk = (1/theta) * D_inv * (B - (A*X))
      A.apply (X, pAux); // pAux = A * X
      R = B;
      R.update (-one, pAux, one); // R = B - pAux
      dk.elementWiseMultiply (one/theta, D_inv, R, zero); // dk = (1/theta)*D_inv*R
      X.update (one, dk, one); // X = X + dk
    } else {
      // With a zero initial guess the residual is just B.
      dk.elementWiseMultiply (one/theta, D_inv, B, zero); // dk = (1/theta)*D_inv*B
      X = dk;
    }

    ST rhokp1, dtemp1, dtemp2;
    for (int k = 0; k < numIters-1; ++k) {
      A.apply (X, pAux);
      rhokp1 = one / (two*s1 - rhok);
      dtemp1 = rhokp1*rhok;
      dtemp2 = two*rhokp1/delta;
      rhok = rhokp1;

      R = B;
      R.update (-one, pAux, one); // R = B - pAux
      // dk = dtemp1 * dk + dtemp2 * D_inv * (B - pAux)
      // The update must use the current residual R, not the raw
      // right-hand side B.
      dk.elementWiseMultiply (dtemp2, D_inv, R, dtemp1);
      X.update (one, dk, one); // X = X + dk
    }
  }
예제 #7
0
    /// \brief Do the transpose or conjugate transpose solve.
    ///
    /// \param X_in [in] Input multivector.
    /// \param Y_in [out] Result of the solve; overwritten.
    /// \param mode [in] Teuchos::TRANS or Teuchos::CONJ_TRANS; it is
    ///   forwarded to the local solve.
    ///
    /// \throws std::logic_error if mode is neither TRANS nor CONJ_TRANS.
    void applyTranspose (const MV& X_in, MV& Y_in, const Teuchos::ETransp mode) const
    {
      typedef Teuchos::ScalarTraits<Scalar> ST;
      using Teuchos::null;

      TEUCHOS_TEST_FOR_EXCEPTION
        (mode != Teuchos::TRANS && mode != Teuchos::CONJ_TRANS, std::logic_error,
         "Tpetra::CrsMatrixSolveOp::applyTranspose: mode is neither TRANS nor "
         "CONJ_TRANS.  Should never get here!  Please report this bug to the "
         "Tpetra developers.");

      const size_t numVectors = X_in.getNumVectors();
      Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Node> > importer =
        matrix_->getGraph ()->getImporter ();
      Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Node> > exporter =
        matrix_->getGraph ()->getExporter ();
      Teuchos::RCP<const MV> X;

      // it is okay if X and Y reference the same data, because we can
      // perform a triangular solve in-situ.  however, we require that
      // column access to each is strided.

      // set up import/export temporary multivectors; a cached
      // temporary is reused only if its column count still matches.
      if (importer != null) {
        if (importMV_ != null && importMV_->getNumVectors() != numVectors) {
          importMV_ = null;
        }
        if (importMV_ == null) {
          importMV_ = Teuchos::rcp( new MV(matrix_->getColMap(),numVectors) );
        }
      }
      if (exporter != null) {
        if (exportMV_ != null && exportMV_->getNumVectors() != numVectors) {
          exportMV_ = null;
        }
        if (exportMV_ == null) {
          exportMV_ = Teuchos::rcp( new MV(matrix_->getRowMap(),numVectors) );
        }
      }

      // solve(TRANS): DomainMap -> RangeMap
      // lclMatSolve_(TRANS): ColMap -> RowMap
      // importer: DomainMap -> ColMap
      // exporter: RowMap -> RangeMap
      //
      // solve = importer o   lclMatSolve_  o  exporter
      //         Domainmap -> ColMap     ->      RowMap -> RangeMap
      //
      // If we have a non-trivial importer, we must import elements that
      // are permuted or are on other processes.
      if (importer != null) {
        importMV_->doImport(X_in,*importer,INSERT);
        X = importMV_;
      }
      else if (X_in.isConstantStride() == false) {
        // cannot handle non-constant stride right now
        // generate a copy of X_in
        X = Teuchos::rcp(new MV(X_in));
      }
      else {
        // just temporary, so this non-owning RCP is okay
        X = Teuchos::rcpFromRef (X_in);
      }


      // If we have a non-trivial exporter, we must export elements that
      // are permuted or belong to other processes.  We compute the
      // solution into the to-be-exported MV and then export that.
      if (exporter != null) {
        matrix_->template localSolve<Scalar, Scalar> (*X, *exportMV_, mode);
        // Make sure target is zero: necessary because we are adding
        Y_in.putScalar(ST::zero());
        // The solution lives in exportMV_ (row Map); the exporter takes
        // it to the range Map.  The importer may be null here and
        // importMV_ only holds the imported input.
        Y_in.doExport(*exportMV_, *exporter, ADD);
      }
      // otherwise, solve into Y
      else {
        if (Y_in.isConstantStride() == false) {
          // generate a strided copy of Y
          MV Y(Y_in);
          matrix_->template localSolve<Scalar, Scalar> (*X, Y, mode);
          // Copy the values back into the caller's multivector, as
          // applyNonTranspose does.
          Tpetra::deep_copy (Y_in, Y);
        }
        else {
          matrix_->template localSolve<Scalar, Scalar> (*X, Y_in, mode);
        }
      }
    }
예제 #8
0
    //! Do the non-transpose solve.
    void applyNonTranspose (const MV& X_in, MV& Y_in) const
    {
      using Teuchos::NO_TRANS;
      using Teuchos::null;
      typedef Teuchos::ScalarTraits<Scalar> ST;

      // Solve U X = Y  or  L X = Y
      // X belongs to domain map, while Y belongs to range map

      const size_t numVectors = X_in.getNumVectors();
      Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Node> > importer =
        matrix_->getGraph ()->getImporter ();
      Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Node> > exporter =
        matrix_->getGraph ()->getExporter ();
      Teuchos::RCP<const MV> X;

      // it is okay if X and Y reference the same data, because we can
      // perform a triangular solve in-situ.  however, we require that
      // column access to each is strided.

      // set up import/export temporary multivectors
      if (importer != null) {
        if (importMV_ != null && importMV_->getNumVectors () != numVectors) {
          importMV_ = null;
        }
        if (importMV_ == null) {
          importMV_ = Teuchos::rcp (new MV (matrix_->getColMap (), numVectors));
        }
      }
      if (exporter != null) {
        if (exportMV_ != null && exportMV_->getNumVectors () != numVectors) {
          exportMV_ = null;
        }
        if (exportMV_ == null) {
          exportMV_ = Teuchos::rcp (new MV (matrix_->getRowMap (), numVectors));
        }
      }

      // solve(NO_TRANS): RangeMap -> DomainMap
      // lclMatSolve_: RowMap -> ColMap
      // importer: DomainMap -> ColMap
      // exporter: RowMap -> RangeMap
      //
      // solve = reverse(exporter)  o   lclMatSolve_  o reverse(importer)
      //         RangeMap   ->    RowMap     ->     ColMap         ->    DomainMap
      //
      // If we have a non-trivial exporter, we must import elements that
      // are permuted or are on other processors
      if (exporter != null) {
        exportMV_->doImport (X_in, *exporter, INSERT);
        X = exportMV_;
      }
      else if (! X_in.isConstantStride ()) {
        // cannot handle non-constant stride right now
        // generate a copy of X_in
        X = Teuchos::rcp (new MV (X_in));
      }
      else {
        // just temporary, so this non-owning RCP is okay
        X = Teuchos::rcpFromRef (X_in);
      }

      // If we have a non-trivial importer, we must export elements that
      // are permuted or belong to other processes.  We will compute
      // solution into the to-be-exported MV.
      if (importer != null) {
        matrix_->template localSolve<Scalar, Scalar> (*X, *importMV_, NO_TRANS);
        // Make sure target is zero: necessary because we are adding.
        Y_in.putScalar (ST::zero ());
        Y_in.doExport (*importMV_, *importer, ADD);
      }
      // otherwise, solve into Y
      else {
        // can't solve into non-strided multivector
        if (! Y_in.isConstantStride ()) {
          // generate a strided copy of Y
          MV Y (Y_in);
          matrix_->template localSolve<Scalar, Scalar> (*X, Y, NO_TRANS);
          Tpetra::deep_copy (Y_in, Y);
        }
        else {
          matrix_->template localSolve<Scalar, Scalar> (*X, Y_in, NO_TRANS);
        }
      }
    }
  //
  // Computes Y = alpha Op X + beta Y
  // TraceMin will never use alpha ~= 1 or beta ~= 0,
  // so we have ignored those options for simplicity.
  //
  void apply(const MV& X, MV& Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=SCT::one(), Scalar beta=SCT::zero()) const
  {
    //
    // Let's make sure alpha is 1 and beta is 0...
    // This will throw an exception if that is not the case.
    //
    TEUCHOS_TEST_FOR_EXCEPTION(alpha != SCT::one() || beta != SCT::zero(),std::invalid_argument,
           "MyOp::apply was given alpha != 1 or beta != 0. That's not supposed to happen.");

    //
    // Get the number of local rows
    //
    int nlocRows = X.getLocalLength();

    //
    // Get the number of vectors
    //
    int numVecs = X.getNumVectors();

    //
    // Make a multivector for holding the redistributed data
    //
    RCP<MV> redistData = rcp(new MV(redistMap_, numVecs));

    //
    // Redistribute the data.
    // This will do all the necessary communication for you.
    // All processes now own enough data to do the matvec.
    //
    redistData->doImport(X, *importer_, Tpetra::INSERT);

    //
    // Perform the matvec with the data we now locally own
    //
    // For each column...
    for(int c=0; c<numVecs; c++)
    {
      // Get a view of the desired column
      Teuchos::ArrayRCP<Scalar> colView = redistData->getDataNonConst(c); 

      int offset;
      // Y[0,c] = -colView[0] + 2*colView[1] - colView[2] (using local indices)
      if(myRank_ > 0)
      {
        Y.replaceLocalValue(0, c, -colView[0] + 2*colView[1] - colView[2]);
        offset = 0;
      }
      // Y[0,c] = 2*colView[1] - colView[2] (using local indices)
      else
      {
        Y.replaceLocalValue(0, c, 2*colView[0] - colView[1]);
        offset = 1;
      }
      
      // Y[r,c] = -colView[r-offset] + 2*colView[r+1-offset] - colView[r+2-offset]
      for(int r=1; r<nlocRows-1; r++)
      {
        Y.replaceLocalValue(r, c, -colView[r-offset] + 2*colView[r+1-offset] - colView[r+2-offset]);
      }

      // Y[nlocRows-1,c] = -colView[nlocRows-1-offset] + 2*colView[nlocRows-offset] - colView[nlocRows+1-offset]
      if(myRank_ < numProcs_-1)
      {
        Y.replaceLocalValue(nlocRows-1, c, -colView[nlocRows-1-offset] + 2*colView[nlocRows-offset] - colView[nlocRows+1-offset]);
      }
      // Y[nlocRows-1,c] = -colView[nlocRows-1-offset] + 2*colView[nlocRows-offset]
      else
      {
        Y.replaceLocalValue(nlocRows-1, c, -colView[nlocRows-1-offset] + 2*colView[nlocRows-offset]);
      }
    }
  }
// Applies the 2x2 block A22 operator to X, overwriting Y.
//
// The vectors are split into a Cms part and a Density part; F_location_
// selects which part comes first (1: Cms first, i.e. F in the NE quadrant;
// otherwise Density first, i.e. F in the SW quadrant).
//   Cms row:     Ycms     = (Cms on Density) * Xdensity + (Cms on Cms) * Xcms
//   Density row: Ydensity = [(Density on Cms) * Xcms +] Ddd .* Xdensity
// where the bracketed term is present only when hasDensityOnCms_ is set.
// The mode, alpha and beta arguments are accepted but not consulted here.
// Raises (via TEUCHOS_TEST_FOR_EXCEPT) if X and Y differ in column count.
void
dft_PolyA22_Tpetra_Operator<Scalar,MatrixType>::
apply
(const MV& X, MV& Y, Teuchos::ETransp mode, Scalar alpha, Scalar beta) const
{
  TEUCHOS_TEST_FOR_EXCEPT(Y.getNumVectors()!=X.getNumVectors());
#ifdef KDEBUG
  TEUCHOS_TEST_FOR_EXCEPT(!X.getMap()->isSameAs(*getDomainMap()));
  TEUCHOS_TEST_FOR_EXCEPT(!Y.getMap()->isSameAs(*getRangeMap()));
#endif

  const Scalar unit = STS::one();
  const Scalar nil = STS::zero();

  if (F_location_ != 1)
  {
    // F in SW: Density entries come first, Cms entries follow.
    const size_t cmsStart = densityMap_->getNodeNumElements();

    RCP<MV> yDensity = Y.offsetViewNonConst(densityMap_, 0);
    RCP<MV> yCms = Y.offsetViewNonConst(cmsMap_, cmsStart);
    RCP<const MV> xDensity = X.offsetView(densityMap_, 0);
    RCP<const MV> xCms = X.offsetView(cmsMap_, cmsStart);

    // Density (first) block row.  The diagonal product accumulates onto the
    // Density-on-Cms product only when that product was actually formed.
    Scalar keepExisting = nil;
    if (hasDensityOnCms_) {
      densityOnCmsMatrixOp_->apply(*xCms, *yDensity);
      keepExisting = unit;
    }
    yDensity->elementWiseMultiply(unit, *densityOnDensityMatrix_, *xDensity, keepExisting);

    // Cms (second) block row.
    cmsOnDensityMatrixOp_->apply(*xDensity, *yCms);
    cmsOnCmsMatrixOp_->apply(*xCms, *tmpCmsVec_);
    yCms->update(unit, *tmpCmsVec_, unit);
  }
  else
  {
    // F in NE: Cms entries come first, Density entries follow.
    const size_t densityStart = cmsMap_->getNodeNumElements();

    RCP<MV> yCms = Y.offsetViewNonConst(cmsMap_, 0);
    RCP<MV> yDensity = Y.offsetViewNonConst(densityMap_, densityStart);
    RCP<const MV> xCms = X.offsetView(cmsMap_, 0);
    RCP<const MV> xDensity = X.offsetView(densityMap_, densityStart);

    // Cms (first) block row.
    cmsOnDensityMatrixOp_->apply(*xDensity, *yCms);
    cmsOnCmsMatrixOp_->apply(*xCms, *tmpCmsVec_);
    yCms->update(unit, *tmpCmsVec_, unit);

    // Density (second) block row.  The diagonal product accumulates onto the
    // Density-on-Cms product only when that product was actually formed.
    Scalar keepExisting = nil;
    if (hasDensityOnCms_) {
      densityOnCmsMatrixOp_->apply(*xCms, *yDensity);
      keepExisting = unit;
    }
    yDensity->elementWiseMultiply(unit, *densityOnDensityMatrix_, *xDensity, keepExisting);
  }

} //end Apply
// Applies an approximate inverse of the A22 block to X, overwriting Y.
//
// With F in the NE quadrant the true block is
//
//   |  Dcc     F    |
//   |  Ddc     Ddd  |
//
// and with F in the SW quadrant it is
//
//   |  Ddd     Ddc  |
//   |  F       Dcc  |
//
// where
//   Dcc is Cms on Cms (diagonal),
//   F   is Cms on Density (fairly dense),
//   Ddc is Density on Cms (diagonal with small coefficient values),
//   Ddd is Density on Density (diagonal).
//
// For this method only, Ddc is replaced by a zero block, which makes the
// operator block triangular.  In both layouts the solve is then
//
//   Ydensity = Ddd \ Xdensity
//   Ycms     = Dcc \ (Xcms - F*Ydensity)
//
// and the layouts differ only in which part of the vector comes first.
// Raises (via TEUCHOS_TEST_FOR_EXCEPT) if X and Y differ in column count.
void
dft_PolyA22_Tpetra_Operator<Scalar,MatrixType>::
applyInverse
(const MV& X, MV& Y) const
{
  TEUCHOS_TEST_FOR_EXCEPT(Y.getNumVectors()!=X.getNumVectors());
#ifdef KDEBUG
  TEUCHOS_TEST_FOR_EXCEPT(!X.getMap()->isSameAs(*getDomainMap()));
  TEUCHOS_TEST_FOR_EXCEPT(!Y.getMap()->isSameAs(*getRangeMap()));
  printf("\n\n\n\ndft_PolyA22_Tpetra_Operator::applyInverse()\n\n\n\n");
#endif

  const Scalar unit = STS::one();
  const Scalar nil = STS::zero();

  if (F_location_ != 1)
  {
    // F in SW: Density entries come first, Cms entries follow.
    const size_t cmsStart = densityMap_->getNodeNumElements();

    RCP<MV > yDensity = Y.offsetViewNonConst(densityMap_, 0);
    RCP<MV > yCms = Y.offsetViewNonConst(cmsMap_, cmsStart);
    RCP<const MV > xDensity = X.offsetView(densityMap_, 0);
    RCP<const MV > xCms = X.offsetView(cmsMap_, cmsStart);

    // Density (first) block row: Ydensity = Ddd \ Xdensity
    yDensity->elementWiseMultiply(unit, *densityOnDensityInverse_, *xDensity, nil);

    // Cms (second) block row: Ycms = Dcc \ (Xcms - F*Ydensity)
    cmsOnDensityMatrixOp_->apply(*yDensity, *tmpCmsVec_);
    tmpCmsVec_->update(unit, *xCms, -unit); // tmp = Xcms - tmp
    cmsOnCmsInverseOp_->apply(*tmpCmsVec_, *yCms);
  }
  else
  {
    // F in NE: Cms entries come first, Density entries follow.
    const size_t densityStart = cmsMap_->getNodeNumElements();

    RCP<MV > yCms = Y.offsetViewNonConst(cmsMap_, 0);
    RCP<MV > yDensity = Y.offsetViewNonConst(densityMap_, densityStart);
    RCP<const MV > xCms = X.offsetView(cmsMap_, 0);
    RCP<const MV > xDensity = X.offsetView(densityMap_, densityStart);

    // Density (second) block row: Ydensity = Ddd \ Xdensity
    yDensity->elementWiseMultiply(unit, *densityOnDensityInverse_, *xDensity, nil);

    // Cms (first) block row: Ycms = Dcc \ (Xcms - F*Ydensity)
    cmsOnDensityMatrixOp_->apply(*yDensity, *tmpCmsVec_);
    tmpCmsVec_->update(unit, *xCms, -unit); // tmp = Xcms - tmp
    cmsOnCmsInverseOp_->apply(*tmpCmsVec_, *yCms);
  }

} //end applyInverse