void
dft_PolyA11_Tpetra_Operator<Scalar,MatrixType>::
applyInverse
(const MV& X, MV& Y) const
{

    TEUCHOS_TEST_FOR_EXCEPT(Y.getNumVectors()!=X.getNumVectors());
#ifdef KDEBUG
    TEUCHOS_TEST_FOR_EXCEPT(!X.getMap()->isSameAs(*getDomainMap()));
    TEUCHOS_TEST_FOR_EXCEPT(!Y.getMap()->isSameAs(*getRangeMap()));
    printf("\n\n\n\ndft_PolyA11_Tpetra_Operator::applyInverse()\n\n\n\n");
#endif

    Scalar ONE = STS::one();
    Scalar ZERO = STS::zero();

    size_t NumVectors = Y.getNumVectors();
    size_t numMyElements = ownedMap_->getNodeNumElements();
    RCP<MV > Ytmp = rcp(new MV(ownedMap_,NumVectors));

    Y=X; // We can safely do this

    RCP<MV > curY = Y.offsetViewNonConst(ownedMap_, 0);

    RCP<VEC> diagVec = invDiagonal_->offsetViewNonConst(ownedMap_, 0)->getVectorNonConst(0);

    curY->elementWiseMultiply(ONE, *diagVec, *curY, ZERO); // Scale Y by the first block diagonal

    // Loop over block 1 through numBlocks (indexing 0 to numBlocks-1)
    for (LocalOrdinal i=OTLO::zero(); i< numBlocks_-1; i++)
    {
        // Update views of Y and diagonal blocks
        curY = Y.offsetViewNonConst(ownedMap_, (i+1)*numMyElements);

        diagVec = invDiagonal_->offsetViewNonConst(ownedMap_, (i+1)*numMyElements)->getVectorNonConst(0);

        matrixOperator_[i]->apply(Y, *Ytmp); // Multiply block lower triangular block
        curY->update(-ONE, *Ytmp, ONE); // curY = curX - Ytmp (Note that curX is in curY from initial copy Y = X)
        curY->elementWiseMultiply(ONE, *diagVec, *curY, ZERO); // Scale Y by the first block diagonal
    }
} //end applyInverse
void
dft_PolyA11_Tpetra_Operator<Scalar,MatrixType>::
apply
(const MV& X, MV& Y, Teuchos::ETransp mode, Scalar alpha, Scalar beta) const
{

    TEUCHOS_TEST_FOR_EXCEPT(Y.getNumVectors()!=X.getNumVectors());
#ifdef KDEBUG
    TEUCHOS_TEST_FOR_EXCEPT(!X.getMap()->isSameAs(*getDomainMap()));
    TEUCHOS_TEST_FOR_EXCEPT(!Y.getMap()->isSameAs(*getRangeMap()));
#endif

    size_t numMyElements = ownedMap_->getNodeNumElements();

    RCP<MV > curY = Y.offsetViewNonConst(ownedMap_, 0);

    for (LocalOrdinal i=OTLO::zero(); i< numBlocks_-1; i++) {
        curY = Y.offsetViewNonConst(ownedMap_, (i+1)*numMyElements);
        matrixOperator_[i]->apply(X, *curY); // This gives a result that is off-diagonal-matrix*X
    }

    Y.elementWiseMultiply(STS::one(),*diagonal_, X, STS::one()); // Add diagonal contribution

} //end Apply
void
dft_PolyA22_Tpetra_Operator<Scalar,MatrixType>::
apply
(const MV& X, MV& Y, Teuchos::ETransp mode, Scalar alpha, Scalar beta) const
{

  TEUCHOS_TEST_FOR_EXCEPT(Y.getNumVectors()!=X.getNumVectors());
#ifdef KDEBUG
  TEUCHOS_TEST_FOR_EXCEPT(!X.getMap()->isSameAs(*getDomainMap()));
  TEUCHOS_TEST_FOR_EXCEPT(!Y.getMap()->isSameAs(*getRangeMap()));
#endif

  Scalar ONE = STS::one();
  Scalar ZERO = STS::zero();

  if (F_location_ == 1)
  {
    //F in NE

    size_t numCmsElements = cmsMap_->getNodeNumElements();

    // Y1 is a view of the first numCms elements of Y
    RCP<MV> Y1 = Y.offsetViewNonConst(cmsMap_, 0);
    // Y2 is a view of the last numDensity elements of Y
    RCP<MV> Y2 = Y.offsetViewNonConst(densityMap_, numCmsElements);
    // X1 is a view of the first numCms elements of X
    RCP<const MV> X1 = X.offsetView(cmsMap_, 0);
    // X2 is a view of the last numDensity elements of X
    RCP<const MV> X2 = X.offsetView(densityMap_, numCmsElements);

    // First block row
    cmsOnDensityMatrixOp_->apply(*X2, *Y1);
    cmsOnCmsMatrixOp_->apply(*X1, *tmpCmsVec_);
    Y1->update(ONE, *tmpCmsVec_, ONE);

    // Second block row
    if (hasDensityOnCms_) {
      densityOnCmsMatrixOp_->apply(*X1, *Y2);
      Y2->elementWiseMultiply(ONE, *densityOnDensityMatrix_, *X2, ONE);
    } else {
      Y2->elementWiseMultiply(ONE, *densityOnDensityMatrix_, *X2, ZERO);
    }

  }
  else
  {
    //F in SW

    size_t numDensityElements = densityMap_->getNodeNumElements();

    // Y1 is a view of the first numDensity elements of Y
    RCP<MV> Y1 = Y.offsetViewNonConst(densityMap_, 0);
    // Y2 is a view of the last numCms elements of Y
    RCP<MV> Y2 = Y.offsetViewNonConst(cmsMap_, numDensityElements);
    // X1 is a view of the first numDensity elements of X
    RCP<const MV> X1 = X.offsetView(densityMap_, 0);
    // X2 is a view of the last numCms elements of X
    RCP<const MV> X2 = X.offsetView(cmsMap_, numDensityElements);

    // First block row
    if (hasDensityOnCms_) {
      densityOnCmsMatrixOp_->apply(*X2, *Y1);
      Y1->elementWiseMultiply(ONE, *densityOnDensityMatrix_, *X1, ONE);
    } else {
      Y1->elementWiseMultiply(ONE, *densityOnDensityMatrix_, *X1, ZERO);
    }

    // Second block row
    cmsOnDensityMatrixOp_->apply(*X1, *Y2);
    cmsOnCmsMatrixOp_->apply(*X2, *tmpCmsVec_);
    Y2->update(ONE, *tmpCmsVec_, ONE);

  }

} //end Apply
void
dft_PolyA22_Tpetra_Operator<Scalar,MatrixType>::
applyInverse
(const MV& X, MV& Y) const
{
  // The true A22 block is of the form:

  // |  Dcc     F    |
  // |  Ddc     Ddd  |

  // where
  // Dcc is Cms on Cms (diagonal),
  // F is Cms on Density (fairly dense)
  // Ddc is Density on Cms (diagonal with small coefficient values),
  // Ddd is Density on Density (diagonal).
  //
  // We will approximate A22 with:

  // |  Dcc     F    |
  // |  0       Ddd  |

  // replacing Ddc with a zero matrix for the applyInverse method only.

  // Our algorithm is then:
  // Y2 = Ddd \ X2
  // Y1 = Dcc \ (X1 - F*Y2)

  // Or, if F is in the SW quadrant:
  // The true A22 block is of the form:

  // |  Ddd     Ddc  |
  // |  F       Dcc  |

  // where
  // Ddd is Density on Density (diagonal),
  // Ddc is Density on Cms (diagonal with small coefficient values),
  // F is Cms on Density (fairly dense),
  // Dcc is Cms on Cms (diagonal).
  //
  // We will approximate A22 with:

  // |  Ddd     0    |
  // |  F       Dcc  |

  // replacing Ddc with a zero matrix for the applyInverse method only.

  // Our algorithm is then:
  // Y1 = Ddd \ X1
  // Y2 = Dcc \ (X2 - F*Y1)

  TEUCHOS_TEST_FOR_EXCEPT(Y.getNumVectors()!=X.getNumVectors());
#ifdef KDEBUG
  TEUCHOS_TEST_FOR_EXCEPT(!X.getMap()->isSameAs(*getDomainMap()));
  TEUCHOS_TEST_FOR_EXCEPT(!Y.getMap()->isSameAs(*getRangeMap()));
  printf("\n\n\n\ndft_PolyA22_Tpetra_Operator::applyInverse()\n\n\n\n");
#endif

  Scalar ONE = STS::one();
  Scalar ZERO = STS::zero();

  if (F_location_ == 1)
  {
    //F in NE

    size_t numCmsElements = cmsMap_->getNodeNumElements();

    // Y1 is a view of the first numCms elements of Y
    RCP<MV > Y1 = Y.offsetViewNonConst(cmsMap_, 0);
    // Y2 is a view of the last numDensity elements of Y
    RCP<MV > Y2 = Y.offsetViewNonConst(densityMap_, numCmsElements);
    // X1 is a view of the first numCms elements of X
    RCP<const MV > X1 = X.offsetView(cmsMap_, 0);
    // X2 is a view of the last numDensity elements of X
    RCP<const MV > X2 = X.offsetView(densityMap_, numCmsElements);

    // Second block row: Y2 = DD\X2
    Y2->elementWiseMultiply(ONE, *densityOnDensityInverse_, *X2, ZERO);

    // First block row: Y1 = CC \ (X1 - CD*Y2)
    cmsOnDensityMatrixOp_->apply(*Y2, *tmpCmsVec_);
    tmpCmsVec_->update(ONE, *X1, -ONE);
    cmsOnCmsInverseOp_->apply(*tmpCmsVec_, *Y1);

  }
  else
  {
    //F in SW

    size_t numDensityElements = densityMap_->getNodeNumElements();

    // Y1 is a view of the first numDensity elements of Y
    RCP<MV > Y1 = Y.offsetViewNonConst(densityMap_, 0);
    // Y2 is a view of the last numCms elements of Y
    RCP<MV > Y2 = Y.offsetViewNonConst(cmsMap_, numDensityElements);
    // X1 is a view of the first numDensity elements of X
    RCP<const MV > X1 = X.offsetView(densityMap_, 0);
    // X2 is a view of the last numCms elements of X
    RCP<const MV > X2 = X.offsetView(cmsMap_, numDensityElements);

    // First block row: Y1 = DD\X1
    Y1->elementWiseMultiply(ONE, *densityOnDensityInverse_, *X1, ZERO);

    // Second block row: Y2 = CC \ (X2 - CD*Y1)
    cmsOnDensityMatrixOp_->apply(*Y1, *tmpCmsVec_);
    tmpCmsVec_->update(ONE, *X2, -ONE);
    cmsOnCmsInverseOp_->apply(*tmpCmsVec_, *Y2);
  }

} //end applyInverse