// Forward apply for the hard-sphere A22 block: Y = D .* X, where D is the
// stored density-on-density diagonal (densityOnDensityMatrix_).
// The mode, alpha and beta arguments are accepted but not consulted here.
void
dft_HardSphereA22_Tpetra_Operator<Scalar,MatrixType>::
apply
(const MV& X, MV& Y, Teuchos::ETransp mode, Scalar alpha, Scalar beta) const
{
  // Input and output must carry the same number of columns.
  TEUCHOS_TEST_FOR_EXCEPT(Y.getNumVectors()!=X.getNumVectors());
#ifdef KDEBUG
  TEUCHOS_TEST_FOR_EXCEPT(!X.getMap()->isSameAs(*getDomainMap()));
  TEUCHOS_TEST_FOR_EXCEPT(!Y.getMap()->isSameAs(*getRangeMap()));
#endif

  // Y = 1 * (D .* X) + 0 * Y, i.e. the previous contents of Y are discarded.
  const Scalar productWeight = STS::one();
  const Scalar previousWeight = STS::zero();
  Y.elementWiseMultiply(productWeight, *densityOnDensityMatrix_, X, previousWeight);
}
// Inverse apply for the hard-sphere A22 block: Y = D \ X.
// The block is diagonal, so the solve is an entry-wise product of X with the
// stored inverse diagonal (densityOnDensityInverse_).
void
dft_HardSphereA22_Tpetra_Operator<Scalar,MatrixType>::
applyInverse
(const MV& X, MV& Y) const
{
  // Input and output must carry the same number of columns.
  TEUCHOS_TEST_FOR_EXCEPT(Y.getNumVectors()!=X.getNumVectors());
#ifdef KDEBUG
  TEUCHOS_TEST_FOR_EXCEPT(!X.getMap()->isSameAs(*getDomainMap()));
  TEUCHOS_TEST_FOR_EXCEPT(!Y.getMap()->isSameAs(*getRangeMap()));
#endif

  // Y = 1 * (Dinv .* X) + 0 * Y, i.e. the previous contents of Y are discarded.
  const Scalar productWeight = STS::one();
  const Scalar previousWeight = STS::zero();
  Y.elementWiseMultiply(productWeight, *densityOnDensityInverse_, X, previousWeight);
}
// Inverse apply for the A11 block, done as a block forward substitution.
//
// Y starts as a copy of X.  Block 0 is scaled by the matching segment of the
// inverse diagonal.  Each later block b = i+1 is then updated as
//   Y_b = invDiag_b .* (X_b - matrixOperator_[i] * Y)
// where Y on the right-hand side is the full vector as computed so far.
void
dft_PolyA11_Tpetra_Operator<Scalar,MatrixType>::
applyInverse
(const MV& X, MV& Y) const
{
  TEUCHOS_TEST_FOR_EXCEPT(Y.getNumVectors()!=X.getNumVectors());
#ifdef KDEBUG
  TEUCHOS_TEST_FOR_EXCEPT(!X.getMap()->isSameAs(*getDomainMap()));
  TEUCHOS_TEST_FOR_EXCEPT(!Y.getMap()->isSameAs(*getRangeMap()));
  printf("\n\n\n\ndft_PolyA11_Tpetra_Operator::applyInverse()\n\n\n\n");
#endif

  const Scalar one = STS::one();
  const Scalar zero = STS::zero();
  const size_t numColumns = Y.getNumVectors();
  const size_t blockLength = ownedMap_->getNodeNumElements();

  // Scratch space for one block-sized operator product.
  RCP<MV > blockProduct = rcp(new MV(ownedMap_, numColumns));

  // Seed Y with the right-hand side; every block below is then updated in place.
  Y = X;

  // Block 0: only the diagonal scaling applies.
  RCP<MV > yBlock = Y.offsetViewNonConst(ownedMap_, 0);
  RCP<VEC> invDiagBlock =
    invDiagonal_->offsetViewNonConst(ownedMap_, 0)->getVectorNonConst(0);
  yBlock->elementWiseMultiply(one, *invDiagBlock, *yBlock, zero);

  // Remaining blocks: operator i couples the already-solved part of Y into block i+1.
  for (LocalOrdinal i = OTLO::zero(); i < numBlocks_-1; ++i) {
    const size_t blockOffset = (i+1)*blockLength;

    // Re-point the views at the block being solved.
    yBlock = Y.offsetViewNonConst(ownedMap_, blockOffset);
    invDiagBlock =
      invDiagonal_->offsetViewNonConst(ownedMap_, blockOffset)->getVectorNonConst(0);

    // blockProduct = (off-diagonal block row) * Y
    matrixOperator_[i]->apply(Y, *blockProduct);
    // yBlock still holds X's block from the initial copy, so this forms X_b - blockProduct.
    yBlock->update(-one, *blockProduct, one);
    // Scale by this block's inverse diagonal.
    yBlock->elementWiseMultiply(one, *invDiagBlock, *yBlock, zero);
  }
} //end applyInverse
// Forward apply for the A11 block: Y = (off-diagonal blocks) * X + diagonal_ .* X.
//
// Block row b = i+1 (for i in [0, numBlocks_-1)) receives matrixOperator_[i] * X;
// block row 0 has no operator associated with it in this loop.  The diagonal
// contribution is then accumulated into all of Y at once.
// The mode, alpha and beta arguments are accepted but not consulted here.
void
dft_PolyA11_Tpetra_Operator<Scalar,MatrixType>::
apply
(const MV& X, MV& Y, Teuchos::ETransp mode, Scalar alpha, Scalar beta) const
{
  TEUCHOS_TEST_FOR_EXCEPT(Y.getNumVectors()!=X.getNumVectors());
#ifdef KDEBUG
  TEUCHOS_TEST_FOR_EXCEPT(!X.getMap()->isSameAs(*getDomainMap()));
  TEUCHOS_TEST_FOR_EXCEPT(!Y.getMap()->isSameAs(*getRangeMap()));
#endif

  const size_t numMyElements = ownedMap_->getNodeNumElements();

  // Block row 0 is not written by any operator below, yet the final
  // elementWiseMultiply accumulates into Y.  Zero it explicitly so the result
  // does not depend on whatever Y contained on entry.
  RCP<MV > curY = Y.offsetViewNonConst(ownedMap_, 0);
  curY->putScalar(STS::zero());

  for (LocalOrdinal i=OTLO::zero(); i< numBlocks_-1; i++) {
    curY = Y.offsetViewNonConst(ownedMap_, (i+1)*numMyElements);
    // Off-diagonal contribution for block row i+1; this is relied on to
    // overwrite curY (default apply arguments).
    matrixOperator_[i]->apply(X, *curY);
  }

  // Y = diagonal_ .* X + Y : add the diagonal contribution to every block row.
  Y.elementWiseMultiply(STS::one(),*diagonal_, X, STS::one());
} //end Apply
// Compute Y := alpha Op X + beta Y. // // We ignore the cases alpha != 1 and beta != 0 for simplicity. void apply (const MV& X, MV& Y, Teuchos::ETransp mode = Teuchos::NO_TRANS, scalar_type alpha = Teuchos::ScalarTraits<scalar_type>::one (), scalar_type beta = Teuchos::ScalarTraits<scalar_type>::zero ()) const { using Teuchos::RCP; using Teuchos::rcp; using std::cout; using std::endl; typedef Teuchos::ScalarTraits<scalar_type> STS; RCP<const Teuchos::Comm<int> > comm = opMap_->getComm (); const int myRank = comm->getRank (); const int numProcs = comm->getSize (); if (myRank == 0) { cout << "MyOp::apply" << endl; } // We're writing the Operator subclass, so we are responsible for // error handling. You can decide how much error checking you // want to do. Just remember that checking things like Map // sameness or compatibility are expensive. TEUCHOS_TEST_FOR_EXCEPTION( X.getNumVectors () != Y.getNumVectors (), std::invalid_argument, "X and Y do not have the same numbers of vectors (columns)."); // Let's make sure alpha is 1 and beta is 0... // This will throw an exception if that is not the case. TEUCHOS_TEST_FOR_EXCEPTION( alpha != STS::one() || beta != STS::zero(), std::logic_error, "MyOp::apply was given alpha != 1 or beta != 0. " "These cases are not implemented."); // Get the number of vectors (columns) in X (and Y). const size_t numVecs = X.getNumVectors (); // Make a temporary multivector for holding the redistributed // data. You could also create this in the constructor and reuse // it across different apply() calls, but you would need to be // careful to reallocate if it has a different number of vectors // than X. The number of vectors in X can vary across different // apply() calls. RCP<MV> redistData = rcp (new MV (redistMap_, numVecs)); // Redistribute the data. // This will do all the necessary communication for you. // All processes now own enough data to do the matvec. 
redistData->doImport (X, *importer_, Tpetra::INSERT); // Get the number of local rows in X, on the calling process. const local_ordinal_type nlocRows = static_cast<local_ordinal_type> (X.getLocalLength ()); // Perform the matvec with the data we now locally own. // // For each column... for (size_t c = 0; c < numVecs; ++c) { // Get a view of the desired column Teuchos::ArrayRCP<scalar_type> colView = redistData->getDataNonConst (c); local_ordinal_type offset; // Y[0,c] = -colView[0] + 2*colView[1] - colView[2] (using local indices) if (myRank > 0) { Y.replaceLocalValue (0, c, -colView[0] + 2*colView[1] - colView[2]); offset = 0; } // Y[0,c] = 2*colView[1] - colView[2] (using local indices) else { Y.replaceLocalValue (0, c, 2*colView[0] - colView[1]); offset = 1; } // Y[r,c] = -colView[r-offset] + 2*colView[r+1-offset] - colView[r+2-offset] for (local_ordinal_type r = 1; r < nlocRows - 1; ++r) { const scalar_type newVal = -colView[r-offset] + 2*colView[r+1-offset] - colView[r+2-offset]; Y.replaceLocalValue (r, c, newVal); } // Y[nlocRows-1,c] = -colView[nlocRows-1-offset] + 2*colView[nlocRows-offset] // - colView[nlocRows+1-offset] if (myRank < numProcs - 1) { const scalar_type newVal = -colView[nlocRows-1-offset] + 2*colView[nlocRows-offset] - colView[nlocRows+1-offset]; Y.replaceLocalValue (nlocRows-1, c, newVal); } // Y[nlocRows-1,c] = -colView[nlocRows-1-offset] + 2*colView[nlocRows-offset] else { const scalar_type newVal = -colView[nlocRows-1-offset] + 2*colView[nlocRows-offset]; Y.replaceLocalValue (nlocRows-1, c, newVal); } } }
/// \brief Solve AX=B for X with Chebyshev iteration with left /// diagonal scaling, imitating ML's implementation. /// /// \pre A must be real-valued and symmetric positive definite. /// \pre numIters >= 0 /// \pre eigRatio >= 1 /// \pre 0 < lambdaMax /// \pre All entries of D_inv are positive. /// /// \param A [in] The matrix A in the linear system to solve. /// \param B [in] Right-hand side(s) in the linear system to solve. /// \param X [in] Initial guess(es) for the linear system to solve. /// \param numIters [in] Number of Chebyshev iterations. /// \param lambdaMax [in] Estimate of max eigenvalue of D_inv*A. /// \param lambdaMin [in] Estimate of min eigenvalue of D_inv*A. We /// only use this to determine if A is the identity matrix. /// \param eigRatio [in] Estimate of max / min eigenvalue ratio of /// D_inv*A. We use this along with lambdaMax to compute the /// Chebyshev coefficients. This need not be the same as /// lambdaMax/lambdaMin. /// \param D_inv [in] Vector of diagonal entries of A. It must have /// the same distribution as b. void mlApplyImpl (const MAT& A, const MV& B, MV& X, const int numIters, const ST lambdaMax, const ST lambdaMin, const ST eigRatio, const V& D_inv) { const ST zero = Teuchos::as<ST> (0); const ST one = Teuchos::as<ST> (1); const ST two = Teuchos::as<ST> (2); MV pAux (B.getMap (), B.getNumVectors ()); // Result of A*X MV dk (B.getMap (), B.getNumVectors ()); // Solution update MV R (B.getMap (), B.getNumVectors ()); // Not in original ML; need for B - pAux ST beta = Teuchos::as<ST> (1.1) * lambdaMax; ST alpha = lambdaMax / eigRatio; ST delta = (beta - alpha) / two; ST theta = (beta + alpha) / two; ST s1 = theta / delta; ST rhok = one / s1; // Diagonal: ML replaces entries containing 0 with 1. We // shouldn't have any entries like that in typical test problems, // so it's OK not to do that here. // The (scaled) matrix is the identity: set X = D_inv * B. (If A // is the identity, then certainly D_inv is too. 
D_inv comes from // A, so if D_inv * A is the identity, then we still need to apply // the "preconditioner" D_inv to B as well, to get X.) if (lambdaMin == one && lambdaMin == lambdaMax) { solve (X, D_inv, B); return; } // The next bit of code is a direct translation of code from ML's // ML_Cheby function, in the "normal point scaling" section, which // is in lines 7365-7392 of ml_smoother.c. if (! zeroStartingSolution_) { // dk = (1/theta) * D_inv * (B - (A*X)) A.apply (X, pAux); // pAux = A * X R = B; R.update (-one, pAux, one); // R = B - pAux dk.elementWiseMultiply (one/theta, D_inv, R, zero); // dk = (1/theta)*D_inv*R X.update (one, dk, one); // X = X + dk } else { dk.elementWiseMultiply (one/theta, D_inv, B, zero); // dk = (1/theta)*D_inv*B X = dk; } ST rhokp1, dtemp1, dtemp2; for (int k = 0; k < numIters-1; ++k) { A.apply (X, pAux); rhokp1 = one / (two*s1 - rhok); dtemp1 = rhokp1*rhok; dtemp2 = two*rhokp1/delta; rhok = rhokp1; R = B; R.update (-one, pAux, one); // R = B - pAux // dk = dtemp1 * dk + dtemp2 * D_inv * (B - pAux) dk.elementWiseMultiply (dtemp2, D_inv, B, dtemp1); X.update (one, dk, one); // X = X + dk } }
//! Do the transpose or conjugate transpose solve. void applyTranspose (const MV& X_in, MV& Y_in, const Teuchos::ETransp mode) const { typedef Teuchos::ScalarTraits<Scalar> ST; using Teuchos::null; TEUCHOS_TEST_FOR_EXCEPTION (mode != Teuchos::TRANS && mode != Teuchos::CONJ_TRANS, std::logic_error, "Tpetra::CrsMatrixSolveOp::applyTranspose: mode is neither TRANS nor " "CONJ_TRANS. Should never get here! Please report this bug to the " "Tpetra developers."); const size_t numVectors = X_in.getNumVectors(); Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Node> > importer = matrix_->getGraph ()->getImporter (); Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Node> > exporter = matrix_->getGraph ()->getExporter (); Teuchos::RCP<const MV> X; // it is okay if X and Y reference the same data, because we can // perform a triangular solve in-situ. however, we require that // column access to each is strided. // set up import/export temporary multivectors if (importer != null) { if (importMV_ != null && importMV_->getNumVectors() != numVectors) { importMV_ = null; } if (importMV_ == null) { importMV_ = Teuchos::rcp( new MV(matrix_->getColMap(),numVectors) ); } } if (exporter != null) { if (exportMV_ != null && exportMV_->getNumVectors() != numVectors) { exportMV_ = null; } if (exportMV_ == null) { exportMV_ = Teuchos::rcp( new MV(matrix_->getRowMap(),numVectors) ); } } // solve(TRANS): DomainMap -> RangeMap // lclMatSolve_(TRANS): ColMap -> RowMap // importer: DomainMap -> ColMap // exporter: RowMap -> RangeMap // // solve = importer o lclMatSolve_ o exporter // Domainmap -> ColMap -> RowMap -> RangeMap // // If we have a non-trivial importer, we must import elements that // are permuted or are on other processes. 
if (importer != null) { importMV_->doImport(X_in,*importer,INSERT); X = importMV_; } else if (X_in.isConstantStride() == false) { // cannot handle non-constant stride right now // generate a copy of X_in X = Teuchos::rcp(new MV(X_in)); } else { // just temporary, so this non-owning RCP is okay X = Teuchos::rcpFromRef (X_in); } // If we have a non-trivial exporter, we must export elements that // are permuted or belong to other processes. We will compute // solution into the to-be-exported MV; get a view. if (exporter != null) { matrix_->template localSolve<Scalar, Scalar> (*X, *exportMV_, Teuchos::CONJ_TRANS); // Make sure target is zero: necessary because we are adding Y_in.putScalar(ST::zero()); Y_in.doExport(*importMV_, *importer, ADD); } // otherwise, solve into Y else { if (Y_in.isConstantStride() == false) { // generate a strided copy of Y MV Y(Y_in); matrix_->template localSolve<Scalar, Scalar> (*X, Y, Teuchos::CONJ_TRANS); Y_in = Y; } else { matrix_->template localSolve<Scalar, Scalar> (*X, Y_in, Teuchos::CONJ_TRANS); } } }
//! Do the non-transpose solve. void applyNonTranspose (const MV& X_in, MV& Y_in) const { using Teuchos::NO_TRANS; using Teuchos::null; typedef Teuchos::ScalarTraits<Scalar> ST; // Solve U X = Y or L X = Y // X belongs to domain map, while Y belongs to range map const size_t numVectors = X_in.getNumVectors(); Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Node> > importer = matrix_->getGraph ()->getImporter (); Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Node> > exporter = matrix_->getGraph ()->getExporter (); Teuchos::RCP<const MV> X; // it is okay if X and Y reference the same data, because we can // perform a triangular solve in-situ. however, we require that // column access to each is strided. // set up import/export temporary multivectors if (importer != null) { if (importMV_ != null && importMV_->getNumVectors () != numVectors) { importMV_ = null; } if (importMV_ == null) { importMV_ = Teuchos::rcp (new MV (matrix_->getColMap (), numVectors)); } } if (exporter != null) { if (exportMV_ != null && exportMV_->getNumVectors () != numVectors) { exportMV_ = null; } if (exportMV_ == null) { exportMV_ = Teuchos::rcp (new MV (matrix_->getRowMap (), numVectors)); } } // solve(NO_TRANS): RangeMap -> DomainMap // lclMatSolve_: RowMap -> ColMap // importer: DomainMap -> ColMap // exporter: RowMap -> RangeMap // // solve = reverse(exporter) o lclMatSolve_ o reverse(importer) // RangeMap -> RowMap -> ColMap -> DomainMap // // If we have a non-trivial exporter, we must import elements that // are permuted or are on other processors if (exporter != null) { exportMV_->doImport (X_in, *exporter, INSERT); X = exportMV_; } else if (! 
X_in.isConstantStride ()) { // cannot handle non-constant stride right now // generate a copy of X_in X = Teuchos::rcp (new MV (X_in)); } else { // just temporary, so this non-owning RCP is okay X = Teuchos::rcpFromRef (X_in); } // If we have a non-trivial importer, we must export elements that // are permuted or belong to other processes. We will compute // solution into the to-be-exported MV. if (importer != null) { matrix_->template localSolve<Scalar, Scalar> (*X, *importMV_, NO_TRANS); // Make sure target is zero: necessary because we are adding. Y_in.putScalar (ST::zero ()); Y_in.doExport (*importMV_, *importer, ADD); } // otherwise, solve into Y else { // can't solve into non-strided multivector if (! Y_in.isConstantStride ()) { // generate a strided copy of Y MV Y (Y_in); matrix_->template localSolve<Scalar, Scalar> (*X, Y, NO_TRANS); Tpetra::deep_copy (Y_in, Y); } else { matrix_->template localSolve<Scalar, Scalar> (*X, Y_in, NO_TRANS); } } }
// // Computes Y = alpha Op X + beta Y // TraceMin will never use alpha ~= 1 or beta ~= 0, // so we have ignored those options for simplicity. // void apply(const MV& X, MV& Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=SCT::one(), Scalar beta=SCT::zero()) const { // // Let's make sure alpha is 1 and beta is 0... // This will throw an exception if that is not the case. // TEUCHOS_TEST_FOR_EXCEPTION(alpha != SCT::one() || beta != SCT::zero(),std::invalid_argument, "MyOp::apply was given alpha != 1 or beta != 0. That's not supposed to happen."); // // Get the number of local rows // int nlocRows = X.getLocalLength(); // // Get the number of vectors // int numVecs = X.getNumVectors(); // // Make a multivector for holding the redistributed data // RCP<MV> redistData = rcp(new MV(redistMap_, numVecs)); // // Redistribute the data. // This will do all the necessary communication for you. // All processes now own enough data to do the matvec. // redistData->doImport(X, *importer_, Tpetra::INSERT); // // Perform the matvec with the data we now locally own // // For each column... 
for(int c=0; c<numVecs; c++) { // Get a view of the desired column Teuchos::ArrayRCP<Scalar> colView = redistData->getDataNonConst(c); int offset; // Y[0,c] = -colView[0] + 2*colView[1] - colView[2] (using local indices) if(myRank_ > 0) { Y.replaceLocalValue(0, c, -colView[0] + 2*colView[1] - colView[2]); offset = 0; } // Y[0,c] = 2*colView[1] - colView[2] (using local indices) else { Y.replaceLocalValue(0, c, 2*colView[0] - colView[1]); offset = 1; } // Y[r,c] = -colView[r-offset] + 2*colView[r+1-offset] - colView[r+2-offset] for(int r=1; r<nlocRows-1; r++) { Y.replaceLocalValue(r, c, -colView[r-offset] + 2*colView[r+1-offset] - colView[r+2-offset]); } // Y[nlocRows-1,c] = -colView[nlocRows-1-offset] + 2*colView[nlocRows-offset] - colView[nlocRows+1-offset] if(myRank_ < numProcs_-1) { Y.replaceLocalValue(nlocRows-1, c, -colView[nlocRows-1-offset] + 2*colView[nlocRows-offset] - colView[nlocRows+1-offset]); } // Y[nlocRows-1,c] = -colView[nlocRows-1-offset] + 2*colView[nlocRows-offset] else { Y.replaceLocalValue(nlocRows-1, c, -colView[nlocRows-1-offset] + 2*colView[nlocRows-offset]); } } }
// Forward apply for the 2x2 block A22 operator.
//
// X and Y are each split into a Cms part and a Density part.  Which part
// comes first depends on F_location_: with F_location_ == 1 ("F in NE") the
// Cms part leads, otherwise ("F in SW") the Density part leads.  In both
// layouts the result is
//   Ycms     = cmsOnDensity * Xdensity + cmsOnCms * Xcms
//   Ydensity = densityOnDensity .* Xdensity  [+ densityOnCms * Xcms, when present]
// The mode, alpha and beta arguments are accepted but not consulted here.
void
dft_PolyA22_Tpetra_Operator<Scalar,MatrixType>::
apply
(const MV& X, MV& Y, Teuchos::ETransp mode, Scalar alpha, Scalar beta) const
{
  TEUCHOS_TEST_FOR_EXCEPT(Y.getNumVectors()!=X.getNumVectors());
#ifdef KDEBUG
  TEUCHOS_TEST_FOR_EXCEPT(!X.getMap()->isSameAs(*getDomainMap()));
  TEUCHOS_TEST_FOR_EXCEPT(!Y.getMap()->isSameAs(*getRangeMap()));
#endif

  const Scalar one = STS::one();
  const Scalar zero = STS::zero();

  if (F_location_ == 1) {
    // F in NE: layout is [ Cms | Density ].
    const size_t cmsLength = cmsMap_->getNodeNumElements();

    RCP<MV> yCms = Y.offsetViewNonConst(cmsMap_, 0);
    RCP<MV> yDensity = Y.offsetViewNonConst(densityMap_, cmsLength);
    RCP<const MV> xCms = X.offsetView(cmsMap_, 0);
    RCP<const MV> xDensity = X.offsetView(densityMap_, cmsLength);

    // First block row (Cms): F * Xdensity, then add the Cms-on-Cms product.
    cmsOnDensityMatrixOp_->apply(*xDensity, *yCms);
    cmsOnCmsMatrixOp_->apply(*xCms, *tmpCmsVec_);
    yCms->update(one, *tmpCmsVec_, one);

    // Second block row (Density): the diagonal product either accumulates onto
    // the Density-on-Cms product or overwrites when that block is absent.
    Scalar keepExisting = zero;
    if (hasDensityOnCms_) {
      densityOnCmsMatrixOp_->apply(*xCms, *yDensity);
      keepExisting = one;
    }
    yDensity->elementWiseMultiply(one, *densityOnDensityMatrix_, *xDensity, keepExisting);
  }
  else {
    // F in SW: layout is [ Density | Cms ].
    const size_t densityLength = densityMap_->getNodeNumElements();

    RCP<MV> yDensity = Y.offsetViewNonConst(densityMap_, 0);
    RCP<MV> yCms = Y.offsetViewNonConst(cmsMap_, densityLength);
    RCP<const MV> xDensity = X.offsetView(densityMap_, 0);
    RCP<const MV> xCms = X.offsetView(cmsMap_, densityLength);

    // First block row (Density): same accumulate-or-overwrite choice as above.
    Scalar keepExisting = zero;
    if (hasDensityOnCms_) {
      densityOnCmsMatrixOp_->apply(*xCms, *yDensity);
      keepExisting = one;
    }
    yDensity->elementWiseMultiply(one, *densityOnDensityMatrix_, *xDensity, keepExisting);

    // Second block row (Cms): F * Xdensity, then add the Cms-on-Cms product.
    cmsOnDensityMatrixOp_->apply(*xDensity, *yCms);
    cmsOnCmsMatrixOp_->apply(*xCms, *tmpCmsVec_);
    yCms->update(one, *tmpCmsVec_, one);
  }
} //end Apply
// Approximate inverse apply for the 2x2 block A22 operator.
//
// With F in the NE quadrant the true block is
//   | Dcc  F   |
//   | Ddc  Ddd |
// and with F in the SW quadrant it is
//   | Ddd  Ddc |
//   | F    Dcc |
// where
//   Dcc is Cms on Cms (diagonal),
//   F   is Cms on Density (fairly dense),
//   Ddc is Density on Cms (diagonal with small coefficient values),
//   Ddd is Density on Density (diagonal).
//
// For applyInverse only, Ddc is replaced by a zero block, which makes the
// operator block triangular.  In either layout the solve is then
//   Ydensity = Ddd \ Xdensity
//   Ycms     = Dcc \ (Xcms - F * Ydensity)
// The layouts differ only in where the Cms and Density parts sit inside X and Y.
void
dft_PolyA22_Tpetra_Operator<Scalar,MatrixType>::
applyInverse
(const MV& X, MV& Y) const
{
  TEUCHOS_TEST_FOR_EXCEPT(Y.getNumVectors()!=X.getNumVectors());
#ifdef KDEBUG
  TEUCHOS_TEST_FOR_EXCEPT(!X.getMap()->isSameAs(*getDomainMap()));
  TEUCHOS_TEST_FOR_EXCEPT(!Y.getMap()->isSameAs(*getRangeMap()));
  printf("\n\n\n\ndft_PolyA22_Tpetra_Operator::applyInverse()\n\n\n\n");
#endif

  const Scalar one = STS::one();
  const Scalar zero = STS::zero();

  // F in NE puts the Cms part first; F in SW puts the Density part first.
  // Whichever part comes second starts right after the local length of the first.
  const bool cmsLeads = (F_location_ == 1);
  const size_t cmsOffset =
    cmsLeads ? static_cast<size_t>(0) : densityMap_->getNodeNumElements();
  const size_t densityOffset =
    cmsLeads ? cmsMap_->getNodeNumElements() : static_cast<size_t>(0);

  RCP<MV > yCms = Y.offsetViewNonConst(cmsMap_, cmsOffset);
  RCP<MV > yDensity = Y.offsetViewNonConst(densityMap_, densityOffset);
  RCP<const MV > xCms = X.offsetView(cmsMap_, cmsOffset);
  RCP<const MV > xDensity = X.offsetView(densityMap_, densityOffset);

  // Density block row: Ydensity = Ddd \ Xdensity (entry-wise, via the stored inverse).
  yDensity->elementWiseMultiply(one, *densityOnDensityInverse_, *xDensity, zero);

  // Cms block row: Ycms = Dcc \ (Xcms - F * Ydensity).
  cmsOnDensityMatrixOp_->apply(*yDensity, *tmpCmsVec_);   // tmp = F * Ydensity
  tmpCmsVec_->update(one, *xCms, -one);                   // tmp = Xcms - tmp
  cmsOnCmsInverseOp_->apply(*tmpCmsVec_, *yCms);
} //end applyInverse