예제 #1
0
  void IfpackSmoother::Apply(MultiVector& X, const MultiVector& B, bool InitialGuessIsZero) const {
    TEUCHOS_TEST_FOR_EXCEPTION(SmootherPrototype::IsSetup() == false, Exceptions::RuntimeError, "MueLu::IfpackSmoother::Apply(): Setup() has not been called");

    // Forward the InitialGuessIsZero option to Ifpack
    Teuchos::ParameterList  paramList;
    if (type_ == "Chebyshev")
      paramList.set("chebyshev: zero starting solution",  InitialGuessIsZero);

    else if (type_ == "point relaxation stand-alone")
      paramList.set("relaxation: zero starting solution", InitialGuessIsZero);

    SetPrecParameters(paramList);

    // Apply
    if (InitialGuessIsZero) {
      Epetra_MultiVector&       epX = Utils::MV2NonConstEpetraMV(X);
      const Epetra_MultiVector& epB = Utils::MV2EpetraMV(B);
      prec_->ApplyInverse(epB, epX);

    } else {
      RCP<MultiVector> Residual = Utils::Residual(*A_,X,B);
      RCP<MultiVector> Correction = MultiVectorFactory::Build(A_->getDomainMap(), X.getNumVectors());
      Epetra_MultiVector&       epX = Utils::MV2NonConstEpetraMV(*Correction);
      const Epetra_MultiVector& epB = Utils::MV2EpetraMV(*Residual);
      prec_->ApplyInverse(epB, epX);
      X.update(1.0, *Correction, 1.0);
    }
  }
  void ZoltanInterface<LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::
  GetProblemGeometry(void *data, int numGIDEntries, int numLIDEntries, int numObjectIDs,
                     ZOLTAN_ID_PTR gids, ZOLTAN_ID_PTR lids, int dim, double *coordinates, int *ierr)
  {
    if (data == NULL) {
      *ierr = ZOLTAN_FATAL;
      return;
    }

    MultiVector *Coords = (MultiVector*) data;

    if (dim != Teuchos::as<int>(Coords->getNumVectors())) {
      //FIXME I'm assuming dim should be 1, 2, or 3 coming in?!
      *ierr = ZOLTAN_FATAL;
      return;
    }

    TEUCHOS_TEST_FOR_EXCEPTION(numObjectIDs != Teuchos::as<int>(Coords->getLocalLength()), Exceptions::Incompatible, "Length of coordinates must be the same as the number of objects");

    ArrayRCP<ArrayRCP<const SC> > CoordsData(dim);
    for (int j = 0; j < dim; ++j)
      CoordsData[j] = Coords->getData(j);

    size_t numElements = Coords->getLocalLength();
    for (size_t i = 0; i < numElements; ++i)
      for (int j = 0; j < dim; ++j)
        coordinates[i*dim+j] = (double) CoordsData[j][i];

    *ierr = ZOLTAN_OK;

  } //GetProblemGeometry
  void IndefBlockedDiagonalSmoother<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Apply(MultiVector& X, const MultiVector& B, bool InitialGuessIsZero) const
  {
    TEUCHOS_TEST_FOR_EXCEPTION(SmootherPrototype::IsSetup() == false, Exceptions::RuntimeError, "MueLu::IndefBlockedDiagonalSmoother::Apply(): Setup() has not been called");

    Teuchos::RCP<Teuchos::FancyOStream> fos = Teuchos::getFancyOStream(Teuchos::rcpFromRef(std::cout));

    SC zero = Teuchos::ScalarTraits<SC>::zero(), one = Teuchos::ScalarTraits<SC>::one();

    // extract parameters from internal parameter list
    const ParameterList & pL = Factory::GetParameterList();
    LocalOrdinal nSweeps = pL.get<LocalOrdinal>("Sweeps");
    Scalar omega = pL.get<Scalar>("Damping factor");

    // wrap current solution vector in RCP
    RCP<MultiVector> rcpX = Teuchos::rcpFromRef(X);

    // create residual vector
    // contains current residual of current solution X with rhs B
    RCP<MultiVector> residual = MultiVectorFactory::Build(B.getMap(), B.getNumVectors());

    // incrementally improve solution vector X
    for (LocalOrdinal run = 0; run < nSweeps; ++run) {
      // 1) calculate current residual
      residual->update(one,B,zero); // residual = B
      A_->apply(*rcpX, *residual, Teuchos::NO_TRANS, -one, one);

      // split residual vector
      Teuchos::RCP<MultiVector> r1 = rangeMapExtractor_->ExtractVector(residual, 0);
      Teuchos::RCP<MultiVector> r2 = rangeMapExtractor_->ExtractVector(residual, 1);

      // 2) solve F * \Delta \tilde{x}_1 = r_1
      //    start with zero guess \Delta \tilde{x}_1
      RCP<MultiVector> xtilde1 = MultiVectorFactory::Build(F_->getRowMap(),1);
      xtilde1->putScalar(zero);
      velPredictSmoo_->Apply(*xtilde1,*r1);

      // 3) solve SchurComp equation
      //    start with zero guess \Delta \tilde{x}_2
      RCP<MultiVector> xtilde2 = MultiVectorFactory::Build(Z_->getRowMap(),1);
      xtilde2->putScalar(zero);
      schurCompSmoo_->Apply(*xtilde2,*r2);

      // 4) extract parts of solution vector X
      Teuchos::RCP<MultiVector> x1 = domainMapExtractor_->ExtractVector(rcpX, 0);
      Teuchos::RCP<MultiVector> x2 = domainMapExtractor_->ExtractVector(rcpX, 1);

      // 5) update solution vector with increments xhat1 and xhat2
      //    rescale increment for x2 with omega_
      x1->update(omega,*xtilde1,one); // x1 = x1_old + omega xtilde1
      x2->update(omega,*xtilde2,one); // x2 = x2_old + omega xtilde2

      // write back solution in global vector X
      domainMapExtractor_->InsertVector(x1, 0, rcpX);
      domainMapExtractor_->InsertVector(x2, 1, rcpX);

    }

  }
예제 #4
0
 void 
 CrsMatrixSolveOp<OpScalar,MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::apply(
             const MultiVector<OpScalar,LocalOrdinal,GlobalOrdinal,Node> & X,
                   MultiVector<OpScalar,LocalOrdinal,GlobalOrdinal,Node> & Y,
                   Teuchos::ETransp mode, OpScalar alpha, OpScalar beta) const 
 {
   TEUCHOS_TEST_FOR_EXCEPTION(!matrix_->isFillComplete(), std::runtime_error, 
       Teuchos::typeName(*this) << "::apply(): underlying matrix is not fill-complete.");
   TEUCHOS_TEST_FOR_EXCEPTION(X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
       Teuchos::typeName(*this) << "::apply(X,Y): X and Y must have the same number of vectors.");
   TEUCHOS_TEST_FOR_EXCEPTION(matrix_->isLowerTriangular() == false && matrix_->isUpperTriangular() == false, std::runtime_error,
       Teuchos::typeName(*this) << "::apply() requires either upper or lower triangular structure in underlying matrix.");
   TEUCHOS_TEST_FOR_EXCEPTION( alpha != Teuchos::ScalarTraits<OpScalar>::one() || beta != Teuchos::ScalarTraits<OpScalar>::zero(), std::runtime_error,
       Teuchos::typeName(*this) << "::apply(): non-trivial alpha,beta not supported at this time.");
   if (mode == Teuchos::NO_TRANS) {
     applyNonTranspose(X,Y);
   }
   else {
     applyTranspose(X,Y);
   }
 }
예제 #5
0
    /// \brief Compute \f$Y = \beta Y + \alpha B X\f$, where \f$B X\f$
    ///   represents the result of the local triangular solve.
    void
    apply (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> & X,
           MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Y,
           Teuchos::ETransp mode = Teuchos::NO_TRANS,
           Scalar alpha = Teuchos::ScalarTraits<Scalar>::one (),
           Scalar beta = Teuchos::ScalarTraits<Scalar>::zero ()) const
    {
      typedef Teuchos::ScalarTraits<Scalar> STOS;
      const char prefix[] = "Tpetra::CrsMatrixSolveOp::apply: ";

      TEUCHOS_TEST_FOR_EXCEPTION
        (! matrix_->isFillComplete (), std::runtime_error,
         prefix << "Underlying matrix is not fill complete.");
      TEUCHOS_TEST_FOR_EXCEPTION
        (! matrix_->isLowerTriangular () && ! matrix_->isUpperTriangular (),
         std::runtime_error, prefix << "The matrix is neither lower nor upper "
         "triangular.  Remember that in Tpetra, triangular-ness is a local "
         "(per MPI process) property.");
      TEUCHOS_TEST_FOR_EXCEPTION
        (X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
         prefix << "X and Y must have the same number of columns (vectors).  "
         "X.getNumVectors() = " << X.getNumVectors ()
         << " != Y.getNumVectors() = " << Y.getNumVectors () << ".");
      TEUCHOS_TEST_FOR_EXCEPTION
        (alpha != STOS::one () || beta != STOS::zero (), std::logic_error,
         prefix << "The case alpha != 1 or beta != 0 has not yet been "
         "implemented.  Please speak with the Tpetra developers.");
      if (mode == Teuchos::NO_TRANS) {
        applyNonTranspose (X,Y);
      } else if (mode == Teuchos::TRANS || mode == Teuchos::CONJ_TRANS) {
        applyTranspose (X, Y, mode);
      } else {
        TEUCHOS_TEST_FOR_EXCEPTION
          (true, std::invalid_argument, prefix << "The 'mode' argument has an "
           "invalid value " << mode << ".  Valid values are Teuchos::NO_TRANS="
           << Teuchos::NO_TRANS << ", Teuchos::TRANS=" << Teuchos::TRANS << ", "
           "and Teuchos::CONJ_TRANS=" << Teuchos::CONJ_TRANS << ".");
      }
    }
예제 #6
0
      void
      apply (const MultiVector<S,LO,GO,Node> &X, 
	     MultiVector<S,LO,GO,Node> &Y,
	     Teuchos::ETransp mode = Teuchos::NO_TRANS, 
	     S alpha = Teuchos::ScalarTraits<S>::one (), 
	     S beta = Teuchos::ScalarTraits<S>::zero ()) const
      {
	const size_t numVectors = X.getNumVectors ();
	RCP<MultiVector<S,LO,GO,Node> > mvec_inout;
	RCP<const MultiVector<S,LO,GO,Node> > mvec_in2;

	if (_importer != null) {
	  if (_importMV != null && _importMV->getNumVectors () != numVectors) {
	    _importMV = null;
	  }
	  if (_importMV == null) {
	    _importMV = createMultiVector<S> (_importer->getTargetMap (), numVectors);
	  }
	  _importMV->doImport (X, *_importer, INSERT);
	  mvec_in2 = _importMV;
	}
	else {
	  mvec_in2 = rcpFromRef(X);
	}

	if (_exporter != null) {
	  if (_exportMV != null && _exportMV->getNumVectors () != numVectors) {
	    _exportMV = null;
	  }
	  if (_exportMV == null) {
	    _exportMV = createMultiVector<S> (_exporter->getSourceMap (), numVectors);
	  }
	  mvec_inout = _exportMV;
	}
	else {
	  mvec_inout = rcpFromRef (Y);
	}
	_kernel.setAlphaBeta (alpha, beta);
	//
	for (size_t j=0; j < numVectors; ++j) {
	  RCP<       Vector<S,LO,GO,Node> > vec_inout = mvec_inout->getVectorNonConst(j);
	  RCP< const Vector<S,LO,GO,Node> > vec_in2   = mvec_in2->getVector(j);
	  Tpetra::RTI::detail::binary_transform( *vec_inout, *vec_in2, _kernel );
	}
	// export
	if (_exporter != null) {
	  Y.doExport (*_exportMV, *_exporter, ADD);
	}
      }
  void Amesos2Smoother<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Apply(MultiVector& X, const MultiVector& B, bool InitialGuessIsZero) const {
    TEUCHOS_TEST_FOR_EXCEPTION(SmootherPrototype::IsSetup() == false, Exceptions::RuntimeError, "MueLu::Amesos2Smoother::Apply(): Setup() has not been called");

    RCP<Tpetra_MultiVector> tX, tB;
    if (!useTransformation_) {
      tX = Utilities::MV2NonConstTpetraMV2(X);
      tB = Utilities::MV2NonConstTpetraMV2(const_cast<MultiVector&>(B));
    } else {
      // Copy data of the original vectors into the transformed ones
      size_t numVectors = X.getNumVectors();
      size_t length     = X.getLocalLength();

      TEUCHOS_TEST_FOR_EXCEPTION(numVectors > 1, Exceptions::RuntimeError,
        "MueLu::Amesos2Smoother::Apply: Fixing coarse matrix for Amesos2 for multivectors has not been implemented yet.");
      ArrayRCP<const SC> Xdata  = X.  getData(0),         Bdata  = B.  getData(0);
      ArrayRCP<SC>       X_data = X_->getDataNonConst(0), B_data = B_->getDataNonConst(0);

      for (size_t i = 0; i < length; i++) {
        X_data[i] = Xdata[i];
        B_data[i] = Bdata[i];
      }

      tX = Utilities::MV2NonConstTpetraMV2(*X_);
      tB = Utilities::MV2NonConstTpetraMV2(*B_);
    }

    prec_->setX(tX);
    prec_->setB(tB);

    prec_->solve();

    prec_->setX(Teuchos::null);
    prec_->setB(Teuchos::null);

    if (useTransformation_) {
      // Copy data from the transformed vectors into the original ones
      size_t length     = X.getLocalLength();

      ArrayRCP<SC>       Xdata  = X.  getDataNonConst(0);
      ArrayRCP<const SC> X_data = X_->getData(0);

      for (size_t i = 0; i < length; i++)
        Xdata[i] = X_data[i];
    }
  }
예제 #8
0
  void Constraint<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Setup(const MultiVector& B, const MultiVector& Bc, RCP<const CrsGraph> Ppattern) {
    Ppattern_ = Ppattern;

    const RCP<const Map> uniqueMap    = Ppattern_->getDomainMap();
    const RCP<const Map> nonUniqueMap = Ppattern_->getColMap();
    RCP<const Import> importer = ImportFactory::Build(uniqueMap, nonUniqueMap);

    const size_t NSDim = Bc.getNumVectors();
    X_ = MultiVectorFactory::Build(nonUniqueMap, NSDim);
    X_->doImport(Bc, *importer, Xpetra::INSERT);

    size_t numRows = Ppattern_->getNodeNumRows();
    XXtInv_.resize(numRows);
    Teuchos::SerialDenseVector<LO,SC> BcRow(NSDim, false);
    for (size_t i = 0; i < numRows; i++) {
      Teuchos::ArrayView<const LO> indices;
      Ppattern_->getLocalRowView(i, indices);

      size_t nnz = indices.size();

      Teuchos::SerialDenseMatrix<LO,SC> locX(NSDim, nnz, false);
      for (size_t j = 0; j < nnz; j++) {
        for (size_t k = 0; k < NSDim; k++)
          BcRow[k] = X_->getData(k)[indices[j]];

        Teuchos::setCol(BcRow, (LO)j, locX);
      }

      XXtInv_[i] = Teuchos::SerialDenseMatrix<LO,SC>(NSDim, NSDim, false);

      Teuchos::BLAS<LO,SC> blas;
      blas.GEMM(Teuchos::NO_TRANS, Teuchos::CONJ_TRANS, NSDim, NSDim, nnz,
                Teuchos::ScalarTraits<SC>::one(), locX.values(), locX.stride(),
                locX.values(), locX.stride(), Teuchos::ScalarTraits<SC>::zero(),
                XXtInv_[i].values(), XXtInv_[i].stride());

      Teuchos::LAPACK<LO,SC> lapack;
      LO info, lwork = 3*NSDim;
      ArrayRCP<LO> IPIV(NSDim);
      ArrayRCP<SC> WORK(lwork);
      lapack.GETRF(NSDim, NSDim, XXtInv_[i].values(), XXtInv_[i].stride(), IPIV.get(), &info);
      lapack.GETRI(NSDim, XXtInv_[i].values(), XXtInv_[i].stride(), IPIV.get(), WORK.get(), lwork, &info);
    }
  }
  void BraessSarazinSmoother<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Apply(MultiVector& X, const MultiVector& B, bool InitialGuessIsZero) const {
    TEUCHOS_TEST_FOR_EXCEPTION(SmootherPrototype::IsSetup() == false, Exceptions::RuntimeError,
                               "MueLu::BraessSarazinSmoother::Apply(): Setup() has not been called");

    RCP<MultiVector> rcpX    = rcpFromRef(X);
    RCP<MultiVector> deltaX0 = MultiVectorFactory::Build(A00_->getRowMap(), 1);
    RCP<MultiVector> deltaX1 = MultiVectorFactory::Build(A10_->getRowMap(), 1);
    RCP<MultiVector> Rtmp    = MultiVectorFactory::Build(A10_->getRowMap(), 1);

    typedef Teuchos::ScalarTraits<SC> STS;
    SC one = STS::one(), zero = STS::zero();

    // extract parameters from internal parameter list
    const ParameterList& pL = Factory::GetParameterList();
    LO nSweeps = pL.get<LO>("Sweeps");

    RCP<MultiVector> R;
    if (InitialGuessIsZero)  {
      R = MultiVectorFactory::Build(B.getMap(), B.getNumVectors());
      R->update(one, B, zero);
    } else {
      R = Utilities::Residual(*A_, X, B);
    }

    for (LO run = 0; run < nSweeps; ++run) {
      // Extract corresponding subvectors from X and R
      RCP<MultiVector> R0 = rangeMapExtractor_ ->ExtractVector(R, 0);
      RCP<MultiVector> R1 = rangeMapExtractor_ ->ExtractVector(R, 1);

      RCP<MultiVector> X0 = domainMapExtractor_->ExtractVector(rcpX, 0);
      RCP<MultiVector> X1 = domainMapExtractor_->ExtractVector(rcpX, 1);

      // Calculate Rtmp = R1 - D * deltaX0 (equation 8.14)
      deltaX0->putScalar(zero);
      deltaX0->elementWiseMultiply(one, *D_, *R0, zero);    // deltaX0 = D * R0 (equation 8.13)
      A10_->apply(*deltaX0, *Rtmp);                         // Rtmp    = A10*D*deltaX0 (intermediate step)
      Rtmp->update(one, *R1, -one);                         // Rtmp    = R1 - A10*D*deltaX0

      // Compute deltaX1 (pressure correction)
      // We use user provided preconditioner
      deltaX1->putScalar(zero);                             // just for safety
      smoo_->Apply(*deltaX1, *Rtmp);

      // Compute deltaX0
      deltaX0->putScalar(zero);                             // just for safety
      A01_->apply(*deltaX1, *deltaX0);                      // deltaX0 = A01*deltaX1
      deltaX0->update(one, *R0, -one);                      // deltaX0 = R0 - A01*deltaX1
      R0.swap(deltaX0);
      deltaX0->elementWiseMultiply(one, *D_, *R0, zero);    // deltaX0 = D*(R0 - A01*deltaX1)

      // Update solution
      X0->update(one, *deltaX0, one);
      X1->update(one, *deltaX1, one);

      domainMapExtractor_->InsertVector(X0, 0, rcpX);
      domainMapExtractor_->InsertVector(X1, 1, rcpX);

      if (run < nSweeps-1)
        R = Utilities::Residual(*A_, X, B);
    }
  }
  void Constraint<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Setup(const MultiVector& B, const MultiVector& Bc, RCP<const CrsGraph> Ppattern) {
    const size_t NSDim = Bc.getNumVectors();

    Ppattern_ = Ppattern;

    size_t numRows = Ppattern_->getNodeNumRows();
    XXtInv_.resize(numRows);

    RCP<const Import> importer = Ppattern_->getImporter();

    X_ = MultiVectorFactory::Build(Ppattern_->getColMap(), NSDim);
    if (!importer.is_null())
      X_->doImport(Bc, *importer, Xpetra::INSERT);
    else
      *X_ = Bc;

    std::vector<const SC*> Xval(NSDim);
    for (size_t j = 0; j < NSDim; j++)
      Xval[j] = X_->getData(j).get();

    SC zero = Teuchos::ScalarTraits<SC>::zero();
    SC one  = Teuchos::ScalarTraits<SC>::one();

    Teuchos::BLAS  <LO,SC> blas;
    Teuchos::LAPACK<LO,SC> lapack;
    LO lwork = 3*NSDim;
    ArrayRCP<LO> IPIV(NSDim);
    ArrayRCP<SC> WORK(lwork);

    for (size_t i = 0; i < numRows; i++) {
      Teuchos::ArrayView<const LO> indices;
      Ppattern_->getLocalRowView(i, indices);

      size_t nnz = indices.size();

      XXtInv_[i] = Teuchos::SerialDenseMatrix<LO,SC>(NSDim, NSDim, false/*zeroOut*/);
      Teuchos::SerialDenseMatrix<LO,SC>& XXtInv = XXtInv_[i];

      if (NSDim == 1) {
        SC d = zero;
        for (size_t j = 0; j < nnz; j++)
          d += Xval[0][indices[j]] * Xval[0][indices[j]];
        XXtInv(0,0) = one/d;

      } else {
        Teuchos::SerialDenseMatrix<LO,SC> locX(NSDim, nnz, false/*zeroOut*/);
        for (size_t j = 0; j < nnz; j++)
          for (size_t k = 0; k < NSDim; k++)
            locX(k,j) = Xval[k][indices[j]];

        // XXtInv_ = (locX*locX^T)^{-1}
        blas.GEMM(Teuchos::NO_TRANS, Teuchos::CONJ_TRANS, NSDim, NSDim, nnz,
                   one,   locX.values(),   locX.stride(),
                          locX.values(),   locX.stride(),
                  zero, XXtInv.values(), XXtInv.stride());

        LO info;
        // Compute LU factorization using partial pivoting with row exchanges
        lapack.GETRF(NSDim, NSDim, XXtInv.values(), XXtInv.stride(), IPIV.get(), &info);
        // Use the computed factorization to compute the inverse
        lapack.GETRI(NSDim, XXtInv.values(), XXtInv.stride(), IPIV.get(), WORK.get(), lwork, &info);
      }
    }
  }
  void BlockedGaussSeidelSmoother<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Apply(MultiVector &X, const MultiVector& B, bool InitialGuessIsZero) const
  {
    TEUCHOS_TEST_FOR_EXCEPTION(SmootherPrototype::IsSetup() == false, Exceptions::RuntimeError, "MueLu::BlockedGaussSeidelSmoother::Apply(): Setup() has not been called");

    RCP<Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> > residual = MultiVectorFactory::Build(B.getMap(), B.getNumVectors());
    RCP<Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> > tempres = MultiVectorFactory::Build(B.getMap(), B.getNumVectors());
    RCP<MultiVector> rcpX = Teuchos::rcpFromRef(X);


    //Teuchos::RCP<Teuchos::FancyOStream> fos = Teuchos::getFancyOStream(Teuchos::rcpFromRef(std::cout));

    // extract parameters from internal parameter list
    const ParameterList & pL = Factory::GetParameterList();
    LocalOrdinal nSweeps = pL.get<LocalOrdinal>("Sweeps");
    Scalar omega = pL.get<Scalar>("Damping factor");

    // outer Richardson loop
    for (LocalOrdinal run = 0; run < nSweeps; ++run) {
      // one BGS sweep
      // loop over all block rows
      for(size_t i = 0; i<Inverse_.size(); i++) {

        // calculate block residual r = B-A*X
        // note: A_ is the full blocked operator
        residual->update(1.0,B,0.0); // r = B
        A_->apply(X, *residual, Teuchos::NO_TRANS, -1.0, 1.0);

        // extract corresponding subvectors from X and residual
        size_t blockRowIndex = at(bgsOrderingIndex2blockRowIndex_, i); // == bgsOrderingIndex2blockRowIndex_.at(i) (only available since C++11)
        Teuchos::RCP<MultiVector> Xi = domainMapExtractor_->ExtractVector(rcpX, blockRowIndex);
        Teuchos::RCP<MultiVector> ri = rangeMapExtractor_->ExtractVector(residual, blockRowIndex);

        Teuchos::RCP<MultiVector> tXi = domainMapExtractor_->getVector(blockRowIndex, X.getNumVectors());

        // apply solver/smoother
        Inverse_.at(i)->Apply(*tXi, *ri, false);

        // update vector
        Xi->update(omega,*tXi,1.0);  // X_{i+1} = X_i + omega \Delta X_i

        // update corresponding part of rhs and lhs
        domainMapExtractor_->InsertVector(Xi, blockRowIndex, rcpX); // TODO wrong! fix me
      }
    }

  }
예제 #12
0
  void 
  CrsMatrixSolveOp<OpScalar,MatScalar,LocalOrdinal,GlobalOrdinal,Node,LocalMatOps>::applyNonTranspose(
      const MultiVector<OpScalar,LocalOrdinal,GlobalOrdinal,Node> & X_in, 
            MultiVector<OpScalar,LocalOrdinal,GlobalOrdinal,Node> & Y_in) const 
  {
    // Solve U X = Y  or  L X = Y
    // X belongs to domain map, while Y belongs to range map
    typedef Teuchos::ScalarTraits<OpScalar> ST;
    using Teuchos::null;

    const size_t numVectors = X_in.getNumVectors();
    Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Node> > importer = matrix_->getGraph()->getImporter();
    Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Node> > exporter = matrix_->getGraph()->getExporter();
    Teuchos::RCP<const MV> X;

    // it is okay if X and Y reference the same data, because we can perform a triangular solve in-situ.
    // however, we require that column access to each is strided.

    // set up import/export temporary multivectors
    if (importer != null) {
      if (importMV_ != null && importMV_->getNumVectors() != numVectors) importMV_ = null;
      if (importMV_ == null) {
        importMV_ = Teuchos::rcp( new MV(matrix_->getColMap(),numVectors) );
      }
    }
    if (exporter != null) {
      if (exportMV_ != null && exportMV_->getNumVectors() != numVectors) exportMV_ = null;
      if (exportMV_ == null) {
        exportMV_ = Teuchos::rcp( new MV(matrix_->getRowMap(),numVectors) );
      }
    }

    // solve(NO_TRANS): RangeMap -> DomainMap
    // lclMatSolve_: RowMap -> ColMap
    // importer: DomainMap -> ColMap
    // exporter: RowMap -> RangeMap
    // 
    // solve = reverse(exporter)  o   lclMatSolve_  o reverse(importer)
    //         RangeMap   ->    RowMap     ->     ColMap         ->    DomainMap
    // If we have a non-trivial exporter, we must import elements that are permuted or are on other processors
    if (exporter != null) {
      exportMV_->doImport(X_in, *exporter, INSERT);
      X = exportMV_;
    }
    else if (X_in.isConstantStride() == false) {
      // cannot handle non-constant stride right now
      // generate a copy of X_in
      X = Teuchos::rcp(new MV(X_in));
    }
    else {
      // just temporary, so this non-owning RCP is okay
      X = Teuchos::rcp( &X_in, false );
    }

    // If we have a non-trivial importer, we must export elements that are permuted or belong to other processors
    // We will compute solution into the to-be-exported MV
    if (importer != null) {
      matrix_->template localSolve<OpScalar,OpScalar>(*X,*importMV_,Teuchos::NO_TRANS);
      // Make sure target is zero: necessary because we are adding.
      Y_in.putScalar(ST::zero());  
      Y_in.doExport(*importMV_, *importer, ADD);
    }
    // otherwise, solve into Y
    else {
      // can't solve into non-strided multivector
      if (Y_in.isConstantStride() == false) {
        // generate a strided copy of Y
        MV Y(Y_in);
        matrix_->template localSolve<OpScalar,OpScalar>(*X,Y,Teuchos::NO_TRANS);
        Y_in = Y;
      }
      else {
        matrix_->template localSolve<OpScalar,OpScalar>(*X,Y_in,Teuchos::NO_TRANS);
      }
    }
  }
예제 #13
0
  void Ifpack2Smoother<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Apply(MultiVector& X, const MultiVector& B, bool InitialGuessIsZero) const {
    TEUCHOS_TEST_FOR_EXCEPTION(SmootherPrototype::IsSetup() == false, Exceptions::RuntimeError, "MueLu::IfpackSmoother::Apply(): Setup() has not been called");

    // Forward the InitialGuessIsZero option to Ifpack2
    // TODO:  It might be nice to switch back the internal
    //        "zero starting solution" option of the ifpack2 object prec_ to his
    //        initial value at the end but there is no way right now to get
    //        the current value of the "zero starting solution" in ifpack2.
    //        It's not really an issue, as prec_  can only be used by this method.
    // TODO: When https://software.sandia.gov/bugzilla/show_bug.cgi?id=5283#c2 is done
    // we should remove the if/else/elseif and just test if this
    // option is supported by current ifpack2 preconditioner
    Teuchos::ParameterList paramList;
    bool supportInitialGuess = false;
    if (type_ == "CHEBYSHEV") {
      paramList.set("chebyshev: zero starting solution", InitialGuessIsZero);
      SetPrecParameters(paramList);
      supportInitialGuess = true;

    } else if (type_ == "RELAXATION") {
      paramList.set("relaxation: zero starting solution", InitialGuessIsZero);
      SetPrecParameters(paramList);
      supportInitialGuess = true;

    } else if (type_ == "KRYLOV") {
      paramList.set("krylov: zero starting solution", InitialGuessIsZero);
      SetPrecParameters(paramList);
      supportInitialGuess = true;

    } else if (type_ == "SCHWARZ") {
      paramList.set("schwarz: zero starting solution", InitialGuessIsZero);
      //Because additive Schwarz has "delta" semantics, it's sufficient to
      //toggle only the zero initial guess flag, and not pass in already
      //set parameters.  If we call SetPrecParameters, the subdomain solver
      //will be destroyed.
      prec_->setParameters(paramList);
      supportInitialGuess = true;
    }

    //TODO JJH 30Apr2014  Calling SetPrecParameters(paramList) when the smoother
    //is Ifpack2::AdditiveSchwarz::setParameterList() will destroy the subdomain
    //(aka inner) solver.  This behavior is documented but a departure from what
    //it previously did, and what other Ifpack2 solvers currently do.  So I have
    //moved SetPrecParameters(paramList) into the if-else block above.

    // Apply
    if (InitialGuessIsZero || supportInitialGuess) {
      Tpetra::MultiVector<SC,LO,GO,NO>&       tpX = Utilities::MV2NonConstTpetraMV(X);
      const Tpetra::MultiVector<SC,LO,GO,NO>& tpB = Utilities::MV2TpetraMV(B);
      prec_->apply(tpB, tpX);
    } else {
      typedef Teuchos::ScalarTraits<Scalar> TST;
      RCP<MultiVector> Residual   = Utilities::Residual(*A_, X, B);
      RCP<MultiVector> Correction = MultiVectorFactory::Build(A_->getDomainMap(), X.getNumVectors());

      Tpetra::MultiVector<SC,LO,GO,NO>&       tpX = Utilities::MV2NonConstTpetraMV(*Correction);
      const Tpetra::MultiVector<SC,LO,GO,NO>& tpB = Utilities::MV2TpetraMV(*Residual);

      prec_->apply(tpB, tpX);

      X.update(TST::one(), *Correction, TST::one());
    }
  }
  void IndefBlockedDiagonalSmoother<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Apply(MultiVector& X, const MultiVector& B, bool InitialGuessIsZero) const
  {
    TEUCHOS_TEST_FOR_EXCEPTION(SmootherPrototype::IsSetup() == false, Exceptions::RuntimeError, "MueLu::IndefBlockedDiagonalSmoother::Apply(): Setup() has not been called");

    Teuchos::RCP<Teuchos::FancyOStream> fos = Teuchos::getFancyOStream(Teuchos::rcpFromRef(std::cout));

    SC zero = Teuchos::ScalarTraits<SC>::zero(), one = Teuchos::ScalarTraits<SC>::one();

    // The following boolean flags catch the case where we need special transformation
    // for the GIDs when calling the subsmoothers.
    RCP<BlockedCrsMatrix> bA00 = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(F_);
    RCP<BlockedCrsMatrix> bA11 = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(Z_);
    bool bA00ThyraSpecialTreatment = false;
    bool bA11ThyraSpecialTreatment = false;
    if (bA00 != Teuchos::null) {
      if(bA00->Rows() == 1 && bA00->Cols() == 1 && rangeMapExtractor_->getThyraMode() == true) bA00ThyraSpecialTreatment = true;
    }
    if (bA11 != Teuchos::null) {
      if(bA11->Rows() == 1 && bA11->Cols() == 1 && rangeMapExtractor_->getThyraMode() == true) bA11ThyraSpecialTreatment = true;
    }

    // extract parameters from internal parameter list
    const ParameterList & pL = Factory::GetParameterList();
    LocalOrdinal nSweeps = pL.get<LocalOrdinal>("Sweeps");
    Scalar omega = pL.get<Scalar>("Damping factor");

    // wrap current solution vector in RCP
    RCP<MultiVector> rcpX = Teuchos::rcpFromRef(X);

    // create residual vector
    // contains current residual of current solution X with rhs B
    RCP<MultiVector> residual = MultiVectorFactory::Build(B.getMap(), B.getNumVectors());

    // incrementally improve solution vector X
    for (LocalOrdinal run = 0; run < nSweeps; ++run) {
      // 1) calculate current residual
      residual->update(one,B,zero); // residual = B
      A_->apply(*rcpX, *residual, Teuchos::NO_TRANS, -one, one);

      // split residual vector
      Teuchos::RCP<MultiVector> r1 = rangeMapExtractor_->ExtractVector(residual, 0);
      Teuchos::RCP<MultiVector> r2 = rangeMapExtractor_->ExtractVector(residual, 1);

      // 2) solve F * \Delta \tilde{x}_1 = r_1
      //    start with zero guess \Delta \tilde{x}_1
      RCP<MultiVector> xtilde1 = MultiVectorFactory::Build(F_->getRowMap(),X.getNumVectors(),true);

      // Special handling if SchurComplement operator was a 1x1 blocked operator in Thyra mode
      // Then, we have to translate the Xpetra offset GIDs to plain Thyra GIDs and vice versa
      if(bA00ThyraSpecialTreatment == true) {
        RCP<MultiVector> xtilde1_thyra = domainMapExtractor_->getVector(0, X.getNumVectors(), true);
        RCP<MultiVector> r1_thyra = rangeMapExtractor_->getVector(0, B.getNumVectors(), true);
        // transform vector
        for(size_t k=0; k < r1->getNumVectors(); k++) {
          Teuchos::ArrayRCP<const Scalar> xpetraVecData  = r1->getData(k);
          Teuchos::ArrayRCP<Scalar> thyraVecData = r1_thyra->getDataNonConst(k);
          for(size_t i=0; i < r1->getLocalLength(); i++) {
            thyraVecData[i] = xpetraVecData[i];
          }
        }

        velPredictSmoo_->Apply(*xtilde1_thyra,*r1_thyra);

        for(size_t k=0; k < xtilde1_thyra->getNumVectors(); k++) {
          Teuchos::ArrayRCP<Scalar> xpetraVecData  = xtilde1->getDataNonConst(k);
          Teuchos::ArrayRCP<const Scalar> thyraVecData = xtilde1_thyra->getData(k);
          for(size_t i=0; i < xtilde1_thyra->getLocalLength(); i++) {
            xpetraVecData[i] = thyraVecData[i];
          }
        }
      } else {
        velPredictSmoo_->Apply(*xtilde1,*r1);
      }

      // 3) solve SchurComp equation
      //    start with zero guess \Delta \tilde{x}_2
      RCP<MultiVector> xtilde2 = MultiVectorFactory::Build(Z_->getRowMap(),X.getNumVectors(),true);

      // Special handling if SchurComplement operator was a 1x1 blocked operator in Thyra mode
      // Then, we have to translate the Xpetra offset GIDs to plain Thyra GIDs and vice versa
      if(bA11ThyraSpecialTreatment == true) {
        RCP<MultiVector> xtilde2_thyra = domainMapExtractor_->getVector(1, X.getNumVectors(), true);
        RCP<MultiVector> r2_thyra = rangeMapExtractor_->getVector(1, B.getNumVectors(), true);
        // transform vector
        for(size_t k=0; k < r2->getNumVectors(); k++) {
          Teuchos::ArrayRCP<const Scalar> xpetraVecData  = r2->getData(k);
          Teuchos::ArrayRCP<Scalar> thyraVecData = r2_thyra->getDataNonConst(k);
          for(size_t i=0; i < r2->getLocalLength(); i++) {
            thyraVecData[i] = xpetraVecData[i];
          }
        }

        schurCompSmoo_->Apply(*xtilde2_thyra,*r2_thyra);

        for(size_t k=0; k < xtilde2_thyra->getNumVectors(); k++) {
          Teuchos::ArrayRCP<Scalar> xpetraVecData  = xtilde2->getDataNonConst(k);
          Teuchos::ArrayRCP<const Scalar> thyraVecData = xtilde2_thyra->getData(k);
          for(size_t i=0; i < xtilde2_thyra->getLocalLength(); i++) {
            xpetraVecData[i] = thyraVecData[i];
          }
        }
      } else {
        schurCompSmoo_->Apply(*xtilde2,*r2);
      }

      // 4) extract parts of solution vector X
      Teuchos::RCP<MultiVector> x1 = domainMapExtractor_->ExtractVector(rcpX, 0);
      Teuchos::RCP<MultiVector> x2 = domainMapExtractor_->ExtractVector(rcpX, 1);

      // 5) update solution vector with increments xhat1 and xhat2
      //    rescale increment for x2 with omega_
      x1->update(omega,*xtilde1,one); // x1 = x1_old + omega xtilde1
      x2->update(omega,*xtilde2,one); // x2 = x2_old + omega xtilde2

      // write back solution in global vector X
      domainMapExtractor_->InsertVector(x1, 0, rcpX);
      domainMapExtractor_->InsertVector(x2, 1, rcpX);
    }
  }
  void SimpleSmoother<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Apply(MultiVector& X, const MultiVector& B, bool InitialGuessIsZero) const
  {
    TEUCHOS_TEST_FOR_EXCEPTION(SmootherPrototype::IsSetup() == false, Exceptions::RuntimeError, "MueLu::SimpleSmoother::Apply(): Setup() has not been called");
#ifdef HAVE_MUELU_DEBUG
    TEUCHOS_TEST_FOR_EXCEPTION(A_->getRangeMap()->isSameAs(*(B.getMap())) == false, Exceptions::RuntimeError, "MueLu::SimpleSmoother::Apply(): The map of RHS vector B is not the same as range map of the blocked operator A. Please check the map of B and A.");
    TEUCHOS_TEST_FOR_EXCEPTION(A_->getDomainMap()->isSameAs(*(X.getMap())) == false, Exceptions::RuntimeError, "MueLu::SimpleSmoother::Apply(): The map of the solution vector X is not the same as domain map of the blocked operator A. Please check the map of X and A.");
#endif

    Teuchos::RCP<Teuchos::FancyOStream> fos = Teuchos::getFancyOStream(Teuchos::rcpFromRef(std::cout));

    SC zero = Teuchos::ScalarTraits<SC>::zero(), one = Teuchos::ScalarTraits<SC>::one();

    // extract parameters from internal parameter list
    const ParameterList & pL = Factory::GetParameterList();
    LocalOrdinal nSweeps = pL.get<LocalOrdinal>("Sweeps");
    Scalar omega = pL.get<Scalar>("Damping factor");

    // The boolean flags check whether we use Thyra or Xpetra style GIDs
    // However, assuming that SIMPLE always only works for 2x2 blocked operators, we
    // most often have to use the ReorderedBlockedCrsOperator as input. If either the
    // F or Z (or SchurComplement block S) are 1x1 blocked operators with Thyra style
    // GIDs we need an extra transformation of vectors
    // In this case, we use the Xpetra (offset) GIDs for all operations and only transform
    // the input/output vectors before and after the subsolver calls!
    bool bRangeThyraModePredict  = rangeMapExtractor_->getThyraMode()  && (Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(F_) == Teuchos::null);
    bool bDomainThyraModePredict = domainMapExtractor_->getThyraMode() && (Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(F_) == Teuchos::null);
    bool bRangeThyraModeSchur    = rangeMapExtractor_->getThyraMode()  && (Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(Z_) == Teuchos::null);
    bool bDomainThyraModeSchur   = domainMapExtractor_->getThyraMode() && (Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(Z_) == Teuchos::null);

    // The following boolean flags catch the case where we need special transformation
    // for the GIDs when calling the subsmoothers.
    RCP<BlockedCrsMatrix> bF = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(F_);
    RCP<BlockedCrsMatrix> bZ = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(Z_);
    bool bFThyraSpecialTreatment = false;
    bool bZThyraSpecialTreatment = false;
    if (bF != Teuchos::null) {
      if(bF->Rows() == 1 && bF->Cols() == 1 && rangeMapExtractor_->getThyraMode() == true) bFThyraSpecialTreatment = true;
    }
    if (bZ != Teuchos::null) {
      if(bZ->Rows() == 1 && bZ->Cols() == 1 && rangeMapExtractor_->getThyraMode() == true) bZThyraSpecialTreatment = true;
    }

#if 1// new implementation

    // create a new vector for storing the current residual in a blocked multi vector
    RCP<MultiVector> res = MultiVectorFactory::Build(B.getMap(), B.getNumVectors());
    RCP<BlockedMultiVector> residual = Teuchos::rcp(new BlockedMultiVector(rangeMapExtractor_,res));

    // create a new solution vector as a blocked multi vector
    RCP<MultiVector> rcpX = Teuchos::rcpFromRef(X);
    RCP<BlockedMultiVector> bX = Teuchos::rcp(new BlockedMultiVector(domainMapExtractor_,rcpX));

    // create a blocked rhs vector
    RCP<const MultiVector> rcpB = Teuchos::rcpFromRef(B);
    RCP<const BlockedMultiVector> bB = Teuchos::rcp(new const BlockedMultiVector(rangeMapExtractor_,rcpB));


    // incrementally improve solution vector X
    for (LocalOrdinal run = 0; run < nSweeps; ++run) {
      // 1) calculate current residual
      residual->update(one,*bB,zero); // r = B
      A_->apply(*bX, *residual, Teuchos::NO_TRANS, -one, one);

      // split residual vector
      Teuchos::RCP<MultiVector> r1 = rangeMapExtractor_->ExtractVector(residual, 0, bRangeThyraModePredict);
      Teuchos::RCP<MultiVector> r2 = rangeMapExtractor_->ExtractVector(residual, 1, bRangeThyraModeSchur);

      // 2) solve F * \Delta \tilde{x}_1 = r_1
      //    start with zero guess \Delta \tilde{x}_1
      RCP<MultiVector> xtilde1 = domainMapExtractor_->getVector(0, X.getNumVectors(), bDomainThyraModePredict);
      xtilde1->putScalar(zero);

      if(bFThyraSpecialTreatment == true) {
        xtilde1->replaceMap(domainMapExtractor_->getMap(0,true));
        r1->replaceMap(rangeMapExtractor_->getMap(0,true));
        velPredictSmoo_->Apply(*xtilde1,*r1);
        xtilde1->replaceMap(domainMapExtractor_->getMap(0,false));
      } else {
        velPredictSmoo_->Apply(*xtilde1,*r1);
      }

      // 3) calculate rhs for SchurComp equation
      //    r_2 - D \Delta \tilde{x}_1
      RCP<MultiVector> schurCompRHS = rangeMapExtractor_->getVector(1, B.getNumVectors(), bRangeThyraModeSchur);
      D_->apply(*xtilde1,*schurCompRHS);
      schurCompRHS->update(one,*r2,-one);

      // 4) solve SchurComp equation
      //    start with zero guess \Delta \tilde{x}_2
      RCP<MultiVector> xtilde2 = domainMapExtractor_->getVector(1, X.getNumVectors(), bDomainThyraModeSchur);
      xtilde2->putScalar(zero);

      // Special handling if SchurComplement operator was a 1x1 blocked operator in Thyra mode
      // Then, we have to translate the Xpetra offset GIDs to plain Thyra GIDs and vice versa
      if(bZThyraSpecialTreatment == true) {
        xtilde2->replaceMap(domainMapExtractor_->getMap(1,true));
        schurCompRHS->replaceMap(rangeMapExtractor_->getMap(1,true));
        schurCompSmoo_->Apply(*xtilde2,*schurCompRHS);
        xtilde2->replaceMap(domainMapExtractor_->getMap(1,false));
      } else {
        schurCompSmoo_->Apply(*xtilde2,*schurCompRHS);
      }

      // 5) scale xtilde2 with omega
      //    store this in xhat2
      RCP<MultiVector> xhat2 = domainMapExtractor_->getVector(1, X.getNumVectors(), bDomainThyraModeSchur);
      xhat2->update(omega,*xtilde2,zero);

      // 6) calculate xhat1
      RCP<MultiVector> xhat1      = domainMapExtractor_->getVector(0, X.getNumVectors(), bDomainThyraModePredict);
      RCP<MultiVector> xhat1_temp = domainMapExtractor_->getVector(0, X.getNumVectors(), bDomainThyraModePredict);
      G_->apply(*xhat2,*xhat1_temp); // store result temporarely in xtilde1_temp
      xhat1->elementWiseMultiply(one/*/omega*/,*diagFinv_,*xhat1_temp,zero);
      xhat1->update(one,*xtilde1,-one);

      // 7) extract parts of solution vector X
      Teuchos::RCP<MultiVector> x1 = domainMapExtractor_->ExtractVector(bX, 0, bDomainThyraModePredict);
      Teuchos::RCP<MultiVector> x2 = domainMapExtractor_->ExtractVector(bX, 1, bDomainThyraModeSchur);

      // 8) update solution vector with increments xhat1 and xhat2
      //    rescale increment for x2 with omega_
      x1->update(one,*xhat1,one);    // x1 = x1_old + xhat1
      x2->update(/*omega*/ one,*xhat2,one); // x2 = x2_old + omega xhat2
      // write back solution in global vector X
      domainMapExtractor_->InsertVector(x1, 0, bX, bDomainThyraModePredict);
      domainMapExtractor_->InsertVector(x2, 1, bX, bDomainThyraModeSchur);
    }

    // write back solution
    domainMapExtractor_->InsertVector(bX->getMultiVector(0,bDomainThyraModePredict), 0, rcpX, bDomainThyraModePredict);
    domainMapExtractor_->InsertVector(bX->getMultiVector(1,bDomainThyraModeSchur), 1, rcpX, bDomainThyraModeSchur);
#else

    // wrap current solution vector in RCP
    RCP<MultiVector> rcpX = Teuchos::rcpFromRef(X);

    // create residual vector
    // contains current residual of current solution X with rhs B
    RCP<MultiVector> residual = MultiVectorFactory::Build(B.getMap(), B.getNumVectors());

    // incrementally improve solution vector X
    for (LocalOrdinal run = 0; run < nSweeps; ++run) {
      // 1) calculate current residual
      residual->update(one,B,zero); // residual = B
      A_->apply(*rcpX, *residual, Teuchos::NO_TRANS, -one, one);
      // split residual vector
      Teuchos::RCP<MultiVector> r1 = rangeMapExtractor_->ExtractVector(residual, 0, bRangeThyraModePredict);
      Teuchos::RCP<MultiVector> r2 = rangeMapExtractor_->ExtractVector(residual, 1, bRangeThyraModeSchur);

      // 2) solve F * \Delta \tilde{x}_1 = r_1
      //    start with zero guess \Delta \tilde{x}_1
      RCP<MultiVector> xtilde1 = domainMapExtractor_->getVector(0, X.getNumVectors(), bDomainThyraModePredict);
      xtilde1->putScalar(zero);

      // Special handling in case that F block is a 1x1 blocked operator in Thyra mode
      // Then we have to feed the smoother with real Thyra-based vectors
      if(bFThyraSpecialTreatment == true) {
        // create empty solution vector based on Thyra GIDs
        RCP<MultiVector> xtilde1_thyra = domainMapExtractor_->getVector(0, X.getNumVectors(), true);
        // create new RHS vector based on Thyra GIDs
        Teuchos::RCP<MultiVector> r1_thyra = rangeMapExtractor_->ExtractVector(residual, 0, true);
        velPredictSmoo_->Apply(*xtilde1_thyra,*r1_thyra);
        for(size_t k=0; k < xtilde1_thyra->getNumVectors(); k++) {
          Teuchos::ArrayRCP<Scalar> xpetraVecData  = xtilde1->getDataNonConst(k);
          Teuchos::ArrayRCP<const Scalar> thyraVecData = xtilde1_thyra->getData(k);
          for(size_t i=0; i < xtilde1_thyra->getLocalLength(); i++) {
            xpetraVecData[i] = thyraVecData[i];
          }
        }
      } else {
        velPredictSmoo_->Apply(*xtilde1,*r1);
      }

      // 3) calculate rhs for SchurComp equation
      //    r_2 - D \Delta \tilde{x}_1
      RCP<MultiVector> schurCompRHS = rangeMapExtractor_->getVector(1, B.getNumVectors(), bRangeThyraModeSchur);
      D_->apply(*xtilde1,*schurCompRHS);
      schurCompRHS->update(one,*r2,-one);

      // 4) solve SchurComp equation
      //    start with zero guess \Delta \tilde{x}_2
      RCP<MultiVector> xtilde2 = domainMapExtractor_->getVector(1, X.getNumVectors(), bDomainThyraModeSchur);
      xtilde2->putScalar(zero);

      // Special handling if SchurComplement operator was a 1x1 blocked operator in Thyra mode
      // Then, we have to translate the Xpetra offset GIDs to plain Thyra GIDs and vice versa
      if(bZThyraSpecialTreatment == true) {
        // create empty solution vector based on Thyra GIDs
        RCP<MultiVector> xtilde2_thyra = domainMapExtractor_->getVector(1, X.getNumVectors(), true);
        // create new RHS vector based on Thyra GIDs
        RCP<MultiVector> schurCompRHS_thyra = rangeMapExtractor_->getVector(1, B.getNumVectors(), true);
        // transform vector
        for(size_t k=0; k < schurCompRHS->getNumVectors(); k++) {
          Teuchos::ArrayRCP<const Scalar> xpetraVecData  = schurCompRHS->getData(k);
          Teuchos::ArrayRCP<Scalar> thyraVecData = schurCompRHS_thyra->getDataNonConst(k);
          for(size_t i=0; i < schurCompRHS->getLocalLength(); i++) {
            thyraVecData[i] = xpetraVecData[i];
          }
        }

        schurCompSmoo_->Apply(*xtilde2_thyra,*schurCompRHS_thyra);

        for(size_t k=0; k < xtilde2_thyra->getNumVectors(); k++) {
          Teuchos::ArrayRCP<Scalar> xpetraVecData  = xtilde2->getDataNonConst(k);
          Teuchos::ArrayRCP<const Scalar> thyraVecData = xtilde2_thyra->getData(k);
          for(size_t i=0; i < xtilde2_thyra->getLocalLength(); i++) {
            xpetraVecData[i] = thyraVecData[i];
          }
        }
      } else {
        schurCompSmoo_->Apply(*xtilde2,*schurCompRHS);
      }

      // 5) scale xtilde2 with omega
      //    store this in xhat2
      RCP<MultiVector> xhat2 = domainMapExtractor_->getVector(1, X.getNumVectors(), bDomainThyraModeSchur);
      xhat2->update(omega,*xtilde2,zero);

      // 6) calculate xhat1
      RCP<MultiVector> xhat1      = domainMapExtractor_->getVector(0, X.getNumVectors(), bDomainThyraModePredict);
      RCP<MultiVector> xhat1_temp = domainMapExtractor_->getVector(0, X.getNumVectors(), bDomainThyraModePredict);
      G_->apply(*xhat2,*xhat1_temp); // store result temporarely in xtilde1_temp
      xhat1->elementWiseMultiply(one/*/omega*/,*diagFinv_,*xhat1_temp,zero);
      xhat1->update(one,*xtilde1,-one);

      // 7) extract parts of solution vector X
      Teuchos::RCP<MultiVector> x1 = domainMapExtractor_->ExtractVector(rcpX, 0, bDomainThyraModePredict);
      Teuchos::RCP<MultiVector> x2 = domainMapExtractor_->ExtractVector(rcpX, 1, bDomainThyraModeSchur);

      // 8) update solution vector with increments xhat1 and xhat2
      //    rescale increment for x2 with omega_
      x1->update(one,*xhat1,one);    // x1 = x1_old + xhat1
      x2->update(/*omega*/ one,*xhat2,one); // x2 = x2_old + omega xhat2
      // write back solution in global vector X
      domainMapExtractor_->InsertVector(x1, 0, rcpX, bDomainThyraModePredict);
      domainMapExtractor_->InsertVector(x2, 1, rcpX, bDomainThyraModeSchur);
    }
#endif
  }
  void BraessSarazinSmoother<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Apply(MultiVector& X, const MultiVector& B, bool InitialGuessIsZero) const {
    TEUCHOS_TEST_FOR_EXCEPTION(SmootherPrototype::IsSetup() == false, Exceptions::RuntimeError,
                               "MueLu::BraessSarazinSmoother::Apply(): Setup() has not been called");

#ifdef HAVE_MUELU_DEBUG
    TEUCHOS_TEST_FOR_EXCEPTION(A_->getRangeMap()->isSameAs(*(B.getMap())) == false, Exceptions::RuntimeError, "MueLu::BlockedGaussSeidelSmoother::Apply(): The map of RHS vector B is not the same as range map of the blocked operator A. Please check the map of B and A.");
    TEUCHOS_TEST_FOR_EXCEPTION(A_->getDomainMap()->isSameAs(*(X.getMap())) == false, Exceptions::RuntimeError, "MueLu::BlockedGaussSeidelSmoother::Apply(): The map of the solution vector X is not the same as domain map of the blocked operator A. Please check the map of X and A.");
#endif


    // The following boolean flags catch the case where we need special transformation
    // for the GIDs when calling the subsmoothers.
    RCP<BlockedCrsMatrix> bA11 = Teuchos::rcp_dynamic_cast<BlockedCrsMatrix>(A11_);
    bool bA11ThyraSpecialTreatment = false;
    if (bA11 != Teuchos::null) {
      if(bA11->Rows() == 1 && bA11->Cols() == 1 && rangeMapExtractor_->getThyraMode() == true) bA11ThyraSpecialTreatment = true;
    }

    RCP<MultiVector> rcpX    = rcpFromRef(X);

    // use the GIDs of the sub blocks
    // This is valid as the subblocks actually represent the GIDs (either Thyra, Xpetra or pseudo Xpetra)
    RCP<MultiVector> deltaX0 = MultiVectorFactory::Build(A00_->getRowMap(), X.getNumVectors());
    RCP<MultiVector> deltaX1 = MultiVectorFactory::Build(A10_->getRowMap(), X.getNumVectors());
    RCP<MultiVector> Rtmp    = MultiVectorFactory::Build(A10_->getRowMap(), B.getNumVectors());

    typedef Teuchos::ScalarTraits<SC> STS;
    SC one = STS::one(), zero = STS::zero();

    // extract parameters from internal parameter list
    const ParameterList& pL = Factory::GetParameterList();
    LO nSweeps = pL.get<LO>("Sweeps");

    RCP<MultiVector> R;
    if (InitialGuessIsZero)  {
      R = MultiVectorFactory::Build(B.getMap(), B.getNumVectors());
      R->update(one, B, zero);
    } else {
      R = Utilities::Residual(*A_, X, B);
    }

    // extract diagonal of Schur complement operator
    RCP<Vector> diagSVector = VectorFactory::Build(S_->getRowMap());
    S_->getLocalDiagCopy(*diagSVector);
    ArrayRCP<SC> Sdiag = diagSVector->getDataNonConst(0);

    for (LO run = 0; run < nSweeps; ++run) {
      // Extract corresponding subvectors from X and R
      // Automatically detect whether we use Thyra or Xpetra GIDs
      // The GIDs should be always compatible with the GIDs of A00, A01, etc...
      RCP<MultiVector> R0 = rangeMapExtractor_ ->ExtractVector(R, 0);
      RCP<MultiVector> R1 = rangeMapExtractor_ ->ExtractVector(R, 1);

      RCP<MultiVector> X0 = domainMapExtractor_->ExtractVector(rcpX, 0);
      RCP<MultiVector> X1 = domainMapExtractor_->ExtractVector(rcpX, 1);

      // Calculate Rtmp = R1 - D * deltaX0 (equation 8.14)
      deltaX0->putScalar(zero);
      deltaX0->elementWiseMultiply(one, *D_, *R0, zero);    // deltaX0 = D * R0 (equation 8.13)
      A10_->apply(*deltaX0, *Rtmp);                         // Rtmp    = A10*D*deltaX0 (intermediate step)
      Rtmp->update(one, *R1, -one);                         // Rtmp    = R1 - A10*D*deltaX0

      if (!pL.get<bool>("q2q1 mode")) {
        deltaX1->putScalar(zero);
      } else {
        ArrayRCP<SC> deltaX1data = deltaX1->getDataNonConst(0);
        ArrayRCP<SC> Rtmpdata    = Rtmp->getDataNonConst(0);
        for (GO row = 0; row < deltaX1data.size(); row++)
          deltaX1data[row] = 1.1*Rtmpdata[row] / Sdiag[row];
      }

      // Special handling if SchurComplement operator was a 1x1 blocked operator in Thyra mode
      // Then, we have to translate the Xpetra offset GIDs to plain Thyra GIDs and vice versa
      if(bA11ThyraSpecialTreatment == true) {
        RCP<MultiVector> deltaX1_thyra = domainMapExtractor_->getVector(1, X.getNumVectors(), true);
        RCP<MultiVector> Rtmp_thyra = rangeMapExtractor_->getVector(1, B.getNumVectors(), true);
        // transform vector
        for(size_t k=0; k < Rtmp->getNumVectors(); k++) {
          Teuchos::ArrayRCP<const Scalar> xpetraVecData  = Rtmp->getData(k);
          Teuchos::ArrayRCP<Scalar> thyraVecData = Rtmp_thyra->getDataNonConst(k);
          for(size_t i=0; i < Rtmp->getLocalLength(); i++) {
            thyraVecData[i] = xpetraVecData[i];
          }
        }

        smoo_->Apply(*deltaX1_thyra,*Rtmp_thyra);

        for(size_t k=0; k < deltaX1_thyra->getNumVectors(); k++) {
          Teuchos::ArrayRCP<Scalar> xpetraVecData  = deltaX1->getDataNonConst(k);
          Teuchos::ArrayRCP<const Scalar> thyraVecData = deltaX1_thyra->getData(k);
          for(size_t i=0; i < deltaX1_thyra->getLocalLength(); i++) {
            xpetraVecData[i] = thyraVecData[i];
          }
        }
      } else {
        // Compute deltaX1 (pressure correction)
        // We use user provided preconditioner
        smoo_->Apply(*deltaX1,*Rtmp);
      }

      // Compute deltaX0
      deltaX0->putScalar(zero);                             // just for safety
      A01_->apply(*deltaX1, *deltaX0);                      // deltaX0 = A01*deltaX1
      deltaX0->update(one, *R0, -one);                      // deltaX0 = R0 - A01*deltaX1
      R0.swap(deltaX0);
      deltaX0->elementWiseMultiply(one, *D_, *R0, zero);    // deltaX0 = D*(R0 - A01*deltaX1)

      // Update solution
      X0->update(one, *deltaX0, one);
      X1->update(one, *deltaX1, one);

      domainMapExtractor_->InsertVector(X0, 0, rcpX);
      domainMapExtractor_->InsertVector(X1, 1, rcpX);

      if (run < nSweeps-1)
        R = Utilities::Residual(*A_, X, B);
    }
  }