// Applies the product of the stored operators to X, one factor at a time.
//
// Each factor except the last one applied writes into a freshly created
// temporary multi-vector, which becomes the input of the next factor.  The
// last factor applied writes into Y and is the only one that receives
// alpha and beta.  If there are no operators, Y is not touched.
void DefaultMultipliedLinearOp<Scalar>::applyImpl(
  const EOpTransp M_trans,
  const MultiVectorBase<Scalar> &X,
  const Ptr<MultiVectorBase<Scalar> > &Y,
  const Scalar alpha,
  const Scalar beta
  ) const
{
  using Teuchos::rcpFromPtr;
  using Teuchos::rcpFromRef;
#ifdef TEUCHOS_DEBUG
  THYRA_ASSERT_LINEAR_OP_MULTIVEC_APPLY_SPACES(
    "DefaultMultipliedLinearOp<Scalar>::apply(...)", *this, M_trans, X, &*Y
    );
#endif // TEUCHOS_DEBUG
  const int numOps = Ops_.size();
  const Ordinal numCols = X.domain()->dim();

  // Input of the factor currently being applied: X at first, afterwards the
  // temporary produced by the previously applied factor.
  RCP<const MultiVectorBase<Scalar> > input = rcpFromRef(X);

  if( real_trans(M_trans)==NOTRANS ) {
    //
    // Y = alpha * M * X + beta*Y
    // =>
    // Y = alpha * op(Op[0]) * op(Op[1]) * ... * op(Op[numOps-1]) * X + beta*Y
    //
    // Factors are applied right to left, so Op[0] is applied last.
    //
    for( int k = numOps-1; k >= 0; --k ) {
      if( k == 0 ) {
        Thyra::apply(*getOp(k), M_trans, *input, rcpFromPtr(Y).ptr(), alpha, beta);
      }
      else {
        const RCP<MultiVectorBase<Scalar> > stage =
          createMembers(getOp(k)->range(), numCols);
        Thyra::apply(*getOp(k), M_trans, *input, stage.ptr());
        input = stage;
      }
    }
  }
  else {
    //
    // Y = alpha * M' * X + beta*Y
    // =>
    // Y = alpha * Op[numOps-1]' * ... * Op[1]' * Op[0]' * X + beta * Y
    //
    // Factors are applied in index order, so Op[numOps-1] is applied last.
    //
    for( int k = 0; k <= numOps-1; ++k ) {
      if( k == numOps-1 ) {
        Thyra::apply(*getOp(k), M_trans, *input, rcpFromPtr(Y).ptr(), alpha, beta);
      }
      else {
        const RCP<MultiVectorBase<Scalar> > stage =
          createMembers(getOp(k)->domain(), numCols);
        Thyra::apply(*getOp(k), M_trans, *input, stage.ptr());
        input = stage;
      }
    }
  }
}
// Applies the wrapped Xpetra operator to a Thyra multi-vector.
//
// X_in and Y_inout are converted to Xpetra multi-vectors, the operator's
// apply() is called with the translated transpose mode and with alpha and
// beta passed through, and the result is copied back into Y_inout.
//
// If Y_inout is a Thyra::ProductMultiVectorBase, the range MapExtractor of
// the level-0 matrix "A" of the MueLu hierarchy is passed to updateThyra();
// otherwise a null extractor is passed.
//
// Throws MueLu::Exceptions::RuntimeError if the internal operator is null,
// MueLu::Exceptions::NotImplemented for an unsupported M_trans, and (via
// TEUCHOS_TEST_FOR_EXCEPT) if any object needed for the product-vector path
// is missing or of an unexpected type.
void XpetraLinearOp<Scalar,LocalOrdinal,GlobalOrdinal,Node>::applyImpl(
  const Thyra::EOpTransp M_trans,
  const Thyra::MultiVectorBase<Scalar> &X_in,
  const Teuchos::Ptr<Thyra::MultiVectorBase<Scalar> > &Y_inout,
  const Scalar alpha,
  const Scalar beta
  ) const
{
  using Teuchos::rcpFromRef;
  using Teuchos::rcpFromPtr;

  TEUCHOS_TEST_FOR_EXCEPTION(getConstXpetraOperator() == Teuchos::null, MueLu::Exceptions::RuntimeError, "XpetraLinearOp::applyImpl: internal Xpetra::Operator is null.");
  RCP< const Teuchos::Comm< int > > comm = getConstXpetraOperator()->getRangeMap()->getComm();

  const RCP<const Xpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > tX_in =
      Xpetra::ThyraUtils<Scalar,LocalOrdinal,GlobalOrdinal,Node>::toXpetra(rcpFromRef(X_in), comm);
  RCP<Xpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > tY_inout =
      Xpetra::ThyraUtils<Scalar,LocalOrdinal,GlobalOrdinal,Node>::toXpetra(rcpFromPtr(Y_inout), comm);

  // Translate the Thyra transpose flag.  The initial value is never used
  // (the default case throws); it only keeps the variable well-defined.
  Teuchos::ETransp transp = Teuchos::NO_TRANS;
  switch (M_trans) {
    case NOTRANS:   transp = Teuchos::NO_TRANS;   break;
    case TRANS:     transp = Teuchos::TRANS;      break;
    case CONJTRANS: transp = Teuchos::CONJ_TRANS; break;
    default: TEUCHOS_TEST_FOR_EXCEPTION(true, MueLu::Exceptions::NotImplemented, "Thyra::XpetraLinearOp::apply. Unknown value for M_trans. Only NOTRANS, TRANS and CONJTRANS are supported.");
  }

  xpetraOperator_->apply(*tX_in, *tY_inout, transp, alpha, beta);

  // check whether Y is a product vector
  RCP<const Xpetra::MapExtractor<Scalar, LocalOrdinal, GlobalOrdinal,Node> > rgMapExtractor = Teuchos::null;
  Teuchos::Ptr<Thyra::ProductMultiVectorBase<Scalar> > prodY_inout =
      Teuchos::ptr_dynamic_cast<Thyra::ProductMultiVectorBase<Scalar> >(Y_inout);
  if(prodY_inout != Teuchos::null) {
    // If Y is a product vector we split up the data from tY and merge them
    // into the product vector. The necessary Xpetra::MapExtractor is extracted
    // from the fine level operator (not this!)

    // get underlying fine level operator (BlockedCrsMatrix)
    // to extract the range MapExtractor
    RCP<MueLu::XpetraOperator<Scalar, LocalOrdinal, GlobalOrdinal,Node> > mueXop =
        Teuchos::rcp_dynamic_cast<MueLu::XpetraOperator<Scalar, LocalOrdinal, GlobalOrdinal,Node> >(xpetraOperator_.getNonconstObj());
    // The cast yields null when the wrapped operator is not a
    // MueLu::XpetraOperator; fail explicitly instead of dereferencing null.
    TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(mueXop));

    RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal,Node> > A =
        mueXop->GetHierarchy()->GetLevel(0)->template Get<RCP<Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal,Node> > >("A");
    TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(A));

    RCP<Xpetra::BlockedCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal,Node> > bA =
        Teuchos::rcp_dynamic_cast<Xpetra::BlockedCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal,Node> >(A);
    TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(bA));

    rgMapExtractor = bA->getRangeMapExtractor();
    TEUCHOS_TEST_FOR_EXCEPT(Teuchos::is_null(rgMapExtractor));
  }

  // copy back Xpetra results from tY to Thyra vector Y
  Xpetra::ThyraUtils<Scalar,LocalOrdinal,GlobalOrdinal,Node>::updateThyra(
      tY_inout,
      rgMapExtractor,
      Teuchos::rcpFromPtr(Y_inout));
}
// Applies the wrapped Tpetra operator to Thyra multi-vectors.
//
// X_in and Y_inout are mapped to Tpetra::MultiVector objects through
// TpetraOperatorVectorExtraction, M_trans is converted to a
// Teuchos::ETransp, and everything (including alpha and beta) is handed to
// the Tpetra operator's apply().
void TpetraLinearOp<Scalar,LocalOrdinal,GlobalOrdinal,Node>::applyImpl(
  const Thyra::EOpTransp M_trans,
  const Thyra::MultiVectorBase<Scalar> &X_in,
  const Teuchos::Ptr<Thyra::MultiVectorBase<Scalar> > &Y_inout,
  const Scalar alpha,
  const Scalar beta
  ) const
{
  typedef Tpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>
    TpetraMV;
  typedef TpetraOperatorVectorExtraction<Scalar,LocalOrdinal,GlobalOrdinal,Node>
    Extractor;

  // Tpetra handles on the input (read-only) and output (writable) vectors.
  const RCP<const TpetraMV> tpetraX =
    Extractor::getConstTpetraMultiVector(Teuchos::rcpFromRef(X_in));
  const RCP<TpetraMV> tpetraY =
    Extractor::getTpetraMultiVector(Teuchos::rcpFromPtr(Y_inout));

  // Transpose flag in the form the Tpetra operator expects.
  const Teuchos::ETransp mode = convertToTeuchosTransMode<Scalar>(M_trans);

  tpetraOperator_->apply(*tpetraX, *tpetraY, mode, alpha, beta);
}
// Copies the multi-vector's values into the flat array `av` after
// redistributing them according to `distribution_map`.
//
// av               destination storage, written with leading dimension lda
// lda              stride between consecutive vectors in av
// distribution_map map the data is imported onto before copying
//
// With HAVE_AMESOS2_DEBUG defined, throws std::invalid_argument if lda or
// av is too small for the requested local length.
void
MultiVecAdapter<
MultiVector<Scalar,
            LocalOrdinal,
            GlobalOrdinal,
            Node> >::get1dCopy(const Teuchos::ArrayView<scalar_t>& av,
                               size_t lda,
                               Teuchos::Ptr<
                               const Tpetra::Map<LocalOrdinal,
                               GlobalOrdinal,
                               Node> > distribution_map ) const
{
    using Teuchos::rcpFromPtr;
    using Teuchos::as;

    typedef Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> target_map_type;

    size_t numColumns = getGlobalNumVectors();

#ifdef HAVE_AMESOS2_DEBUG
    size_t localLength = distribution_map->getNodeNumElements();
    TEUCHOS_TEST_FOR_EXCEPTION( lda < localLength,
                                std::invalid_argument,
                                "Given stride is not large enough for local vector length" );
    TEUCHOS_TEST_FOR_EXCEPTION( as<size_t>(av.size()) < as<size_t>((numColumns-1) * lda + localLength),
                                std::invalid_argument,
                                "MultiVector storage not large enough given leading dimension "
                                "and number of vectors" );
#endif

    // One handle on the target map, shared by the scratch vector and the importer.
    const Teuchos::RCP<const target_map_type> targetMap = rcpFromPtr(distribution_map);

    // Scratch multi-vector laid out on the requested distribution.
    multivec_t redistributed(targetMap, numColumns);

    // Move the data from this adapter's map onto the target map.
    Tpetra::Import<LocalOrdinal,GlobalOrdinal,Node> importer(this->getMap(), targetMap);
    redistributed.doImport(*mv_, importer, Tpetra::REPLACE);

    // Flatten the redistributed data into the caller's buffer.
    redistributed.get1dCopy(av, lda);
}
// Writes the requested row statistic into rowStatVec_in.
//
// ROW_STAT_INV_ROW_SUM is delegated to the row matrix's InvRowSums();
// ROW_STAT_ROW_SUM is delegated to computeAbsRowSum().  Any other value
// triggers TEUCHOS_TEST_FOR_EXCEPT.
void EpetraLinearOp::getRowStatImpl(
  const RowStatLinearOpBaseUtils::ERowStat rowStat,
  const Ptr<VectorBase<double> > &rowStatVec_in
  ) const
{
  // Epetra view of the output vector over this operator's range map.
  const RCP<Epetra_Vector> epetraStats =
    get_Epetra_Vector(getRangeMap(), Teuchos::rcpFromPtr(rowStatVec_in));

  if (rowStat == RowStatLinearOpBaseUtils::ROW_STAT_INV_ROW_SUM) {
    rowMatrix_->InvRowSums(*epetraStats);
  }
  else if (rowStat == RowStatLinearOpBaseUtils::ROW_STAT_ROW_SUM) {
    // absolute row sums
    computeAbsRowSum(*epetraStats);
  }
  else {
    TEUCHOS_TEST_FOR_EXCEPT(true);
  }
}
// Overwrites this adapter's multi-vector with values taken from a flat array.
//
// new_data   flat source values, read with leading dimension lda
// lda        stride between consecutive vectors in new_data
// source_map map describing how new_data is distributed
//
// The flat data is wrapped in a multi-vector on source_map and imported
// (Tpetra::REPLACE) into the adapter's own multi-vector.
void
MultiVecAdapter<
MultiVector<Scalar,
            LocalOrdinal,
            GlobalOrdinal,
            Node> >::put1dData(const Teuchos::ArrayView<const scalar_t>& new_data,
                               size_t lda,
                               Teuchos::Ptr<
                               const Tpetra::Map<LocalOrdinal,
                               GlobalOrdinal,
                               Node> > source_map)
{
    using Teuchos::rcpFromPtr;

    typedef Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> origin_map_type;

    const size_t numColumns = getGlobalNumVectors ();

    // One handle on the source map, shared by the staging vector and the importer.
    const Teuchos::RCP<const origin_map_type> originMap = rcpFromPtr (source_map);

    // Staging multi-vector holding the caller's data on the source map.
    const multivec_t incoming (originMap, new_data, lda, numColumns);

    // Redistribute from the source map onto this adapter's map.
    Tpetra::Import<LocalOrdinal,GlobalOrdinal,Node> importer (originMap, this->getMap ());
    mv_->doImport (incoming, importer, Tpetra::REPLACE);
}
    //! Copy a flat vector into a product vector
    /*!
     * The local data of \c src (which must be an SpmdVectorBase) is copied,
     * block after block, into the local data of the blocks of \c dest.
     * \c dest is cast to, or wrapped as, a product vector first; every block
     * must itself be an SpmdVectorBase.  Block b receives the source entries
     * that follow those consumed by blocks 0..b-1.
     */
    void copyFlatThyraIntoBlockedThyra(const Thyra::VectorBase<double>& src, 
                                       const Teuchos::Ptr<Thyra::VectorBase<double> > & dest) const
    {
      using Teuchos::RCP;
      using Teuchos::ArrayView;
      using Teuchos::rcpFromPtr;
      using Teuchos::rcp_dynamic_cast;

      typedef Thyra::SpmdVectorBase<double> SpmdVec;

      const RCP<Thyra::ProductVectorBase<double> > blockedDest =
        Thyra::castOrCreateNonconstProductVectorBase(rcpFromPtr(dest));

      // Read-only view of the flat source values.
      const SpmdVec & flatSrc = Teuchos::dyn_cast<const SpmdVec>(src);
      Teuchos::ArrayRCP<const double> flatValues;
      flatSrc.getLocalData(Teuchos::ptrFromRef(flatValues));

      // Number of source entries already handed out to earlier blocks.
      std::size_t consumed = 0;
      const int blockCount = blockedDest->productSpace()->numBlocks();
      for (int blk = 0; blk < blockCount; ++blk) {
        const RCP<Thyra::VectorBase<double> > blockVec =
          blockedDest->getNonconstVectorBlock(blk);

        // Writable view of this block's local values.
        const RCP<SpmdVec> spmdBlockVec = rcp_dynamic_cast<SpmdVec>(blockVec, true);
        Teuchos::ArrayRCP<double> blockValues;
        spmdBlockVec->getNonconstLocalData(Teuchos::ptrFromRef(blockValues));

        for (int i=0; i < blockValues.size(); ++i) {
          blockValues[i] = flatValues[i+consumed];
        }
        consumed += blockValues.size();
      }

    }
void EpetraOperatorWrapper::copyEpetraIntoThyra(const Epetra_MultiVector& x,
  const Ptr<VectorBase<double> > &thyraVec) const
{

  using Teuchos::rcpFromPtr;
  using Teuchos::rcp_dynamic_cast;

  const int numVecs = x.NumVectors();

  TEUCHOS_TEST_FOR_EXCEPTION(numVecs != 1, std::runtime_error,
    "epetraToThyra does not work with MV dimension != 1");

  const RCP<ProductVectorBase<double> > prodThyraVec =
    castOrCreateNonconstProductVectorBase(rcpFromPtr(thyraVec));

  const ArrayView<const double> epetraData(x[0], x.Map().NumMyElements());
  // NOTE: I tried using Epetra_MultiVector::operator()(int) to return an
  // Epetra_Vector object but it has a defect when Reset(...) is called which
  // results in a memory access error (see bug 4700).

  int offset = 0;
  const int numBlocks = prodThyraVec->productSpace()->numBlocks();
  for (int b = 0; b < numBlocks; ++b) {
    const RCP<VectorBase<double> > vec_b = prodThyraVec->getNonconstVectorBlock(b);
    const RCP<const SpmdVectorSpaceBase<double> > spmd_vs_b =
      rcp_dynamic_cast<const SpmdVectorSpaceBase<double> >(vec_b->space(), true);
    DetachedSpmdVectorView<double> view(vec_b);
    const ArrayRCP<double> thyraData = view.sv().values();
    const int localNumElems = spmd_vs_b->localSubDim();
    for (int i=0; i < localNumElems; ++i) {
      thyraData[i] = epetraData[i+offset];
    }
    offset += localNumElems;
  }

}
// Runs the Belos solver manager on the linear problem with right-hand side B
// and solution X, and reports the outcome as a Thyra SolveStatus.
//
// M_trans        only validated through assertSolveSupports(); not used
//                otherwise in this function.
// B              right-hand side multi-vector.
// X              solution multi-vector, passed to the Belos linear problem
//                as the left-hand side.
// solveCriteria  optional; selects the convergence tolerance and residual
//                scaling, or installs a general status test when the
//                requested measure is not one of the two built-in cases.
//
// Returns a SolveStatus whose solveStatus, achievedTol (converged case
// only) and message fields are filled in.  Throws CatastrophicSolveFailure
// if the Belos linear problem cannot be set.
SolveStatus<Scalar>
BelosLinearOpWithSolve<Scalar>::solveImpl(
  const EOpTransp M_trans,
  const MultiVectorBase<Scalar> &B,
  const Ptr<MultiVectorBase<Scalar> > &X,
  const Ptr<const SolveCriteria<Scalar> > solveCriteria
  ) const
{

  TEUCHOS_FUNC_TIME_MONITOR("BelosLOWS");

  using Teuchos::rcp;
  using Teuchos::rcpFromRef;
  using Teuchos::rcpFromPtr;
  using Teuchos::FancyOStream;
  using Teuchos::OSTab;
  using Teuchos::describe;
  typedef Teuchos::ScalarTraits<Scalar> ST;
  typedef typename ST::magnitudeType ScalarMag;
  Teuchos::Time totalTimer("");
  totalTimer.start(true);

  assertSolveSupports(*this, M_trans, solveCriteria);
  // 2010/08/22: rabartl: Bug 4915 ToDo: Move the above into the NIV function
  // solve(...).

  const int numRhs = B.domain()->dim();
  const int numEquations = B.range()->dim();

  const RCP<FancyOStream> out = this->getOStream();
  const Teuchos::EVerbosityLevel verbLevel = this->getVerbLevel();
  OSTab tab = this->getOSTab();
  if (out.get() && static_cast<int>(verbLevel) > static_cast<int>(Teuchos::VERB_LOW)) {
    *out << "\nStarting iterations with Belos:\n";
    OSTab tab2(out);
    *out << "Using forward operator = " << describe(*fwdOpSrc_->getOp(),verbLevel);
    *out << "Using iterative solver = " << describe(*iterativeSolver_,verbLevel);
    *out << "With #Eqns="<<numEquations<<", #RHSs="<<numRhs<<" ...\n";
  }

  //
  // Set RHS and LHS
  //

  bool ret = lp_->setProblem( rcpFromPtr(X), rcpFromRef(B) );
  TEST_FOR_EXCEPTION(
    ret == false, CatastrophicSolveFailure
    ,"Error, the Belos::LinearProblem could not be set for the current solve!"
    );

  //
  // Set the solution criteria
  //

  // Parameters for this solve only; handed to the solver manager below.
  const RCP<Teuchos::ParameterList> tmpPL = Teuchos::parameterList();

  SolveMeasureType solveMeasureType;
  RCP<GeneralSolveCriteriaBelosStatusTest<Scalar> > generalSolveCriteriaBelosStatusTest;
  if (nonnull(solveCriteria)) {
    solveMeasureType = solveCriteria->solveMeasureType;
    const ScalarMag requestedTol = solveCriteria->requestedTol;
    if (solveMeasureType.useDefault()) {
      tmpPL->set("Convergence Tolerance", defaultTol_);
    }
    else if (solveMeasureType(SOLVE_MEASURE_NORM_RESIDUAL, SOLVE_MEASURE_NORM_RHS)) {
      if (requestedTol != SolveCriteria<Scalar>::unspecifiedTolerance()) {
        tmpPL->set("Convergence Tolerance", requestedTol);
      }
      else {
        tmpPL->set("Convergence Tolerance", defaultTol_);
      }
      tmpPL->set("Explicit Residual Scaling", "Norm of RHS");
    }
    else if (solveMeasureType(SOLVE_MEASURE_NORM_RESIDUAL, SOLVE_MEASURE_NORM_INIT_RESIDUAL)) {
      if (requestedTol != SolveCriteria<Scalar>::unspecifiedTolerance()) {
        tmpPL->set("Convergence Tolerance", requestedTol);
      }
      else {
        tmpPL->set("Convergence Tolerance", defaultTol_);
      }
      tmpPL->set("Explicit Residual Scaling", "Norm of Initial Residual");
    }
    else {
      // Set the most generic (and inefficient) solve criteria
      generalSolveCriteriaBelosStatusTest = createGeneralSolveCriteriaBelosStatusTest(
        *solveCriteria, convergenceTestFrequency_);
      // Set the verbosity level (one level down)
      generalSolveCriteriaBelosStatusTest->setOStream(out);
      generalSolveCriteriaBelosStatusTest->setVerbLevel(incrVerbLevel(verbLevel, -1));
      // Set the default convergence tolerance to always converged to allow
      // the above status test to control things.
      tmpPL->set("Convergence Tolerance", 1.0);
    }
  }
  else {
    // No solveCriteria was even passed in!
    tmpPL->set("Convergence Tolerance", defaultTol_);
  }

  //
  // Reset the blocksize if we adding more vectors than half the number of equations,
  // orthogonalization will fail on the first iteration!
  //

  RCP<const Teuchos::ParameterList> solverParams = iterativeSolver_->getCurrentParameters();
  const int currBlockSize = Teuchos::getParameter<int>(*solverParams, "Block Size");
  bool isNumBlocks = false;
  int currNumBlocks = 0;
  if (Teuchos::isParameterType<int>(*solverParams, "Num Blocks")) {
    currNumBlocks = Teuchos::getParameter<int>(*solverParams, "Num Blocks");
    isNumBlocks = true;
  }
  // numEquations/2 is zero for systems with fewer than two equations.  Keep
  // the block size at least 1 so that it stays valid and the "Num Blocks"
  // computation below never divides by zero.
  const int cappedBlockSize = TEUCHOS_MIN(currBlockSize,numEquations/2);
  const int newBlockSize = (cappedBlockSize < 1 ? 1 : cappedBlockSize);
  if (nonnull(out)
    && static_cast<int>(verbLevel) > static_cast<int>(Teuchos::VERB_NONE)
    && newBlockSize != currBlockSize)
  {
    *out << "\nAdjusted block size = " << newBlockSize << "\n";
  }
  //
  tmpPL->set("Block Size",newBlockSize);

  //
  // Set the number of Krylov blocks if we are using a GMRES solver, or a solver
  // that recognizes "Num Blocks". Otherwise the solver will throw an error!
  //

  if (isNumBlocks) {
    // Rescale so that (number of blocks) * (block size) stays roughly constant.
    const int Krylov_length = (currNumBlocks*currBlockSize)/newBlockSize;
    tmpPL->set("Num Blocks",Krylov_length);
  
    if (newBlockSize != currBlockSize) {
      if (out.get() && static_cast<int>(verbLevel) > static_cast<int>(Teuchos::VERB_NONE))
        *out
          << "\nAdjusted max number of Krylov basis blocks = " << Krylov_length << "\n";
    }
  }

  //
  // Solve the linear system
  //

  Belos::ReturnType belosSolveStatus;
  {
    // Belos writes to `out` unless verbosity is VERB_NONE, in which case its
    // output is sent to a black-hole stream.
    RCP<std::ostream>
      outUsed =
      ( static_cast<int>(verbLevel) > static_cast<int>(Teuchos::VERB_NONE)
        ? out
        : rcp(new FancyOStream(rcp(new Teuchos::oblackholestream())))
        );
    Teuchos::OSTab tab1(outUsed,1,"BELOS");
    tmpPL->set("Output Stream", outUsed);
    iterativeSolver_->setParameters(tmpPL);
    if (nonnull(generalSolveCriteriaBelosStatusTest)) {
      iterativeSolver_->setUserConvStatusTest(generalSolveCriteriaBelosStatusTest);
    }
    belosSolveStatus = iterativeSolver_->solve();
  }

  //
  // Report the solve status
  //

  totalTimer.stop();

  SolveStatus<Scalar> solveStatus;

  switch (belosSolveStatus) {
    case Belos::Unconverged: {
      solveStatus.solveStatus = SOLVE_STATUS_UNCONVERGED;
      break;
    }
    case Belos::Converged: {
      solveStatus.solveStatus = SOLVE_STATUS_CONVERGED;
      if (nonnull(generalSolveCriteriaBelosStatusTest)) {
        // Report the largest tolerance achieved over all entries returned by
        // the general status test.
        const ArrayView<const ScalarMag> achievedTol = 
          generalSolveCriteriaBelosStatusTest->achievedTol();
        // achievedTol is a magnitude, so use the magnitude type's zero
        // (ST::zero() would be a Scalar, which is complex for complex Scalar).
        solveStatus.achievedTol = Teuchos::ScalarTraits<ScalarMag>::zero();
        for (Ordinal i = 0; i < achievedTol.size(); ++i) {
          solveStatus.achievedTol = std::max(solveStatus.achievedTol, achievedTol[i]);
        }
      }
      else {
        solveStatus.achievedTol = tmpPL->get("Convergence Tolerance", defaultTol_);
      }
      break;
    }
    TEUCHOS_SWITCH_DEFAULT_DEBUG_ASSERT();
  }

  std::ostringstream ossmessage;
  ossmessage
    << "The Belos solver of type \""<<iterativeSolver_->description()
    <<"\" returned a solve status of \""<< toString(solveStatus.solveStatus) << "\""
    << " in " << iterativeSolver_->getNumIters() << " iterations"
    << " with total CPU time of " << totalTimer.totalElapsedTime() << " sec" ;
  if (out.get() && static_cast<int>(verbLevel) >=static_cast<int>(Teuchos::VERB_LOW))
    *out << "\n" << ossmessage.str() << "\n";

  solveStatus.message = ossmessage.str();

  if (out.get() && static_cast<int>(verbLevel) >= static_cast<int>(Teuchos::VERB_LOW))
    *out << "\nTotal solve time in Belos = "<<totalTimer.totalElapsedTime()<<" sec\n";

  return solveStatus;

}
// Runs the Belos solver manager on the linear problem with right-hand side B
// and solution X, and reports the outcome as a Thyra SolveStatus.
//
// M_trans        only validated through assertSolveSupports(); not used
//                otherwise in this function.
// B              right-hand side multi-vector.
// X              solution multi-vector, passed to the Belos linear problem
//                as the left-hand side.
// solveCriteria  optional; selects the convergence tolerance and residual
//                scaling, or installs a general status test when the
//                requested measure is not one of the two built-in cases.
//                An int "Maximum Iterations" entry in its extraParameters is
//                forwarded to the solver.
//
// Returns a SolveStatus with solveStatus, achievedTol, message and
// extraParameters ("Belos/Iteration Count", "Iteration Count",
// "Belos/Achieved Tolerance") filled in.  Throws CatastrophicSolveFailure if
// the Belos linear problem cannot be set.
SolveStatus<Scalar>
BelosLinearOpWithSolve<Scalar>::solveImpl(
  const EOpTransp M_trans,
  const MultiVectorBase<Scalar> &B,
  const Ptr<MultiVectorBase<Scalar> > &X,
  const Ptr<const SolveCriteria<Scalar> > solveCriteria
  ) const
{

  THYRA_FUNC_TIME_MONITOR("Stratimikos: BelosLOWS");

  using Teuchos::rcp;
  using Teuchos::rcpFromRef;
  using Teuchos::rcpFromPtr;
  using Teuchos::FancyOStream;
  using Teuchos::OSTab;
  using Teuchos::ParameterList;
  using Teuchos::parameterList;
  using Teuchos::describe;
  typedef Teuchos::ScalarTraits<Scalar> ST;
  typedef typename ST::magnitudeType ScalarMag;
  Teuchos::Time totalTimer("");
  totalTimer.start(true);

  assertSolveSupports(*this, M_trans, solveCriteria);
  // 2010/08/22: rabartl: Bug 4915 ToDo: Move the above into the NIV function
  // solve(...).

  const RCP<FancyOStream> out = this->getOStream();
  const Teuchos::EVerbosityLevel verbLevel = this->getVerbLevel();
  OSTab tab = this->getOSTab();
  if (out.get() && static_cast<int>(verbLevel) > static_cast<int>(Teuchos::VERB_LOW)) {
    *out << "\nStarting iterations with Belos:\n";
    OSTab tab2(out);
    *out << "Using forward operator = " << describe(*fwdOpSrc_->getOp(),verbLevel);
    *out << "Using iterative solver = " << describe(*iterativeSolver_,verbLevel);
    *out << "With #Eqns="<<B.range()->dim()<<", #RHSs="<<B.domain()->dim()<<" ...\n";
  }

  //
  // Set RHS and LHS
  //

  bool ret = lp_->setProblem( rcpFromPtr(X), rcpFromRef(B) );
  TEUCHOS_TEST_FOR_EXCEPTION(
    ret == false, CatastrophicSolveFailure
    ,"Error, the Belos::LinearProblem could not be set for the current solve!"
    );

  //
  // Set the solution criteria
  //

  // Parameter list for the current solve.
  const RCP<ParameterList> tmpPL = Teuchos::parameterList();

  // The solver's valid parameter list.
  RCP<const ParameterList> validPL = iterativeSolver_->getValidParameters();

  SolveMeasureType solveMeasureType;
  RCP<GeneralSolveCriteriaBelosStatusTest<Scalar> > generalSolveCriteriaBelosStatusTest;
  if (nonnull(solveCriteria)) {
    solveMeasureType = solveCriteria->solveMeasureType;
    const ScalarMag requestedTol = solveCriteria->requestedTol;
    if (solveMeasureType.useDefault()) {
      tmpPL->set("Convergence Tolerance", defaultTol_);
    }
    else if (solveMeasureType(SOLVE_MEASURE_NORM_RESIDUAL, SOLVE_MEASURE_NORM_RHS)) {
      if (requestedTol != SolveCriteria<Scalar>::unspecifiedTolerance()) {
        tmpPL->set("Convergence Tolerance", requestedTol);
      }
      else {
        tmpPL->set("Convergence Tolerance", defaultTol_);
      }
      setResidualScalingType (tmpPL, validPL, "Norm of RHS");
    }
    else if (solveMeasureType(SOLVE_MEASURE_NORM_RESIDUAL, SOLVE_MEASURE_NORM_INIT_RESIDUAL)) {
      if (requestedTol != SolveCriteria<Scalar>::unspecifiedTolerance()) {
        tmpPL->set("Convergence Tolerance", requestedTol);
      }
      else {
        tmpPL->set("Convergence Tolerance", defaultTol_);
      }
      setResidualScalingType (tmpPL, validPL, "Norm of Initial Residual");
    }
    else {
      // Set the most generic (and inefficient) solve criteria
      generalSolveCriteriaBelosStatusTest = createGeneralSolveCriteriaBelosStatusTest(
        *solveCriteria, convergenceTestFrequency_);
      // Set the verbosity level (one level down)
      generalSolveCriteriaBelosStatusTest->setOStream(out);
      generalSolveCriteriaBelosStatusTest->setVerbLevel(incrVerbLevel(verbLevel, -1));
      // Set the default convergence tolerance to always converged to allow
      // the above status test to control things.
      tmpPL->set("Convergence Tolerance", 1.0);
    }
    // maximum iterations
    if (nonnull(solveCriteria->extraParameters)) {
      if (Teuchos::isParameterType<int>(*solveCriteria->extraParameters,"Maximum Iterations")) {
        tmpPL->set("Maximum Iterations", Teuchos::get<int>(*solveCriteria->extraParameters,"Maximum Iterations"));
      }
    }
  }
  else {
    // No solveCriteria was even passed in!
    tmpPL->set("Convergence Tolerance", defaultTol_);
  }

  //
  // Solve the linear system
  //

  Belos::ReturnType belosSolveStatus;
  {
    // Belos writes to `out` only above VERB_LOW; otherwise its output is
    // sent to a black-hole stream.
    RCP<std::ostream>
      outUsed =
      ( static_cast<int>(verbLevel) > static_cast<int>(Teuchos::VERB_LOW)
        ? out
        : rcp(new FancyOStream(rcp(new Teuchos::oblackholestream())))
        );
    Teuchos::OSTab tab1(outUsed,1,"BELOS");
    tmpPL->set("Output Stream", outUsed);
    iterativeSolver_->setParameters(tmpPL);
    if (nonnull(generalSolveCriteriaBelosStatusTest)) {
      iterativeSolver_->setUserConvStatusTest(generalSolveCriteriaBelosStatusTest);
    }
    belosSolveStatus = iterativeSolver_->solve();
  }

  //
  // Report the solve status
  //

  totalTimer.stop();

  SolveStatus<Scalar> solveStatus;

  switch (belosSolveStatus) {
    case Belos::Unconverged: {
      solveStatus.solveStatus = SOLVE_STATUS_UNCONVERGED;
      // Set achievedTol even if the solver did not converge.  This is
      // helpful for things like nonlinear solvers, which might be
      // able to use a partially converged result, and which would
      // like to know the achieved convergence tolerance for use in
      // computing bounds.  It's also helpful for estimating whether a
      // small increase in the maximum iteration count might be
      // helpful next time.
      try {
        // Some solvers might not have implemented achievedTol(). 
        // The default implementation throws std::runtime_error.
        solveStatus.achievedTol = iterativeSolver_->achievedTol();
      } catch (std::runtime_error&) {
        // Do nothing; use the default value of achievedTol.
      }
      break;
    }
    case Belos::Converged: {
      solveStatus.solveStatus = SOLVE_STATUS_CONVERGED;
      if (nonnull(generalSolveCriteriaBelosStatusTest)) {
        // The user set a custom status test.  This means that we
        // should ask the custom status test itself, rather than the
        // Belos solver, what the final achieved convergence tolerance
        // was.
        const ArrayView<const ScalarMag> achievedTol = 
          generalSolveCriteriaBelosStatusTest->achievedTol();
        // achievedTol is a magnitude, so use the magnitude type's zero
        // (ST::zero() would be a Scalar, which is complex for complex Scalar).
        solveStatus.achievedTol = Teuchos::ScalarTraits<ScalarMag>::zero();
        for (Ordinal i = 0; i < achievedTol.size(); ++i) {
          solveStatus.achievedTol = std::max(solveStatus.achievedTol, achievedTol[i]);
        }
      }
      else {
        try {
          // Some solvers might not have implemented achievedTol(). 
          // The default implementation throws std::runtime_error.
          solveStatus.achievedTol = iterativeSolver_->achievedTol();
        } catch (std::runtime_error&) {
          // Use the default convergence tolerance.  This is a correct
          // upper bound, since we did actually converge.
          solveStatus.achievedTol = tmpPL->get("Convergence Tolerance", defaultTol_);
        }
      }
      break;
    }
    TEUCHOS_SWITCH_DEFAULT_DEBUG_ASSERT();
  }

  std::ostringstream ossmessage;
  ossmessage
    << "The Belos solver of type \""<<iterativeSolver_->description()
    <<"\" returned a solve status of \""<< toString(solveStatus.solveStatus) << "\""
    << " in " << iterativeSolver_->getNumIters() << " iterations"
    << " with total CPU time of " << totalTimer.totalElapsedTime() << " sec" ;
  if (out.get() && static_cast<int>(verbLevel) > static_cast<int>(Teuchos::VERB_NONE))
    *out << "\n" << ossmessage.str() << "\n";

  solveStatus.message = ossmessage.str();

  // Dump the getNumIters() and the achieved convergence tolerance
  // into solveStatus.extraParameters, as the "Belos/Iteration Count"
  // resp. "Belos/Achieved Tolerance" parameters.
  if (solveStatus.extraParameters.is_null()) {
    solveStatus.extraParameters = parameterList ();
  }
  solveStatus.extraParameters->set ("Belos/Iteration Count", 
                                    iterativeSolver_->getNumIters());
  // package independent version of the same
  solveStatus.extraParameters->set ("Iteration Count", 
                                    iterativeSolver_->getNumIters());
  // NOTE (mfh 13 Dec 2011) Though the most commonly used Belos
  // solvers do implement achievedTol(), some Belos solvers currently
  // do not.  In the latter case, if the solver did not converge, the
  // reported achievedTol() value may just be the default "invalid"
  // value -1, and if the solver did converge, the reported value will
  // just be the convergence tolerance (a correct upper bound).
  solveStatus.extraParameters->set ("Belos/Achieved Tolerance", 
                                    solveStatus.achievedTol);

//  This information is in the previous line, which is printed anytime the verbosity
//  is not set to Teuchos::VERB_NONE, so I'm commenting this out for now.
//  if (out.get() && static_cast<int>(verbLevel) > static_cast<int>(Teuchos::VERB_NONE))
//    *out << "\nTotal solve time in Belos = "<<totalTimer.totalElapsedTime()<<" sec\n";
  
  return solveStatus;

}
// Run the nonlinear CG iteration on the response g(p) of model_ (selected by
// paramIndex_ / responseIndex_), using linesearch_ to pick each step length.
//
// Parameters:
//   p_inout          [in/out] On input the starting point for p; on output the
//                    last accepted iterate is copied back into it.
//   g_opt_out        [out] Must be non-null.  Receives element 0 of the most
//                    recently evaluated g vector.
//   g_reduct_tol_in  [in] Optional override of g_reduct_tol_ (null => default).
//   g_grad_tol_in    [in] Optional override of g_grad_tol_ (null => default).
//   alpha_init_in    [in] Optional override of alpha_init_ (null => default).
//   numIters_out     [out] Optional.  If non-null, receives numIters_.
//
// Returns:
//   SOLVE_SOLUTION_FOUND      if the convergence test passed (and at least
//                             minIters_ iterations were taken),
//   SOLVE_LINSEARCH_FAILURE   if the line search failed on two consecutive
//                             iterations,
//   SOLVE_MAX_ITERS_EXCEEDED  otherwise.
NonlinearCGUtils::ESolveReturn
NonlinearCG<Scalar>::doSolve(
  const Ptr<Thyra::VectorBase<Scalar> > &p_inout,
  const Ptr<ScalarMag> &g_opt_out,
  const Ptr<const ScalarMag> &g_reduct_tol_in,
  const Ptr<const ScalarMag> &g_grad_tol_in,
  const Ptr<const ScalarMag> &alpha_init_in,
  const Ptr<int> &numIters_out
  )
{

  typedef ScalarTraits<Scalar> ST;
  typedef ScalarTraits<ScalarMag> SMT;

  using Teuchos::null;
  using Teuchos::as;
  using Teuchos::tuple;
  using Teuchos::rcpFromPtr;
  using Teuchos::optInArg;
  using Teuchos::inOutArg;
  using GlobiPack::computeValue;
  using GlobiPack::PointEval1D;
  using Thyra::VectorSpaceBase;
  using Thyra::VectorBase;
  using Thyra::MultiVectorBase;
  using Thyra::scalarProd;
  using Thyra::createMember;
  using Thyra::createMembers;
  using Thyra::get_ele;
  using Thyra::norm;
  using Thyra::V_StV;
  using Thyra::Vt_S;
  using Thyra::eval_g_DgDp;
  typedef Thyra::Ordinal Ordinal;
  typedef Thyra::ModelEvaluatorBase MEB;
  namespace NCGU = NonlinearCGUtils;
  using std::max;

  // Validate input

  g_opt_out.assert_not_null();

  // Set streams

  const RCP<Teuchos::FancyOStream> out = this->getOStream();
  linesearch_->setOStream(out);

  // Determine what step constants will be computed

  const bool compute_beta_PR =
    (
      solverType_ == NCGU::NONLINEAR_CG_PR_PLUS
      ||
      solverType_ == NCGU::NONLINEAR_CG_FR_PR
      );

  const bool compute_beta_HS = (solverType_ == NCGU::NONLINEAR_CG_HS);

  //
  // A) Set up the storage for the algorithm
  //

  const RCP<DefaultPolyLineSearchPointEvaluator<Scalar> >
    pointEvaluator = defaultPolyLineSearchPointEvaluator<Scalar>();

  const RCP<UnconstrainedOptMeritFunc1D<Scalar> >
    meritFunc = unconstrainedOptMeritFunc1D<Scalar>(
      model_, paramIndex_, responseIndex_ );

  const RCP<const VectorSpaceBase<Scalar> >
    p_space = model_->get_p_space(paramIndex_),
    g_space = model_->get_g_space(responseIndex_);

  // Storage for current iteration
  RCP<VectorBase<Scalar> >
    p_k = rcpFromPtr(p_inout),        // Current solution for p
    p_kp1 = createMember(p_space),    // Trial point for p (in line search)
    g_vec = createMember(g_space),    // Vector (size 1) form of objective g(p)
    g_grad_k = createMember(p_space), // Gradient of g DgDp^T
    d_k = createMember(p_space),      // Search direction
    g_grad_k_diff_km1 = null;         // g_grad_k - g_grad_km1 (if needed)

  // Storage for previous iteration
  RCP<VectorBase<Scalar> >
    g_grad_km1 = null, // Will allocate if we need it!
    d_km1 = null; // Will allocate if we need it!
  ScalarMag
    alpha_km1 = SMT::zero(),
    g_km1 = SMT::zero(),
    g_grad_km1_inner_g_grad_km1 = SMT::zero(),
    g_grad_km1_inner_d_km1 = SMT::zero(); // Saved each iteration; not read in this function

  // The PR and HS formulas need the previous gradient and the gradient
  // difference; only HS additionally needs the previous search direction.
  if (compute_beta_PR || compute_beta_HS) {
    g_grad_km1 = createMember(p_space);
    g_grad_k_diff_km1 = createMember(p_space);
  }

  if (compute_beta_HS) {
    d_km1 = createMember(p_space);
  }

  //
  // B) Do the nonlinear CG iterations
  //

  *out << "\nStarting nonlinear CG iterations ...\n";

  if (and_conv_tests_) {
    *out << "\nNOTE: Using AND of convergence tests!\n";
  }
  else {
    *out << "\nNOTE: Using OR of convergence tests!\n";
  }

  // Per-call overrides take precedence over the object's stored defaults
  const Scalar alpha_init =
    ( !is_null(alpha_init_in) ? *alpha_init_in : alpha_init_ );
  const Scalar g_reduct_tol =
    ( !is_null(g_reduct_tol_in) ? *g_reduct_tol_in : g_reduct_tol_ );
  const Scalar g_grad_tol =
    ( !is_null(g_grad_tol_in) ? *g_grad_tol_in : g_grad_tol_ );

  const Ordinal globalDim = p_space->dim();

  bool foundSolution = false;
  bool fatalLinesearchFailure = false;
  bool restart = true; // First iteration always starts from steepest descent
  int numConsecutiveLineSearchFailures = 0;

  // NOTE(review): this counter is never reset when a restart happens, so the
  // "== globalDim" restart below can trigger at most once per solve.  That
  // may or may not be intended -- confirm before changing.
  int numConsecutiveIters = 0;

  for (numIters_ = 0; numIters_ < maxIters_; ++numIters_, ++numConsecutiveIters) {

    Teuchos::OSTab tab(out);

    *out << "\nNonlinear CG Iteration k = " << numIters_ << "\n";

    Teuchos::OSTab tab2(out);

    //
    // B.1) Evaluate the point (on first iteration)
    //

    eval_g_DgDp(
      *model_, paramIndex_, *p_k, responseIndex_,
      numIters_ == 0 ? g_vec.ptr() : null, // Only on first iteration
      MEB::Derivative<Scalar>(g_grad_k, MEB::DERIV_MV_GRADIENT_FORM) );

    const ScalarMag g_k = get_ele(*g_vec, 0);
    // Above: If numIters_ > 0, then g_vec was updated in meritFunc->eval(...).

    //
    // B.2) Check for convergence
    //

    // B.2.a) |g_k - g_km1| / |g_k + g_mag| <= g_reduct_tol

    bool g_reduct_converged = false;

    if (numIters_ > 0) {

      const ScalarMag g_reduct = g_k - g_km1;

      *out << "\ng_k - g_km1 = "<<g_reduct<<"\n";

      const ScalarMag g_reduct_err =
        SMT::magnitude(g_reduct / SMT::magnitude(g_k + g_mag_));

      g_reduct_converged = (g_reduct_err <= g_reduct_tol);

      *out << "\nCheck convergence: |g_k - g_km1| / |g_k + g_mag| = "<<g_reduct_err
           << (g_reduct_converged ? " <= " : " > ")
           << "g_reduct_tol = "<<g_reduct_tol<<"\n";

    }

    // B.2.b) ||g_grad_k|| / g_mag <= g_grad_tol

    const Scalar g_grad_k_inner_g_grad_k = scalarProd<Scalar>(*g_grad_k, *g_grad_k);
    const ScalarMag norm_g_grad_k = ST::magnitude(ST::squareroot(g_grad_k_inner_g_grad_k));

    *out << "\n||g_grad_k|| = "<<norm_g_grad_k << "\n";

    const ScalarMag g_grad_err = norm_g_grad_k / g_mag_;

    const bool g_grad_converged = (g_grad_err <= g_grad_tol);

    *out << "\nCheck convergence: ||g_grad_k|| / g_mag = "<<g_grad_err
         << (g_grad_converged ? " <= " : " > ")
         << "g_grad_tol = "<<g_grad_tol<<"\n";

    // B.2.c) Convergence status

    bool isConverged = false;
    if (and_conv_tests_) {
      isConverged = g_reduct_converged && g_grad_converged;
    }
    else {
      isConverged = g_reduct_converged || g_grad_converged;
    }

    if (isConverged) {
      if (numIters_ < minIters_) {
        *out << "\nnumIters="<<numIters_<<" < minIters="<<minIters_
             << ", continuing on!\n";
      }
      else {
        *out << "\nFound solution, existing algorithm!\n";
        foundSolution = true;
      }
    }
    else {
      *out << "\nNot converged!\n";
    }

    if (foundSolution) {
      break;
    }

    //
    // B.3) Compute the search direction d_k
    //

    if (numConsecutiveIters == globalDim) {

      *out << "\nThe number of consecutive iterations exceeds the"
           << " global dimension so restarting!\n";

      restart = true;

    }

    if (restart) {

      *out << "\nResetting search direction back to steppest descent!\n";

      // d_k = -g_grad_k
      V_StV( d_k.ptr(), as<Scalar>(-1.0), *g_grad_k );

      restart = false;

    }
    else {

      // g_grad_k - g_grad_km1
      if (!is_null(g_grad_k_diff_km1)) {
        V_VmV( g_grad_k_diff_km1.ptr(), *g_grad_k, *g_grad_km1 );
      }

      // beta_FR = inner(g_grad_k, g_grad_k) / inner(g_grad_km1, g_grad_km1)
      const Scalar beta_FR =
        g_grad_k_inner_g_grad_k / g_grad_km1_inner_g_grad_km1;
      *out << "\nbeta_FR = " << beta_FR << "\n";
      // NOTE: Computing beta_FR is free so we might as well just do it!

      // beta_PR = inner(g_grad_k, g_grad_k - g_grad_km1) /
      //    inner(g_grad_km1, g_grad_km1)
      Scalar beta_PR = ST::zero();
      if (compute_beta_PR) {
        beta_PR =
          inner(*g_grad_k, *g_grad_k_diff_km1) / g_grad_km1_inner_g_grad_km1;
        *out << "\nbeta_PR = " << beta_PR << "\n";
      }

      // beta_HS = inner(g_grad_k, g_grad_k - g_grad_km1) /
      //    inner(g_grad_k - g_grad_km1, d_km1)
      Scalar beta_HS = ST::zero();
      if (compute_beta_HS) {
        beta_HS =
          inner(*g_grad_k, *g_grad_k_diff_km1) / inner(*g_grad_k_diff_km1, *d_km1);
        *out << "\nbeta_HS = " << beta_HS << "\n";
      }

      Scalar beta_k = ST::zero();
      switch(solverType_) {
        case NCGU::NONLINEAR_CG_FR: {
          beta_k = beta_FR;
          break;
        }
        case NCGU::NONLINEAR_CG_PR_PLUS: {
          beta_k = max(beta_PR, ST::zero());
          break;
        }
        case NCGU::NONLINEAR_CG_FR_PR: {
          // Use beta_PR clipped to the interval [-beta_FR, beta_FR], except
          // right after a restart where beta_PR is used unmodified.
          if (numConsecutiveIters < 2) {
            beta_k = beta_PR;
          }
          else if (beta_PR < -beta_FR)
            beta_k = -beta_FR;
          else if (ST::magnitude(beta_PR) <= beta_FR)
            beta_k = beta_PR;
          else // beta_PR > beta_FR
            beta_k = beta_FR;
          // This break is essential: without it control falls through into
          // the HS case, which overwrites beta_k with beta_HS (zero here,
          // because beta_HS is only computed for NONLINEAR_CG_HS).
          break;
        }
        case NCGU::NONLINEAR_CG_HS: {
          beta_k = beta_HS;
          break;
        }
        default:
          TEUCHOS_TEST_FOR_EXCEPT(true);
      }
      *out << "\nbeta_k = " << beta_k << "\n";

      // d_k = beta_k * d_last + -g_grad_k
      //
      // If d_km1 was allocated then d_k and d_km1 were swapped at the end of
      // the last iteration, so the previous direction lives in d_km1.
      // Otherwise d_k still holds the previous direction and is scaled in
      // place.
      if (!is_null(d_km1))
        V_StV( d_k.ptr(), beta_k, *d_km1 );
      else
        Vt_S( d_k.ptr(), beta_k );
      Vp_StV( d_k.ptr(), as<Scalar>(-1.0), *g_grad_k );

    }

    //
    // B.4) Perform the line search
    //

    // B.4.a) Compute the initial step length

    Scalar alpha_k = as<Scalar>(-1.0);

    if (numIters_ == 0) {
      alpha_k = alpha_init;
    }
    else {
      if (alpha_reinit_) {
        alpha_k = alpha_init;
      }
      else {
        alpha_k = alpha_km1;
        // ToDo: Implement better logic from Nocedal and Wright for selecting
        // this step length after first iteration!
      }
    }

    // B.4.b) Perform the linesearch (computing updated quantities in process)

    pointEvaluator->initialize(tuple<RCP<const VectorBase<Scalar> > >(p_k, d_k)());

    ScalarMag g_grad_k_inner_d_k = ST::zero();

    // Set up the merit function to only compute the value
    meritFunc->setEvaluationQuantities(pointEvaluator, p_kp1, g_vec, null);

    PointEval1D<ScalarMag> point_k(ST::zero(), g_k);
    if (linesearch_->requiresBaseDeriv()) {
      // Directional derivative at alpha = 0, only computed when asked for
      g_grad_k_inner_d_k = scalarProd(*g_grad_k, *d_k);
      point_k.Dphi = g_grad_k_inner_d_k;
    }

    ScalarMag g_kp1 = computeValue(*meritFunc, alpha_k);
    // NOTE: The above call updates p_kp1 and g_vec as well!

    PointEval1D<ScalarMag> point_kp1(alpha_k, g_kp1);

    const bool linesearchResult = linesearch_->doLineSearch(
      *meritFunc, point_k, inOutArg(point_kp1), null );

    alpha_k = point_kp1.alpha;
    g_kp1 = point_kp1.phi;

    // A first line search failure only forces a restart; a second failure in
    // a row terminates the algorithm.
    if (linesearchResult) {
      numConsecutiveLineSearchFailures = 0;
    }
    else {
      if (numConsecutiveLineSearchFailures==0) {
        *out << "\nLine search failure, resetting the search direction!\n";
        restart = true;
      }
      if (numConsecutiveLineSearchFailures==1) {
        *out << "\nLine search failure on last iteration also, terminating algorithm!\n";
        fatalLinesearchFailure = true;
      }
      ++numConsecutiveLineSearchFailures;
    }

    if (fatalLinesearchFailure) {
      break;
    }

    //
    // B.5) Transition to the next iteration
    //

    alpha_km1 = alpha_k;
    g_km1 = g_k;
    g_grad_km1_inner_g_grad_km1 = g_grad_k_inner_g_grad_k;
    g_grad_km1_inner_d_km1 = g_grad_k_inner_d_k;
    std::swap(p_k, p_kp1);
    if (!is_null(g_grad_km1))
      std::swap(g_grad_km1, g_grad_k);
    if (!is_null(d_km1))
      std::swap(d_k, d_km1);

#ifdef TEUCHOS_DEBUG
    // Make sure we compute these correctly before they are used!
    V_S(g_grad_k.ptr(), ST::nan());
    V_S(p_kp1.ptr(), ST::nan());
#endif

  }

  //
  // C) Final clean up
  //

  // Get the most current value of g(p)
  *g_opt_out = get_ele(*g_vec, 0);

  // Make sure that the final value for p has been copied in!
  V_V( p_inout, *p_k );

  if (!is_null(numIters_out)) {
    *numIters_out = numIters_;
  }

  if (numIters_ == maxIters_) {
    *out << "\nMax nonlinear CG iterations exceeded!\n";
  }

  if (foundSolution) {
    return NonlinearCGUtils::SOLVE_SOLUTION_FOUND;
  }
  else if(fatalLinesearchFailure) {
    return NonlinearCGUtils::SOLVE_LINSEARCH_FAILURE;
  }

  // Else, the max number of iterations was exceeded
  return NonlinearCGUtils::SOLVE_MAX_ITERS_EXCEEDED;

}
// Solve op(M) * X = B with AztecOO, one right-hand-side column at a time.
//
// Parameters:
//   M_trans        [in] Which form of the operator to solve with; selects the
//                  forward or adjoint AztecOO solver object.
//   B              [in] Right-hand sides.
//   X              [in/out] On input the initial guesses; on output the
//                  computed solutions (scaled by 1/aztecSolverScalar_).
//   solveCriteria  [in] Optional.  May supply a requested tolerance and a
//                  "Maximum Iterations" extra parameter; otherwise the
//                  forward/adjoint defaults are used.
//
// Returns a SolveStatus whose extraParameters carry the cumulative iteration
// count and the achieved tolerance.
SolveStatus<double>
AztecOOLinearOpWithSolve::solveImpl(
  const EOpTransp M_trans,
  const MultiVectorBase<double> &B,
  const Ptr<MultiVectorBase<double> > &X,
  const Ptr<const SolveCriteria<double> > solveCriteria
  ) const
{

  using Teuchos::rcp;
  using Teuchos::rcpFromRef;
  using Teuchos::rcpFromPtr;
  using Teuchos::OSTab;
  typedef SolveCriteria<double> SC;
  typedef SolveStatus<double> SS;

  THYRA_FUNC_TIME_MONITOR("Stratimikos: AztecOOLOWS");
  Teuchos::Time totalTimer(""), timer("");
  totalTimer.start(true);

  RCP<Teuchos::FancyOStream> out = this->getOStream();
  Teuchos::EVerbosityLevel verbLevel = this->getVerbLevel();
  OSTab tab = this->getOSTab();
  if (out.get() && static_cast<int>(verbLevel) > static_cast<int>(Teuchos::VERB_NONE))
    *out << "\nSolving block system using AztecOO ...\n\n";

  //
  // Validate input
  //
  TEUCHOS_ASSERT(this->solveSupportsImpl(M_trans));
  SolveMeasureType solveMeasureType;
  if (nonnull(solveCriteria)) {
    solveMeasureType = solveCriteria->solveMeasureType;
    assertSupportsSolveMeasureType(*this, M_trans, solveMeasureType);
  }

  //
  // Get the transpose argument
  //
  const EOpTransp aztecOpTransp = real_trans(M_trans);

  //
  // Get the solver, operator, and preconditioner that we will use
  //
  RCP<AztecOO>
    aztecSolver = ( aztecOpTransp == NOTRANS ? aztecFwdSolver_  : aztecAdjSolver_ );
  const Epetra_Operator
    *aztecOp = aztecSolver->GetUserOperator();

  //
  // Get the op(...) range and domain maps
  //
  const Epetra_Map
    &opRangeMap = aztecOp->OperatorRangeMap(),
    &opDomainMap = aztecOp->OperatorDomainMap();

  //
  // Get the convergence criteria
  //
  double tol = ( aztecOpTransp==NOTRANS ? fwdDefaultTol() : adjDefaultTol() );
  int maxIterations = ( aztecOpTransp==NOTRANS
    ? fwdDefaultMaxIterations() : adjDefaultMaxIterations() );
  bool isDefaultSolveCriteria = true;
  if (nonnull(solveCriteria)) {
    if ( solveCriteria->requestedTol != SC::unspecifiedTolerance() ) {
      tol = solveCriteria->requestedTol;
      isDefaultSolveCriteria = false;
    }
    if (nonnull(solveCriteria->extraParameters)) {
      maxIterations = solveCriteria->extraParameters->get("Maximum Iterations",maxIterations);
    }
  }

  //
  // Get Epetra_MultiVector views of B and X
  //

  RCP<const Epetra_MultiVector> epetra_B;
  RCP<Epetra_MultiVector> epetra_X;

  // A non-null opWrapper means the AztecOO operator is a wrapper around a
  // general Thyra operator, in which case no Epetra views are taken and the
  // columns are copied in and out of temporary Epetra vectors instead.
  const EpetraOperatorWrapper* opWrapper =
    dynamic_cast<const EpetraOperatorWrapper*>(aztecOp);

  if (opWrapper == 0) {
    epetra_B = get_Epetra_MultiVector(opRangeMap, rcpFromRef(B));
    epetra_X = get_Epetra_MultiVector(opDomainMap, rcpFromPtr(X));
  }

  //
  // Use AztecOO to solve each RHS one at a time (which is all that I can do anyway)
  //

  int totalIterations = 0;
  SolveStatus<double> solveStatus;
  solveStatus.solveStatus = SOLVE_STATUS_CONVERGED;
  solveStatus.achievedTol = -1.0;

  /* Get the number of columns in the multivector. We use Thyra
   * functions rather than Epetra functions to do this, as we
   * might not yet have created an Epetra multivector. - KL */
  //const int m = epetra_B->NumVectors();
  const int m = B.domain()->dim();

  for( int j = 0; j < m; ++j ) {

    THYRA_FUNC_TIME_MONITOR_DIFF("Stratimikos: AztecOOLOWS:SingleSolve", SingleSolve);

    //
    // Get Epetra_Vector views of B(:,j) and X(:,j)
    // How this is done will depend on whether we have a true Epetra operator
    // or we are wrapping a general Thyra operator in an Epetra operator.
    //

    // We need to declare epetra_x_j as non-const because when we have a phony
    // Epetra operator we'll have to copy a thyra vector into it.
    RCP<Epetra_Vector> epetra_b_j;
    RCP<Epetra_Vector> epetra_x_j;

    if (opWrapper == 0) {
      epetra_b_j = rcpFromRef(*const_cast<Epetra_Vector*>((*epetra_B)(j)));
      epetra_x_j = rcpFromRef(*(*epetra_X)(j));
    }
    else {
      // NOTE: epetra_b_j is declared inside this loop, so it is always null
      // here and fresh vectors are allocated for every column.
      if (is_null(epetra_b_j)) {
        epetra_b_j = rcp(new Epetra_Vector(opRangeMap));
        epetra_x_j = rcp(new Epetra_Vector(opDomainMap));
      }
      opWrapper->copyThyraIntoEpetra(*B.col(j), *epetra_b_j);
      opWrapper->copyThyraIntoEpetra(*X->col(j), *epetra_x_j);
    }

    //
    // Set the RHS and LHS
    //

    aztecSolver->SetRHS(&*epetra_b_j);
    aztecSolver->SetLHS(&*epetra_x_j);

    //
    // Solve the linear system
    //
    timer.start(true);
    {
      SetAztecSolveState
        setAztecSolveState(aztecSolver,out,verbLevel,solveMeasureType);
      aztecSolver->Iterate( maxIterations, tol );
      // NOTE: We ignore the returned status but get it below
    }
    timer.stop();

    //
    // Scale the solution
    // (Originally, this was at the end of the loop after all columns had been
    // processed. It's moved here because we need to do it before copying the
    // solution back into a Thyra vector. - KL
    //
    if (aztecSolverScalar_ != 1.0)
      epetra_x_j->Scale(1.0/aztecSolverScalar_);

    //
    // If necessary, convert the solution back to a non-epetra vector
    //
    if (opWrapper != 0) {
      opWrapper->copyEpetraIntoThyra(*epetra_x_j, X->col(j).ptr());
    }

    //
    // Set the return solve status
    //

    const int iterations = aztecSolver->NumIters();
    const double achievedTol = aztecSolver->ScaledResidual();
    const double *AZ_status = aztecSolver->GetAztecStatus();
    std::ostringstream oss;
    bool converged = false;
    if (AZ_status[AZ_why]==AZ_normal) { oss << "Aztec returned AZ_normal."; converged = true; }
    else if (AZ_status[AZ_why]==AZ_param) oss << "Aztec returned AZ_param.";
    else if (AZ_status[AZ_why]==AZ_breakdown) oss << "Aztec returned AZ_breakdown.";
    else if (AZ_status[AZ_why]==AZ_loss) oss << "Aztec returned AZ_loss.";
    else if (AZ_status[AZ_why]==AZ_ill_cond) oss << "Aztec returned AZ_ill_cond.";
    else if (AZ_status[AZ_why]==AZ_maxits) oss << "Aztec returned AZ_maxits.";
    else oss << "Aztec returned an unknown status?";
    oss << "  Iterations = " << iterations << ".";
    oss << "  Achieved Tolerance = " << achievedTol << ".";
    oss << "  Total time = " << timer.totalElapsedTime() << " sec.";
    if (out.get() && static_cast<int>(verbLevel) > static_cast<int>(Teuchos::VERB_NONE) && outputEveryRhs())
      Teuchos::OSTab(out).o() << "j="<<j<<": " << oss.str() << "\n";

    // Keep the worst (largest) achieved tolerance over all columns
    solveStatus.achievedTol = TEUCHOS_MAX(solveStatus.achievedTol, achievedTol);
    // Note, achieveTol may actually be greater than tol due to ill conditioning and roundoff!

    totalIterations += iterations;

    solveStatus.message = oss.str();
    // NOTE(review): the per-column status is only folded in when the default
    // solve criteria are in use, yet below solveStatus.solveStatus is only
    // read when the criteria are NOT the default.  As written, a solve with a
    // requested tolerance therefore always reports SOLVE_STATUS_CONVERGED.
    // Left unchanged here; confirm the intended condition.
    if ( isDefaultSolveCriteria ) {
      switch(solveStatus.solveStatus) {
        case SOLVE_STATUS_UNKNOWN:
          // Leave overall unknown!
          break;
        case SOLVE_STATUS_CONVERGED:
          solveStatus.solveStatus = ( converged ? SOLVE_STATUS_CONVERGED : SOLVE_STATUS_UNCONVERGED );
          break;
        case SOLVE_STATUS_UNCONVERGED:
          // Leave overall unconverged!
          break;
        default:
          TEUCHOS_TEST_FOR_EXCEPT(true); // Should never get here!
      }
    }
  }

  aztecSolver->UnsetLHSRHS();

  //
  // Release the Epetra_MultiVector views of X and B
  //
  epetra_X = Teuchos::null;
  epetra_B = Teuchos::null;

  //
  // Update the overall solve criteria
  //
  totalTimer.stop();
  SolveStatus<double> overallSolveStatus;
  if (isDefaultSolveCriteria) {
    overallSolveStatus.solveStatus = SOLVE_STATUS_UNKNOWN;
    overallSolveStatus.achievedTol = SS::unknownTolerance();
  }
  else {
    overallSolveStatus.solveStatus = solveStatus.solveStatus;
    overallSolveStatus.achievedTol = solveStatus.achievedTol;
  }
  // Guard the average: with zero RHS columns the integer division by m
  // would be undefined behavior.
  const int avgIterationsPerRhs = ( m > 0 ? totalIterations/m : 0 );
  std::ostringstream oss;
  oss
    << "AztecOO solver "
    << ( overallSolveStatus.solveStatus==SOLVE_STATUS_CONVERGED ? "converged" : "unconverged" )
    << " on m = "<<m<<" RHSs using " << totalIterations << " cumulative iterations"
    << " for an average of " << avgIterationsPerRhs << " iterations/RHS and"
    << " total CPU time of "<<totalTimer.totalElapsedTime()<<" sec.";
  overallSolveStatus.message = oss.str();

  // Added these statistics following what was done for Belos
  if (overallSolveStatus.extraParameters.is_null()) {
    overallSolveStatus.extraParameters = Teuchos::parameterList ();
  }
  overallSolveStatus.extraParameters->set ("AztecOO/Iteration Count",
                                            totalIterations);
  // package independent version of the same
  overallSolveStatus.extraParameters->set ("Iteration Count",
                                            totalIterations);
  overallSolveStatus.extraParameters->set ("AztecOO/Achieved Tolerance",
                                            overallSolveStatus.achievedTol);

  //
  // Report the overall time
  //
  if (out.get() && static_cast<int>(verbLevel) >= static_cast<int>(Teuchos::VERB_LOW))
    *out
      << "\nTotal solve time = "<<totalTimer.totalElapsedTime()<<" sec\n";

  return overallSolveStatus;

}
Example #13
0
// Build an Epetra_Map describing the same distribution as the Thyra vector
// space vs_in.
//
// vs_in must support either the SpmdVectorSpaceBase interface (treated as a
// single block) or the ProductVectorSpaceBase interface (each block must then
// itself be an SpmdVectorSpaceBase); otherwise std::logic_error is thrown.
// For a product space the global IDs of block i are shifted by the sum of the
// global dimensions of blocks 0..i-1.
//
// comm is the Epetra communicator the new map is built on.
Teuchos::RCP<const Epetra_Map>
Thyra::get_Epetra_Map(const VectorSpaceBase<double>& vs_in,
  const RCP<const Epetra_Comm>& comm)
{

  using Teuchos::rcpFromRef;
  using Teuchos::rcpFromPtr;
  using Teuchos::rcp_dynamic_cast;
  using Teuchos::ptrFromRef;
  using Teuchos::ptr_dynamic_cast;

  const Ptr<const VectorSpaceBase<double> > vs_ptr = ptrFromRef(vs_in);

  const Ptr<const SpmdVectorSpaceBase<double> > spmd_vs =
    ptr_dynamic_cast<const SpmdVectorSpaceBase<double> >(vs_ptr);

  // Held by value (not as a reference bound to the returned temporary)
  const Ptr<const ProductVectorSpaceBase<double> > prod_vs =
    ptr_dynamic_cast<const ProductVectorSpaceBase<double> >(vs_ptr);

  TEUCHOS_TEST_FOR_EXCEPTION( is_null(spmd_vs) && is_null(prod_vs), std::logic_error,
    "Error, the concrete VectorSpaceBase object of type "
    +Teuchos::demangleName(typeid(vs_in).name())+" does not support the"
    " SpmdVectorSpaceBase or the ProductVectorSpaceBase interfaces!" );

  const int numBlocks = (nonnull(prod_vs) ? prod_vs->numBlocks() : 1);

  // Get an array of SpmdVectorBase objects for the blocks

  Array<RCP<const SpmdVectorSpaceBase<double> > > spmd_vs_blocks;
  if (nonnull(prod_vs)) {
    for (int block_i = 0; block_i < numBlocks; ++block_i) {
      // throwOnFail=true: a block that is not an SPMD space is an error
      const RCP<const SpmdVectorSpaceBase<double> > spmd_vs_i =
        rcp_dynamic_cast<const SpmdVectorSpaceBase<double> >(
          prod_vs->getBlock(block_i), true);
      spmd_vs_blocks.push_back(spmd_vs_i);
    }
  }
  else {
    spmd_vs_blocks.push_back(rcpFromPtr(spmd_vs));
  }

  // Find the number of local elements, summed over all blocks

  int myLocalElements = 0;
  for (int block_i = 0; block_i < numBlocks; ++block_i) {
    myLocalElements += spmd_vs_blocks[block_i]->localSubDim();
  }

  // Find the GIDs owned by this processor, taken from all blocks

  int count=0;
  int blockOffset = 0;
  Array<int> myGIDs(myLocalElements);
  for (int block_i = 0; block_i < numBlocks; ++block_i) {
    const RCP<const SpmdVectorSpaceBase<double> > spmd_vs_i = spmd_vs_blocks[block_i];
    const int lowGIDInBlock = spmd_vs_i->localOffset();
    const int numLocalElementsInBlock = spmd_vs_i->localSubDim();
    for (int i=0; i < numLocalElementsInBlock; ++i, ++count) {
      myGIDs[count] = blockOffset + lowGIDInBlock + i;
    }
    blockOffset += spmd_vs_i->dim();
  }

  const int globalDim = vs_in.dim();

  // Use getRawPtr() rather than &myGIDs[0]: when this process owns no
  // elements the array is empty and indexing element 0 is out of range.
  return Teuchos::rcp(
    new Epetra_Map(globalDim, myLocalElements, myGIDs.getRawPtr(), 0, *comm));

}
// Solve op(M) * X = B with the wrapped Amesos direct solver.
//
// Parameters:
//   M_trans        [in] Requested form of the operator; combined with
//                  amesosSolverTransp_ to decide whether Amesos solves with
//                  the transpose.
//   B              [in] Right-hand sides.
//   X              [out] Receives the solution, scaled by
//                  1/amesosSolverScalar_ when that scalar is not 1.
//   solveCriteria  [in] Not used by this implementation.
//
// Returns a SolveStatus reporting SOLVE_STATUS_CONVERGED with an unknown
// achieved tolerance.  Throws CatastrophicSolveFailure if the Amesos Solve()
// call returns a non-zero error code.
SolveStatus<double>
AmesosLinearOpWithSolve::solveImpl(
  const EOpTransp M_trans,
  const MultiVectorBase<double> &B,
  const Ptr<MultiVectorBase<double> > &X,
  const Ptr<const SolveCriteria<double> > solveCriteria
  ) const
{
  using Teuchos::rcpFromPtr;
  using Teuchos::rcpFromRef;
  using Teuchos::OSTab;

  Teuchos::Time totalTimer("");
  totalTimer.start(true);

  TEUCHOS_FUNC_TIME_MONITOR("AmesosLOWS");

  Teuchos::RCP<Teuchos::FancyOStream> out = this->getOStream();
  Teuchos::EVerbosityLevel verbLevel = this->getVerbLevel();
  OSTab tab = this->getOSTab();
  if(out.get() && static_cast<int>(verbLevel) > static_cast<int>(Teuchos::VERB_NONE))
    *out << "\nSolving block system using Amesos solver "
         << typeName(*amesosSolver_) << " ...\n\n";

  //
  // Get the op(...) range and domain maps
  //
  const EOpTransp amesosOpTransp = real_trans(trans_trans(amesosSolverTransp_,M_trans));
  const Epetra_Operator *amesosOp = epetraLP_->GetOperator();
  // For a transposed solve the roles of the range and domain maps swap
  const Epetra_Map
    &opRangeMap  = ( amesosOpTransp == NOTRANS
      ? amesosOp->OperatorRangeMap()  : amesosOp->OperatorDomainMap() ),
    &opDomainMap = ( amesosOpTransp == NOTRANS
      ? amesosOp->OperatorDomainMap() : amesosOp->OperatorRangeMap()  );

  //
  // Get Epetra_MultiVector views of B and X
  //
  Teuchos::RCP<const Epetra_MultiVector>
    epetra_B = get_Epetra_MultiVector(opRangeMap, rcpFromRef(B));
  Teuchos::RCP<Epetra_MultiVector>
    epetra_X = get_Epetra_MultiVector(opDomainMap, rcpFromPtr(X));

  //
  // Set B and X in the linear problem
  //
  epetraLP_->SetLHS(&*epetra_X);
  epetraLP_->SetRHS(const_cast<Epetra_MultiVector*>(&*epetra_B));
  // Above should be okay but cross your fingers!

  //
  // Solve the linear system
  //
  const bool oldUseTranspose = amesosSolver_->UseTranspose();
  amesosSolver_->SetUseTranspose(amesosOpTransp==TRANS);
  const int err = amesosSolver_->Solve();
  // Restore the solver state and detach B and X *before* checking the error
  // code, so that a failed solve does not leave the shared solver with the
  // wrong transpose flag or the linear problem pointing at views that are
  // destroyed when the exception unwinds this function.
  amesosSolver_->SetUseTranspose(oldUseTranspose);

  //
  // Unset B and X
  //
  epetraLP_->SetLHS(NULL);
  epetraLP_->SetRHS(NULL);
  epetra_X = Teuchos::null;
  epetra_B = Teuchos::null;

  TEST_FOR_EXCEPTION( 0!=err, CatastrophicSolveFailure,
    "Error, the function Solve() on the amesos solver of type\n"
    "\'"<<typeName(*amesosSolver_)<<"\' failed with error code "<<err<<"!"
    );

  //
  // Scale X if needed
  //
  if(amesosSolverScalar_!=1.0)
    Thyra::scale(1.0/amesosSolverScalar_, X);

  //
  // Set the solve status if requested
  //
  SolveStatus<double> solveStatus;
  solveStatus.solveStatus = SOLVE_STATUS_CONVERGED;
  solveStatus.achievedTol = SolveStatus<double>::unknownTolerance();
  solveStatus.message =
    std::string("Solver ")+typeName(*amesosSolver_)+std::string(" converged!");

  //
  // Report the overall time
  //
  if(out.get() && static_cast<int>(verbLevel) >= static_cast<int>(Teuchos::VERB_LOW))
    *out
      << "\nTotal solve time = "<<totalTimer.totalElapsedTime()<<" sec\n";

  return solveStatus;

}