//
// Solve the block system op(M)*X = B with AztecOO, one right-hand side
// (column of B) at a time.
//
// Parameters:
//   M_trans        Requested transpose mode; real_trans(M_trans) selects the
//                  forward (NOTRANS) or adjoint AztecOO solver.
//   B              Right-hand-side multi-vector.
//   X              Solution multi-vector.  Its current contents are handed to
//                  AztecOO as the LHS for each column and it is overwritten
//                  with the (rescaled) result.
//   solveCriteria  Optional.  If it carries a requestedTol different from
//                  SC::unspecifiedTolerance(), that tolerance replaces the
//                  default one.  A "Maximum Iterations" entry in its
//                  extraParameters replaces the default iteration limit.
//
// Returns:
//   A SolveStatus whose solveStatus/achievedTol are
//   SOLVE_STATUS_UNKNOWN/SS::unknownTolerance() when no tolerance was
//   requested.  Otherwise solveStatus is SOLVE_STATUS_CONVERGED only if AztecOO
//   returned AZ_normal for every column, and achievedTol is the largest scaled
//   residual over all columns.  extraParameters holds the cumulative iteration
//   count and the achieved tolerance.
//
SolveStatus<double>
AztecOOLinearOpWithSolve::solveImpl(
  const EOpTransp M_trans,
  const MultiVectorBase<double> &B,
  const Ptr<MultiVectorBase<double> > &X,
  const Ptr<const SolveCriteria<double> > solveCriteria
  ) const
{

  using Teuchos::rcp;
  using Teuchos::rcpFromRef;
  using Teuchos::rcpFromPtr;
  using Teuchos::OSTab;
  typedef SolveCriteria<double> SC;
  typedef SolveStatus<double> SS;

  THYRA_FUNC_TIME_MONITOR("Stratimikos: AztecOOLOWS");
  Teuchos::Time totalTimer(""), timer("");
  totalTimer.start(true);

  RCP<Teuchos::FancyOStream> out = this->getOStream();
  Teuchos::EVerbosityLevel verbLevel = this->getVerbLevel();
  OSTab tab = this->getOSTab();
  if (out.get() && static_cast<int>(verbLevel) > static_cast<int>(Teuchos::VERB_NONE))
    *out << "\nSolving block system using AztecOO ...\n\n";

  //
  // Validate input
  //
  TEUCHOS_ASSERT(this->solveSupportsImpl(M_trans));
  SolveMeasureType solveMeasureType;
  if (nonnull(solveCriteria)) {
    solveMeasureType = solveCriteria->solveMeasureType;
    assertSupportsSolveMeasureType(*this, M_trans, solveMeasureType);
  }

  //
  // Get the transpose argument
  //
  const EOpTransp aztecOpTransp = real_trans(M_trans);

  //
  // Get the solver, operator, and preconditioner that we will use
  //
  RCP<AztecOO>
    aztecSolver = ( aztecOpTransp == NOTRANS ? aztecFwdSolver_  : aztecAdjSolver_ );
  const Epetra_Operator
    *aztecOp = aztecSolver->GetUserOperator();

  //
  // Get the op(...) range and domain maps
  //
  const Epetra_Map
    &opRangeMap = aztecOp->OperatorRangeMap(),
    &opDomainMap = aztecOp->OperatorDomainMap();

  //
  // Get the convergence criteria
  //
  double tol = ( aztecOpTransp==NOTRANS ? fwdDefaultTol() : adjDefaultTol() );
  int maxIterations = ( aztecOpTransp==NOTRANS
    ? fwdDefaultMaxIterations() : adjDefaultMaxIterations() );
  // Stays true unless the caller explicitly requested a tolerance; in that
  // case the overall status returned below is reported as "unknown".
  bool isDefaultSolveCriteria = true;
  if (nonnull(solveCriteria)) {
    if ( solveCriteria->requestedTol != SC::unspecifiedTolerance() ) {
      tol = solveCriteria->requestedTol;
      isDefaultSolveCriteria = false;
    }
    if (nonnull(solveCriteria->extraParameters)) {
      maxIterations = solveCriteria->extraParameters->get("Maximum Iterations",maxIterations);
    }
  }

  //
  // Get Epetra_MultiVector views of B and X
  //

  RCP<const Epetra_MultiVector> epetra_B;
  RCP<Epetra_MultiVector> epetra_X;

  // Non-null when the AztecOO operator is an EpetraOperatorWrapper; in that
  // case B and X are copied column by column instead of being viewed directly.
  const EpetraOperatorWrapper* opWrapper =
    dynamic_cast<const EpetraOperatorWrapper*>(aztecOp);

  if (opWrapper == 0) {
    epetra_B = get_Epetra_MultiVector(opRangeMap, rcpFromRef(B));
    epetra_X = get_Epetra_MultiVector(opDomainMap, rcpFromPtr(X));
  }

  //
  // Use AztecOO to solve each RHS one at a time (which is all that I can do anyway)
  //

  int totalIterations = 0;
  SolveStatus<double> solveStatus;
  solveStatus.solveStatus = SOLVE_STATUS_CONVERGED;
  solveStatus.achievedTol = -1.0;

  /* Get the number of columns in the multivector. We use Thyra
   * functions rather than Epetra functions to do this, as we
   * might not yet have created an Epetra multivector. - KL */
  const int m = B.domain()->dim();

  // Scratch vectors used only on the wrapped-operator path.  They are
  // declared outside the loop so that they are allocated once (lazily, on the
  // first column) and stay alive until after UnsetLHSRHS() below.
  RCP<Epetra_Vector> scratch_b;
  RCP<Epetra_Vector> scratch_x;

  for( int j = 0; j < m; ++j ) {

    THYRA_FUNC_TIME_MONITOR_DIFF("Stratimikos: AztecOOLOWS:SingleSolve", SingleSolve);

    //
    // Get Epetra_Vector views of B(:,j) and X(:,j)
    // How this is done will depend on whether we have a true Epetra operator
    // or we are wrapping a general Thyra operator in an Epetra operator.
    //

    // We need to declare epetra_x_j as non-const because when we have a phony
    // Epetra operator we'll have to copy a thyra vector into it.
    RCP<Epetra_Vector> epetra_b_j;
    RCP<Epetra_Vector> epetra_x_j;

    if (opWrapper == 0) {
      // The const_cast is needed only because SetRHS() takes a non-const pointer.
      epetra_b_j = rcpFromRef(*const_cast<Epetra_Vector*>((*epetra_B)(j)));
      epetra_x_j = rcpFromRef(*(*epetra_X)(j));
    }
    else {
      if (is_null(scratch_b)) {
        scratch_b = rcp(new Epetra_Vector(opRangeMap));
        scratch_x = rcp(new Epetra_Vector(opDomainMap));
      }
      epetra_b_j = scratch_b;
      epetra_x_j = scratch_x;
      opWrapper->copyThyraIntoEpetra(*B.col(j), *epetra_b_j);
      opWrapper->copyThyraIntoEpetra(*X->col(j), *epetra_x_j);
    }

    //
    // Set the RHS and LHS
    //

    aztecSolver->SetRHS(&*epetra_b_j);
    aztecSolver->SetLHS(&*epetra_x_j);

    //
    // Solve the linear system
    //
    timer.start(true);
    {
      SetAztecSolveState
        setAztecSolveState(aztecSolver,out,verbLevel,solveMeasureType);
      aztecSolver->Iterate( maxIterations, tol );
      // NOTE: We ignore the returned status but get it below
    }
    timer.stop();

    //
    // Scale the solution
    // (Originally, this was at the end of the loop after all columns had been
    // processed. It's moved here because we need to do it before copying the
    // solution back into a Thyra vector. - KL)
    //
    if (aztecSolverScalar_ != 1.0)
      epetra_x_j->Scale(1.0/aztecSolverScalar_);

    //
    // If necessary, convert the solution back to a non-epetra vector
    //
    if (opWrapper != 0) {
      opWrapper->copyEpetraIntoThyra(*epetra_x_j, X->col(j).ptr());
    }

    //
    // Set the return solve status
    //

    const int iterations = aztecSolver->NumIters();
    const double achievedTol = aztecSolver->ScaledResidual();
    const double *AZ_status = aztecSolver->GetAztecStatus();
    std::ostringstream oss;
    bool converged = false;
    if (AZ_status[AZ_why]==AZ_normal) { oss << "Aztec returned AZ_normal."; converged = true; }
    else if (AZ_status[AZ_why]==AZ_param) oss << "Aztec returned AZ_param.";
    else if (AZ_status[AZ_why]==AZ_breakdown) oss << "Aztec returned AZ_breakdown.";
    else if (AZ_status[AZ_why]==AZ_loss) oss << "Aztec returned AZ_loss.";
    else if (AZ_status[AZ_why]==AZ_ill_cond) oss << "Aztec returned AZ_ill_cond.";
    else if (AZ_status[AZ_why]==AZ_maxits) oss << "Aztec returned AZ_maxits.";
    else oss << "Aztec returned an unknown status?";
    oss << "  Iterations = " << iterations << ".";
    oss << "  Achieved Tolerance = " << achievedTol << ".";
    oss << "  Total time = " << timer.totalElapsedTime() << " sec.";
    if (out.get() && static_cast<int>(verbLevel) > static_cast<int>(Teuchos::VERB_NONE) && outputEveryRhs())
      Teuchos::OSTab(out).o() << "j="<<j<<": " << oss.str() << "\n";

    solveStatus.achievedTol = TEUCHOS_MAX(solveStatus.achievedTol, achievedTol);
    // Note, achieveTol may actually be greater than tol due to ill conditioning and roundoff!

    totalIterations += iterations;

    solveStatus.message = oss.str();

    // Fold this column's outcome into the running status.  This must happen
    // regardless of isDefaultSolveCriteria: the running status is only
    // consumed below when a tolerance WAS requested, so guarding the update
    // with isDefaultSolveCriteria would make a requested-tolerance solve
    // always report SOLVE_STATUS_CONVERGED.  Once any column fails, the
    // overall status stays unconverged.
    if (!converged && solveStatus.solveStatus == SOLVE_STATUS_CONVERGED)
      solveStatus.solveStatus = SOLVE_STATUS_UNCONVERGED;
  }

  aztecSolver->UnsetLHSRHS();

  //
  // Release the Epetra_MultiVector views of X and B
  //
  epetra_X = Teuchos::null;
  epetra_B = Teuchos::null;

  //
  // Update the overall solve criteria
  //
  totalTimer.stop();
  SolveStatus<double> overallSolveStatus;
  if (isDefaultSolveCriteria) {
    // No tolerance was requested, so nothing can be claimed about convergence.
    overallSolveStatus.solveStatus = SOLVE_STATUS_UNKNOWN;
    overallSolveStatus.achievedTol = SS::unknownTolerance();
  }
  else {
    overallSolveStatus.solveStatus = solveStatus.solveStatus;
    overallSolveStatus.achievedTol = solveStatus.achievedTol;
  }

  // Guard the average against a multi-vector with zero columns (m == 0),
  // which would otherwise be an integer division by zero.
  const int averageIterations = ( m > 0 ? totalIterations/m : 0 );

  // NOTE: any status other than SOLVE_STATUS_CONVERGED (including
  // SOLVE_STATUS_UNKNOWN) is printed as "unconverged" in this message.
  std::ostringstream oss;
  oss
    << "AztecOO solver "
    << ( overallSolveStatus.solveStatus==SOLVE_STATUS_CONVERGED ? "converged" : "unconverged" )
    << " on m = "<<m<<" RHSs using " << totalIterations << " cumulative iterations"
    << " for an average of " << averageIterations << " iterations/RHS and"
    << " total CPU time of "<<totalTimer.totalElapsedTime()<<" sec.";
  overallSolveStatus.message = oss.str();

  // Added these statistics following what was done for Belos
  if (overallSolveStatus.extraParameters.is_null()) {
    overallSolveStatus.extraParameters = Teuchos::parameterList ();
  }
  overallSolveStatus.extraParameters->set ("AztecOO/Iteration Count",
                                            totalIterations);
  // package independent version of the same
  overallSolveStatus.extraParameters->set ("Iteration Count",
                                            totalIterations);
  overallSolveStatus.extraParameters->set ("AztecOO/Achieved Tolerance",
                                            overallSolveStatus.achievedTol);

  //
  // Report the overall time
  //
  if (out.get() && static_cast<int>(verbLevel) >= static_cast<int>(Teuchos::VERB_LOW))
    *out
      << "\nTotal solve time = "<<totalTimer.totalElapsedTime()<<" sec\n";

  return overallSolveStatus;

}