void
Albany::DistributedResponseFunction::
evaluateDerivativeT(
  const double current_time,
  const Tpetra_Vector* xdotT,
  const Tpetra_Vector* xdotdotT,
  const Tpetra_Vector& xT,
  const Teuchos::Array<ParamVec>& p,
  ParamVec* deriv_p,
  Tpetra_Vector* gT,
  const Thyra::ModelEvaluatorBase::Derivative<ST>& dg_dxT,
  const Thyra::ModelEvaluatorBase::Derivative<ST>& dg_dxdotT,
  const Thyra::ModelEvaluatorBase::Derivative<ST>& dg_dxdotdotT,
  const Thyra::ModelEvaluatorBase::Derivative<ST>& dg_dpT)
{
  Tpetra_Operator* dg_dxp;
  if(dg_dxT.isEmpty()){
    dg_dxp = NULL;
  }
  else {
    Teuchos::RCP<Tpetra_Operator> dgdxT = ConverterT::getTpetraOperator(dg_dxT.getLinearOp());
    dg_dxp = dgdxT.get();
  }

  Tpetra_Operator* dg_dxdotp;
  if(dg_dxdotT.isEmpty()){
    dg_dxdotp = NULL;
  }
  else {
    Teuchos::RCP<Tpetra_Operator> dgdxdotT = ConverterT::getTpetraOperator(dg_dxdotT.getLinearOp());
    dg_dxdotp = dgdxdotT.get();
  }

  Tpetra_Operator* dg_dxdotdotp;
  if(dg_dxdotdotT.isEmpty()){
    dg_dxdotdotp = NULL;
  }
  else {
    Teuchos::RCP<Tpetra_Operator> dgdxdotdotT = ConverterT::getTpetraOperator(dg_dxdotdotT.getLinearOp());
    dg_dxdotdotp = dgdxdotdotT.get();
  }

  Tpetra_MultiVector* dg_dpp;
  if(dg_dpT.isEmpty()){
    dg_dpp = NULL;
  }
  else {
    Teuchos::RCP<Tpetra_MultiVector> dgdpT = ConverterT::getTpetraMultiVector(dg_dpT.getMultiVector());
    dg_dpp = dgdpT.get();
  }

  this->evaluateGradientT(
    current_time, xdotT, xdotdotT, xT, p, deriv_p, gT,
    dg_dxp, dg_dxdotp, dg_dxdotdotp, dg_dpp);
}
void
Albany::ScalarResponseFunction::
evaluateDerivative(
    const double current_time,
    const Teuchos::RCP<const Thyra_Vector>& x,
    const Teuchos::RCP<const Thyra_Vector>& xdot,
    const Teuchos::RCP<const Thyra_Vector>& xdotdot,
    const Teuchos::Array<ParamVec>& p,
    ParamVec* deriv_p,
    const Teuchos::RCP<Thyra_Vector>& g,
    const Thyra::ModelEvaluatorBase::Derivative<ST>& dg_dx,
    const Thyra::ModelEvaluatorBase::Derivative<ST>& dg_dxdot,
    const Thyra::ModelEvaluatorBase::Derivative<ST>& dg_dxdotdot,
    const Thyra::ModelEvaluatorBase::Derivative<ST>& dg_dp)
{
  this->evaluateGradient(
    current_time, x, xdot, xdotdot, p, deriv_p, g,
    dg_dx.getMultiVector(), dg_dxdot.getMultiVector(),
    dg_dxdotdot.getMultiVector(), dg_dp.getMultiVector());
}
void
Albany::ScalarResponseFunction::
evaluateDerivativeT(
  const double current_time,
  const Tpetra_Vector* xdotT,
  const Tpetra_Vector* xdotdotT,
  const Tpetra_Vector& xT,
  const Teuchos::Array<ParamVec>& p,
  ParamVec* deriv_p,
  Tpetra_Vector* gT,
  const Thyra::ModelEvaluatorBase::Derivative<ST>& dg_dx,
  const Thyra::ModelEvaluatorBase::Derivative<ST>& dg_dxdot,
  const Thyra::ModelEvaluatorBase::Derivative<ST>& dg_dxdotdot,
  const Thyra::ModelEvaluatorBase::Derivative<ST>& dg_dp)
{

  const Teuchos::RCP<Tpetra_MultiVector> dg_dxT =
    Teuchos::nonnull(dg_dx.getMultiVector()) ?
    ConverterT::getTpetraMultiVector(dg_dx.getMultiVector()) :
    Teuchos::null;

  const Teuchos::RCP<Tpetra_MultiVector> dg_dxdotT =
    Teuchos::nonnull(dg_dxdot.getMultiVector()) ?
    ConverterT::getTpetraMultiVector(dg_dxdot.getMultiVector()) :
    Teuchos::null;

  const Teuchos::RCP<Tpetra_MultiVector> dg_dxdotdotT =
    Teuchos::nonnull(dg_dxdotdot.getMultiVector()) ?
    ConverterT::getTpetraMultiVector(dg_dxdotdot.getMultiVector()) :
    Teuchos::null;

  const Teuchos::RCP<Tpetra_MultiVector> dg_dpT =
    Teuchos::nonnull(dg_dp.getMultiVector()) ?
    ConverterT::getTpetraMultiVector(dg_dp.getMultiVector()) :
    Teuchos::null;

  this->evaluateGradientT(
    current_time, xdotT, xdotdotT, xT, p, deriv_p, gT,
    dg_dxT.get(), dg_dxdotT.get(), dg_dxdotdotT.get(), dg_dpT.get());
}
void Piro::SteadyStateSolver<Scalar>::evalConvergedModel(
    const Thyra::ModelEvaluatorBase::InArgs<Scalar>& modelInArgs,
    const Thyra::ModelEvaluatorBase::OutArgs<Scalar>& outArgs) const
{
  using Teuchos::RCP;
  using Teuchos::rcp;

  // Solution at convergence is the response at index num_g_
  {
    const RCP<Thyra::VectorBase<Scalar> > gx_out = outArgs.get_g(num_g_);
    if (Teuchos::nonnull(gx_out)) {
      Thyra::copy(*modelInArgs.get_x(), gx_out.ptr());
    }
  }

  // Setup output for final evalution of underlying model
  Thyra::ModelEvaluatorBase::OutArgs<Scalar> modelOutArgs = model_->createOutArgs();
  {
    // Responses
    for (int j = 0; j < num_g_; ++j) {
      const RCP<Thyra::VectorBase<Scalar> > g_out = outArgs.get_g(j);
      // Forward to underlying model
      modelOutArgs.set_g(j, g_out);
    }

    // Jacobian
    {
      bool jacobianRequired = false;
      for (int j = 0; j <= num_g_; ++j) {
        for (int l = 0; l < num_p_; ++l) {
          const Thyra::ModelEvaluatorBase::DerivativeSupport dgdp_support =
            outArgs.supports(Thyra::ModelEvaluatorBase::OUT_ARG_DgDp, j, l);
          if (!dgdp_support.none()) {
            const Thyra::ModelEvaluatorBase::Derivative<Scalar> dgdp_deriv =
              outArgs.get_DgDp(j, l);
            if (!dgdp_deriv.isEmpty()) {
              jacobianRequired = true;
            }
          }
        }
      }
      if (jacobianRequired) {
        const RCP<Thyra::LinearOpWithSolveBase<Scalar> > jacobian =
          model_->create_W();
        modelOutArgs.set_W(jacobian);
      }
    }

    // DfDp derivatives
    for (int l = 0; l < num_p_; ++l) {
      Thyra::ModelEvaluatorBase::DerivativeSupport dfdp_request;
      for (int j = 0; j <= num_g_; ++j) {
        const Thyra::ModelEvaluatorBase::DerivativeSupport dgdp_support =
          outArgs.supports(Thyra::ModelEvaluatorBase::OUT_ARG_DgDp, j, l);
        if (!dgdp_support.none()) {
          const Thyra::ModelEvaluatorBase::Derivative<Scalar> dgdp_deriv =
            outArgs.get_DgDp(j, l);
          if (Teuchos::nonnull(dgdp_deriv.getLinearOp())) {
            dfdp_request.plus(Thyra::ModelEvaluatorBase::DERIV_LINEAR_OP);
          } else if (Teuchos::nonnull(dgdp_deriv.getMultiVector())) {
            dfdp_request.plus(Thyra::ModelEvaluatorBase::DERIV_MV_JACOBIAN_FORM);
          }
        }
      }

      if (!dfdp_request.none()) {
        Thyra::ModelEvaluatorBase::Derivative<Scalar> dfdp_deriv;
        if (dfdp_request.supports(Thyra::ModelEvaluatorBase::DERIV_MV_JACOBIAN_FORM)) {
          dfdp_deriv = Thyra::create_DfDp_mv(*model_, l, Thyra::ModelEvaluatorBase::DERIV_MV_JACOBIAN_FORM);
        } else if (dfdp_request.supports(Thyra::ModelEvaluatorBase::DERIV_LINEAR_OP)) {
          dfdp_deriv = model_->create_DfDp_op(l);
        }
        modelOutArgs.set_DfDp(l, dfdp_deriv);
      }
    }

    // DgDx derivatives
    for (int j = 0; j < num_g_; ++j) {
      Thyra::ModelEvaluatorBase::DerivativeSupport dgdx_request;
      for (int l = 0; l < num_p_; ++l) {
        const Thyra::ModelEvaluatorBase::DerivativeSupport dgdp_support =
          outArgs.supports(Thyra::ModelEvaluatorBase::OUT_ARG_DgDp, j, l);
        if (!dgdp_support.none()) {
          const Thyra::ModelEvaluatorBase::Derivative<Scalar> dgdp_deriv =
            outArgs.get_DgDp(j, l);
          if (!dgdp_deriv.isEmpty()) {
            const bool dgdp_mvGrad_required =
              Teuchos::nonnull(dgdp_deriv.getMultiVector()) &&
              dgdp_deriv.getMultiVectorOrientation() == Thyra::ModelEvaluatorBase::DERIV_MV_GRADIENT_FORM;
            if (dgdp_mvGrad_required) {
              dgdx_request.plus(Thyra::ModelEvaluatorBase::DERIV_MV_GRADIENT_FORM);
            } else {
              dgdx_request.plus(Thyra::ModelEvaluatorBase::DERIV_LINEAR_OP);
            }
          }
        }
      }

      if (!dgdx_request.none()) {
        Thyra::ModelEvaluatorBase::Derivative<Scalar> dgdx_deriv;
        if (dgdx_request.supports(Thyra::ModelEvaluatorBase::DERIV_MV_GRADIENT_FORM)) {
          dgdx_deriv = Thyra::create_DgDx_mv(*model_, j, Thyra::ModelEvaluatorBase::DERIV_MV_GRADIENT_FORM);
        } else if (dgdx_request.supports(Thyra::ModelEvaluatorBase::DERIV_LINEAR_OP)) {
          dgdx_deriv = model_->create_DgDx_op(j);
        }
        modelOutArgs.set_DgDx(j, dgdx_deriv);
      }
    }

    // DgDp derivatives
    for (int l = 0; l < num_p_; ++l) {
      for (int j = 0; j < num_g_; ++j) {
        const Thyra::ModelEvaluatorBase::DerivativeSupport dgdp_support =
          outArgs.supports(Thyra::ModelEvaluatorBase::OUT_ARG_DgDp, j, l);
        if (!dgdp_support.none()) {
          const Thyra::ModelEvaluatorBase::Derivative<Scalar> dgdp_deriv =
            outArgs.get_DgDp(j, l);
          Thyra::ModelEvaluatorBase::Derivative<Scalar> model_dgdp_deriv;
          const RCP<Thyra::LinearOpBase<Scalar> > dgdp_op = dgdp_deriv.getLinearOp();
          if (Teuchos::nonnull(dgdp_op)) {
            model_dgdp_deriv = model_->create_DgDp_op(j, l);
          } else {
            model_dgdp_deriv = dgdp_deriv;
          }
          if (!model_dgdp_deriv.isEmpty()) {
            modelOutArgs.set_DgDp(j, l, model_dgdp_deriv);
          }
        }
      }
    }
  }

  // Evaluate underlying model
  model_->evalModel(modelInArgs, modelOutArgs);

  // Assemble user-requested sensitivities
  if (modelOutArgs.supports(Thyra::ModelEvaluatorBase::OUT_ARG_W)) {
    const RCP<Thyra::LinearOpWithSolveBase<Scalar> > jacobian =
      modelOutArgs.get_W();
    if (Teuchos::nonnull(jacobian)) {
      for (int l = 0; l < num_p_; ++l) {
        const Thyra::ModelEvaluatorBase::DerivativeSupport dfdp_support =
          modelOutArgs.supports(Thyra::ModelEvaluatorBase::OUT_ARG_DfDp, l);
        if (!dfdp_support.none()) {
          const Thyra::ModelEvaluatorBase::Derivative<Scalar> dfdp_deriv =
            modelOutArgs.get_DfDp(l);
          const RCP<Thyra::MultiVectorBase<Scalar> > dfdp_mv =
            dfdp_deriv.getMultiVector();
          RCP<Thyra::LinearOpBase<Scalar> > dfdp_op =
            dfdp_deriv.getLinearOp();
          if (Teuchos::is_null(dfdp_op)) {
            dfdp_op = dfdp_mv;
          }

          const Thyra::ModelEvaluatorBase::Derivative<Scalar> dxdp_deriv =
            outArgs.get_DgDp(num_g_, l);
          const RCP<Thyra::LinearOpBase<Scalar> > dxdp_op =
            dxdp_deriv.getLinearOp();
          const RCP<Thyra::MultiVectorBase<Scalar> > dxdp_mv =
            dxdp_deriv.getMultiVector();

          RCP<const Thyra::LinearOpBase<Scalar> > minus_dxdp_op;
          RCP<Thyra::MultiVectorBase<Scalar> > minus_dxdp_mv;
          if (Teuchos::nonnull(dfdp_mv)) {
            if (Teuchos::nonnull(dxdp_mv)) {
              minus_dxdp_mv = dxdp_mv; // Use user-provided object as temporary
            } else {
              minus_dxdp_mv =
                Thyra::createMembers(model_->get_x_space(), model_->get_p_space(l));
              minus_dxdp_op = minus_dxdp_mv;
            }
          }

          if (Teuchos::is_null(minus_dxdp_op)) {
            const RCP<const Thyra::LinearOpBase<Scalar> > dfdx_inv_op =
              Thyra::inverse<Scalar>(jacobian);
            minus_dxdp_op = Thyra::multiply<Scalar>(dfdx_inv_op, dfdp_op);
          }

          if (Teuchos::nonnull(minus_dxdp_mv)) {
            Thyra::assign(minus_dxdp_mv.ptr(), Teuchos::ScalarTraits<Scalar>::zero());

            const Thyra::SolveCriteria<Scalar> defaultSolveCriteria;
            const Thyra::SolveStatus<Scalar> solveStatus =
              Thyra::solve(
                  *jacobian,
                  Thyra::NOTRANS,
                  *dfdp_mv,
                  minus_dxdp_mv.ptr(),
                  Teuchos::ptr(&defaultSolveCriteria));
            TEUCHOS_TEST_FOR_EXCEPTION(
                solveStatus.solveStatus == Thyra::SOLVE_STATUS_UNCONVERGED,
                std::runtime_error,
                "Jacobian solver failed to converge");
          }

          // Solution sensitivities
          if (Teuchos::nonnull(dxdp_mv)) {
            minus_dxdp_mv = Teuchos::null; // Invalidates temporary
            Thyra::scale(-Teuchos::ScalarTraits<Scalar>::one(), dxdp_mv.ptr());
          } else if (Teuchos::nonnull(dxdp_op)) {
            const RCP<Thyra::DefaultMultipliedLinearOp<Scalar> > dxdp_op_downcasted =
              Teuchos::rcp_dynamic_cast<Thyra::DefaultMultipliedLinearOp<Scalar> >(dxdp_op);
            TEUCHOS_TEST_FOR_EXCEPTION(
                Teuchos::is_null(dxdp_op_downcasted),
                std::invalid_argument,
                "Illegal operator for DgDp(" <<
                "j = " << num_g_ << ", " <<
                "index l = " << l << ")\n");

            const RCP<const Thyra::LinearOpBase<Scalar> > minus_id_op =
              Thyra::scale<Scalar>(-Teuchos::ScalarTraits<Scalar>::one(), Thyra::identity(dfdp_op->domain()));

            dxdp_op_downcasted->initialize(Teuchos::tuple(minus_dxdp_op, minus_id_op));
          }

          // Response sensitivities
          for (int j = 0; j < num_g_; ++j) {
            const Thyra::ModelEvaluatorBase::DerivativeSupport dgdp_support =
              outArgs.supports(Thyra::ModelEvaluatorBase::OUT_ARG_DgDp, j, l);
            if (!dgdp_support.none()) {
              const Thyra::ModelEvaluatorBase::Derivative<Scalar> dgdp_deriv =
                outArgs.get_DgDp(j, l);
              if (!dgdp_deriv.isEmpty()) {
                const Thyra::ModelEvaluatorBase::Derivative<Scalar> dgdx_deriv =
                  modelOutArgs.get_DgDx(j);
                const RCP<const Thyra::MultiVectorBase<Scalar> > dgdx_mv =
                  dgdx_deriv.getMultiVector();
                RCP<const Thyra::LinearOpBase<Scalar> > dgdx_op =
                  dgdx_deriv.getLinearOp();
                if (Teuchos::is_null(dgdx_op)) {
                  dgdx_op = Thyra::adjoint<Scalar>(dgdx_mv);
                }

                const RCP<Thyra::LinearOpBase<Scalar> > dgdp_op =
                  dgdp_deriv.getLinearOp();
                if (Teuchos::nonnull(dgdp_op)) {
                  const RCP<Thyra::DefaultAddedLinearOp<Scalar> > dgdp_op_downcasted =
                    Teuchos::rcp_dynamic_cast<Thyra::DefaultAddedLinearOp<Scalar> >(dgdp_op);
                  TEUCHOS_TEST_FOR_EXCEPTION(
                      Teuchos::is_null(dgdp_op_downcasted),
                      std::invalid_argument,
                      "Illegal operator for DgDp(" <<
                      "j = " << j << ", " <<
                      "index l = " << l << ")\n");

                  dgdp_op_downcasted->uninitialize();

                  const RCP<const Thyra::LinearOpBase<Scalar> > implicit_dgdp_op =
                    Thyra::multiply<Scalar>(
                      Thyra::scale<Scalar>(-Teuchos::ScalarTraits<Scalar>::one(), dgdx_op),
                      minus_dxdp_op);

                  const RCP<const Thyra::LinearOpBase<Scalar> > model_dgdp_op =
                    modelOutArgs.get_DgDp(j, l).getLinearOp();

                  Teuchos::Array<RCP<const Thyra::LinearOpBase<Scalar> > > op_args(2);
                  op_args[0] = model_dgdp_op;
                  op_args[1] = implicit_dgdp_op;
                  dgdp_op_downcasted->initialize(op_args);
                }

                const RCP<Thyra::MultiVectorBase<Scalar> > dgdp_mv =
                  dgdp_deriv.getMultiVector();
                if (Teuchos::nonnull(dgdp_mv)) {
                  if (dgdp_deriv.getMultiVectorOrientation() == Thyra::ModelEvaluatorBase::DERIV_MV_GRADIENT_FORM) {
                    if (Teuchos::nonnull(dxdp_mv)) {
                      Thyra::apply(
                          *dxdp_mv,
                          Thyra::TRANS,
                          *dgdx_mv,
                          dgdp_mv.ptr(),
                          Teuchos::ScalarTraits<Scalar>::one(),
                          Teuchos::ScalarTraits<Scalar>::one());
                    } else {
                      Thyra::apply(
                          *minus_dxdp_mv,
                          Thyra::TRANS,
                          *dgdx_mv,
                          dgdp_mv.ptr(),
                          -Teuchos::ScalarTraits<Scalar>::one(),
                          Teuchos::ScalarTraits<Scalar>::one());
                    }
                  } else {
                    if (Teuchos::nonnull(dxdp_mv)) {
                      Thyra::apply(
                          *dgdx_op,
                          Thyra::NOTRANS,
                          *dxdp_mv,
                          dgdp_mv.ptr(),
                          Teuchos::ScalarTraits<Scalar>::one(),
                          Teuchos::ScalarTraits<Scalar>::one());
                    } else {
                      Thyra::apply(
                          *dgdx_op,
                          Thyra::NOTRANS,
                          *minus_dxdp_mv,
                          dgdp_mv.ptr(),
                          -Teuchos::ScalarTraits<Scalar>::one(),
                          Teuchos::ScalarTraits<Scalar>::one());
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}
void
Albany::ModelEvaluatorT::evalModelImpl(
    const Thyra::ModelEvaluatorBase::InArgs<ST>& inArgsT,
    const Thyra::ModelEvaluatorBase::OutArgs<ST>& outArgsT) const
{

  #ifdef OUTPUT_TO_SCREEN
    std::cout << "DEBUG: " << __PRETTY_FUNCTION__ << "\n";
  #endif

  Teuchos::TimeMonitor Timer(*timer); //start timer
  //
  // Get the input arguments
  //
  const Teuchos::RCP<const Tpetra_Vector> xT =
    ConverterT::getConstTpetraVector(inArgsT.get_x());

  const Teuchos::RCP<const Tpetra_Vector> x_dotT =
    (supports_xdot && Teuchos::nonnull(inArgsT.get_x_dot())) ?
    ConverterT::getConstTpetraVector(inArgsT.get_x_dot()) :
    Teuchos::null;


  const Teuchos::RCP<const Tpetra_Vector> x_dotdotT =
    (supports_xdotdot && Teuchos::nonnull(this->get_x_dotdot())) ?
    ConverterT::getConstTpetraVector(this->get_x_dotdot()) :
    Teuchos::null;

  const double alpha = (Teuchos::nonnull(x_dotT) || Teuchos::nonnull(x_dotdotT)) ? inArgsT.get_alpha() : 0.0;
  const double omega = Teuchos::nonnull(x_dotdotT) ? this->get_omega() : 0.0;
  const double beta = (Teuchos::nonnull(x_dotT) || Teuchos::nonnull(x_dotdotT)) ? inArgsT.get_beta() : 1.0;
  const double curr_time = (Teuchos::nonnull(x_dotT) || Teuchos::nonnull(x_dotdotT)) ? inArgsT.get_t() : 0.0;

  for (int l = 0; l < inArgsT.Np(); ++l) {
    const Teuchos::RCP<const Thyra::VectorBase<ST> > p = inArgsT.get_p(l);
    if (Teuchos::nonnull(p)) {
      const Teuchos::RCP<const Tpetra_Vector> pT = ConverterT::getConstTpetraVector(p);
      const Teuchos::ArrayRCP<const ST> pT_constView = pT->get1dView();

      ParamVec &sacado_param_vector = sacado_param_vec[l];
      for (unsigned int k = 0; k < sacado_param_vector.size(); ++k) {
        sacado_param_vector[k].baseValue = pT_constView[k];
      }
    }
  }

  //
  // Get the output arguments
  //
  const Teuchos::RCP<Tpetra_Vector> fT_out =
    Teuchos::nonnull(outArgsT.get_f()) ?
    ConverterT::getTpetraVector(outArgsT.get_f()) :
    Teuchos::null;

  const Teuchos::RCP<Tpetra_Operator> W_op_outT =
    Teuchos::nonnull(outArgsT.get_W_op()) ?
    ConverterT::getTpetraOperator(outArgsT.get_W_op()) :
    Teuchos::null;

#ifdef WRITE_MASS_MATRIX_TO_MM_FILE
  //IK, 4/24/15: adding object to hold mass matrix to be written to matrix market file
  const Teuchos::RCP<Tpetra_Operator> Mass =
    Teuchos::nonnull(outArgsT.get_W_op()) ?
    ConverterT::getTpetraOperator(outArgsT.get_W_op()) :
    Teuchos::null;
  //IK, 4/24/15: needed for writing mass matrix out to matrix market file
  const Teuchos::RCP<Tpetra_Vector> ftmp =
    Teuchos::nonnull(outArgsT.get_f()) ?
    ConverterT::getTpetraVector(outArgsT.get_f()) :
    Teuchos::null;
#endif

  // Cast W to a CrsMatrix, throw an exception if this fails
  const Teuchos::RCP<Tpetra_CrsMatrix> W_op_out_crsT =
    Teuchos::nonnull(W_op_outT) ?
    Teuchos::rcp_dynamic_cast<Tpetra_CrsMatrix>(W_op_outT, true) :
    Teuchos::null;

#ifdef WRITE_MASS_MATRIX_TO_MM_FILE
  //IK, 4/24/15: adding object to hold mass matrix to be written to matrix market file
  const Teuchos::RCP<Tpetra_CrsMatrix> Mass_crs =
    Teuchos::nonnull(Mass) ?
    Teuchos::rcp_dynamic_cast<Tpetra_CrsMatrix>(Mass, true) :
    Teuchos::null;
#endif

  //
  // Compute the functions
  //
  bool f_already_computed = false;

  // W matrix
  if (Teuchos::nonnull(W_op_out_crsT)) {
    app->computeGlobalJacobianT(
        alpha, beta, omega, curr_time, x_dotT.get(), x_dotdotT.get(),  *xT,
        sacado_param_vec, fT_out.get(), *W_op_out_crsT);
    f_already_computed = true;
#ifdef WRITE_MASS_MATRIX_TO_MM_FILE
    //IK, 4/24/15: write mass matrix to matrix market file
    //Warning: to read this in to MATLAB correctly, code must be run in serial.
    //Otherwise Mass will have a distributed Map which would also need to be read in to MATLAB for proper
    //reading in of Mass.
    app->computeGlobalJacobianT(1.0, 0.0, 0.0, curr_time, x_dotT.get(), x_dotdotT.get(), *xT,
                               sacado_param_vec, ftmp.get(), *Mass_crs);
    Tpetra_MatrixMarket_Writer::writeSparseFile("mass.mm", Mass_crs);
    Tpetra_MatrixMarket_Writer::writeMapFile("rowmap.mm", *Mass_crs->getRowMap());
    Tpetra_MatrixMarket_Writer::writeMapFile("colmap.mm", *Mass_crs->getColMap());
#endif
  }

  // df/dp
  for (int l = 0; l < outArgsT.Np(); ++l) {
    const Teuchos::RCP<Thyra::MultiVectorBase<ST> > dfdp_out =
      outArgsT.get_DfDp(l).getMultiVector();

    const Teuchos::RCP<Tpetra_MultiVector> dfdp_outT =
      Teuchos::nonnull(dfdp_out) ?
      ConverterT::getTpetraMultiVector(dfdp_out) :
      Teuchos::null;

    if (Teuchos::nonnull(dfdp_outT)) {
      const Teuchos::RCP<ParamVec> p_vec = Teuchos::rcpFromRef(sacado_param_vec[l]);

      app->computeGlobalTangentT(
          0.0, 0.0, 0.0, curr_time, false, x_dotT.get(), x_dotdotT.get(), *xT,
          sacado_param_vec, p_vec.get(),
          NULL, NULL, NULL, NULL, fT_out.get(), NULL,
          dfdp_outT.get());

      f_already_computed = true;
    }
  }

  // f
  if (app->is_adjoint) {
    const Thyra::ModelEvaluatorBase::Derivative<ST> f_derivT(
        outArgsT.get_f(),
        Thyra::ModelEvaluatorBase::DERIV_TRANS_MV_BY_ROW);

    const Thyra::ModelEvaluatorBase::Derivative<ST> dummy_derivT;

    const int response_index = 0; // need to add capability for sending this in
    app->evaluateResponseDerivativeT(
        response_index, curr_time, x_dotT.get(), x_dotdotT.get(), *xT,
        sacado_param_vec, NULL,
        NULL, f_derivT, dummy_derivT, dummy_derivT, dummy_derivT);
  } else {
    if (Teuchos::nonnull(fT_out) && !f_already_computed) {
      app->computeGlobalResidualT(
          curr_time, x_dotT.get(), x_dotdotT.get(), *xT,
          sacado_param_vec, *fT_out);
    }
  }

  // Response functions
  for (int j = 0; j < outArgsT.Ng(); ++j) {
    const Teuchos::RCP<Thyra::VectorBase<ST> > g_out = outArgsT.get_g(j);
    Teuchos::RCP<Tpetra_Vector> gT_out =
      Teuchos::nonnull(g_out) ?
      ConverterT::getTpetraVector(g_out) :
      Teuchos::null;

    const Thyra::ModelEvaluatorBase::Derivative<ST> dgdxT_out = outArgsT.get_DgDx(j);
    Thyra::ModelEvaluatorBase::Derivative<ST> dgdxdotT_out;

    if(supports_xdot)
      dgdxdotT_out = outArgsT.get_DgDx_dot(j);

//    const Thyra::ModelEvaluatorBase::Derivative<ST> dgdxdotdotT_out = this->get_DgDx_dotdot(j);
    const Thyra::ModelEvaluatorBase::Derivative<ST> dgdxdotdotT_out;

    sanitize_nans(dgdxT_out);
    sanitize_nans(dgdxdotT_out);
    sanitize_nans(dgdxdotdotT_out);

    // dg/dx, dg/dxdot
    if (!dgdxT_out.isEmpty() || !dgdxdotT_out.isEmpty()) {
      const Thyra::ModelEvaluatorBase::Derivative<ST> dummy_derivT;
      app->evaluateResponseDerivativeT(
          j, curr_time, x_dotT.get(), x_dotdotT.get(), *xT,
          sacado_param_vec, NULL,
          gT_out.get(), dgdxT_out,
          dgdxdotT_out, dgdxdotdotT_out, dummy_derivT);
      // Set gT_out to null to indicate that g_out was evaluated.
      gT_out = Teuchos::null;
    }

    // dg/dp
    for (int l = 0; l < outArgsT.Np(); ++l) {
      const Teuchos::RCP<Thyra::MultiVectorBase<ST> > dgdp_out =
        outArgsT.get_DgDp(j, l).getMultiVector();
      const Teuchos::RCP<Tpetra_MultiVector> dgdpT_out =
        Teuchos::nonnull(dgdp_out) ?
        ConverterT::getTpetraMultiVector(dgdp_out) :
        Teuchos::null;

      if (Teuchos::nonnull(dgdpT_out)) {
        const Teuchos::RCP<ParamVec> p_vec = Teuchos::rcpFromRef(sacado_param_vec[l]);
        app->evaluateResponseTangentT(
            j, alpha, beta, omega, curr_time, false,
            x_dotT.get(), x_dotdotT.get(), *xT,
            sacado_param_vec, p_vec.get(),
            NULL, NULL, NULL, NULL, gT_out.get(), NULL,
            dgdpT_out.get());
        gT_out = Teuchos::null;
      }
    }

    if (Teuchos::nonnull(gT_out)) {
      app->evaluateResponseT(
          j, curr_time, x_dotT.get(), x_dotdotT.get(), *xT,
          sacado_param_vec, *gT_out);
    }
  }

}
void
Albany::ModelEvaluatorT::evalModelImpl(
    const Thyra::ModelEvaluatorBase::InArgs<ST>& inArgsT,
    const Thyra::ModelEvaluatorBase::OutArgs<ST>& outArgsT) const
{

  Teuchos::TimeMonitor Timer(*timer); //start timer
  //
  // Get the input arguments
  //
  const Teuchos::RCP<const Tpetra_Vector> xT =
    ConverterT::getConstTpetraVector(inArgsT.get_x());

  const Teuchos::RCP<const Tpetra_Vector> x_dotT =
    Teuchos::nonnull(inArgsT.get_x_dot()) ?
    ConverterT::getConstTpetraVector(inArgsT.get_x_dot()) :
    Teuchos::null;

  // AGS: x_dotdot time integrators not imlemented in Thyra ME yet
  //const Teuchos::RCP<const Tpetra_Vector> x_dotdotT =
  //  Teuchos::nonnull(inArgsT.get_x_dotdot()) ?
  //  ConverterT::getConstTpetraVector(inArgsT.get_x_dotdot()) :
  //  Teuchos::null;
  const Teuchos::RCP<const Tpetra_Vector> x_dotdotT = Teuchos::null;


  const double alpha = (Teuchos::nonnull(x_dotT) || Teuchos::nonnull(x_dotdotT)) ? inArgsT.get_alpha() : 0.0;
  // AGS: x_dotdot time integrators not imlemented in Thyra ME yet
  // const double omega = (Teuchos::nonnull(x_dotT) || Teuchos::nonnull(x_dotdotT)) ? inArgsT.get_omega() : 0.0;
  const double omega = 0.0;
  const double beta = (Teuchos::nonnull(x_dotT) || Teuchos::nonnull(x_dotdotT)) ? inArgsT.get_beta() : 1.0;
  const double curr_time = (Teuchos::nonnull(x_dotT) || Teuchos::nonnull(x_dotdotT)) ? inArgsT.get_t() : 0.0;

  for (int l = 0; l < inArgsT.Np(); ++l) {
    const Teuchos::RCP<const Thyra::VectorBase<ST> > p = inArgsT.get_p(l);
    if (Teuchos::nonnull(p)) {
      const Teuchos::RCP<const Tpetra_Vector> pT = ConverterT::getConstTpetraVector(p);
      const Teuchos::ArrayRCP<const ST> pT_constView = pT->get1dView();

      ParamVec &sacado_param_vector = sacado_param_vec[l];
      for (unsigned int k = 0; k < sacado_param_vector.size(); ++k) {
        sacado_param_vector[k].baseValue = pT_constView[k];
      }
    }
  }

  //
  // Get the output arguments
  //
  const Teuchos::RCP<Tpetra_Vector> fT_out =
    Teuchos::nonnull(outArgsT.get_f()) ?
    ConverterT::getTpetraVector(outArgsT.get_f()) :
    Teuchos::null;

  const Teuchos::RCP<Tpetra_Operator> W_op_outT =
    Teuchos::nonnull(outArgsT.get_W_op()) ?
    ConverterT::getTpetraOperator(outArgsT.get_W_op()) :
    Teuchos::null;

  // Cast W to a CrsMatrix, throw an exception if this fails
  const Teuchos::RCP<Tpetra_CrsMatrix> W_op_out_crsT =
    Teuchos::nonnull(W_op_outT) ?
    Teuchos::rcp_dynamic_cast<Tpetra_CrsMatrix>(W_op_outT, true) :
    Teuchos::null;

  //
  // Compute the functions
  //
  bool f_already_computed = false;

  // W matrix
  if (Teuchos::nonnull(W_op_out_crsT)) {
    app->computeGlobalJacobianT(
        alpha, beta, omega, curr_time, x_dotT.get(), x_dotdotT.get(),  *xT,
        sacado_param_vec, fT_out.get(), *W_op_out_crsT);
    f_already_computed = true;
  }

  // df/dp
  for (int l = 0; l < outArgsT.Np(); ++l) {
    const Teuchos::RCP<Thyra::MultiVectorBase<ST> > dfdp_out =
      outArgsT.get_DfDp(l).getMultiVector();

    const Teuchos::RCP<Tpetra_MultiVector> dfdp_outT =
      Teuchos::nonnull(dfdp_out) ?
      ConverterT::getTpetraMultiVector(dfdp_out) :
      Teuchos::null;

    if (Teuchos::nonnull(dfdp_outT)) {
      const Teuchos::RCP<ParamVec> p_vec = Teuchos::rcpFromRef(sacado_param_vec[l]);

      app->computeGlobalTangentT(
          0.0, 0.0, 0.0, curr_time, false, x_dotT.get(), x_dotdotT.get(), *xT,
          sacado_param_vec, p_vec.get(),
          NULL, NULL, NULL, NULL, fT_out.get(), NULL,
          dfdp_outT.get());

      f_already_computed = true;
    }
  }

  // f
  if (app->is_adjoint) {
    const Thyra::ModelEvaluatorBase::Derivative<ST> f_derivT(
        outArgsT.get_f(),
        Thyra::ModelEvaluatorBase::DERIV_TRANS_MV_BY_ROW);

    const Thyra::ModelEvaluatorBase::Derivative<ST> dummy_derivT;

    const int response_index = 0; // need to add capability for sending this in
    app->evaluateResponseDerivativeT(
        response_index, curr_time, x_dotT.get(), x_dotdotT.get(), *xT,
        sacado_param_vec, NULL,
        NULL, f_derivT, dummy_derivT, dummy_derivT, dummy_derivT);
  } else {
    if (Teuchos::nonnull(fT_out) && !f_already_computed) {
      app->computeGlobalResidualT(
          curr_time, x_dotT.get(), x_dotdotT.get(), *xT,
          sacado_param_vec, *fT_out);
    }
  }

  // Response functions
  for (int j = 0; j < outArgsT.Ng(); ++j) {
    const Teuchos::RCP<Thyra::VectorBase<ST> > g_out = outArgsT.get_g(j);
    Teuchos::RCP<Tpetra_Vector> gT_out =
      Teuchos::nonnull(g_out) ?
      ConverterT::getTpetraVector(g_out) :
      Teuchos::null;

    const Thyra::ModelEvaluatorBase::Derivative<ST> dgdxT_out = outArgsT.get_DgDx(j);
    const Thyra::ModelEvaluatorBase::Derivative<ST> dgdxdotT_out = outArgsT.get_DgDx_dot(j);
    // AGS: x_dotdot time integrators not imlemented in Thyra ME yet
    const Thyra::ModelEvaluatorBase::Derivative<ST> dgdxdotdotT_out;

    // dg/dx, dg/dxdot
    if (!dgdxT_out.isEmpty() || !dgdxdotT_out.isEmpty()) {
      const Thyra::ModelEvaluatorBase::Derivative<ST> dummy_derivT;
      app->evaluateResponseDerivativeT(
          j, curr_time, x_dotT.get(), x_dotdotT.get(), *xT,
          sacado_param_vec, NULL,
          gT_out.get(), dgdxT_out,
          dgdxdotT_out, dgdxdotdotT_out, dummy_derivT);
      // Set gT_out to null to indicate that g_out was evaluated.
      gT_out = Teuchos::null;
    }

    // dg/dp
    for (int l = 0; l < outArgsT.Np(); ++l) {
      const Teuchos::RCP<Thyra::MultiVectorBase<ST> > dgdp_out =
        outArgsT.get_DgDp(j, l).getMultiVector();
      const Teuchos::RCP<Tpetra_MultiVector> dgdpT_out =
        Teuchos::nonnull(dgdp_out) ?
        ConverterT::getTpetraMultiVector(dgdp_out) :
        Teuchos::null;

      if (Teuchos::nonnull(dgdpT_out)) {
        const Teuchos::RCP<ParamVec> p_vec = Teuchos::rcpFromRef(sacado_param_vec[l]);
        app->evaluateResponseTangentT(
            j, alpha, beta, omega, curr_time, false,
            x_dotT.get(), x_dotdotT.get(), *xT,
            sacado_param_vec, p_vec.get(),
            NULL, NULL, NULL, NULL, gT_out.get(), NULL,
            dgdpT_out.get());
        gT_out = Teuchos::null;
      }
    }

    if (Teuchos::nonnull(gT_out)) {
      app->evaluateResponseT(
          j, curr_time, x_dotT.get(), x_dotdotT.get(), *xT,
          sacado_param_vec, *gT_out);
    }
  }
}
void Piro::RythmosSolver<Scalar>::evalModelImpl(
#endif
    const Thyra::ModelEvaluatorBase::InArgs<Scalar>& inArgs,
    const Thyra::ModelEvaluatorBase::OutArgs<Scalar>& outArgs) const
{
    using Teuchos::RCP;
    using Teuchos::rcp;

    // TODO: Support more than 1 parameter and 1 response
    const int j = 0;
    const int l = 0;

    // Parse InArgs
    RCP<const Thyra::VectorBase<Scalar> > p_in;
    if (num_p > 0) {
        p_in = inArgs.get_p(l);
    }
    RCP<const Thyra::VectorBase<Scalar> > p_in2;  //JF add for multipoint
    if (num_p > 1) {
        p_in2 = inArgs.get_p(l+1);
    }

    // Parse OutArgs
    RCP<Thyra::VectorBase<Scalar> > g_out;
    if (num_g > 0) {
        g_out = outArgs.get_g(j);
    }
    const RCP<Thyra::VectorBase<Scalar> > gx_out = outArgs.get_g(num_g);

    Thyra::ModelEvaluatorBase::InArgs<Scalar> state_ic = model->getNominalValues();

    // Set initial time in ME if needed

    if(t_initial > 0.0 && state_ic.supports(Thyra::ModelEvaluatorBase::IN_ARG_t))

        state_ic.set_t(t_initial);

    if (Teuchos::nonnull(initialConditionModel)) {
        // The initial condition depends on the parameter
        // It is found by querying the auxiliary model evaluator as the last response
        const RCP<Thyra::VectorBase<Scalar> > initialState =
            Thyra::createMember(model->get_x_space());

        {
            Thyra::ModelEvaluatorBase::InArgs<Scalar> initCondInArgs = initialConditionModel->createInArgs();
            if (num_p > 0) {
                initCondInArgs.set_p(l, inArgs.get_p(l));
            }

            Thyra::ModelEvaluatorBase::OutArgs<Scalar> initCondOutArgs = initialConditionModel->createOutArgs();
            initCondOutArgs.set_g(initCondOutArgs.Ng() - 1, initialState);

            initialConditionModel->evalModel(initCondInArgs, initCondOutArgs);
        }

        state_ic.set_x(initialState);
    }

    // Set paramters p_in as part of initial conditions
    if (num_p > 0) {
        if (Teuchos::nonnull(p_in)) {
            state_ic.set_p(l, p_in);
        }
    }
    if (num_p > 1) { //JF added for multipoint
        if (Teuchos::nonnull(p_in2)) {
            state_ic.set_p(l+1, p_in2);
        }
    }

    *out << "\nstate_ic:\n" << Teuchos::describe(state_ic, solnVerbLevel);

    //JF  may need a version of the following for multipoint, i.e. num_p>1, l+1, if we want sensitivities
    RCP<Thyra::MultiVectorBase<Scalar> > dgxdp_out;
    Thyra::ModelEvaluatorBase::Derivative<Scalar> dgdp_deriv_out;
    if (num_p > 0) {
        const Thyra::ModelEvaluatorBase::DerivativeSupport dgxdp_support =
            outArgs.supports(Thyra::ModelEvaluatorBase::OUT_ARG_DgDp, num_g, l);
        if (dgxdp_support.supports(Thyra::ModelEvaluatorBase::DERIV_MV_JACOBIAN_FORM)) {
            const Thyra::ModelEvaluatorBase::Derivative<Scalar> dgxdp_deriv =
                outArgs.get_DgDp(num_g, l);
            dgxdp_out = dgxdp_deriv.getMultiVector();
        }

        if (num_g > 0) {
            const Thyra::ModelEvaluatorBase::DerivativeSupport dgdp_support =
                outArgs.supports(Thyra::ModelEvaluatorBase::OUT_ARG_DgDp, j, l);
            if (!dgdp_support.none()) {
                dgdp_deriv_out = outArgs.get_DgDp(j, l);
            }
        }
    }

    const bool requestedSensitivities =
        Teuchos::nonnull(dgxdp_out) || !dgdp_deriv_out.isEmpty();

    RCP<const Thyra::VectorBase<Scalar> > finalSolution;
    if (!requestedSensitivities) {
        //
        *out << "\nE) Solve the forward problem ...\n";
        //

        fwdStateStepper->setInitialCondition(state_ic);

        fwdStateIntegrator->setStepper(fwdStateStepper, t_final, true);
        *out << "T final : " << t_final << " \n";

        Teuchos::Array<RCP<const Thyra::VectorBase<Scalar> > > x_final_array;
        fwdStateIntegrator->getFwdPoints(
            Teuchos::tuple<Scalar>(t_final), &x_final_array, NULL, NULL);
        finalSolution = x_final_array[0];

        if (Teuchos::VERB_MEDIUM <= solnVerbLevel) {
            std::cout << "Final Solution\n" << *finalSolution << std::endl;
        }

    } else { // Computing sensitivities
        //
        *out << "\nE) Solve the forward problem with Sensitivities...\n";
        //
        RCP<Rythmos::ForwardSensitivityStepper<Scalar> > stateAndSensStepper =
            Rythmos::forwardSensitivityStepper<Scalar>();
        stateAndSensStepper->initializeSyncedSteppers(
            model, l, model->getNominalValues(),
            fwdStateStepper, fwdTimeStepSolver);

        //
        // Set the initial condition for the state and forward sensitivities
        //

        const RCP<Thyra::VectorBase<Scalar> > s_bar_init =
            Thyra::createMember(stateAndSensStepper->getFwdSensModel()->get_x_space());
        const RCP<Thyra::VectorBase<Scalar> > s_bar_dot_init =
            Thyra::createMember(stateAndSensStepper->getFwdSensModel()->get_x_space());

        if (Teuchos::is_null(initialConditionModel)) {
            // The initial condition is assumed to be independent from the parameters
            // Therefore, the initial condition for the sensitivity is zero
            Thyra::assign(s_bar_init.ptr(), Teuchos::ScalarTraits<Scalar>::zero());
        } else {
            // Use initialConditionModel to compute initial condition for sensitivity
            Thyra::ModelEvaluatorBase::InArgs<Scalar> initCondInArgs = initialConditionModel->createInArgs();
            initCondInArgs.set_p(l, inArgs.get_p(l));

            Thyra::ModelEvaluatorBase::OutArgs<Scalar> initCondOutArgs = initialConditionModel->createOutArgs();
            typedef Thyra::DefaultMultiVectorProductVector<Scalar> DMVPV;
            const RCP<DMVPV> s_bar_init_downcasted = Teuchos::rcp_dynamic_cast<DMVPV>(s_bar_init);
            const Thyra::ModelEvaluatorBase::Derivative<Scalar> initCond_deriv(
                s_bar_init_downcasted->getNonconstMultiVector(),
                Thyra::ModelEvaluatorBase::DERIV_MV_JACOBIAN_FORM);
            initCondOutArgs.set_DgDp(initCondOutArgs.Ng() - 1, l, initCond_deriv);

            initialConditionModel->evalModel(initCondInArgs, initCondOutArgs);
        }
        Thyra::assign(s_bar_dot_init.ptr(), Teuchos::ScalarTraits<Scalar>::zero());

        RCP<const Rythmos::StateAndForwardSensitivityModelEvaluator<Scalar> >
        stateAndSensModel = stateAndSensStepper->getStateAndFwdSensModel();

        Thyra::ModelEvaluatorBase::InArgs<Scalar>
        state_and_sens_ic = stateAndSensStepper->getModel()->createInArgs();

        // Copy time, parameters etc.
        state_and_sens_ic.setArgs(state_ic);
        // Set initial condition for x_bar = [ x; s_bar ]
        state_and_sens_ic.set_x(stateAndSensModel->create_x_bar_vec(state_ic.get_x(), s_bar_init));
        // Set initial condition for x_bar_dot = [ x_dot; s_bar_dot ]
        state_and_sens_ic.set_x_dot(stateAndSensModel->create_x_bar_vec(state_ic.get_x_dot(), s_bar_dot_init));

        stateAndSensStepper->setInitialCondition(state_and_sens_ic);

        //
        // Use a StepperAsModelEvaluator to integrate the state+sens
        //

        const RCP<Rythmos::StepperAsModelEvaluator<Scalar> >
        stateAndSensIntegratorAsModel = Rythmos::stepperAsModelEvaluator(
                                            Teuchos::rcp_implicit_cast<Rythmos::StepperBase<Scalar> >(stateAndSensStepper),
                                            Teuchos::rcp_implicit_cast<Rythmos::IntegratorBase<Scalar> >(fwdStateIntegrator),
                                            state_and_sens_ic);
        // StepperAsModelEvaluator outputs the solution as its last response
        const int stateAndSensModelStateResponseIndex = stateAndSensIntegratorAsModel->Ng() - 1;

        *out << "\nUse the StepperAsModelEvaluator to integrate state + sens x_bar(p,t_final) ... \n";
        Teuchos::OSTab tab(out);

        // Solution sensitivity in column-oriented (Jacobian) MultiVector form
        RCP<const Thyra::MultiVectorBase<Scalar> > dxdp;

        const RCP<Thyra::VectorBase<Scalar> > x_bar_final =
            Thyra::createMember(stateAndSensIntegratorAsModel->get_g_space(stateAndSensModelStateResponseIndex));
        // Extract pieces of x_bar_final to prepare output
        {
            const RCP<const Thyra::ProductVectorBase<Scalar> > x_bar_final_downcasted =
                Thyra::productVectorBase<Scalar>(x_bar_final);

            // Solution
            const int solutionBlockIndex = 0;
            finalSolution = x_bar_final_downcasted->getVectorBlock(solutionBlockIndex);

            // Sensitivity
            const int sensitivityBlockIndex = 1;
            const RCP<const Thyra::VectorBase<Scalar> > s_bar_final =
                x_bar_final_downcasted->getVectorBlock(sensitivityBlockIndex);
            {
                typedef Thyra::DefaultMultiVectorProductVector<Scalar> DMVPV;
                const RCP<const DMVPV> s_bar_final_downcasted = Teuchos::rcp_dynamic_cast<const DMVPV>(s_bar_final);

                dxdp = s_bar_final_downcasted->getMultiVector();
            }
        }

        Thyra::eval_g(
            *stateAndSensIntegratorAsModel,
            l, *state_ic.get_p(l),
            t_final,
            stateAndSensModelStateResponseIndex, x_bar_final.get()
        );

        *out
                << "\nx_bar_final = x_bar(p,t_final) evaluated using "
                << "stateAndSensIntegratorAsModel:\n"
                << Teuchos::describe(*x_bar_final,solnVerbLevel);

        if (Teuchos::nonnull(dgxdp_out)) {
            Thyra::assign(dgxdp_out.ptr(), *dxdp);
        }

        if (!dgdp_deriv_out.isEmpty()) {
            RCP<Thyra::DefaultAddedLinearOp<Scalar> > dgdp_op_out;
            {
                const RCP<Thyra::LinearOpBase<Scalar> > dgdp_op = dgdp_deriv_out.getLinearOp();
                if (Teuchos::nonnull(dgdp_op)) {
                    dgdp_op_out = Teuchos::rcp_dynamic_cast<Thyra::DefaultAddedLinearOp<Scalar> >(dgdp_op);
                    dgdp_op_out.assert_not_null();
                }
            }

            Thyra::ModelEvaluatorBase::InArgs<Scalar> modelInArgs = model->createInArgs();
            {
                modelInArgs.set_x(finalSolution);
                if (num_p > 0) {
                    modelInArgs.set_p(l, p_in);
                }
            }

            // require dgdx, dgdp from model
            Thyra::ModelEvaluatorBase::OutArgs<Scalar> modelOutArgs = model->createOutArgs();
            {
                const Thyra::ModelEvaluatorBase::Derivative<Scalar> dgdx_deriv(model->create_DgDx_op(j));
                modelOutArgs.set_DgDx(j, dgdx_deriv);

                Thyra::ModelEvaluatorBase::Derivative<Scalar> dgdp_deriv;
                if (Teuchos::nonnull(dgdp_op_out)) {
                    dgdp_deriv = model->create_DgDp_op(j, l);
                } else {
                    dgdp_deriv = dgdp_deriv_out;
                }
                modelOutArgs.set_DgDp(j, l, dgdp_deriv);
            }

            model->evalModel(modelInArgs, modelOutArgs);

            const RCP<const Thyra::LinearOpBase<Scalar> > dgdx =
                modelOutArgs.get_DgDx(j).getLinearOp();

            // dgdp_out = dgdp + <dgdx, dxdp>
            if (Teuchos::nonnull(dgdp_op_out)) {
                Teuchos::Array<RCP<const Thyra::LinearOpBase<Scalar> > > op_args(2);
                {
                    op_args[0] = modelOutArgs.get_DgDp(j, l).getLinearOp();
                    op_args[1] = Thyra::multiply<Scalar>(dgdx, dxdp);
                }
                dgdp_op_out->initialize(op_args);
            } else {
                const RCP<Thyra::MultiVectorBase<Scalar> > dgdp_mv_out = dgdp_deriv_out.getMultiVector();
                Thyra::apply(
                    *dgdx,
                    Thyra::NOTRANS,
                    *dxdp,
                    dgdp_mv_out.ptr(),
                    Teuchos::ScalarTraits<Scalar>::one(),
                    Teuchos::ScalarTraits<Scalar>::one());
            }
        }
    }

    *out << "\nF) Check the solution to the forward problem ...\n";

    // As post-processing step, calculate responses at final solution
    {
        Thyra::ModelEvaluatorBase::InArgs<Scalar> modelInArgs = model->createInArgs();
        {
            modelInArgs.set_x(finalSolution);
            if (num_p > 0) {
                modelInArgs.set_p(l, p_in);
            }
            if (num_p > 1) {  //JF added for multipoint
                modelInArgs.set_p(l+1, p_in2);
            }
            //Set time to be final time at which the solve occurs (< t_final in the case we don't make it to t_final).
            modelInArgs.set_t(fwdStateStepper->getTimeRange().lower());
        }

        Thyra::ModelEvaluatorBase::OutArgs<Scalar> modelOutArgs = model->createOutArgs();
        if (Teuchos::nonnull(g_out)) {
            Thyra::put_scalar(Teuchos::ScalarTraits<Scalar>::zero(), g_out.ptr());
            modelOutArgs.set_g(j, g_out);
        }

        model->evalModel(modelInArgs, modelOutArgs);
    }

    // Return the final solution as an additional g-vector, if requested
    if (Teuchos::nonnull(gx_out)) {
        Thyra::copy(*finalSolution, gx_out.ptr());
    }
}
// hide the original parental method AMET->evalModelImpl():
void
Aeras::HVDecorator::evalModelImpl(
    const Thyra::ModelEvaluatorBase::InArgs<ST>& inArgsT,
    const Thyra::ModelEvaluatorBase::OutArgs<ST>& outArgsT) const
{

  std::cout << "DEBUG WHICH HVDecorator: " << __PRETTY_FUNCTION__ << "\n";
	
  Teuchos::TimeMonitor Timer(*timer); //start timer
  //
  // Get the input arguments
  //
  const Teuchos::RCP<const Tpetra_Vector> xT =
    ConverterT::getConstTpetraVector(inArgsT.get_x());

  const Teuchos::RCP<const Tpetra_Vector> x_dotT =
    Teuchos::nonnull(inArgsT.get_x_dot()) ?
    ConverterT::getConstTpetraVector(inArgsT.get_x_dot()) :
    Teuchos::null;

  // AGS: x_dotdot time integrators not imlemented in Thyra ME yet
  //const Teuchos::RCP<const Tpetra_Vector> x_dotdotT =
  //  Teuchos::nonnull(inArgsT.get_x_dotdot()) ?
  //  ConverterT::getConstTpetraVector(inArgsT.get_x_dotdot()) :
  //  Teuchos::null;
  const Teuchos::RCP<const Tpetra_Vector> x_dotdotT = Teuchos::null;


  const double alpha = (Teuchos::nonnull(x_dotT) || Teuchos::nonnull(x_dotdotT)) ? inArgsT.get_alpha() : 0.0;
  // AGS: x_dotdot time integrators not imlemented in Thyra ME yet
  // const double omega = (Teuchos::nonnull(x_dotT) || Teuchos::nonnull(x_dotdotT)) ? inArgsT.get_omega() : 0.0;
  const double omega = 0.0;
  const double beta = (Teuchos::nonnull(x_dotT) || Teuchos::nonnull(x_dotdotT)) ? inArgsT.get_beta() : 1.0;
  const double curr_time = (Teuchos::nonnull(x_dotT) || Teuchos::nonnull(x_dotdotT)) ? inArgsT.get_t() : 0.0;

  for (int l = 0; l < inArgsT.Np(); ++l) {
    const Teuchos::RCP<const Thyra::VectorBase<ST> > p = inArgsT.get_p(l);
    if (Teuchos::nonnull(p)) {
      const Teuchos::RCP<const Tpetra_Vector> pT = ConverterT::getConstTpetraVector(p);
      const Teuchos::ArrayRCP<const ST> pT_constView = pT->get1dView();

      ParamVec &sacado_param_vector = sacado_param_vec[l];
      for (unsigned int k = 0; k < sacado_param_vector.size(); ++k) {
        sacado_param_vector[k].baseValue = pT_constView[k];
      }
    }
  }

  //
  // Get the output arguments
  //
  const Teuchos::RCP<Tpetra_Vector> fT_out =
    Teuchos::nonnull(outArgsT.get_f()) ?
    ConverterT::getTpetraVector(outArgsT.get_f()) :
    Teuchos::null;

  const Teuchos::RCP<Tpetra_Operator> W_op_outT =
    Teuchos::nonnull(outArgsT.get_W_op()) ?
    ConverterT::getTpetraOperator(outArgsT.get_W_op()) :
    Teuchos::null;

#ifdef WRITE_MASS_MATRIX_TO_MM_FILE
  //IK, 4/24/15: adding object to hold mass matrix to be written to matrix market file
  const Teuchos::RCP<Tpetra_Operator> Mass =
    Teuchos::nonnull(outArgsT.get_W_op()) ?
    ConverterT::getTpetraOperator(outArgsT.get_W_op()) :
    Teuchos::null;
  //IK, 4/24/15: needed for writing mass matrix out to matrix market file
  const Teuchos::RCP<Tpetra_Vector> ftmp =
    Teuchos::nonnull(outArgsT.get_f()) ?
    ConverterT::getTpetraVector(outArgsT.get_f()) :
    Teuchos::null;
#endif

  // Cast W to a CrsMatrix, throw an exception if this fails
  const Teuchos::RCP<Tpetra_CrsMatrix> W_op_out_crsT =
    Teuchos::nonnull(W_op_outT) ?
    Teuchos::rcp_dynamic_cast<Tpetra_CrsMatrix>(W_op_outT, true) :
    Teuchos::null;

#ifdef WRITE_MASS_MATRIX_TO_MM_FILE
  //IK, 4/24/15: adding object to hold mass matrix to be written to matrix market file
  const Teuchos::RCP<Tpetra_CrsMatrix> Mass_crs =
    Teuchos::nonnull(Mass) ?
    Teuchos::rcp_dynamic_cast<Tpetra_CrsMatrix>(Mass, true) :
    Teuchos::null;
#endif

  //
  // Compute the functions
  //
  bool f_already_computed = false;

  // W matrix
  if (Teuchos::nonnull(W_op_out_crsT)) {
    app->computeGlobalJacobianT(
        alpha, beta, omega, curr_time, x_dotT.get(), x_dotdotT.get(),  *xT,
        sacado_param_vec, fT_out.get(), *W_op_out_crsT);
    f_already_computed = true;
  }

  // df/dp
  for (int l = 0; l < outArgsT.Np(); ++l) {
    const Teuchos::RCP<Thyra::MultiVectorBase<ST> > dfdp_out =
      outArgsT.get_DfDp(l).getMultiVector();

    const Teuchos::RCP<Tpetra_MultiVector> dfdp_outT =
      Teuchos::nonnull(dfdp_out) ?
      ConverterT::getTpetraMultiVector(dfdp_out) :
      Teuchos::null;

    if (Teuchos::nonnull(dfdp_outT)) {
      const Teuchos::RCP<ParamVec> p_vec = Teuchos::rcpFromRef(sacado_param_vec[l]);

      app->computeGlobalTangentT(
          0.0, 0.0, 0.0, curr_time, false, x_dotT.get(), x_dotdotT.get(), *xT,
          sacado_param_vec, p_vec.get(),
          NULL, NULL, NULL, NULL, fT_out.get(), NULL,
          dfdp_outT.get());

      f_already_computed = true;
    }
  }

  // f
  if (app->is_adjoint) {
    const Thyra::ModelEvaluatorBase::Derivative<ST> f_derivT(
        outArgsT.get_f(),
        Thyra::ModelEvaluatorBase::DERIV_TRANS_MV_BY_ROW);

    const Thyra::ModelEvaluatorBase::Derivative<ST> dummy_derivT;

    const int response_index = 0; // need to add capability for sending this in
    app->evaluateResponseDerivativeT(
        response_index, curr_time, x_dotT.get(), x_dotdotT.get(), *xT,
        sacado_param_vec, NULL,
        NULL, f_derivT, dummy_derivT, dummy_derivT, dummy_derivT);
  } else {
    if (Teuchos::nonnull(fT_out) && !f_already_computed) {
      app->computeGlobalResidualT(
          curr_time, x_dotT.get(), x_dotdotT.get(), *xT,
          sacado_param_vec, *fT_out);
    }
  }

  Teuchos::RCP<Tpetra_Vector> xtildeT = Teuchos::rcp(new Tpetra_Vector(xT->getMap())); 
  //compute xtildeT 
  applyLinvML(xT, xtildeT); 

#ifdef WRITE_TO_MATRIX_MARKET
  //writing to MatrixMarket for debug
  char name[100];  //create string for file name
  sprintf(name, "xT_%i.mm", mm_counter);
  Tpetra_MatrixMarket_Writer::writeDenseFile(name, xT);
  sprintf(name, "xtildeT_%i.mm", mm_counter);
  Tpetra_MatrixMarket_Writer::writeDenseFile(name, xtildeT);
  mm_counter++; 
#endif  

  //std::cout <<"in HVDec evalModelImpl a, b= " << alpha << "  "<< beta <<std::endl;

  if(Teuchos::nonnull(inArgsT.get_x_dot())){
	  std::cout <<"in the if-statement for the update" <<std::endl;
	  fT_out->update(1.0, *xtildeT, 1.0);
  }

  // Response functions
  for (int j = 0; j < outArgsT.Ng(); ++j) {
    const Teuchos::RCP<Thyra::VectorBase<ST> > g_out = outArgsT.get_g(j);
    Teuchos::RCP<Tpetra_Vector> gT_out =
      Teuchos::nonnull(g_out) ?
      ConverterT::getTpetraVector(g_out) :
      Teuchos::null;

    const Thyra::ModelEvaluatorBase::Derivative<ST> dgdxT_out = outArgsT.get_DgDx(j);
    const Thyra::ModelEvaluatorBase::Derivative<ST> dgdxdotT_out = outArgsT.get_DgDx_dot(j);
    // AGS: x_dotdot time integrators not imlemented in Thyra ME yet
    const Thyra::ModelEvaluatorBase::Derivative<ST> dgdxdotdotT_out;
    sanitize_nans(dgdxT_out);
    sanitize_nans(dgdxdotT_out);
    sanitize_nans(dgdxdotdotT_out);

    // dg/dx, dg/dxdot
    if (!dgdxT_out.isEmpty() || !dgdxdotT_out.isEmpty()) {
      const Thyra::ModelEvaluatorBase::Derivative<ST> dummy_derivT;
      app->evaluateResponseDerivativeT(
          j, curr_time, x_dotT.get(), x_dotdotT.get(), *xT,
          sacado_param_vec, NULL,
          gT_out.get(), dgdxT_out,
          dgdxdotT_out, dgdxdotdotT_out, dummy_derivT);
      // Set gT_out to null to indicate that g_out was evaluated.
      gT_out = Teuchos::null;
    }

    // dg/dp
    for (int l = 0; l < outArgsT.Np(); ++l) {
      const Teuchos::RCP<Thyra::MultiVectorBase<ST> > dgdp_out =
        outArgsT.get_DgDp(j, l).getMultiVector();
      const Teuchos::RCP<Tpetra_MultiVector> dgdpT_out =
        Teuchos::nonnull(dgdp_out) ?
        ConverterT::getTpetraMultiVector(dgdp_out) :
        Teuchos::null;

      if (Teuchos::nonnull(dgdpT_out)) {
        const Teuchos::RCP<ParamVec> p_vec = Teuchos::rcpFromRef(sacado_param_vec[l]);
        app->evaluateResponseTangentT(
            j, alpha, beta, omega, curr_time, false,
            x_dotT.get(), x_dotdotT.get(), *xT,
            sacado_param_vec, p_vec.get(),
            NULL, NULL, NULL, NULL, gT_out.get(), NULL,
            dgdpT_out.get());
        gT_out = Teuchos::null;
      }
    }

    if (Teuchos::nonnull(gT_out)) {
      app->evaluateResponseT(
          j, curr_time, x_dotT.get(), x_dotdotT.get(), *xT,
          sacado_param_vec, *gT_out);
    }
  }
}