Thyra::ModelEvaluatorBase::OutArgs<Scalar> 
Piro::VelocityVerletSolver<Scalar>::createOutArgsImpl() const
{
  Thyra::ModelEvaluatorBase::OutArgsSetup<Scalar> outArgs;
  outArgs.setModelEvalDescription(this->description());

  // One additional response slot for the solution vector
  outArgs.set_Np_Ng(num_p, num_g + 1);

  const Thyra::ModelEvaluatorBase::OutArgs<Scalar> modelOutArgs = model->createOutArgs();

  if (num_p > 0) {
    // Only one parameter supported
    const int l = 0;

    // Computing the DxDp sensitivity for a transient problem currently requires the evaluation of
    // the mutilivector-based, Jacobian-oriented DfDp derivatives of the underlying transient model.
    const Thyra::ModelEvaluatorBase::DerivativeSupport model_dfdp_support =
      modelOutArgs.supports(Thyra::ModelEvaluatorBase::OUT_ARG_DfDp, l);
    if (!model_dfdp_support.supports(Thyra::ModelEvaluatorBase::DERIV_MV_JACOBIAN_FORM)) {
      // Ok to return early since only one parameter supported
      return outArgs;
    }

    // Solution sensitivity
    outArgs.setSupports(
        Thyra::ModelEvaluatorBase::OUT_ARG_DgDp,
        num_g,
        l,
        Thyra::ModelEvaluatorBase::DERIV_MV_JACOBIAN_FORM);

    if (num_g > 0) {
      // Only one response supported
      const int j = 0;

      const Thyra::ModelEvaluatorBase::DerivativeSupport model_dgdx_support =
        modelOutArgs.supports(Thyra::ModelEvaluatorBase::OUT_ARG_DgDx, j);
      if (!model_dgdx_support.none()) {
        const Thyra::ModelEvaluatorBase::DerivativeSupport model_dgdp_support =
          modelOutArgs.supports(Thyra::ModelEvaluatorBase::OUT_ARG_DgDp, j, l);
        // Response sensitivity
        Thyra::ModelEvaluatorBase::DerivativeSupport dgdp_support;
        if (model_dgdp_support.supports(Thyra::ModelEvaluatorBase::DERIV_MV_JACOBIAN_FORM)) {
          dgdp_support.plus(Thyra::ModelEvaluatorBase::DERIV_MV_JACOBIAN_FORM);
        }
        if (model_dgdp_support.supports(Thyra::ModelEvaluatorBase::DERIV_LINEAR_OP)) {
          dgdp_support.plus(Thyra::ModelEvaluatorBase::DERIV_LINEAR_OP);
        }
        outArgs.setSupports(
            Thyra::ModelEvaluatorBase::OUT_ARG_DgDp,
            j,
            l,
            dgdp_support);
      }
    }
  }

  return outArgs;

}
void Piro::SteadyStateSolver<Scalar>::evalConvergedModel(
    const Thyra::ModelEvaluatorBase::InArgs<Scalar>& modelInArgs,
    const Thyra::ModelEvaluatorBase::OutArgs<Scalar>& outArgs) const
{
  using Teuchos::RCP;
  using Teuchos::rcp;

  // Solution at convergence is the response at index num_g_
  {
    const RCP<Thyra::VectorBase<Scalar> > gx_out = outArgs.get_g(num_g_);
    if (Teuchos::nonnull(gx_out)) {
      Thyra::copy(*modelInArgs.get_x(), gx_out.ptr());
    }
  }

  // Setup output for final evalution of underlying model
  Thyra::ModelEvaluatorBase::OutArgs<Scalar> modelOutArgs = model_->createOutArgs();
  {
    // Responses
    for (int j = 0; j < num_g_; ++j) {
      const RCP<Thyra::VectorBase<Scalar> > g_out = outArgs.get_g(j);
      // Forward to underlying model
      modelOutArgs.set_g(j, g_out);
    }

    // Jacobian
    {
      bool jacobianRequired = false;
      for (int j = 0; j <= num_g_; ++j) {
        for (int l = 0; l < num_p_; ++l) {
          const Thyra::ModelEvaluatorBase::DerivativeSupport dgdp_support =
            outArgs.supports(Thyra::ModelEvaluatorBase::OUT_ARG_DgDp, j, l);
          if (!dgdp_support.none()) {
            const Thyra::ModelEvaluatorBase::Derivative<Scalar> dgdp_deriv =
              outArgs.get_DgDp(j, l);
            if (!dgdp_deriv.isEmpty()) {
              jacobianRequired = true;
            }
          }
        }
      }
      if (jacobianRequired) {
        const RCP<Thyra::LinearOpWithSolveBase<Scalar> > jacobian =
          model_->create_W();
        modelOutArgs.set_W(jacobian);
      }
    }

    // DfDp derivatives
    for (int l = 0; l < num_p_; ++l) {
      Thyra::ModelEvaluatorBase::DerivativeSupport dfdp_request;
      for (int j = 0; j <= num_g_; ++j) {
        const Thyra::ModelEvaluatorBase::DerivativeSupport dgdp_support =
          outArgs.supports(Thyra::ModelEvaluatorBase::OUT_ARG_DgDp, j, l);
        if (!dgdp_support.none()) {
          const Thyra::ModelEvaluatorBase::Derivative<Scalar> dgdp_deriv =
            outArgs.get_DgDp(j, l);
          if (Teuchos::nonnull(dgdp_deriv.getLinearOp())) {
            dfdp_request.plus(Thyra::ModelEvaluatorBase::DERIV_LINEAR_OP);
          } else if (Teuchos::nonnull(dgdp_deriv.getMultiVector())) {
            dfdp_request.plus(Thyra::ModelEvaluatorBase::DERIV_MV_JACOBIAN_FORM);
          }
        }
      }

      if (!dfdp_request.none()) {
        Thyra::ModelEvaluatorBase::Derivative<Scalar> dfdp_deriv;
        if (dfdp_request.supports(Thyra::ModelEvaluatorBase::DERIV_MV_JACOBIAN_FORM)) {
          dfdp_deriv = Thyra::create_DfDp_mv(*model_, l, Thyra::ModelEvaluatorBase::DERIV_MV_JACOBIAN_FORM);
        } else if (dfdp_request.supports(Thyra::ModelEvaluatorBase::DERIV_LINEAR_OP)) {
          dfdp_deriv = model_->create_DfDp_op(l);
        }
        modelOutArgs.set_DfDp(l, dfdp_deriv);
      }
    }

    // DgDx derivatives
    for (int j = 0; j < num_g_; ++j) {
      Thyra::ModelEvaluatorBase::DerivativeSupport dgdx_request;
      for (int l = 0; l < num_p_; ++l) {
        const Thyra::ModelEvaluatorBase::DerivativeSupport dgdp_support =
          outArgs.supports(Thyra::ModelEvaluatorBase::OUT_ARG_DgDp, j, l);
        if (!dgdp_support.none()) {
          const Thyra::ModelEvaluatorBase::Derivative<Scalar> dgdp_deriv =
            outArgs.get_DgDp(j, l);
          if (!dgdp_deriv.isEmpty()) {
            const bool dgdp_mvGrad_required =
              Teuchos::nonnull(dgdp_deriv.getMultiVector()) &&
              dgdp_deriv.getMultiVectorOrientation() == Thyra::ModelEvaluatorBase::DERIV_MV_GRADIENT_FORM;
            if (dgdp_mvGrad_required) {
              dgdx_request.plus(Thyra::ModelEvaluatorBase::DERIV_MV_GRADIENT_FORM);
            } else {
              dgdx_request.plus(Thyra::ModelEvaluatorBase::DERIV_LINEAR_OP);
            }
          }
        }
      }

      if (!dgdx_request.none()) {
        Thyra::ModelEvaluatorBase::Derivative<Scalar> dgdx_deriv;
        if (dgdx_request.supports(Thyra::ModelEvaluatorBase::DERIV_MV_GRADIENT_FORM)) {
          dgdx_deriv = Thyra::create_DgDx_mv(*model_, j, Thyra::ModelEvaluatorBase::DERIV_MV_GRADIENT_FORM);
        } else if (dgdx_request.supports(Thyra::ModelEvaluatorBase::DERIV_LINEAR_OP)) {
          dgdx_deriv = model_->create_DgDx_op(j);
        }
        modelOutArgs.set_DgDx(j, dgdx_deriv);
      }
    }

    // DgDp derivatives
    for (int l = 0; l < num_p_; ++l) {
      for (int j = 0; j < num_g_; ++j) {
        const Thyra::ModelEvaluatorBase::DerivativeSupport dgdp_support =
          outArgs.supports(Thyra::ModelEvaluatorBase::OUT_ARG_DgDp, j, l);
        if (!dgdp_support.none()) {
          const Thyra::ModelEvaluatorBase::Derivative<Scalar> dgdp_deriv =
            outArgs.get_DgDp(j, l);
          Thyra::ModelEvaluatorBase::Derivative<Scalar> model_dgdp_deriv;
          const RCP<Thyra::LinearOpBase<Scalar> > dgdp_op = dgdp_deriv.getLinearOp();
          if (Teuchos::nonnull(dgdp_op)) {
            model_dgdp_deriv = model_->create_DgDp_op(j, l);
          } else {
            model_dgdp_deriv = dgdp_deriv;
          }
          if (!model_dgdp_deriv.isEmpty()) {
            modelOutArgs.set_DgDp(j, l, model_dgdp_deriv);
          }
        }
      }
    }
  }

  // Evaluate underlying model
  model_->evalModel(modelInArgs, modelOutArgs);

  // Assemble user-requested sensitivities
  if (modelOutArgs.supports(Thyra::ModelEvaluatorBase::OUT_ARG_W)) {
    const RCP<Thyra::LinearOpWithSolveBase<Scalar> > jacobian =
      modelOutArgs.get_W();
    if (Teuchos::nonnull(jacobian)) {
      for (int l = 0; l < num_p_; ++l) {
        const Thyra::ModelEvaluatorBase::DerivativeSupport dfdp_support =
          modelOutArgs.supports(Thyra::ModelEvaluatorBase::OUT_ARG_DfDp, l);
        if (!dfdp_support.none()) {
          const Thyra::ModelEvaluatorBase::Derivative<Scalar> dfdp_deriv =
            modelOutArgs.get_DfDp(l);
          const RCP<Thyra::MultiVectorBase<Scalar> > dfdp_mv =
            dfdp_deriv.getMultiVector();
          RCP<Thyra::LinearOpBase<Scalar> > dfdp_op =
            dfdp_deriv.getLinearOp();
          if (Teuchos::is_null(dfdp_op)) {
            dfdp_op = dfdp_mv;
          }

          const Thyra::ModelEvaluatorBase::Derivative<Scalar> dxdp_deriv =
            outArgs.get_DgDp(num_g_, l);
          const RCP<Thyra::LinearOpBase<Scalar> > dxdp_op =
            dxdp_deriv.getLinearOp();
          const RCP<Thyra::MultiVectorBase<Scalar> > dxdp_mv =
            dxdp_deriv.getMultiVector();

          RCP<const Thyra::LinearOpBase<Scalar> > minus_dxdp_op;
          RCP<Thyra::MultiVectorBase<Scalar> > minus_dxdp_mv;
          if (Teuchos::nonnull(dfdp_mv)) {
            if (Teuchos::nonnull(dxdp_mv)) {
              minus_dxdp_mv = dxdp_mv; // Use user-provided object as temporary
            } else {
              minus_dxdp_mv =
                Thyra::createMembers(model_->get_x_space(), model_->get_p_space(l));
              minus_dxdp_op = minus_dxdp_mv;
            }
          }

          if (Teuchos::is_null(minus_dxdp_op)) {
            const RCP<const Thyra::LinearOpBase<Scalar> > dfdx_inv_op =
              Thyra::inverse<Scalar>(jacobian);
            minus_dxdp_op = Thyra::multiply<Scalar>(dfdx_inv_op, dfdp_op);
          }

          if (Teuchos::nonnull(minus_dxdp_mv)) {
            Thyra::assign(minus_dxdp_mv.ptr(), Teuchos::ScalarTraits<Scalar>::zero());

            const Thyra::SolveCriteria<Scalar> defaultSolveCriteria;
            const Thyra::SolveStatus<Scalar> solveStatus =
              Thyra::solve(
                  *jacobian,
                  Thyra::NOTRANS,
                  *dfdp_mv,
                  minus_dxdp_mv.ptr(),
                  Teuchos::ptr(&defaultSolveCriteria));
            TEUCHOS_TEST_FOR_EXCEPTION(
                solveStatus.solveStatus == Thyra::SOLVE_STATUS_UNCONVERGED,
                std::runtime_error,
                "Jacobian solver failed to converge");
          }

          // Solution sensitivities
          if (Teuchos::nonnull(dxdp_mv)) {
            minus_dxdp_mv = Teuchos::null; // Invalidates temporary
            Thyra::scale(-Teuchos::ScalarTraits<Scalar>::one(), dxdp_mv.ptr());
          } else if (Teuchos::nonnull(dxdp_op)) {
            const RCP<Thyra::DefaultMultipliedLinearOp<Scalar> > dxdp_op_downcasted =
              Teuchos::rcp_dynamic_cast<Thyra::DefaultMultipliedLinearOp<Scalar> >(dxdp_op);
            TEUCHOS_TEST_FOR_EXCEPTION(
                Teuchos::is_null(dxdp_op_downcasted),
                std::invalid_argument,
                "Illegal operator for DgDp(" <<
                "j = " << num_g_ << ", " <<
                "index l = " << l << ")\n");

            const RCP<const Thyra::LinearOpBase<Scalar> > minus_id_op =
              Thyra::scale<Scalar>(-Teuchos::ScalarTraits<Scalar>::one(), Thyra::identity(dfdp_op->domain()));

            dxdp_op_downcasted->initialize(Teuchos::tuple(minus_dxdp_op, minus_id_op));
          }

          // Response sensitivities
          for (int j = 0; j < num_g_; ++j) {
            const Thyra::ModelEvaluatorBase::DerivativeSupport dgdp_support =
              outArgs.supports(Thyra::ModelEvaluatorBase::OUT_ARG_DgDp, j, l);
            if (!dgdp_support.none()) {
              const Thyra::ModelEvaluatorBase::Derivative<Scalar> dgdp_deriv =
                outArgs.get_DgDp(j, l);
              if (!dgdp_deriv.isEmpty()) {
                const Thyra::ModelEvaluatorBase::Derivative<Scalar> dgdx_deriv =
                  modelOutArgs.get_DgDx(j);
                const RCP<const Thyra::MultiVectorBase<Scalar> > dgdx_mv =
                  dgdx_deriv.getMultiVector();
                RCP<const Thyra::LinearOpBase<Scalar> > dgdx_op =
                  dgdx_deriv.getLinearOp();
                if (Teuchos::is_null(dgdx_op)) {
                  dgdx_op = Thyra::adjoint<Scalar>(dgdx_mv);
                }

                const RCP<Thyra::LinearOpBase<Scalar> > dgdp_op =
                  dgdp_deriv.getLinearOp();
                if (Teuchos::nonnull(dgdp_op)) {
                  const RCP<Thyra::DefaultAddedLinearOp<Scalar> > dgdp_op_downcasted =
                    Teuchos::rcp_dynamic_cast<Thyra::DefaultAddedLinearOp<Scalar> >(dgdp_op);
                  TEUCHOS_TEST_FOR_EXCEPTION(
                      Teuchos::is_null(dgdp_op_downcasted),
                      std::invalid_argument,
                      "Illegal operator for DgDp(" <<
                      "j = " << j << ", " <<
                      "index l = " << l << ")\n");

                  dgdp_op_downcasted->uninitialize();

                  const RCP<const Thyra::LinearOpBase<Scalar> > implicit_dgdp_op =
                    Thyra::multiply<Scalar>(
                      Thyra::scale<Scalar>(-Teuchos::ScalarTraits<Scalar>::one(), dgdx_op),
                      minus_dxdp_op);

                  const RCP<const Thyra::LinearOpBase<Scalar> > model_dgdp_op =
                    modelOutArgs.get_DgDp(j, l).getLinearOp();

                  Teuchos::Array<RCP<const Thyra::LinearOpBase<Scalar> > > op_args(2);
                  op_args[0] = model_dgdp_op;
                  op_args[1] = implicit_dgdp_op;
                  dgdp_op_downcasted->initialize(op_args);
                }

                const RCP<Thyra::MultiVectorBase<Scalar> > dgdp_mv =
                  dgdp_deriv.getMultiVector();
                if (Teuchos::nonnull(dgdp_mv)) {
                  if (dgdp_deriv.getMultiVectorOrientation() == Thyra::ModelEvaluatorBase::DERIV_MV_GRADIENT_FORM) {
                    if (Teuchos::nonnull(dxdp_mv)) {
                      Thyra::apply(
                          *dxdp_mv,
                          Thyra::TRANS,
                          *dgdx_mv,
                          dgdp_mv.ptr(),
                          Teuchos::ScalarTraits<Scalar>::one(),
                          Teuchos::ScalarTraits<Scalar>::one());
                    } else {
                      Thyra::apply(
                          *minus_dxdp_mv,
                          Thyra::TRANS,
                          *dgdx_mv,
                          dgdp_mv.ptr(),
                          -Teuchos::ScalarTraits<Scalar>::one(),
                          Teuchos::ScalarTraits<Scalar>::one());
                    }
                  } else {
                    if (Teuchos::nonnull(dxdp_mv)) {
                      Thyra::apply(
                          *dgdx_op,
                          Thyra::NOTRANS,
                          *dxdp_mv,
                          dgdp_mv.ptr(),
                          Teuchos::ScalarTraits<Scalar>::one(),
                          Teuchos::ScalarTraits<Scalar>::one());
                    } else {
                      Thyra::apply(
                          *dgdx_op,
                          Thyra::NOTRANS,
                          *minus_dxdp_mv,
                          dgdp_mv.ptr(),
                          -Teuchos::ScalarTraits<Scalar>::one(),
                          Teuchos::ScalarTraits<Scalar>::one());
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}
void Piro::RythmosSolver<Scalar>::evalModelImpl(
#endif
    const Thyra::ModelEvaluatorBase::InArgs<Scalar>& inArgs,
    const Thyra::ModelEvaluatorBase::OutArgs<Scalar>& outArgs) const
{
    using Teuchos::RCP;
    using Teuchos::rcp;

    // TODO: Support more than 1 parameter and 1 response
    const int j = 0;
    const int l = 0;

    // Parse InArgs
    RCP<const Thyra::VectorBase<Scalar> > p_in;
    if (num_p > 0) {
        p_in = inArgs.get_p(l);
    }
    RCP<const Thyra::VectorBase<Scalar> > p_in2;  //JF add for multipoint
    if (num_p > 1) {
        p_in2 = inArgs.get_p(l+1);
    }

    // Parse OutArgs
    RCP<Thyra::VectorBase<Scalar> > g_out;
    if (num_g > 0) {
        g_out = outArgs.get_g(j);
    }
    const RCP<Thyra::VectorBase<Scalar> > gx_out = outArgs.get_g(num_g);

    Thyra::ModelEvaluatorBase::InArgs<Scalar> state_ic = model->getNominalValues();

    // Set initial time in ME if needed

    if(t_initial > 0.0 && state_ic.supports(Thyra::ModelEvaluatorBase::IN_ARG_t))

        state_ic.set_t(t_initial);

    if (Teuchos::nonnull(initialConditionModel)) {
        // The initial condition depends on the parameter
        // It is found by querying the auxiliary model evaluator as the last response
        const RCP<Thyra::VectorBase<Scalar> > initialState =
            Thyra::createMember(model->get_x_space());

        {
            Thyra::ModelEvaluatorBase::InArgs<Scalar> initCondInArgs = initialConditionModel->createInArgs();
            if (num_p > 0) {
                initCondInArgs.set_p(l, inArgs.get_p(l));
            }

            Thyra::ModelEvaluatorBase::OutArgs<Scalar> initCondOutArgs = initialConditionModel->createOutArgs();
            initCondOutArgs.set_g(initCondOutArgs.Ng() - 1, initialState);

            initialConditionModel->evalModel(initCondInArgs, initCondOutArgs);
        }

        state_ic.set_x(initialState);
    }

    // Set paramters p_in as part of initial conditions
    if (num_p > 0) {
        if (Teuchos::nonnull(p_in)) {
            state_ic.set_p(l, p_in);
        }
    }
    if (num_p > 1) { //JF added for multipoint
        if (Teuchos::nonnull(p_in2)) {
            state_ic.set_p(l+1, p_in2);
        }
    }

    *out << "\nstate_ic:\n" << Teuchos::describe(state_ic, solnVerbLevel);

    //JF  may need a version of the following for multipoint, i.e. num_p>1, l+1, if we want sensitivities
    RCP<Thyra::MultiVectorBase<Scalar> > dgxdp_out;
    Thyra::ModelEvaluatorBase::Derivative<Scalar> dgdp_deriv_out;
    if (num_p > 0) {
        const Thyra::ModelEvaluatorBase::DerivativeSupport dgxdp_support =
            outArgs.supports(Thyra::ModelEvaluatorBase::OUT_ARG_DgDp, num_g, l);
        if (dgxdp_support.supports(Thyra::ModelEvaluatorBase::DERIV_MV_JACOBIAN_FORM)) {
            const Thyra::ModelEvaluatorBase::Derivative<Scalar> dgxdp_deriv =
                outArgs.get_DgDp(num_g, l);
            dgxdp_out = dgxdp_deriv.getMultiVector();
        }

        if (num_g > 0) {
            const Thyra::ModelEvaluatorBase::DerivativeSupport dgdp_support =
                outArgs.supports(Thyra::ModelEvaluatorBase::OUT_ARG_DgDp, j, l);
            if (!dgdp_support.none()) {
                dgdp_deriv_out = outArgs.get_DgDp(j, l);
            }
        }
    }

    const bool requestedSensitivities =
        Teuchos::nonnull(dgxdp_out) || !dgdp_deriv_out.isEmpty();

    RCP<const Thyra::VectorBase<Scalar> > finalSolution;
    if (!requestedSensitivities) {
        //
        *out << "\nE) Solve the forward problem ...\n";
        //

        fwdStateStepper->setInitialCondition(state_ic);

        fwdStateIntegrator->setStepper(fwdStateStepper, t_final, true);
        *out << "T final : " << t_final << " \n";

        Teuchos::Array<RCP<const Thyra::VectorBase<Scalar> > > x_final_array;
        fwdStateIntegrator->getFwdPoints(
            Teuchos::tuple<Scalar>(t_final), &x_final_array, NULL, NULL);
        finalSolution = x_final_array[0];

        if (Teuchos::VERB_MEDIUM <= solnVerbLevel) {
            std::cout << "Final Solution\n" << *finalSolution << std::endl;
        }

    } else { // Computing sensitivities
        //
        *out << "\nE) Solve the forward problem with Sensitivities...\n";
        //
        RCP<Rythmos::ForwardSensitivityStepper<Scalar> > stateAndSensStepper =
            Rythmos::forwardSensitivityStepper<Scalar>();
        stateAndSensStepper->initializeSyncedSteppers(
            model, l, model->getNominalValues(),
            fwdStateStepper, fwdTimeStepSolver);

        //
        // Set the initial condition for the state and forward sensitivities
        //

        const RCP<Thyra::VectorBase<Scalar> > s_bar_init =
            Thyra::createMember(stateAndSensStepper->getFwdSensModel()->get_x_space());
        const RCP<Thyra::VectorBase<Scalar> > s_bar_dot_init =
            Thyra::createMember(stateAndSensStepper->getFwdSensModel()->get_x_space());

        if (Teuchos::is_null(initialConditionModel)) {
            // The initial condition is assumed to be independent from the parameters
            // Therefore, the initial condition for the sensitivity is zero
            Thyra::assign(s_bar_init.ptr(), Teuchos::ScalarTraits<Scalar>::zero());
        } else {
            // Use initialConditionModel to compute initial condition for sensitivity
            Thyra::ModelEvaluatorBase::InArgs<Scalar> initCondInArgs = initialConditionModel->createInArgs();
            initCondInArgs.set_p(l, inArgs.get_p(l));

            Thyra::ModelEvaluatorBase::OutArgs<Scalar> initCondOutArgs = initialConditionModel->createOutArgs();
            typedef Thyra::DefaultMultiVectorProductVector<Scalar> DMVPV;
            const RCP<DMVPV> s_bar_init_downcasted = Teuchos::rcp_dynamic_cast<DMVPV>(s_bar_init);
            const Thyra::ModelEvaluatorBase::Derivative<Scalar> initCond_deriv(
                s_bar_init_downcasted->getNonconstMultiVector(),
                Thyra::ModelEvaluatorBase::DERIV_MV_JACOBIAN_FORM);
            initCondOutArgs.set_DgDp(initCondOutArgs.Ng() - 1, l, initCond_deriv);

            initialConditionModel->evalModel(initCondInArgs, initCondOutArgs);
        }
        Thyra::assign(s_bar_dot_init.ptr(), Teuchos::ScalarTraits<Scalar>::zero());

        RCP<const Rythmos::StateAndForwardSensitivityModelEvaluator<Scalar> >
        stateAndSensModel = stateAndSensStepper->getStateAndFwdSensModel();

        Thyra::ModelEvaluatorBase::InArgs<Scalar>
        state_and_sens_ic = stateAndSensStepper->getModel()->createInArgs();

        // Copy time, parameters etc.
        state_and_sens_ic.setArgs(state_ic);
        // Set initial condition for x_bar = [ x; s_bar ]
        state_and_sens_ic.set_x(stateAndSensModel->create_x_bar_vec(state_ic.get_x(), s_bar_init));
        // Set initial condition for x_bar_dot = [ x_dot; s_bar_dot ]
        state_and_sens_ic.set_x_dot(stateAndSensModel->create_x_bar_vec(state_ic.get_x_dot(), s_bar_dot_init));

        stateAndSensStepper->setInitialCondition(state_and_sens_ic);

        //
        // Use a StepperAsModelEvaluator to integrate the state+sens
        //

        const RCP<Rythmos::StepperAsModelEvaluator<Scalar> >
        stateAndSensIntegratorAsModel = Rythmos::stepperAsModelEvaluator(
                                            Teuchos::rcp_implicit_cast<Rythmos::StepperBase<Scalar> >(stateAndSensStepper),
                                            Teuchos::rcp_implicit_cast<Rythmos::IntegratorBase<Scalar> >(fwdStateIntegrator),
                                            state_and_sens_ic);
        // StepperAsModelEvaluator outputs the solution as its last response
        const int stateAndSensModelStateResponseIndex = stateAndSensIntegratorAsModel->Ng() - 1;

        *out << "\nUse the StepperAsModelEvaluator to integrate state + sens x_bar(p,t_final) ... \n";
        Teuchos::OSTab tab(out);

        // Solution sensitivity in column-oriented (Jacobian) MultiVector form
        RCP<const Thyra::MultiVectorBase<Scalar> > dxdp;

        const RCP<Thyra::VectorBase<Scalar> > x_bar_final =
            Thyra::createMember(stateAndSensIntegratorAsModel->get_g_space(stateAndSensModelStateResponseIndex));
        // Extract pieces of x_bar_final to prepare output
        {
            const RCP<const Thyra::ProductVectorBase<Scalar> > x_bar_final_downcasted =
                Thyra::productVectorBase<Scalar>(x_bar_final);

            // Solution
            const int solutionBlockIndex = 0;
            finalSolution = x_bar_final_downcasted->getVectorBlock(solutionBlockIndex);

            // Sensitivity
            const int sensitivityBlockIndex = 1;
            const RCP<const Thyra::VectorBase<Scalar> > s_bar_final =
                x_bar_final_downcasted->getVectorBlock(sensitivityBlockIndex);
            {
                typedef Thyra::DefaultMultiVectorProductVector<Scalar> DMVPV;
                const RCP<const DMVPV> s_bar_final_downcasted = Teuchos::rcp_dynamic_cast<const DMVPV>(s_bar_final);

                dxdp = s_bar_final_downcasted->getMultiVector();
            }
        }

        Thyra::eval_g(
            *stateAndSensIntegratorAsModel,
            l, *state_ic.get_p(l),
            t_final,
            stateAndSensModelStateResponseIndex, x_bar_final.get()
        );

        *out
                << "\nx_bar_final = x_bar(p,t_final) evaluated using "
                << "stateAndSensIntegratorAsModel:\n"
                << Teuchos::describe(*x_bar_final,solnVerbLevel);

        if (Teuchos::nonnull(dgxdp_out)) {
            Thyra::assign(dgxdp_out.ptr(), *dxdp);
        }

        if (!dgdp_deriv_out.isEmpty()) {
            RCP<Thyra::DefaultAddedLinearOp<Scalar> > dgdp_op_out;
            {
                const RCP<Thyra::LinearOpBase<Scalar> > dgdp_op = dgdp_deriv_out.getLinearOp();
                if (Teuchos::nonnull(dgdp_op)) {
                    dgdp_op_out = Teuchos::rcp_dynamic_cast<Thyra::DefaultAddedLinearOp<Scalar> >(dgdp_op);
                    dgdp_op_out.assert_not_null();
                }
            }

            Thyra::ModelEvaluatorBase::InArgs<Scalar> modelInArgs = model->createInArgs();
            {
                modelInArgs.set_x(finalSolution);
                if (num_p > 0) {
                    modelInArgs.set_p(l, p_in);
                }
            }

            // require dgdx, dgdp from model
            Thyra::ModelEvaluatorBase::OutArgs<Scalar> modelOutArgs = model->createOutArgs();
            {
                const Thyra::ModelEvaluatorBase::Derivative<Scalar> dgdx_deriv(model->create_DgDx_op(j));
                modelOutArgs.set_DgDx(j, dgdx_deriv);

                Thyra::ModelEvaluatorBase::Derivative<Scalar> dgdp_deriv;
                if (Teuchos::nonnull(dgdp_op_out)) {
                    dgdp_deriv = model->create_DgDp_op(j, l);
                } else {
                    dgdp_deriv = dgdp_deriv_out;
                }
                modelOutArgs.set_DgDp(j, l, dgdp_deriv);
            }

            model->evalModel(modelInArgs, modelOutArgs);

            const RCP<const Thyra::LinearOpBase<Scalar> > dgdx =
                modelOutArgs.get_DgDx(j).getLinearOp();

            // dgdp_out = dgdp + <dgdx, dxdp>
            if (Teuchos::nonnull(dgdp_op_out)) {
                Teuchos::Array<RCP<const Thyra::LinearOpBase<Scalar> > > op_args(2);
                {
                    op_args[0] = modelOutArgs.get_DgDp(j, l).getLinearOp();
                    op_args[1] = Thyra::multiply<Scalar>(dgdx, dxdp);
                }
                dgdp_op_out->initialize(op_args);
            } else {
                const RCP<Thyra::MultiVectorBase<Scalar> > dgdp_mv_out = dgdp_deriv_out.getMultiVector();
                Thyra::apply(
                    *dgdx,
                    Thyra::NOTRANS,
                    *dxdp,
                    dgdp_mv_out.ptr(),
                    Teuchos::ScalarTraits<Scalar>::one(),
                    Teuchos::ScalarTraits<Scalar>::one());
            }
        }
    }

    *out << "\nF) Check the solution to the forward problem ...\n";

    // As post-processing step, calculate responses at final solution
    {
        Thyra::ModelEvaluatorBase::InArgs<Scalar> modelInArgs = model->createInArgs();
        {
            modelInArgs.set_x(finalSolution);
            if (num_p > 0) {
                modelInArgs.set_p(l, p_in);
            }
            if (num_p > 1) {  //JF added for multipoint
                modelInArgs.set_p(l+1, p_in2);
            }
            //Set time to be final time at which the solve occurs (< t_final in the case we don't make it to t_final).
            modelInArgs.set_t(fwdStateStepper->getTimeRange().lower());
        }

        Thyra::ModelEvaluatorBase::OutArgs<Scalar> modelOutArgs = model->createOutArgs();
        if (Teuchos::nonnull(g_out)) {
            Thyra::put_scalar(Teuchos::ScalarTraits<Scalar>::zero(), g_out.ptr());
            modelOutArgs.set_g(j, g_out);
        }

        model->evalModel(modelInArgs, modelOutArgs);
    }

    // Return the final solution as an additional g-vector, if requested
    if (Teuchos::nonnull(gx_out)) {
        Thyra::copy(*finalSolution, gx_out.ptr());
    }
}