void assertValidModel(
  const StepperBase<Scalar>& stepper,
  const Thyra::ModelEvaluator<Scalar>& model
  )
{
  // Asserts (via TEUCHOS_ASSERT) that the stepper accepts a model and that
  // `model` advertises the input/output arguments the stepper will use.

  typedef Thyra::ModelEvaluatorBase MEB;

  TEUCHOS_ASSERT(stepper.acceptsModel());

  const MEB::InArgs<Scalar> inArgs = model.createInArgs();
  const MEB::OutArgs<Scalar> outArgs = model.createOutArgs();

  // Needed by every stepper: the state x as input and f as output.
  // Support for IN_ARG_t is intentionally not checked.
  TEUCHOS_ASSERT(inArgs.supports(MEB::IN_ARG_x));
  TEUCHOS_ASSERT(outArgs.supports(MEB::OUT_ARG_f));

  // Explicit steppers have no further requirements.  In particular, a model
  // that additionally supports x_dot, alpha, beta or W is not rejected.
  if (!stepper.isImplicit())
    return;

  // Implicit steppers also need x_dot, the alpha/beta coefficients and W.
  TEUCHOS_ASSERT( inArgs.supports(MEB::IN_ARG_x_dot) );
  TEUCHOS_ASSERT( inArgs.supports(MEB::IN_ARG_alpha) );
  TEUCHOS_ASSERT( inArgs.supports(MEB::IN_ARG_beta) );
  TEUCHOS_ASSERT( outArgs.supports(MEB::OUT_ARG_W) );

}
TEUCHOS_UNIT_TEST( Rythmos_ForwardSensitivityExplicitModelEvaluator, args ) {
  // Verifies which in/out arguments the explicit forward-sensitivity model
  // evaluator reports as supported once it wraps an explicit SinCosModel.
  typedef Thyra::ModelEvaluatorBase MEB;

  RCP<ForwardSensitivityExplicitModelEvaluator<double> > fwdSensModel =
    forwardSensitivityExplicitModelEvaluator<double>();

  // State model: explicit formulation that accepts model parameters.
  RCP<SinCosModel> stateModel = sinCosModel();
  RCP<ParameterList> stateModelPL = Teuchos::parameterList();
  stateModelPL->set("Accept model parameters",true);
  stateModelPL->set("Implicit model formulation",false);
  stateModel->setParameterList(stateModelPL);

  // Sensitivities are taken with respect to parameter subvector 0.
  fwdSensModel->initializeStructure(stateModel, 0 );

  // Input arguments
  {
    const MEB::InArgs<double> inArgs = fwdSensModel->createInArgs();
    TEST_EQUALITY_CONST( inArgs.supports(MEB::IN_ARG_t), true );
    TEST_EQUALITY_CONST( inArgs.supports(MEB::IN_ARG_x), true );
    TEST_EQUALITY_CONST( inArgs.supports(MEB::IN_ARG_x_dot), false );
    TEST_EQUALITY_CONST( inArgs.supports(MEB::IN_ARG_alpha), false );
    TEST_EQUALITY_CONST( inArgs.supports(MEB::IN_ARG_beta), true );
  }

  // Output arguments
  {
    const MEB::OutArgs<double> outArgs = fwdSensModel->createOutArgs();
    TEST_EQUALITY_CONST( outArgs.supports(MEB::OUT_ARG_f), true );
    TEST_EQUALITY_CONST( outArgs.supports(MEB::OUT_ARG_W_op), false );
    TEST_EQUALITY_CONST( outArgs.supports(MEB::OUT_ARG_W), false );
  }
}
// Dakota direct-interface callback that evaluates the wrapped Thyra
// ModelEvaluator (App) at the current Dakota variable values.
//
// When App is non-null: copies xC into model_p, evaluates the model, copies
// model_g into fnVals and, if gradFlag is set, copies model_dgdp into fnGrads
// (honoring the multi-vector orientation).  When App is null, only checks that
// this process has no analysis communicator.
//
// ac_name is not used.  Always returns 0; inconsistencies are reported by
// throwing std::logic_error.
int TriKota::ThyraDirectApplicInterface::derived_map_ac(const Dakota::String& ac_name)
{

  if (App != Teuchos::null) {

    // Test for consistency of problem definition between ModelEval and Dakota
    TEST_FOR_EXCEPTION(numVars > numParameters, std::logic_error,
                       "TriKota_Dakota Adapter Error: Dakota has numVars = " << numVars
                       << " variables, which exceeds the numParameters = " << numParameters
                       << " parameters of the ModelEvaluator.");
    TEST_FOR_EXCEPTION(numFns > numResponses, std::logic_error,
                       "TriKota_Dakota Adapter Error: Dakota has numFns = " << numFns
                       << " functions, which exceeds the numResponses = " << numResponses
                       << " responses of the ModelEvaluator.");
    TEST_FOR_EXCEPTION(hessFlag, std::logic_error,
                       "TriKota_Dakota Adapter Error: Hessians were requested by Dakota"
                       " but are not supported by this adapter.");

    MEB::InArgs<double> inArgs = App->createInArgs();
    MEB::OutArgs<double> outArgs = App->createOutArgs();

    TEST_FOR_EXCEPTION(gradFlag && !supportsSensitivities, std::logic_error,
                       "TriKota_Dakota Adapter Error: gradients were requested by Dakota"
                       " but sensitivities are not supported.");

    // Load parameters from Dakota to ModelEval data structure.  The detached
    // view is scoped so that it is destroyed before the model is evaluated.
    {
      Thyra::DetachedVectorView<double> my_p(model_p);
      for (unsigned int i=0; i<numVars; i++) my_p[i]=xC[i];
    }

    // Evaluate model
    inArgs.set_p(0,model_p);
    outArgs.set_g(0,model_g);
    if (gradFlag) outArgs.set_DgDp(0,0,
      MEB::DerivativeMultiVector<double>(model_dgdp,orientation));
    App->evalModel(inArgs, outArgs);

    // Copy responses back to Dakota
    Thyra::DetachedVectorView<double> my_g(model_g);
    for (unsigned int j=0; j<numFns; j++) fnVals[j]= my_g[j];

    if (gradFlag) {
      if (orientation == MEB::DERIV_MV_BY_COL) {
        // One column per variable; entries within a column run over functions.
        for (unsigned int j=0; j<numVars; j++) {
          Thyra::DetachedVectorView<double>
             my_dgdp_j(model_dgdp->col(j));
          for (unsigned int i=0; i<numFns; i++)  fnGrads[i][j]= my_dgdp_j[i];
        }
      }
      else {
        // One column per function; entries within a column run over variables.
        for (unsigned int j=0; j<numFns; j++) {
          Thyra::DetachedVectorView<double>
             my_dgdp_j(model_dgdp->col(j));
          for (unsigned int i=0; i<numVars; i++) fnGrads[j][i]= my_dgdp_j[i]; 
        }
      }
    }
  }
  else {
    // No model on this process: it must not belong to an analysis communicator.
    TEST_FOR_EXCEPTION(parallelLib.parallel_configuration().ea_parallel_level().server_intra_communicator()
               != MPI_COMM_NULL, std::logic_error,
              "\nTriKota Parallelism Error: ModelEvaluator=null, but analysis_comm != MPI_COMM_NULL");
  }

  return 0;
}
// Builds invMassMatrix_ from the underlying model's W operator.
//
// The underlying model is evaluated with alpha = -1, beta = 0 and zero x/x_dot
// so that W_op contains only the mass-matrix contribution.  Depending on
// massLumping_, the result is either an inverse operator built with the
// model's W factory, or a diagonal operator holding the reciprocals of the
// row sums of the mass matrix.
void ExplicitModelEvaluator<Scalar>::
buildInverseMassMatrix() const
{
  typedef Thyra::ModelEvaluatorBase MEB;
  using Teuchos::RCP;
  using Thyra::createMember;
  
  RCP<const Thyra::ModelEvaluator<Scalar> > me = this->getUnderlyingModel();

  // first allocate space for the mass matrix
  RCP<Thyra::LinearOpBase<Scalar> > mass = me->create_W_op();

  // initialize a zero to get rid of the x-dot 
  if(zero_==Teuchos::null) {
    zero_ = Thyra::createMember(*me->get_x_space());
    Thyra::assign(zero_.ptr(),0.0);
  }

  // The state passed to the model is only a placeholder, but it must hold
  // defined values: a freshly created vector is uninitialized, and whatever
  // it happens to contain would otherwise be read during the evaluation.
  const RCP<Thyra::VectorBase<Scalar> > x = createMember(me->get_x_space());
  Thyra::assign(x.ptr(),0.0);
  
  // request only the mass matrix from the physics
  // Model evaluator builds: alpha*u_dot + beta*F(u) = 0
  MEB::InArgs<Scalar>  inArgs  = me->createInArgs();
  inArgs.set_x(x);
  inArgs.set_x_dot(zero_);
  inArgs.set_alpha(-1.0);
  inArgs.set_beta(0.0);

  // set the one time beta to ensure dirichlet conditions
  // are correctly included in the mass matrix: do it for
  // both epetra and Tpetra. If a panzer model evaluator has
  // not been passed in...oh well you get what you asked for!
  if(panzerModel_!=Teuchos::null)
    panzerModel_->setOneTimeDirichletBeta(-1.0);
  else if(panzerEpetraModel_!=Teuchos::null)
    panzerEpetraModel_->setOneTimeDirichletBeta(-1.0);

  // set only the mass matrix
  MEB::OutArgs<Scalar> outArgs = me->createOutArgs();
  outArgs.set_W_op(mass);

  // this will fill the mass matrix operator 
  me->evalModel(inArgs,outArgs);

  if(!massLumping_) {
    invMassMatrix_ = Thyra::inverse<Scalar>(*me->get_W_factory(),mass);
  }
  else {
    // build lumped mass matrix (assumes all positive mass entries, does a simple sum)
    Teuchos::RCP<Thyra::VectorBase<Scalar> > ones = Thyra::createMember(*mass->domain());
    Thyra::assign(ones.ptr(),1.0);

    // mass * ones gives the row sums; invert them entry by entry in place
    RCP<Thyra::VectorBase<Scalar> > invLumpMass = Thyra::createMember(*mass->range());
    Thyra::apply(*mass,Thyra::NOTRANS,*ones,invLumpMass.ptr());
    Thyra::reciprocal(*invLumpMass,invLumpMass.ptr());

    invMassMatrix_ = Thyra::diagonal(invLumpMass);
  }
}
Thyra::ModelEvaluatorBase::InArgs<Scalar> ExplicitModelEvaluator<Scalar>::
getNominalValues() const
{
  // Start from this evaluator's own InArgs and copy over the underlying
  // model's nominal values.  The second setArgs() argument is `true` so that
  // arguments this evaluator does not support are skipped rather than
  // treated as an error.
  const bool ignoreUnsupported = true;

  Thyra::ModelEvaluatorBase::InArgs<Scalar> nominal = createInArgs();
  nominal.setArgs(this->getUnderlyingModel()->getNominalValues(),ignoreUnsupported);
  return nominal;
}
ModelEvaluatorBase::InArgs<Scalar>
DefaultStateEliminationModelEvaluator<Scalar>::createInArgs() const
{
  // The returned InArgs mirror those of the underlying model (same number of
  // parameter subvectors, same supported arguments) except that the state x
  // and the arguments related to it are marked as unsupported.
  typedef ModelEvaluatorBase MEB;

  const MEB::InArgs<Scalar> underlyingInArgs =
    this->getUnderlyingModel()->createInArgs();

  MEB::InArgsSetup<Scalar> reducedInArgs;
  reducedInArgs.setModelEvalDescription(this->description());
  reducedInArgs.set_Np(underlyingInArgs.Np());
  reducedInArgs.setSupports(underlyingInArgs);
  // Wipe out x, x_dot ...
  reducedInArgs.setUnsupportsAndRelated(MEB::IN_ARG_x);

  return reducedInArgs;
}
Thyra::ModelEvaluatorBase::InArgs<Scalar>
ForwardSensitivityExplicitModelEvaluator<Scalar>::createInArgs() const
{
  // Requires that the state model has been set.  The returned InArgs always
  // support x and t; beta is supported exactly when the state model
  // supports it.
  TEUCHOS_ASSERT( !is_null(stateModel_) );
  typedef Thyra::ModelEvaluatorBase MEB;

  const bool stateModelSupportsBeta =
    stateModel_->createInArgs().supports(MEB::IN_ARG_beta);

  MEB::InArgsSetup<Scalar> sensInArgs;
  sensInArgs.setModelEvalDescription(this->description());
  sensInArgs.setSupports( MEB::IN_ARG_x );
  sensInArgs.setSupports( MEB::IN_ARG_t );
  sensInArgs.setSupports( MEB::IN_ARG_beta, stateModelSupportsBeta );
  return sensInArgs;
}
RCP<Thyra::VectorBase<Scalar> > eval_f_t(
    const Thyra::ModelEvaluator<Scalar>& me,
    Scalar t
    ) {
  // Evaluates f of `me` with only the time t set on the input arguments and
  // returns the result in a newly created vector from the model's f space.
  typedef Teuchos::ScalarTraits<Scalar> ST;
  typedef Thyra::ModelEvaluatorBase MEB;

  // Input: everything at its createInArgs() state except the time.
  MEB::InArgs<Scalar> modelIn = me.createInArgs();
  modelIn.set_t(t);

  // Output: f only, zero-filled before the evaluation.
  MEB::OutArgs<Scalar> modelOut = me.createOutArgs();
  RCP<Thyra::VectorBase<Scalar> > f = Thyra::createMember(me.get_f_space());
  V_S(outArg(*f),ST::zero());
  modelOut.set_f(f);

  me.evalModel(modelIn,modelOut);
  return f;
}
void eval_model_explicit(
    const Thyra::ModelEvaluator<Scalar> &model,
    Thyra::ModelEvaluatorBase::InArgs<Scalar> &basePoint,
    const VectorBase<Scalar>& x_in,
    const typename Thyra::ModelEvaluatorBase::InArgs<Scalar>::ScalarMag &t_in,
    const Ptr<VectorBase<Scalar> >& f_out
    )
{
  // Evaluates f(x_in, t_in) of `model` into *f_out, starting from the
  // arguments held in basePoint.  x_in and *f_out are handed to the model
  // through non-owning RCPs, so the caller keeps ownership of both.
  typedef Thyra::ModelEvaluatorBase MEB;

  // --- inputs ---
  MEB::InArgs<Scalar> evalIn = model.createInArgs();
  evalIn.setArgs(basePoint);
  evalIn.set_x(Teuchos::rcp(&x_in,false));
  if (evalIn.supports(MEB::IN_ARG_t))
    evalIn.set_t(t_in);
  // A model whose f(x, x_dot, t) describes an implicit ODE may take an
  // optional x_dot.  Passing a null x_dot asks such a model for the state
  // function of the explicit formulation x_dot = f(x, t).
  if (evalIn.supports(MEB::IN_ARG_x_dot))
    evalIn.set_x_dot(Teuchos::null);

  // --- outputs ---
  MEB::OutArgs<Scalar> evalOut = model.createOutArgs();
  evalOut.set_f(Teuchos::rcp(&*f_out,false));

  model.evalModel(evalIn,evalOut);
}
// Evaluates the explicit state function f(x_in, t_in) of `model` into *f_out.
//
//   model     - model evaluator to call
//   basePoint - arguments (parameters etc.) copied into the evaluation point
//               before x and t are overridden
//   x_in      - state at which to evaluate; passed as a non-owning RCP
//   t_in      - time; only set if the model supports IN_ARG_t
//   f_out     - receives f; passed as a non-owning RCP
void eval_model_explicit(
    const Thyra::ModelEvaluator<Scalar> &model,
    Thyra::ModelEvaluatorBase::InArgs<Scalar> &basePoint,
    const VectorBase<Scalar>& x_in,
    const typename Thyra::ModelEvaluatorBase::InArgs<Scalar>::ScalarMag &t_in,
    const Ptr<VectorBase<Scalar> >& f_out
    )
{
  typedef Thyra::ModelEvaluatorBase MEB;
  MEB::InArgs<Scalar> inArgs = model.createInArgs();
  MEB::OutArgs<Scalar> outArgs = model.createOutArgs();
  inArgs.setArgs(basePoint);
  inArgs.set_x(Teuchos::rcp(&x_in,false));
  if (inArgs.supports(MEB::IN_ARG_t)) {
    inArgs.set_t(t_in);
  }
  // basePoint may carry an x_dot.  For model evaluators whose state function
  // f(x, x_dot, t) describes an implicit ODE and which accept an optional
  // x_dot, a non-null x_dot would request the implicit residual.  Clear it
  // so that the explicit formulation x_dot = f(x, t) is evaluated.
  if (inArgs.supports(MEB::IN_ARG_x_dot)) {
    inArgs.set_x_dot(Teuchos::null);
  }
  outArgs.set_f(Teuchos::rcp(&*f_out,false));
  model.evalModel(inArgs,outArgs);
}
void ForwardSensitivityExplicitModelEvaluator<Scalar>::computeDerivativeMatrices(
  const Thyra::ModelEvaluatorBase::InArgs<Scalar> &point
  ) const
{
  // Fills DfDx_ (as the state model's W_op) and DfDp_ (column-oriented
  // multi-vector for parameter subvector p_index_) by evaluating the state
  // model at stateBasePoint_.  Both objects are created on first use.
  //
  // NOTE(review): `point` is not read here; the evaluation point comes from
  // stateBasePoint_ -- presumably the caller sets it beforehand; confirm.
  TEUCHOS_ASSERT( !is_null(stateModel_) );

  typedef Thyra::ModelEvaluatorBase MEB;
  typedef Teuchos::VerboseObjectTempState<MEB> VOTSME;

  Teuchos::RCP<Teuchos::FancyOStream> out = this->getOStream();
  Teuchos::EVerbosityLevel verbLevel = this->getVerbLevel();

  // Inputs: the stored base point, with beta = 1 where beta is supported.
  MEB::InArgs<Scalar> stateInArgs = stateBasePoint_;
  if (stateInArgs.supports(MEB::IN_ARG_beta))
    stateInArgs.set_beta(1.0);

  MEB::OutArgs<Scalar> stateOutArgs = stateModel_->createOutArgs();

  // Output 1: DfDx, requested through W_op.
  if (is_null(DfDx_))
    DfDx_ = stateModel_->create_W_op();
  stateOutArgs.set_W_op(DfDx_);

  // Output 2: DfDp.
  if (is_null(DfDp_)) {
    DfDp_ = Thyra::create_DfDp_mv(
      *stateModel_, p_index_, MEB::DERIV_MV_BY_COL
      ).getMultiVector();
  }
  stateOutArgs.set_DfDp(
    p_index_, MEB::Derivative<Scalar>(DfDp_,MEB::DERIV_MV_BY_COL)
    );

  // Apply this object's output stream and verbosity to the state model for
  // the duration of the evaluation.
  VOTSME stateModel_outputTempState(stateModel_,out,verbLevel);
  stateModel_->evalModel(stateInArgs,stateOutArgs);

}
// Restarts `stepper` from its current state: builds an initial condition from
// the model's nominal values, overrides x, x_dot and t with the stepper's
// values at the current step-status time, and sets it back on the stepper.
//
// `stepper` must be non-null (checked only when RYTHMOS_DEBUG is defined).
void restart( StepperBase<Scalar> *stepper )
{
#ifdef RYTHMOS_DEBUG
  TEST_FOR_EXCEPT(0==stepper);
#endif // RYTHMOS_DEBUG
  typedef Thyra::ModelEvaluatorBase MEB;
  // All scalar-dependent types use the template parameter Scalar so that the
  // function is not restricted to Scalar == double.
  const Rythmos::StepStatus<Scalar>
    stepStatus = stepper->getStepStatus();
  const RCP<const Thyra::ModelEvaluator<Scalar> >
    model = stepper->getModel();
  // First, copy all of the model's state, including parameter values etc.
  MEB::InArgs<Scalar> initialCondition = model->createInArgs();
  initialCondition.setArgs(model->getNominalValues());
  // Set the current values of the state and time
  RCP<const Thyra::VectorBase<Scalar> > x, x_dot;
  Rythmos::get_x_and_x_dot(*stepper,stepStatus.time,&x,&x_dot);
  initialCondition.set_x(x);
  initialCondition.set_x_dot(x_dot);
  initialCondition.set_t(stepStatus.time);
  // Set the new initial condition back on the stepper.  This will effectively
  // reset the stepper to think that it is starting over again (which it is).
  stepper->setInitialCondition(initialCondition);
}
// Gives `stepper` an initial condition taken from the nominal values of
// `model`, unless the stepper is already initialized.
//
// Returns true if an initial condition was set on the stepper.  Returns false
// if the stepper was already initialized or the nominal values have no x.
bool setDefaultInitialConditionFromNominalValues(
  const Thyra::ModelEvaluator<Scalar>& model,
  const Ptr<StepperBase<Scalar> >& stepper
  )
{

  typedef ScalarTraits<Scalar> ST;
  typedef Thyra::ModelEvaluatorBase MEB;

  if (isInitialized(*stepper))
    return false;  // Already has an initial condition
  
  MEB::InArgs<Scalar> initCond = model.getNominalValues();

  if (!is_null(initCond.get_x())) {
    // IC has x, we will assume that initCond.get_t() is the valid start time.
    // Therefore, we just need to check that x_dot is also set or we will
    // create a zero x_dot
#ifdef RYTHMOS_DEBUG
    THYRA_ASSERT_VEC_SPACES( "setInitialConditionIfExists(...)", 
      *model.get_x_space(), *initCond.get_x()->space() );
#endif
    if (initCond.supports(MEB::IN_ARG_x_dot)) {
      if (is_null(initCond.get_x_dot())) {
        const RCP<Thyra::VectorBase<Scalar> > x_dot =
          createMember(model.get_x_space());
        assign(x_dot.ptr(), ST::zero());
        // Actually install the zero x_dot; without this the vector created
        // above would be discarded and the IC would still lack x_dot.
        initCond.set_x_dot(x_dot);
      }
      else {
#ifdef RYTHMOS_DEBUG
        THYRA_ASSERT_VEC_SPACES( "setInitialConditionIfExists(...)", 
          *model.get_x_space(), *initCond.get_x_dot()->space() );
#endif
      }
    }
    stepper->setInitialCondition(initCond);
    return true;
  }

  // The model has no nominal values from which to set the initial
  // condition, so we don't do anything.  The stepper is left without one.
  return false;

}
// Test driver for state integration and forward sensitivities.
//
// Steps performed:
//   1. Parse command-line options and assemble the parameter list.
//   2. Build an EpetraExt::DiagonalTransientModel, wrap it as a Thyra model
//      and create a Rythmos stepper (BDF, IRK "Backward Euler" or
//      BackwardEulerStepper) for it.
//   3. Integrate the state to finalTime and compare with the model's exact
//      solution (tolerance maxStateError).
//   4. If requested (--fwd-sens-solve): integrate state + forward
//      sensitivities, check the embedded state against step 3
//      (maxRestateError) and check the sensitivities against finite
//      differences (maxSensError).
//
// Returns 0 if all checks pass and 1 otherwise.  If command-line parsing does
// not report PARSE_SUCCESSFUL, the parse return code is returned instead.
int main(int argc, char *argv[])
{

  using std::endl;
  typedef double Scalar;
  typedef double ScalarMag; // not referenced in this function
  using Teuchos::describe;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcp_implicit_cast;
  using Teuchos::rcp_dynamic_cast;
  using Teuchos::as;
  using Teuchos::ParameterList;
  using Teuchos::CommandLineProcessor;
  typedef Teuchos::ParameterList::PrintOptions PLPrintOptions;
  typedef Thyra::ModelEvaluatorBase MEB;
  typedef Thyra::DefaultMultiVectorProductVectorSpace<Scalar> DMVPVS; // not referenced in this function
  using Thyra::productVectorBase;

  // `result` holds the outcome of the latest check; `success` accumulates.
  bool result, success = true;

  Teuchos::GlobalMPISession mpiSession(&argc,&argv);

  RCP<Epetra_Comm> epetra_comm;
#ifdef HAVE_MPI
  epetra_comm = rcp( new Epetra_MpiComm(MPI_COMM_WORLD) );
#else
  epetra_comm = rcp( new Epetra_SerialComm );
#endif // HAVE_MPI

  RCP<Teuchos::FancyOStream>
    out = Teuchos::VerboseObjectBase::getDefaultOStream();

  try {

    //
    // Read commandline options
    //

    CommandLineProcessor clp;
    clp.throwExceptions(false);
    clp.addOutputSetupOptions(true);

    std::string paramsFileName = "";
    clp.setOption( "params-file", &paramsFileName,
      "File name for XML parameters" );

    std::string extraParamsString = "";
    clp.setOption( "extra-params", &extraParamsString,
      "Extra XML parameters" );

    std::string extraParamsFile = "";
    clp.setOption( "extra-params-file", &extraParamsFile, "File containing extra parameters in XML format.");

    double maxStateError = 1e-6;
    clp.setOption( "max-state-error", &maxStateError,
      "The maximum allowed error in the integrated state in relation to the exact state solution" );

    double finalTime = 1e-3;
    clp.setOption( "final-time", &finalTime,
      "Final integration time (initial time is 0.0)" );

    int numTimeSteps = 10;
    clp.setOption( "num-time-steps", &numTimeSteps,
      "Number of (fixed) time steps.  If <= 0.0, then variable time steps are taken" );

    bool useBDF = false;
    clp.setOption( "use-BDF", "use-BE", &useBDF,
      "Use BDF or Backward Euler (BE)" );

    bool useIRK = false;
    clp.setOption( "use-IRK", "use-other", &useIRK,
      "Use IRK or something" );

    bool doFwdSensSolve = false;
    clp.setOption( "fwd-sens-solve", "state-solve", &doFwdSensSolve,
      "Do the forward sensitivity solve or just the state solve" );

    bool doFwdSensErrorControl = false;
    clp.setOption( "fwd-sens-err-cntrl", "no-fwd-sens-err-cntrl", &doFwdSensErrorControl,
      "Do error control on the forward sensitivity solve or not" );

    double maxRestateError = 0.0;
    clp.setOption( "max-restate-error", &maxRestateError,
      "The maximum allowed error between the state integrated by itself verses integrated along with DxDp" );

    double maxSensError = 1e-4;
    clp.setOption( "max-sens-error", &maxSensError,
      "The maximum allowed error in the integrated sensitivity in relation to"
      " the finite-difference sensitivity" );

    Teuchos::EVerbosityLevel verbLevel = Teuchos::VERB_DEFAULT;
    setVerbosityLevelOption( "verb-level", &verbLevel,
      "Top-level verbosity level.  By default, this gets deincremented as you go deeper into numerical objects.",
      &clp );

    bool testExactSensitivity = false;
    clp.setOption( "test-exact-sens", "no-test-exact-sens", &testExactSensitivity,
      "Test the exact sensitivity with finite differences or not." );

    bool dumpFinalSolutions = false;
    clp.setOption(
      "dump-final-solutions", "no-dump-final-solutions", &dumpFinalSolutions,
      "Determine if the final solutions are dumpped or not." );

    // Any result other than PARSE_SUCCESSFUL ends the program with that code.
    CommandLineProcessor::EParseCommandLineReturn parse_return = clp.parse(argc,argv);
    if( parse_return != CommandLineProcessor::PARSE_SUCCESSFUL ) return parse_return;

    if ( Teuchos::VERB_DEFAULT == verbLevel )
      verbLevel = Teuchos::VERB_LOW;

    // Final solutions are printed at VERB_EXTREME when dumping is requested.
    const Teuchos::EVerbosityLevel
      solnVerbLevel = ( dumpFinalSolutions ? Teuchos::VERB_EXTREME : verbLevel );

    //
    // Get the base parameter list that all other parameter lists will be read
    // from.
    //

    // Applied in this order: params file, extra params file, extra params string.
    RCP<ParameterList>
      paramList = Teuchos::parameterList();
    if (paramsFileName.length())
      updateParametersFromXmlFile( paramsFileName, paramList.ptr() );
    if(extraParamsFile.length())
      Teuchos::updateParametersFromXmlFile( "./"+extraParamsFile, paramList.ptr() );
    if (extraParamsString.length())
      updateParametersFromXmlString( extraParamsString, paramList.ptr() );

    if (testExactSensitivity) {
      paramList->sublist(DiagonalTransientModel_name).set("Exact Solution as Response",true);
    }

    paramList->validateParameters(*getValidParameters(),0); // Only validate top level lists!

    //
    // Create the Stratimikos linear solver factory.
    //
    // This is the linear solve strategy that will be used to solve for the
    // linear system with the W.
    //

    Stratimikos::DefaultLinearSolverBuilder linearSolverBuilder;
    linearSolverBuilder.setParameterList(sublist(paramList,Stratimikos_name));
    RCP<Thyra::LinearOpWithSolveFactoryBase<Scalar> >
      W_factory = createLinearSolveStrategy(linearSolverBuilder);

    //
    // Create the underlying EpetraExt::ModelEvaluator
    //

    RCP<EpetraExt::DiagonalTransientModel>
      epetraStateModel = EpetraExt::diagonalTransientModel(
        epetra_comm,
        sublist(paramList,DiagonalTransientModel_name)
        );

    *out <<"\nepetraStateModel valid options:\n";
    epetraStateModel->getValidParameters()->print(
      *out, PLPrintOptions().indent(2).showTypes(true).showDoc(true)
      );

    //
    // Create the Thyra-wrapped ModelEvaluator
    //

    RCP<Thyra::ModelEvaluator<double> >
      stateModel = epetraModelEvaluator(epetraStateModel,W_factory);

    *out << "\nParameter names = " << *stateModel->get_p_names(0) << "\n";

    //
    // Create the Rythmos stateStepper
    //

    RCP<Rythmos::TimeStepNonlinearSolver<double> >
      nonlinearSolver = Rythmos::timeStepNonlinearSolver<double>();
    RCP<ParameterList>
      nonlinearSolverPL = sublist(paramList,TimeStepNonlinearSolver_name);
    nonlinearSolverPL->get("Default Tol",1e-3*maxStateError); // Set default if not set
    nonlinearSolver->setParameterList(nonlinearSolverPL);

    RCP<Rythmos::StepperBase<Scalar> > stateStepper;

    // Stepper selection: --use-BDF takes precedence over --use-IRK; with
    // neither, a BackwardEulerStepper is used.
    if (useBDF) {
      stateStepper = rcp(
        new Rythmos::ImplicitBDFStepper<double>(
          stateModel, nonlinearSolver
          )
        );
    }
    else if (useIRK) {
      // We need a separate LOWSFB object for the IRK stepper
      RCP<Thyra::LinearOpWithSolveFactoryBase<Scalar> >
        irk_W_factory = createLinearSolveStrategy(linearSolverBuilder);
      RCP<Rythmos::RKButcherTableauBase<double> > irkbt = Rythmos::createRKBT<double>("Backward Euler");
      stateStepper = Rythmos::implicitRKStepper<double>(
        stateModel, nonlinearSolver, irk_W_factory, irkbt
        );
    }
    else {
      stateStepper = rcp(
        new Rythmos::BackwardEulerStepper<double>(
          stateModel, nonlinearSolver
          )
        );
    }

    *out <<"\nstateStepper:\n" << describe(*stateStepper,verbLevel);
    *out <<"\nstateStepper valid options:\n";
    stateStepper->getValidParameters()->print(
      *out, PLPrintOptions().indent(2).showTypes(true).showDoc(true)
      );

    stateStepper->setParameterList(sublist(paramList,RythmosStepper_name));

    //
    // Setup finite difference objects that will be used for tests
    //

    Thyra::DirectionalFiniteDiffCalculator<Scalar> fdCalc;
    fdCalc.setParameterList(sublist(paramList,FdCalc_name));
    fdCalc.setOStream(out);
    fdCalc.setVerbLevel(verbLevel);

    //
    // Use a StepperAsModelEvaluator to integrate the state
    //

    const MEB::InArgs<Scalar>
      state_ic = stateModel->getNominalValues();
    *out << "\nstate_ic:\n" << describe(state_ic,verbLevel);

    RCP<Rythmos::IntegratorBase<Scalar> > integrator;
    {
      RCP<ParameterList>
        integratorPL = sublist(paramList,RythmosIntegrator_name);
      // NOTE(review): variable steps are enabled only for numTimeSteps < 0,
      // while the --num-time-steps help text says "<= 0".  For
      // numTimeSteps == 0 the "Fixed dt" below divides by zero -- confirm
      // the intended handling.
      integratorPL->set( "Take Variable Steps", as<bool>(numTimeSteps < 0) );
      integratorPL->set( "Fixed dt", as<double>((finalTime - state_ic.get_t())/numTimeSteps) );
      RCP<Rythmos::IntegratorBase<Scalar> >
        defaultIntegrator = Rythmos::controlledDefaultIntegrator<Scalar>(
          Rythmos::simpleIntegrationControlStrategy<Scalar>(integratorPL)
          );
      integrator = defaultIntegrator;
    }

    RCP<Rythmos::StepperAsModelEvaluator<Scalar> >
      stateIntegratorAsModel = Rythmos::stepperAsModelEvaluator(
        stateStepper, integrator, state_ic
        );
    stateIntegratorAsModel->setVerbLevel(verbLevel);

    *out << "\nUse the StepperAsModelEvaluator to integrate state x(p,finalTime) ... \n";

    RCP<Thyra::VectorBase<Scalar> > x_final;

    {

      Teuchos::OSTab tab(out);

      x_final = createMember(stateIntegratorAsModel->get_g_space(0));

      // Response g(0) of the integrator-as-model is written into x_final.
      eval_g(
        *stateIntegratorAsModel,
        0, *state_ic.get_p(0),
        finalTime,
        0, &*x_final
        );

      *out
        << "\nx_final = x(p,finalTime) evaluated using stateIntegratorAsModel:\n"
        << describe(*x_final,solnVerbLevel);

    }

    //
    // Test the integrated state against the exact analytical state solution
    //

    RCP<const Thyra::VectorBase<Scalar> >
      exact_x_final = create_Vector(
        epetraStateModel->getExactSolution(finalTime),
        stateModel->get_x_space()
        );

    result = Thyra::testRelNormDiffErr(
      "exact_x_final", *exact_x_final, "x_final", *x_final,
      "maxStateError", maxStateError, "warningTol", 1.0, // Don't warn
      &*out, solnVerbLevel
      );
    if (!result) success = false;

    //
    // Solve and test the forward sensitivity computation
    //

    if (doFwdSensSolve) {

      //
      // Create the forward sensitivity stepper
      //

      RCP<Rythmos::ForwardSensitivityStepper<Scalar> > stateAndSensStepper =
        Rythmos::forwardSensitivityStepper<Scalar>();
      if (doFwdSensErrorControl) {
        stateAndSensStepper->initializeDecoupledSteppers(
          stateModel, 0, stateModel->getNominalValues(),
          stateStepper, nonlinearSolver,
          integrator->cloneIntegrator(), finalTime
          );
      }
      else {
        stateAndSensStepper->initializeSyncedSteppers(
          stateModel, 0, stateModel->getNominalValues(),
          stateStepper, nonlinearSolver
          );
        // The above call will result in stateStepper and nonlinearSolver being
        // cloned.  This helps to ensure consistency between the state and
        // sensitivity computations!
      }

      //
      // Set the initial condition for the state and forward sensitivities
      //

      RCP<Thyra::VectorBase<Scalar> > s_bar_init
        = createMember(stateAndSensStepper->getFwdSensModel()->get_x_space());
      assign( s_bar_init.ptr(), 0.0 );
      RCP<Thyra::VectorBase<Scalar> > s_bar_dot_init
        = createMember(stateAndSensStepper->getFwdSensModel()->get_x_space());
      assign( s_bar_dot_init.ptr(), 0.0 );
      // Above, I believe that these are the correct initial conditions for
      // s_bar and s_bar_dot given how the EpetraExt::DiagonalTransientModel
      // is currently implemented!

      RCP<const Rythmos::StateAndForwardSensitivityModelEvaluator<Scalar> >
        stateAndSensModel = stateAndSensStepper->getStateAndFwdSensModel();

      MEB::InArgs<Scalar>
        state_and_sens_ic = stateAndSensStepper->getModel()->createInArgs();

      // Copy time, parameters etc.
      state_and_sens_ic.setArgs(state_ic);
      // Set initial condition for x_bar = [ x; s_bar ]
      state_and_sens_ic.set_x(
        stateAndSensModel->create_x_bar_vec(state_ic.get_x(),s_bar_init)
        );
      // Set initial condition for x_bar_dot = [ x_dot; s_bar_dot ]
      state_and_sens_ic.set_x_dot(
        stateAndSensModel->create_x_bar_vec(state_ic.get_x_dot(),s_bar_dot_init)
        );

      *out << "\nstate_and_sens_ic:\n" << describe(state_and_sens_ic,verbLevel);

      stateAndSensStepper->setInitialCondition(state_and_sens_ic);

      //
      // Use a StepperAsModelEvaluator to integrate the state+sens
      //

      RCP<Rythmos::StepperAsModelEvaluator<Scalar> >
        stateAndSensIntegratorAsModel = Rythmos::stepperAsModelEvaluator(
          rcp_implicit_cast<Rythmos::StepperBase<Scalar> >(stateAndSensStepper),
          integrator, state_and_sens_ic
          );
      stateAndSensIntegratorAsModel->setVerbLevel(verbLevel);

      *out << "\nUse the StepperAsModelEvaluator to integrate state + sens x_bar(p,finalTime) ... \n";

      RCP<Thyra::VectorBase<Scalar> > x_bar_final;

      {

        Teuchos::OSTab tab(out);

        x_bar_final = createMember(stateAndSensIntegratorAsModel->get_g_space(0));

        eval_g(
          *stateAndSensIntegratorAsModel,
          0, *state_ic.get_p(0),
          finalTime,
          0, &*x_bar_final
          );

        *out
          << "\nx_bar_final = x_bar(p,finalTime) evaluated using stateAndSensIntegratorAsModel:\n"
          << describe(*x_bar_final,solnVerbLevel);

      }

      //
      // Test that the state computed above is same as computed initially!
      //

      *out << "\nChecking that x(p,finalTime) computed as part of x_bar above is the same ...\n";

      {

        Teuchos::OSTab tab(out);

        // Block 0 of x_bar is compared against the separately integrated state.
        RCP<const Thyra::VectorBase<Scalar> >
          x_in_x_bar_final = productVectorBase<Scalar>(x_bar_final)->getVectorBlock(0);

        result = Thyra::testRelNormDiffErr<Scalar>(
          "x_final", *x_final,
          "x_in_x_bar_final", *x_in_x_bar_final,
          "maxRestateError", maxRestateError,
          "warningTol", 1.0, // Don't warn
          &*out, solnVerbLevel
          );
        if (!result) success = false;

      }

      //
      // Compute DxDp using finite differences
      //

      *out << "\nApproximating DxDp(p,t) using directional finite differences of integrator for x(p,t) ...\n";

      RCP<Thyra::MultiVectorBase<Scalar> > DxDp_fd_final;

      {

        Teuchos::OSTab tab(out);


        MEB::InArgs<Scalar>
          fdBasePoint = stateIntegratorAsModel->createInArgs();

        fdBasePoint.set_t(finalTime);
        fdBasePoint.set_p(0,stateModel->getNominalValues().get_p(0));

        // One column per entry of parameter subvector 0.
        DxDp_fd_final = createMembers(
          stateIntegratorAsModel->get_g_space(0),
          stateIntegratorAsModel->get_p_space(0)->dim()
          );

        typedef Thyra::DirectionalFiniteDiffCalculatorTypes::SelectedDerivatives
          SelectedDerivatives;

        MEB::OutArgs<Scalar> fdOutArgs =
          fdCalc.createOutArgs(
            *stateIntegratorAsModel,
            SelectedDerivatives().supports(MEB::OUT_ARG_DgDp,0,0)
            );
        fdOutArgs.set_DgDp(0,0,DxDp_fd_final);

        // Silence the model evaluators that are called.  The fdCal object
        // will show all of the inputs and outputs for each call.
        stateStepper->setVerbLevel(Teuchos::VERB_NONE);
        stateIntegratorAsModel->setVerbLevel(Teuchos::VERB_NONE);

        fdCalc.calcDerivatives(
          *stateIntegratorAsModel, fdBasePoint,
          stateIntegratorAsModel->createOutArgs(), // Don't bother with function value
          fdOutArgs
          );

        *out
          << "\nFinite difference DxDp_fd_final = DxDp(p,finalTime): "
          << describe(*DxDp_fd_final,solnVerbLevel);

      }

      //
      // Test that the integrated sens and the F.D. sens are similar
      //

      *out << "\nChecking that integrated DxDp(p,finalTime) and finite-diff DxDp(p,finalTime) are similar ...\n";

      {

        Teuchos::OSTab tab(out);

        // Block 1 of x_bar holds the integrated sensitivities.
        RCP<const Thyra::VectorBase<Scalar> >
          DxDp_vec_final = Thyra::productVectorBase<Scalar>(x_bar_final)->getVectorBlock(1);

        // Wrap the finite-difference multi-vector as a product vector in the
        // same space so the two can be compared as vectors.
        RCP<const Thyra::VectorBase<Scalar> >
          DxDp_fd_vec_final = Thyra::multiVectorProductVector(
            rcp_dynamic_cast<const Thyra::DefaultMultiVectorProductVectorSpace<Scalar> >(
              DxDp_vec_final->range()
              ),
            DxDp_fd_final
            );

        result = Thyra::testRelNormDiffErr(
          "DxDp_vec_final", *DxDp_vec_final,
          "DxDp_fd_vec_final", *DxDp_fd_vec_final,
          "maxSensError", maxSensError,
          "warningTol", 1.0, // Don't warn
          &*out, solnVerbLevel
          );
        if (!result) success = false;

      }

    }

  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(true,*out,success);

  if(success)
    *out << "\nEnd Result: TEST PASSED" << endl;
  else
    *out << "\nEnd Result: TEST FAILED" << endl;

  return ( success ? 0 : 1 );

} // end main() [Doxygen looks for this!]
void ImplicitRKModelEvaluator<Scalar>::evalModelImpl(
  const Thyra::ModelEvaluatorBase::InArgs<Scalar>& inArgs_bar,
  const Thyra::ModelEvaluatorBase::OutArgs<Scalar>& outArgs_bar
  ) const
{
  // Assembles the block residual f_bar and/or the block operator W_op_bar of
  // the implicit RK system by evaluating the DAE model once per block.
  // Throws std::logic_error if the model has not been initialized or if no
  // time-step point has been set.

  using Teuchos::rcp_dynamic_cast;
  typedef ScalarTraits<Scalar> ST;
  typedef Thyra::ModelEvaluatorBase MEB;
  typedef Thyra::VectorBase<Scalar> VB;
  typedef Thyra::ProductVectorBase<Scalar> PVB;
  typedef Thyra::BlockedLinearOpBase<Scalar> BLWB;

  TEST_FOR_EXCEPTION( !isInitialized_, std::logic_error,
      "Error!  initializeIRKModel must be called before evalModel\n"
      );

  TEST_FOR_EXCEPTION( !setTimeStepPointCalled_, std::logic_error,
      "Error!  setTimeStepPoint must be called before evalModel"
      );

  THYRA_MODEL_EVALUATOR_DECORATOR_EVAL_MODEL_GEN_BEGIN(
    "Rythmos::ImplicitRKModelEvaluator",inArgs_bar,outArgs_bar,daeModel_
    );

  //
  // A) Unwrap the inArgs and outArgs to get at product vectors and block op
  //

  const RCP<const PVB> x_bar = rcp_dynamic_cast<const PVB>(inArgs_bar.get_x(), true);
  const RCP<PVB> f_bar = rcp_dynamic_cast<PVB>(outArgs_bar.get_f(), true);
  const RCP<BLWB> W_op_bar = rcp_dynamic_cast<BLWB>(outArgs_bar.get_W_op(), true);

  // Which outputs were requested does not change between stages.
  const bool wantF = !is_null(f_bar);
  const bool wantW = !is_null(W_op_bar);

  //
  // B) Assemble f_bar and W_op_bar by looping over stages
  //

  MEB::InArgs<Scalar> daeIn = daeModel_->createInArgs();
  MEB::OutArgs<Scalar> daeOut = daeModel_->createOutArgs();
  const RCP<VB> x_i = createMember(daeModel_->get_x_space());
  daeIn.setArgs(basePoint_);

  const int numStages = irkButcherTableau_->numStages();

  for ( int stageIdx = 0; stageIdx < numStages; ++stageIdx ) {

    // B.1) Setup the DAE's inArgs for stage f(i) ...
    assembleIRKState(
      stageIdx, irkButcherTableau_->A(), delta_t_, *x_old_, *x_bar, outArg(*x_i)
      );
    daeIn.set_x( x_i );
    daeIn.set_x_dot( x_bar->getVectorBlock(stageIdx) );
    daeIn.set_t( t_old_ + irkButcherTableau_->c()(stageIdx) * delta_t_ );
    // Block column 0: alpha is one only on the diagonal block.
    const Scalar alpha0 = ( stageIdx == 0 ? ST::one() : ST::zero() );
    const Scalar beta0 = delta_t_ * irkButcherTableau_->A()(stageIdx,0);
    daeIn.set_alpha( alpha0 );
    daeIn.set_beta( beta0 );

    // B.2) Setup the DAE's outArgs for stage f(i) ...
    if (wantF)
      daeOut.set_f( f_bar->getNonconstVectorBlock(stageIdx) );
    if (wantW)
      daeOut.set_W_op( W_op_bar->getNonconstBlock(stageIdx,0) );

    // B.3) Compute f_bar(i) and/or W_op_bar(i,0) ...
    daeModel_->evalModel( daeIn, daeOut );
    daeOut.set_f(Teuchos::null);
    daeOut.set_W_op(Teuchos::null);

    // B.4) Evaluate the rest of the W_op_bar(i,j=1...numStages-1) ...
    if (!wantW)
      continue;

    for ( int colIdx = 1; colIdx < numStages; ++colIdx ) {
      const Scalar alpha = ( stageIdx == colIdx ? ST::one() : ST::zero() );
      const Scalar beta = delta_t_ * irkButcherTableau_->A()(stageIdx,colIdx);
      daeIn.set_alpha( alpha );
      daeIn.set_beta( beta );
      daeOut.set_W_op( W_op_bar->getNonconstBlock(stageIdx,colIdx) );
      daeModel_->evalModel( daeIn, daeOut );
      daeOut.set_W_op(Teuchos::null);
    }

  }
  
  THYRA_MODEL_EVALUATOR_DECORATOR_EVAL_MODEL_END();
  
}
// Test driver for the adjoint model evaluator on a diagonal transient model.
//
// The program (1) integrates the forward state problem with a backward Euler
// stepper using fixed time steps, (2) compares the final state against the
// closed-form discrete backward Euler solution, (3) wraps the forward model
// in an adjoint model evaluator and integrates that with backward Euler too,
// and (4) compares the final adjoint / reduced gradient against closed-form
// expressions.
//
// Returns 0 when every check passes and 1 otherwise.  If command-line
// parsing does not report PARSE_SUCCESSFUL, the parse return code is
// returned directly.
int main(int argc, char *argv[])
{

  using std::endl;
  typedef double Scalar;
  // typedef double ScalarMag; // unused
  typedef Teuchos::ScalarTraits<Scalar> ST;
  using Teuchos::describe;
  using Teuchos::Array;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::outArg;
  using Teuchos::rcp_implicit_cast;
  using Teuchos::rcp_dynamic_cast;
  using Teuchos::as;
  using Teuchos::ParameterList;
  using Teuchos::CommandLineProcessor;
  typedef Teuchos::ParameterList::PrintOptions PLPrintOptions;
  typedef Thyra::ModelEvaluatorBase MEB;

  // 'result' holds the outcome of the most recent check; 'success' is the
  // accumulated pass/fail state reported at the end.
  bool result, success = true;

  Teuchos::GlobalMPISession mpiSession(&argc,&argv);

  // Pick the Epetra communicator matching the build configuration.
  RCP<Epetra_Comm> epetra_comm;
#ifdef HAVE_MPI
  epetra_comm = rcp( new Epetra_MpiComm(MPI_COMM_WORLD) );
#else
  epetra_comm = rcp( new Epetra_SerialComm );
#endif // HAVE_MPI

  RCP<Teuchos::FancyOStream>
    out = Teuchos::VerboseObjectBase::getDefaultOStream();

  try {

    //
    // Read commandline options
    //

    CommandLineProcessor clp;
    clp.throwExceptions(false);
    clp.addOutputSetupOptions(true);

    std::string paramsFileName = "";
    clp.setOption( "params-file", &paramsFileName,
      "File name for XML parameters" );

    double t_final = 1e-3;
    clp.setOption( "final-time", &t_final,
      "Final integration time (initial time is 0.0)" );

    // NOTE(review): the help text below mentions variable time steps for
    // values <= 0, but this driver always sets "Take Variable Steps" to false
    // and divides t_final by numTimeSteps, so non-positive values are not
    // handled here -- confirm the intended behavior.
    int numTimeSteps = 10;
    clp.setOption( "num-time-steps", &numTimeSteps,
      "Number of (fixed) time steps.  If <= 0.0, then variable time steps are taken" );

    // Tolerance passed to every testRelNormDiffErr() check below.
    double maxStateError = 1e-14;
    clp.setOption( "max-state-error", &maxStateError,
      "Maximum relative error in the integrated state allowed" );

    Teuchos::EVerbosityLevel verbLevel = Teuchos::VERB_DEFAULT;
    setVerbosityLevelOption( "verb-level", &verbLevel,
      "Top-level verbosity level.  By default, this gets deincremented as you go deeper into numerical objects.",
      &clp );

    // Verbosity used when describing solution vectors and in the checks.
    Teuchos::EVerbosityLevel solnVerbLevel = Teuchos::VERB_DEFAULT;
    setVerbosityLevelOption( "soln-verb-level", &solnVerbLevel,
      "Solution verbosity level",
      &clp );

    // Anything other than a successful parse ends the program with the
    // parser's own return code.
    CommandLineProcessor::EParseCommandLineReturn parse_return = clp.parse(argc,argv);
    if( parse_return != CommandLineProcessor::PARSE_SUCCESSFUL ) return parse_return;

    //
    *out << "\nA) Get the base parameter list ...\n";
    //

    // Start from an empty list and overlay the XML file only if one was given.
    RCP<ParameterList>
      paramList = Teuchos::parameterList();
    if (paramsFileName.length())
      updateParametersFromXmlFile( paramsFileName, paramList.ptr() );

    paramList->validateParameters(*getValidParameters());

    const Scalar t_init = 0.0;

    // Forward time range [t_init, t_final] and the fixed step size used by
    // both the integrator and the closed-form reference solutions below.
    const Rythmos::TimeRange<Scalar> fwdTimeRange(t_init, t_final);
    const Scalar delta_t = t_final / numTimeSteps;
    *out << "\ndelta_t = " << delta_t;

    //
    *out << "\nB) Create the Stratimikos linear solver factory ...\n";
    //
    // This is the linear solve strategy that will be used to solve for the
    // linear system with the W.
    //

    Stratimikos::DefaultLinearSolverBuilder linearSolverBuilder;
    linearSolverBuilder.setParameterList(sublist(paramList,Stratimikos_name));
    RCP<Thyra::LinearOpWithSolveFactoryBase<Scalar> >
      W_factory = createLinearSolveStrategy(linearSolverBuilder);

    //
    *out << "\nC) Create and initalize the forward model ...\n";
    //

    // C.1) Create the underlying EpetraExt::ModelEvaluator

    RCP<EpetraExt::DiagonalTransientModel> epetraStateModel =
      EpetraExt::diagonalTransientModel(
        epetra_comm,
        sublist(paramList,DiagonalTransientModel_name)
        );

    *out <<"\nepetraStateModel valid options:\n";
    epetraStateModel->getValidParameters()->print(
      *out, PLPrintOptions().indent(2).showTypes(true).showDoc(true)
      );

    // C.2) Create the Thyra-wrapped ModelEvaluator

    RCP<Thyra::ModelEvaluator<double> > fwdStateModel =
      epetraModelEvaluator(epetraStateModel, W_factory);

    const RCP<const Thyra::VectorSpaceBase<Scalar> >
      x_space = fwdStateModel->get_x_space();

    // gamma is the model's coefficient vector; it is used element-wise in
    // the closed-form expressions in sections F, I, K and L.
    const RCP<const Thyra::VectorBase<Scalar> >
      gamma = Thyra::create_Vector(epetraStateModel->get_gamma(), x_space);
    *out << "\ngamma = " << describe(*gamma, solnVerbLevel);

    //
    *out << "\nD) Create the stepper and integrator for the forward problem ...\n";
    //

    RCP<Rythmos::TimeStepNonlinearSolver<double> > fwdTimeStepSolver =
      Rythmos::timeStepNonlinearSolver<double>();
    RCP<Rythmos::StepperBase<Scalar> > fwdStateStepper =
      Rythmos::backwardEulerStepper<double>(fwdStateModel, fwdTimeStepSolver);
    fwdStateStepper->setParameterList(sublist(paramList, RythmosStepper_name));
    RCP<Rythmos::IntegratorBase<Scalar> > fwdStateIntegrator;
    {
      // Force fixed steps of size delta_t regardless of what the parameter
      // file says for these two entries.
      RCP<ParameterList>
        integrationControlPL = sublist(paramList, RythmosIntegrationControl_name);
      integrationControlPL->set( "Take Variable Steps", false );
      integrationControlPL->set( "Fixed dt", as<double>(delta_t) );
      RCP<Rythmos::IntegratorBase<Scalar> >
        defaultIntegrator = Rythmos::controlledDefaultIntegrator<Scalar>(
          Rythmos::simpleIntegrationControlStrategy<Scalar>(integrationControlPL)
          );
      fwdStateIntegrator = defaultIntegrator;
    }
    fwdStateIntegrator->setParameterList(sublist(paramList, RythmosIntegrator_name));

    //
    *out << "\nE) Solve the forward problem ...\n";
    //

    // The forward initial condition is the model's nominal values.
    const MEB::InArgs<Scalar>
      state_ic = fwdStateModel->getNominalValues();
    *out << "\nstate_ic:\n" << describe(state_ic,solnVerbLevel);

    fwdStateStepper->setInitialCondition(state_ic);
    fwdStateIntegrator->setStepper(fwdStateStepper, t_final);

    // Request the state at the single time point t_final.
    Array<RCP<const Thyra::VectorBase<Scalar> > > x_final_array;
    fwdStateIntegrator->getFwdPoints(
      Teuchos::tuple<Scalar>(t_final), &x_final_array, NULL, NULL
      );
    const RCP<const Thyra::VectorBase<Scalar> > x_final = x_final_array[0];

    *out << "\nx_final:\n" << describe(*x_final, solnVerbLevel);

    //
    *out << "\nF) Check the solution to the forward problem ...\n";
    //

    // x_beta(i) = 1 / (1 - delta_t*gamma(i)) is the per-step factor used in
    // the reference solutions; x_final_be_exact(i) applies it numTimeSteps
    // times to the initial condition.  x_beta is reused in sections K and L.
    const RCP<Thyra::VectorBase<Scalar> >
      x_beta = createMember(x_space),
      x_final_be_exact = createMember(x_space);

    {
      Thyra::ConstDetachedVectorView<Scalar> d_gamma(*gamma);
      Thyra::ConstDetachedVectorView<Scalar> d_x_ic(*state_ic.get_x());
      Thyra::DetachedVectorView<Scalar> d_x_beta(*x_beta);
      Thyra::DetachedVectorView<Scalar> d_x_final_be_exact(*x_final_be_exact);
      const int n = d_gamma.subDim();
      for ( int i = 0; i < n; ++i ) {
        d_x_beta(i) = 1.0 / ( 1.0 - delta_t * d_gamma(i) );
        // integralPow is defined elsewhere; presumably an integer power.
        d_x_final_be_exact(i) = integralPow(d_x_beta(i), numTimeSteps) * d_x_ic(i);
      }
    }

    *out << "\nx_final_be_exact:\n" << describe(*x_final_be_exact, solnVerbLevel);

    result = Thyra::testRelNormDiffErr<Scalar>(
      "x_final", *x_final,
      "x_final_be_exact", *x_final_be_exact,
      "maxStateError", maxStateError,
      "warningTol", 1.0, // Don't warn
      &*out, solnVerbLevel
      );
    if (!result) success = false;

    //
    *out << "\nG) Create the Adjoint ME wrapper object ...\n";
    //

    RCP<Thyra::ModelEvaluator<double> > adjModel =
      Rythmos::adjointModelEvaluator<double>(
        fwdStateModel, fwdTimeRange
        );

    //
    *out << "\nH) Create a stepper and integrator for the adjoint ...\n";
    //

    // The adjoint stepper reuses the same stepper sublist as the forward
    // stepper, and the adjoint integrator is a clone of the forward one.
    RCP<Thyra::LinearNonlinearSolver<double> > adjTimeStepSolver =
      Thyra::linearNonlinearSolver<double>();
    RCP<Rythmos::StepperBase<Scalar> > adjStepper =
      Rythmos::backwardEulerStepper<double>(adjModel, adjTimeStepSolver);
    adjStepper->setParameterList(sublist(paramList, RythmosStepper_name));
    RCP<Rythmos::IntegratorBase<Scalar> > adjIntegrator =
      fwdStateIntegrator->cloneIntegrator();

    //
    *out << "\nI) Set up the initial condition for the adjoint at the final time ...\n";
    //

    const RCP<const Thyra::VectorSpaceBase<Scalar> >
      f_space = fwdStateModel->get_f_space();

    // lambda(t_final) = x_final
    // (the value copied in is the closed-form x_final_be_exact, not the
    // integrated x_final)
    const RCP<Thyra::VectorBase<Scalar> > lambda_ic = createMember(f_space);
    V_V( outArg(*lambda_ic), *x_final_be_exact );

    // lambda_dot(t_final,i) = - gamma(i) * lambda(t_final,i)
    // The vector is zeroed first and then filled by the element-wise product.
    const RCP<Thyra::VectorBase<Scalar> > lambda_dot_ic = createMember(f_space);
    Thyra::V_S<Scalar>( outArg(*lambda_dot_ic), ST::zero() );
    Thyra::ele_wise_prod<Scalar>( -ST::one(), *gamma, *lambda_ic,
      outArg(*lambda_dot_ic) );

    // Start from the adjoint model's nominal values and override x and x_dot.
    MEB::InArgs<Scalar> adj_ic = adjModel->getNominalValues();
    adj_ic.set_x(lambda_ic);
    adj_ic.set_x_dot(lambda_dot_ic);
    *out << "\nadj_ic:\n" << describe(adj_ic,solnVerbLevel);

    //
    *out << "\nJ) Integrate the adjoint backwards in time (using backward time) ...\n";
    //

    // The adjoint is integrated over a span equal to the length of the
    // forward time range, and sampled at that end point.
    adjStepper->setInitialCondition(adj_ic);
    adjIntegrator->setStepper(adjStepper, fwdTimeRange.length());

    Array<RCP<const Thyra::VectorBase<Scalar> > > lambda_final_array;
    adjIntegrator->getFwdPoints(
      Teuchos::tuple<Scalar>(fwdTimeRange.length()), &lambda_final_array, NULL, NULL
      );
    const RCP<const Thyra::VectorBase<Scalar> > lambda_final = lambda_final_array[0];

    *out << "\nlambda_final:\n" << describe(*lambda_final, solnVerbLevel);

    //
    *out << "\nK) Test the final adjoint againt exact discrete solution ...\n";
    //

    {

      // Reference: lambda_final_be_exact(i) = x_beta(i)^numTimeSteps * x_final(i),
      // where x_final is the integrated forward state from section E.
      const RCP<Thyra::VectorBase<Scalar> >
        lambda_final_be_exact = createMember(x_space);

      {
        // d_gamma is only used for its dimension here; x_beta is only read.
        Thyra::ConstDetachedVectorView<Scalar> d_gamma(*gamma);
        Thyra::ConstDetachedVectorView<Scalar> d_x_final(*x_final);
        Thyra::DetachedVectorView<Scalar> d_x_beta(*x_beta);
        Thyra::DetachedVectorView<Scalar> d_lambda_final_be_exact(*lambda_final_be_exact);
        const int n = d_gamma.subDim();
        for ( int i = 0; i < n; ++i ) {
          d_lambda_final_be_exact(i) = integralPow(d_x_beta(i), numTimeSteps) * d_x_final(i);
        }
      }

      *out << "\nlambda_final_be_exact:\n" << describe(*lambda_final_be_exact, solnVerbLevel);

      result = Thyra::testRelNormDiffErr<Scalar>(
        "lambda_final", *lambda_final,
        "lambda_final_be_exact", *lambda_final_be_exact,
        "maxStateError", maxStateError,
        "warningTol", 1.0, // Don't warn
        &*out, solnVerbLevel
        );
      if (!result) success = false;

    }

    //
    *out << "\nL) Test the reduced gradient from the adjoint against the discrete forward reduced gradient ...\n";
    //

    {

      // The reduced gradient obtained from the adjoint is simply the final
      // adjoint vector computed in section J.
      const RCP<const Thyra::VectorBase<Scalar> >
        d_d_hat_d_p_from_lambda = lambda_final; // See above

      // Reference: x_beta(i)^(2*numTimeSteps) * x_ic(i).
      const RCP<Thyra::VectorBase<Scalar> >
        d_d_hat_d_p_be_exact = createMember(x_space);

      {
        Thyra::ConstDetachedVectorView<Scalar> d_x_ic(*state_ic.get_x());
        Thyra::DetachedVectorView<Scalar> d_x_beta(*x_beta);
        Thyra::DetachedVectorView<Scalar> d_d_d_hat_d_p_be_exact(*d_d_hat_d_p_be_exact);
        const int n = d_x_ic.subDim();
        for ( int i = 0; i < n; ++i ) {
          d_d_d_hat_d_p_be_exact(i) = integralPow(d_x_beta(i), 2*numTimeSteps) * d_x_ic(i);
        }
      }

      *out << "\nd_d_hat_d_p_be_exact:\n" << describe(*d_d_hat_d_p_be_exact, solnVerbLevel);

      result = Thyra::testRelNormDiffErr<Scalar>(
        "d_d_hat_d_p_from_lambda", *d_d_hat_d_p_from_lambda,
        "d_d_hat_d_p_be_exact", *d_d_hat_d_p_be_exact,
        "maxStateError", maxStateError,
        "warningTol", 1.0, // Don't warn
        &*out, solnVerbLevel
        );
      if (!result) success = false;

    }

  }
  // Exception handling for the try block; 'success' is passed in so the
  // macro can record a failure.
  TEUCHOS_STANDARD_CATCH_STATEMENTS(true,*out,success);

  if(success)
    *out << "\nEnd Result: TEST PASSED" << endl;
  else
    *out << "\nEnd Result: TEST FAILED" << endl;

  return ( success ? 0 : 1 );

} // end main() [Doxygen looks for this!]
// Evaluates the underlying model and, when the caller requested a W object,
// builds it from the forward operator via W_factory_.
//
// inArgs and outArgs are copied onto the wrapped model's argument objects.
// If outArgs supports OUT_ARG_W and carries a non-null W, the forward
// operator currently held by W is detached (or a fresh one is created from
// the underlying model), evaluated by the underlying model as its W_op, and
// afterwards W is re-initialized from that operator.  Requesting both W and
// W_op at once is not handled and raises an exception.
void DefaultModelEvaluatorWithSolveFactory<Scalar>::evalModelImpl(
  const ModelEvaluatorBase::InArgs<Scalar> &inArgs,
  const ModelEvaluatorBase::OutArgs<Scalar> &outArgs
  ) const
{
  typedef ModelEvaluatorBase MEB;
  using Teuchos::rcp_const_cast;
  using Teuchos::OSTab;

  // NOTE: `out`, `verbLevel` and `thyraModel` are not declared in this
  // function; presumably they are introduced by the BEGIN macro below.
  THYRA_MODEL_EVALUATOR_DECORATOR_EVAL_MODEL_BEGIN(
    "Thyra::DefaultModelEvaluatorWithSolveFactory",inArgs,outArgs
    );

  Teuchos::Time timer("");

  // Temporarily hand this object's stream/verbosity to the factory.
  Teuchos::VerboseObjectTempState<LinearOpWithSolveFactoryBase<Scalar> >
    W_factory_outputTempState(W_factory_,out,verbLevel);

  // All progress messages in this function share the same condition.
  const bool traceLow =
    out.get() && includesVerbLevel(verbLevel,Teuchos::VERB_LOW);

  // Mirror the caller's in/out arguments onto the wrapped model.
  MEB::InArgs<Scalar> wrappedInArgs = thyraModel->createInArgs();
  wrappedInArgs.setArgs(inArgs,true);

  MEB::OutArgs<Scalar> wrappedOutArgs = thyraModel->createOutArgs();
  wrappedOutArgs.setArgs(outArgs,true);

  // Pick up the requested W, if W is supported at all.
  RCP<LinearOpWithSolveBase<Scalar> > requestedW;
  if (outArgs.supports(MEB::OUT_ARG_W))
    requestedW = outArgs.get_W();

  RCP<const LinearOpBase<Scalar> > forwardOp;
  if (nonnull(requestedW)) {

    // Detach the forward operator from W so it can be refilled.
    Thyra::uninitializeOp<Scalar>(*W_factory_, requestedW.ptr(), outArg(forwardOp));

    {
      // Handle this case later if we need to!
      const bool both_W_and_W_op_requested = nonnull(outArgs.get_W_op());
      TEUCHOS_TEST_FOR_EXCEPT(both_W_and_W_op_requested);
    }

    // Reuse the detached operator when there is one; otherwise create it.
    RCP<LinearOpBase<Scalar> > writableForwardOp;
    if (is_null(forwardOp)) {
      writableForwardOp = thyraModel->create_W_op();
      forwardOp = writableForwardOp;
    }
    else {
      writableForwardOp = rcp_const_cast<LinearOpBase<Scalar> >(forwardOp);
    }

    wrappedOutArgs.set_W_op(writableForwardOp);
  }

  // Evaluate the underlying model (timed).

  if (traceLow) {
    *out << "\nEvaluating the output functions on model \'"
         << thyraModel->description() << "\' ...\n";
  }
  timer.start(true);

  thyraModel->evalModel(wrappedInArgs,wrappedOutArgs);

  timer.stop();
  if (traceLow) {
    OSTab(out).o() << "\nTime to evaluate underlying model = "
                   << timer.totalElapsedTime()<<" sec\n";
  }

  // Rebuild W from the freshly evaluated forward operator (timed).

  if (traceLow) {
    *out << "\nPost processing the output objects ...\n";
  }
  timer.start(true);

  if (nonnull(requestedW)) {
    Thyra::initializeOp<Scalar>(*W_factory_, forwardOp, requestedW.ptr());
    requestedW->setVerbLevel(this->getVerbLevel());
    requestedW->setOStream(this->getOStream());
  }

  timer.stop();
  if (traceLow) {
    OSTab(out).o() << "\nTime to process output objects = "
                   << timer.totalElapsedTime()<<" sec\n";
  }

  THYRA_MODEL_EVALUATOR_DECORATOR_EVAL_MODEL_END();

}
// Evaluates the residual and/or block Jacobian of the fully time-discretized
// backward Euler system.
//
// inArgs_bar.get_x() must be a product vector whose block i is the state at
// the END of time step i (block 0 is differenced against initCond_).  For
// each step the underlying DAE model is evaluated with
//
//   x_dot = ( x[i] - x[i-1] ) / delta_t_,   x = x[i],   t = t at end of step i
//
// writing f into block i of outArgs_bar.get_f() and the operators into the
// (i,i) and (i,i-1) blocks of outArgs_bar.get_W_op(), whichever were
// requested (non-null).
//
// Throws std::logic_error if delta_t_ has not been set to a positive value.
void TimeDiscretizedBackwardEulerModelEvaluator<Scalar>::evalModelImpl(
  const Thyra::ModelEvaluatorBase::InArgs<Scalar>& inArgs_bar,
  const Thyra::ModelEvaluatorBase::OutArgs<Scalar>& outArgs_bar
  ) const
{


  using Teuchos::rcp_dynamic_cast;
  typedef Thyra::ModelEvaluatorBase MEB;
  typedef Thyra::VectorBase<Scalar> VB;
  typedef Thyra::ProductVectorBase<Scalar> PVB;
  typedef Thyra::BlockedLinearOpBase<Scalar> BLWB;

/*
  THYRA_MODEL_EVALUATOR_DECORATOR_EVAL_MODEL_GEN_BEGIN(
    "Rythmos::ImplicitRKModelEvaluator",inArgs_bar,outArgs_bar,daeModel_
    );
*/

  TEST_FOR_EXCEPTION( delta_t_ <= 0.0, std::logic_error,
    "Error, you have not initialized this object correctly!" );

  //
  // A) Unwrap the inArgs and outArgs to get at product vectors and block op
  //
  // f_bar and W_op_bar are tested for null below, so either output is
  // optional.
  //

  const RCP<const PVB> x_bar = rcp_dynamic_cast<const PVB>(inArgs_bar.get_x(), true);
  const RCP<PVB> f_bar = rcp_dynamic_cast<PVB>(outArgs_bar.get_f(), true);
  const RCP<BLWB> W_op_bar = rcp_dynamic_cast<BLWB>(outArgs_bar.get_W_op(), true);

  //
  // B) Assemble f_bar and W_op_bar by looping over time steps
  //

  MEB::InArgs<Scalar> daeInArgs = daeModel_->createInArgs();
  MEB::OutArgs<Scalar> daeOutArgs = daeModel_->createOutArgs();
  // Scratch vector reused for the finite-difference x_dot of every step.
  const RCP<VB> x_dot_i = createMember(daeModel_->get_x_space());
  daeInArgs.setArgs(initCond_);
  
  // Time at the START of the current step; advanced by delta_t_ per step.
  Scalar t_i = initTime_; // ToDo: Define t_init!

  const Scalar oneOverDeltaT = 1.0/delta_t_;

  for ( int i = 0; i < numTimeSteps_; ++i ) {

    // B.1) Setup the DAE's inArgs for time step eqn f(i) ...
    const RCP<const Thyra::VectorBase<Scalar> >
      x_i = x_bar->getVectorBlock(i),
      x_im1 = ( i==0 ? initCond_.get_x() : x_bar->getVectorBlock(i-1) );
    V_VmV( x_dot_i.ptr(), *x_i, *x_im1 ); // x_dot_i = 1/dt * ( x[i] - x[i-1] )
    Vt_S( x_dot_i.ptr(), oneOverDeltaT ); // ... 
    daeInArgs.set_x_dot( x_dot_i );
    daeInArgs.set_x( x_i );
    // Backward Euler is implicit: x[i] is the state at the end of step i, so
    // the model must be evaluated at the end-of-step time, not at t_i.
    daeInArgs.set_t( t_i + delta_t_ );
    // d(x_dot_i)/d(x[i]) = 1/dt and d(x)/d(x[i]) = 1 for the diagonal block.
    daeInArgs.set_alpha( oneOverDeltaT );
    daeInArgs.set_beta( 1.0 );

    // B.2) Setup the DAE's outArgs for f(i) and/or W(i,i) ...
    if (!is_null(f_bar))
      daeOutArgs.set_f( f_bar->getNonconstVectorBlock(i) );
    if (!is_null(W_op_bar))
      daeOutArgs.set_W_op(W_op_bar->getNonconstBlock(i,i).assert_not_null());

    // B.3) Compute f_bar(i) and/or W_op_bar(i,i) ...
    daeModel_->evalModel( daeInArgs, daeOutArgs );
    // Clear the outputs so the next evaluation does not recompute them.
    daeOutArgs.set_f(Teuchos::null);
    daeOutArgs.set_W_op(Teuchos::null);
    
    // B.4) Evaluate W_op_bar(i,i-1)
    //
    // x[i-1] only enters step i through x_dot_i, with derivative -1/dt, so
    // alpha = -1/dt and beta = 0.  There is no such block for the first step.
    if ( !is_null(W_op_bar) && i > 0 ) {
      daeInArgs.set_alpha( -oneOverDeltaT );
      daeInArgs.set_beta( 0.0 );
      daeOutArgs.set_W_op(W_op_bar->getNonconstBlock(i,i-1).assert_not_null());
      daeModel_->evalModel( daeInArgs, daeOutArgs );
      daeOutArgs.set_W_op(Teuchos::null);
    }

    // Advance to the start of the next step.
    t_i += delta_t_;

  }

/*  
  THYRA_MODEL_EVALUATOR_DECORATOR_EVAL_MODEL_END();
*/

}
// Evaluates the state-eliminated model: solves the state equation for x with
// stateSolver_ and, if that converges, evaluates the underlying model's
// output functions at the solved state.
//
// The state guess x_guess_solu_ is created on first use (cloned from the
// underlying model's nominal x, or zero-filled when there is none) and is
// passed to the solver on every call.  If the solver does not report
// SOLVE_STATUS_CONVERGED, outArgs is marked as failed.  Derivative outputs
// DgDp are not implemented: requesting one raises an exception.
void DefaultStateEliminationModelEvaluator<Scalar>::evalModelImpl(
  const ModelEvaluatorBase::InArgs<Scalar> &inArgs,
  const ModelEvaluatorBase::OutArgs<Scalar> &outArgs
  ) const
{
  typedef ModelEvaluatorBase MEB;
  using Teuchos::rcp;

  Teuchos::Time totalTimer("");
  Teuchos::Time timer("");
  totalTimer.start(true);

  const Teuchos::RCP<Teuchos::FancyOStream> out = this->getOStream();
  const Teuchos::EVerbosityLevel verbLevel = this->getVerbLevel();
  Teuchos::OSTab tab(out);

  // Output gates shared by all messages below.
  const int verbRank = static_cast<int>(verbLevel);
  const bool atLeastLow = verbRank >= static_cast<int>(Teuchos::VERB_LOW);
  const bool traceLow = out.get() && atLeastLow;
  const bool traceExtreme =
    out.get() && verbRank >= static_cast<int>(Teuchos::VERB_EXTREME);

  if (traceLow) {
    *out << "\nEntering Thyra::DefaultStateEliminationModelEvaluator<Scalar>::evalModel(...) ...\n";
  }

  const Teuchos::RCP<const ModelEvaluator<Scalar> >
    thyraModel = this->getUnderlyingModel();

  const int numParams = outArgs.Np();
  const int numResponses = outArgs.Ng();

  // Lazily set up the state guess the first time through.
  if (is_null(x_guess_solu_)) {
    const ModelEvaluatorBase::InArgs<Scalar>
      nominalValues = thyraModel->getNominalValues();
    if (is_null(nominalValues.get_x())) {
      x_guess_solu_ = createMember(thyraModel->get_x_space());
      assign(&*x_guess_solu_,Scalar(0.0));
    }
    else {
      x_guess_solu_ = nominalValues.get_x()->clone_v();
    }
  }

  // Nominal values for the solve: the underlying model's, overlaid with the
  // caller's inArgs and the current state guess.
  MEB::InArgs<Scalar> wrappedNominalValues = thyraModel->getNominalValues();
  wrappedNominalValues.setArgs(inArgs,true);
  wrappedNominalValues.set_x(x_guess_solu_);

  // The state solver is limited to VERB_LOW output at most.
  const Teuchos::EVerbosityLevel solverVerbLevel =
    ( atLeastLow ? Teuchos::VERB_LOW : Teuchos::VERB_NONE );
  Teuchos::VerboseObjectTempState<NonlinearSolverBase<Scalar> >
    statSolver_outputTempState(stateSolver_,out,solverVerbLevel);

  if (traceExtreme) {
    *out
      << "\ninArgs =\n" << Teuchos::describe(inArgs,verbLevel)
      << "\noutArgs on input =\n" << Teuchos::describe(outArgs,Teuchos::VERB_LOW);
  }

  if (traceLow) {
    *out << "\nSolving f(x,...) for x ...\n";
  }

  wrappedThyraModel_->setNominalValues(
    rcp(new MEB::InArgs<Scalar>(wrappedNominalValues))
    );

  SolveStatus<Scalar> solveStatus = stateSolver_->solve(&*x_guess_solu_,NULL);

  if (solveStatus.solveStatus != SOLVE_STATUS_CONVERGED) {

    if (traceLow) {
      *out << "\nFailed to converge, returning NaNs ...\n";
    }
    outArgs.setFailed();

  }
  else {

    if (traceLow) {
      *out << "\nComputing the output functions at the solved state solution ...\n";
    }

    MEB::InArgs<Scalar> wrappedInArgs = thyraModel->createInArgs();
    MEB::OutArgs<Scalar> wrappedOutArgs = thyraModel->createOutArgs();
    wrappedInArgs.setArgs(inArgs,true);
    wrappedInArgs.set_x(x_guess_solu_);
    wrappedOutArgs.setArgs(outArgs,true);

    // Any requested DgDp(j,l) would need DfDp(l) and DgDx(j) from the
    // underlying model; that is not implemented yet.
    for (int l = 0; l < numParams; ++l) {
      for (int j = 0; j < numResponses; ++j) {
        const bool DgDp_requested =
          !outArgs.supports(MEB::OUT_ARG_DgDp,j,l).none()
          && !outArgs.get_DgDp(j,l).isEmpty();
        if (DgDp_requested) {
          // Set DfDp(l) and DgDx(j) to be computed!
          //wrappedOutArgs.set_DfDp(l,...);
          //wrappedOutArgs.set_DgDx(j,...);
          TEST_FOR_EXCEPT(true);
        }
      }
    }

    thyraModel->evalModel(wrappedInArgs,wrappedOutArgs);

    //
    // Compute DgDp(j,l) using direct sensitivties
    //
    for (int l = 0; l < numParams; ++l) {
      const bool DfDp_computed =
        !wrappedOutArgs.supports(MEB::OUT_ARG_DfDp,l).none()
        && !wrappedOutArgs.get_DfDp(l).isEmpty();
      if (!DfDp_computed)
        continue;
      //
      // Compute:  D(l) = -inv(DfDx)*DfDp(l)
      //
      TEST_FOR_EXCEPT(true);
      for (int j = 0; j < numResponses; ++j) {
        const bool DgDp_requested =
          !outArgs.supports(MEB::OUT_ARG_DgDp,j,l).none()
          && !outArgs.get_DgDp(j,l).isEmpty();
        if (DgDp_requested) {
          //
          // Compute:  DgDp(j,l) = DgDp(j,l) + DgDx(j)*D
          //
          TEST_FOR_EXCEPT(true);
        }
      }
    }
    // ToDo: Add a mode to compute DgDp(l) using adjoint sensitivities?

  }

  if (traceExtreme) {
    *out
      << "\noutArgs on output =\n" << Teuchos::describe(outArgs,verbLevel);
  }

  totalTimer.stop();
  if (traceLow) {
    *out
      << "\nTotal evaluation time = "<<totalTimer.totalElapsedTime()<<" sec\n"
      << "\nLeaving Thyra::DefaultStateEliminationModelEvaluator<Scalar>::evalModel(...) ...\n";
  }

}
// Test driver for the time-discretized backward Euler model evaluator.
//
// The program wraps a diagonal transient DAE model in a model evaluator that
// represents all time steps as one system, solves that system with a
// dampened Newton nonlinear solver starting from a zero guess, and compares
// the last time-step block of the solution against the model's exact
// solution at the final time.
//
// Returns 0 when the check passes and 1 otherwise.  If command-line parsing
// does not report PARSE_SUCCESSFUL, the parse return code is returned
// directly.
int main(int argc, char *argv[])
{

    using std::endl;
    typedef double Scalar;
    typedef double ScalarMag;
    using Teuchos::describe;
    using Teuchos::RCP;
    using Teuchos::rcp;
    using Teuchos::rcp_implicit_cast;
    using Teuchos::rcp_dynamic_cast;
    using Teuchos::as;
    using Teuchos::ParameterList;
    using Teuchos::CommandLineProcessor;
    typedef Teuchos::ParameterList::PrintOptions PLPrintOptions;
    typedef Thyra::ModelEvaluatorBase MEB;
    using Thyra::createMember;
    using Thyra::createMembers;

    // Accumulated pass/fail state reported at the end.
    bool success = true;

    Teuchos::GlobalMPISession mpiSession(&argc,&argv);

    // Pick the Epetra communicator matching the build configuration.
    RCP<Epetra_Comm> epetra_comm;
#ifdef HAVE_MPI
    epetra_comm = rcp( new Epetra_MpiComm(MPI_COMM_WORLD) );
#else
    epetra_comm = rcp( new Epetra_SerialComm );
#endif // HAVE_MPI

    RCP<Teuchos::FancyOStream>
    out = Teuchos::VerboseObjectBase::getDefaultOStream();

    try {

        //
        // A) Read commandline options
        //

        CommandLineProcessor clp;
        clp.throwExceptions(false);
        clp.addOutputSetupOptions(true);

        std::string paramsFileName = "";
        clp.setOption( "params-file", &paramsFileName,
                       "File name for XML parameters" );

        // Applied on top of the parameter file (see section B).
        std::string extraParamsString = "";
        clp.setOption( "extra-params", &extraParamsString,
                       "Extra XML parameter string" );

        Teuchos::EVerbosityLevel verbLevel = Teuchos::VERB_DEFAULT;
        setVerbosityLevelOption( "verb-level", &verbLevel,
                                 "Top-level verbosity level.  By default, this gets deincremented as you go deeper into numerical objects.",
                                 &clp );

        double finalTime = 1.0;
        clp.setOption( "final-time", &finalTime, "Final time (the inital time)" );

        // NOTE(review): the last solution block is indexed with
        // numTimeSteps-1 in the verification section, so a value < 1 is not
        // handled here.
        int numTimeSteps = 2;
        clp.setOption( "num-time-steps", &numTimeSteps, "Number of time steps" );

        bool dumpFinalSolutions = false;
        clp.setOption(
            "dump-final-solutions", "no-dump-final-solutions", &dumpFinalSolutions,
            "Determine if the final solutions are dumpped or not." );

        // Tolerance passed to the final testRelNormDiffErr() check.
        double maxStateError = 1e-6;
        clp.setOption( "max-state-error", &maxStateError,
                       "The maximum allowed error in the integrated state in relation to the exact state solution" );

        // ToDo: Read in more parameters

        // Anything other than a successful parse ends the program with the
        // parser's own return code.
        CommandLineProcessor::EParseCommandLineReturn parse_return = clp.parse(argc,argv);
        if( parse_return != CommandLineProcessor::PARSE_SUCCESSFUL ) return parse_return;

        // An unspecified verbosity level is treated as VERB_LOW.
        if ( Teuchos::VERB_DEFAULT == verbLevel )
            verbLevel = Teuchos::VERB_LOW;

        // Solutions are printed at VERB_EXTREME when dumping was requested.
        const Teuchos::EVerbosityLevel
        solnVerbLevel = ( dumpFinalSolutions ? Teuchos::VERB_EXTREME : verbLevel );

        //
        // B) Get the base parameter list that all other parameter lists will be
        // read from.
        //

        // The XML file is applied first, then the extra XML string.
        RCP<ParameterList> paramList = Teuchos::parameterList();
        if (paramsFileName.length())
            updateParametersFromXmlFile( paramsFileName, &*paramList );
        if (extraParamsString.length())
            updateParametersFromXmlString( extraParamsString, &*paramList );

        paramList->validateParameters(*getValidParameters());

        //
        // C) Create the Stratimikos linear solver factories.
        //

        // Get the linear solve strategy that will be used to solve for the linear
        // system with the dae's W matrix.
        Stratimikos::DefaultLinearSolverBuilder daeLinearSolverBuilder;
        daeLinearSolverBuilder.setParameterList(sublist(paramList,DAELinearSolver_name));
        RCP<Thyra::LinearOpWithSolveFactoryBase<Scalar> >
        daeLOWSF = createLinearSolveStrategy(daeLinearSolverBuilder);

        // Get the linear solve strategy that can be used to override the overall
        // linear system solve
        Stratimikos::DefaultLinearSolverBuilder overallLinearSolverBuilder;
        overallLinearSolverBuilder.setParameterList(sublist(paramList,OverallLinearSolver_name));
        RCP<Thyra::LinearOpWithSolveFactoryBase<Scalar> >
        overallLOWSF = createLinearSolveStrategy(overallLinearSolverBuilder);

        //
        // D) Create the underlying EpetraExt::ModelEvaluator
        //

        RCP<EpetraExt::DiagonalTransientModel> epetraDaeModel =
            EpetraExt::diagonalTransientModel(
                epetra_comm,
                sublist(paramList,DiagonalTransientModel_name)
            );

        *out <<"\nepetraDaeModel valid options:\n";
        epetraDaeModel->getValidParameters()->print(
            *out, PLPrintOptions().indent(2).showTypes(true).showDoc(true)
        );

        //
        // E) Create the Thyra-wrapped ModelEvaluator
        //

        RCP<Thyra::ModelEvaluator<double> > daeModel =
            epetraModelEvaluator(epetraDaeModel,daeLOWSF);

        //
        // F) Create the TimeDiscretizedBackwardEulerModelEvaluator
        //

        // The initial condition is the DAE model's nominal values.
        MEB::InArgs<Scalar> initCond = daeModel->createInArgs();
        initCond.setArgs(daeModel->getNominalValues());

        RCP<Thyra::ModelEvaluator<Scalar> >
        discretizedModel = Rythmos::timeDiscretizedBackwardEulerModelEvaluator<Scalar>(
                               daeModel, initCond, finalTime, numTimeSteps, overallLOWSF );

        *out << "\ndiscretizedModel = " << describe(*discretizedModel,verbLevel);

        //
        // G) Setup a nonlinear solver and solve the system
        //

        // G.1) Setup a nonlinear solver

        Thyra::DampenedNewtonNonlinearSolver<Scalar> nonlinearSolver;
        nonlinearSolver.setOStream(out);
        nonlinearSolver.setVerbLevel(verbLevel);
        //nonlinearSolver.setParameterList(sublist(paramList,NonlinearSolver_name));
        //2007/11/27: rabartl: ToDo: Implement parameter list handling for
        //DampenedNonlinearSolve so that I can uncomment the above line.
        nonlinearSolver.setModel(discretizedModel);

        // G.2) Solve the system

        // The initial guess for the whole discretized solution is zero.
        RCP<Thyra::VectorBase<Scalar> >
        x_bar = createMember(discretizedModel->get_x_space());
        V_S( x_bar.ptr(), 0.0 );

        Thyra::SolveStatus<Scalar> solveStatus =
            Thyra::solve( nonlinearSolver, &*x_bar );

        // NOTE(review): the solve status is only printed; it does not
        // influence 'success'.  Only the comparison below does.
        *out << "\nsolveStatus:\n" << solveStatus;

        *out << "\nx_bar = " << describe(*x_bar,solnVerbLevel);

        //
        // H) Verify the solution against the exact end-time solution
        //

        // Check against the end time exact solution.

        RCP<const Thyra::VectorBase<Scalar> >
        exact_x_final = Thyra::create_Vector(
                            epetraDaeModel->getExactSolution(finalTime),
                            daeModel->get_x_space()
                        );

        // The last block of the product vector is the state after the final
        // time step.
        RCP<const Thyra::VectorBase<Scalar> > solved_x_final
            = rcp_dynamic_cast<Thyra::ProductVectorBase<Scalar> >(x_bar,true)->getVectorBlock(numTimeSteps-1);

        const bool result = Thyra::testRelNormDiffErr(
                                "exact_x_final", *exact_x_final, "solved_x_final", *solved_x_final,
                                "maxStateError", maxStateError, "warningTol", 1.0, // Don't warn
                                &*out, solnVerbLevel
                            );
        if (!result) success = false;

    }
    // Exception handling for the try block; 'success' is passed in so the
    // macro can record a failure.
    TEUCHOS_STANDARD_CATCH_STATEMENTS(true,*out,success);

    if(success)
        *out << "\nEnd Result: TEST PASSED" << endl;
    else
        *out << "\nEnd Result: TEST FAILED" << endl;

    return ( success ? 0 : 1 );

} // end main() [Doxygen looks for this!]
void DiagonalImplicitRKModelEvaluator<Scalar>::evalModelImpl(
  const Thyra::ModelEvaluatorBase::InArgs<Scalar>& inArgs_stage,
  const Thyra::ModelEvaluatorBase::OutArgs<Scalar>& outArgs_stage
  ) const
{
  // Evaluates the wrapped DAE model (daeModel_) for the stage selected by
  // currentStage_.  The stage's x is recorded as that stage's derivative and
  // passed to the DAE as x_dot; the DAE's x is assembled from x_old_ and the
  // stored stage derivatives.  The stage's f and/or W_op objects, when
  // non-null, are filled directly by the DAE evaluation.

  typedef Thyra::ModelEvaluatorBase MEB;
  typedef ScalarTraits<Scalar> ST;

  // Preconditions: each piece of required setup must already have happened.
  TEUCHOS_TEST_FOR_EXCEPTION( !isInitialized_, std::logic_error,
      "Error!  initializeDIRKModel must be called before evalModel\n"
      );
  TEUCHOS_TEST_FOR_EXCEPTION( !setTimeStepPointCalled_, std::logic_error,
      "Error!  setTimeStepPoint must be called before evalModel"
      );
  TEUCHOS_TEST_FOR_EXCEPTION( currentStage_ == -1, std::logic_error,
      "Error!  setCurrentStage must be called before evalModel"
      );

  THYRA_MODEL_EVALUATOR_DECORATOR_EVAL_MODEL_GEN_BEGIN(
    "Rythmos::DiagonalImplicitRKModelEvaluator",inArgs_stage,outArgs_stage,daeModel_
    );

  // --- Pull the stage-level objects out of the caller's in/out args ---
  const RCP<const Thyra::VectorBase<Scalar> > stageXDot = inArgs_stage.get_x();
  const RCP<Thyra::VectorBase<Scalar> > stageF = outArgs_stage.get_f();
  const RCP<Thyra::LinearOpBase<Scalar> > stageWOp = outArgs_stage.get_W_op();

  // --- Prepare the DAE model's own in/out args, seeded from the base point ---
  MEB::InArgs<Scalar> modelInArgs = daeModel_->createInArgs();
  MEB::OutArgs<Scalar> modelOutArgs = daeModel_->createOutArgs();
  const RCP<Thyra::VectorBase<Scalar> > stageX = createMember(daeModel_->get_x_space());
  modelInArgs.setArgs(basePoint_);

  // --- DAE inputs for the current stage ---
  // Record the incoming vector as this stage's derivative first: the state
  // assembly below reads stage_derivatives_.
  V_V(stage_derivatives_->getNonconstVectorBlock(currentStage_).ptr(),*stageXDot);
  assembleIRKState( currentStage_, dirkButcherTableau_->A(), delta_t_, *x_old_, *stage_derivatives_, outArg(*stageX) );
  modelInArgs.set_x( stageX );
  modelInArgs.set_x_dot( stageXDot );
  // Stage time: t_old + c(stage) * dt.
  modelInArgs.set_t( t_old_ + dirkButcherTableau_->c()(currentStage_) * delta_t_ );
  // alpha = 1 and beta = dt * A(stage,stage).
  modelInArgs.set_alpha(ST::one());
  modelInArgs.set_beta( delta_t_ * dirkButcherTableau_->A()(currentStage_,currentStage_) );

  // --- DAE outputs: forward only what the caller actually asked for ---
  if (!stageF.is_null()) {
    modelOutArgs.set_f( stageF );
  }
  if (!stageWOp.is_null()) {
    modelOutArgs.set_W_op(stageWOp);
  }

  // --- Evaluate, then drop the references held by the local out args ---
  daeModel_->evalModel( modelInArgs, modelOutArgs );
  modelOutArgs.set_f(Teuchos::null);
  modelOutArgs.set_W_op(Teuchos::null);

  THYRA_MODEL_EVALUATOR_DECORATOR_EVAL_MODEL_END();

}
// Unit test: evaluate a ForwardSensitivityExplicitModelEvaluator wrapped
// around an explicit SinCosModel and compare the returned sensitivity
// residual columns against hand-derived values.  The evaluation is done
// twice: once with the forward state x = [2, 3] and once after changing it
// to x = [20, 21], to check that a re-initialized point state is picked up.
TEUCHOS_UNIT_TEST( Rythmos_ForwardSensitivityExplicitModelEvaluator, evalModel ) {
  typedef Thyra::ModelEvaluatorBase MEB;
  RCP<ForwardSensitivityExplicitModelEvaluator<double> > model =
    forwardSensitivityExplicitModelEvaluator<double>();
  RCP<SinCosModel> innerModel = sinCosModel(false);
  // Model coefficients; they are also used as the parameter vector p below
  // and in the expected-value formulas.
  double a = 0.4;
  double f = 1.5;
  double L = 1.6;
  {
    // Configure the inner model: explicit formulation, parameters exposed.
    RCP<ParameterList> pl = Teuchos::parameterList();
    pl->set("Accept model parameters",true);
    pl->set("Implicit model formulation",false);
    pl->set("Coeff a", a );
    pl->set("Coeff f", f );
    pl->set("Coeff L", L );
    innerModel->setParameterList(pl);
  }
  // Sensitivities are taken with respect to parameter subvector 0.
  model->initializeStructure(innerModel, 0 );
  RCP<VectorBase<double> > x;
  MEB::InArgs<double> pointInArgs;  // Used to change the solution for re-evaluation
  RCP<StepperBase<double> > stepper; // Used for initializePointState
  {
    // Forward-model point: t = 0.1, x = [2, 3], p = [a, f, L].
    pointInArgs = innerModel->createInArgs();
    pointInArgs.set_t(0.1);
    x = Thyra::createMember(innerModel->get_x_space());
    {
      // The view lives in its own scope so it is destroyed before x is
      // handed to pointInArgs (presumably this is what flushes the writes
      // back into *x -- confirm against the DetachedVectorView docs).
      Thyra::DetachedVectorView<double> x_view( *x );
      x_view[0] = 2.0;
      x_view[1] = 3.0;
    }
    pointInArgs.set_x(x);
    RCP<VectorBase<double> > p0 = Thyra::createMember(innerModel->get_p_space(0));
    {
      Thyra::DetachedVectorView<double> p0_view( *p0 );
      p0_view[0] = a;
      p0_view[1] = f;
      p0_view[2] = L;
    }
    pointInArgs.set_p(0,p0);
    {
      // Create a stepper with these initial conditions to use to call
      // initializePointState on this ME:
      stepper = forwardEulerStepper<double>();
      stepper->setInitialCondition(pointInArgs);
      model->initializePointState(Teuchos::inOutArg(*stepper),false);
    }
  }
  // Sensitivity-model input: x_bar is viewed as the multi-vector S whose
  // dimensions are checked below (2 rows, 3 columns).
  MEB::InArgs<double> inArgs = model->createInArgs();
  RCP<VectorBase<double> > x_bar = Thyra::createMember(model->get_x_space());
  RCP<Thyra::DefaultMultiVectorProductVector<double> >
    s_bar = Teuchos::rcp_dynamic_cast<Thyra::DefaultMultiVectorProductVector<double> >(
      x_bar, true
      );
  RCP<Thyra::MultiVectorBase<double> >
    S = s_bar->getNonconstMultiVector();
  // Fill S with data
  {
    TEST_EQUALITY_CONST( S->domain()->dim(), 3 );
    TEST_EQUALITY_CONST( S->range()->dim(), 2 );
    RCP<VectorBase<double> > S0 = S->col(0);
    RCP<VectorBase<double> > S1 = S->col(1);
    RCP<VectorBase<double> > S2 = S->col(2);
    TEST_EQUALITY_CONST( S0->space()->dim(), 2 );
    TEST_EQUALITY_CONST( S1->space()->dim(), 2 );
    TEST_EQUALITY_CONST( S2->space()->dim(), 2 );
    // Columns of S: [7, 8], [9, 10], [11, 12].
    Thyra::DetachedVectorView<double> S0_view( *S0 );
    S0_view[0] = 7.0;
    S0_view[1] = 8.0;
    Thyra::DetachedVectorView<double> S1_view( *S1 );
    S1_view[0] = 9.0;
    S1_view[1] = 10.0;
    Thyra::DetachedVectorView<double> S2_view( *S2 );
    S2_view[0] = 11.0;
    S2_view[1] = 12.0;
  }
  inArgs.set_x(x_bar);
  // Sensitivity-model output: f_bar is viewed as the multi-vector F_sens.
  MEB::OutArgs<double> outArgs = model->createOutArgs();
  RCP<VectorBase<double> > f_bar = Thyra::createMember(model->get_f_space());
  RCP<Thyra::DefaultMultiVectorProductVector<double> >
    f_sens = Teuchos::rcp_dynamic_cast<Thyra::DefaultMultiVectorProductVector<double> >(
      f_bar, true
      );
  RCP<Thyra::MultiVectorBase<double> >
    F_sens = f_sens->getNonconstMultiVector().assert_not_null();

  // Zero the output before the first evaluation.
  V_S(Teuchos::outArg(*f_bar),0.0);
  outArgs.set_f(f_bar);
  
  inArgs.set_t(0.1);
  model->evalModel(inArgs,outArgs);

  // Verify F_sens = df/dx*S + df/dp
  // df/dx = [ 0             1 ]
  //         [ -(f/L)*(f/L)  0 ]
  // S =   [ 7   9  11 ]    x = [ 2 ]
  //       [ 8  10  12 ]        [ 3 ]
  // df/dp = [     0             0                   0              ]
  //         [ (f/L)*(f/L) 2*f/(L*L)*(a-x_0) -2*f*f/(L*L*L)*(a-x_0) ]
  // F_sens_0 = 
  // [            8               ]
  // [ -7*(f/L)*(f/L)+(f*f)/(L*L) ]
  // F_sens_1 = 
  // [            10                    ]
  // [ -9*(f/L)*(f/L)+2*f/(L*L)*(a-x_0) ]
  // F_sens_2 = 
  // [            12                         ]
  // [ -11*(f/L)*(f/L)-2*f*f/(L*L*L)*(a-x_0) ]
  // 
  double tol = 1.0e-10;
  {
    // First evaluation: x_0 = 2.0 in the formulas above.
    TEST_EQUALITY_CONST( F_sens->domain()->dim(), 3 );
    TEST_EQUALITY_CONST( F_sens->range()->dim(), 2 );
    RCP<VectorBase<double> > F_sens_0 = F_sens->col(0);
    RCP<VectorBase<double> > F_sens_1 = F_sens->col(1);
    RCP<VectorBase<double> > F_sens_2 = F_sens->col(2);
    TEST_EQUALITY_CONST( F_sens_0->space()->dim(), 2 );
    TEST_EQUALITY_CONST( F_sens_1->space()->dim(), 2 );
    TEST_EQUALITY_CONST( F_sens_2->space()->dim(), 2 );

    Thyra::DetachedVectorView<double> F_sens_0_view( *F_sens_0 );
    TEST_FLOATING_EQUALITY( F_sens_0_view[0], 8.0, tol );
    TEST_FLOATING_EQUALITY( F_sens_0_view[1], -7.0*(f/L)*(f/L)+(f*f)/(L*L), tol );

    Thyra::DetachedVectorView<double> F_sens_1_view( *F_sens_1 );
    TEST_FLOATING_EQUALITY( F_sens_1_view[0], 10.0, tol );
    TEST_FLOATING_EQUALITY( F_sens_1_view[1], -9*(f/L)*(f/L)+2*f/(L*L)*(a-2.0), tol );

    Thyra::DetachedVectorView<double> F_sens_2_view( *F_sens_2 );
    TEST_FLOATING_EQUALITY( F_sens_2_view[0], 12.0, tol );
    TEST_FLOATING_EQUALITY( F_sens_2_view[1], -11*(f/L)*(f/L)-2*f*f/(L*L*L)*(a-2.0), tol );
  }

  // Now change x and evaluate again.
  {
    Thyra::DetachedVectorView<double> x_view( *x );
    x_view[0] = 20.0;
    x_view[1] = 21.0;
  }
  // We need to call initializePointState again due to the vector
  // being cloned inside.
  stepper->setInitialCondition(pointInArgs);
  model->initializePointState(Teuchos::inOutArg(*stepper),false);

  model->evalModel(inArgs,outArgs);
  {
    // Second evaluation: same S, but x_0 = 20.0, so only the entries that
    // depend on (a-x_0) are expected to change.
    TEST_EQUALITY_CONST( F_sens->domain()->dim(), 3 );
    TEST_EQUALITY_CONST( F_sens->range()->dim(), 2 );
    RCP<VectorBase<double> > F_sens_0 = F_sens->col(0);
    RCP<VectorBase<double> > F_sens_1 = F_sens->col(1);
    RCP<VectorBase<double> > F_sens_2 = F_sens->col(2);
    TEST_EQUALITY_CONST( F_sens_0->space()->dim(), 2 );
    TEST_EQUALITY_CONST( F_sens_1->space()->dim(), 2 );
    TEST_EQUALITY_CONST( F_sens_2->space()->dim(), 2 );

    Thyra::DetachedVectorView<double> F_sens_0_view( *F_sens_0 );
    TEST_FLOATING_EQUALITY( F_sens_0_view[0], 8.0, tol );
    TEST_FLOATING_EQUALITY( F_sens_0_view[1], -7.0*(f/L)*(f/L)+(f*f)/(L*L), tol );

    Thyra::DetachedVectorView<double> F_sens_1_view( *F_sens_1 );
    TEST_FLOATING_EQUALITY( F_sens_1_view[0], 10.0, tol );
    TEST_FLOATING_EQUALITY( F_sens_1_view[1], -9*(f/L)*(f/L)+2*f/(L*L)*(a-20.0), tol );

    Thyra::DetachedVectorView<double> F_sens_2_view( *F_sens_2 );
    TEST_FLOATING_EQUALITY( F_sens_2_view[0], 12.0, tol );
    TEST_FLOATING_EQUALITY( F_sens_2_view[1], -11*(f/L)*(f/L)-2*f*f/(L*L*L)*(a-20.0), tol );
  }

}
double computeForwardSensitivityErrorStackedStepperSinCosFE(
    int numTimeSteps,
    Array<RCP<const VectorBase<double> > >& computedSol,
    Array<RCP<const VectorBase<double> > >& exactSol
)
{
    using Teuchos::rcp_dynamic_cast;
    typedef Thyra::ModelEvaluatorBase MEB;
    // Forward ODE Model:
    RCP<SinCosModel> fwdModel = sinCosModel();
    {
        RCP<ParameterList> pl = Teuchos::parameterList();
        pl->set("Accept model parameters",true);
        pl->set("Implicit model formulation",false);
        pl->set("Provide nominal values",true);
        double b = 5.0;
        //double phi = 0.0;
        double a = 2.0;
        double f = 3.0;
        double L = 4.0;
        double x0 = a;
        double x1 = b*f/L;
        pl->set("Coeff a", a);
        pl->set("Coeff f", f);
        pl->set("Coeff L", L);
        pl->set("IC x_0", x0);
        pl->set("IC x_1", x1);
        fwdModel->setParameterList(pl);
    }
    RCP<StepperBase<double> > fwdStepper;
    RCP<StepperBase<double> > fsStepper;
    {
        const RCP<StepperBuilder<double> > builder = stepperBuilder<double>();
        RCP<ParameterList> stepperPL = Teuchos::parameterList();
        stepperPL->set("Stepper Type","Forward Euler");
        builder->setParameterList(stepperPL);
        fwdStepper = builder->create();
        fsStepper = builder->create();
    }
    // Forward Sensitivity Model:
    RCP<ForwardSensitivityExplicitModelEvaluator<double> > fsModel =
        forwardSensitivityExplicitModelEvaluator<double>();
    int p_index = 0;
    fsModel->initializeStructure(fwdModel,p_index);

    const MEB::InArgs<double> fwdModel_ic = fwdModel->getNominalValues();
    fwdStepper->setModel(fwdModel);
    fwdStepper->setInitialCondition(fwdModel_ic);
    fsModel->initializePointState(Teuchos::inOutArg(*fwdStepper),false);

    MEB::InArgs<double> fsModel_ic = fsModel->getNominalValues();
    {
        // Set up sensitivity initial conditions so they match the initial
        // conditions in getExactSensSolution
        RCP<Thyra::VectorBase<double> > s_bar_init
            = createMember(fsModel->get_x_space());
        RCP<Thyra::DefaultMultiVectorProductVector<double> > s_bar_mv =
            rcp_dynamic_cast<Thyra::DefaultMultiVectorProductVector<double> >(
                s_bar_init,
                true
            );
        int np = 3; // SinCos problem number of elements in parameter vector.
        for (int j=0 ; j < np ; ++j) {
            MEB::InArgs<double> sens_ic = fwdModel->getExactSensSolution(j,0.0);
            V_V(outArg(*(s_bar_mv->getNonconstVectorBlock(j))),
                *(sens_ic.get_x())
               );
        }
        fsModel_ic.set_x(s_bar_init);
    }
    fsStepper->setModel(fsModel);
    fsStepper->setInitialCondition(fsModel_ic);

    RCP<StackedStepper<double> > sStepper = stackedStepper<double>();
    sStepper->addStepper(fwdStepper);
    sStepper->addStepper(fsStepper);
    {
        // Set up Forward Sensitivities step strategy
        RCP<ForwardSensitivityStackedStepperStepStrategy<double> > stepStrategy =
            forwardSensitivityStackedStepperStepStrategy<double>();
        sStepper->setStackedStepperStepControlStrategy(stepStrategy);
    }

    double finalTime = 1.0e-4;
    double dt = finalTime/numTimeSteps; // Assume t_0 = 0.0;
    for (int i=0 ; i < numTimeSteps ; ++i ) {
        double dt_taken = sStepper->takeStep(dt,STEP_TYPE_FIXED);
        TEUCHOS_ASSERT( dt_taken == dt );
    }
    RCP<VectorBase<double> > x_bar_final =
        Thyra::createMember(sStepper->get_x_space());
    {
        Array<double> t_vec;
        Array<RCP<const VectorBase<double> > > x_vec;

        t_vec.push_back(finalTime);
        sStepper->getPoints(
            t_vec,
            &x_vec,
            NULL,
            NULL
        );
        V_V(Teuchos::outArg(*x_bar_final),*x_vec[0]);
    }

    // Now we check that the sensitivities are correct
    RCP<const Thyra::VectorBase<double> > DxDp_vec_final =
        Thyra::productVectorBase<double>(x_bar_final)->getVectorBlock(1);
    RCP<const Thyra::DefaultMultiVectorProductVector<double> > DxDp_mv_final =
        rcp_dynamic_cast<const Thyra::DefaultMultiVectorProductVector<double> >(
            DxDp_vec_final,
            true
        );
    RCP<const Thyra::VectorBase<double> >
    DxDp_s0_final = DxDp_mv_final->getVectorBlock(0);
    RCP<const Thyra::VectorBase<double> >
    DxDp_s1_final = DxDp_mv_final->getVectorBlock(1);
    RCP<const Thyra::VectorBase<double> >
    DxDp_s2_final = DxDp_mv_final->getVectorBlock(2);

    computedSol.clear();
    computedSol.push_back(DxDp_s0_final);
    computedSol.push_back(DxDp_s1_final);
    computedSol.push_back(DxDp_s2_final);

    MEB::InArgs<double> exactSensSolution;
    exactSensSolution = fwdModel->getExactSensSolution(0,finalTime);
    RCP<const Thyra::VectorBase<double> > ds0dp = exactSensSolution.get_x();
    exactSensSolution = fwdModel->getExactSensSolution(1,finalTime);
    RCP<const Thyra::VectorBase<double> > ds1dp = exactSensSolution.get_x();
    exactSensSolution = fwdModel->getExactSensSolution(2,finalTime);
    RCP<const Thyra::VectorBase<double> > ds2dp = exactSensSolution.get_x();

    exactSol.clear();
    exactSol.push_back(ds0dp);
    exactSol.push_back(ds1dp);
    exactSol.push_back(ds2dp);

    return dt;
}