void eval_model_explicit(
    const Thyra::ModelEvaluator<Scalar> &model,
    Thyra::ModelEvaluatorBase::InArgs<Scalar> &basePoint,
    const VectorBase<Scalar>& x_in,
    const typename Thyra::ModelEvaluatorBase::InArgs<Scalar>::ScalarMag &t_in,
    const Ptr<VectorBase<Scalar> >& f_out
    )
{
  typedef Thyra::ModelEvaluatorBase MEB;
  MEB::InArgs<Scalar> inArgs = model.createInArgs();
  MEB::OutArgs<Scalar> outArgs = model.createOutArgs();
  inArgs.setArgs(basePoint);
  inArgs.set_x(Teuchos::rcp(&x_in,false));
  if (inArgs.supports(MEB::IN_ARG_t)) {
    inArgs.set_t(t_in);
  }
  // For model evaluators whose state function f(x, x_dot, t) describes
  // an implicit ODE, and which accept an optional x_dot input argument,
  // make sure the latter is set to null in order to request the evaluation
  // of a state function corresponding to the explicit ODE formulation
  // x_dot = f(x, t)
  if (inArgs.supports(MEB::IN_ARG_x_dot)) {
    inArgs.set_x_dot(Teuchos::null);
  }
  outArgs.set_f(Teuchos::rcp(&*f_out,false));
  model.evalModel(inArgs,outArgs);
}
RCP<Thyra::VectorBase<Scalar> > eval_f_t(
    const Thyra::ModelEvaluator<Scalar>& me,
    Scalar t
    ) {
  typedef Teuchos::ScalarTraits<Scalar> ST;
  typedef Thyra::ModelEvaluatorBase MEB;
  MEB::InArgs<Scalar> inArgs = me.createInArgs();
  inArgs.set_t(t);
  MEB::OutArgs<Scalar> outArgs = me.createOutArgs();
  RCP<Thyra::VectorBase<Scalar> > f_out = Thyra::createMember(me.get_f_space());
  V_S(outArg(*f_out),ST::zero());
  outArgs.set_f(f_out);
  me.evalModel(inArgs,outArgs);
  return f_out;
}
void eval_model_explicit(
    const Thyra::ModelEvaluator<Scalar> &model,
    Thyra::ModelEvaluatorBase::InArgs<Scalar> &basePoint,
    const VectorBase<Scalar>& x_in,
    const typename Thyra::ModelEvaluatorBase::InArgs<Scalar>::ScalarMag &t_in,
    const Ptr<VectorBase<Scalar> >& f_out
    )
{
  typedef Thyra::ModelEvaluatorBase MEB;
  MEB::InArgs<Scalar> inArgs = model.createInArgs();
  MEB::OutArgs<Scalar> outArgs = model.createOutArgs();
  inArgs.setArgs(basePoint);
  inArgs.set_x(Teuchos::rcp(&x_in,false));
  if (inArgs.supports(MEB::IN_ARG_t)) {
    inArgs.set_t(t_in);
  }
  outArgs.set_f(Teuchos::rcp(&*f_out,false));
  model.evalModel(inArgs,outArgs);
}
void restart( StepperBase<Scalar> *stepper )
{
#ifdef RYTHMOS_DEBUG
  TEST_FOR_EXCEPT(0==stepper);
#endif // RYTHMOS_DEBUG
  typedef Thyra::ModelEvaluatorBase MEB;
  const Rythmos::StepStatus<double>
    stepStatus = stepper->getStepStatus();
  const RCP<const Thyra::ModelEvaluator<Scalar> >
    model = stepper->getModel();
  // First, copy all of the model's state, including parameter values etc.
  MEB::InArgs<double> initialCondition = model->createInArgs();
  initialCondition.setArgs(model->getNominalValues());
  // Set the current values of the state and time
  RCP<const Thyra::VectorBase<double> > x, x_dot;
  Rythmos::get_x_and_x_dot(*stepper,stepStatus.time,&x,&x_dot);
  initialCondition.set_x(x);
  initialCondition.set_x_dot(x_dot);
  initialCondition.set_t(stepStatus.time);
  // Set the new initial condition back on the stepper.  This will effectively
  // reset the stepper to think that it is starting over again (which it is).
  stepper->setInitialCondition(initialCondition);
}
void DiagonalImplicitRKModelEvaluator<Scalar>::evalModelImpl(
  const Thyra::ModelEvaluatorBase::InArgs<Scalar>& inArgs_stage,
  const Thyra::ModelEvaluatorBase::OutArgs<Scalar>& outArgs_stage
  ) const
{

  typedef ScalarTraits<Scalar> ST;
  typedef Thyra::ModelEvaluatorBase MEB;

  TEUCHOS_TEST_FOR_EXCEPTION( !isInitialized_, std::logic_error,
      "Error!  initializeDIRKModel must be called before evalModel\n"
      );

  TEUCHOS_TEST_FOR_EXCEPTION( !setTimeStepPointCalled_, std::logic_error,
      "Error!  setTimeStepPoint must be called before evalModel"
      );

  TEUCHOS_TEST_FOR_EXCEPTION( currentStage_ == -1, std::logic_error,
      "Error!  setCurrentStage must be called before evalModel"
      );

  THYRA_MODEL_EVALUATOR_DECORATOR_EVAL_MODEL_GEN_BEGIN(
    "Rythmos::DiagonalImplicitRKModelEvaluator",inArgs_stage,outArgs_stage,daeModel_
    );

  //
  // A) Unwrap the inArgs and outArgs 
  //

  const RCP<const Thyra::VectorBase<Scalar> > x_in = inArgs_stage.get_x();
  const RCP<Thyra::VectorBase<Scalar> > f_out = outArgs_stage.get_f();
  const RCP<Thyra::LinearOpBase<Scalar> > W_op_out = outArgs_stage.get_W_op();

  //
  // B) Assemble f_out and W_op_out for given stage
  //

  MEB::InArgs<Scalar> daeInArgs = daeModel_->createInArgs();
  MEB::OutArgs<Scalar> daeOutArgs = daeModel_->createOutArgs();
  const RCP<Thyra::VectorBase<Scalar> > x_i = createMember(daeModel_->get_x_space());
  daeInArgs.setArgs(basePoint_);
  
  // B.1) Setup the DAE's inArgs for stage f(currentStage_) ...
  V_V(stage_derivatives_->getNonconstVectorBlock(currentStage_).ptr(),*x_in);
  assembleIRKState( currentStage_, dirkButcherTableau_->A(), delta_t_, *x_old_, *stage_derivatives_, outArg(*x_i) );
  daeInArgs.set_x( x_i );
  daeInArgs.set_x_dot( x_in );
  daeInArgs.set_t( t_old_ + dirkButcherTableau_->c()(currentStage_) * delta_t_ );
  daeInArgs.set_alpha(ST::one());
  daeInArgs.set_beta( delta_t_ * dirkButcherTableau_->A()(currentStage_,currentStage_) );

  // B.2) Setup the DAE's outArgs for stage f(i) ...
  if (!is_null(f_out))
    daeOutArgs.set_f( f_out );
  if (!is_null(W_op_out))
    daeOutArgs.set_W_op(W_op_out);

  // B.3) Compute f_out(i) and/or W_op_out ...
  daeModel_->evalModel( daeInArgs, daeOutArgs );
  daeOutArgs.set_f(Teuchos::null);
  daeOutArgs.set_W_op(Teuchos::null);
  
  THYRA_MODEL_EVALUATOR_DECORATOR_EVAL_MODEL_END();
  
}
void TimeDiscretizedBackwardEulerModelEvaluator<Scalar>::evalModelImpl(
  const Thyra::ModelEvaluatorBase::InArgs<Scalar>& inArgs_bar,
  const Thyra::ModelEvaluatorBase::OutArgs<Scalar>& outArgs_bar
  ) const
{


  using Teuchos::rcp_dynamic_cast;
  typedef ScalarTraits<Scalar> ST;
  typedef Thyra::ModelEvaluatorBase MEB;
  typedef Thyra::VectorBase<Scalar> VB;
  typedef Thyra::ProductVectorBase<Scalar> PVB;
  typedef Thyra::BlockedLinearOpBase<Scalar> BLWB;

/*
  THYRA_MODEL_EVALUATOR_DECORATOR_EVAL_MODEL_GEN_BEGIN(
    "Rythmos::ImplicitRKModelEvaluator",inArgs_bar,outArgs_bar,daeModel_
    );
*/

  TEST_FOR_EXCEPTION( delta_t_ <= 0.0, std::logic_error,
    "Error, you have not initialized this object correctly!" );

  //
  // A) Unwrap the inArgs and outArgs to get at product vectors and block op
  //

  const RCP<const PVB> x_bar = rcp_dynamic_cast<const PVB>(inArgs_bar.get_x(), true);
  const RCP<PVB> f_bar = rcp_dynamic_cast<PVB>(outArgs_bar.get_f(), true);
  RCP<BLWB> W_op_bar = rcp_dynamic_cast<BLWB>(outArgs_bar.get_W_op(), true);

  //
  // B) Assemble f_bar and W_op_bar by looping over stages
  //

  MEB::InArgs<Scalar> daeInArgs = daeModel_->createInArgs();
  MEB::OutArgs<Scalar> daeOutArgs = daeModel_->createOutArgs();
  const RCP<VB> x_dot_i = createMember(daeModel_->get_x_space());
  daeInArgs.setArgs(initCond_);
  
  Scalar t_i = initTime_; // ToDo: Define t_init!

  const Scalar oneOverDeltaT = 1.0/delta_t_;

  for ( int i = 0; i < numTimeSteps_; ++i ) {

    // B.1) Setup the DAE's inArgs for time step eqn f(i) ...
    const RCP<const Thyra::VectorBase<Scalar> >
      x_i = x_bar->getVectorBlock(i),
      x_im1 = ( i==0 ? initCond_.get_x() : x_bar->getVectorBlock(i-1) );
    V_VmV( x_dot_i.ptr(), *x_i, *x_im1 ); // x_dot_i = 1/dt * ( x[i] - x[i-1] )
    Vt_S( x_dot_i.ptr(), oneOverDeltaT ); // ... 
    daeInArgs.set_x_dot( x_dot_i );
    daeInArgs.set_x( x_i );
    daeInArgs.set_t( t_i );
    daeInArgs.set_alpha( oneOverDeltaT );
    daeInArgs.set_beta( 1.0 );

    // B.2) Setup the DAE's outArgs for f(i) and/or W(i,i) ...
    if (!is_null(f_bar))
      daeOutArgs.set_f( f_bar->getNonconstVectorBlock(i) );
    if (!is_null(W_op_bar))
      daeOutArgs.set_W_op(W_op_bar->getNonconstBlock(i,i).assert_not_null());

    // B.3) Compute f_bar(i) and/or W_op_bar(i,i) ...
    daeModel_->evalModel( daeInArgs, daeOutArgs );
    daeOutArgs.set_f(Teuchos::null);
    daeOutArgs.set_W_op(Teuchos::null);
    
    // B.4) Evaluate W_op_bar(i,i-1)
    if ( !is_null(W_op_bar) && i > 0 ) {
      daeInArgs.set_alpha( -oneOverDeltaT );
      daeInArgs.set_beta( 0.0 );
      daeOutArgs.set_W_op(W_op_bar->getNonconstBlock(i,i-1).assert_not_null());
      daeModel_->evalModel( daeInArgs, daeOutArgs );
      daeOutArgs.set_W_op(Teuchos::null);
    }

    //
    t_i += delta_t_;

  }

/*  
  THYRA_MODEL_EVALUATOR_DECORATOR_EVAL_MODEL_END();
*/

}
TEUCHOS_UNIT_TEST( Rythmos_ForwardSensitivityExplicitModelEvaluator, evalModel ) {
  typedef Thyra::ModelEvaluatorBase MEB;
  RCP<ForwardSensitivityExplicitModelEvaluator<double> > model =
    forwardSensitivityExplicitModelEvaluator<double>();
  RCP<SinCosModel> innerModel = sinCosModel(false);
  double a = 0.4;
  double f = 1.5;
  double L = 1.6;
  {
    RCP<ParameterList> pl = Teuchos::parameterList();
    pl->set("Accept model parameters",true);
    pl->set("Implicit model formulation",false);
    pl->set("Coeff a", a );
    pl->set("Coeff f", f );
    pl->set("Coeff L", L );
    innerModel->setParameterList(pl);
  }
  model->initializeStructure(innerModel, 0 );
  RCP<VectorBase<double> > x;
  MEB::InArgs<double> pointInArgs;  // Used to change the solution for re-evaluation
  RCP<StepperBase<double> > stepper; // Used for initializePointState
  {
    pointInArgs = innerModel->createInArgs();
    pointInArgs.set_t(0.1);
    x = Thyra::createMember(innerModel->get_x_space());
    {
      Thyra::DetachedVectorView<double> x_view( *x );
      x_view[0] = 2.0;
      x_view[1] = 3.0;
    }
    pointInArgs.set_x(x);
    RCP<VectorBase<double> > p0 = Thyra::createMember(innerModel->get_p_space(0));
    {
      Thyra::DetachedVectorView<double> p0_view( *p0 );
      p0_view[0] = a;
      p0_view[1] = f;
      p0_view[2] = L;
    }
    pointInArgs.set_p(0,p0);
    {
      // Create a stepper with these initial conditions to use to call
      // initializePointState on this ME:
      stepper = forwardEulerStepper<double>();
      stepper->setInitialCondition(pointInArgs);
      model->initializePointState(Teuchos::inOutArg(*stepper),false);
    }
  }
  MEB::InArgs<double> inArgs = model->createInArgs();
  RCP<VectorBase<double> > x_bar = Thyra::createMember(model->get_x_space());
  RCP<Thyra::DefaultMultiVectorProductVector<double> >
    s_bar = Teuchos::rcp_dynamic_cast<Thyra::DefaultMultiVectorProductVector<double> >(
      x_bar, true
      );
  RCP<Thyra::MultiVectorBase<double> >
    S = s_bar->getNonconstMultiVector();
  // Fill S with data
  {
    TEST_EQUALITY_CONST( S->domain()->dim(), 3 );
    TEST_EQUALITY_CONST( S->range()->dim(), 2 );
    RCP<VectorBase<double> > S0 = S->col(0);
    RCP<VectorBase<double> > S1 = S->col(1);
    RCP<VectorBase<double> > S2 = S->col(2);
    TEST_EQUALITY_CONST( S0->space()->dim(), 2 );
    TEST_EQUALITY_CONST( S1->space()->dim(), 2 );
    TEST_EQUALITY_CONST( S2->space()->dim(), 2 );
    Thyra::DetachedVectorView<double> S0_view( *S0 );
    S0_view[0] = 7.0;
    S0_view[1] = 8.0;
    Thyra::DetachedVectorView<double> S1_view( *S1 );
    S1_view[0] = 9.0;
    S1_view[1] = 10.0;
    Thyra::DetachedVectorView<double> S2_view( *S2 );
    S2_view[0] = 11.0;
    S2_view[1] = 12.0;
  }
  inArgs.set_x(x_bar);
  MEB::OutArgs<double> outArgs = model->createOutArgs();
  RCP<VectorBase<double> > f_bar = Thyra::createMember(model->get_f_space());
  RCP<Thyra::DefaultMultiVectorProductVector<double> >
    f_sens = Teuchos::rcp_dynamic_cast<Thyra::DefaultMultiVectorProductVector<double> >(
      f_bar, true
      );
  RCP<Thyra::MultiVectorBase<double> >
    F_sens = f_sens->getNonconstMultiVector().assert_not_null();

  V_S(Teuchos::outArg(*f_bar),0.0);
  outArgs.set_f(f_bar);
  
  inArgs.set_t(0.1);
  model->evalModel(inArgs,outArgs);

  // Verify F_sens = df/dx*S = df/dp
  // df/dx = [ 0             1 ]
  //         [ -(f/L)*(f/L)  0 ]
  // S =   [ 7   9  11 ]    x = [ 2 ]
  //       [ 8  10  12 ]        [ 3 ]
  // df/dp = [     0             0                   0              ]
  //         [ (f/L)*(f/L) 2*f/(L*L)*(a-x_0) -2*f*f/(L*L*L)*(a-x_0) ]
  // F_sens_0 = 
  // [            8               ]
  // [ -7*(f/L)*(f/L)+(f*f)/(L*L) ]
  // F_sens_1 = 
  // [            10                    ]
  // [ -9*(f/L)*(f/L)+2*f/(L*L)*(a-x_0) ]
  // F_sens_2 = 
  // [            12                         ]
  // [ -11*(f/L)*(f/L)-2*f*f/(L*L*L)*(a-x_0) ]
  // 
  double tol = 1.0e-10;
  {
    TEST_EQUALITY_CONST( F_sens->domain()->dim(), 3 );
    TEST_EQUALITY_CONST( F_sens->range()->dim(), 2 );
    RCP<VectorBase<double> > F_sens_0 = F_sens->col(0);
    RCP<VectorBase<double> > F_sens_1 = F_sens->col(1);
    RCP<VectorBase<double> > F_sens_2 = F_sens->col(2);
    TEST_EQUALITY_CONST( F_sens_0->space()->dim(), 2 );
    TEST_EQUALITY_CONST( F_sens_1->space()->dim(), 2 );
    TEST_EQUALITY_CONST( F_sens_2->space()->dim(), 2 );

    Thyra::DetachedVectorView<double> F_sens_0_view( *F_sens_0 );
    TEST_FLOATING_EQUALITY( F_sens_0_view[0], 8.0, tol );
    TEST_FLOATING_EQUALITY( F_sens_0_view[1], -7.0*(f/L)*(f/L)+(f*f)/(L*L), tol );

    Thyra::DetachedVectorView<double> F_sens_1_view( *F_sens_1 );
    TEST_FLOATING_EQUALITY( F_sens_1_view[0], 10.0, tol );
    TEST_FLOATING_EQUALITY( F_sens_1_view[1], -9*(f/L)*(f/L)+2*f/(L*L)*(a-2.0), tol );

    Thyra::DetachedVectorView<double> F_sens_2_view( *F_sens_2 );
    TEST_FLOATING_EQUALITY( F_sens_2_view[0], 12.0, tol );
    TEST_FLOATING_EQUALITY( F_sens_2_view[1], -11*(f/L)*(f/L)-2*f*f/(L*L*L)*(a-2.0), tol );
  }

  // Now change x and evaluate again.
  {
    Thyra::DetachedVectorView<double> x_view( *x );
    x_view[0] = 20.0;
    x_view[1] = 21.0;
  }
  // We need to call initializePointState again due to the vector
  // being cloned inside.
  stepper->setInitialCondition(pointInArgs);
  model->initializePointState(Teuchos::inOutArg(*stepper),false);

  model->evalModel(inArgs,outArgs);
  {
    TEST_EQUALITY_CONST( F_sens->domain()->dim(), 3 );
    TEST_EQUALITY_CONST( F_sens->range()->dim(), 2 );
    RCP<VectorBase<double> > F_sens_0 = F_sens->col(0);
    RCP<VectorBase<double> > F_sens_1 = F_sens->col(1);
    RCP<VectorBase<double> > F_sens_2 = F_sens->col(2);
    TEST_EQUALITY_CONST( F_sens_0->space()->dim(), 2 );
    TEST_EQUALITY_CONST( F_sens_1->space()->dim(), 2 );
    TEST_EQUALITY_CONST( F_sens_2->space()->dim(), 2 );

    Thyra::DetachedVectorView<double> F_sens_0_view( *F_sens_0 );
    TEST_FLOATING_EQUALITY( F_sens_0_view[0], 8.0, tol );
    TEST_FLOATING_EQUALITY( F_sens_0_view[1], -7.0*(f/L)*(f/L)+(f*f)/(L*L), tol );

    Thyra::DetachedVectorView<double> F_sens_1_view( *F_sens_1 );
    TEST_FLOATING_EQUALITY( F_sens_1_view[0], 10.0, tol );
    TEST_FLOATING_EQUALITY( F_sens_1_view[1], -9*(f/L)*(f/L)+2*f/(L*L)*(a-20.0), tol );

    Thyra::DetachedVectorView<double> F_sens_2_view( *F_sens_2 );
    TEST_FLOATING_EQUALITY( F_sens_2_view[0], 12.0, tol );
    TEST_FLOATING_EQUALITY( F_sens_2_view[1], -11*(f/L)*(f/L)-2*f*f/(L*L*L)*(a-20.0), tol );
  }

}
int main(int argc, char *argv[])
{

  using std::endl;
  typedef double Scalar;
  typedef double ScalarMag;
  using Teuchos::describe;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::rcp_implicit_cast;
  using Teuchos::rcp_dynamic_cast;
  using Teuchos::as;
  using Teuchos::ParameterList;
  using Teuchos::CommandLineProcessor;
  typedef Teuchos::ParameterList::PrintOptions PLPrintOptions;
  typedef Thyra::ModelEvaluatorBase MEB;
  typedef Thyra::DefaultMultiVectorProductVectorSpace<Scalar> DMVPVS;
  using Thyra::productVectorBase;

  bool result, success = true;

  Teuchos::GlobalMPISession mpiSession(&argc,&argv);

  RCP<Epetra_Comm> epetra_comm;
#ifdef HAVE_MPI
  epetra_comm = rcp( new Epetra_MpiComm(MPI_COMM_WORLD) );
#else
  epetra_comm = rcp( new Epetra_SerialComm );
#endif // HAVE_MPI

  RCP<Teuchos::FancyOStream>
    out = Teuchos::VerboseObjectBase::getDefaultOStream();

  try {

    //
    // Read commandline options
    //

    CommandLineProcessor clp;
    clp.throwExceptions(false);
    clp.addOutputSetupOptions(true);

    std::string paramsFileName = "";
    clp.setOption( "params-file", &paramsFileName,
      "File name for XML parameters" );

    std::string extraParamsString = "";
    clp.setOption( "extra-params", &extraParamsString,
      "Extra XML parameters" );

    std::string extraParamsFile = "";
    clp.setOption( "extra-params-file", &extraParamsFile, "File containing extra parameters in XML format.");

    double maxStateError = 1e-6;
    clp.setOption( "max-state-error", &maxStateError,
      "The maximum allowed error in the integrated state in relation to the exact state solution" );

    double finalTime = 1e-3;
    clp.setOption( "final-time", &finalTime,
      "Final integration time (initial time is 0.0)" );

    int numTimeSteps = 10;
    clp.setOption( "num-time-steps", &numTimeSteps,
      "Number of (fixed) time steps.  If <= 0.0, then variable time steps are taken" );

    bool useBDF = false;
    clp.setOption( "use-BDF", "use-BE", &useBDF,
      "Use BDF or Backward Euler (BE)" );

    bool useIRK = false;
    clp.setOption( "use-IRK", "use-other", &useIRK,
      "Use IRK or something" );

    bool doFwdSensSolve = false;
    clp.setOption( "fwd-sens-solve", "state-solve", &doFwdSensSolve,
      "Do the forward sensitivity solve or just the state solve" );

    bool doFwdSensErrorControl = false;
    clp.setOption( "fwd-sens-err-cntrl", "no-fwd-sens-err-cntrl", &doFwdSensErrorControl,
      "Do error control on the forward sensitivity solve or not" );

    double maxRestateError = 0.0;
    clp.setOption( "max-restate-error", &maxRestateError,
      "The maximum allowed error between the state integrated by itself verses integrated along with DxDp" );

    double maxSensError = 1e-4;
    clp.setOption( "max-sens-error", &maxSensError,
      "The maximum allowed error in the integrated sensitivity in relation to"
      " the finite-difference sensitivity" );

    Teuchos::EVerbosityLevel verbLevel = Teuchos::VERB_DEFAULT;
    setVerbosityLevelOption( "verb-level", &verbLevel,
      "Top-level verbosity level.  By default, this gets deincremented as you go deeper into numerical objects.",
      &clp );

    bool testExactSensitivity = false;
    clp.setOption( "test-exact-sens", "no-test-exact-sens", &testExactSensitivity,
      "Test the exact sensitivity with finite differences or not." );

    bool dumpFinalSolutions = false;
    clp.setOption(
      "dump-final-solutions", "no-dump-final-solutions", &dumpFinalSolutions,
      "Determine if the final solutions are dumpped or not." );

    CommandLineProcessor::EParseCommandLineReturn parse_return = clp.parse(argc,argv);
    if( parse_return != CommandLineProcessor::PARSE_SUCCESSFUL ) return parse_return;

    if ( Teuchos::VERB_DEFAULT == verbLevel )
      verbLevel = Teuchos::VERB_LOW;

    const Teuchos::EVerbosityLevel
      solnVerbLevel = ( dumpFinalSolutions ? Teuchos::VERB_EXTREME : verbLevel );

    //
    // Get the base parameter list that all other parameter lists will be read
    // from.
    //

    RCP<ParameterList>
      paramList = Teuchos::parameterList();
    if (paramsFileName.length())
      updateParametersFromXmlFile( paramsFileName, paramList.ptr() );
    if(extraParamsFile.length())
      Teuchos::updateParametersFromXmlFile( "./"+extraParamsFile, paramList.ptr() );
    if (extraParamsString.length())
      updateParametersFromXmlString( extraParamsString, paramList.ptr() );

    if (testExactSensitivity) {
      paramList->sublist(DiagonalTransientModel_name).set("Exact Solution as Response",true);
    }

    paramList->validateParameters(*getValidParameters(),0); // Only validate top level lists!

    //
    // Create the Stratimikos linear solver factory.
    //
    // This is the linear solve strategy that will be used to solve for the
    // linear system with the W.
    //

    Stratimikos::DefaultLinearSolverBuilder linearSolverBuilder;
    linearSolverBuilder.setParameterList(sublist(paramList,Stratimikos_name));
    RCP<Thyra::LinearOpWithSolveFactoryBase<Scalar> >
      W_factory = createLinearSolveStrategy(linearSolverBuilder);

    //
    // Create the underlying EpetraExt::ModelEvaluator
    //

    RCP<EpetraExt::DiagonalTransientModel>
      epetraStateModel = EpetraExt::diagonalTransientModel(
        epetra_comm,
        sublist(paramList,DiagonalTransientModel_name)
        );

    *out <<"\nepetraStateModel valid options:\n";
    epetraStateModel->getValidParameters()->print(
      *out, PLPrintOptions().indent(2).showTypes(true).showDoc(true)
      );

    //
    // Create the Thyra-wrapped ModelEvaluator
    //

    RCP<Thyra::ModelEvaluator<double> >
      stateModel = epetraModelEvaluator(epetraStateModel,W_factory);

    *out << "\nParameter names = " << *stateModel->get_p_names(0) << "\n";

    //
    // Create the Rythmos stateStepper
    //

    RCP<Rythmos::TimeStepNonlinearSolver<double> >
      nonlinearSolver = Rythmos::timeStepNonlinearSolver<double>();
    RCP<ParameterList>
      nonlinearSolverPL = sublist(paramList,TimeStepNonlinearSolver_name);
    nonlinearSolverPL->get("Default Tol",1e-3*maxStateError); // Set default if not set
    nonlinearSolver->setParameterList(nonlinearSolverPL);

    RCP<Rythmos::StepperBase<Scalar> > stateStepper;

    if (useBDF) {
      stateStepper = rcp(
        new Rythmos::ImplicitBDFStepper<double>(
          stateModel, nonlinearSolver
          )
        );
    }
    else if (useIRK) {
      // We need a separate LOWSFB object for the IRK stepper
      RCP<Thyra::LinearOpWithSolveFactoryBase<Scalar> >
        irk_W_factory = createLinearSolveStrategy(linearSolverBuilder);
      RCP<Rythmos::RKButcherTableauBase<double> > irkbt = Rythmos::createRKBT<double>("Backward Euler");
      stateStepper = Rythmos::implicitRKStepper<double>(
        stateModel, nonlinearSolver, irk_W_factory, irkbt
        );
    }
    else {
      stateStepper = rcp(
        new Rythmos::BackwardEulerStepper<double>(
          stateModel, nonlinearSolver
          )
        );
    }

    *out <<"\nstateStepper:\n" << describe(*stateStepper,verbLevel);
    *out <<"\nstateStepper valid options:\n";
    stateStepper->getValidParameters()->print(
      *out, PLPrintOptions().indent(2).showTypes(true).showDoc(true)
      );

    stateStepper->setParameterList(sublist(paramList,RythmosStepper_name));

    //
    // Setup finite difference objects that will be used for tests
    //

    Thyra::DirectionalFiniteDiffCalculator<Scalar> fdCalc;
    fdCalc.setParameterList(sublist(paramList,FdCalc_name));
    fdCalc.setOStream(out);
    fdCalc.setVerbLevel(verbLevel);

    //
    // Use a StepperAsModelEvaluator to integrate the state
    //

    const MEB::InArgs<Scalar>
      state_ic = stateModel->getNominalValues();
    *out << "\nstate_ic:\n" << describe(state_ic,verbLevel);

    RCP<Rythmos::IntegratorBase<Scalar> > integrator;
    {
      RCP<ParameterList>
        integratorPL = sublist(paramList,RythmosIntegrator_name);
      integratorPL->set( "Take Variable Steps", as<bool>(numTimeSteps < 0) );
      integratorPL->set( "Fixed dt", as<double>((finalTime - state_ic.get_t())/numTimeSteps) );
      RCP<Rythmos::IntegratorBase<Scalar> >
        defaultIntegrator = Rythmos::controlledDefaultIntegrator<Scalar>(
          Rythmos::simpleIntegrationControlStrategy<Scalar>(integratorPL)
          );
      integrator = defaultIntegrator;
    }

    RCP<Rythmos::StepperAsModelEvaluator<Scalar> >
      stateIntegratorAsModel = Rythmos::stepperAsModelEvaluator(
        stateStepper, integrator, state_ic
        );
    stateIntegratorAsModel->setVerbLevel(verbLevel);

    *out << "\nUse the StepperAsModelEvaluator to integrate state x(p,finalTime) ... \n";

    RCP<Thyra::VectorBase<Scalar> > x_final;

    {

      Teuchos::OSTab tab(out);

      x_final = createMember(stateIntegratorAsModel->get_g_space(0));

      eval_g(
        *stateIntegratorAsModel,
        0, *state_ic.get_p(0),
        finalTime,
        0, &*x_final
        );

      *out
        << "\nx_final = x(p,finalTime) evaluated using stateIntegratorAsModel:\n"
        << describe(*x_final,solnVerbLevel);

    }

    //
    // Test the integrated state against the exact analytical state solution
    //

    RCP<const Thyra::VectorBase<Scalar> >
      exact_x_final = create_Vector(
        epetraStateModel->getExactSolution(finalTime),
        stateModel->get_x_space()
        );

    result = Thyra::testRelNormDiffErr(
      "exact_x_final", *exact_x_final, "x_final", *x_final,
      "maxStateError", maxStateError, "warningTol", 1.0, // Don't warn
      &*out, solnVerbLevel
      );
    if (!result) success = false;

    //
    // Solve and test the forward sensitivity computation
    //

    if (doFwdSensSolve) {

      //
      // Create the forward sensitivity stepper
      //

      RCP<Rythmos::ForwardSensitivityStepper<Scalar> > stateAndSensStepper =
        Rythmos::forwardSensitivityStepper<Scalar>();
      if (doFwdSensErrorControl) {
        stateAndSensStepper->initializeDecoupledSteppers(
          stateModel, 0, stateModel->getNominalValues(),
          stateStepper, nonlinearSolver,
          integrator->cloneIntegrator(), finalTime
          );
      }
      else {
        stateAndSensStepper->initializeSyncedSteppers(
          stateModel, 0, stateModel->getNominalValues(),
          stateStepper, nonlinearSolver
          );
        // The above call will result in stateStepper and nonlinearSolver being
        // cloned.  This helps to ensure consistency between the state and
        // sensitivity computations!
      }

      //
      // Set the initial condition for the state and forward sensitivities
      //

      RCP<Thyra::VectorBase<Scalar> > s_bar_init
        = createMember(stateAndSensStepper->getFwdSensModel()->get_x_space());
      assign( s_bar_init.ptr(), 0.0 );
      RCP<Thyra::VectorBase<Scalar> > s_bar_dot_init
        = createMember(stateAndSensStepper->getFwdSensModel()->get_x_space());
      assign( s_bar_dot_init.ptr(), 0.0 );
      // Above, I believe that these are the correct initial conditions for
      // s_bar and s_bar_dot given how the EpetraExt::DiagonalTransientModel
      // is currently implemented!

      RCP<const Rythmos::StateAndForwardSensitivityModelEvaluator<Scalar> >
        stateAndSensModel = stateAndSensStepper->getStateAndFwdSensModel();

      MEB::InArgs<Scalar>
        state_and_sens_ic = stateAndSensStepper->getModel()->createInArgs();

      // Copy time, parameters etc.
      state_and_sens_ic.setArgs(state_ic);
      // Set initial condition for x_bar = [ x; s_bar ]
      state_and_sens_ic.set_x(
        stateAndSensModel->create_x_bar_vec(state_ic.get_x(),s_bar_init)
        );
      // Set initial condition for x_bar_dot = [ x_dot; s_bar_dot ]
      state_and_sens_ic.set_x_dot(
        stateAndSensModel->create_x_bar_vec(state_ic.get_x_dot(),s_bar_dot_init)
        );

      *out << "\nstate_and_sens_ic:\n" << describe(state_and_sens_ic,verbLevel);

      stateAndSensStepper->setInitialCondition(state_and_sens_ic);

      //
      // Use a StepperAsModelEvaluator to integrate the state+sens
      //

      RCP<Rythmos::StepperAsModelEvaluator<Scalar> >
        stateAndSensIntegratorAsModel = Rythmos::stepperAsModelEvaluator(
          rcp_implicit_cast<Rythmos::StepperBase<Scalar> >(stateAndSensStepper),
          integrator, state_and_sens_ic
          );
      stateAndSensIntegratorAsModel->setVerbLevel(verbLevel);

      *out << "\nUse the StepperAsModelEvaluator to integrate state + sens x_bar(p,finalTime) ... \n";

      RCP<Thyra::VectorBase<Scalar> > x_bar_final;

      {

        Teuchos::OSTab tab(out);

        x_bar_final = createMember(stateAndSensIntegratorAsModel->get_g_space(0));

        eval_g(
          *stateAndSensIntegratorAsModel,
          0, *state_ic.get_p(0),
          finalTime,
          0, &*x_bar_final
          );

        *out
          << "\nx_bar_final = x_bar(p,finalTime) evaluated using stateAndSensIntegratorAsModel:\n"
          << describe(*x_bar_final,solnVerbLevel);

      }

      //
      // Test that the state computed above is same as computed initially!
      //

      *out << "\nChecking that x(p,finalTime) computed as part of x_bar above is the same ...\n";

      {

        Teuchos::OSTab tab(out);

        RCP<const Thyra::VectorBase<Scalar> >
          x_in_x_bar_final = productVectorBase<Scalar>(x_bar_final)->getVectorBlock(0);

        result = Thyra::testRelNormDiffErr<Scalar>(
          "x_final", *x_final,
          "x_in_x_bar_final", *x_in_x_bar_final,
          "maxRestateError", maxRestateError,
          "warningTol", 1.0, // Don't warn
          &*out, solnVerbLevel
          );
        if (!result) success = false;

      }

      //
      // Compute DxDp using finite differences
      //

      *out << "\nApproximating DxDp(p,t) using directional finite differences of integrator for x(p,t) ...\n";

      RCP<Thyra::MultiVectorBase<Scalar> > DxDp_fd_final;

      {

        Teuchos::OSTab tab(out);


        MEB::InArgs<Scalar>
          fdBasePoint = stateIntegratorAsModel->createInArgs();

        fdBasePoint.set_t(finalTime);
        fdBasePoint.set_p(0,stateModel->getNominalValues().get_p(0));

        DxDp_fd_final = createMembers(
          stateIntegratorAsModel->get_g_space(0),
          stateIntegratorAsModel->get_p_space(0)->dim()
          );

        typedef Thyra::DirectionalFiniteDiffCalculatorTypes::SelectedDerivatives
          SelectedDerivatives;

        MEB::OutArgs<Scalar> fdOutArgs =
          fdCalc.createOutArgs(
            *stateIntegratorAsModel,
            SelectedDerivatives().supports(MEB::OUT_ARG_DgDp,0,0)
            );
        fdOutArgs.set_DgDp(0,0,DxDp_fd_final);

        // Silence the model evaluators that are called.  The fdCal object
        // will show all of the inputs and outputs for each call.
        stateStepper->setVerbLevel(Teuchos::VERB_NONE);
        stateIntegratorAsModel->setVerbLevel(Teuchos::VERB_NONE);

        fdCalc.calcDerivatives(
          *stateIntegratorAsModel, fdBasePoint,
          stateIntegratorAsModel->createOutArgs(), // Don't bother with function value
          fdOutArgs
          );

        *out
          << "\nFinite difference DxDp_fd_final = DxDp(p,finalTime): "
          << describe(*DxDp_fd_final,solnVerbLevel);

      }

      //
      // Test that the integrated sens and the F.D. sens are similar
      //

      *out << "\nChecking that integrated DxDp(p,finalTime) and finite-diff DxDp(p,finalTime) are similar ...\n";

      {

        Teuchos::OSTab tab(out);

        RCP<const Thyra::VectorBase<Scalar> >
          DxDp_vec_final = Thyra::productVectorBase<Scalar>(x_bar_final)->getVectorBlock(1);

        RCP<const Thyra::VectorBase<Scalar> >
          DxDp_fd_vec_final = Thyra::multiVectorProductVector(
            rcp_dynamic_cast<const Thyra::DefaultMultiVectorProductVectorSpace<Scalar> >(
              DxDp_vec_final->range()
              ),
            DxDp_fd_final
            );

        result = Thyra::testRelNormDiffErr(
          "DxDp_vec_final", *DxDp_vec_final,
          "DxDp_fd_vec_final", *DxDp_fd_vec_final,
          "maxSensError", maxSensError,
          "warningTol", 1.0, // Don't warn
          &*out, solnVerbLevel
          );
        if (!result) success = false;

      }

    }

  }
  TEUCHOS_STANDARD_CATCH_STATEMENTS(true,*out,success);

  if(success)
    *out << "\nEnd Result: TEST PASSED" << endl;
  else
    *out << "\nEnd Result: TEST FAILED" << endl;

  return ( success ? 0 : 1 );

} // end main() [Doxygen looks for this!]
void ImplicitRKModelEvaluator<Scalar>::evalModelImpl(
  const Thyra::ModelEvaluatorBase::InArgs<Scalar>& inArgs_bar,
  const Thyra::ModelEvaluatorBase::OutArgs<Scalar>& outArgs_bar
  ) const
{

  using Teuchos::rcp_dynamic_cast;
  typedef ScalarTraits<Scalar> ST;
  typedef Thyra::ModelEvaluatorBase MEB;
  typedef Thyra::VectorBase<Scalar> VB;
  typedef Thyra::ProductVectorBase<Scalar> PVB;
  typedef Thyra::BlockedLinearOpBase<Scalar> BLWB;

  TEST_FOR_EXCEPTION( !isInitialized_, std::logic_error,
      "Error!  initializeIRKModel must be called before evalModel\n"
      );

  TEST_FOR_EXCEPTION( !setTimeStepPointCalled_, std::logic_error,
      "Error!  setTimeStepPoint must be called before evalModel"
      );

  THYRA_MODEL_EVALUATOR_DECORATOR_EVAL_MODEL_GEN_BEGIN(
    "Rythmos::ImplicitRKModelEvaluator",inArgs_bar,outArgs_bar,daeModel_
    );

  //
  // A) Unwrap the inArgs and outArgs to get at product vectors and block op
  //

  const RCP<const PVB> x_bar = rcp_dynamic_cast<const PVB>(inArgs_bar.get_x(), true);
  const RCP<PVB> f_bar = rcp_dynamic_cast<PVB>(outArgs_bar.get_f(), true);
  const RCP<BLWB> W_op_bar = rcp_dynamic_cast<BLWB>(outArgs_bar.get_W_op(), true);

  //
  // B) Assemble f_bar and W_op_bar by looping over stages
  //

  MEB::InArgs<Scalar> daeInArgs = daeModel_->createInArgs();
  MEB::OutArgs<Scalar> daeOutArgs = daeModel_->createOutArgs();
  const RCP<VB> x_i = createMember(daeModel_->get_x_space());
  daeInArgs.setArgs(basePoint_);
  
  const int numStages = irkButcherTableau_->numStages();

  for ( int i = 0; i < numStages; ++i ) {

    // B.1) Setup the DAE's inArgs for stage f(i) ...
    assembleIRKState( i, irkButcherTableau_->A(), delta_t_, *x_old_, *x_bar, outArg(*x_i) );
    daeInArgs.set_x( x_i );
    daeInArgs.set_x_dot( x_bar->getVectorBlock(i) );
    daeInArgs.set_t( t_old_ + irkButcherTableau_->c()(i) * delta_t_ );
    Scalar alpha = ST::zero();
    if (i == 0) {
      alpha = ST::one();
    } else {
      alpha = ST::zero();
    }
    Scalar beta = delta_t_ * irkButcherTableau_->A()(i,0);
    daeInArgs.set_alpha( alpha );
    daeInArgs.set_beta( beta );

    // B.2) Setup the DAE's outArgs for stage f(i) ...
    if (!is_null(f_bar))
      daeOutArgs.set_f( f_bar->getNonconstVectorBlock(i) );
    if (!is_null(W_op_bar)) {
      daeOutArgs.set_W_op(W_op_bar->getNonconstBlock(i,0));
    }

    // B.3) Compute f_bar(i) and/or W_op_bar(i,0) ...
    daeModel_->evalModel( daeInArgs, daeOutArgs );
    daeOutArgs.set_f(Teuchos::null);
    daeOutArgs.set_W_op(Teuchos::null);
    
    // B.4) Evaluate the rest of the W_op_bar(i,j=1...numStages-1) ...
    if (!is_null(W_op_bar)) {
      for ( int j = 1; j < numStages; ++j ) {
        alpha = ST::zero();
        if (i == j) {
          alpha = ST::one();
        } else {
          alpha = ST::zero();
        }
        beta = delta_t_ * irkButcherTableau_->A()(i,j);
        daeInArgs.set_alpha( alpha );
        daeInArgs.set_beta( beta );
        daeOutArgs.set_W_op(W_op_bar->getNonconstBlock(i,j));
        daeModel_->evalModel( daeInArgs, daeOutArgs );
        daeOutArgs.set_W_op(Teuchos::null);
      }
    }

  }
  
  THYRA_MODEL_EVALUATOR_DECORATOR_EVAL_MODEL_END();
  
}