void ImplicitBDFStepperRampingStepControl<Scalar>::initialize(
  const StepperBase<Scalar>& stepper)
{
  // Initialize can be called from the stepper when setInitialCondition
  // is called.
  using Teuchos::as;
  typedef Teuchos::ScalarTraits<Scalar> ST;
  using Thyra::createMember;

  // Set initial time:
  TimeRange<Scalar> stepperRange = stepper.getTimeRange();
  TEUCHOS_TEST_FOR_EXCEPTION(
      !stepperRange.isValid(),
      std::logic_error,
      "Error, Stepper does not have valid time range for initialization "
      "of ImplicitBDFStepperRampingStepControl!\n");

  if (is_null(parameterList_)) {
    RCP<Teuchos::ParameterList> emptyParameterList =
      Teuchos::rcp(new Teuchos::ParameterList);
    this->setParameterList(emptyParameterList);
  }

  if (is_null(errWtVecCalc_)) {
    RCP<ImplicitBDFStepperErrWtVecCalc<Scalar> > IBDFErrWtVecCalc =
      rcp(new ImplicitBDFStepperErrWtVecCalc<Scalar>());
    errWtVecCalc_ = IBDFErrWtVecCalc;
  }

  stepControlState_ = UNINITIALIZED;

  requestedStepSize_ = Scalar(-1.0);
  currentStepSize_ = initialStepSize_;
  currentOrder_ = 1;
  nextStepSize_ = initialStepSize_;
  nextOrder_ = 1;
  numberOfSteps_ = 0;
  totalNumberOfFailedSteps_ = 0;
  countOfConstantStepsAfterFailure_ = 0;

  if (is_null(delta_)) {
    delta_ = createMember(stepper.get_x_space());
  }
  if (is_null(errWtVec_)) {
    errWtVec_ = createMember(stepper.get_x_space());
  }
  V_S(delta_.ptr(),ST::zero());

  if ( doOutput_(Teuchos::VERB_HIGH) ) {
    RCP<Teuchos::FancyOStream> out = this->getOStream();
    Teuchos::OSTab ostab(out,1,"initialize");
    *out << "currentOrder_ = " << currentOrder_ << std::endl;
    *out << "numberOfSteps_ = " << numberOfSteps_ << std::endl;
  }

  setStepControlState_(BEFORE_FIRST_STEP);

}
Exemplo n.º 2
0
const Teuchos::RCP<TriKota::DiagonalROME<Scalar> >
createModel(
    const int globalDim,
    const typename Teuchos::ScalarTraits<Scalar>::magnitudeType &g_offset
)
{
    using Teuchos::RCP;

    const RCP<const Teuchos::Comm<Thyra::Ordinal> > comm =
        Teuchos::DefaultComm<Thyra::Ordinal>::getComm();

    const int numProcs = comm->getSize();
    TEUCHOS_TEST_FOR_EXCEPT_MSG( numProcs > globalDim,
                                 "Error, the number of processors can not be greater than the global"
                                 " dimension of the vectors!." );
    const int localDim = globalDim / numProcs;
    const int localDimRemainder = globalDim % numProcs;
    TEUCHOS_TEST_FOR_EXCEPT_MSG( localDimRemainder != 0,
                                 "Error, the number of processors must divide into the global number"
                                 " of elements exactly for now!." );

    const RCP<TriKota::DiagonalROME<Scalar> > model =
        Teuchos::rcp(new TriKota::DiagonalROME<Scalar>(localDim));
    const RCP<const Thyra::VectorSpaceBase<Scalar> > p_space = model->get_p_space(0);
    const RCP<Thyra::VectorBase<Scalar> > ps = createMember(p_space);
    const Scalar ps_val = 2.0;
    Thyra::V_S(ps.ptr(), ps_val);
    model->setSolutionVector(ps);
    model->setScalarOffset(g_offset);

    return model;
}
void ExplicitModelEvaluator<Scalar>::
buildInverseMassMatrix() const
{
  typedef Thyra::ModelEvaluatorBase MEB;
  using Teuchos::RCP;
  using Thyra::createMember;
  
  RCP<const Thyra::ModelEvaluator<Scalar> > me = this->getUnderlyingModel();

  // first allocate space for the mass matrix
  RCP<Thyra::LinearOpBase<Scalar> > mass = me->create_W_op();

  // intialize a zero to get rid of the x-dot 
  if(zero_==Teuchos::null) {
    zero_ = Thyra::createMember(*me->get_x_space());
    Thyra::assign(zero_.ptr(),0.0);
  }
  
  // request only the mass matrix from the physics
  // Model evaluator builds: alpha*u_dot + beta*F(u) = 0
  MEB::InArgs<Scalar>  inArgs  = me->createInArgs();
  inArgs.set_x(createMember(me->get_x_space()));
  inArgs.set_x_dot(zero_);
  inArgs.set_alpha(-1.0);
  inArgs.set_beta(0.0);

  // set the one time beta to ensure dirichlet conditions
  // are correctly included in the mass matrix: do it for
  // both epetra and Tpetra. If a panzer model evaluator has
  // not been passed in...oh well you get what you asked for!
  if(panzerModel_!=Teuchos::null)
    panzerModel_->setOneTimeDirichletBeta(-1.0);
  else if(panzerEpetraModel_!=Teuchos::null)
    panzerEpetraModel_->setOneTimeDirichletBeta(-1.0);

  // set only the mass matrix
  MEB::OutArgs<Scalar> outArgs = me->createOutArgs();
  outArgs.set_W_op(mass);

  // this will fill the mass matrix operator 
  me->evalModel(inArgs,outArgs);

  if(!massLumping_) {
    invMassMatrix_ = Thyra::inverse<Scalar>(*me->get_W_factory(),mass);
  }
  else {
    // build lumped mass matrix (assumes all positive mass entries, does a simple sum)
    Teuchos::RCP<Thyra::VectorBase<Scalar> > ones = Thyra::createMember(*mass->domain());
    Thyra::assign(ones.ptr(),1.0);

    RCP<Thyra::VectorBase<Scalar> > invLumpMass = Thyra::createMember(*mass->range());
    Thyra::apply(*mass,Thyra::NOTRANS,*ones,invLumpMass.ptr());
    Thyra::reciprocal(*invLumpMass,invLumpMass.ptr());

    invMassMatrix_ = Thyra::diagonal(invLumpMass);
  }
}
Simple2DModelEvaluator<Scalar>::Simple2DModelEvaluator()
  : x_space_(Thyra::defaultSpmdVectorSpace<Scalar>(2)),
    f_space_(x_space_),
    W_factory_(Thyra::defaultSerialDenseLinearOpWithSolveFactory<Scalar>()),
    d_(0.0),
    p_(x_space_->dim(), Teuchos::ScalarTraits<Scalar>::zero()),
    showGetInvalidArg_(false)
{

  using Teuchos::RCP;
  using Thyra::VectorBase;
  using Thyra::createMember;
  typedef Thyra::ModelEvaluatorBase MEB;
  typedef Teuchos::ScalarTraits<Scalar> ST;
  
  MEB::InArgsSetup<Scalar> inArgs;
  inArgs.setModelEvalDescription(this->description());
  inArgs.setSupports(MEB::IN_ARG_x);
  prototypeInArgs_ = inArgs;
  
  MEB::OutArgsSetup<Scalar> outArgs;
  outArgs.setModelEvalDescription(this->description());
  outArgs.setSupports(MEB::OUT_ARG_f);
  outArgs.setSupports(MEB::OUT_ARG_W_op);
  outArgs.setSupports(MEB::OUT_ARG_W_prec);
  prototypeOutArgs_ = outArgs;

  nominalValues_ = inArgs;
  x0_ = createMember(x_space_);
  V_S(x0_.ptr(), ST::zero());
  nominalValues_.set_x(x0_);

  set_d(10.0);
  set_p(Teuchos::tuple<Scalar>(2.0, 0.0)());
  set_x0(Teuchos::tuple<Scalar>(1.0, 1.0)());

}
NonlinearCGUtils::ESolveReturn
NonlinearCG<Scalar>::doSolve(
  const Ptr<Thyra::VectorBase<Scalar> > &p_inout,
  const Ptr<ScalarMag> &g_opt_out,
  const Ptr<const ScalarMag> &g_reduct_tol_in,
  const Ptr<const ScalarMag> &g_grad_tol_in,
  const Ptr<const ScalarMag> &alpha_init_in,
  const Ptr<int> &numIters_out
  )
{

  typedef ScalarTraits<Scalar> ST;
  typedef ScalarTraits<ScalarMag> SMT;
  
  using Teuchos::null;
  using Teuchos::as;
  using Teuchos::tuple;
  using Teuchos::rcpFromPtr;
  using Teuchos::optInArg;
  using Teuchos::inOutArg;
  using GlobiPack::computeValue;
  using GlobiPack::PointEval1D;
  using Thyra::VectorSpaceBase;
  using Thyra::VectorBase;
  using Thyra::MultiVectorBase;
  using Thyra::scalarProd;
  using Thyra::createMember;
  using Thyra::createMembers;
  using Thyra::get_ele;
  using Thyra::norm;
  using Thyra::V_StV;
  using Thyra::Vt_S;
  using Thyra::eval_g_DgDp;
  typedef Thyra::Ordinal Ordinal;
  typedef Thyra::ModelEvaluatorBase MEB;
  namespace NCGU = NonlinearCGUtils;
  using std::max;

  // Validate input

  g_opt_out.assert_not_null();

  // Set streams

  const RCP<Teuchos::FancyOStream> out = this->getOStream();
  linesearch_->setOStream(out);

  // Determine what step constants will be computed

  const bool compute_beta_PR =
    (
      solverType_ == NCGU::NONLINEAR_CG_PR_PLUS
      ||
      solverType_ == NCGU::NONLINEAR_CG_FR_PR
      );

  const bool compute_beta_HS = (solverType_ == NCGU::NONLINEAR_CG_HS);

  //
  // A) Set up the storage for the algorithm
  //
  
  const RCP<DefaultPolyLineSearchPointEvaluator<Scalar> >
    pointEvaluator = defaultPolyLineSearchPointEvaluator<Scalar>();

  const RCP<UnconstrainedOptMeritFunc1D<Scalar> >
    meritFunc = unconstrainedOptMeritFunc1D<Scalar>(
      model_, paramIndex_, responseIndex_ );

  const RCP<const VectorSpaceBase<Scalar> >
    p_space = model_->get_p_space(paramIndex_),
    g_space = model_->get_g_space(responseIndex_);

  // Stoarge for current iteration
  RCP<VectorBase<Scalar> >
    p_k = rcpFromPtr(p_inout),        // Current solution for p
    p_kp1 = createMember(p_space),    // Trial point for p (in line search)
    g_vec = createMember(g_space),    // Vector (size 1) form of objective g(p) 
    g_grad_k = createMember(p_space), // Gradient of g DgDp^T
    d_k = createMember(p_space),      // Search direction
    g_grad_k_diff_km1 = null;         // g_grad_k - g_grad_km1 (if needed)

  // Storage for previous iteration
  RCP<VectorBase<Scalar> >
    g_grad_km1 = null, // Will allocate if we need it!
    d_km1 = null; // Will allocate if we need it!
  ScalarMag
    alpha_km1 = SMT::zero(),
    g_km1 = SMT::zero(),
    g_grad_km1_inner_g_grad_km1 = SMT::zero(),
    g_grad_km1_inner_d_km1 = SMT::zero();
  
  if (compute_beta_PR || compute_beta_HS) {
    g_grad_km1 = createMember(p_space);
    g_grad_k_diff_km1 = createMember(p_space);
  }
  
  if (compute_beta_HS) {
    d_km1 = createMember(p_space);
  }

  //
  // B) Do the nonlinear CG iterations
  //

  *out << "\nStarting nonlinear CG iterations ...\n";

  if (and_conv_tests_) {
    *out << "\nNOTE: Using AND of convergence tests!\n";
  }
  else {
    *out << "\nNOTE: Using OR of convergence tests!\n";
  }

  const Scalar alpha_init =
    ( !is_null(alpha_init_in) ? *alpha_init_in : alpha_init_ );
  const Scalar g_reduct_tol =
    ( !is_null(g_reduct_tol_in) ? *g_reduct_tol_in : g_reduct_tol_ );
  const Scalar g_grad_tol =
    ( !is_null(g_grad_tol_in) ? *g_grad_tol_in : g_grad_tol_ );

  const Ordinal globalDim = p_space->dim();

  bool foundSolution = false;
  bool fatalLinesearchFailure = false;
  bool restart = true;
  int numConsecutiveLineSearchFailures = 0;

  int numConsecutiveIters = 0;

  for (numIters_ = 0; numIters_ < maxIters_; ++numIters_, ++numConsecutiveIters) {

    Teuchos::OSTab tab(out);

    *out << "\nNonlinear CG Iteration k = " << numIters_ << "\n";

    Teuchos::OSTab tab2(out);

    //
    // B.1) Evaluate the point (on first iteration)
    //
    
    eval_g_DgDp(
      *model_, paramIndex_, *p_k, responseIndex_,
      numIters_ == 0 ? g_vec.ptr() : null, // Only on first iteration
      MEB::Derivative<Scalar>(g_grad_k, MEB::DERIV_MV_GRADIENT_FORM) );

    const ScalarMag g_k = get_ele(*g_vec, 0);
    // Above: If numIters_ > 0, then g_vec was updated in meritFunc->eval(...).

    //
    // B.2) Check for convergence
    //

    // B.2.a) ||g_k - g_km1|| |g_k + g_mag| <= g_reduct_tol

    bool g_reduct_converged = false;

    if (numIters_ > 0) {

      const ScalarMag g_reduct = g_k - g_km1;
      
      *out << "\ng_k - g_km1 = "<<g_reduct<<"\n";
      
      const ScalarMag g_reduct_err =
        SMT::magnitude(g_reduct / SMT::magnitude(g_k + g_mag_));
      
      g_reduct_converged = (g_reduct_err <= g_reduct_tol);
      
      *out << "\nCheck convergence: |g_k - g_km1| / |g_k + g_mag| = "<<g_reduct_err
           << (g_reduct_converged ? " <= " : " > ")
           << "g_reduct_tol = "<<g_reduct_tol<<"\n";
      
    }

    // B.2.b) ||g_grad_k|| g_mag <= g_grad_tol

    const Scalar g_grad_k_inner_g_grad_k = scalarProd<Scalar>(*g_grad_k, *g_grad_k);
    const ScalarMag norm_g_grad_k = ST::magnitude(ST::squareroot(g_grad_k_inner_g_grad_k));

    *out << "\n||g_grad_k|| = "<<norm_g_grad_k << "\n";

    const ScalarMag g_grad_err = norm_g_grad_k / g_mag_;

    const bool g_grad_converged = (g_grad_err <= g_grad_tol);

    *out << "\nCheck convergence: ||g_grad_k|| / g_mag = "<<g_grad_err
         << (g_grad_converged ? " <= " : " > ")
         << "g_grad_tol = "<<g_grad_tol<<"\n";

    // B.2.c) Convergence status
    
    bool isConverged = false;
    if (and_conv_tests_) {
      isConverged = g_reduct_converged && g_grad_converged;
    }
    else {
      isConverged = g_reduct_converged || g_grad_converged;
    }

    if (isConverged) {
      if (numIters_ < minIters_) {
        *out << "\nnumIters="<<numIters_<<" < minIters="<<minIters_
             << ", continuing on!\n";
      }
      else {
        *out << "\nFound solution, existing algorithm!\n";
        foundSolution = true;
      }
    }
    else {
      *out << "\nNot converged!\n";
    }
    
    if (foundSolution) {
      break;
    }

    //
    // B.3) Compute the search direction d_k
    //

    if (numConsecutiveIters == globalDim) {

      *out << "\nThe number of consecutive iterations exceeds the"
           << " global dimension so restarting!\n";

      restart = true;

    }

    if (restart) {

      *out << "\nResetting search direction back to steppest descent!\n";

      // d_k = -g_grad_k
      V_StV( d_k.ptr(), as<Scalar>(-1.0), *g_grad_k );

      restart = false;

    }
    else {
      
      // g_grad_k - g_grad_km1
      if (!is_null(g_grad_k_diff_km1)) {
        V_VmV( g_grad_k_diff_km1.ptr(), *g_grad_k, *g_grad_km1 );
      }

      // beta_FR = inner(g_grad_k, g_grad_k) / inner(g_grad_km1, g_grad_km1)
      const Scalar beta_FR =
        g_grad_k_inner_g_grad_k / g_grad_km1_inner_g_grad_km1;
      *out << "\nbeta_FR = " << beta_FR << "\n";
      // NOTE: Computing beta_FR is free so we might as well just do it!

      // beta_PR = inner(g_grad_k, g_grad_k - g_grad_km1) /
      //    inner(g_grad_km1, g_grad_km1)
      Scalar beta_PR = ST::zero();
      if (compute_beta_PR) {
        beta_PR =
          inner(*g_grad_k, *g_grad_k_diff_km1) / g_grad_km1_inner_g_grad_km1;
        *out << "\nbeta_PR = " << beta_PR << "\n";
      }

      // beta_HS = inner(g_grad_k, g_grad_k - g_grad_km1) /
      //    inner(g_grad_k - g_grad_km1, d_km1)
      Scalar beta_HS = ST::zero();
      if (compute_beta_HS) {
        beta_HS =
          inner(*g_grad_k, *g_grad_k_diff_km1) / inner(*g_grad_k_diff_km1, *d_km1);
        *out << "\nbeta_HS = " << beta_HS << "\n";
      }
      
      Scalar beta_k = ST::zero();
      switch(solverType_) {
        case NCGU::NONLINEAR_CG_FR: {
          beta_k = beta_FR;
          break;
        }
        case NCGU::NONLINEAR_CG_PR_PLUS: {
          beta_k = max(beta_PR, ST::zero());
          break;
        }
        case NCGU::NONLINEAR_CG_FR_PR: {
          // NOTE: This does not seem to be working :-(
          if (numConsecutiveIters < 2) {
            beta_k = beta_PR;
          }
          else if (beta_PR < -beta_FR)
            beta_k = -beta_FR;
          else if (ST::magnitude(beta_PR) <= beta_FR)
            beta_k = beta_PR;
          else // beta_PR > beta_FR
            beta_k = beta_FR;
        }
        case NCGU::NONLINEAR_CG_HS: {
          beta_k = beta_HS;
          break;
        }
        default:
          TEUCHOS_TEST_FOR_EXCEPT(true);
      }
      *out << "\nbeta_k = " << beta_k << "\n";

      // d_k = beta_k * d_last + -g_grad_k
      if (!is_null(d_km1))
        V_StV( d_k.ptr(), beta_k, *d_km1 );
      else
        Vt_S( d_k.ptr(), beta_k );
      Vp_StV( d_k.ptr(), as<Scalar>(-1.0), *g_grad_k );

    }
    
    //
    // B.4) Perform the line search
    //

    // B.4.a) Compute the initial step length

    Scalar alpha_k = as<Scalar>(-1.0);

    if (numIters_ == 0) {
      alpha_k = alpha_init;
    }
    else {
      if (alpha_reinit_) {
        alpha_k = alpha_init;
      }
      else {
        alpha_k = alpha_km1;
        // ToDo: Implement better logic from Nocedal and Wright for selecting
        // this step length after first iteration!
      }
    }

    // B.4.b) Perform the linesearch (computing updated quantities in process)

    pointEvaluator->initialize(tuple<RCP<const VectorBase<Scalar> > >(p_k, d_k)());

    ScalarMag g_grad_k_inner_d_k = ST::zero();

    // Set up the merit function to only compute the value
    meritFunc->setEvaluationQuantities(pointEvaluator, p_kp1, g_vec, null);

    PointEval1D<ScalarMag> point_k(ST::zero(), g_k);
    if (linesearch_->requiresBaseDeriv()) {
      g_grad_k_inner_d_k = scalarProd(*g_grad_k, *d_k);
      point_k.Dphi = g_grad_k_inner_d_k;
    }

    ScalarMag g_kp1 = computeValue(*meritFunc, alpha_k);
    // NOTE: The above call updates p_kp1 and g_vec as well!

    PointEval1D<ScalarMag> point_kp1(alpha_k, g_kp1);

    const bool linesearchResult = linesearch_->doLineSearch(
      *meritFunc, point_k, inOutArg(point_kp1), null );

    alpha_k = point_kp1.alpha;
    g_kp1 = point_kp1.phi;

    if (linesearchResult) {
      numConsecutiveLineSearchFailures = 0;
    }
    else {
      if (numConsecutiveLineSearchFailures==0) {
        *out << "\nLine search failure, resetting the search direction!\n";
        restart = true;
      }
      if (numConsecutiveLineSearchFailures==1) {
        *out << "\nLine search failure on last iteration also, terminating algorithm!\n";
        fatalLinesearchFailure = true;
      }
      ++numConsecutiveLineSearchFailures;
    }

    if (fatalLinesearchFailure) {
      break;
    }

    //
    // B.5) Transition to the next iteration
    //
    
    alpha_km1 = alpha_k;
    g_km1 = g_k;
    g_grad_km1_inner_g_grad_km1 = g_grad_k_inner_g_grad_k;
    g_grad_km1_inner_d_km1 = g_grad_k_inner_d_k;
    std::swap(p_k, p_kp1);
    if (!is_null(g_grad_km1))
      std::swap(g_grad_km1, g_grad_k);
    if (!is_null(d_km1))
      std::swap(d_k, d_km1);
    
#ifdef TEUCHOS_DEBUG
    // Make sure we compute these correctly before they are used!
    V_S(g_grad_k.ptr(), ST::nan());
    V_S(p_kp1.ptr(), ST::nan());
#endif

  }

  //
  // C) Final clean up
  //
  
  // Get the most current value of g(p)
  *g_opt_out = get_ele(*g_vec, 0);

  // Make sure that the final value for p has been copied in!
  V_V( p_inout, *p_k );

  if (!is_null(numIters_out)) {
    *numIters_out = numIters_;
  }

  if (numIters_ == maxIters_) {
    *out << "\nMax nonlinear CG iterations exceeded!\n";
  }
  
  if (foundSolution) {
    return NonlinearCGUtils::SOLVE_SOLUTION_FOUND;
  }
  else if(fatalLinesearchFailure) {
    return NonlinearCGUtils::SOLVE_LINSEARCH_FAILURE;
  }

  // Else, the max number of iterations was exceeded
  return NonlinearCGUtils::SOLVE_MAX_ITERS_EXCEEDED;

}
void computeCubicSplineCoeff(
    const typename DataStore<Scalar>::DataStoreVector_t & data,
    const Ptr<CubicSplineCoeff<Scalar> > & coeffPtr
    )
{
  using Teuchos::outArg;
  typedef Teuchos::ScalarTraits<Scalar> ST;
  using Thyra::createMember;
  TEUCHOS_TEST_FOR_EXCEPTION( 
      (data.size() < 2), std::logic_error,
      "Error!  A minimum of two data points is required for this cubic spline."
      );
  // time data in the DataStoreVector should be unique and sorted 
  Array<Scalar> t;
  Array<Teuchos::RCP<const Thyra::VectorBase<Scalar> > > x_vec, xdot_vec;
  dataStoreVectorToVector<Scalar>( data, &t, &x_vec, &xdot_vec, NULL );
#ifdef HAVE_RYTHMOS_DEBUG
  assertTimePointsAreSorted<Scalar>( t );
#endif // HAVE_RYTHMOS_DEBUG
  // 11/18/08 tscoffe:  Question:  Should I erase everything in coeffPtr or
  // re-use what I can?  For now, I'll erase and create new each time.
  CubicSplineCoeff<Scalar>& coeff = *coeffPtr;
  // If there are only two points, then we do something special and just create
  // a linear polynomial between the points and return.
  if (t.size() == 2) {
    coeff.t.clear(); 
    coeff.a.clear(); coeff.b.clear(); coeff.c.clear(); coeff.d.clear();
    coeff.t.reserve(2); 
    coeff.a.reserve(1); coeff.b.reserve(1); coeff.c.reserve(1); coeff.d.reserve(1);
    coeff.t.push_back(t[0]);
    coeff.t.push_back(t[1]);
    coeff.a.push_back(x_vec[0]->clone_v());
    coeff.b.push_back(createMember(x_vec[0]->space()));
    coeff.c.push_back(createMember(x_vec[0]->space()));
    coeff.d.push_back(createMember(x_vec[0]->space()));
    Scalar h = coeff.t[1] - coeff.t[0];
    V_StVpStV(outArg(*coeff.b[0]),ST::one()/h,*x_vec[1],-ST::one()/h,*x_vec[0]);
    V_S(outArg(*coeff.c[0]),ST::zero());
    V_S(outArg(*coeff.d[0]),ST::zero());
    return;
  }
  // Data objects we'll need:
  int n = t.length()-1; // Number of intervals
  coeff.t.clear(); coeff.t.reserve(n+1);
  coeff.a.clear(); coeff.a.reserve(n+1);
  coeff.b.clear(); coeff.b.reserve(n);
  coeff.c.clear(); coeff.c.reserve(n+1);
  coeff.d.clear(); coeff.d.reserve(n);

  Array<Scalar> h(n);
  Array<RCP<Thyra::VectorBase<Scalar> > > alpha(n), z(n+1);
  Array<Scalar> l(n+1), mu(n);
  for (int i=0 ; i<n ; ++i) {
    coeff.t.push_back(t[i]);
    coeff.a.push_back(x_vec[i]->clone_v());
    coeff.b.push_back(Thyra::createMember(x_vec[0]->space()));
    coeff.c.push_back(Thyra::createMember(x_vec[0]->space()));
    coeff.d.push_back(Thyra::createMember(x_vec[0]->space()));
    alpha[i] = Thyra::createMember(x_vec[0]->space());
    z[i] = Thyra::createMember(x_vec[0]->space());
  }
  coeff.a.push_back(x_vec[n]->clone_v());
  coeff.t.push_back(t[n]);
  coeff.c.push_back(Thyra::createMember(x_vec[0]->space()));
  z[n] = Thyra::createMember(x_vec[0]->space());
  Scalar zero = ST::zero();
  Scalar one = ST::one();
  Scalar two = Scalar(2*ST::one());
  Scalar three = Scalar(3*ST::one());

  // Algorithm starts here:
  for (int i=0 ; i<n ; ++i) {
    h[i] = coeff.t[i+1]-coeff.t[i];
  }
  for (int i=1 ; i<n ; ++i) {
    V_StVpStV(outArg(*(alpha[i])),three/h[i],*coeff.a[i+1],-3/h[i],*coeff.a[i]);
    Vp_StV(outArg(*(alpha[i])),-three/h[i-1],*coeff.a[i]);
    Vp_StV(outArg(*(alpha[i])),+three/h[i-1],*coeff.a[i-1]);
  }
  l[0] = one;
  mu[0] = zero;
  V_S(outArg(*(z[0])),zero);
  for (int i=1 ; i<n ; ++i) {
    l[i] = 2*(coeff.t[i+1]-coeff.t[i-1])-h[i-1]*mu[i-1];
    mu[i] = h[i]/l[i];
    V_StVpStV(outArg(*(z[i])),one/l[i],*alpha[i],-h[i-1]/l[i],*z[i-1]);
  }
  l[n] = one;
  V_S(outArg(*(z[n])),zero);
  V_S(outArg(*(coeff.c[n])),zero);
  for (int j=n-1 ; j >= 0 ; --j) {
    V_StVpStV(outArg(*(coeff.c[j])),one,*z[j],-mu[j],*coeff.c[j+1]);
    V_StVpStV(outArg(*(coeff.b[j])),one/h[j],*coeff.a[j+1],-one/h[j],*coeff.a[j]);
    Vp_StV(outArg(*(coeff.b[j])),-h[j]/three,*coeff.c[j+1]);
    Vp_StV(outArg(*(coeff.b[j])),-h[j]*two/three,*coeff.c[j]);
    V_StVpStV(outArg(*(coeff.d[j])),one/(three*h[j]),*coeff.c[j+1],-one/(three*h[j]),*coeff.c[j]);
  }
  // Pop the last entry off of a and c to make them the right size.
  coeff.a.erase(coeff.a.end()-1);
  coeff.c.erase(coeff.c.end()-1);
}
void CubicSplineInterpolator<Scalar>::interpolate(
  const Array<Scalar> &t_values,
  typename DataStore<Scalar>::DataStoreVector_t *data_out
  ) const
{
  using Teuchos::as;
  using Teuchos::outArg;
  typedef Teuchos::ScalarTraits<Scalar> ST;

  TEUCHOS_TEST_FOR_EXCEPTION( nodesSet_ == false, std::logic_error,
      "Error!, setNodes must be called before interpolate"
      );
#ifdef HAVE_RYTHMOS_DEBUG
  // Check that our nodes_ have not changed between the call to setNodes and interpolate
  assertNodesUnChanged<Scalar>(*nodes_,*nodes_copy_);
  // Assert that the base interpolator preconditions are satisfied
  assertBaseInterpolatePreconditions(*nodes_,t_values,data_out);
#endif // HAVE_RYTHMOS_DEBUG
  
  // Output info
  const RCP<FancyOStream> out = this->getOStream();
  const Teuchos::EVerbosityLevel verbLevel = this->getVerbLevel(); 
  Teuchos::OSTab ostab(out,1,"CSI::interpolator");
  if ( as<int>(verbLevel) >= as<int>(Teuchos::VERB_HIGH) ) {
    *out << "nodes_:" << std::endl;
    for (Teuchos::Ordinal i=0 ; i<(*nodes_).size() ; ++i) {
      *out << "nodes_[" << i << "] = " << std::endl;
      (*nodes_)[i].describe(*out,Teuchos::VERB_EXTREME);
    }
    *out << "t_values = " << std::endl;
    for (Teuchos::Ordinal i=0 ; i<t_values.size() ; ++i) {
      *out << "t_values[" << i << "] = " << t_values[i] << std::endl;
    }
  }

  data_out->clear();

  // Return immediately if no time points are requested ...
  if (t_values.size() == 0) {
    return;
  }

  if ((*nodes_).size() == 1) {
    // trivial case of one node.  Preconditions assert that t_values[0] ==
    // (*nodes_)[0].time so we can just pass it out
    DataStore<Scalar> DS((*nodes_)[0]);
    data_out->push_back(DS);
  }
  else { // (*nodes_).size() >= 2
    int n = 0; // index into t_values
    // Loop through all of the time interpolation points in the buffer and
    // satisfiy all of the requested time points that you find.  NOTE: The
    // loop will be existed once all of the time points are satisified (see
    // return below).
    for (Teuchos::Ordinal i=0 ; i < (*nodes_).size()-1; ++i) {
      const Scalar& ti = (*nodes_)[i].time;
      const Scalar& tip1 = (*nodes_)[i+1].time;
      const TimeRange<Scalar> range_i(ti,tip1);
      // For the interpolation range of [ti,tip1], satisify all of the
      // requested points in this range.
      while ( range_i.isInRange(t_values[n]) ) {
        // First we check for exact node matches:
        if (compareTimeValues(t_values[n],ti)==0) {
          DataStore<Scalar> DS((*nodes_)[i]);
          data_out->push_back(DS);
        }
        else if (compareTimeValues(t_values[n],tip1)==0) {
          DataStore<Scalar> DS((*nodes_)[i+1]);
          data_out->push_back(DS);
        } else {
          if (!splineCoeffComputed_) {
            computeCubicSplineCoeff<Scalar>(*nodes_,outArg(splineCoeff_));
            splineCoeffComputed_ = true;
          }
          DataStore<Scalar> DS;
          RCP<Thyra::VectorBase<Scalar> > x = createMember((*nodes_)[i].x->space());
          evaluateCubicSpline<Scalar>( splineCoeff_, i, t_values[n], outArg(*x) );
          DS.time = t_values[n];
          DS.x = x;
          DS.accuracy = ST::zero();
          data_out->push_back(DS);
        }
        // Move to the next user time point to consider!
        n++;
        if (n == as<int>(t_values.size())) {
          // WE ARE ALL DONE!  MOVE OUT!
          return;
        }
      }
      // Move on the the next interpolation time range
    }
  } // (*nodes_).size() == 1
}
int main(int argc, char *argv[])
{

    using std::endl;
    typedef double Scalar;
    typedef double ScalarMag;
    using Teuchos::describe;
    using Teuchos::RCP;
    using Teuchos::rcp;
    using Teuchos::rcp_implicit_cast;
    using Teuchos::rcp_dynamic_cast;
    using Teuchos::as;
    using Teuchos::ParameterList;
    using Teuchos::CommandLineProcessor;
    typedef Teuchos::ParameterList::PrintOptions PLPrintOptions;
    typedef Thyra::ModelEvaluatorBase MEB;
    using Thyra::createMember;
    using Thyra::createMembers;

    bool success = true;

    Teuchos::GlobalMPISession mpiSession(&argc,&argv);

    RCP<Epetra_Comm> epetra_comm;
#ifdef HAVE_MPI
    epetra_comm = rcp( new Epetra_MpiComm(MPI_COMM_WORLD) );
#else
    epetra_comm = rcp( new Epetra_SerialComm );
#endif // HAVE_MPI

    RCP<Teuchos::FancyOStream>
    out = Teuchos::VerboseObjectBase::getDefaultOStream();

    try {

        //
        // A) Read commandline options
        //

        CommandLineProcessor clp;
        clp.throwExceptions(false);
        clp.addOutputSetupOptions(true);

        std::string paramsFileName = "";
        clp.setOption( "params-file", &paramsFileName,
                       "File name for XML parameters" );

        std::string extraParamsString = "";
        clp.setOption( "extra-params", &extraParamsString,
                       "Extra XML parameter string" );

        Teuchos::EVerbosityLevel verbLevel = Teuchos::VERB_DEFAULT;
        setVerbosityLevelOption( "verb-level", &verbLevel,
                                 "Top-level verbosity level.  By default, this gets deincremented as you go deeper into numerical objects.",
                                 &clp );

        double finalTime = 1.0;
        clp.setOption( "final-time", &finalTime, "Final time (the inital time)" );

        int numTimeSteps = 2;
        clp.setOption( "num-time-steps", &numTimeSteps, "Number of time steps" );

        bool dumpFinalSolutions = false;
        clp.setOption(
            "dump-final-solutions", "no-dump-final-solutions", &dumpFinalSolutions,
            "Determine if the final solutions are dumpped or not." );

        double maxStateError = 1e-6;
        clp.setOption( "max-state-error", &maxStateError,
                       "The maximum allowed error in the integrated state in relation to the exact state solution" );

        // ToDo: Read in more parameters

        CommandLineProcessor::EParseCommandLineReturn parse_return = clp.parse(argc,argv);
        if( parse_return != CommandLineProcessor::PARSE_SUCCESSFUL ) return parse_return;

        if ( Teuchos::VERB_DEFAULT == verbLevel )
            verbLevel = Teuchos::VERB_LOW;

        const Teuchos::EVerbosityLevel
        solnVerbLevel = ( dumpFinalSolutions ? Teuchos::VERB_EXTREME : verbLevel );

        //
        // B) Get the base parameter list that all other parameter lists will be
        // read from.
        //

        RCP<ParameterList> paramList = Teuchos::parameterList();
        if (paramsFileName.length())
            updateParametersFromXmlFile( paramsFileName, &*paramList );
        if (extraParamsString.length())
            updateParametersFromXmlString( extraParamsString, &*paramList );

        paramList->validateParameters(*getValidParameters());

        //
        // C) Create the Stratimikos linear solver factories.
        //

        // Get the linear solve strategy that will be used to solve for the linear
        // system with the dae's W matrix.
        Stratimikos::DefaultLinearSolverBuilder daeLinearSolverBuilder;
        daeLinearSolverBuilder.setParameterList(sublist(paramList,DAELinearSolver_name));
        RCP<Thyra::LinearOpWithSolveFactoryBase<Scalar> >
        daeLOWSF = createLinearSolveStrategy(daeLinearSolverBuilder);

        // Get the linear solve strategy that can be used to override the overall
        // linear system solve
        Stratimikos::DefaultLinearSolverBuilder overallLinearSolverBuilder;
        overallLinearSolverBuilder.setParameterList(sublist(paramList,OverallLinearSolver_name));
        RCP<Thyra::LinearOpWithSolveFactoryBase<Scalar> >
        overallLOWSF = createLinearSolveStrategy(overallLinearSolverBuilder);

        //
        // D) Create the underlying EpetraExt::ModelEvaluator
        //

        RCP<EpetraExt::DiagonalTransientModel> epetraDaeModel =
            EpetraExt::diagonalTransientModel(
                epetra_comm,
                sublist(paramList,DiagonalTransientModel_name)
            );

        *out <<"\nepetraDaeModel valid options:\n";
        epetraDaeModel->getValidParameters()->print(
            *out, PLPrintOptions().indent(2).showTypes(true).showDoc(true)
        );

        //
        // E) Create the Thyra-wrapped ModelEvaluator
        //

        RCP<Thyra::ModelEvaluator<double> > daeModel =
            epetraModelEvaluator(epetraDaeModel,daeLOWSF);

        //
        // F) Create the TimeDiscretizedBackwardEulerModelEvaluator
        //

        MEB::InArgs<Scalar> initCond = daeModel->createInArgs();
        initCond.setArgs(daeModel->getNominalValues());

        RCP<Thyra::ModelEvaluator<Scalar> >
        discretizedModel = Rythmos::timeDiscretizedBackwardEulerModelEvaluator<Scalar>(
                               daeModel, initCond, finalTime, numTimeSteps, overallLOWSF );

        *out << "\ndiscretizedModel = " << describe(*discretizedModel,verbLevel);

        //
        // F) Setup a nonlinear solver and solve the system
        //

        // F.1) Setup a nonlinear solver

        Thyra::DampenedNewtonNonlinearSolver<Scalar> nonlinearSolver;
        nonlinearSolver.setOStream(out);
        nonlinearSolver.setVerbLevel(verbLevel);
        //nonlinearSolver.setParameterList(sublist(paramList,NonlinearSolver_name));
        //2007/11/27: rabartl: ToDo: Implement parameter list handling for
        //DampenedNonlinearSolve so that I can uncomment the above line.
        nonlinearSolver.setModel(discretizedModel);

        // F.2) Solve the system

        RCP<Thyra::VectorBase<Scalar> >
        x_bar = createMember(discretizedModel->get_x_space());
        V_S( x_bar.ptr(), 0.0 );

        Thyra::SolveStatus<Scalar> solveStatus =
            Thyra::solve( nonlinearSolver, &*x_bar );

        *out << "\nsolveStatus:\n" << solveStatus;

        *out << "\nx_bar = " << describe(*x_bar,solnVerbLevel);

        //
        // G) Verify that the solution is correct???
        //

        // Check against the end time exact solution.

        RCP<const Thyra::VectorBase<Scalar> >
        exact_x_final = Thyra::create_Vector(
                            epetraDaeModel->getExactSolution(finalTime),
                            daeModel->get_x_space()
                        );

        RCP<const Thyra::VectorBase<Scalar> > solved_x_final
            = rcp_dynamic_cast<Thyra::ProductVectorBase<Scalar> >(x_bar,true)->getVectorBlock(numTimeSteps-1);

        const bool result = Thyra::testRelNormDiffErr(
                                "exact_x_final", *exact_x_final, "solved_x_final", *solved_x_final,
                                "maxStateError", maxStateError, "warningTol", 1.0, // Don't warn
                                &*out, solnVerbLevel
                            );
        if (!result) success = false;

    }
    TEUCHOS_STANDARD_CATCH_STATEMENTS(true,*out,success);

    if(success)
        *out << "\nEnd Result: TEST PASSED" << endl;
    else
        *out << "\nEnd Result: TEST FAILED" << endl;

    return ( success ? 0 : 1 );

} // end main() [Doxygen looks for this!]