TEUCHOS_UNIT_TEST( Rythmos_GlobalErrorEstimator, SinCos ) {
  typedef Teuchos::ScalarTraits<double> ST;
  // Forward Solve, storing data in linear interpolation buffer
  int storageLimit = 100;
  double finalTime = 0.1;
  double dt = 0.1;
  RCP<IntegratorBuilder<double> > ib = integratorBuilder<double>();
  {
    RCP<ParameterList> ibPL = Teuchos::parameterList();
    ibPL->sublist("Integrator Settings").sublist("Integrator Selection").set("Integrator Type","Default Integrator");
    ibPL->sublist("Integrator Settings").set("Final Time",finalTime);
    ibPL->sublist("Integration Control Strategy Selection").set("Integration Control Strategy Type","Simple Integration Control Strategy");
    ibPL->sublist("Integration Control Strategy Selection").sublist("Simple Integration Control Strategy").set("Take Variable Steps",false);
    ibPL->sublist("Integration Control Strategy Selection").sublist("Simple Integration Control Strategy").set("Fixed dt",dt);

    ibPL->sublist("Stepper Settings").sublist("Stepper Selection").set("Stepper Type","Backward Euler");
    //ibPL->sublist("Stepper Settings").sublist("Stepper Selection").set("Stepper Type","Implicit RK");
    //ibPL->sublist("Stepper Settings").sublist("Runge Kutta Butcher Tableau Selection").set("Runge Kutta Butcher Tableau Type","Backward Euler");
    ibPL->sublist("Interpolation Buffer Settings").sublist("Trailing Interpolation Buffer Selection").set("Interpolation Buffer Type","Interpolation Buffer");
    ibPL->sublist("Interpolation Buffer Settings").sublist("Trailing Interpolation Buffer Selection").sublist("Interpolation Buffer").set("StorageLimit",storageLimit);
    ibPL->sublist("Interpolation Buffer Settings").sublist("Interpolator Selection").set("Interpolator Type","Linear Interpolator");
    ib->setParameterList(ibPL);
  }
  RCP<SinCosModel> fwdModel = sinCosModel(true); // implicit formulation
  Thyra::ModelEvaluatorBase::InArgs<double> fwdIC = fwdModel->getNominalValues();
  RCP<Thyra::NonlinearSolverBase<double> > fwdNLSolver = timeStepNonlinearSolver<double>();
  RCP<IntegratorBase<double> > fwdIntegrator = ib->create(fwdModel,fwdIC,fwdNLSolver);
  RCP<const VectorBase<double> > x_final;
  {
    Array<double> time_vec;
    time_vec.push_back(finalTime);
    Array<RCP<const Thyra::VectorBase<double> > > x_final_array;
    fwdIntegrator->getFwdPoints(time_vec,&x_final_array,NULL,NULL);
    x_final = x_final_array[0];
  }
  // Verify x_final is correct
  {
    // Defaults from SinCos Model:
    double f = 1.0;
    double L = 1.0;
    double a = 0.0;
    double x_ic_0 = 0.0;
    double x_ic_1 = 1.0;
    double x_0 = dt/(1.0+std::pow(dt*f/L,2))*(x_ic_0/dt+x_ic_1+dt*std::pow(f/L,2)*a);
    double x_1 = dt/(1.0+std::pow(dt*f/L,2))*(-std::pow(f/L,2)*x_ic_0+x_ic_1/dt+std::pow(f/L,2)*a);
    double tol = 1.0e-10;
    Thyra::ConstDetachedVectorView<double> x_final_view( *x_final );
    TEST_FLOATING_EQUALITY( x_final_view[0], x_0, tol );
    TEST_FLOATING_EQUALITY( x_final_view[1], x_1, tol );
  }
  // Copy InterpolationBuffer data into Cubic Spline interpolation buffer for use in Adjoint Solve
  TimeRange<double> fwdTimeRange; 
  RCP<InterpolationBufferBase<double> > fwdCubicSplineInterpBuffer;
  {
    RCP<PointwiseInterpolationBufferAppender<double> > piba = pointwiseInterpolationBufferAppender<double>();
    RCP<InterpolationBuffer<double> > sinkInterpBuffer = interpolationBuffer<double>();
    sinkInterpBuffer->setStorage(storageLimit);
    RCP<CubicSplineInterpolator<double> > csi = cubicSplineInterpolator<double>();
    sinkInterpBuffer->setInterpolator(csi);
    RCP<const InterpolationBufferBase<double> > sourceInterpBuffer;
    {
      RCP<TrailingInterpolationBufferAcceptingIntegratorBase<double> > tibaib = 
        Teuchos::rcp_dynamic_cast<TrailingInterpolationBufferAcceptingIntegratorBase<double> >(fwdIntegrator,true);
      sourceInterpBuffer = tibaib->getTrailingInterpolationBuffer();
    }
    fwdTimeRange = sourceInterpBuffer->getTimeRange();
    piba->append(*sourceInterpBuffer, fwdTimeRange, Teuchos::outArg(*sinkInterpBuffer));
    fwdCubicSplineInterpBuffer = sinkInterpBuffer;

    TimeRange<double> sourceRange = sourceInterpBuffer->getTimeRange();
    TimeRange<double> sinkRange = sinkInterpBuffer->getTimeRange();
    TEST_EQUALITY( sourceRange.lower(), sinkRange.lower() );
    TEST_EQUALITY( sourceRange.upper(), sinkRange.upper() );
  }
  // Adjoint Solve, reading forward solve data from Cubic Spline interpolation buffer
  {
    RCP<ParameterList> ibPL = Teuchos::parameterList();
    ibPL->sublist("Integrator Settings").sublist("Integrator Selection").set("Integrator Type","Default Integrator");
    ibPL->sublist("Integrator Settings").set("Final Time",finalTime);
    ibPL->sublist("Integration Control Strategy Selection").set("Integration Control Strategy Type","Simple Integration Control Strategy");
    ibPL->sublist("Integration Control Strategy Selection").sublist("Simple Integration Control Strategy").set("Take Variable Steps",false);
    ibPL->sublist("Integration Control Strategy Selection").sublist("Simple Integration Control Strategy").set("Fixed dt",dt);

    ibPL->sublist("Stepper Settings").sublist("Stepper Selection").set("Stepper Type","Backward Euler");
    //ibPL->sublist("Stepper Settings").sublist("Stepper Selection").set("Stepper Type","Implicit RK");
    //ibPL->sublist("Stepper Settings").sublist("Runge Kutta Butcher Tableau Selection").set("Runge Kutta Butcher Tableau Type","Implicit 1 Stage 2nd order Gauss");
    ibPL->sublist("Interpolation Buffer Settings").sublist("Trailing Interpolation Buffer Selection").set("Interpolation Buffer Type","Interpolation Buffer");
    ibPL->sublist("Interpolation Buffer Settings").sublist("Trailing Interpolation Buffer Selection").sublist("Interpolation Buffer").set("StorageLimit",storageLimit);
    ibPL->sublist("Interpolation Buffer Settings").sublist("Interpolator Selection").set("Interpolator Type","Linear Interpolator");
    ib->setParameterList(ibPL);
  }
  RCP<Thyra::ModelEvaluator<double> > adjModel;
  {
    RCP<Rythmos::AdjointModelEvaluator<double> > model = 
      Rythmos::adjointModelEvaluator<double>(
          fwdModel, fwdTimeRange
          );
    //model->setFwdStateSolutionBuffer(fwdCubicSplineInterpBuffer);
    adjModel = model;
  }
  Thyra::ModelEvaluatorBase::InArgs<double> adjIC = adjModel->getNominalValues();
  double phi_ic_0 = 2.0;
  double phi_ic_1 = 3.0;
  {
    // Initial conditions for adjoint:
    const RCP<const Thyra::VectorSpaceBase<double> >
      f_space = fwdModel->get_f_space();
    const RCP<Thyra::VectorBase<double> > x_ic = createMember(f_space);
    {
      Thyra::DetachedVectorView<double> x_ic_view( *x_ic );
      x_ic_view[0] = phi_ic_0;
      x_ic_view[1] = phi_ic_1;
    }
    const RCP<Thyra::VectorBase<double> > xdot_ic = createMember(f_space);
    V_S( Teuchos::outArg(*xdot_ic), ST::zero() );
    adjIC.set_x(x_ic);
    adjIC.set_x_dot(xdot_ic);
  }
  RCP<Thyra::LinearNonlinearSolver<double> > adjNLSolver = Thyra::linearNonlinearSolver<double>();
  RCP<IntegratorBase<double> > adjIntegrator = ib->create(adjModel,adjIC,adjNLSolver);
  RCP<const VectorBase<double> > phi_final;
  {
    Array<double> time_vec;
    time_vec.push_back(finalTime);
    Array<RCP<const Thyra::VectorBase<double> > > phi_final_array;
    adjIntegrator->getFwdPoints(time_vec,&phi_final_array,NULL,NULL);
    phi_final = phi_final_array[0];
  }
  // Verify phi_final is correct
  {
    // Defaults from SinCos Model:
    double f = 1.0;
    double L = 1.0;
    double h = -dt;
    double phi_0 = 1.0/(1.0+std::pow(f*h/L,2.0))*(phi_ic_0+std::pow(f/L,2.0)*h*phi_ic_1);
    double phi_1 = 1.0/(1.0+std::pow(f*h/L,2.0))*(-h*phi_ic_0+phi_ic_1);
    double tol = 1.0e-10;
    Thyra::ConstDetachedVectorView<double> phi_final_view( *phi_final );
    TEST_FLOATING_EQUALITY( phi_final_view[0], phi_0, tol );
    TEST_FLOATING_EQUALITY( phi_final_view[1], phi_1, tol );
  }
  // Compute error estimate
  //TEST_ASSERT( false );
}
void
Piro::MatrixFreeLinearOp<Scalar>::applyImpl(
    const Thyra::EOpTransp M_trans,
    const Thyra::MultiVectorBase<Scalar> &X,
    const Teuchos::Ptr<Thyra::MultiVectorBase<Scalar> > &Y,
    const Scalar alpha,
    const Scalar beta) const
{
  using Teuchos::RCP;
  using Teuchos::Ptr;

  TEUCHOS_TEST_FOR_EXCEPTION(
      !this->opSupported(M_trans),
      Thyra::Exceptions::OpNotSupported,
      this->description() << " does not support operation " << Thyra::toString(M_trans));

  TEUCHOS_TEST_FOR_EXCEPTION(
      !X.range()->isCompatible(*this->domain()),
      Thyra::Exceptions::IncompatibleVectorSpaces,
      "Domain of " << this->description() << ": " << this->domain()->description() <<
      " is not compatible with column space of " << X.description() << ": " << X.range()->description());

  TEUCHOS_TEST_FOR_EXCEPTION(
      !Y->range()->isCompatible(*this->range()),
      Thyra::Exceptions::IncompatibleVectorSpaces,
      "Range of " << this->description() << ": " << this->range()->description() <<
      " is not compatible with column space of " << Y->description() << ": " << Y->range()->description());

  TEUCHOS_TEST_FOR_EXCEPTION(
      !Y->domain()->isCompatible(*X.domain()),
      Thyra::Exceptions::IncompatibleVectorSpaces,
      "Row space of " << Y->description() << ": " << Y->domain()->description() <<
      " is not compatible with row space of " << X.description() << ": " << X.domain()->description());

  TEUCHOS_TEST_FOR_EXCEPTION(
      &X == Y.get(),
      std::logic_error,
      "X and Y arguments are both aliases of " << X.description());

  if (alpha == Teuchos::ScalarTraits<Scalar>::zero()) {
    // Y <- beta * Y
    Thyra::Vt_S(Y, beta);
    return;
  }

  typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType ScalarMagnitude;

  RCP<const Thyra::VectorBase<Scalar> > x_dot_base;
  if (basePoint_.supports(Thyra::ModelEvaluatorBase::IN_ARG_x_dot)) 
    x_dot_base = basePoint_.get_x_dot();

  RCP<const Thyra::VectorBase<Scalar> > x_base = basePoint_.get_x();
  if (Teuchos::is_null(x_base)) {
    x_base = model_->getNominalValues().get_x();
  }
  x_base.assert_not_null();

  const ScalarMagnitude norm_x_base = Thyra::norm_2(*x_base);

  // Number of columns common to both vectors X and Y
  // (X and Y have compatible row spaces)
  const Thyra::Ordinal colCount = X.domain()->dim();
  for (Teuchos::Ordinal j = Teuchos::Ordinal(); j < colCount; ++j) {
    const RCP<const Thyra::VectorBase<Scalar> > X_vec = X.col(j);
    const RCP<Thyra::VectorBase<Scalar> > Y_vec = Y->col(j);

    const ScalarMagnitude norm_dx = Thyra::norm_2(*X_vec);

    if (norm_dx == Teuchos::ScalarTraits<ScalarMagnitude>::zero()) {
      if (beta == Teuchos::ScalarTraits<Scalar>::zero()) {
        // Y_vec <- 0
        Thyra::put_scalar(Teuchos::ScalarTraits<ScalarMagnitude>::zero(), Y_vec.ptr());
      } else {
        // Y_vec <- beta * Y_vec
        Thyra::scale(beta, Y_vec.ptr());
      }
    } else {
      // Scalar perturbation
      const ScalarMagnitude relative_pert_ratio = static_cast<ScalarMagnitude>(lambda_);
      const ScalarMagnitude eta = (relative_pert_ratio * ((norm_x_base / norm_dx) + relative_pert_ratio));

      // Compute perturbed residual
      // Dynamic: f_pert <- f(x_dot_base + eta * (W_alpha * X), x_base + eta * (W_beta * X))
      // Static: f_pert <- f(x_base + eta * X)
      const RCP<Thyra::VectorBase<Scalar> > f_pert = Thyra::createMember(this->range());
      {
        Thyra::ModelEvaluatorBase::InArgs<Scalar> pertInArgs = model_->createInArgs();
        {
          pertInArgs.setArgs(basePoint_);

          const bool isDynamic = Teuchos::nonnull(x_dot_base);

          if (isDynamic) {
            const RCP<Thyra::VectorBase<Scalar> > x_dot_pert = Thyra::createMember(this->domain());
            const Scalar W_alpha = pertInArgs.get_alpha();
            Thyra::V_VpStV<Scalar>(x_dot_pert.ptr(), *x_dot_base, W_alpha * eta, *X_vec);
            pertInArgs.set_x_dot(x_dot_pert);
          }

          const RCP<Thyra::VectorBase<Scalar> > x_pert = Thyra::createMember(this->domain());
          const Scalar W_beta = isDynamic ? pertInArgs.get_beta() : Teuchos::ScalarTraits<Scalar>::one();
          Thyra::V_VpStV<Scalar>(x_pert.ptr(), *x_base, W_beta * eta, *X_vec);
          pertInArgs.set_x(x_pert);
        }

        Thyra::ModelEvaluatorBase::OutArgs<Scalar> pertOutArgs = model_->createOutArgs();
        {
          pertOutArgs.set_f(f_pert);
        }

        model_->evalModel(pertInArgs, pertOutArgs);
      }

      // Y <- alpha * (1/eta) * (f_pert - f_base) + beta * Y
      const Scalar alpha_over_eta = alpha / eta;

      if (beta == Teuchos::ScalarTraits<Scalar>::zero()) {
        // Y <- alpha * (1/eta) * (f_pert - f_base)
        Thyra::V_StVpStV<Scalar>(Y_vec.ptr(), alpha_over_eta, *f_pert, -alpha_over_eta, *f_base_);
      } else {
        // Aliasing f_pert and alpha_op_X (f_pert == alpha_op_X)
        const RCP<Thyra::VectorBase<Scalar> > alpha_op_X = f_pert;

        // alpha_op_X <- alpha * (1/eta) * (f_pert - f_base)
        Thyra::Vp_StV(alpha_op_X.ptr(), -Teuchos::ScalarTraits<Scalar>::one(), *f_base_);
        const Scalar alpha_over_eta = alpha / eta;
        Thyra::Vt_S(alpha_op_X.ptr(), alpha_over_eta);

        // Y <- alpha_op_X + beta * Y
        Thyra::Vp_V<Scalar>(Y_vec.ptr(), *alpha_op_X, beta);
      }
    }
  }
}