void DiscreteBoundaryOperator<ValueType>::applyImpl(
    const Thyra::EOpTransp M_trans,
    const Thyra::MultiVectorBase<ValueType> &X_in,
    const Teuchos::Ptr<Thyra::MultiVectorBase<ValueType>> &Y_inout,
    const ValueType alpha, const ValueType beta) const {
  // Thyra entry point of the operator. After validating the operands, every
  // column of X_in is paired with the matching column of Y_inout and handed,
  // together with alpha and beta, to applyBuiltInImpl() as Armadillo vectors.

  // Despite the macro name, these checks stay active in release builds and
  // signal failure by throwing an exception instead of aborting.
  TEUCHOS_ASSERT(this->opSupported(M_trans));
  TEUCHOS_ASSERT(X_in.range()->isCompatible(*this->domain()));
  TEUCHOS_ASSERT(Y_inout->range()->isCompatible(*this->range()));
  TEUCHOS_ASSERT(Y_inout->domain()->isCompatible(*X_in.domain()));

  const Thyra::Ordinal numColumns = X_in.domain()->dim();

  // The transposition flag is the same for all columns, so convert it once.
  const TranspositionMode mode = static_cast<TranspositionMode>(M_trans);

  Thyra::Ordinal j = 0;
  while (j < numColumns) {
    // Detached views give direct access to the entries of column #j of
    // X_in (read-only) and Y_inout (writable).
    Thyra::ConstDetachedSpmdVectorView<ValueType> inputView(X_in.col(j));
    Thyra::DetachedSpmdVectorView<ValueType> outputView(Y_inout->col(j));
    const Teuchos::ArrayRCP<const ValueType> inputValues =
        inputView.sv().values();
    const Teuchos::ArrayRCP<ValueType> outputValues =
        outputView.sv().values();

    // Armadillo has no convenient "column of const elements" type, so the
    // constness is moved from the element type to the vector object itself;
    // that is the only reason for the const_cast.
    ValueType *const inputData = const_cast<ValueType *>(inputValues.get());
    ValueType *const outputData = outputValues.get();

    // Wrap the Trilinos storage without copying it (copy_aux_mem == false).
    const arma::Col<ValueType> inputCol(inputData, inputValues.size(),
                                        false /* copy_aux_mem */);
    arma::Col<ValueType> outputCol(outputData, outputValues.size(),
                                   false /* copy_aux_mem */);

    applyBuiltInImpl(mode, inputCol, outputCol, alpha, beta);

    ++j;
  }
}
// Constructs a NOX multi-vector from a Thyra multi-vector: the wrapped
// thyraMultiVec is a clone of `source` (obtained through clone_mv()), and
// noxThyraVectors is constructed with the number of columns of that clone
// (the dimension of its domain space).
NOX::Thyra::MultiVector::
MultiVector(const ::Thyra::MultiVectorBase<double>& source)
  : thyraMultiVec(source.clone_mv()),
    // Reads the freshly cloned thyraMultiVec.
    // NOTE(review): this relies on thyraMultiVec being declared before
    // noxThyraVectors in the class so that it is initialized first -- confirm.
    noxThyraVectors(thyraMultiVec->domain()->dim())
{
}
void ExampleTridiagSerialLinearOp<Scalar>::applyImpl(
  const Thyra::EOpTransp M_trans,
  const Thyra::MultiVectorBase<Scalar> &X_in,
  const Teuchos::Ptr<Thyra::MultiVectorBase<Scalar> > &Y_inout,
  const Scalar alpha,
  const Scalar beta
  ) const
{
  // Computes Y_inout = alpha*op(M)*X_in + beta*Y_inout one column at a time,
  // where M is the tridiagonal matrix stored in lower_, diag_ and upper_ and
  // op() is selected by M_trans (NOTRANS, CONJ, TRANS or CONJTRANS).  Any
  // other value of M_trans raises an exception.

  typedef Teuchos::ScalarTraits<Scalar> STraits;
  typedef Thyra::Ordinal Index;

  const Index numRows = space_->dim();
  const Index numCols = X_in.domain()->dim();

  for (Index j = 0; j < numCols; ++j) {

    // Explicit (detached) views of the entries of column j
    Thyra::ConstDetachedVectorView<Scalar> xView(X_in.col(j));
    Thyra::DetachedVectorView<Scalar> yView(Y_inout->col(j));
    const Teuchos::ArrayRCP<const Scalar> xv = xView.sv().values();
    const Teuchos::ArrayRCP<Scalar> yv = yView.sv().values();

    // Step 1: y = beta*y.  For beta == 0 the entries are overwritten rather
    // than multiplied, because y may hold uninitialized data on input.
    if (beta == STraits::zero()) {
      for (Index r = 0; r < numRows; ++r) {
        yv[r] = STraits::zero();
      }
    }
    else if (beta != STraits::one()) {
      for (Index r = 0; r < numRows; ++r) {
        yv[r] *= beta;
      }
    }

    // Step 2: y += alpha*op(M)*x.  In every case the first row is handled
    // on its own, the interior rows in a loop, and the last row on its own
    // (the running index `row` is left at the last row by the loop).
    Index row = 0;
    switch (M_trans) {

      case Thyra::NOTRANS: {
        yv[row] += alpha * ( diag_[row]*xv[row] + upper_[row]*xv[row+1] );
        row = 1;
        while (row < numRows - 1) {
          yv[row] += alpha * ( lower_[row-1]*xv[row-1] + diag_[row]*xv[row] + upper_[row]*xv[row+1] );
          ++row;
        }
        yv[row] += alpha * ( lower_[row-1]*xv[row-1] + diag_[row]*xv[row] );
        break;
      }

      case Thyra::CONJ: {
        // Same pattern as NOTRANS with conjugated matrix entries
        yv[row] += alpha * ( STraits::conjugate(diag_[row])*xv[row]
          + STraits::conjugate(upper_[row])*xv[row+1] );
        row = 1;
        while (row < numRows - 1) {
          yv[row] += alpha * ( STraits::conjugate(lower_[row-1])*xv[row-1]
            + STraits::conjugate(diag_[row])*xv[row]
            + STraits::conjugate(upper_[row])*xv[row+1] );
          ++row;
        }
        yv[row] += alpha * ( STraits::conjugate(lower_[row-1])*xv[row-1]
          + STraits::conjugate(diag_[row])*xv[row] );
        break;
      }

      case Thyra::TRANS: {
        // Transpose: the roles of lower_ and upper_ are exchanged
        yv[row] += alpha * ( diag_[row]*xv[row] + lower_[row]*xv[row+1] );
        row = 1;
        while (row < numRows - 1) {
          yv[row] += alpha * ( upper_[row-1]*xv[row-1] + diag_[row]*xv[row] + lower_[row]*xv[row+1] );
          ++row;
        }
        yv[row] += alpha * ( upper_[row-1]*xv[row-1] + diag_[row]*xv[row] );
        break;
      }

      case Thyra::CONJTRANS: {
        // Conjugate transpose: exchanged diagonals and conjugated entries
        yv[row] += alpha * ( STraits::conjugate(diag_[row])*xv[row]
          + STraits::conjugate(lower_[row])*xv[row+1] );
        row = 1;
        while (row < numRows - 1) {
          yv[row] += alpha * ( STraits::conjugate(upper_[row-1])*xv[row-1]
            + STraits::conjugate(diag_[row])*xv[row]
            + STraits::conjugate(lower_[row])*xv[row+1] );
          ++row;
        }
        yv[row] += alpha * ( STraits::conjugate(upper_[row-1])*xv[row-1]
          + STraits::conjugate(diag_[row])*xv[row] );
        break;
      }

      default: {
        TEUCHOS_TEST_FOR_EXCEPT(true); // Unknown transposition mode
      }
    }
  }

}
void 
MatrixFreeJacobianOperator<Scalar>::applyImpl(const ::Thyra::EOpTransp M_trans,
					      const ::Thyra::MultiVectorBase< Scalar > &thyra_mv_y,
					      const Teuchos::Ptr< ::Thyra::MultiVectorBase< Scalar > > &thyra_mv_u,
					      const Scalar alpha,
					      const Scalar beta) const
{
  // Approximates the Jacobian-vector product u = J*y with a finite
  // difference of model residual evaluations around x_base_.
  //
  // thyra_mv_y : input multi-vector; must have exactly one column (y)
  // thyra_mv_u : output multi-vector; must have exactly one column (u)
  //
  // Throws (through TEUCHOS_ASSERT) if setup_called_ is false or if either
  // multi-vector does not have exactly one column.
  //
  // NOTE(review): M_trans, alpha and beta are not used by this
  // implementation; the result is always the plain product u = J*y.

  TEUCHOS_ASSERT(setup_called_);

  /*
   * delta = scalar perturbation
   * x     = solution vector used to evaluate f
   * f     = function evaluation (RHS)
   * y     = vector that J is applied to
   *
   * Forward difference:
   *
   *            f(x + delta * y) - f(x)
   * u = Jy =   -----------------------
   *                     delta
   *
   * Centered difference:
   *
   *            f(x + delta * y) - f(x - delta * y)
   * u = Jy =   -----------------------------------
   *                       2 * delta
   */
  TEUCHOS_ASSERT(thyra_mv_y.domain()->dim() == 1);
  TEUCHOS_ASSERT(thyra_mv_u->domain()->dim() == 1);
  const Teuchos::RCP<const ::Thyra::VectorBase<Scalar> > y_ptr = thyra_mv_y.col(0);
  const Teuchos::RCP< ::Thyra::VectorBase<Scalar> > u_ptr = thyra_mv_u->col(0);
  const ::Thyra::VectorBase<Scalar>& y = *y_ptr;
  ::Thyra::VectorBase<Scalar>& u = *u_ptr;
  typename Teuchos::ScalarTraits<Scalar>::magnitudeType norm_2_x = ::Thyra::norm(*x_base_);
  typename Teuchos::ScalarTraits<Scalar>::magnitudeType norm_2_y = ::Thyra::norm(y);

  // A zero direction would lead to a division by zero in the perturbation
  // formulas below; J*0 is simply 0, so return that directly.
  if (norm_2_y == 0.0) {
    ::Thyra::assign(Teuchos::ptrFromRef(u),0.0);
    return;
  }

  // Select the perturbation size delta_ according to the configured strategy
  if (perturbation_type_ == SalingerLOCA) {
    delta_ = lambda_ * (lambda_ + norm_2_x / norm_2_y);
  }
  else if (perturbation_type_ == KelleySalingerPawlowski) {
    Scalar inner_prod_x_y = ::Thyra::inner(*x_base_,y); 

    // Avoid 0/0 in the sign term (inner_prod / |inner_prod|) below
    if (inner_prod_x_y==Teuchos::ScalarTraits<Scalar>::zero()) 
      inner_prod_x_y = 1.0e-12;

    delta_ = lambda_ * (1.0e-12 / lambda_ + std::fabs(inner_prod_x_y) / (norm_2_y * norm_2_y) ) * inner_prod_x_y / std::fabs(inner_prod_x_y);
  }
  else if (perturbation_type_ == KnollKeyes) {
    delta_ = lambda_ * ::Thyra::norm_1(*x_base_) / (Teuchos::as<double>(x_base_->space()->dim()) * norm_2_y) + lambda_;
  }
  else {
    delta_ = user_defined_delta_;
  }

  // perturbed solution: x_p = delta * y + x_0
  ::Thyra::V_StVpV(x_perturb_.ptr(),delta_,y,*x_base_);

  // The in/out argument objects are created on first use and then reused
  if (is_null(in_args_)) {
    in_args_ = Teuchos::rcp(new ::Thyra::ModelEvaluatorBase::InArgs<Scalar>(model_->createInArgs()));
    in_args_->setArgs(model_->getNominalValues());
  }

  in_args_->set_x(x_perturb_);

  if (is_null(out_args_))
    out_args_ = Teuchos::rcp(new ::Thyra::ModelEvaluatorBase::OutArgs<Scalar>(model_->createOutArgs()));

  out_args_->set_f(f_perturb_);

  // f_p = f(delta * y + x)
  model_->evalModel(*in_args_,*out_args_);

  // to be safe, remove arguments
  in_args_->set_x(Teuchos::null);
  out_args_->set_f(Teuchos::null);

  const Scalar inv_delta = Teuchos::ScalarTraits<Scalar>::one() / delta_;

  if ( difference_type_ == Centered ) {
    // perturbed solution: x_p = -delta * y + x_0
    ::Thyra::V_StVpV(x_perturb_.ptr(),-delta_,y,*x_base_);

    // x was detached from in_args_ after the first evaluation, so it has to
    // be attached again; otherwise the model is evaluated without a state.
    in_args_->set_x(x_perturb_);
    out_args_->set_f(f2_perturb_);

    // f_p2 = f(-delta * y + x)
    model_->evalModel(*in_args_,*out_args_);

    // to be safe, remove arguments
    in_args_->set_x(Teuchos::null);
    out_args_->set_f(Teuchos::null);

    // The two evaluation points are 2*delta apart, hence the factor 1/2.
    const Scalar two = Teuchos::ScalarTraits<Scalar>::one() + Teuchos::ScalarTraits<Scalar>::one();
    const Scalar half_inv_delta = inv_delta / two;

    // u = (f_p - f_p2) / (2 * delta)
    ::Thyra::V_StVpStV(Teuchos::ptrFromRef(u),half_inv_delta,*f_perturb_,-half_inv_delta,*f2_perturb_);
  }
  else {
    // u = (f_p - f_base) / delta
    ::Thyra::V_StVpStV(Teuchos::ptrFromRef(u),inv_delta,*f_perturb_,-inv_delta,*f_base_);
  }

}
void
Piro::MatrixFreeLinearOp<Scalar>::applyImpl(
    const Thyra::EOpTransp M_trans,
    const Thyra::MultiVectorBase<Scalar> &X,
    const Teuchos::Ptr<Thyra::MultiVectorBase<Scalar> > &Y,
    const Scalar alpha,
    const Scalar beta) const
{
  // Matrix-free application of the operator:
  //   Y <- alpha * op * X + beta * Y
  // where, for every column, op * X is approximated by a forward difference
  // of model residuals, (f(base + eta * X) - f_base_) / eta.
  //
  // Throws Thyra::Exceptions::OpNotSupported if M_trans is not supported,
  // Thyra::Exceptions::IncompatibleVectorSpaces if the spaces of X or Y do
  // not match the operator, and std::logic_error if X and Y are the same
  // object.

  using Teuchos::RCP;

  TEUCHOS_TEST_FOR_EXCEPTION(
      !this->opSupported(M_trans),
      Thyra::Exceptions::OpNotSupported,
      this->description() << " does not support operation " << Thyra::toString(M_trans));

  TEUCHOS_TEST_FOR_EXCEPTION(
      !X.range()->isCompatible(*this->domain()),
      Thyra::Exceptions::IncompatibleVectorSpaces,
      "Domain of " << this->description() << ": " << this->domain()->description() <<
      " is not compatible with column space of " << X.description() << ": " << X.range()->description());

  TEUCHOS_TEST_FOR_EXCEPTION(
      !Y->range()->isCompatible(*this->range()),
      Thyra::Exceptions::IncompatibleVectorSpaces,
      "Range of " << this->description() << ": " << this->range()->description() <<
      " is not compatible with column space of " << Y->description() << ": " << Y->range()->description());

  TEUCHOS_TEST_FOR_EXCEPTION(
      !Y->domain()->isCompatible(*X.domain()),
      Thyra::Exceptions::IncompatibleVectorSpaces,
      "Row space of " << Y->description() << ": " << Y->domain()->description() <<
      " is not compatible with row space of " << X.description() << ": " << X.domain()->description());

  TEUCHOS_TEST_FOR_EXCEPTION(
      &X == Y.get(),
      std::logic_error,
      "X and Y arguments are both aliases of " << X.description());

  if (alpha == Teuchos::ScalarTraits<Scalar>::zero()) {
    // No operator contribution at all: Y <- beta * Y
    Thyra::Vt_S(Y, beta);
    return;
  }

  typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType ScalarMagnitude;

  // x_dot is only queried when the base point supports it
  RCP<const Thyra::VectorBase<Scalar> > x_dot_base;
  if (basePoint_.supports(Thyra::ModelEvaluatorBase::IN_ARG_x_dot)) 
    x_dot_base = basePoint_.get_x_dot();

  // Fall back to the nominal values of the model when the base point has no x
  RCP<const Thyra::VectorBase<Scalar> > x_base = basePoint_.get_x();
  if (Teuchos::is_null(x_base)) {
    x_base = model_->getNominalValues().get_x();
  }
  x_base.assert_not_null();

  const ScalarMagnitude norm_x_base = Thyra::norm_2(*x_base);

  // Number of columns common to both vectors X and Y
  // (X and Y have compatible row spaces)
  const Thyra::Ordinal colCount = X.domain()->dim();
  for (Thyra::Ordinal j = 0; j < colCount; ++j) {
    const RCP<const Thyra::VectorBase<Scalar> > X_vec = X.col(j);
    const RCP<Thyra::VectorBase<Scalar> > Y_vec = Y->col(j);

    const ScalarMagnitude norm_dx = Thyra::norm_2(*X_vec);

    if (norm_dx == Teuchos::ScalarTraits<ScalarMagnitude>::zero()) {
      // Zero direction: op * X_vec == 0 and the perturbation size below
      // would divide by zero, so only the beta * Y_vec term remains.
      if (beta == Teuchos::ScalarTraits<Scalar>::zero()) {
        // Y_vec <- 0 (the zero is of type Scalar so that it matches the
        // element type of Y_vec, also for complex scalars)
        Thyra::put_scalar(Teuchos::ScalarTraits<Scalar>::zero(), Y_vec.ptr());
      } else {
        // Y_vec <- beta * Y_vec
        Thyra::scale(beta, Y_vec.ptr());
      }
    } else {
      // Scalar perturbation
      const ScalarMagnitude relative_pert_ratio = static_cast<ScalarMagnitude>(lambda_);
      const ScalarMagnitude eta = (relative_pert_ratio * ((norm_x_base / norm_dx) + relative_pert_ratio));

      // Compute perturbed residual
      // Dynamic: f_pert <- f(x_dot_base + eta * (W_alpha * X), x_base + eta * (W_beta * X))
      // Static: f_pert <- f(x_base + eta * X)
      const RCP<Thyra::VectorBase<Scalar> > f_pert = Thyra::createMember(this->range());
      {
        Thyra::ModelEvaluatorBase::InArgs<Scalar> pertInArgs = model_->createInArgs();
        {
          pertInArgs.setArgs(basePoint_);

          const bool isDynamic = Teuchos::nonnull(x_dot_base);

          if (isDynamic) {
            const RCP<Thyra::VectorBase<Scalar> > x_dot_pert = Thyra::createMember(this->domain());
            const Scalar W_alpha = pertInArgs.get_alpha();
            Thyra::V_VpStV<Scalar>(x_dot_pert.ptr(), *x_dot_base, W_alpha * eta, *X_vec);
            pertInArgs.set_x_dot(x_dot_pert);
          }

          const RCP<Thyra::VectorBase<Scalar> > x_pert = Thyra::createMember(this->domain());
          const Scalar W_beta = isDynamic ? pertInArgs.get_beta() : Teuchos::ScalarTraits<Scalar>::one();
          Thyra::V_VpStV<Scalar>(x_pert.ptr(), *x_base, W_beta * eta, *X_vec);
          pertInArgs.set_x(x_pert);
        }

        Thyra::ModelEvaluatorBase::OutArgs<Scalar> pertOutArgs = model_->createOutArgs();
        {
          pertOutArgs.set_f(f_pert);
        }

        model_->evalModel(pertInArgs, pertOutArgs);
      }

      // Y <- alpha * (1/eta) * (f_pert - f_base) + beta * Y
      const Scalar alpha_over_eta = alpha / eta;

      if (beta == Teuchos::ScalarTraits<Scalar>::zero()) {
        // Y <- alpha * (1/eta) * (f_pert - f_base)
        Thyra::V_StVpStV<Scalar>(Y_vec.ptr(), alpha_over_eta, *f_pert, -alpha_over_eta, *f_base_);
      } else {
        // Reuse the storage of f_pert for the scaled difference
        // (alpha_op_X and f_pert refer to the same vector)
        const RCP<Thyra::VectorBase<Scalar> > alpha_op_X = f_pert;

        // alpha_op_X <- alpha * (1/eta) * (f_pert - f_base)
        Thyra::Vp_StV(alpha_op_X.ptr(), -Teuchos::ScalarTraits<Scalar>::one(), *f_base_);
        Thyra::Vt_S(alpha_op_X.ptr(), alpha_over_eta);

        // Y <- alpha_op_X + beta * Y
        Thyra::Vp_V<Scalar>(Y_vec.ptr(), *alpha_op_X, beta);
      }
    }
  }
}