void LinAlgOpPack::Vp_StPtMtV(
  DVectorSlice* y, value_type a
  ,const GenPermMatrixSlice& P, BLAS_Cpp::Transp P_trans
  ,const MatrixOp& M, BLAS_Cpp::Transp M_trans
  ,const DVectorSlice& x, value_type b
  )
{
  namespace mmp = MemMngPack;
  using BLAS_Cpp::no_trans;
  using AbstractLinAlgPack::VectorMutableDense;
  using AbstractLinAlgPack::VectorDenseMutableEncap;
  using AbstractLinAlgPack::Vp_StPtMtV;
  VectorSpace::space_ptr_t
    ay_space = ( M_trans == no_trans ? M.space_cols() : M.space_rows() ).space(P,P_trans);
  VectorSpace::vec_mut_ptr_t
    ay =  ( ay_space.get()
        ? ay_space->create_member()
        : Teuchos::rcp_implicit_cast<VectorMutable>(
          Teuchos::rcp(new VectorMutableDense(BLAS_Cpp::rows(P.rows(),P.cols(),P_trans)))
          ) ),
    ax = ( M_trans == no_trans ? M.space_rows() : M.space_cols() ).create_member();
  (VectorDenseMutableEncap(*ay))() = *y;
  (VectorDenseMutableEncap(*ax))() = x;
  Vp_StPtMtV( ay.get(), a, P, P_trans, M, M_trans, *ax, b );
  *y = VectorDenseMutableEncap(*ay)();
}
const MatrixOp::MatNorm
MatrixOpNonsing::calc_cond_num(
  EMatNormType  requested_norm_type
  ,bool         allow_replacement
  ) const
{
  using BLAS_Cpp::no_trans;
  using BLAS_Cpp::trans;
  using LinAlgOpPack::V_InvMtV;
  const VectorSpace
    &space_cols = this->space_cols(),
    &space_rows = this->space_rows();
  const index_type
    num_cols = space_rows.dim();
  TEST_FOR_EXCEPTION(
    !(requested_norm_type == MAT_NORM_1 || requested_norm_type == MAT_NORM_INF), MethodNotImplemented
    ,"MatrixOp::calc_norm(...): Error, This default implemenation can only "
    "compute the one norm or the infinity norm!"
    );
  //
  // Here we implement Algorithm 2.5 in "Applied Numerical Linear Algebra", Demmel (1997)
  // using the momenclature in the text.  This is applied to the inverse matrix.
  //
  const MatrixOpNonsing
    &B = *this;
  bool
    do_trans = requested_norm_type == MAT_NORM_INF;
  VectorSpace::vec_mut_ptr_t
    x    = (do_trans ? space_rows : space_cols).create_member(1.0/num_cols),
    w    = (do_trans ? space_cols : space_rows).create_member(),
    zeta = (do_trans ? space_cols : space_rows).create_member(),
    z    = (do_trans ? space_rows : space_cols).create_member();
  const index_type max_iter = 5;  // Recommended by Highman 1988, (see Demmel's reference)
  value_type w_nrm = 0.0;
  for( index_type k = 0; k <= max_iter; ++k ) {
    V_InvMtV( w.get(), B, !do_trans ? no_trans : trans, *x );     // w = B*x
    sign( *w, zeta.get() );                                       // zeta = sign(w)
    V_InvMtV( z.get(), B, !do_trans ? trans : no_trans, *zeta );  // z = B'*zeta
    value_type  z_j = 0.0;                                        // max |z(j)| = ||z||inf
    index_type  j   = 0;
    max_abs_ele( *z, &z_j, &j );
    const value_type zTx = dot(*z,*x);                            // z'*x
    w_nrm = w->norm_1();                                        // ||w||1
    if( ::fabs(z_j) <= zTx ) {                                    // Update
      break;
    }
    else {
      *x = 0.0;
      x->set_ele(j,1.0);
    }
  }
  const MatNorm M_nrm = this->calc_norm(requested_norm_type);
  return MatNorm( w_nrm * M_nrm.value ,requested_norm_type );
}
void AbstractLinAlgPack::Vp_StV(
  VectorMutable* y, const value_type& a, const SpVectorSlice& sx )
{
  Vp_V_assert_compatibility(y,sx);
  if( sx.nz() ) {
    VectorSpace::vec_mut_ptr_t
        x = y->space().create_member();
    x->set_sub_vector(sub_vec_view(sx));
    Vp_StV( y, a, *x );
  }
}
void LinAlgOpPack::V_InvMtV(
  DVectorSlice* y, const MatrixOpNonsing& M
  ,BLAS_Cpp::Transp M_trans, const SpVectorSlice& x
  )
{
  using BLAS_Cpp::trans;
  using AbstractLinAlgPack::VectorDenseMutableEncap;
  VectorSpace::vec_mut_ptr_t
    ay = ( M_trans == trans ? M.space_cols() : M.space_rows() ).create_member();
  AbstractLinAlgPack::V_InvMtV( ay.get(), M, M_trans, x );
  *y = VectorDenseMutableEncap(*ay)();
}
void LinAlgOpPack::Vp_StMtV(
  DVectorSlice* y, value_type a, const MatrixOp& M
  ,BLAS_Cpp::Transp M_trans, const SpVectorSlice& x, value_type b
  )
{
  using BLAS_Cpp::no_trans;
  using AbstractLinAlgPack::VectorDenseMutableEncap;
  VectorSpace::vec_mut_ptr_t
    ay = ( M_trans == no_trans ? M.space_cols() : M.space_rows() ).create_member();
  (VectorDenseMutableEncap(*ay))() = *y;
  AbstractLinAlgPack::Vp_StMtV( ay.get(), a, M, M_trans, x, b );
  *y = VectorDenseMutableEncap(*ay)();
}
void MatrixOpSubView::Vp_StMtV(
  VectorMutable* y, value_type a, BLAS_Cpp::Transp M_trans_in
  , const Vector& x, value_type b
  ) const
{
  using BLAS_Cpp::no_trans;
  using BLAS_Cpp::trans;

  assert_initialized();

  BLAS_Cpp::Transp
    M_trans_trans = ( M_trans_==no_trans ? M_trans_in : BLAS_Cpp::trans_not(M_trans_in) );

  // ToDo: Assert compatibility!

  if( rng_rows_.full_range() && rng_cols_.full_range() ) {
    AbstractLinAlgPack::Vp_StMtV(  // The matrix is just transposed
      y, a
      ,*M_full_, M_trans_trans
      ,x, b );
    return;
  }
  // y = b*y
  Vt_S( y, b );
  //
  // xt1                      = 0.0
  // xt3 = xt(op_op_rng_cols) = x
  // xt3                      = 0.0
  //
  // [ yt1 ]                        [ xt1 ]
  // [ yt2 ] = a * op(op(M_full)) * [ xt3 ]
  // [ yt3 ]                        [ xt3 ]
  //
  // =>
  //
  // y += yt2 = yt(op_op_rng_rows)
  //
  const Range1D
    op_op_rng_rows = ( M_trans_trans == no_trans ? rng_rows_ : rng_cols_ ),
    op_op_rng_cols = ( M_trans_trans == no_trans ? rng_cols_ : rng_rows_ );
  VectorSpace::vec_mut_ptr_t
    xt = ( M_trans_trans == no_trans ? M_full_->space_rows() : M_full_->space_cols() ).create_member(),
    yt = ( M_trans_trans == no_trans ? M_full_->space_cols() : M_full_->space_rows() ).create_member();
  *xt = 0.0;
  *xt->sub_view(op_op_rng_cols) = x;
    LinAlgOpPack::V_StMtV( yt.get(), a, *M_full_, M_trans_trans, *xt );
  LinAlgOpPack::Vp_V( y, *yt->sub_view(op_op_rng_rows) );
}
bool CheckDescentQuasiNormalStep_Step::do_step(
  Algorithm& _algo, poss_type step_poss, IterationPack::EDoStepType type
  ,poss_type assoc_step_poss
  )
{
  using BLAS_Cpp::no_trans;
  using AbstractLinAlgPack::dot;
  using LinAlgOpPack::V_MtV;

  NLPAlgo         &algo        = rsqp_algo(_algo);
  NLPAlgoState    &s           = algo.rsqp_state();
  NLP             &nlp         = algo.nlp();
  const Range1D   equ_decomp   = s.equ_decomp();

  EJournalOutputLevel olevel = algo.algo_cntr().journal_output_level();
  std::ostream& out = algo.track().journal_out();

  // print step header.
  if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
    using IterationPack::print_algorithm_step;
    print_algorithm_step( algo, step_poss, type, assoc_step_poss, out );
  }
  
  const size_type
    nb = nlp.num_bounded_x();

  // Get iteration quantities
  IterQuantityAccess<VectorMutable>
    &c_iq   = s.c(),
    &Ypy_iq = s.Ypy();
  const Vector::vec_ptr_t
    cd_k = c_iq.get_k(0).sub_view(equ_decomp);
  const Vector
    &Ypy_k = Ypy_iq.get_k(0);
  
  value_type descent_c = -1.0;
  if( s.get_iter_quant_id( Gc_name ) != AlgorithmState::DOES_NOT_EXIST ) {
    if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
      out	<< "\nGc_k exists; compute descent_c = c_k(equ_decomp)'*Gc_k(:,equ_decomp)'*Ypy_k ...\n";
    }
    const MatrixOp::mat_ptr_t
      Gcd_k = s.Gc().get_k(0).sub_view(Range1D(),equ_decomp);
    VectorSpace::vec_mut_ptr_t
      t = cd_k->space().create_member();
    V_MtV( t.get(), *Gcd_k, BLAS_Cpp::trans, Ypy_k );
    if( static_cast<int>(olevel) >= static_cast<int>(PRINT_VECTORS) ) {
      out	<< "\nGc_k(:,equ_decomp)'*Ypy_k =\n" << *t;
    }
    descent_c = dot( *cd_k, *t );
  }
  else {
    if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
      out	<< "\nGc_k does not exist; compute descent_c = c_k(equ_decomp)'*FDGc_k(:,equ_decomp)'*Ypy_k "
        << "using finite differences ...\n";
    }
    VectorSpace::vec_mut_ptr_t
      t = nlp.space_c()->create_member();
    calc_fd_prod().calc_deriv_product(
      s.x().get_k(0),nb?&nlp.xl():NULL,nb?&nlp.xu():NULL
      ,Ypy_k,NULL,&c_iq.get_k(0),true,&nlp
      ,NULL,t.get()
      ,static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ? &out : NULL
      );
    if( static_cast<int>(olevel) >= static_cast<int>(PRINT_VECTORS) ) {
      out	<< "\nFDGc_k(:,equ_decomp)'*Ypy_k =\n" << *t->sub_view(equ_decomp);
    }
    descent_c = dot( *cd_k, *t->sub_view(equ_decomp) );
  }

  if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
    out	<< "\ndescent_c = " << descent_c << std::endl;
  }

  if( descent_c > 0.0 ) { // ToDo: add some allowance for > 0.0 for finite difference errors!
    if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
      out	<< "\nError, descent_c > 0.0; this is not a descent direction\n"
        << "Throw TestFailed and terminate the algorithm ...\n";
    }
    TEST_FOR_EXCEPTION(
      true, TestFailed
      ,"CheckDescentQuasiNormalStep_Step::do_step(...) : Error, descent for the decomposed constraints "
      "with respect to the quasi-normal step c_k(equ_decomp)'*FDGc_k(:,equ_decomp)'*Ypy_k = "
      << descent_c << " > 0.0;  This is not a descent direction!\n" );
  }

  return true;
}
bool NLPDirectTester::finite_diff_check(
  NLPDirect         *nlp
  ,const Vector     &xo
  ,const Vector     *xl
  ,const Vector     *xu
  ,const Vector     *c
  ,const Vector     *Gf
  ,const Vector     *py
  ,const Vector     *rGf
  ,const MatrixOp   *GcU
  ,const MatrixOp   *D
  ,const MatrixOp   *Uz
  ,bool             print_all_warnings
  ,std::ostream     *out
  ) const
{

  using std::setw;
  using std::endl;
  using std::right;

  using AbstractLinAlgPack::sum;
  using AbstractLinAlgPack::dot;
  using AbstractLinAlgPack::Vp_StV;
  using AbstractLinAlgPack::random_vector;
  using AbstractLinAlgPack::assert_print_nan_inf;
  using LinAlgOpPack::V_StV;
  using LinAlgOpPack::V_StMtV;
  using LinAlgOpPack::Vp_MtV;
  using LinAlgOpPack::M_StM;
  using LinAlgOpPack::M_StMtM;

  typedef VectorSpace::vec_mut_ptr_t  vec_mut_ptr_t;

//  using AbstractLinAlgPack::TestingPack::CompareDenseVectors;
//  using AbstractLinAlgPack::TestingPack::CompareDenseSparseMatrices;

  using TestingHelperPack::update_success;

  bool success = true, preformed_fd;
  if(out) {
    *out << std::boolalpha
       << std::endl
       << "*********************************************************\n"
       << "*** NLPDirectTester::finite_diff_check(...) ***\n"
       << "*********************************************************\n";
  }

  const Range1D
    var_dep      = nlp->var_dep(),
    var_indep    = nlp->var_indep(),
    con_decomp   = nlp->con_decomp(),
    con_undecomp = nlp->con_undecomp();
  NLP::vec_space_ptr_t
    space_x = nlp->space_x(),
    space_c = nlp->space_c();

  // //////////////////////////////////////////////
  // Validate the input

  TEST_FOR_EXCEPTION(
    py && !c, std::invalid_argument
    ,"NLPDirectTester::finite_diff_check(...) : "
    "Error, if py!=NULL then c!=NULL must also be true!" );

  const CalcFiniteDiffProd
    &fd_deriv_prod = this->calc_fd_prod();

  const value_type
    rand_y_l = -1.0, rand_y_u = 1.0,
    small_num = ::sqrt(std::numeric_limits<value_type>::epsilon());

  try {

  // ///////////////////////////////////////////////
  // (1) Check Gf

  if(Gf) {
    switch( Gf_testing_method() ) {
      case FD_COMPUTE_ALL: {
        // Compute FDGf outright
        TEST_FOR_EXCEPT(true); // ToDo: update above!
        break;
      }
      case FD_DIRECTIONAL: {
        // Compute FDGF'*y using random y's
        if(out)
          *out
            << "\nComparing products Gf'*y with finite difference values FDGf'*y for "
            << "random y's ...\n";
        vec_mut_ptr_t y = space_x->create_member();
        value_type max_warning_viol = 0.0;
        int num_warning_viol = 0;
        const int num_fd_directions_used = ( num_fd_directions() > 0 ? num_fd_directions() : 1 );
        for( int direc_i = 1; direc_i <= num_fd_directions_used; ++direc_i ) {
          if( num_fd_directions() > 0 ) {
            random_vector( rand_y_l, rand_y_u, y.get() );
            if(out)
              *out
                << "\n****"
                << "\n**** Random directional vector " << direc_i << " ( ||y||_1 / n = "
                << (y->norm_1() / y->dim()) << " )"
                << "\n***\n";
          }
          else {
            *y = 1.0;
            if(out)
              *out
                << "\n****"
                << "\n**** Ones vector y ( ||y||_1 / n = "<<(y->norm_1()/y->dim())<<" )"
                << "\n***\n";
          }
          value_type
            Gf_y = dot( *Gf, *y ),
            FDGf_y;
          preformed_fd = fd_deriv_prod.calc_deriv_product(
            xo,xl,xu
            ,*y,NULL,NULL,true,nlp,&FDGf_y,NULL,out,dump_all(),dump_all()
            );
          if( !preformed_fd )
            goto FD_NOT_PREFORMED;
          assert_print_nan_inf(FDGf_y, "FDGf'*y",true,out);
          const value_type
            calc_err = ::fabs( ( Gf_y - FDGf_y )/( ::fabs(Gf_y) + ::fabs(FDGf_y) + small_num ) );
          if( calc_err >= Gf_warning_tol() ) {
            max_warning_viol = my_max( max_warning_viol, calc_err );
            ++num_warning_viol;
          }
          if(out)
            *out
              << "\nrel_err(Gf'*y,FDGf'*y) = "
              << "rel_err(" << Gf_y << "," << FDGf_y << ") = "
              << calc_err << endl;
          if( calc_err >= Gf_error_tol() ) {
            if(out) {
              *out
                << "Error, above relative error exceeded Gf_error_tol = " << Gf_error_tol() << endl;
              if(dump_all()) {
                *out << "\ny =\n" << *y;
              }
            }
          }
        }
        if(out && num_warning_viol)
          *out
            << "\nThere were " << num_warning_viol << " warning tolerance "
            << "violations out of num_fd_directions = " << num_fd_directions()
            << " computations of FDGf'*y\n"
            << "and the maximum violation was " << max_warning_viol
            << " > Gf_waring_tol = " << Gf_warning_tol() << endl;
        break;
      }
      default:
        TEST_FOR_EXCEPT(true); // Invalid value
    }
  }

  // /////////////////////////////////////////////
  // (2) Check py = -inv(C)*c
  //
  // We want to check; 
  // 
  //  FDC * (inv(C)*c) \approx c
  //       \_________/
  //         -py
  //
  // We can compute this as:
  //           
  // FDC * py = [ FDC, FDN ] * [ -py ; 0 ]
  //            \__________/
  //                FDA'
  // 
  // t1 =  [ -py ; 0 ]
  // 
  // t2 = FDA'*t1
  // 
  // Compare t2 \approx c
  // 
  if(py) {
    if(out)
      *out
        << "\nComparing c with finite difference product FDA'*[ -py; 0 ] = -FDC*py ...\n";
    // t1 =  [ -py ; 0 ]
    VectorSpace::vec_mut_ptr_t
      t1 = space_x->create_member();
    V_StV( t1->sub_view(var_dep).get(), -1.0, *py );
    *t1->sub_view(var_indep) = 0.0;
    // t2 = FDA'*t1
    VectorSpace::vec_mut_ptr_t
      t2 = nlp->space_c()->create_member();
    preformed_fd = fd_deriv_prod.calc_deriv_product(
      xo,xl,xu
      ,*t1,NULL,NULL,true,nlp,NULL,t2.get(),out,dump_all(),dump_all()
      );
    if( !preformed_fd )
      goto FD_NOT_PREFORMED;
    const value_type
      sum_c  = sum(*c),
      sum_t2 = sum(*t2);
    assert_print_nan_inf(sum_t2, "sum(-FDC*py)",true,out);
    const value_type
      calc_err = ::fabs( ( sum_c - sum_t2 )/( ::fabs(sum_c) + ::fabs(sum_t2) + small_num ) );
    if(out)
      *out
        << "\nrel_err(sum(c),sum(-FDC*py)) = "
        << "rel_err(" << sum_c << "," << sum_t2 << ") = "
        << calc_err << endl;
    if( calc_err >= Gc_error_tol() ) {
      if(out)
        *out
          << "Error, above relative error exceeded Gc_error_tol = " << Gc_error_tol() << endl;
      if(print_all_warnings)
        *out << "\nt1 = [ -py; 0 ] =\n" << *t1
           << "\nt2 = FDA'*t1 = -FDC*py =\n"   << *t2;
      update_success( false, &success );
    }
    if( calc_err >= Gc_warning_tol() ) {
      if(out)
        *out
          << "\nWarning, above relative error exceeded Gc_warning_tol = " << Gc_warning_tol() << endl;
    }
  }

  // /////////////////////////////////////////////
  // (3) Check D = -inv(C)*N

  if(D) {
    switch( Gc_testing_method() ) {
      case FD_COMPUTE_ALL: {
        //
        // Compute FDN outright and check
        // -FDC * D \aprox FDN
        // 
        // We want to compute:
        // 
        // FDC * -D = [ FDC, FDN ] * [ -D; 0 ]
        //            \__________/
        //                FDA'
        // 
        // To compute the above we perform:
        // 
        // T = FDA' * [ -D; 0 ] (one column at a time)
        // 
        // Compare T \approx FDN
        //
/*
        // FDN
        DMatrix FDN(m,n-m);
        fd_deriv_computer.calc_deriv( xo, xl, xu
          , Range1D(m+1,n), nlp, NULL
          , &FDN() ,BLAS_Cpp::trans, out );

        // T = FDA' * [ -D; 0 ] (one column at a time)
        DMatrix T(m,n-m);
        DVector t(n);
        t(m+1,n) = 0.0;
        for( int s = 1; s <= n-m; ++s ) {
          // t = [ -D(:,s); 0 ]
          V_StV( &t(1,m), -1.0, D->col(s) );
          // T(:,s) =  FDA' * t
          fd_deriv_prod.calc_deriv_product(
            xo,xl,xu,t(),NULL,NULL,nlp,NULL,&T.col(s),out);
        }        

        // Compare T \approx FDN
        if(out)
          *out
            << "\nChecking the computed D = -inv(C)*N\n"
            << "where D(i,j) = (-FDC*D)(i,j), dM(i,j) = FDN(i,j) ...\n";
        result = comp_M.comp(
          T(), FDN, BLAS_Cpp::no_trans
          , CompareDenseSparseMatrices::FULL_MATRIX
          , CompareDenseSparseMatrices::REL_ERR_BY_COL
          , Gc_warning_tol(), Gc_error_tol()
          , print_all_warnings, out );
        update_success( result, &success );
        if(!result) return false;
*/
        TEST_FOR_EXCEPT(true); // Todo: Implement above!
        break;
      }
      case FD_DIRECTIONAL: {
        //
        // Compute -FDC * D * v \aprox FDN * v
        // for random v's
        //
        // We will compute this as:
        // 
        // t1 = [ 0; y ] <: R^(n)
        // 
        // t2 = FDA' * t1  (  FDN * y ) <: R^(m)
        //
        // t1 = [ -D * y ; 0 ]  <: R^(n)
        // 
        // t3 = FDA' * t1  ( -FDC * D * y ) <: R^(m)
        // 
        // Compare t2 \approx t3
        //
        if(out)
          *out
            << "\nComparing finite difference products -FDC*D*y with FDN*y for "
              "random vectors y ...\n";
        VectorSpace::vec_mut_ptr_t
          y  = space_x->sub_space(var_indep)->create_member(),
          t1 = space_x->create_member(),
          t2 = space_c->create_member(),
          t3 = space_c->create_member();
        value_type max_warning_viol = 0.0;
        int num_warning_viol = 0;
        const int num_fd_directions_used = ( num_fd_directions() > 0 ? num_fd_directions() : 1 );
        for( int direc_i = 1; direc_i <= num_fd_directions_used; ++direc_i ) {
          if( num_fd_directions() > 0 ) {
            random_vector( rand_y_l, rand_y_u, y.get() );
            if(out)
              *out
                << "\n****"
                << "\n**** Random directional vector " << direc_i << " ( ||y||_1 / n = "
                << (y->norm_1() / y->dim()) << " )"
                << "\n***\n";
          }
          else {
            *y = 1.0;
            if(out)
              *out
                << "\n****"
                << "\n**** Ones vector y ( ||y||_1 / n = "<<(y->norm_1()/y->dim())<<" )"
                << "\n***\n";
          }
          // t1 = [ 0; y ] <: R^(n)
          *t1->sub_view(var_dep)   = 0.0;
          *t1->sub_view(var_indep) = *y;
          // t2 = FDA' * t1  (  FDN * y ) <: R^(m)
          preformed_fd = fd_deriv_prod.calc_deriv_product(
            xo,xl,xu
            ,*t1,NULL,NULL,true,nlp,NULL,t2.get(),out,dump_all(),dump_all()
            );
          if( !preformed_fd )
            goto FD_NOT_PREFORMED;
          // t1 = [ -D * y ; 0 ]  <: R^(n)
          V_StMtV( t1->sub_view(var_dep).get(), -1.0, *D, BLAS_Cpp::no_trans, *y );
          *t1->sub_view(var_indep) = 0.0;
          // t3 = FDA' * t1  ( -FDC * D * y ) <: R^(m)
          preformed_fd = fd_deriv_prod.calc_deriv_product(
            xo,xl,xu
            ,*t1,NULL,NULL,true,nlp,NULL,t3.get(),out,dump_all(),dump_all()
            );
          // Compare t2 \approx t3
          const value_type
            sum_t2 = sum(*t2),
            sum_t3 = sum(*t3);
          const value_type
            calc_err = ::fabs( ( sum_t2 - sum_t3 )/( ::fabs(sum_t2) + ::fabs(sum_t3) + small_num ) );
          if(out)
            *out
              << "\nrel_err(sum(-FDC*D*y),sum(FDN*y)) = "
              << "rel_err(" << sum_t3 << "," << sum_t2 << ") = "
              << calc_err << endl;
          if( calc_err >= Gc_warning_tol() ) {
            max_warning_viol = my_max( max_warning_viol, calc_err );
            ++num_warning_viol;
          }
          if( calc_err >= Gc_error_tol() ) {
            if(out)
              *out
                << "Error, above relative error exceeded Gc_error_tol = " << Gc_error_tol() << endl
                << "Stoping the tests!\n";
            if(print_all_warnings)
              *out << "\ny =\n" << *y
                   << "\nt1 = [ -D*y; 0 ] =\n" << *t1
                   << "\nt2 =  FDA' * [ 0; y ] = FDN * y =\n" << *t2
                   << "\nt3 =  FDA' * t1 = -FDC * D * y =\n" << *t3;
            update_success( false, &success );
          }
        }
        if(out && num_warning_viol)
          *out
            << "\nThere were " << num_warning_viol << " warning tolerance "
            << "violations out of num_fd_directions = " << num_fd_directions()
            << " computations of sum(FDC*D*y) and sum(FDN*y)\n"
            << "and the maximum relative iolation was " << max_warning_viol
            << " > Gc_waring_tol = " << Gc_warning_tol() << endl;
        break;
      }
      default:
        TEST_FOR_EXCEPT(true);
    }
  }

  // ///////////////////////////////////////////////
  // (4) Check rGf = Gf(var_indep) + D'*Gf(var_dep)

  if(rGf) {
    if( Gf && D ) {
      if(out)
        *out
          << "\nComparing rGf_tmp = Gf(var_indep) - D'*Gf(var_dep) with rGf ...\n";
      VectorSpace::vec_mut_ptr_t
        rGf_tmp = space_x->sub_space(var_indep)->create_member();
      *rGf_tmp = *Gf->sub_view(var_indep);
      Vp_MtV( rGf_tmp.get(), *D, BLAS_Cpp::trans, *Gf->sub_view(var_dep) );
      const value_type
        sum_rGf_tmp  = sum(*rGf_tmp),
        sum_rGf      = sum(*rGf);
      const value_type
        calc_err = ::fabs( ( sum_rGf_tmp - sum_rGf )/( ::fabs(sum_rGf_tmp) + ::fabs(sum_rGf) + small_num ) );
      if(out)
        *out
          << "\nrel_err(sum(rGf_tmp),sum(rGf)) = "
          << "rel_err(" << sum_rGf_tmp << "," << sum_rGf << ") = "
          << calc_err << endl;
      if( calc_err >= Gc_error_tol() ) {
        if(out)
          *out
            << "Error, above relative error exceeded Gc_error_tol = " << Gc_error_tol() << endl;
        if(print_all_warnings)
          *out << "\nrGf_tmp =\n" << *rGf_tmp
             << "\nrGf =\n"   << *rGf;
        update_success( false, &success );
      }
      if( calc_err >= Gc_warning_tol() ) {
        if(out)
          *out
            << "\nWarning, above relative error exceeded Gc_warning_tol = "
            << Gc_warning_tol() << endl;
      }
    }
    else if( D ) {
      if(out)
        *out
          << "\nComparing rGf'*y with the finite difference product"
          << " fd_prod(f,[D*y;y]) for random vectors y ...\n";
      VectorSpace::vec_mut_ptr_t
        y  = space_x->sub_space(var_indep)->create_member(),
        t  = space_x->create_member();
      value_type max_warning_viol = 0.0;
      int num_warning_viol = 0;
      const int num_fd_directions_used = ( num_fd_directions() > 0 ? num_fd_directions() : 1 );
      for( int direc_i = 1; direc_i <= num_fd_directions_used; ++direc_i ) {
        if( num_fd_directions() > 0 ) {
          random_vector( rand_y_l, rand_y_u, y.get() );
          if(out)
            *out
              << "\n****"
              << "\n**** Random directional vector " << direc_i << " ( ||y||_1 / n = "
              << (y->norm_1() / y->dim()) << " )"
              << "\n***\n";
        }
        else {
          *y = 1.0;
          if(out)
            *out
              << "\n****"
              << "\n**** Ones vector y ( ||y||_1 / n = "<<(y->norm_1()/y->dim())<<" )"
              << "\n***\n";
        }
        // t = [ D*y; y ]
        LinAlgOpPack::V_MtV(&*t->sub_view(var_dep),*D,BLAS_Cpp::no_trans,*y);
        *t->sub_view(var_indep) = *y;
        value_type fd_rGf_y = 0.0;
        // fd_Gf_y
        preformed_fd = fd_deriv_prod.calc_deriv_product(
          xo,xl,xu
          ,*t,NULL,NULL,true,nlp,&fd_rGf_y,NULL,out,dump_all(),dump_all()
          );
        if( !preformed_fd )
          goto FD_NOT_PREFORMED;
        if(out) *out << "fd_prod(f,[D*y;y]) = " << fd_rGf_y << "\n";
        // rGf_y = rGf'*y
        const value_type rGf_y = dot(*rGf,*y);
        if(out) *out << "rGf'*y = " << rGf_y << "\n";
        // Compare fd_rGf_y and rGf*y
        const value_type
          calc_err = ::fabs( ( rGf_y - fd_rGf_y )/( ::fabs(rGf_y) + ::fabs(fd_rGf_y) + small_num ) );
        if( calc_err >= Gc_warning_tol() ) {
          max_warning_viol = my_max( max_warning_viol, calc_err );
          ++num_warning_viol;
        }
        if(out)
          *out
            << "\nrel_err(rGf'*y,fd_prod(f,[D*y;y])) = "
            << "rel_err(" << fd_rGf_y << "," << rGf_y << ") = "
            << calc_err << endl;
        if( calc_err >= Gf_error_tol() ) {
          if(out)
            *out << "Error, above relative error exceeded Gc_error_tol = " << Gc_error_tol() << endl;
          if(print_all_warnings)
            *out << "\ny =\n" << *y
                 << "\nt = [ D*y; y ] =\n" << *t;
          update_success( false, &success );
        }
      }
    }
    else {
      TEST_FOR_EXCEPT(true); // ToDo: Test rGf without D? (This is not going to be easy!)
    }
  }
  
  // ///////////////////////////////////////////////////
  // (5) Check GcU, and/or Uz (for undecomposed equalities)

  if(GcU || Uz) {
    TEST_FOR_EXCEPT(true); // ToDo: Implement!
  }
  
FD_NOT_PREFORMED:

  if(!preformed_fd) {
    if(out)
      *out
        << "\nError, the finite difference computation was not preformed due to cramped bounds\n"
        << "Finite difference test failed!\n" << endl;
    return false;
  }

  } // end try
  catch( const AbstractLinAlgPack::NaNInfException& except ) {
    if(out)
      *out
        << "Error, found a NaN or Inf.  Stoping tests\n";
    success = false;
  }
  
  if( out ) {
    if( success )
      *out
        << "\nCongradulations, all the finite difference errors where within the\n"
        "specified error tolerances!\n";
    else
      *out
        << "\nOh no, at least one of the above finite difference tests failed!\n";
  }

  return success;

}
bool NLPInterfacePack::test_nlp_direct(
  NLPDirect                                    *nlp
  ,OptionsFromStreamPack::OptionsFromStream    *options
  ,std::ostream                                *out
  )
{
  using TestingHelperPack::update_success;

  bool result;
  bool success = true;
  
  Teuchos::VerboseObjectTempState<NLP>
    nlpOutputTempState(Teuchos::rcp(nlp,false),Teuchos::getFancyOStream(Teuchos::rcp(out,false)),Teuchos::VERB_LOW);

  if(out)
    *out
      << "\n****************************"
      << "\n*** test_nlp_direct(...) ***"
      << "\n*****************************\n";
  
  nlp->initialize(true);

  // Test the DVector spaces
  if(out)
    *out << "\nTesting the vector spaces ...\n";
  
  VectorSpaceTester vec_space_tester;
  if(options) {
    VectorSpaceTesterSetOptions
      opt_setter(&vec_space_tester);
    opt_setter.set_options(*options);
  }

  if(out)
    *out << "\nTesting nlp->space_x() ...\n";
  result = vec_space_tester.check_vector_space(*nlp->space_x(),out);
  if(out) {
    if(result)
      *out << "nlp->space_x() checks out!\n";
    else
      *out << "nlp->space_x() check failed!\n";
  }
  update_success( result, &success );
  if(out)
    *out << "\nTesting nlp->space_x()->sub_space(nlp->var_dep()) ...\n";
  result = vec_space_tester.check_vector_space(
    *nlp->space_x()->sub_space(nlp->var_dep()),out);
  if(out) {
    if(result)
      *out << "nlp->space_x()->sub_space(nlp->var_dep()) checks out!\n";
    else
      *out << "nlp->space_x()->sub_space(nlp->var_dep()) check failed!\n";
  }
  update_success( result, &success );
  if(out)
    *out << "\nTesting nlp->space_x()->sub_space(nlp->var_indep()) ...\n";
  result = vec_space_tester.check_vector_space(
    *nlp->space_x()->sub_space(nlp->var_indep()),out);
  if(out) {
    if(result)
      *out << "nlp->space_x()->sub_space(nlp->var_indep()) checks out!\n";
    else
      *out << "nlp->space_x()->sub_space(nlp->var_indep()) check failed!\n";
  }
  update_success( result, &success );
  if(out)
    *out << "\nTesting nlp->space_c() ...\n";
  result = vec_space_tester.check_vector_space(*nlp->space_c(),out);
  if(out) {
    if(result)
      *out << "nlp->space_c() checks out!\n";
    else
      *out << "nlp->space_c() check failed!\n";
  }
  update_success( result, &success );
  if(out)
    *out << "\nTesting nlp->space_c()->sub_space(nlp->con_decomp()) ...\n";
  result = vec_space_tester.check_vector_space(
    *nlp->space_c()->sub_space(nlp->con_decomp()),out);
  if(out) {
    if(result)
      *out << "nlp->space_c()->sub_space(nlp->con_decomp()) checks out!\n";
    else
      *out << "nlp->space_c()->sub_space(nlp->con_decomp()) check failed!\n";
  }
  update_success( result, &success );
  if( nlp->con_decomp().size() < nlp->m() ) {
    if(out)
      *out << "\nTesting nlp->space_c()->sub_space(nlp->con_undecomp()) ...\n";
    result = vec_space_tester.check_vector_space(
      *nlp->space_c()->sub_space(nlp->con_undecomp()),out);
    if(out) {
      if(result)
        *out << "nlp->space_c()->sub_space(nlp->con_undecomp()) checks out!\n";
      else
        *out << "nlp->space_c()->sub_space(nlp->con_undecomp()) check failed!\n";
    }
    update_success( result, &success );
  }

  // Test the NLP interface first!

  NLPTester nlp_tester;
  if(options) {
    NLPTesterSetOptions
      nlp_tester_opt_setter(&nlp_tester);
    nlp_tester_opt_setter.set_options(*options);
  }
  const bool print_all_warnings = nlp_tester.print_all();

  result = nlp_tester.test_interface(
    nlp, nlp->xinit(), print_all_warnings, out );
  update_success( result, &success );
  
  // Test the NLPDirect interface now!

  const bool supports_Gf = nlp->supports_Gf();

  if(out)
    *out << "\nCalling nlp->calc_point(...) at nlp->xinit() ...\n";
  const size_type
    n  = nlp->n(),
    m  = nlp->m();
  const Range1D
    var_dep      = nlp->var_dep(),
    var_indep    = nlp->var_indep(),
    con_decomp   = nlp->con_decomp(),
    con_undecomp = nlp->con_undecomp();
  VectorSpace::vec_mut_ptr_t
    c   =      nlp->space_c()->create_member(),
    Gf  =      nlp->space_x()->create_member(),
    py  =      nlp->space_x()->sub_space(var_dep)->create_member(),
    rGf =      nlp->space_x()->sub_space(var_indep)->create_member();
  NLPDirect::mat_fcty_ptr_t::element_type::obj_ptr_t
    GcU = con_decomp.size() < m ? nlp->factory_GcU()->create() : Teuchos::null,
    D   =                         nlp->factory_D()->create(),
    Uz   = con_decomp.size() < m ? nlp->factory_Uz()->create()   : Teuchos::null;
  nlp->calc_point(
    nlp->xinit(), NULL, c.get(), true, supports_Gf?Gf.get():NULL, py.get(), rGf.get()
    ,GcU.get(), D.get(), Uz.get() );
  if(out) {
    if(supports_Gf) {
      *out << "\n||Gf||inf  = " << Gf->norm_inf();
      if(nlp_tester.print_all())
        *out << "\nGf =\n" << *Gf;
    }
    *out << "\n||py||inf  = " << py->norm_inf();
    if(nlp_tester.print_all())
      *out << "\npy =\n" << *py;
    *out << "\n||rGf||inf  = " << rGf->norm_inf();
    if(nlp_tester.print_all())
      *out << "\nrGf =\n" << *rGf;
    if(nlp_tester.print_all())
      *out << "\nD =\n" << *D;
    if( con_decomp.size() < m ) {
      TEST_FOR_EXCEPT(true); // ToDo: Print GcU and Uz
    }
    *out << "\n";
  }

  CalcFiniteDiffProd
    calc_fd_prod;
  if(options) {
    CalcFiniteDiffProdSetOptions
      options_setter( &calc_fd_prod );
    options_setter.set_options(*options);
  }
  NLPDirectTester
    nlp_first_order_direct_tester(Teuchos::rcp(&calc_fd_prod,false));
  if(options) {
    NLPDirectTesterSetOptions
      nlp_tester_opt_setter(&nlp_first_order_direct_tester);
    nlp_tester_opt_setter.set_options(*options);
  }
  result = nlp_first_order_direct_tester.finite_diff_check(
    nlp, nlp->xinit()
    ,nlp->num_bounded_x() ? &nlp->xl() : NULL
    ,nlp->num_bounded_x() ? &nlp->xu() : NULL
    ,c.get()
    ,supports_Gf?Gf.get():NULL,py.get(),rGf.get(),GcU.get(),D.get(),Uz.get()
    ,print_all_warnings, out
    );
  update_success( result, &success );

  return success;
}
bool CalcReducedGradLagrangianStd_AddedStep::do_step(
  Algorithm& _algo, poss_type step_poss, IterationPack::EDoStepType type
  ,poss_type assoc_step_poss
  )
{
  using BLAS_Cpp::trans;
  using LinAlgOpPack::V_VpV;
  using LinAlgOpPack::V_MtV;
  using LinAlgOpPack::Vp_V;
  using LinAlgOpPack::Vp_MtV;

  NLPAlgo	        &algo  = rsqp_algo(_algo);
  NLPAlgoState    &s     = algo.rsqp_state();

  EJournalOutputLevel olevel = algo.algo_cntr().journal_output_level();
  EJournalOutputLevel ns_olevel = algo.algo_cntr().null_space_journal_output_level();
  std::ostream& out = algo.track().journal_out();

  // print step header.
  if( static_cast<int>(ns_olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
    using IterationPack::print_algorithm_step;
    print_algorithm_step( algo, step_poss, type, assoc_step_poss, out );
  }

  // Calculate: rGL = rGf + Z' * nu + Uz' * lambda(equ_undecomp)

  IterQuantityAccess<VectorMutable>
    &rGL_iq  = s.rGL(),
    &nu_iq   = s.nu(),
    &Gf_iq   = s.Gf();

  VectorMutable &rGL_k = rGL_iq.set_k(0);

  if( nu_iq.updated_k(0) && nu_iq.get_k(0).nz() > 0 ) {
    // Compute rGL = Z'*(Gf + nu) to reduce the effect of roundoff in this
    // catastropic cancelation
    const Vector &nu_k = nu_iq.get_k(0);
    VectorSpace::vec_mut_ptr_t
      tmp = nu_k.space().create_member();

    if( (int)olevel >= (int)PRINT_VECTORS )
      out << "\nnu_k = \n" << nu_k;
    V_VpV( tmp.get(), Gf_iq.get_k(0), nu_k );
    if( (int)olevel >= (int)PRINT_VECTORS )
      out << "\nGf_k+nu_k = \n" << *tmp;
    V_MtV(	&rGL_k, s.Z().get_k(0), trans, *tmp );
    if( (int)ns_olevel >= (int)PRINT_VECTORS )
      out << "\nrGL_k = \n" << rGL_k;
  }
  else {
    rGL_k = s.rGf().get_k(0);
  }

  // ToDo: Add terms for undecomposed equalities and inequalities!
  // + Uz' * lambda(equ_undecomp)

  if( static_cast<int>(ns_olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
    out	<< "\n||rGL_k||inf = " << rGL_k.norm_inf() << "\n";
  }

  if( static_cast<int>(ns_olevel) >= static_cast<int>(PRINT_VECTORS) ) {
    out	<< "\nrGL_k = \n" << rGL_k;
  }

  return true;
}
void MatrixSymPosDefLBFGS::secant_update(
  VectorMutable* s, VectorMutable* y, VectorMutable* Bs
  )
{
  using AbstractLinAlgPack::BFGS_sTy_suff_p_d;
  using AbstractLinAlgPack::dot;
  using LinAlgOpPack::V_MtV;
  using Teuchos::Workspace;
  Teuchos::WorkspaceStore* wss = Teuchos::get_default_workspace_store().get();

  assert_initialized();

  // Check skipping the BFGS update
  const value_type
    sTy	      = dot(*s,*y);
  std::ostringstream omsg;
  if( !BFGS_sTy_suff_p_d(*s,*y,&sTy,&omsg,"MatrixSymPosDefLBFGS::secant_update(...)" ) ) {
    throw UpdateSkippedException( omsg.str() );	
  }

  try {

  // Update counters
  if( m_bar_ == m_ ) {
    // We are at the end of the storage so remove the oldest stored update
    // and move updates to make room for the new update.  This has to be done for the
    // the matrix to behave properly
    {for( size_type k = 1; k <= m_-1; ++k ) {
      S_->col(k) = S_->col(k+1);                            // Shift S.col() to the left
      Y_->col(k) = Y_->col(k+1);                            // Shift Y.col() to the left
      STY_.col(k)(1,m_-1) = STY_.col(k+1)(2,m_);            // Move submatrix STY(2,m-1,2,m-1) up and left
      STSYTY_.col(k)(k+1,m_) = STSYTY_.col(k+1)(k+2,m_+1);  // Move triangular submatrix STS(2,m-1,2,m-1) up and left
      STSYTY_.col(k+1)(1,k) = STSYTY_.col(k+2)(2,k+1);      // Move triangular submatrix YTY(2,m-1,2,m-1) up and left
    }}
    // ToDo: Create an abstract interface, call it MultiVectorShiftVecs, to rearrange S and Y all at once.
    // This will be important for parallel performance.
  }
  else {
    m_bar_++;
  }
  // Set the update vectors
  *S_->col(m_bar_) = *s;
  *Y_->col(m_bar_) = *y;

  // /////////////////////////////////////////////////////////////////////////////////////
  // Update S'Y
  //
  // Update the row and column m_bar
  //
  //	S'Y = 
  //
  //	[	s(1)'*y(1)		...		s(1)'*y(m_bar)		...		s(1)'*y(m_bar)		]
  //	[	.						.							.					] row
  //	[	s(m_bar)'*y(1)	...		s(m_bar)'*y(m_bar)	...		s(m_bar)'*y(m_bar)	] m_bar
  //	[	.						.							.					]
  //	[	s(m_bar)'*y(1)	...		s(m_bar)'*y(m_bar)	...		s(m_bar)'*y(m_bar)	]
  //
  //								col m_bar
  //
  // Therefore we set:
  //	(S'Y)(:,m_bar) =  S'*y(m_bar)
  //	(S'Y)(m_bar,:) =  s(m_bar)'*Y

  const multi_vec_ptr_t
    S = this->S(),
    Y = this->Y();

  VectorSpace::vec_mut_ptr_t
    t = S->space_rows().create_member();  // temporary workspace

  //	(S'Y)(:,m_bar) =  S'*y(m_bar)
  V_MtV( t.get(), *S, BLAS_Cpp::trans, *y );
  STY_.col(m_bar_)(1,m_bar_) = VectorDenseEncap(*t)();

  //	(S'Y)(m_bar,:)' =  Y'*s(m_bar)
  V_MtV( t.get(), *Y, BLAS_Cpp::trans, *s );
  STY_.row(m_bar_)(1,m_bar_) = VectorDenseEncap(*t)();

  // /////////////////////////////////////////////////////////////////
  // Update S'S
  //
  //	S'S = 
  //
  //	[	s(1)'*s(1)		...		symmetric					symmetric			]
  //	[	.						.							.					] row
  //	[	s(m_bar)'*s(1)	...		s(m_bar)'*s(m_bar)	...		symmetric			] m_bar
  //	[	.						.							.					]
  //	[	s(m_bar)'*s(1)	...		s(m_bar)'*s(m_bar)	...		s(m_bar)'*s(m_bar)	]
  //
  //								col m_bar
  //
  // Here we will update the lower triangular part of S'S.  To do this we
  // only need to compute:
  //		t = S'*s(m_bar) = { s(m_bar)' * [ s(1),..,s(m_bar),..,s(m_bar) ]  }'
  // then set the appropriate rows and columns of S'S.

  Workspace<value_type>   work_ws(wss,m_bar_);
  DVectorSlice                  work(&work_ws[0],work_ws.size());

  // work = S'*s(m_bar)
  V_MtV( t.get(), *S, BLAS_Cpp::trans, *s );
  work = VectorDenseEncap(*t)();

  // Set row elements
  STSYTY_.row(m_bar_+1)(1,m_bar_) = work;
  // Set column elements
  STSYTY_.col(m_bar_)(m_bar_+1,m_bar_+1) = work(m_bar_,m_bar_);

  // /////////////////////////////////////////////////////////////////////////////////////
  // Update Y'Y
  //
  // Update the row and column m_bar
  //
  //	Y'Y = 
  //
  //	[	y(1)'*y(1)		...		y(1)'*y(m_bar)		...		y(1)'*y(m_bar)		]
  //	[	.						.							.					] row
  //	[	symmetric		...		y(m_bar)'*y(m_bar)	...		y(m_bar)'*y(m_bar)	] m_bar
  //	[	.						.							.					]
  //	[	symmetric		...		symmetric			...		y(m_bar)'*y(m_bar)	]
  //
  //								col m_bar
  //
  // Here we will update the upper triangular part of Y'Y.  To do this we
  // only need to compute:
  //		t = Y'*y(m_bar) = { y(m_bar)' * [ y(1),..,y(m_bar),..,y(m_bar) ]  }'
  // then set the appropriate rows and columns of Y'Y.

  // work = Y'*y(m_bar)
  V_MtV( t.get(), *Y, BLAS_Cpp::trans, *y );
  work = VectorDenseEncap(*t)();

  // Set row elements
  STSYTY_.col(m_bar_+1)(1,m_bar_) = work;
  // Set column elements
  STSYTY_.row(m_bar_)(m_bar_+1,m_bar_+1) = work(m_bar_,m_bar_);

  // /////////////////////////////
  // Update gamma_k

  // gamma_k = s'*y / y'*y
  if(auto_rescaling_)
    gamma_k_ = STY_(m_bar_,m_bar_) / STSYTY_(m_bar_,m_bar_+1);

  // We do not initially update Q unless we have to form a matrix-vector
  // product later.
  
  Q_updated_ = false;
  num_secant_updates_++;

  }	//	end try
  catch(...) {
    // If we throw any exception the we should make the matrix uninitialized
    // so that we do not leave this object in an inconsistant state.
    n_ = 0;
    throw;
  }

}
void MatrixSymPosDefLBFGS::V_InvMtV(
  VectorMutable* y, BLAS_Cpp::Transp trans_rhs1
  , const Vector& x
  ) const
{
  using AbstractLinAlgPack::Vp_StMtV;
  using DenseLinAlgPack::V_InvMtV;
  using LinAlgOpPack::V_mV;
  using LinAlgOpPack::V_StV;
  using LinAlgOpPack::Vp_V;
  using LinAlgOpPack::V_MtV;
  using LinAlgOpPack::V_StMtV;
  using LinAlgOpPack::Vp_MtV;
  using DenseLinAlgPack::Vp_StMtV;
  typedef VectorDenseEncap         vde;
  typedef VectorDenseMutableEncap  vdme;
  using Teuchos::Workspace;
  Teuchos::WorkspaceStore* wss = Teuchos::get_default_workspace_store().get();

  assert_initialized();

  TEUCHOS_TEST_FOR_EXCEPT( !(  inverse_is_updated_  ) ); // For now just always update

  // y = inv(Bk) * x = Hk * x
  //
  // = gk*x + [S gk*Y] * [ inv(R')*(D+gk*Y'Y)*inv(R)     -inv(R') ] * [   S'  ] * x
  //                     [            -inv(R)                0    ]   [ gk*Y' ]
  //
  // Perform in the following (in order):
  //
  // y = gk*x
  //
  // t1 = [   S'*x  ]					<: R^(2*m)
  //      [ gk*Y'*x ]
  //
  // t2 = inv(R) * t1(1:m)			<: R^(m)
  //
  // t3 = - inv(R') * t1(m+1,2*m)		<: R^(m)
  //
  // t4 = gk * Y'Y * t2				<: R^(m)
  //
  // t4 += D*t2
  //
  // t5 = inv(R') * t4				<: R^(m)
  //
  // t5 += t3
  //
  // y += S*t5
  //
  // y += -gk*Y*t2

  // y = gk*x
  V_StV( y, gamma_k_, x );

  const size_type
    mb = m_bar_;	
  
  if( !mb )
    return;	// No updates have been performed.

  const multi_vec_ptr_t
    S = this->S(),
    Y = this->Y();

  // Get workspace

  Workspace<value_type>    t1_ws(wss,2*mb);
  DVectorSlice                   t1(&t1_ws[0],t1_ws.size());
  Workspace<value_type>    t2_ws(wss,mb);
  DVectorSlice                   t2(&t2_ws[0],t2_ws.size());
  Workspace<value_type>    t3_ws(wss,mb);
  DVectorSlice                   t3(&t3_ws[0],t3_ws.size());
  Workspace<value_type>    t4_ws(wss,mb);
  DVectorSlice                   t4(&t4_ws[0],t4_ws.size());
  Workspace<value_type>    t5_ws(wss,mb);
  DVectorSlice                   t5(&t5_ws[0],t5_ws.size());

  VectorSpace::vec_mut_ptr_t
    t = S->space_rows().create_member();

  const DMatrixSliceTri
    &R = this->R();

  const DMatrixSliceSym
    &YTY = this->YTY();

  // t1 = [   S'*x  ]
  //      [ gk*Y'*x ]
  V_MtV( t.get(), *S, BLAS_Cpp::trans, x );
  t1(1,mb) = vde(*t)();
  V_StMtV( t.get(), gamma_k_, *Y, BLAS_Cpp::trans, x );
  t1(mb+1,2*mb) = vde(*t)();

  // t2 = inv(R) * t1(1:m)
  V_InvMtV( &t2, R, BLAS_Cpp::no_trans, t1(1,mb) );

  // t3 = - inv(R') * t1(m+1,2*m)
  V_mV( &t3, t1(mb+1,2*mb) );
  V_InvMtV( &t3, R, BLAS_Cpp::trans, t3 );

  // t4 = gk * Y'Y * t2
  V_StMtV( &t4, gamma_k_, YTY, BLAS_Cpp::no_trans, t2 );

  // t4 += D*t2
  Vp_DtV( &t4, t2 );

  // t5 = inv(R') * t4
  V_InvMtV( &t5, R, BLAS_Cpp::trans, t4 );

  // t5 += t3
  Vp_V( &t5, t3 );

  // y += S*t5
  (vdme(*t)() = t5);
  Vp_MtV( y, *S, BLAS_Cpp::no_trans, *t );

  // y += -gk*Y*t2
  (vdme(*t)() = t2);
  Vp_StMtV( y, -gamma_k_, *Y, BLAS_Cpp::no_trans, *t );

}
void MatrixSymPosDefLBFGS::Vp_StMtV(
    VectorMutable* y, value_type alpha, BLAS_Cpp::Transp trans_rhs1
  , const Vector& x, value_type beta
  ) const
{
  using AbstractLinAlgPack::Vt_S;
  using AbstractLinAlgPack::Vp_StV;
  using AbstractLinAlgPack::Vp_StMtV;
  using LinAlgOpPack::V_StMtV;
  using LinAlgOpPack::V_MtV;
  typedef VectorDenseEncap         vde;
  typedef VectorDenseMutableEncap  vdme;
  using Teuchos::Workspace;
  Teuchos::WorkspaceStore* wss = Teuchos::get_default_workspace_store().get();

  assert_initialized();

  TEUCHOS_TEST_FOR_EXCEPT( !(  original_is_updated_  ) ); // For now just always update

  // y = b*y + Bk * x
  //
  // y = b*y + (1/gk)*x - [ (1/gk)*S  Y ] * inv(Q) * [ (1/gk)*S' ] * x
  //                                                 [     Y'    ]
  // Perform the following operations (in order):
  //
  // y = b*y
  //
  // y += (1/gk)*x
  //
  // t1 = [ (1/gk)*S'*x ]		<: R^(2*m)
  //		[      Y'*x   ]
  //
  // t2 =	inv(Q) * t1			<: R^(2*m)
  //
  // y += -(1/gk) * S * t2(1:m)
  //
  // y += -1.0 * Y * t2(m+1,2m)

  const value_type
    invgk = 1.0 / gamma_k_;

  // y = b*y
  Vt_S( y, beta );

  // y += (1/gk)*x
  Vp_StV( y, invgk, x );

  if( !m_bar_ )
    return;	// No updates have been added yet.

  const multi_vec_ptr_t
    S = this->S(),
    Y = this->Y();

  // Get workspace

  const size_type
    mb = m_bar_;

  Workspace<value_type>  t1_ws(wss,2*mb);
  DVectorSlice                 t1(&t1_ws[0],t1_ws.size());
  Workspace<value_type>  t2_ws(wss,2*mb);
  DVectorSlice                 t2(&t2_ws[0],t2_ws.size());

  VectorSpace::vec_mut_ptr_t
    t = S->space_rows().create_member();

  // t1 = [ (1/gk)*S'*x ]
  //		[      Y'*x   ]

  V_StMtV( t.get(), invgk, *S, BLAS_Cpp::trans, x );
  t1(1,mb) = vde(*t)();
  V_MtV( t.get(), *Y, BLAS_Cpp::trans, x );
  t1(mb+1,2*mb) = vde(*t)();

  // t2 =	inv(Q) * t1
  V_invQtV( &t2, t1 );

  // y += -(1/gk) * S * t2(1:m)
  (vdme(*t)() = t2(1,mb));
  Vp_StMtV( y, -invgk, *S, BLAS_Cpp::no_trans, *t );

  // y += -1.0 * Y * t2(m+1,2m
  (vdme(*t)() = t2(mb+1,2*mb));
  Vp_StMtV( y, -1.0, *Y, BLAS_Cpp::no_trans, *t );

}