void LinAlgOpPack::Mp_StM(
  DMatrixSlice* C, value_type a
  ,const MatrixOp& B, BLAS_Cpp::Transp B_trans
  )
{
  using AbstractLinAlgPack::VectorSpace;
  using AbstractLinAlgPack::VectorDenseEncap;
  using AbstractLinAlgPack::MatrixOpGetGMS;
  using AbstractLinAlgPack::MatrixDenseEncap;
  const MatrixOpGetGMS
    *B_get_gms = dynamic_cast<const MatrixOpGetGMS*>(&B);
  if(B_get_gms) {
    DenseLinAlgPack::Mp_StM( C, a, MatrixDenseEncap(*B_get_gms)(), B_trans );		
  }
  else {
    const size_type num_cols = C->cols();
    VectorSpace::multi_vec_mut_ptr_t
      B_mv = ( B_trans == BLAS_Cpp::no_trans 
          ? B.space_cols() : B.space_rows()
          ).create_members(num_cols);
    assign(B_mv.get(),B,B_trans);
    for( size_type j = 1; j <= num_cols; ++j ) {
      DenseLinAlgPack::Vp_StV(&C->col(j),a,VectorDenseEncap(*B_mv->col(j))());
    }
  }
}
bool VariableBoundsTester::check_in_bounds(
  std::ostream* out, bool print_all_warnings, bool print_vectors
  ,const Vector& xL, const char xL_name[]
  ,const Vector& xU, const char xU_name[]
  ,const Vector& x,  const char x_name[]
  )
{
  using AbstractLinAlgPack::max_near_feas_step;

  if(out)
    *out
      << "\n*** Checking that variables are in bounds\n";

  VectorSpace::vec_mut_ptr_t zero = x.space().create_member(0.0);
  std::pair<value_type,value_type>
    u = max_near_feas_step( x, *zero, xL, xU, warning_tol() );
  if(u.first < 0.0) {
    if(out)
      *out << "\nWarning! the variables " << xL_name << " <= " << x_name << " <= " << xU_name
        << " are out of bounds by more than warning_tol = "	<< warning_tol() << "\n";
    u = max_near_feas_step( x, *zero, xL, xU, error_tol() );
    if(u.first < 0.0) {
      if(out)
        *out << "\nError! the variables " << xL_name << " <= " << x_name << " <= " << xU_name
          << " are out of bounds by more than error_tol = " << error_tol() << "\n";
      return false;
    }
  }
  return true;
}
void LinAlgOpPack::Vp_StPtMtV(
  DVectorSlice* y, value_type a
  ,const GenPermMatrixSlice& P, BLAS_Cpp::Transp P_trans
  ,const MatrixOp& M, BLAS_Cpp::Transp M_trans
  ,const DVectorSlice& x, value_type b
  )
{
  namespace mmp = MemMngPack;
  using BLAS_Cpp::no_trans;
  using AbstractLinAlgPack::VectorMutableDense;
  using AbstractLinAlgPack::VectorDenseMutableEncap;
  using AbstractLinAlgPack::Vp_StPtMtV;
  VectorSpace::space_ptr_t
    ay_space = ( M_trans == no_trans ? M.space_cols() : M.space_rows() ).space(P,P_trans);
  VectorSpace::vec_mut_ptr_t
    ay =  ( ay_space.get()
        ? ay_space->create_member()
        : Teuchos::rcp_implicit_cast<VectorMutable>(
          Teuchos::rcp(new VectorMutableDense(BLAS_Cpp::rows(P.rows(),P.cols(),P_trans)))
          ) ),
    ax = ( M_trans == no_trans ? M.space_rows() : M.space_cols() ).create_member();
  (VectorDenseMutableEncap(*ay))() = *y;
  (VectorDenseMutableEncap(*ax))() = x;
  Vp_StPtMtV( ay.get(), a, P, P_trans, M, M_trans, *ax, b );
  *y = VectorDenseMutableEncap(*ay)();
}
bool MultiVectorMutableCols::syrk(
  BLAS_Cpp::Transp M_trans, value_type alpha
  , value_type beta, MatrixSymOp* sym_lhs ) const
{
  using LinAlgOpPack::dot;
  MatrixSymOpGetGMSSymMutable
    *symwo_gms_lhs = dynamic_cast<MatrixSymOpGetGMSSymMutable*>(sym_lhs);
  if(!symwo_gms_lhs) {
    return MatrixOp::syrk(M_trans,alpha,beta,sym_lhs); // Boot it
  }
  MatrixDenseSymMutableEncap  DMatrixSliceSym(symwo_gms_lhs);
  const int num_vecs = this->col_vecs_.size();
  TEST_FOR_EXCEPTION(
    num_vecs != DMatrixSliceSym().rows(), std::logic_error
    ,"MultiVectorMutableCols::syrk(...) : Error, sizes do not match up!" );
  // Fill the upper or lower triangular region.
  if( DMatrixSliceSym().uplo() == BLAS_Cpp::upper ) {
    for( int i = 1; i <= num_vecs; ++i ) {
      for( int j = i; j <= num_vecs; ++j ) { // Upper triangle!
        DMatrixSliceSym().gms()(i,j) = beta * DMatrixSliceSym().gms()(i,j) + alpha * dot(*col_vecs_[i-1],*col_vecs_[j-1]);
      }
    }
  }
  else {
    for( int i = 1; i <= num_vecs; ++i ) {
      for( int j = 1; j <= i; ++j ) { // Lower triangle!
        DMatrixSliceSym().gms()(i,j) = beta * DMatrixSliceSym().gms()(i,j) + alpha * dot(*col_vecs_[i-1],*col_vecs_[j-1]);
      }
    }
  }
  return true;
}
void LinAlgOpPack::Vp_StMtV(
  DVectorSlice* y, value_type a, const MatrixOp& M
  ,BLAS_Cpp::Transp M_trans, const SpVectorSlice& x, value_type b
  )
{
  using BLAS_Cpp::no_trans;
  using AbstractLinAlgPack::VectorDenseMutableEncap;
  VectorSpace::vec_mut_ptr_t
    ay = ( M_trans == no_trans ? M.space_cols() : M.space_rows() ).create_member();
  (VectorDenseMutableEncap(*ay))() = *y;
  AbstractLinAlgPack::Vp_StMtV( ay.get(), a, M, M_trans, x, b );
  *y = VectorDenseMutableEncap(*ay)();
}
void LinAlgOpPack::V_InvMtV(
  DVector* y, const MatrixOpNonsing& M
  ,BLAS_Cpp::Transp M_trans, const DVectorSlice& x
  )
{
  using BLAS_Cpp::trans;
  using AbstractLinAlgPack::VectorDenseMutableEncap;
  VectorSpace::vec_mut_ptr_t
    ay = ( M_trans == trans ? M.space_cols() : M.space_rows() ).create_member(),
    ax = ( M_trans == trans ? M.space_rows() : M.space_cols() ).create_member();
  (VectorDenseMutableEncap(*ax))() = x;
  AbstractLinAlgPack::V_InvMtV( ay.get(), M, M_trans, *ax );
  *y = VectorDenseMutableEncap(*ay)();
}
value_type MeritFuncCalc1DQuadratic::operator()(value_type alpha) const
{
  using AbstractLinAlgPack::Vp_StV;
  *x_ = *d_[0];
  value_type alpha_i = alpha;
  for( size_type i = 1; i <= p_; ++i, alpha_i *= alpha ) {
    Vp_StV( x_, alpha_i, *d_[i] );
  }
  return phi_( *x_ );
}
void ExampleNLPObjGrad::imp_calc_f(const Vector& x, bool newx
  , const ZeroOrderInfo& zero_order_info) const
{
  using AbstractLinAlgPack::dot;
  assert_is_initialized();
  f(); // assert f is set
  TEUCHOS_TEST_FOR_EXCEPTION( n() != x.dim(), std::length_error, "ExampleNLPObjGrad::imp_calc_f(...)"  );
  // f(x) = (obj_scale/2) * sum( x(i)^2, for i = 1..n )
  *zero_order_info.f = obj_scale_ / 2.0 * dot(x,x);
}
void MatrixHessianRelaxed::Vp_StMtV(
    DVectorSlice* y, value_type a, BLAS_Cpp::Transp M_trans
  , const SpVectorSlice& x, value_type b ) const
{
  using BLAS_Cpp::no_trans;
  using BLAS_Cpp::trans;
  using AbstractLinAlgPack::Vp_StMtV;
  //
  // y = b*y + a * M * x
  // 
  //   = b*y + a * [ H  0    ] * [ x1 ]
  //               [ 0  bigM ]   [ x2 ]
  //               
  // =>              
  //               
  // y1 = b*y1 + a*H*x1
  // 
  // y2 = b*y2 + bigM * x2
  //
  LinAlgOpPack::Vp_MtV_assert_sizes(y->size(),rows(),cols(),M_trans,x.size());

  DVectorSlice
    y1 = (*y)(1,n_);
  value_type
    &y2 = (*y)(n_+1);
  const SpVectorSlice
    x1 = x(1,n_);
  const SpVectorSlice::element_type
    *x2_ele = x.lookup_element(n_+1);
  const value_type
    x2 = x2_ele ? x2_ele->value() : 0.0;

  // y1 = b*y1 + a*H*x1
  Vp_StMtV( &y1, a, *H_, no_trans, x1, b );

  // y2 = b*y2 + bigM * x2
  if( b == 0.0 )
    y2 = bigM_ * x2;
  else
    y2 = b*y2 + bigM_ * x2;
  
}
void MultiVectorMutableCols::Vp_StMtV(
  VectorMutable* y, value_type a, BLAS_Cpp::Transp M_trans
  ,const SpVectorSlice& x, value_type b
  ) const
{
  using AbstractLinAlgPack::dot;

  // y = b*y
  LinAlgOpPack::Vt_S(y,b);

  if( M_trans == BLAS_Cpp::no_trans ) {
    //
    // y += a*M*x
    //
    // =>
    //
    // y += a * x(1) * M(:,1) + a * x(2) * M(:,2) + ...
    //
    SpVectorSlice::difference_type o = x.offset();
    for( SpVectorSlice::const_iterator itr = x.begin(); itr != x.end(); ++itr ) {
      const size_type j = itr->index() + o;
      LinAlgOpPack::Vp_StV( y, a * itr->value(), *col_vecs_[j-1] );
    }
  }
  else {
    //
    // y += a*M'*x
    //
    // =>
    //
    // y(1) += a M(:,1)*x
    // y(2) += a M(:,2)*x
    // ...
    //
    for( size_type j = 1; j <= col_vecs_.size(); ++j )
      y->set_ele(
        j
        ,y->get_ele(j) + a * dot(*col_vecs_[j-1],x)
        );
  }
}
void MultiVectorMutableCols::Vp_StMtV(
  VectorMutable* y, value_type a, BLAS_Cpp::Transp M_trans
  ,const Vector& x, value_type b
  ) const
{
  using AbstractLinAlgPack::dot;

  // y = b*y
  LinAlgOpPack::Vt_S(y,b);

  if( M_trans == BLAS_Cpp::no_trans ) {
    //
    // y += a*M*x
    //
    // =>
    //
    // y += a * x(1) * M(:,1) + a * x(2) * M(:,2) + ...
    //
    for( size_type j = 1; j <= col_vecs_.size(); ++j )
      LinAlgOpPack::Vp_StV( y, a * x.get_ele(j), *col_vecs_[j-1] );
  }
  else {
    //
    // y += a*M'*x
    //
    // =>
    //
    // y(1) += a M(:,1)*x
    // y(2) += a M(:,2)*x
    // ...
    //
    for( size_type j = 1; j <= col_vecs_.size(); ++j )
      y->set_ele(
        j
        ,y->get_ele(j) + a * dot(*col_vecs_[j-1],x)
        );
  }
}
bool ReducedHessianExactStd_Step::do_step(
    Algorithm& _algo, poss_type step_poss, IterationPack::EDoStepType type
  , poss_type assoc_step_poss)
{
  using Teuchos::dyn_cast;
  using DenseLinAlgPack::nonconst_sym;
  using AbstractLinAlgPack::Mp_StMtMtM;
  typedef AbstractLinAlgPack::MatrixSymDenseInitialize	MatrixSymDenseInitialize;
  typedef AbstractLinAlgPack::MatrixSymOp			MatrixSymOp;
  using ConstrainedOptPack::NLPSecondOrder;

  NLPAlgo	&algo	= rsqp_algo(_algo);
  NLPAlgoState	&s		= algo.rsqp_state();
  NLPSecondOrder
#ifdef _WINDOWS
        &nlp	= dynamic_cast<NLPSecondOrder&>(algo.nlp());
#else
        &nlp	= dyn_cast<NLPSecondOrder>(algo.nlp());
#endif
  MatrixSymOp
    *HL_sym_op = dynamic_cast<MatrixSymOp*>(&s.HL().get_k(0));

  EJournalOutputLevel olevel = algo.algo_cntr().journal_output_level();
  std::ostream& out = algo.track().journal_out();

  // print step header.
  if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
    using IterationPack::print_algorithm_step;
    print_algorithm_step( algo, step_poss, type, assoc_step_poss, out );
  }

  // problem size
  size_type	n		= nlp.n(),
        r		= nlp.r(),
        nind	= n - r;

  // Compute HL first (You may want to move this into its own step later?)

  if( !s.lambda().updated_k(-1) ) {
    if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
      out << "Initializing lambda_km1 = nlp.get_lambda_init ... \n";
    }
    nlp.get_init_lagrange_mult( &s.lambda().set_k(-1).v(), NULL );
    if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
      out << "||lambda_km1||inf = " << s.lambda().get_k(-1).norm_inf() << std::endl;
    }
    if( static_cast<int>(olevel) >= static_cast<int>(PRINT_VECTORS) ) {
      out << "lambda_km1 = \n" << s.lambda().get_k(-1)();
    }
  }

  nlp.set_HL(	HL_sym_op );
  nlp.calc_HL( s.x().get_k(0)(), s.lambda().get_k(-1)(), false );

  if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ITERATION_QUANTITIES) ) {
    s.HL().get_k(0).output( out << "\nHL_k = \n" );
  }

  // If rHL has already been updated for this iteration then just leave it.
  if( !s.rHL().updated_k(0) ) {

    if( !HL_sym_op ) {
      std::ostringstream omsg;
      omsg
        << "ReducedHessianExactStd_Step::do_step(...) : Error, "
        << "The matrix HL with the concrete type "
        << typeName(s.HL().get_k(0)) << " does not support the "
        << "MatrixSymOp iterface";
      throw std::logic_error( omsg.str() );
    }		

    MatrixSymDenseInitialize
      *rHL_sym_init = dynamic_cast<MatrixSymDenseInitialize*>(&s.rHL().set_k(0));
    if( !rHL_sym_init ) {
      std::ostringstream omsg;
      omsg
        << "ReducedHessianExactStd_Step::do_step(...) : Error, "
        << "The matrix rHL with the concrete type "
        << typeName(s.rHL().get_k(0)) << " does not support the "
        << "MatrixSymDenseInitialize iterface";
      throw std::logic_error( omsg.str() );
    }		

    // Compute the dense reduced Hessian
    DMatrix rHL_sym_store(nind,nind);
    DMatrixSliceSym rHL_sym(rHL_sym_store(),BLAS_Cpp::lower);
    Mp_StMtMtM( &rHL_sym, 1.0, MatrixSymOp::DUMMY_ARG, *HL_sym_op
          , s.Z().get_k(0), BLAS_Cpp::no_trans, 0.0 );

    if( (int)olevel >= (int)PRINT_ITERATION_QUANTITIES ) {
      out << "\nLower triangular partion of dense reduced Hessian (ignore nonzeros above diagonal):\nrHL_dense = \n" << rHL_sym_store(); 
    }
  
    // Set the reduced Hessain
    rHL_sym_init->initialize( rHL_sym );

    if( (int)olevel >= (int)PRINT_ITERATION_QUANTITIES ) {
      s.rHL().get_k(0).output( out << "\nrHL_k = \n" );
    }
  }

  return true;
}
bool TangentialStepWithInequStd_Step::do_step(
  Algorithm& _algo, poss_type step_poss, IterationPack::EDoStepType type
  ,poss_type assoc_step_poss
  )
{

  using Teuchos::RCP;
  using Teuchos::dyn_cast;
  using ::fabs;
  using LinAlgOpPack::Vt_S;
  using LinAlgOpPack::V_VpV;
  using LinAlgOpPack::V_VmV;
  using LinAlgOpPack::Vp_StV;
  using LinAlgOpPack::Vp_V;
  using LinAlgOpPack::V_StV;
  using LinAlgOpPack::V_MtV;
//	using ConstrainedOptPack::min_abs;
  using AbstractLinAlgPack::max_near_feas_step;
  typedef VectorMutable::vec_mut_ptr_t   vec_mut_ptr_t;

  NLPAlgo &algo = rsqp_algo(_algo);
  NLPAlgoState &s = algo.rsqp_state();
  EJournalOutputLevel olevel = algo.algo_cntr().journal_output_level();
  EJournalOutputLevel ns_olevel = algo.algo_cntr().null_space_journal_output_level();
  std::ostream &out = algo.track().journal_out();
  //const bool check_results = algo.algo_cntr().check_results();

  // print step header.
  if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
    using IterationPack::print_algorithm_step;
    print_algorithm_step( algo, step_poss, type, assoc_step_poss, out );
  }

  // problem dimensions
  const size_type
    //n  = algo.nlp().n(),
    m  = algo.nlp().m(),
    r  = s.equ_decomp().size();

  // Get the iteration quantity container objects
  IterQuantityAccess<value_type>
    &alpha_iq = s.alpha(),
    &zeta_iq  = s.zeta(),
    &eta_iq   = s.eta();
  IterQuantityAccess<VectorMutable>
    &dl_iq      = dl_iq_(s),
    &du_iq      = du_iq_(s),
    &nu_iq      = s.nu(),
    *c_iq       = m > 0  ? &s.c()       : NULL,
    *lambda_iq  = m > 0  ? &s.lambda()  : NULL,
    &rGf_iq     = s.rGf(),
    &w_iq       = s.w(),
    &qp_grad_iq = s.qp_grad(),
    &py_iq      = s.py(),
    &pz_iq      = s.pz(),
    &Ypy_iq     = s.Ypy(),
    &Zpz_iq     = s.Zpz();
  IterQuantityAccess<MatrixOp>
    &Z_iq   = s.Z(),
    //*Uz_iq  = (m > r)  ? &s.Uz() : NULL,
    *Uy_iq  = (m > r)  ? &s.Uy() : NULL;
  IterQuantityAccess<MatrixSymOp>
    &rHL_iq = s.rHL();
  IterQuantityAccess<ActSetStats>
    &act_set_stats_iq = act_set_stats_(s);
  
  // Accessed/modified/updated (just some)
  VectorMutable  *Ypy_k = (m ? &Ypy_iq.get_k(0) : NULL);
  const MatrixOp  &Z_k   = Z_iq.get_k(0);
  VectorMutable  &pz_k  = pz_iq.set_k(0);
  VectorMutable  &Zpz_k = Zpz_iq.set_k(0);

  // Comupte qp_grad which is an approximation to rGf + Z'*HL*Y*py

  // qp_grad = rGf
  VectorMutable
    &qp_grad_k = ( qp_grad_iq.set_k(0) = rGf_iq.get_k(0) );

  // qp_grad += zeta * w
  if( w_iq.updated_k(0) ) {
    if(zeta_iq.updated_k(0))
      Vp_StV( &qp_grad_k, zeta_iq.get_k(0), w_iq.get_k(0) );
    else
      Vp_V( &qp_grad_k, w_iq.get_k(0) );
  }

  //
  // Set the bounds for:
  //
  //   dl <= Z*pz + Y*py <= du  ->  dl - Ypy <= Z*pz <= du - Ypz

  vec_mut_ptr_t
    bl = s.space_x().create_member(),
    bu = s.space_x().create_member();

  if(m) {
    // bl = dl_k - Ypy_k
    V_VmV( bl.get(), dl_iq.get_k(0), *Ypy_k );
    // bu = du_k - Ypy_k
    V_VmV( bu.get(), du_iq.get_k(0), *Ypy_k );
  }
  else {
    *bl = dl_iq.get_k(0);
    *bu = du_iq.get_k(0);
  }

  // Print out the QP bounds for the constraints
  if( static_cast<int>(ns_olevel) >= static_cast<int>(PRINT_VECTORS) ) {
    out << "\nqp_grad_k = \n" << qp_grad_k;
  }
  if( static_cast<int>(olevel) >= static_cast<int>(PRINT_VECTORS) ) {
    out << "\nbl = \n" << *bl;
    out << "\nbu = \n" << *bu;
  }

  //
  // Determine if we should perform a warm start or not.
  //
  bool do_warm_start = false;
  if( act_set_stats_iq.updated_k(-1) ) {
    if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
      out	<< "\nDetermining if the QP should use a warm start ...\n";
    }
    // We need to see if we should preform a warm start for the next iteration
    ActSetStats &stats = act_set_stats_iq.get_k(-1);
    const size_type
      num_active = stats.num_active(),
      num_adds   = stats.num_adds(),
      num_drops  = stats.num_drops();
    const value_type
      frac_same
      = ( num_adds == ActSetStats::NOT_KNOWN || num_active == 0
        ? 0.0
        : my_max(((double)(num_active)-num_adds-num_drops) / num_active, 0.0 ) );
    do_warm_start = ( num_active > 0 && frac_same >= warm_start_frac() );
    if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
      out << "\nnum_active = " << num_active;
      if( num_active ) {
        out	<< "\nmax(num_active-num_adds-num_drops,0)/(num_active) = "
          << "max("<<num_active<<"-"<<num_adds<<"-"<<num_drops<<",0)/("<<num_active<<") = "
          << frac_same;
        if( do_warm_start )
          out << " >= ";
        else
          out << " < ";
        out << "warm_start_frac = " << warm_start_frac();
      }
      if( do_warm_start )
        out << "\nUse a warm start this time!\n";
      else
        out << "\nDon't use a warm start this time!\n";
    }
  }

  // Use active set from last iteration as an estimate for current active set
  // if we are to use a warm start.
  // 
  // ToDo: If the selection of dependent and independent variables changes
  // then you will have to adjust this or not perform a warm start at all!
  if( do_warm_start ) {
    nu_iq.set_k(0,-1);
  }
  else {
    nu_iq.set_k(0) = 0.0; // No guess of the active set
  }
  VectorMutable  &nu_k  = nu_iq.get_k(0);

  //
  // Setup the reduced QP subproblem
  //
  // The call to the QP is setup for the more flexible call to the QPSolverRelaxed
  // interface to deal with the three independent variabilities: using simple
  // bounds for pz or not, general inequalities included or not, and extra equality
  // constraints included or not.
  // If this method of calling the QP solver were not used then 4 separate
  // calls to solve_qp(...) would have to be included to handle the four possible
  // QP formulations.
  //

  // The numeric arguments for the QP solver (in the nomenclatrue of QPSolverRelaxed)

  const value_type  qp_bnd_inf = NLP::infinite_bound();

  const Vector            &qp_g       = qp_grad_k;
  const MatrixSymOp       &qp_G       = rHL_iq.get_k(0);
  const value_type        qp_etaL     = 0.0;
  vec_mut_ptr_t           qp_dL       = Teuchos::null;
  vec_mut_ptr_t           qp_dU       = Teuchos::null;
  Teuchos::RCP<const MatrixOp>
                          qp_E        = Teuchos::null;
  BLAS_Cpp::Transp        qp_trans_E  = BLAS_Cpp::no_trans;
  vec_mut_ptr_t           qp_b        = Teuchos::null;
  vec_mut_ptr_t           qp_eL       = Teuchos::null;
  vec_mut_ptr_t           qp_eU       = Teuchos::null;
  Teuchos::RCP<const MatrixOp>
                          qp_F        = Teuchos::null;
  BLAS_Cpp::Transp        qp_trans_F  = BLAS_Cpp::no_trans;
  vec_mut_ptr_t           qp_f        = Teuchos::null;
  value_type              qp_eta      = 0.0;
  VectorMutable           &qp_d       = pz_k;  // pz_k will be updated directly!
  vec_mut_ptr_t           qp_nu       = Teuchos::null;
  vec_mut_ptr_t           qp_mu       = Teuchos::null;
  vec_mut_ptr_t           qp_Ed       = Teuchos::null;
  vec_mut_ptr_t           qp_lambda   = Teuchos::null;

  //
  // Determine if we can use simple bounds on pz.
  // 
  // If we have a variable-reduction null-space matrix
  // (with any choice for Y) then:
  // 
  // d = Z*pz + (1-eta) * Y*py
  // 
  // [ d(var_dep)   ]  = [ D ] * pz  + (1-eta) * [ Ypy(var_dep)   ]
  // [ d(var_indep) ]    [ I ]                   [ Ypy(var_indep) ]
  // 
  // For a cooridinate decomposition (Y = [ I ; 0 ]) then Ypy(var_indep) ==
  // 0.0 and in this case the bounds on d(var_indep) become simple bounds on
  // pz even with the relaxation.  Also, if dl(var_dep) and du(var_dep) are
  // unbounded, then we can also use simple bounds since we don't need the
  // relaxation and we can set eta=0. In this case we just have to subtract
  // from the upper and lower bounds on pz!
  // 
  // Otherwise, we can not use simple variable bounds and implement the
  // relaxation properly.
  // 

  const MatrixIdentConcat
    *Zvr = dynamic_cast<const MatrixIdentConcat*>( &Z_k );
  const Range1D
    var_dep   = Zvr ? Zvr->D_rng() : Range1D::Invalid,
    var_indep = Zvr ? Zvr->I_rng() : Range1D();

  RCP<Vector> Ypy_indep;
  const value_type
    Ypy_indep_norm_inf
    = ( m ? (Ypy_indep=Ypy_k->sub_view(var_indep))->norm_inf() : 0.0);

  if( (int)olevel >= (int)PRINT_ALGORITHM_STEPS )
    out
      << "\nDetermine if we can use simple bounds on pz ...\n"
      << "    m = " << m << std::endl
      << "    dynamic_cast<const MatrixIdentConcat*>(&Z_k) = " << Zvr << std::endl
      << "    ||Ypy_k(var_indep)||inf = " << Ypy_indep_norm_inf << std::endl;

  const bool
    bounded_var_dep
    = (
      m > 0
      &&
      num_bounded( *bl->sub_view(var_dep), *bu->sub_view(var_dep), qp_bnd_inf )
      );

  const bool
    use_simple_pz_bounds
    = (
      m == 0
      ||
      ( Zvr != NULL && ( Ypy_indep_norm_inf == 0.0 || bounded_var_dep == 0 ) )
      );

  if( (int)olevel >= (int)PRINT_ALGORITHM_STEPS )
    out
      << (use_simple_pz_bounds
          ? "\nUsing simple bounds on pz ...\n"
          : "\nUsing bounds on full Z*pz ...\n")
      << (bounded_var_dep
          ? "\nThere are finite bounds on dependent variables.  Adding extra inequality constrints for D*pz ...\n" 
          : "\nThere are no finite bounds on dependent variables.  There will be no extra inequality constraints added on D*pz ...\n" ) ;

  if( use_simple_pz_bounds ) {
    // Set simple bound constraints on pz
    qp_dL = bl->sub_view(var_indep);
    qp_dU = bu->sub_view(var_indep);
    qp_nu = nu_k.sub_view(var_indep); // nu_k(var_indep) will be updated directly!
    if( m && bounded_var_dep ) {
      // Set general inequality constraints for D*pz
      qp_E   = Teuchos::rcp(&Zvr->D(),false);
      qp_b   = Ypy_k->sub_view(var_dep);
      qp_eL  = bl->sub_view(var_dep);
      qp_eU  = bu->sub_view(var_dep);
      qp_mu  = nu_k.sub_view(var_dep);  // nu_k(var_dep) will be updated directly!
      qp_Ed  = Zpz_k.sub_view(var_dep); // Zpz_k(var_dep) will be updated directly!
    }
    else {
      // Leave these as NULL since there is no extra general inequality constraints
    }
  }
  else if( !use_simple_pz_bounds ) { // ToDo: Leave out parts for unbounded dependent variables!
    // There are no simple bounds! (leave qp_dL, qp_dU and qp_nu as null)
    // Set general inequality constraints for Z*pz
    qp_E   = Teuchos::rcp(&Z_k,false);
    qp_b   = Teuchos::rcp(Ypy_k,false);
    qp_eL  = bl;
    qp_eU  = bu;
    qp_mu  = Teuchos::rcp(&nu_k,false);
    qp_Ed  = Teuchos::rcp(&Zpz_k,false); // Zpz_k will be updated directly!
  }
  else {
    TEST_FOR_EXCEPT(true);
  }

  // Set the general equality constriants (if they exist)
  Range1D equ_undecomp = s.equ_undecomp();
  if( m > r && m > 0 ) {
    // qp_f = Uy_k * py_k + c_k(equ_undecomp)
    qp_f = s.space_c().sub_space(equ_undecomp)->create_member();
    V_MtV( qp_f.get(), Uy_iq->get_k(0), BLAS_Cpp::no_trans, py_iq.get_k(0) );
    Vp_V( qp_f.get(), *c_iq->get_k(0).sub_view(equ_undecomp) );
    // Must resize for the undecomposed constriants if it has not already been
    qp_F       = Teuchos::rcp(&Uy_iq->get_k(0),false);
    qp_lambda  = lambda_iq->set_k(0).sub_view(equ_undecomp); // lambda_k(equ_undecomp), will be updated directly!
  }

  // Setup the rest of the arguments

  QPSolverRelaxed::EOutputLevel
    qp_olevel;
  switch( olevel ) {
    case PRINT_NOTHING:
      qp_olevel = QPSolverRelaxed::PRINT_NONE;
      break;
    case PRINT_BASIC_ALGORITHM_INFO:
      qp_olevel = QPSolverRelaxed::PRINT_NONE;
      break;
    case PRINT_ALGORITHM_STEPS:
      qp_olevel = QPSolverRelaxed::PRINT_BASIC_INFO;
      break;
    case PRINT_ACTIVE_SET:
      qp_olevel = QPSolverRelaxed::PRINT_ITER_SUMMARY;
      break;
    case PRINT_VECTORS:
      qp_olevel = QPSolverRelaxed::PRINT_ITER_VECTORS;
      break;
    case PRINT_ITERATION_QUANTITIES:
      qp_olevel = QPSolverRelaxed::PRINT_EVERY_THING;
      break;
    default:
      TEST_FOR_EXCEPT(true);
  }
  // ToDo: Set print options so that only vectors matrices etc
  // are only printed in the null space

  //
  // Solve the QP
  // 
  qp_solver().infinite_bound(qp_bnd_inf);
  const QPSolverStats::ESolutionType
    solution_type =
    qp_solver().solve_qp(
      int(olevel) == int(PRINT_NOTHING) ? NULL : &out
      ,qp_olevel
      ,( algo.algo_cntr().check_results()
         ? QPSolverRelaxed::RUN_TESTS :  QPSolverRelaxed::NO_TESTS )
      ,qp_g, qp_G, qp_etaL, qp_dL.get(), qp_dU.get()
      ,qp_E.get(), qp_trans_E, qp_E.get() ? qp_b.get() : NULL
      ,qp_E.get() ? qp_eL.get() : NULL, qp_E.get() ? qp_eU.get() : NULL
      ,qp_F.get(), qp_trans_F, qp_F.get() ? qp_f.get() : NULL
      ,NULL // obj_d
      ,&qp_eta, &qp_d
      ,qp_nu.get()
      ,qp_mu.get(), qp_E.get() ? qp_Ed.get() : NULL
      ,qp_F.get() ? qp_lambda.get() : NULL
      ,NULL // qp_Fd
      );

  //
  // Check the optimality conditions for the QP
  //
  std::ostringstream omsg;
  bool throw_qp_failure = false;
  if(		qp_testing() == QP_TEST
    || ( qp_testing() == QP_TEST_DEFAULT && algo.algo_cntr().check_results() )  )
  {
    if( int(olevel) >= int(PRINT_ALGORITHM_STEPS) ) {
      out	<< "\nChecking the optimality conditions of the reduced QP subproblem ...\n";
    }
    if(!qp_tester().check_optimality_conditions(
      solution_type,qp_solver().infinite_bound()
      ,int(olevel) == int(PRINT_NOTHING) ? NULL : &out
      ,int(olevel) >= int(PRINT_VECTORS) ? true : false
      ,int(olevel) >= int(PRINT_ITERATION_QUANTITIES) ? true : false
      ,qp_g, qp_G, qp_etaL, qp_dL.get(), qp_dU.get()
      ,qp_E.get(), qp_trans_E, qp_E.get() ? qp_b.get() : NULL
      ,qp_E.get() ? qp_eL.get() : NULL, qp_E.get() ? qp_eU.get() : NULL
      ,qp_F.get(), qp_trans_F, qp_F.get() ? qp_f.get() : NULL
      ,NULL // obj_d
      ,&qp_eta, &qp_d
      ,qp_nu.get()
      ,qp_mu.get(), qp_E.get() ? qp_Ed.get() : NULL
      ,qp_F.get() ? qp_lambda.get() : NULL
      ,NULL // qp_Fd
      ))
    {
      omsg << "\n*** Alert! at least one of the QP optimality conditions did not check out.\n";
      if(  static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
        out << omsg.str();
      }
      throw_qp_failure = true;
    }
  }

  //
  // Set the solution
  //
  if( !use_simple_pz_bounds ) {
    // Everything is already updated!
  }
  else if( use_simple_pz_bounds ) {
    // Just have to set Zpz_k(var_indep) = pz_k
    *Zpz_k.sub_view(var_indep) = pz_k;
    if( m && !bounded_var_dep ) {
      // Must compute Zpz_k(var_dep) = D*pz
      LinAlgOpPack::V_MtV( &*Zpz_k.sub_view(var_dep), Zvr->D(), BLAS_Cpp::no_trans, pz_k );
      // ToDo: Remove the compuation of Zpz here unless you must
    }
  }
  else {
    TEST_FOR_EXCEPT(true);
  }

  // Set the solution statistics
  qp_solver_stats_(s).set_k(0) = qp_solver().get_qp_stats();

  // Cut back Ypy_k = (1-eta) * Ypy_k
  const value_type eps = std::numeric_limits<value_type>::epsilon();
  if( fabs(qp_eta - 0.0) > eps ) {
    if(  static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
      out
        << "\n*** Alert! the QP was infeasible (eta = "<<qp_eta<<").  Cutting back Ypy_k = (1.0 - eta)*Ypy_k  ...\n";
    }
    Vt_S( Ypy_k, 1.0 - qp_eta );
  }

  // eta_k
  eta_iq.set_k(0) = qp_eta;

  //
  // Modify the solution if we have to!
  // 
  switch(solution_type) {
    case QPSolverStats::OPTIMAL_SOLUTION:
      break;	// we are good!
    case QPSolverStats::PRIMAL_FEASIBLE_POINT:
    {
      omsg
        << "\n*** Alert! the returned QP solution is PRIMAL_FEASIBLE_POINT but not optimal!\n";
      if( primal_feasible_point_error() )
        omsg
          << "\n*** primal_feasible_point_error == true, this is an error!\n";
      if(  static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
        out << omsg.str();
      }
      throw_qp_failure = primal_feasible_point_error();
      break;
    }	
    case QPSolverStats::DUAL_FEASIBLE_POINT:
    {
      omsg
        << "\n*** Alert! the returned QP solution is DUAL_FEASIBLE_POINT"
        << "\n*** but not optimal so we cut back the step ...\n";
      if( dual_feasible_point_error() )
        omsg
          << "\n*** dual_feasible_point_error == true, this is an error!\n";
      if(  static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
        out << omsg.str();
      }
      // Cut back the step to fit in the bounds
      // 
      // dl <= u*(Ypy_k+Zpz_k) <= du
      //
      vec_mut_ptr_t
        zero  = s.space_x().create_member(0.0),
        d_tmp = s.space_x().create_member();
      V_VpV( d_tmp.get(), *Ypy_k, Zpz_k );
      const std::pair<value_type,value_type>
        u_steps = max_near_feas_step( *zero, *d_tmp, dl_iq.get_k(0), du_iq.get_k(0), 0.0 );
      const value_type
        u = my_min( u_steps.first, 1.0 ); // largest positive step size
      alpha_iq.set_k(0) = u;
      if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
        out	<< "\nFinding u s.t. dl <= u*(Ypy_k+Zpz_k) <= du\n"
          << "max step length u = " << u << std::endl
          << "alpha_k = u = " << alpha_iq.get_k(0) << std::endl;
      }
      throw_qp_failure = dual_feasible_point_error();
      break;
    }	
    case QPSolverStats::SUBOPTIMAL_POINT:
    {
      omsg
        << "\n*** Alert!, the returned QP solution is SUBOPTIMAL_POINT!\n";
      if(  static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
        out << omsg.str();
      }
      throw_qp_failure = true;
      break;
    }
    default:
      TEST_FOR_EXCEPT(true);	// should not happen!
  }

  //
  // Output the final solution!
  //
  if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
    out	<< "\n||pz_k||inf    = " << s.pz().get_k(0).norm_inf()
      << "\nnu_k.nz()      = " << s.nu().get_k(0).nz()
      << "\nmax(|nu_k(i)|) = " << s.nu().get_k(0).norm_inf()
//			<< "\nmin(|nu_k(i)|) = " << min_abs( s.nu().get_k(0)() )
      ;
    if( m > r ) out << "\n||lambda_k(undecomp)||inf = " << s.lambda().get_k(0).norm_inf();
    out	<< "\n||Zpz_k||2     = " << s.Zpz().get_k(0).norm_2()
      ;
    if(qp_eta > 0.0) out << "\n||Ypy||2 = " << s.Ypy().get_k(0).norm_2();
    out << std::endl;
  }

  if( static_cast<int>(ns_olevel) >= static_cast<int>(PRINT_VECTORS) ) {
    out << "\npz_k = \n" << s.pz().get_k(0);
    if(var_indep.size())
      out << "\nnu_k(var_indep) = \n" << *s.nu().get_k(0).sub_view(var_indep);
  }

  if( static_cast<int>(ns_olevel) >= static_cast<int>(PRINT_VECTORS) ) {
    if(var_indep.size())
      out	<< "\nZpz(var_indep)_k = \n" << *s.Zpz().get_k(0).sub_view(var_indep);
    out << std::endl;
  }

  if( static_cast<int>(olevel) >= static_cast<int>(PRINT_VECTORS) ) {
    if(var_dep.size())
      out	<< "\nZpz(var_dep)_k = \n" << *s.Zpz().get_k(0).sub_view(var_dep);
    out	<< "\nZpz_k = \n" << s.Zpz().get_k(0);
    out << std::endl;
  }

  if( static_cast<int>(olevel) >= static_cast<int>(PRINT_VECTORS) ) {
    out << "\nnu_k = \n" << s.nu().get_k(0);
    if(var_dep.size())
      out << "\nnu_k(var_dep) = \n" << *s.nu().get_k(0).sub_view(var_dep);
    if( m > r )
      out << "\nlambda_k(equ_undecomp) = \n" << *s.lambda().get_k(0).sub_view(equ_undecomp);
    if(qp_eta > 0.0) out << "\nYpy = \n" << s.Ypy().get_k(0);
  }

  if( qp_eta == 1.0 ) {
    omsg
      << "TangentialStepWithInequStd_Step::do_step(...) : Error, a QP relaxation parameter\n"
      << "of eta = " << qp_eta << " was calculated and therefore it must be assumed\n"
      << "that the NLP's constraints are infeasible\n"
      << "Throwing an InfeasibleConstraints exception!\n";
    if(  static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
      out << omsg.str();
    }
    throw InfeasibleConstraints(omsg.str());
  }

  if( throw_qp_failure )
    throw QPFailure( omsg.str(), qp_solver().get_qp_stats() );

  return true;
}
bool NLPDirectTester::finite_diff_check(
  NLPDirect         *nlp
  ,const Vector     &xo
  ,const Vector     *xl
  ,const Vector     *xu
  ,const Vector     *c
  ,const Vector     *Gf
  ,const Vector     *py
  ,const Vector     *rGf
  ,const MatrixOp   *GcU
  ,const MatrixOp   *D
  ,const MatrixOp   *Uz
  ,bool             print_all_warnings
  ,std::ostream     *out
  ) const
{

  using std::setw;
  using std::endl;
  using std::right;

  using AbstractLinAlgPack::sum;
  using AbstractLinAlgPack::dot;
  using AbstractLinAlgPack::Vp_StV;
  using AbstractLinAlgPack::random_vector;
  using AbstractLinAlgPack::assert_print_nan_inf;
  using LinAlgOpPack::V_StV;
  using LinAlgOpPack::V_StMtV;
  using LinAlgOpPack::Vp_MtV;
  using LinAlgOpPack::M_StM;
  using LinAlgOpPack::M_StMtM;

  typedef VectorSpace::vec_mut_ptr_t  vec_mut_ptr_t;

//  using AbstractLinAlgPack::TestingPack::CompareDenseVectors;
//  using AbstractLinAlgPack::TestingPack::CompareDenseSparseMatrices;

  using TestingHelperPack::update_success;

  bool success = true, preformed_fd;
  if(out) {
    *out << std::boolalpha
       << std::endl
       << "*********************************************************\n"
       << "*** NLPDirectTester::finite_diff_check(...) ***\n"
       << "*********************************************************\n";
  }

  const Range1D
    var_dep      = nlp->var_dep(),
    var_indep    = nlp->var_indep(),
    con_decomp   = nlp->con_decomp(),
    con_undecomp = nlp->con_undecomp();
  NLP::vec_space_ptr_t
    space_x = nlp->space_x(),
    space_c = nlp->space_c();

  // //////////////////////////////////////////////
  // Validate the input

  TEST_FOR_EXCEPTION(
    py && !c, std::invalid_argument
    ,"NLPDirectTester::finite_diff_check(...) : "
    "Error, if py!=NULL then c!=NULL must also be true!" );

  const CalcFiniteDiffProd
    &fd_deriv_prod = this->calc_fd_prod();

  const value_type
    rand_y_l = -1.0, rand_y_u = 1.0,
    small_num = ::sqrt(std::numeric_limits<value_type>::epsilon());

  try {

  // ///////////////////////////////////////////////
  // (1) Check Gf

  if(Gf) {
    switch( Gf_testing_method() ) {
      case FD_COMPUTE_ALL: {
        // Compute FDGf outright
        TEST_FOR_EXCEPT(true); // ToDo: update above!
        break;
      }
      case FD_DIRECTIONAL: {
        // Compute FDGF'*y using random y's
        if(out)
          *out
            << "\nComparing products Gf'*y with finite difference values FDGf'*y for "
            << "random y's ...\n";
        vec_mut_ptr_t y = space_x->create_member();
        value_type max_warning_viol = 0.0;
        int num_warning_viol = 0;
        const int num_fd_directions_used = ( num_fd_directions() > 0 ? num_fd_directions() : 1 );
        for( int direc_i = 1; direc_i <= num_fd_directions_used; ++direc_i ) {
          if( num_fd_directions() > 0 ) {
            random_vector( rand_y_l, rand_y_u, y.get() );
            if(out)
              *out
                << "\n****"
                << "\n**** Random directional vector " << direc_i << " ( ||y||_1 / n = "
                << (y->norm_1() / y->dim()) << " )"
                << "\n***\n";
          }
          else {
            *y = 1.0;
            if(out)
              *out
                << "\n****"
                << "\n**** Ones vector y ( ||y||_1 / n = "<<(y->norm_1()/y->dim())<<" )"
                << "\n***\n";
          }
          value_type
            Gf_y = dot( *Gf, *y ),
            FDGf_y;
          preformed_fd = fd_deriv_prod.calc_deriv_product(
            xo,xl,xu
            ,*y,NULL,NULL,true,nlp,&FDGf_y,NULL,out,dump_all(),dump_all()
            );
          if( !preformed_fd )
            goto FD_NOT_PREFORMED;
          assert_print_nan_inf(FDGf_y, "FDGf'*y",true,out);
          const value_type
            calc_err = ::fabs( ( Gf_y - FDGf_y )/( ::fabs(Gf_y) + ::fabs(FDGf_y) + small_num ) );
          if( calc_err >= Gf_warning_tol() ) {
            max_warning_viol = my_max( max_warning_viol, calc_err );
            ++num_warning_viol;
          }
          if(out)
            *out
              << "\nrel_err(Gf'*y,FDGf'*y) = "
              << "rel_err(" << Gf_y << "," << FDGf_y << ") = "
              << calc_err << endl;
          if( calc_err >= Gf_error_tol() ) {
            if(out) {
              *out
                << "Error, above relative error exceeded Gf_error_tol = " << Gf_error_tol() << endl;
              if(dump_all()) {
                *out << "\ny =\n" << *y;
              }
            }
          }
        }
        if(out && num_warning_viol)
          *out
            << "\nThere were " << num_warning_viol << " warning tolerance "
            << "violations out of num_fd_directions = " << num_fd_directions()
            << " computations of FDGf'*y\n"
            << "and the maximum violation was " << max_warning_viol
            << " > Gf_waring_tol = " << Gf_warning_tol() << endl;
        break;
      }
      default:
        TEST_FOR_EXCEPT(true); // Invalid value
    }
  }

  // /////////////////////////////////////////////
  // (2) Check py = -inv(C)*c
  //
  // We want to check; 
  // 
  //  FDC * (inv(C)*c) \approx c
  //       \_________/
  //         -py
  //
  // We can compute this as:
  //           
  // FDC * py = [ FDC, FDN ] * [ -py ; 0 ]
  //            \__________/
  //                FDA'
  // 
  // t1 =  [ -py ; 0 ]
  // 
  // t2 = FDA'*t1
  // 
  // Compare t2 \approx c
  // 
  if(py) {
    if(out)
      *out
        << "\nComparing c with finite difference product FDA'*[ -py; 0 ] = -FDC*py ...\n";
    // t1 =  [ -py ; 0 ]
    VectorSpace::vec_mut_ptr_t
      t1 = space_x->create_member();
    V_StV( t1->sub_view(var_dep).get(), -1.0, *py );
    *t1->sub_view(var_indep) = 0.0;
    // t2 = FDA'*t1
    VectorSpace::vec_mut_ptr_t
      t2 = nlp->space_c()->create_member();
    preformed_fd = fd_deriv_prod.calc_deriv_product(
      xo,xl,xu
      ,*t1,NULL,NULL,true,nlp,NULL,t2.get(),out,dump_all(),dump_all()
      );
    if( !preformed_fd )
      goto FD_NOT_PREFORMED;
    const value_type
      sum_c  = sum(*c),
      sum_t2 = sum(*t2);
    assert_print_nan_inf(sum_t2, "sum(-FDC*py)",true,out);
    const value_type
      calc_err = ::fabs( ( sum_c - sum_t2 )/( ::fabs(sum_c) + ::fabs(sum_t2) + small_num ) );
    if(out)
      *out
        << "\nrel_err(sum(c),sum(-FDC*py)) = "
        << "rel_err(" << sum_c << "," << sum_t2 << ") = "
        << calc_err << endl;
    if( calc_err >= Gc_error_tol() ) {
      if(out)
        *out
          << "Error, above relative error exceeded Gc_error_tol = " << Gc_error_tol() << endl;
      if(print_all_warnings)
        *out << "\nt1 = [ -py; 0 ] =\n" << *t1
           << "\nt2 = FDA'*t1 = -FDC*py =\n"   << *t2;
      update_success( false, &success );
    }
    if( calc_err >= Gc_warning_tol() ) {
      if(out)
        *out
          << "\nWarning, above relative error exceeded Gc_warning_tol = " << Gc_warning_tol() << endl;
    }
  }

  // /////////////////////////////////////////////
  // (3) Check D = -inv(C)*N

  if(D) {
    switch( Gc_testing_method() ) {
      case FD_COMPUTE_ALL: {
        //
        // Compute FDN outright and check
        // -FDC * D \aprox FDN
        // 
        // We want to compute:
        // 
        // FDC * -D = [ FDC, FDN ] * [ -D; 0 ]
        //            \__________/
        //                FDA'
        // 
        // To compute the above we perform:
        // 
        // T = FDA' * [ -D; 0 ] (one column at a time)
        // 
        // Compare T \approx FDN
        //
/*
        // FDN
        DMatrix FDN(m,n-m);
        fd_deriv_computer.calc_deriv( xo, xl, xu
          , Range1D(m+1,n), nlp, NULL
          , &FDN() ,BLAS_Cpp::trans, out );

        // T = FDA' * [ -D; 0 ] (one column at a time)
        DMatrix T(m,n-m);
        DVector t(n);
        t(m+1,n) = 0.0;
        for( int s = 1; s <= n-m; ++s ) {
          // t = [ -D(:,s); 0 ]
          V_StV( &t(1,m), -1.0, D->col(s) );
          // T(:,s) =  FDA' * t
          fd_deriv_prod.calc_deriv_product(
            xo,xl,xu,t(),NULL,NULL,nlp,NULL,&T.col(s),out);
        }        

        // Compare T \approx FDN
        if(out)
          *out
            << "\nChecking the computed D = -inv(C)*N\n"
            << "where D(i,j) = (-FDC*D)(i,j), dM(i,j) = FDN(i,j) ...\n";
        result = comp_M.comp(
          T(), FDN, BLAS_Cpp::no_trans
          , CompareDenseSparseMatrices::FULL_MATRIX
          , CompareDenseSparseMatrices::REL_ERR_BY_COL
          , Gc_warning_tol(), Gc_error_tol()
          , print_all_warnings, out );
        update_success( result, &success );
        if(!result) return false;
*/
        TEST_FOR_EXCEPT(true); // Todo: Implement above!
        break;
      }
      case FD_DIRECTIONAL: {
        //
        // Compute -FDC * D * v \aprox FDN * v
        // for random v's
        //
        // We will compute this as:
        // 
        // t1 = [ 0; y ] <: R^(n)
        // 
        // t2 = FDA' * t1  (  FDN * y ) <: R^(m)
        //
        // t1 = [ -D * y ; 0 ]  <: R^(n)
        // 
        // t3 = FDA' * t1  ( -FDC * D * y ) <: R^(m)
        // 
        // Compare t2 \approx t3
        //
        if(out)
          *out
            << "\nComparing finite difference products -FDC*D*y with FDN*y for "
              "random vectors y ...\n";
        VectorSpace::vec_mut_ptr_t
          y  = space_x->sub_space(var_indep)->create_member(),
          t1 = space_x->create_member(),
          t2 = space_c->create_member(),
          t3 = space_c->create_member();
        value_type max_warning_viol = 0.0;
        int num_warning_viol = 0;
        const int num_fd_directions_used = ( num_fd_directions() > 0 ? num_fd_directions() : 1 );
        for( int direc_i = 1; direc_i <= num_fd_directions_used; ++direc_i ) {
          if( num_fd_directions() > 0 ) {
            random_vector( rand_y_l, rand_y_u, y.get() );
            if(out)
              *out
                << "\n****"
                << "\n**** Random directional vector " << direc_i << " ( ||y||_1 / n = "
                << (y->norm_1() / y->dim()) << " )"
                << "\n***\n";
          }
          else {
            *y = 1.0;
            if(out)
              *out
                << "\n****"
                << "\n**** Ones vector y ( ||y||_1 / n = "<<(y->norm_1()/y->dim())<<" )"
                << "\n***\n";
          }
          // t1 = [ 0; y ] <: R^(n)
          *t1->sub_view(var_dep)   = 0.0;
          *t1->sub_view(var_indep) = *y;
          // t2 = FDA' * t1  (  FDN * y ) <: R^(m)
          preformed_fd = fd_deriv_prod.calc_deriv_product(
            xo,xl,xu
            ,*t1,NULL,NULL,true,nlp,NULL,t2.get(),out,dump_all(),dump_all()
            );
          if( !preformed_fd )
            goto FD_NOT_PREFORMED;
          // t1 = [ -D * y ; 0 ]  <: R^(n)
          V_StMtV( t1->sub_view(var_dep).get(), -1.0, *D, BLAS_Cpp::no_trans, *y );
          *t1->sub_view(var_indep) = 0.0;
          // t3 = FDA' * t1  ( -FDC * D * y ) <: R^(m)
          preformed_fd = fd_deriv_prod.calc_deriv_product(
            xo,xl,xu
            ,*t1,NULL,NULL,true,nlp,NULL,t3.get(),out,dump_all(),dump_all()
            );
          // Compare t2 \approx t3
          const value_type
            sum_t2 = sum(*t2),
            sum_t3 = sum(*t3);
          const value_type
            calc_err = ::fabs( ( sum_t2 - sum_t3 )/( ::fabs(sum_t2) + ::fabs(sum_t3) + small_num ) );
          if(out)
            *out
              << "\nrel_err(sum(-FDC*D*y),sum(FDN*y)) = "
              << "rel_err(" << sum_t3 << "," << sum_t2 << ") = "
              << calc_err << endl;
          if( calc_err >= Gc_warning_tol() ) {
            max_warning_viol = my_max( max_warning_viol, calc_err );
            ++num_warning_viol;
          }
          if( calc_err >= Gc_error_tol() ) {
            if(out)
              *out
                << "Error, above relative error exceeded Gc_error_tol = " << Gc_error_tol() << endl
                << "Stoping the tests!\n";
            if(print_all_warnings)
              *out << "\ny =\n" << *y
                   << "\nt1 = [ -D*y; 0 ] =\n" << *t1
                   << "\nt2 =  FDA' * [ 0; y ] = FDN * y =\n" << *t2
                   << "\nt3 =  FDA' * t1 = -FDC * D * y =\n" << *t3;
            update_success( false, &success );
          }
        }
        if(out && num_warning_viol)
          *out
            << "\nThere were " << num_warning_viol << " warning tolerance "
            << "violations out of num_fd_directions = " << num_fd_directions()
            << " computations of sum(FDC*D*y) and sum(FDN*y)\n"
            << "and the maximum relative iolation was " << max_warning_viol
            << " > Gc_waring_tol = " << Gc_warning_tol() << endl;
        break;
      }
      default:
        TEST_FOR_EXCEPT(true);
    }
  }

  // ///////////////////////////////////////////////
  // (4) Check rGf = Gf(var_indep) + D'*Gf(var_dep)

  if(rGf) {
    if( Gf && D ) {
      if(out)
        *out
          << "\nComparing rGf_tmp = Gf(var_indep) - D'*Gf(var_dep) with rGf ...\n";
      VectorSpace::vec_mut_ptr_t
        rGf_tmp = space_x->sub_space(var_indep)->create_member();
      *rGf_tmp = *Gf->sub_view(var_indep);
      Vp_MtV( rGf_tmp.get(), *D, BLAS_Cpp::trans, *Gf->sub_view(var_dep) );
      const value_type
        sum_rGf_tmp  = sum(*rGf_tmp),
        sum_rGf      = sum(*rGf);
      const value_type
        calc_err = ::fabs( ( sum_rGf_tmp - sum_rGf )/( ::fabs(sum_rGf_tmp) + ::fabs(sum_rGf) + small_num ) );
      if(out)
        *out
          << "\nrel_err(sum(rGf_tmp),sum(rGf)) = "
          << "rel_err(" << sum_rGf_tmp << "," << sum_rGf << ") = "
          << calc_err << endl;
      if( calc_err >= Gc_error_tol() ) {
        if(out)
          *out
            << "Error, above relative error exceeded Gc_error_tol = " << Gc_error_tol() << endl;
        if(print_all_warnings)
          *out << "\nrGf_tmp =\n" << *rGf_tmp
             << "\nrGf =\n"   << *rGf;
        update_success( false, &success );
      }
      if( calc_err >= Gc_warning_tol() ) {
        if(out)
          *out
            << "\nWarning, above relative error exceeded Gc_warning_tol = "
            << Gc_warning_tol() << endl;
      }
    }
    else if( D ) {
      if(out)
        *out
          << "\nComparing rGf'*y with the finite difference product"
          << " fd_prod(f,[D*y;y]) for random vectors y ...\n";
      VectorSpace::vec_mut_ptr_t
        y  = space_x->sub_space(var_indep)->create_member(),
        t  = space_x->create_member();
      value_type max_warning_viol = 0.0;
      int num_warning_viol = 0;
      const int num_fd_directions_used = ( num_fd_directions() > 0 ? num_fd_directions() : 1 );
      for( int direc_i = 1; direc_i <= num_fd_directions_used; ++direc_i ) {
        if( num_fd_directions() > 0 ) {
          random_vector( rand_y_l, rand_y_u, y.get() );
          if(out)
            *out
              << "\n****"
              << "\n**** Random directional vector " << direc_i << " ( ||y||_1 / n = "
              << (y->norm_1() / y->dim()) << " )"
              << "\n***\n";
        }
        else {
          *y = 1.0;
          if(out)
            *out
              << "\n****"
              << "\n**** Ones vector y ( ||y||_1 / n = "<<(y->norm_1()/y->dim())<<" )"
              << "\n***\n";
        }
        // t = [ D*y; y ]
        LinAlgOpPack::V_MtV(&*t->sub_view(var_dep),*D,BLAS_Cpp::no_trans,*y);
        *t->sub_view(var_indep) = *y;
        value_type fd_rGf_y = 0.0;
        // fd_Gf_y
        preformed_fd = fd_deriv_prod.calc_deriv_product(
          xo,xl,xu
          ,*t,NULL,NULL,true,nlp,&fd_rGf_y,NULL,out,dump_all(),dump_all()
          );
        if( !preformed_fd )
          goto FD_NOT_PREFORMED;
        if(out) *out << "fd_prod(f,[D*y;y]) = " << fd_rGf_y << "\n";
        // rGf_y = rGf'*y
        const value_type rGf_y = dot(*rGf,*y);
        if(out) *out << "rGf'*y = " << rGf_y << "\n";
        // Compare fd_rGf_y and rGf*y
        const value_type
          calc_err = ::fabs( ( rGf_y - fd_rGf_y )/( ::fabs(rGf_y) + ::fabs(fd_rGf_y) + small_num ) );
        if( calc_err >= Gc_warning_tol() ) {
          max_warning_viol = my_max( max_warning_viol, calc_err );
          ++num_warning_viol;
        }
        if(out)
          *out
            << "\nrel_err(rGf'*y,fd_prod(f,[D*y;y])) = "
            << "rel_err(" << fd_rGf_y << "," << rGf_y << ") = "
            << calc_err << endl;
        if( calc_err >= Gf_error_tol() ) {
          if(out)
            *out << "Error, above relative error exceeded Gc_error_tol = " << Gc_error_tol() << endl;
          if(print_all_warnings)
            *out << "\ny =\n" << *y
                 << "\nt = [ D*y; y ] =\n" << *t;
          update_success( false, &success );
        }
      }
    }
    else {
      TEST_FOR_EXCEPT(true); // ToDo: Test rGf without D? (This is not going to be easy!)
    }
  }
  
  // ///////////////////////////////////////////////////
  // (5) Check GcU, and/or Uz (for undecomposed equalities)

  if(GcU || Uz) {
    TEST_FOR_EXCEPT(true); // ToDo: Implement!
  }
  
FD_NOT_PREFORMED:

  if(!preformed_fd) {
    if(out)
      *out
        << "\nError, the finite difference computation was not preformed due to cramped bounds\n"
        << "Finite difference test failed!\n" << endl;
    return false;
  }

  } // end try
  catch( const AbstractLinAlgPack::NaNInfException& except ) {
    if(out)
      *out
        << "Error, found a NaN or Inf.  Stoping tests\n";
    success = false;
  }
  
  if( out ) {
    if( success )
      *out
        << "\nCongradulations, all the finite difference errors where within the\n"
        "specified error tolerances!\n";
    else
      *out
        << "\nOh no, at least one of the above finite difference tests failed!\n";
  }

  return success;

}
void MatrixSymPosDefLBFGS::secant_update(
  VectorMutable* s, VectorMutable* y, VectorMutable* Bs
  )
{
  using AbstractLinAlgPack::BFGS_sTy_suff_p_d;
  using AbstractLinAlgPack::dot;
  using LinAlgOpPack::V_MtV;
  using Teuchos::Workspace;
  Teuchos::WorkspaceStore* wss = Teuchos::get_default_workspace_store().get();

  assert_initialized();

  // Check skipping the BFGS update
  const value_type
    sTy	      = dot(*s,*y);
  std::ostringstream omsg;
  if( !BFGS_sTy_suff_p_d(*s,*y,&sTy,&omsg,"MatrixSymPosDefLBFGS::secant_update(...)" ) ) {
    throw UpdateSkippedException( omsg.str() );	
  }

  try {

  // Update counters
  if( m_bar_ == m_ ) {
    // We are at the end of the storage so remove the oldest stored update
    // and move updates to make room for the new update.  This has to be done for the
    // the matrix to behave properly
    {for( size_type k = 1; k <= m_-1; ++k ) {
      S_->col(k) = S_->col(k+1);                            // Shift S.col() to the left
      Y_->col(k) = Y_->col(k+1);                            // Shift Y.col() to the left
      STY_.col(k)(1,m_-1) = STY_.col(k+1)(2,m_);            // Move submatrix STY(2,m-1,2,m-1) up and left
      STSYTY_.col(k)(k+1,m_) = STSYTY_.col(k+1)(k+2,m_+1);  // Move triangular submatrix STS(2,m-1,2,m-1) up and left
      STSYTY_.col(k+1)(1,k) = STSYTY_.col(k+2)(2,k+1);      // Move triangular submatrix YTY(2,m-1,2,m-1) up and left
    }}
    // ToDo: Create an abstract interface, call it MultiVectorShiftVecs, to rearrange S and Y all at once.
    // This will be important for parallel performance.
  }
  else {
    m_bar_++;
  }
  // Set the update vectors
  *S_->col(m_bar_) = *s;
  *Y_->col(m_bar_) = *y;

  // /////////////////////////////////////////////////////////////////////////////////////
  // Update S'Y
  //
  // Update the row and column m_bar
  //
  //	S'Y = 
  //
  //	[	s(1)'*y(1)		...		s(1)'*y(m_bar)		...		s(1)'*y(m_bar)		]
  //	[	.						.							.					] row
  //	[	s(m_bar)'*y(1)	...		s(m_bar)'*y(m_bar)	...		s(m_bar)'*y(m_bar)	] m_bar
  //	[	.						.							.					]
  //	[	s(m_bar)'*y(1)	...		s(m_bar)'*y(m_bar)	...		s(m_bar)'*y(m_bar)	]
  //
  //								col m_bar
  //
  // Therefore we set:
  //	(S'Y)(:,m_bar) =  S'*y(m_bar)
  //	(S'Y)(m_bar,:) =  s(m_bar)'*Y

  const multi_vec_ptr_t
    S = this->S(),
    Y = this->Y();

  VectorSpace::vec_mut_ptr_t
    t = S->space_rows().create_member();  // temporary workspace

  //	(S'Y)(:,m_bar) =  S'*y(m_bar)
  V_MtV( t.get(), *S, BLAS_Cpp::trans, *y );
  STY_.col(m_bar_)(1,m_bar_) = VectorDenseEncap(*t)();

  //	(S'Y)(m_bar,:)' =  Y'*s(m_bar)
  V_MtV( t.get(), *Y, BLAS_Cpp::trans, *s );
  STY_.row(m_bar_)(1,m_bar_) = VectorDenseEncap(*t)();

  // /////////////////////////////////////////////////////////////////
  // Update S'S
  //
  //	S'S = 
  //
  //	[	s(1)'*s(1)		...		symmetric					symmetric			]
  //	[	.						.							.					] row
  //	[	s(m_bar)'*s(1)	...		s(m_bar)'*s(m_bar)	...		symmetric			] m_bar
  //	[	.						.							.					]
  //	[	s(m_bar)'*s(1)	...		s(m_bar)'*s(m_bar)	...		s(m_bar)'*s(m_bar)	]
  //
  //								col m_bar
  //
  // Here we will update the lower triangular part of S'S.  To do this we
  // only need to compute:
  //		t = S'*s(m_bar) = { s(m_bar)' * [ s(1),..,s(m_bar),..,s(m_bar) ]  }'
  // then set the appropriate rows and columns of S'S.

  Workspace<value_type>   work_ws(wss,m_bar_);
  DVectorSlice                  work(&work_ws[0],work_ws.size());

  // work = S'*s(m_bar)
  V_MtV( t.get(), *S, BLAS_Cpp::trans, *s );
  work = VectorDenseEncap(*t)();

  // Set row elements
  STSYTY_.row(m_bar_+1)(1,m_bar_) = work;
  // Set column elements
  STSYTY_.col(m_bar_)(m_bar_+1,m_bar_+1) = work(m_bar_,m_bar_);

  // /////////////////////////////////////////////////////////////////////////////////////
  // Update Y'Y
  //
  // Update the row and column m_bar
  //
  //	Y'Y = 
  //
  //	[	y(1)'*y(1)		...		y(1)'*y(m_bar)		...		y(1)'*y(m_bar)		]
  //	[	.						.							.					] row
  //	[	symmetric		...		y(m_bar)'*y(m_bar)	...		y(m_bar)'*y(m_bar)	] m_bar
  //	[	.						.							.					]
  //	[	symmetric		...		symmetric			...		y(m_bar)'*y(m_bar)	]
  //
  //								col m_bar
  //
  // Here we will update the upper triangular part of Y'Y.  To do this we
  // only need to compute:
  //		t = Y'*y(m_bar) = { y(m_bar)' * [ y(1),..,y(m_bar),..,y(m_bar) ]  }'
  // then set the appropriate rows and columns of Y'Y.

  // work = Y'*y(m_bar)
  V_MtV( t.get(), *Y, BLAS_Cpp::trans, *y );
  work = VectorDenseEncap(*t)();

  // Set row elements
  STSYTY_.col(m_bar_+1)(1,m_bar_) = work;
  // Set column elements
  STSYTY_.row(m_bar_)(m_bar_+1,m_bar_+1) = work(m_bar_,m_bar_);

  // /////////////////////////////
  // Update gamma_k

  // gamma_k = s'*y / y'*y
  if(auto_rescaling_)
    gamma_k_ = STY_(m_bar_,m_bar_) / STSYTY_(m_bar_,m_bar_+1);

  // We do not initially update Q unless we have to form a matrix-vector
  // product later.
  
  Q_updated_ = false;
  num_secant_updates_++;

  }	//	end try
  catch(...) {
    // If we throw any exception the we should make the matrix uninitialized
    // so that we do not leave this object in an inconsistant state.
    n_ = 0;
    throw;
  }

}
bool LineSearchFullStep_Step::do_step(Algorithm& _algo
  , poss_type step_poss, IterationPack::EDoStepType type, poss_type assoc_step_poss)
{
  using AbstractLinAlgPack::Vp_StV;
  using AbstractLinAlgPack::assert_print_nan_inf;
  using LinAlgOpPack::V_VpV;

  NLPAlgo        &algo   = rsqp_algo(_algo);
  NLPAlgoState   &s      = algo.rsqp_state();
  NLP            &nlp    = algo.nlp();

  const size_type
    m = nlp.m();

  EJournalOutputLevel olevel = algo.algo_cntr().journal_output_level();
  std::ostream& out = algo.track().journal_out();

  // print step header.
  if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
    using IterationPack::print_algorithm_step;
    print_algorithm_step( algo, step_poss, type, assoc_step_poss, out );
  }
  
  // alpha_k = 1.0
  IterQuantityAccess<value_type>
    &alpha_iq   = s.alpha();
  if( !alpha_iq.updated_k(0) )
    alpha_iq.set_k(0) = 1.0;

  if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
    out	<< "\nf_k        = " << s.f().get_k(0);
    if(m)
      out << "\n||c_k||inf = " << s.c().get_k(0).norm_inf();
    out << "\nalpha_k    = " << alpha_iq.get_k(0) << std::endl;
  }

  // x_kp1 = x_k + d_k
  IterQuantityAccess<VectorMutable>  &x_iq = s.x();
  const Vector                       &x_k   = x_iq.get_k(0);
  VectorMutable                      &x_kp1 = x_iq.set_k(+1);
  x_kp1 = x_k;
  Vp_StV( &x_kp1, alpha_iq.get_k(0), s.d().get_k(0) );

  if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
    out	<< "\n||x_kp1||inf   = " << s.x().get_k(+1).norm_inf() << std::endl;
  }
  if( static_cast<int>(olevel) >= static_cast<int>(PRINT_VECTORS) ) {
    out << "\nx_kp1 =\n" << s.x().get_k(+1);
  }

  if(algo.algo_cntr().check_results()) {
    assert_print_nan_inf(
      x_kp1, "x_kp1",true
      ,int(olevel) >= int(PRINT_ALGORITHM_STEPS) ? &out : NULL
      );
    if( nlp.num_bounded_x() ) {
      if(!bounds_tester().check_in_bounds(
          int(olevel) >= int(PRINT_ALGORITHM_STEPS) ? &out : NULL
        , int(olevel) >= int(PRINT_VECTORS)					// print_all_warnings
        , int(olevel) >= int(PRINT_ITERATION_QUANTITIES)	// print_vectors
        , nlp.xl(), "xl"
        , nlp.xu(), "xu"
        , x_kp1, "x_kp1"
        ))
      {
        TEST_FOR_EXCEPTION(
          true, TestFailed
          ,"LineSearchFullStep_Step::do_step(...) : Error, "
          "the variables bounds xl <= x_k(+1) <= xu where violated!" );
      }
    }
  }

  // Calcuate f and c at the new point.
  nlp.unset_quantities();
  nlp.set_f( &s.f().set_k(+1) );
  if(m) nlp.set_c( &s.c().set_k(+1) );
  nlp.calc_f(x_kp1);
  if(m) nlp.calc_c( x_kp1, false );
  nlp.unset_quantities();

  if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
    out	<< "\nf_kp1        = " << s.f().get_k(+1);
    if(m)
      out << "\n||c_kp1||inf = " << s.c().get_k(+1).norm_inf();
    out << std::endl;
  }

  if( m && static_cast<int>(olevel) >= static_cast<int>(PRINT_VECTORS) ) {
    out << "\nc_kp1 =\n" << s.c().get_k(+1); 
  }

  if(algo.algo_cntr().check_results()) {
    assert_print_nan_inf( s.f().get_k(+1), "f(x_kp1)", true, &out );
    if(m)
      assert_print_nan_inf( s.c().get_k(+1), "c(x_kp1)", true, &out );
  }

  return true;
}
void ExampleNLPFirstOrder::imp_calc_Gc(
    const Vector& x, bool newx, const FirstOrderInfo& first_order_info) const
{
    namespace rcp = MemMngPack;
    using Teuchos::dyn_cast;
    using AbstractLinAlgPack::Vp_S; // Should not have to do this!

    const index_type
    n = this->n(),
    m = this->m();
    const Range1D
    var_dep   = this->var_dep(),
    var_indep = this->var_indep();

    // Get references to aggregate C and N matrices (if allocated)
    MatrixOpNonsing
    *C_aggr = NULL;
    MatrixOp
    *N_aggr = NULL;
    BasisSystemComposite::get_C_N(
        first_order_info.Gc, &C_aggr, &N_aggr ); // Will return NULLs if Gc is not initialized

    // Allocate C and N matrix objects if not done yet!
    Teuchos::RCP<MatrixOpNonsing>
    C_ptr = Teuchos::null;
    Teuchos::RCP<MatrixOp>
    N_ptr = Teuchos::null;
    if( C_aggr == NULL ) {
        const VectorSpace::space_ptr_t
        space_x  = this->space_x(),
        space_xD = space_x->sub_space(var_dep);
        C_ptr  = Teuchos::rcp(new MatrixSymDiagStd(space_xD->create_member()));
        N_ptr  = Teuchos::rcp(new MatrixSymDiagStd(space_xD->create_member()));
        C_aggr = C_ptr.get();
        N_aggr = N_ptr.get();
    }

    // Get references to concreate C and N matrices
    MatrixSymDiagStd
    &C = dyn_cast<MatrixSymDiagStd>(*C_aggr);
    MatrixSymDiagStd
    &N = dyn_cast<MatrixSymDiagStd>(*N_aggr);
    // Get x = [ x_D' x_I ]
    Vector::vec_ptr_t
    x_D = x.sub_view(var_dep),
    x_I = x.sub_view(var_indep);
    // Set the diagonals of C and N (this is the only computation going on here)
    C.diag() = *x_I;          // C.diag = x_I - 1.0
    Vp_S( &C.diag(),  -1.0 ); // ...
    N.diag() = *x_D;          // N.diag = x_D - 10.0
    Vp_S( &N.diag(), -10.0 ); // ...
    // Initialize the matrix object Gc if not done so yet
    if( C_ptr.get() != NULL ) {
        BasisSystemComposite::initialize_Gc(
            this->space_x(), var_dep, var_indep
            ,this->space_c()
            ,C_ptr, N_ptr
            ,first_order_info.Gc
        );
    }
}
QPSolverStats::ESolutionType
QPSolverRelaxedQPKWIK::imp_solve_qp(
  std::ostream* out, EOutputLevel olevel, ERunTests test_what
  ,const Vector& g, const MatrixSymOp& G
  ,value_type etaL
  ,const Vector* dL, const Vector* dU
  ,const MatrixOp* E, BLAS_Cpp::Transp trans_E, const Vector* b
  ,const Vector* eL, const Vector* eU
  ,const MatrixOp* F, BLAS_Cpp::Transp trans_F, const Vector* f
  ,value_type* obj_d
  ,value_type* eta, VectorMutable* d
  ,VectorMutable* nu
  ,VectorMutable* mu, VectorMutable* Ed
  ,VectorMutable* lambda, VectorMutable* Fd
  )
{
  using Teuchos::dyn_cast;
  using DenseLinAlgPack::nonconst_tri_ele;
  using LinAlgOpPack::dot;
  using LinAlgOpPack::V_StV;
  using LinAlgOpPack::assign;
  using LinAlgOpPack::V_StV;
  using LinAlgOpPack::V_MtV;
  using AbstractLinAlgPack::EtaVector;
  using AbstractLinAlgPack::transVtMtV;
  using AbstractLinAlgPack::num_bounded;
  using ConstrainedOptPack::MatrixExtractInvCholFactor;

  // /////////////////////////
  // Map to QPKWIK input

  // Validate that rHL is of the proper type.
  const MatrixExtractInvCholFactor &cG
    = dyn_cast<const MatrixExtractInvCholFactor>(G);

  // Determine the number of sparse bounds on variables and inequalities.
  // By default set for the dense case
  const value_type inf = this->infinite_bound();
  const size_type
    nd              = d->dim(),
    m_in            = E  ? b->dim()                  : 0,
    m_eq            = F  ? f->dim()                  : 0,
    nvarbounds      = dL ? num_bounded(*dL,*dU,inf)  : 0,
    ninequbounds    = E  ? num_bounded(*eL,*eU,inf)  : 0,
    nequalities     = F  ? f->dim()                  : 0;

  // Determine if this is a QP with a structure different from the
  // one just solved.
  
  const bool same_qp_struct = (  N_ == nd && M1_ == nvarbounds && M2_ == ninequbounds && M3_ == nequalities );

  /////////////////////////////////////////////////////////////////
  // Set the input parameters to be sent to QPKWIKNEW

  // N
  N_ = nd;

  // M1
  M1_ = nvarbounds;

  // M2
  M2_ = ninequbounds;

  // M3
  M3_ = nequalities;

  // GRAD
  GRAD_ = VectorDenseEncap(g)();

  // UINV_AUG
  //
  // UINV_AUG = [ sqrt(bigM)  0  ]
  //            [ 0           L' ]
  //
  UINV_AUG_.resize(N_+1,N_+1);
  cG.extract_inv_chol( &nonconst_tri_ele( UINV_AUG_(2,N_+1,2,N_+1), BLAS_Cpp::upper ) );
  UINV_AUG_(1,1) = 1.0 / ::sqrt( NUMPARAM_[2] );
  UINV_AUG_.col(1)(2,N_+1) = 0.0;
  UINV_AUG_.row(1)(2,N_+1) = 0.0;

  // LDUINV_AUG
  LDUINV_AUG_ = UINV_AUG_.rows();

  // IBND, BL , BU, A, LDA, YPY

  IBND_INV_.resize( nd + m_in);
  std::fill( IBND_INV_.begin(), IBND_INV_.end(), 0 ); // Initialize the zero
  IBND_.resize( my_max( 1, M1_ + M2_ ) );
  BL_.resize( my_max( 1, M1_ + M2_ ) );
  BU_.resize( my_max( 1, M1_ + M2_ + M3_ ) );
  LDA_ = my_max( 1, M2_ + M3_ );
  A_.resize( LDA_, (  M2_ + M3_ > 0 ? N_ : 1 ) );
  YPY_.resize( my_max( 1, M1_ + M2_ ) );
  if(M1_)
    YPY_(1,M1_) = 0.0; // Must be for this QP interface

  // Initialize variable bound constraints
  if( dL ) {
    VectorDenseEncap dL_de(*dL);
    VectorDenseEncap dU_de(*dU);
    // read iterators
    AbstractLinAlgPack::sparse_bounds_itr
      dLU_itr( dL_de().begin(), dL_de().end()
          ,dU_de().begin(), dU_de().end()
          ,inf );
    // written iterators
    IBND_t::iterator
      IBND_itr = IBND_.begin(),
      IBND_end = IBND_.begin() + M1_;
    DVector::iterator
      BL_itr = BL_.begin(),
      BU_itr = BU_.begin(),
      YPY_itr = YPY_.begin();
    // Loop
    for( size_type ibnd_i = 1; IBND_itr != IBND_end; ++ibnd_i, ++dLU_itr ) {
      IBND_INV_[dLU_itr.index()-1] = ibnd_i;
      *IBND_itr++ = dLU_itr.index();
      *BL_itr++	= dLU_itr.lbound();
      *BU_itr++	= dLU_itr.ubound();
      *YPY_itr++	= 0.0; // Must be zero with this QP interface
    }
  }

  // Initialize inequality constraints
  
  if(M2_) {
    VectorDenseEncap eL_de(*eL);
    VectorDenseEncap eU_de(*eU);
    VectorDenseEncap b_de(*b);
    AbstractLinAlgPack::sparse_bounds_itr
      eLU_itr( eL_de().begin(), eL_de().end()
          ,eU_de().begin(), eU_de().end()
          ,inf );
    if( M2_ < m_in ) {
      // Initialize BL, BU, YPY and A for sparse bounds on general inequalities
      // written iterators
      DVector::iterator
        BL_itr		= BL_.begin() + M1_,
        BU_itr		= BU_.begin() + M1_,
        YPY_itr		= YPY_.begin() + M1_;
      IBND_t::iterator
        ibnds_itr	= IBND_.begin() + M1_;
      // loop
      for(size_type i = 1; i <= M2_; ++i, ++eLU_itr, ++ibnds_itr ) {
        TEST_FOR_EXCEPT( !( !eLU_itr.at_end() ) );
        const size_type k      = eLU_itr.index();
        *BL_itr++              = eLU_itr.lbound();
        *BU_itr++              = eLU_itr.ubound();
        *YPY_itr++             = b_de()(k);
        *ibnds_itr             = k;  // Only for my record, not used by QPKWIK
        IBND_INV_[nd+k-1]      = M1_ + i;
        // Add the corresponding row of op(E) to A
        // y == A.row(i)'
        // y' = e_k' * op(E) => y = op(E')*e_k
        DVectorSlice y = A_.row(i);
        EtaVector e_k(k,eL_de().dim());
        V_MtV( &y( 1, N_ ), *E, BLAS_Cpp::trans_not(trans_E), e_k() ); // op(E')*e_k
      }
    }
    else {
      //
      // Initialize BL, BU, YPY and A for dense bounds on general inequalities
      //
      // Initialize BL(M1+1:M1+M2), BU(M1+1:M1+M2)
      // and IBND(M1+1:M1+M2) = identity (only for my record, not used by QPKWIK)
      DVector::iterator
        BL_itr		= BL_.begin() + M1_,
        BU_itr		= BU_.begin() + M1_;
      IBND_t::iterator
        ibnds_itr	= IBND_.begin() + M1_;
      for(size_type i = 1; i <= m_in; ++i ) {
        if( !eLU_itr.at_end() && eLU_itr.index() == i ) {
          *BL_itr++ = eLU_itr.lbound();
          *BU_itr++ = eLU_itr.ubound();
          ++eLU_itr;
        }
        else {
          *BL_itr++ = -inf;
          *BU_itr++ = +inf;
        }
        *ibnds_itr++     = i;
        IBND_INV_[nd+i-1]= M1_ + i;
      }
      // A(1:M2,1:N) = op(E)
      assign( &A_(1,M2_,1,N_), *E, trans_E );
      // YPY
      YPY_(M1_+1,M1_+M2_) = b_de();
    }
  }

  // Initialize equalities

  if(M3_) {
    V_StV( &BU_( M1_ + M2_ + 1, M1_ + M2_ + M3_ ), -1.0, VectorDenseEncap(*f)() );
    assign( &A_( M2_ + 1, M2_ + M3_, 1, N_ ), *F, trans_F );
  }

  // IYPY
  IYPY_ = 1; // ???

  // WARM
  WARM_ = 0; // Cold start by default

  // MAX_ITER
  MAX_ITER_ = static_cast<f_int>(max_qp_iter_frac() * N_);

  // INF
  INF_ = ( same_qp_struct ? 1 : 0 );
  
  // Initilize output, internal state and workspace quantities.
  if(!same_qp_struct) {
    X_.resize(N_);
    NACTSTORE_ = 0;
    IACTSTORE_.resize(N_+1);
    IACT_.resize(N_+1);
    UR_.resize(N_+1);
    ISTATE_.resize( QPKWIKNEW_CppDecl::qpkwiknew_listate(N_,M1_,M2_,M3_) );
    LRW_ = QPKWIKNEW_CppDecl::qpkwiknew_lrw(N_,M1_,M2_,M3_);
    RW_.resize(LRW_);
  }

  // /////////////////////////////////////////////
  // Setup a warm start form the input arguments
  //
  // Interestingly enough, QPKWIK sorts all of the
  // constraints according to scaled multiplier values
  // and mixes equality with inequality constriants.
  // It seems to me that you should start with equality
  // constraints first.

  WARM_      = 0;
  NACTSTORE_ = 0;

  if( m_eq ) {
    // Add equality constraints first since we know these will
    // be part of the active set.
    for( size_type j = 1; j <= m_eq; ++j ) {
      IACTSTORE_[NACTSTORE_] = 2*M1_ + 2*M2_ + j;
      ++NACTSTORE_;
    }
  }
  if( ( nu && nu->nz() ) || ( mu && mu->nz() ) ) {
    // Add inequality constraints
    const size_type
      nu_nz = nu ? nu->nz() : 0,
      mu_nz = mu ? mu->nz() : 0;
    // Combine all the multipliers for the bound and general inequality
    // constraints and sort them from the largest to the smallest.  Hopefully
    // the constraints with the larger multiplier values will not be dropped
    // from the active set.
    SpVector gamma( nd + 1 + m_in , nu_nz + mu_nz );
    typedef SpVector::element_type ele_t;
    if(nu && nu_nz) {
      VectorDenseEncap nu_de(*nu);
      DVectorSlice::const_iterator
        nu_itr = nu_de().begin(),
        nu_end = nu_de().end();
      index_type i = 1;
      while( nu_itr != nu_end ) {
        for( ; *nu_itr == 0.0; ++nu_itr, ++i );
        gamma.add_element(ele_t(i,*nu_itr));
        ++nu_itr; ++i;
      }
    }
    if(mu && mu_nz) {
      VectorDenseEncap mu_de(*mu);
      DVectorSlice::const_iterator
        mu_itr = mu_de().begin(),
        mu_end = mu_de().end();
      index_type i = 1;
      while( mu_itr != mu_end ) {
        for( ; *mu_itr == 0.0; ++mu_itr, ++i );
        gamma.add_element(ele_t(i+nd,*mu_itr));
        ++mu_itr; ++i;
      }
    }
    std::sort( gamma.begin(), gamma.end()
      , AbstractLinAlgPack::SortByDescendingAbsValue() );
    // Now add the inequality constraints in decreasing order
    const SpVector::difference_type o = gamma.offset();
    for( SpVector::const_iterator itr = gamma.begin(); itr != gamma.end(); ++itr ) {
      const size_type  j   = itr->index() + o;
      const value_type val = itr->value();
      if( j <= nd ) { // Variable bound
        const size_type ibnd_i = IBND_INV_[j-1];
        TEST_FOR_EXCEPT( !( ibnd_i ) );
        IACTSTORE_[NACTSTORE_]
          = (val < 0.0
             ? ibnd_i               // lower bound (see IACT(*))
             : M1_ + M2_ + ibnd_i   // upper bound (see IACT(*))
            );
        ++NACTSTORE_;
      }
      else if( j <= nd + m_in ) { // General inequality constraint
        const size_type ibnd_i = IBND_INV_[j-1]; // offset into M1_ + ibnd_j
        TEST_FOR_EXCEPT( !( ibnd_i ) );
        IACTSTORE_[NACTSTORE_]
          = (val < 0.0
             ? ibnd_i               // lower bound (see IACT(*))
             : M1_ + M2_ + ibnd_i   // upper bound (see IACT(*))
            );
        ++NACTSTORE_;
      }
    }
  }
  if( NACTSTORE_ > 0 )
    WARM_ = 1;

  // /////////////////////////
  // Call QPKWIK

  if( out && olevel > PRINT_NONE ) {
    *out
      << "\nCalling QPKWIK to solve QP problem ...\n";
  }

  QPKWIKNEW_CppDecl::qpkwiknew(
    N_, M1_, M2_, M3_, &GRAD_(1), &UINV_AUG_(1,1), LDUINV_AUG_, &IBND_[0]
    ,&BL_(1), &BU_(1), &A_(1,1), LDA_, &YPY_(1), IYPY_, WARM_, NUMPARAM_, MAX_ITER_, &X_(1)
    ,&NACTSTORE_, &IACTSTORE_[0], &INF_, &NACT_, &IACT_[0], &UR_[0], &EXTRA_
    ,&ITER_, &NUM_ADDS_, &NUM_DROPS_, &ISTATE_[0], LRW_, &RW_[0]
    );

  // ////////////////////////
  // Map from QPKWIK output

  // eta
  *eta = EXTRA_;
  // d
  (VectorDenseMutableEncap(*d))() = X_();
  // nu (simple variable bounds) and mu (general inequalities)
  if(nu) *nu = 0.0;
  if(mu) *mu = 0.0;
  // ToDo: Create VectorDenseMutableEncap views for faster access!
  {for(size_type i = 1; i <= NACT_; ++i) {
    size_type j = IACT_[i-1];
    EConstraintType type = constraint_type(M1_,M2_,M3_,j);
    FortranTypes::f_int idc = constraint_index(M1_,M2_,M3_,&IBND_[0],type,j);
    switch(type) {
      case NU_L:
        nu->set_ele( idc , -UR_(i) );
        break;
      case GAMA_L:
        mu->set_ele( IBND_[ M1_ + idc - 1 ], -UR_(i) );
        break;
      case NU_U:
        nu->set_ele( idc, UR_(i)) ;
        break;
      case GAMA_U:
        mu->set_ele( IBND_[ M1_ + idc - 1 ], UR_(i) );
        break;
      case LAMBDA:
        lambda->set_ele( idc, UR_(i) );
        break;
    }
  }}
  // obj_d (This could be updated within QPKWIK in the future)
  if(obj_d) {
    // obj_d = g'*d + 1/2 * d' * G * g
    *obj_d = dot(g,*d) + 0.5 * transVtMtV(*d,G,BLAS_Cpp::no_trans,*d);
  }
  // Ed (This could be updated within QPKWIK in the future)
  if(Ed) {
    V_MtV( Ed, *E, trans_E, *d );
  }
  // Fd (This could be updated within QPKWIK in the future)
  if(Fd) {
    V_MtV( Fd, *F, trans_F, *d );
  }
  // Set the QP statistics
  QPSolverStats::ESolutionType solution_type;
  if( INF_ >= 0 ) {
    solution_type = QPSolverStats::OPTIMAL_SOLUTION;
  }
  else if( INF_ == -1 ) { // Infeasible constraints
    TEST_FOR_EXCEPTION(
      true, QPSolverRelaxed::Infeasible
      ,"QPSolverRelaxedQPKWIK::solve_qp(...) : Error, QP is infeasible" );
  }
  else if( INF_ == -2 ) { // LRW too small
    TEST_FOR_EXCEPT( !( INF_ != -2 ) );  // Local programming error?
  }
  else if( INF_ == -3 ) { // Max iterations exceeded
    solution_type = QPSolverStats::DUAL_FEASIBLE_POINT;
  }
  else {
    TEST_FOR_EXCEPT(true); // Unknown return value!
  }
  qp_stats_.set_stats(
    solution_type, QPSolverStats::CONVEX
    ,ITER_, NUM_ADDS_, NUM_DROPS_
    ,WARM_==1, *eta > 0.0 );

  return qp_stats_.solution_type();
}
void dense_Vp_StPtMtV(
  DVectorSlice                 *y
  ,value_type                 a
  ,const GenPermMatrixSlice   &P
  ,BLAS_Cpp::Transp           P_trans
  ,const M_t                  &M
  ,BLAS_Cpp::Transp           M_trans
  ,const V_t                  &x
  ,value_type                 b
  )
{
  using BLAS_Cpp::no_trans;
  using BLAS_Cpp::trans;
  using BLAS_Cpp::trans_not;
  using BLAS_Cpp::rows;
  using BLAS_Cpp::cols;
  using DenseLinAlgPack::dot;
  using DenseLinAlgPack::DVector;
  using DenseLinAlgPack::Vt_S;
  using AbstractLinAlgPack::dot;
  using AbstractLinAlgPack::Vp_StMtV;
  using AbstractLinAlgPack::GenPermMatrixSlice;
  typedef AbstractLinAlgPack::EtaVector eta;
  using Teuchos::Workspace;
  Teuchos::WorkspaceStore* wss = Teuchos::get_default_workspace_store().get();
  
  // Validate the sizes
  // 
  // y += op(P)*op(M)*x
  // 
  const DenseLinAlgPack::size_type
    ny = y->size(),
    nx = x.size(),
    opM_rows = rows( M.rows(), M.cols(), M_trans ),
    opM_cols = cols( M.rows(), M.cols(), M_trans );
  if(    ny != rows( P.rows(), P.cols(), P_trans )
       || nx != opM_cols
       || cols( P.rows(), P.cols(), P_trans ) != opM_rows )
    throw std::length_error( "MatrixOp::Vp_StPtMtV(...) : Error, "
      "sizes of arguments does not match up" );
  //
  // Compute y = b*y + a*op(P)*op(M)*x in a resonably efficient manner.  This
  // implementation will assume that M is stored as a dense matrix.  Either
  // t = op(M)*x is computed first (O(opM_rows*nx) flops) then y = b*y + a*op(P)*t
  // (O(ny) + O(P_nz) flops) or each row of t' = e(j)' * op(M) (O(nx) flops)
  // is computed one at a time and then y(i) = b*y(i) + a*t'*x (O(nx)  flops)
  // where op(P)(i,j) = 1.0.  In the latter case, there are P_nz rows
  // of op(M) that have to be generated so the total cost is O(P_nz*nx).
  // Therefore, we will do the former if opM_rows < P_nz and the latter otherwise.
  //
  if( !P.nz() ) {
    // y = b*y
    if(b==0.0)       *y = 0.0;
    else if(b!=1.0)  Vt_S(y,b);
  }
  else if( opM_rows > P.nz() || P.is_identity() ) {
    // t = op(M)*x
    Workspace<value_type> t_ws(wss,opM_rows);
    DVectorSlice t(&t_ws[0],t_ws.size());
    LinAlgOpPack::V_MtV( &t, M, M_trans, x );
    // y = b*y + a*op(P)*t
    Vp_StMtV( y, a, P, P_trans, t(), b );
  }
  else {
    // y = b*y
    if(b==0.0)       *y = 0.0;
    else if(b!=1.0)  Vt_S(y,b);
    // Compute t' = e(j)' * op(M) then y(i) += a*t'*x where op(P)(i,j) = 1.0
    Workspace<value_type> t_ws(wss,opM_cols);
    DVectorSlice t(&t_ws[0],t_ws.size());
    if( P.is_identity() ) {
      for( size_type k = 1; k <= P.nz(); ++k ) {
        const size_type
          i = k,
          j = k;
        // t = op(M') * e(j)			
        LinAlgOpPack::V_MtV( &t, M, trans_not(M_trans), eta(j,opM_rows)() );
        // y(i) += a*t'*x
        (*y)(i) += a * dot( t(), x );
      }
    }
    else {
      for( GenPermMatrixSlice::const_iterator itr = P.begin(); itr != P.end(); ++itr ) {
        const DenseLinAlgPack::size_type
          i = P_trans == no_trans ? itr->row_i() : itr->col_j(),
          j = P_trans == no_trans ? itr->col_j() : itr->row_i();
        // t = op(M') * e(j)			
        LinAlgOpPack::V_MtV( &t, M, trans_not(M_trans), eta(j,opM_rows)() );
        // y(i) += a*t'*x
        (*y)(i) += a * dot( t(), x );
      }
    }
  }
}
bool PreEvalNewPointBarrier_Step::do_step(
  Algorithm& _algo, poss_type step_poss, IterationPack::EDoStepType type
  ,poss_type assoc_step_poss
  )
  {
  using Teuchos::dyn_cast;
  using IterationPack::print_algorithm_step;
  using AbstractLinAlgPack::force_in_bounds_buffer;

  NLPAlgo            &algo   = dyn_cast<NLPAlgo>(_algo);
  IpState             &s      = dyn_cast<IpState>(_algo.state());
  NLP                 &nlp    = algo.nlp();
  NLPFirstOrder   *nlp_foi = dynamic_cast<NLPFirstOrder*>(&nlp);
  
  EJournalOutputLevel olevel = algo.algo_cntr().journal_output_level();
  std::ostream& out = algo.track().journal_out();
  
  if(!nlp.is_initialized())
    nlp.initialize(algo.algo_cntr().check_results());

  // print step header.
  if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) 
    {
    using IterationPack::print_algorithm_step;
    print_algorithm_step( _algo, step_poss, type, assoc_step_poss, out );
    }

  IterQuantityAccess<value_type>     &barrier_parameter_iq = s.barrier_parameter();
  IterQuantityAccess<VectorMutable>  &x_iq  = s.x();

  if( x_iq.last_updated() == IterQuantity::NONE_UPDATED ) 
    {
    if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) 
      {
      out << "\nInitialize x with x_k = nlp.xinit() ...\n"
        << " and push x_k within bounds.\n";
      }
    VectorMutable& x_k = x_iq.set_k(0) = nlp.xinit();
  
    // apply transformation operator to push x sufficiently within bounds
    force_in_bounds_buffer(relative_bound_push_, 
                 absolute_bound_push_,
                 nlp.xl(),
                 nlp.xu(),
                 &x_k);

    // evaluate the func and constraints
    IterQuantityAccess<value_type>
      &f_iq    = s.f();
    IterQuantityAccess<VectorMutable>
      &Gf_iq   = s.Gf(),
      *c_iq    = nlp.m() > 0 ? &s.c() : NULL;
    IterQuantityAccess<MatrixOp>
      *Gc_iq   = nlp_foi ? &s.Gc() : NULL;

    using AbstractLinAlgPack::assert_print_nan_inf;
    assert_print_nan_inf(x_k, "x", true, NULL); // With throw exception if Inf or NaN!

    // Wipe out storage for computed iteration quantities (just in case?) : RAB: 7/29/2002
    if(f_iq.updated_k(0))
      f_iq.set_not_updated_k(0);
    if(Gf_iq.updated_k(0))
      Gf_iq.set_not_updated_k(0);
    if (c_iq)
      {
      if(c_iq->updated_k(0))
        c_iq->set_not_updated_k(0);
      }
    if (nlp_foi)
      {
      if(Gc_iq->updated_k(0))
        Gc_iq->set_not_updated_k(0);
      }
    }

  if (barrier_parameter_iq.last_updated() == IterQuantity::NONE_UPDATED)
    {
    barrier_parameter_iq.set_k(-1) = 0.1; // RAB: 7/29/2002: This should be parameterized! (allow user to set this!)
    }

  // Print vector information
  if( static_cast<int>(olevel) >= static_cast<int>(PRINT_VECTORS) ) 
    {
    out << "x_k =\n" << x_iq.get_k(0);
    }

  return true;
  }
bool CheckConvergenceStd_Strategy::Converged(
  Algorithm& _algo
  )
  {
  using AbstractLinAlgPack::assert_print_nan_inf;
  using AbstractLinAlgPack::combined_nu_comp_err;
  
  NLPAlgo        &algo = rsqp_algo(_algo);
  NLPAlgoState   &s    = algo.rsqp_state();
  NLP            &nlp  = algo.nlp();

  EJournalOutputLevel olevel = algo.algo_cntr().journal_output_level();
  std::ostream& out = algo.track().journal_out();

  const size_type
    n  = nlp.n(),
    m  = nlp.m(),
    nb = nlp.num_bounded_x();

  // Get the iteration quantities
  IterQuantityAccess<value_type>
    &opt_kkt_err_iq  = s.opt_kkt_err(),
    &feas_kkt_err_iq = s.feas_kkt_err(),
      &comp_kkt_err_iq = s.comp_kkt_err();
  
  IterQuantityAccess<VectorMutable>
    &x_iq       = s.x(),
    &d_iq       = s.d(),
    &Gf_iq      = s.Gf(),
    *c_iq       = m     ? &s.c()      : NULL,
    *rGL_iq     = n > m ? &s.rGL()    : NULL,
    *GL_iq      = n > m ? &s.GL()     : NULL,
    *nu_iq      = n > m ? &s.nu()     : NULL;

  // opt_err = (||rGL||inf or ||GL||) / (||Gf|| + scale_kkt_factor)
  value_type 
    norm_inf_Gf_k = 0.0,
    norm_inf_GLrGL_k = 0.0;

  if( n > m && scale_opt_error_by_Gf() && Gf_iq.updated_k(0) ) {
    assert_print_nan_inf(
      norm_inf_Gf_k = Gf_iq.get_k(0).norm_inf(),
      "||Gf_k||inf",true,&out
      );
  }

  // NOTE:
  // The strategy object CheckConvergenceIP_Strategy assumes
  // that this will always be the gradient of the lagrangian
  // of the original problem, not the gradient of the lagrangian
  // for psi. (don't use augmented nlp info here)
  if( n > m ) {
    if( opt_error_check() == OPT_ERROR_REDUCED_GRADIENT_LAGR ) {
      assert_print_nan_inf( norm_inf_GLrGL_k = rGL_iq->get_k(0).norm_inf(),
                  "||rGL_k||inf",true,&out);
    }
    else {
      assert_print_nan_inf( norm_inf_GLrGL_k = GL_iq->get_k(0).norm_inf(),
                  "||GL_k||inf",true,&out);
    }
  }

  const value_type
    opt_scale_factor = 1.0 + norm_inf_Gf_k,
    opt_err = norm_inf_GLrGL_k / opt_scale_factor;
  
  // feas_err
  const value_type feas_err = ( ( m ? c_iq->get_k(0).norm_inf() : 0.0 ) );

  // comp_err
  value_type comp_err = 0.0;
  if ( n > m ) {
    if (nb > 0) {
      comp_err = combined_nu_comp_err(nu_iq->get_k(0), x_iq.get_k(0), nlp.xl(), nlp.xu());
    }
    if(m) {
      assert_print_nan_inf( feas_err,"||c_k||inf",true,&out);
    }
  }

  // scaling factors
  const value_type 
    scale_opt_factor = CalculateScalingFactor(s, scale_opt_error_by()),
    scale_feas_factor = CalculateScalingFactor(s, scale_feas_error_by()),
    scale_comp_factor = CalculateScalingFactor(s, scale_comp_error_by());
  
  // kkt_err
  const value_type
    opt_kkt_err_k  = opt_err/scale_opt_factor,
     feas_kkt_err_k = feas_err/scale_feas_factor,
    comp_kkt_err_k = comp_err/scale_comp_factor;

  // update the iteration quantities
  if(n > m) opt_kkt_err_iq.set_k(0) = opt_kkt_err_k;
  feas_kkt_err_iq.set_k(0) = feas_kkt_err_k;
  comp_kkt_err_iq.set_k(0) = comp_kkt_err_k;

  // step_err
  value_type step_err = 0.0;
  if( d_iq.updated_k(0) ) {
    step_err = AbstractLinAlgPack::max_rel_step(x_iq.get_k(0),d_iq.get_k(0));
    assert_print_nan_inf( step_err,"max(d(i)/max(1,x(i)),i=1...n)",true,&out);
  }

  const value_type
    opt_tol		= algo.algo_cntr().opt_tol(),
    feas_tol	= algo.algo_cntr().feas_tol(),
    comp_tol	= algo.algo_cntr().comp_tol(),
    step_tol	= algo.algo_cntr().step_tol();
  
  const bool found_solution = 
    opt_kkt_err_k < opt_tol 
    && feas_kkt_err_k < feas_tol 
    && comp_kkt_err_k < comp_tol 
    && step_err < step_tol;
  
  if( int(olevel) >= int(PRINT_ALGORITHM_STEPS) || (int(olevel) >= int(PRINT_BASIC_ALGORITHM_INFO) && found_solution) )
  {
    out	
      << "\nscale_opt_factor = " << scale_opt_factor
      << " (scale_opt_error_by = " << (scale_opt_error_by()==SCALE_BY_ONE ? "SCALE_BY_ONE"
                       : (scale_opt_error_by()==SCALE_BY_NORM_INF_X ? "SCALE_BY_NORM_INF_X"
                        : "SCALE_BY_NORM_2_X" ) ) << ")"

      << "\nscale_feas_factor = " << scale_feas_factor
      << " (scale_feas_error_by = " << (scale_feas_error_by()==SCALE_BY_ONE ? "SCALE_BY_ONE"
                       : (scale_feas_error_by()==SCALE_BY_NORM_INF_X ? "SCALE_BY_NORM_INF_X"
                        : "SCALE_BY_NORM_2_X" ) ) << ")"

      << "\nscale_comp_factor = " << scale_comp_factor
      << " (scale_comp_error_by = " << (scale_comp_error_by()==SCALE_BY_ONE ? "SCALE_BY_ONE"
                       : (scale_comp_error_by()==SCALE_BY_NORM_INF_X ? "SCALE_BY_NORM_INF_X"
                        : "SCALE_BY_NORM_2_X" ) ) << ")"
      << "\nopt_scale_factor = " << opt_scale_factor
      << " (scale_opt_error_by_Gf = " << (scale_opt_error_by_Gf()?"true":"false") << ")"
      << "\nopt_kkt_err_k    = " << opt_kkt_err_k << ( opt_kkt_err_k < opt_tol ? " < " : " > " )
      << "opt_tol  = " << opt_tol
      << "\nfeas_kkt_err_k   = " << feas_kkt_err_k << ( feas_kkt_err_k < feas_tol ? " < " : " > " )
      << "feas_tol = " << feas_tol
      << "\ncomp_kkt_err_k   = " << comp_kkt_err_k << ( comp_kkt_err_k < comp_tol ? " < " : " > " )
      << "comp_tol = " << comp_tol
      << "\nstep_err         = " << step_err << ( step_err < step_tol ? " < " : " > " )
      << "step_tol = " << step_tol
      << std::endl;
    }
  
  return found_solution;

  }
void MatrixSymPosDefLBFGS::V_InvMtV(
  VectorMutable* y, BLAS_Cpp::Transp trans_rhs1
  , const Vector& x
  ) const
{
  using AbstractLinAlgPack::Vp_StMtV;
  using DenseLinAlgPack::V_InvMtV;
  using LinAlgOpPack::V_mV;
  using LinAlgOpPack::V_StV;
  using LinAlgOpPack::Vp_V;
  using LinAlgOpPack::V_MtV;
  using LinAlgOpPack::V_StMtV;
  using LinAlgOpPack::Vp_MtV;
  using DenseLinAlgPack::Vp_StMtV;
  typedef VectorDenseEncap         vde;
  typedef VectorDenseMutableEncap  vdme;
  using Teuchos::Workspace;
  Teuchos::WorkspaceStore* wss = Teuchos::get_default_workspace_store().get();

  assert_initialized();

  TEUCHOS_TEST_FOR_EXCEPT( !(  inverse_is_updated_  ) ); // For now just always update

  // y = inv(Bk) * x = Hk * x
  //
  // = gk*x + [S gk*Y] * [ inv(R')*(D+gk*Y'Y)*inv(R)     -inv(R') ] * [   S'  ] * x
  //                     [            -inv(R)                0    ]   [ gk*Y' ]
  //
  // Perform in the following (in order):
  //
  // y = gk*x
  //
  // t1 = [   S'*x  ]					<: R^(2*m)
  //      [ gk*Y'*x ]
  //
  // t2 = inv(R) * t1(1:m)			<: R^(m)
  //
  // t3 = - inv(R') * t1(m+1,2*m)		<: R^(m)
  //
  // t4 = gk * Y'Y * t2				<: R^(m)
  //
  // t4 += D*t2
  //
  // t5 = inv(R') * t4				<: R^(m)
  //
  // t5 += t3
  //
  // y += S*t5
  //
  // y += -gk*Y*t2

  // y = gk*x
  V_StV( y, gamma_k_, x );

  const size_type
    mb = m_bar_;	
  
  if( !mb )
    return;	// No updates have been performed.

  const multi_vec_ptr_t
    S = this->S(),
    Y = this->Y();

  // Get workspace

  Workspace<value_type>    t1_ws(wss,2*mb);
  DVectorSlice                   t1(&t1_ws[0],t1_ws.size());
  Workspace<value_type>    t2_ws(wss,mb);
  DVectorSlice                   t2(&t2_ws[0],t2_ws.size());
  Workspace<value_type>    t3_ws(wss,mb);
  DVectorSlice                   t3(&t3_ws[0],t3_ws.size());
  Workspace<value_type>    t4_ws(wss,mb);
  DVectorSlice                   t4(&t4_ws[0],t4_ws.size());
  Workspace<value_type>    t5_ws(wss,mb);
  DVectorSlice                   t5(&t5_ws[0],t5_ws.size());

  VectorSpace::vec_mut_ptr_t
    t = S->space_rows().create_member();

  const DMatrixSliceTri
    &R = this->R();

  const DMatrixSliceSym
    &YTY = this->YTY();

  // t1 = [   S'*x  ]
  //      [ gk*Y'*x ]
  V_MtV( t.get(), *S, BLAS_Cpp::trans, x );
  t1(1,mb) = vde(*t)();
  V_StMtV( t.get(), gamma_k_, *Y, BLAS_Cpp::trans, x );
  t1(mb+1,2*mb) = vde(*t)();

  // t2 = inv(R) * t1(1:m)
  V_InvMtV( &t2, R, BLAS_Cpp::no_trans, t1(1,mb) );

  // t3 = - inv(R') * t1(m+1,2*m)
  V_mV( &t3, t1(mb+1,2*mb) );
  V_InvMtV( &t3, R, BLAS_Cpp::trans, t3 );

  // t4 = gk * Y'Y * t2
  V_StMtV( &t4, gamma_k_, YTY, BLAS_Cpp::no_trans, t2 );

  // t4 += D*t2
  Vp_DtV( &t4, t2 );

  // t5 = inv(R') * t4
  V_InvMtV( &t5, R, BLAS_Cpp::trans, t4 );

  // t5 += t3
  Vp_V( &t5, t3 );

  // y += S*t5
  (vdme(*t)() = t5);
  Vp_MtV( y, *S, BLAS_Cpp::no_trans, *t );

  // y += -gk*Y*t2
  (vdme(*t)() = t2);
  Vp_StMtV( y, -gamma_k_, *Y, BLAS_Cpp::no_trans, *t );

}
bool CheckConvergenceIP_Strategy::Converged(
  Algorithm& _algo
  )
  {
  using Teuchos::dyn_cast;
  using AbstractLinAlgPack::num_bounded;
  using AbstractLinAlgPack::IP_comp_err_with_mu;

  // Calculate kkt errors and check for overall convergence
  //bool found_solution = CheckConvergenceStd_Strategy::Converged(_algo);
  bool found_solution = false;

  // Recalculate the complementarity error including mu
  
  // Get the iteration quantities
  IpState &s = dyn_cast<IpState>(*_algo.get_state());
  NLPAlgo& algo = rsqp_algo(_algo);
  NLP& nlp = algo.nlp();
  
  EJournalOutputLevel olevel = algo.algo_cntr().journal_output_level();
  std::ostream& out = algo.track().journal_out();

  // Get necessary iteration quantities
  const value_type &mu_km1 = s.barrier_parameter().get_k(-1);
  const Vector& x_k = s.x().get_k(0);
  const VectorMutable& Gf_k = s.Gf().get_k(0);
  const Vector& rGL_k = s.rGL().get_k(0);
  const Vector& c_k = s.c().get_k(0);
  const Vector& vl_k = s.Vl().get_k(0).diag();
  const Vector& vu_k = s.Vu().get_k(0).diag();
  
  // Calculate the errors with Andreas' scaling
  value_type& opt_err = s.opt_kkt_err().set_k(0);
  value_type& feas_err = s.feas_kkt_err().set_k(0);
  value_type& comp_err = s.comp_kkt_err().set_k(0);
  value_type& comp_err_mu = s.comp_err_mu().set_k(0);

  // scaling
  value_type scale_1 = 1 + x_k.norm_1()/x_k.dim();

  Teuchos::RCP<VectorMutable> temp = Gf_k.clone();
  temp->axpy(-1.0, vl_k);
  temp->axpy(1.0, vu_k);
  value_type scale_2 = temp->norm_1();
  scale_2 += vl_k.norm_1() + vu_k.norm_1();

  *temp = nlp.infinite_bound();
  const size_type nlb = num_bounded(nlp.xl(), *temp, nlp.infinite_bound());
  *temp = -nlp.infinite_bound();
  const size_type nub = num_bounded(*temp, nlp.xu(), nlp.infinite_bound());
  scale_2 = 1 + scale_2/(1+nlp.m()+nlb+nub);

  // Calculate the opt_err
  opt_err = rGL_k.norm_inf() / scale_2;

  // Calculate the feas_err
  feas_err = c_k.norm_inf() / scale_1;
  
  // Calculate the comp_err
  if( (int)olevel >= (int)PRINT_VECTORS )
    {
    out << "\nx =\n"    << s.x().get_k(0);
    out << "\nxl =\n"   << nlp.xl();
    out << "\nvl =\n"   << s.Vl().get_k(0).diag();
    out << "\nxu =\n"   << nlp.xu();
    out << "\nvu =\n"   << s.Vu().get_k(0).diag();
    }

  comp_err = IP_comp_err_with_mu(
    0.0, nlp.infinite_bound(), s.x().get_k(0), nlp.xl(), nlp.xu()
    ,s.Vl().get_k(0).diag(), s.Vu().get_k(0).diag());

  comp_err_mu = IP_comp_err_with_mu(
    mu_km1, nlp.infinite_bound(), s.x().get_k(0), nlp.xl(), nlp.xu()
    ,s.Vl().get_k(0).diag(), s.Vu().get_k(0).diag());

  comp_err = comp_err / scale_2;
  comp_err_mu = comp_err_mu / scale_2;

  // check for convergence
  
  const value_type opt_tol = algo.algo_cntr().opt_tol();
  const value_type feas_tol = algo.algo_cntr().feas_tol();
  const value_type comp_tol = algo.algo_cntr().comp_tol();

  if (opt_err < opt_tol && feas_err < feas_tol && comp_err < comp_tol)
    {
    found_solution = true;
    }

  if( int(olevel) >= int(PRINT_ALGORITHM_STEPS) || (int(olevel) >= int(PRINT_BASIC_ALGORITHM_INFO) && found_solution) )
    {
    out	
      << "\nopt_kkt_err_k   = " << opt_err << ( opt_err < opt_tol ? " < " : " > " )
      << "opt_tol = " << opt_tol
      << "\nfeas_kkt_err_k   = " << feas_err << ( feas_err < feas_tol ? " < " : " > " )
      << "feas_tol = " << feas_tol
      << "\ncomp_kkt_err_k   = " << comp_err << ( comp_err < comp_tol ? " < " : " > " )
      << "comp_tol = " << comp_tol
      << "\ncomp_err_mu      = " << comp_err_mu
      << "\nbarrier_parameter_k (mu_km1) = " << mu_km1
      << "comp_tol = " << comp_tol
      << std::endl;
    }
    
  return found_solution;
  }
bool MoochoPack::CalcDFromYPYZPZ_Step::do_step(Algorithm& _algo
  , poss_type step_poss, IterationPack::EDoStepType type, poss_type assoc_step_poss)
{

  using Teuchos::implicit_cast;
  using AbstractLinAlgPack::dot;
  using LinAlgOpPack::V_VpV;

  NLPAlgo &algo = rsqp_algo(_algo);
  NLPAlgoState &s = algo.rsqp_state();

  EJournalOutputLevel olevel = algo.algo_cntr().journal_output_level();
  EJournalOutputLevel ns_olevel = algo.algo_cntr().null_space_journal_output_level();
  std::ostream& out = algo.track().journal_out();

  // print step header.
  if( implicit_cast<int>(olevel) >= implicit_cast<int>(PRINT_ALGORITHM_STEPS) ) {
    using IterationPack::print_algorithm_step;
    print_algorithm_step( algo, step_poss, type, assoc_step_poss, out );
  }

  // d = Ypy + Zpz
  VectorMutable &d_k = s.d().set_k(0);
  const Vector &Ypy_k = s.Ypy().get_k(0);
  const Vector &Zpz_k = s.Zpz().get_k(0);
  V_VpV( &d_k, Ypy_k, Zpz_k );

  Range1D
    var_dep = s.var_dep(),
    var_indep = s.var_indep();
  
  if( implicit_cast<int>(olevel) >= implicit_cast<int>(PRINT_ALGORITHM_STEPS) ) {
    const value_type very_small = std::numeric_limits<value_type>::min();
    out
      << "\n(Ypy_k'*Zpz_k)/(||Ypy_k||2 * ||Zpz_k||2 + eps)\n"
      << "  = ("<<dot(Ypy_k,Zpz_k)<<")/("<<Ypy_k.norm_2()<<" * "<<Zpz_k.norm_2()<<" + "<<very_small<<")\n"
      << "  = " << dot(Ypy_k,Zpz_k) / ( Ypy_k.norm_2() * Zpz_k.norm_2() + very_small ) << "\n";
/*
  ConstrainedOptPack::print_vector_change_stats(
  s.x().get_k(0), "x", s.d().get_k(0), "d", out );
*/
    // ToDo: Replace the above with a reduction operator!
  }

  if( implicit_cast<int>(olevel) >= implicit_cast<int>(PRINT_ALGORITHM_STEPS) ) {
    out << "\n||d_k||inf            = " << d_k.norm_inf();
    if( var_dep.size() )
      out << "\n||d(var_dep)_k||inf   = " << d_k.sub_view(var_dep)->norm_inf();
    if( var_indep.size() )
      out << "\n||d(var_indep)_k||inf = " << d_k.sub_view(var_indep)->norm_inf();
    out << std::endl;
  }
  if( implicit_cast<int>(olevel) >= implicit_cast<int>(PRINT_VECTORS) ) {
    out << "\nd_k = \n" << d_k;
    if( var_dep.size() )
      out << "\nd(var_dep)_k = \n" << *d_k.sub_view(var_dep);
  }
  if( implicit_cast<int>(ns_olevel) >= implicit_cast<int>(PRINT_VECTORS) ) {
    if( var_indep.size() )
      out << "\nd(var_indep)_k = \n" << *d_k.sub_view(var_indep);
  }
  
  return true;

}
bool EvalNewPointStd_Step::do_step(
  Algorithm& _algo, poss_type step_poss, IterationPack::EDoStepType type
  ,poss_type assoc_step_poss
  )
{
  using Teuchos::rcp;
  using Teuchos::dyn_cast;
  using AbstractLinAlgPack::assert_print_nan_inf;
  using IterationPack::print_algorithm_step;
  using NLPInterfacePack::NLPFirstOrder;

  NLPAlgo         &algo   = rsqp_algo(_algo);
  NLPAlgoState    &s      = algo.rsqp_state();
  NLPFirstOrder   &nlp    = dyn_cast<NLPFirstOrder>(algo.nlp());

  EJournalOutputLevel olevel = algo.algo_cntr().journal_output_level();
  EJournalOutputLevel ns_olevel = algo.algo_cntr().null_space_journal_output_level();
  std::ostream& out = algo.track().journal_out();

  // print step header.
  if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
    using IterationPack::print_algorithm_step;
    print_algorithm_step( algo, step_poss, type, assoc_step_poss, out );
  }

  if(!nlp.is_initialized())
    nlp.initialize(algo.algo_cntr().check_results());

  Teuchos::VerboseObjectTempState<NLP>
    nlpOutputTempState(rcp(&nlp,false),Teuchos::getFancyOStream(rcp(&out,false)),convertToVerbLevel(olevel));

  const size_type
    n  = nlp.n(),
    nb = nlp.num_bounded_x(),
    m  = nlp.m();
  size_type
    r  = 0;

  // Get the iteration quantity container objects
  IterQuantityAccess<value_type>
    &f_iq   = s.f();
  IterQuantityAccess<VectorMutable>
    &x_iq   = s.x(),
    *c_iq   = m > 0  ? &s.c() : NULL,
    &Gf_iq  = s.Gf();
  IterQuantityAccess<MatrixOp>
    *Gc_iq  = m  > 0 ? &s.Gc() : NULL,
    *Z_iq   = NULL,
    *Y_iq   = NULL,
    *Uz_iq  = NULL,
    *Uy_iq  = NULL;
  IterQuantityAccess<MatrixOpNonsing>
    *R_iq   = NULL;

  MatrixOp::EMatNormType mat_nrm_inf = MatrixOp::MAT_NORM_INF;
  const bool calc_matrix_norms = algo.algo_cntr().calc_matrix_norms();
  const bool calc_matrix_info_null_space_only = algo.algo_cntr().calc_matrix_info_null_space_only();
  
  if( x_iq.last_updated() == IterQuantity::NONE_UPDATED ) {
    if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
      out << "\nx is not updated for any k so set x_k = nlp.xinit() ...\n";
    }
    x_iq.set_k(0) = nlp.xinit();
  }
  
  // Validate x
  if( nb && algo.algo_cntr().check_results() ) {
    assert_print_nan_inf(
      x_iq.get_k(0), "x_k", true
      , int(olevel) >= int(PRINT_ALGORITHM_STEPS) ? &out : NULL
      );
    if( nlp.num_bounded_x() > 0 ) {
      if(!bounds_tester().check_in_bounds(
           int(olevel)  >= int(PRINT_ALGORITHM_STEPS) ? &out : NULL
           ,int(olevel) >= int(PRINT_VECTORS)                // print_all_warnings
           ,int(olevel) >= int(PRINT_ITERATION_QUANTITIES)  // print_vectors
           ,nlp.xl(),        "xl"
           ,nlp.xu(),        "xu"
           ,x_iq.get_k(0),   "x_k"
           ))
      {
        TEUCHOS_TEST_FOR_EXCEPTION(
          true, TestFailed
          ,"EvalNewPointStd_Step::do_step(...) : Error, "
          "the variables bounds xl <= x_k <= xu where violated!" );
      }
    }
  }

  Vector &x = x_iq.get_k(0);

  Range1D  var_dep(Range1D::INVALID), var_indep(Range1D::INVALID);
  if( s.get_decomp_sys().get() ) {
    const ConstrainedOptPack::DecompositionSystemVarReduct
      *decomp_sys_vr = dynamic_cast<ConstrainedOptPack::DecompositionSystemVarReduct*>(&s.decomp_sys());
    if(decomp_sys_vr) {
      var_dep   = decomp_sys_vr->var_dep();
      var_indep = decomp_sys_vr->var_indep();
    }
    s.var_dep(var_dep);
    s.var_indep(var_indep);
  }

  if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
    out << "\n||x_k||inf            = " << x.norm_inf();
    if( var_dep.size() )
      out << "\n||x(var_dep)_k||inf   = " << x.sub_view(var_dep)->norm_inf();
    if( var_indep.size() )
      out << "\n||x(var_indep)_k||inf = " << x.sub_view(var_indep)->norm_inf();
    out << std::endl;
  }
  if( static_cast<int>(olevel) >= static_cast<int>(PRINT_VECTORS) ) {
    out << "\nx_k = \n" << x;
    if( var_dep.size() )
      out << "\nx(var_dep)_k = \n" << *x.sub_view(var_dep);
  }
  if( static_cast<int>(ns_olevel) >= static_cast<int>(PRINT_VECTORS) ) {
    if( var_indep.size() )
      out << "\nx(var_indep)_k = \n" << *x.sub_view(var_indep);
  }

  // Set the references to the current point's quantities to be updated
  const bool f_k_updated  = f_iq.updated_k(0);
  const bool Gf_k_updated = Gf_iq.updated_k(0);
  const bool c_k_updated  = m  > 0 ? c_iq->updated_k(0)  : false;
  const bool Gc_k_updated = m  > 0 ? Gc_iq->updated_k(0) : false;
  nlp.unset_quantities();
  if(!f_k_updated) nlp.set_f( &f_iq.set_k(0) );
  if(!Gf_k_updated) nlp.set_Gf( &Gf_iq.set_k(0) );
  if( m > 0 ) {
    if(!c_k_updated) nlp.set_c( &c_iq->set_k(0) );
    if(!Gc_k_updated) nlp.set_Gc( &Gc_iq->set_k(0) );
  }

  // Calculate Gc at x_k
  bool new_point = true;
  if(m > 0) {
    if(!Gc_k_updated) nlp.calc_Gc( x, new_point );
    new_point = false;
  }

  //
  // Update (or select a new) range/null decomposition
  //
  bool new_decomp_selected = false;
  if( m > 0 ) {
    
    // Update the range/null decomposition
    decomp_sys_handler().update_decomposition(
      algo, s, nlp, decomp_sys_testing(), decomp_sys_testing_print_level()
      ,&new_decomp_selected
      );

    r  = s.equ_decomp().size();

    Z_iq   = ( n > m && r > 0 )      ? &s.Z()  : NULL;
    Y_iq   = ( r > 0 )               ? &s.Y()  : NULL;
    Uz_iq  = ( m  > 0 && m  > r )    ? &s.Uz() : NULL;
    Uy_iq  = ( m  > 0 && m  > r )    ? &s.Uy() : NULL;
    R_iq   = ( m > 0 )               ? &s.R()  : NULL;

    // Determine if we will test the decomp_sys or not
    const DecompositionSystem::ERunTests
      ds_test_what = ( ( decomp_sys_testing() == DecompositionSystemHandler_Strategy::DST_TEST
                 || ( decomp_sys_testing() == DecompositionSystemHandler_Strategy::DST_DEFAULT
                  && algo.algo_cntr().check_results() ) )
               ? DecompositionSystem::RUN_TESTS
               : DecompositionSystem::NO_TESTS );
    
    // Determine the output level for decomp_sys				
    DecompositionSystem::EOutputLevel ds_olevel;
    switch(olevel) {
      case PRINT_NOTHING:
      case PRINT_BASIC_ALGORITHM_INFO:
        ds_olevel = DecompositionSystem::PRINT_NONE;
        break;
      case PRINT_ALGORITHM_STEPS:
      case PRINT_ACTIVE_SET:
        ds_olevel = DecompositionSystem::PRINT_BASIC_INFO;
        break;
      case PRINT_VECTORS:
        ds_olevel = DecompositionSystem::PRINT_VECTORS;
        break;
      case PRINT_ITERATION_QUANTITIES:
        ds_olevel = DecompositionSystem::PRINT_EVERY_THING;
        break;
      default:
        TEUCHOS_TEST_FOR_EXCEPT(true); // Should not get here!
    };

    // Test the decomposition system
    if( ds_test_what == DecompositionSystem::RUN_TESTS ) {
      // Set the output level
      if( decomp_sys_tester().print_tests() == DecompositionSystemTester::PRINT_NOT_SELECTED ) {
        DecompositionSystemTester::EPrintTestLevel  ds_olevel;
        switch(olevel) {
          case PRINT_NOTHING:
          case PRINT_BASIC_ALGORITHM_INFO:
            ds_olevel = DecompositionSystemTester::PRINT_NONE;
            break;
          case PRINT_ALGORITHM_STEPS:
          case PRINT_ACTIVE_SET:
            ds_olevel = DecompositionSystemTester::PRINT_BASIC;
            break;
          case PRINT_VECTORS:
            ds_olevel = DecompositionSystemTester::PRINT_MORE;
            break;
          case PRINT_ITERATION_QUANTITIES:
            ds_olevel = DecompositionSystemTester::PRINT_ALL;
            break;
          default:
            TEUCHOS_TEST_FOR_EXCEPT(true); // Should not get here!
        }
        decomp_sys_tester().print_tests(ds_olevel);
        decomp_sys_tester().dump_all( olevel == PRINT_ITERATION_QUANTITIES );
      }
      // Run the tests
      if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
        out << "\nTesting the range/null decompostion ...\n";
      }
      const bool
        decomp_sys_passed = decomp_sys_tester().test_decomp_system(
          s.decomp_sys()
          ,Gc_iq->get_k(0)                   // Gc
          ,Z_iq ? &Z_iq->get_k(0) : NULL     // Z
          ,&Y_iq->get_k(0)                   // Y
          ,&R_iq->get_k(0)                   // R
          ,m > r  ? &Uz_iq->get_k(0) : NULL  // Uz
          ,m > r  ? &Uy_iq->get_k(0) : NULL  // Uy
          ,( olevel >= PRINT_ALGORITHM_STEPS ) ? &out : NULL
          );
      TEUCHOS_TEST_FOR_EXCEPTION(
        !decomp_sys_passed, TestFailed
        ,"EvalNewPointStd_Step::do_step(...) : Error, "
        "the tests of the decomposition system failed!" );
    }
  }
  else {
    // Unconstrained problem
    Z_iq = &s.Z();
    dyn_cast<MatrixSymIdent>(Z_iq->set_k(0)).initialize( nlp.space_x() );
    s.equ_decomp(Range1D::Invalid);
    s.equ_undecomp(Range1D::Invalid);
  }

  // Calculate the rest of the quantities.  If decomp_sys is a variable
  // reduction decomposition system object, then nlp will be hip to the
  // basis selection and will permute these quantities to that basis.
  // Note that x will already be permuted to the current basis.
  if(!Gf_k_updated) { nlp.calc_Gf( x, new_point ); new_point = false; }
  if( m && (!c_k_updated || new_decomp_selected ) ) {
    if(c_k_updated) nlp.set_c( &c_iq->set_k(0) ); // This was not set earlier!
    nlp.calc_c( x, false);
  }
  if(!f_k_updated) {
    nlp.calc_f( x, false);
  }
  nlp.unset_quantities();
  
  // Check for NaN and Inf
  assert_print_nan_inf(f_iq.get_k(0),"f_k",true,&out); 
  if(m)
    assert_print_nan_inf(c_iq->get_k(0),"c_k",true,&out); 
  assert_print_nan_inf(Gf_iq.get_k(0),"Gf_k",true,&out); 
  
  // Print the iteration quantities before we test the derivatives for debugging

  // Update the selection of dependent and independent variables
  if( s.get_decomp_sys().get() ) {
    const ConstrainedOptPack::DecompositionSystemVarReduct
      *decomp_sys_vr = dynamic_cast<ConstrainedOptPack::DecompositionSystemVarReduct*>(&s.decomp_sys());
    if(decomp_sys_vr) {
      var_dep   = decomp_sys_vr->var_dep();
      var_indep = decomp_sys_vr->var_indep();
    }
  }
  
  if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
    out << "\nPrinting the updated iteration quantities ...\n";
  }

  if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
    out	<< "\nf_k                      = "     << f_iq.get_k(0);
    out << "\n||Gf_k||inf              = "     << Gf_iq.get_k(0).norm_inf();
    if( var_dep.size() )
      out << "\n||Gf_k(var_dep)_k||inf   = " << Gf_iq.get_k(0).sub_view(var_dep)->norm_inf();
    if( var_indep.size() )
      out << "\n||Gf_k(var_indep)_k||inf = " << Gf_iq.get_k(0).sub_view(var_indep)->norm_inf();
    if(m) {
      out << "\n||c_k||inf               = " << c_iq->get_k(0).norm_inf();
      if( calc_matrix_norms && !calc_matrix_info_null_space_only )
        out << "\n||Gc_k||inf              = " << Gc_iq->get_k(0).calc_norm(mat_nrm_inf).value;
      if( n > r && calc_matrix_norms && !calc_matrix_info_null_space_only )
        out << "\n||Z||inf                 = " << Z_iq->get_k(0).calc_norm(mat_nrm_inf).value;
      if( r && calc_matrix_norms && !calc_matrix_info_null_space_only )
        out << "\n||Y||inf                 = " << Y_iq->get_k(0).calc_norm(mat_nrm_inf).value;
      if( r && calc_matrix_norms && !calc_matrix_info_null_space_only  )
        out << "\n||R||inf                 = " << R_iq->get_k(0).calc_norm(mat_nrm_inf).value;
      if( algo.algo_cntr().calc_conditioning() && !calc_matrix_info_null_space_only ) {
        out << "\ncond_inf(R)              = " << R_iq->get_k(0).calc_cond_num(mat_nrm_inf).value;
      }
      if( m > r && calc_matrix_norms && !calc_matrix_info_null_space_only ) {
        out << "\n||Uz_k||inf              = " << Uz_iq->get_k(0).calc_norm(mat_nrm_inf).value;
        out << "\n||Uy_k||inf              = " << Uy_iq->get_k(0).calc_norm(mat_nrm_inf).value;
      }
    }
    out << std::endl;
  }

  if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ITERATION_QUANTITIES) ) {
    if(m)
      out << "\nGc_k =\n" << Gc_iq->get_k(0);
    if( n > r )
      out << "\nZ_k =\n" << Z_iq->get_k(0);
    if(r) {
      out << "\nY_k =\n" << Y_iq->get_k(0);
      out << "\nR_k =\n" << R_iq->get_k(0);
    }
    if( m > r ) {
      out << "\nUz_k =\n" << Uz_iq->get_k(0);
      out << "\nUy_k =\n" << Uy_iq->get_k(0);
    }
  }
  if( static_cast<int>(olevel) >= static_cast<int>(PRINT_VECTORS) ) {
    out	<< "\nGf_k =\n" << Gf_iq.get_k(0);
    if( var_dep.size() )
      out << "\nGf(var_dep)_k =\n " << *Gf_iq.get_k(0).sub_view(var_dep);
  }
  if( static_cast<int>(ns_olevel) >= static_cast<int>(PRINT_VECTORS) ) {
    if( var_indep.size() )
      out << "\nGf(var_indep)_k =\n" << *Gf_iq.get_k(0).sub_view(var_indep);
  }
  if( static_cast<int>(olevel) >= static_cast<int>(PRINT_VECTORS) ) {
    if(m)
      out	<< "\nc_k = \n" << c_iq->get_k(0);
  }
  
  // Check the derivatives if we are checking the results
  if(		fd_deriv_testing() == FD_TEST
    || ( fd_deriv_testing() == FD_DEFAULT && algo.algo_cntr().check_results() )  )
  {
    
    if( olevel >= PRINT_ALGORITHM_STEPS ) {
      out	<< "\n*** Checking derivatives by finite differences\n";
    }

    const bool
      nlp_passed = deriv_tester().finite_diff_check(
        &nlp
        ,x
        ,nb ? &nlp.xl() : NULL
        ,nb ? &nlp.xu() : NULL
        ,m  ? &Gc_iq->get_k(0) : NULL
        ,&Gf_iq.get_k(0)
        ,olevel >= PRINT_VECTORS
        ,( olevel >= PRINT_ALGORITHM_STEPS ) ? &out : NULL
        );
    TEUCHOS_TEST_FOR_EXCEPTION(
      !nlp_passed, TestFailed
      ,"EvalNewPointStd_Step::do_step(...) : Error, "
      "the tests of the nlp derivatives failed!" );
  }

  return true;
}
bool ReducedHessianSecantUpdateLPBFGS_Strategy::perform_update(
  DVectorSlice* s_bfgs, DVectorSlice* y_bfgs, bool first_update
  ,std::ostream& out, EJournalOutputLevel olevel, NLPAlgo *algo, NLPAlgoState *s
  ,MatrixOp *rHL_k
  )
{
  using std::setw;
  using std::endl;
  using std::right;
  using Teuchos::dyn_cast;
  namespace rcp = MemMngPack;
  using Teuchos::RCP;
  using LinAlgOpPack::V_MtV;
  using DenseLinAlgPack::dot;
  using AbstractLinAlgPack::norm_inf;
  using AbstractLinAlgPack::transVtMtV;
  typedef ConstrainedOptPack::MatrixHessianSuperBasic MHSB_t;
  using Teuchos::Workspace;
  Teuchos::WorkspaceStore* wss = Teuchos::get_default_workspace_store().get();

  if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
    out << "\n*** (LPBFGS) Full limited memory BFGS to projected BFGS ...\n";
  }

#ifdef _WINDOWS
  MHSB_t &rHL_super = dynamic_cast<MHSB_t&>(*rHL_k);
#else
  MHSB_t &rHL_super = dyn_cast<MHSB_t>(*rHL_k);
#endif

  const size_type
    n    = algo->nlp().n(),
    r    = algo->nlp().r(),
    n_pz = n-r;

  bool do_projected_rHL_RR = false;

  // See if we still have a limited memory BFGS update matrix
  RCP<MatrixSymPosDefLBFGS> // We don't want this to be deleted until we are done with it
    lbfgs_rHL_RR = Teuchos::rcp_const_cast<MatrixSymPosDefLBFGS>(
      Teuchos::rcp_dynamic_cast<const MatrixSymPosDefLBFGS>(rHL_super.B_RR_ptr()) );

  if( lbfgs_rHL_RR.get() && rHL_super.Q_R().is_identity()  ) {
    //
    // We have a limited memory BFGS matrix and have not started projected BFGS updating
    // yet so lets determine if it is time to consider switching
    //
    // Check that the current update is sufficiently p.d. before we do anything
    const value_type
      sTy = dot(*s_bfgs,*y_bfgs),
      yTy = dot(*y_bfgs,*y_bfgs);
    if( !ConstrainedOptPack::BFGS_sTy_suff_p_d(
      *s_bfgs,*y_bfgs,&sTy
      ,  int(olevel) >= int(PRINT_ALGORITHM_STEPS) ? &out : NULL )
      && !proj_bfgs_updater().bfgs_update().use_dampening()
      )
    {
      if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
        out	<< "\nWarning!  use_damening == false so there is no way we can perform any"
            " kind of BFGS update (projected or not) so we will skip it!\n";
      }
      quasi_newton_stats_(*s).set_k(0).set_updated_stats(
        QuasiNewtonStats::INDEF_SKIPED );
      return true;
    }
    // Consider if we can even look at the active set yet.
    const bool consider_switch  = lbfgs_rHL_RR->num_secant_updates() >= min_num_updates_proj_start();
    if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
      out << "\nnum_previous_updates = " << lbfgs_rHL_RR->num_secant_updates()
        << ( consider_switch ? " >= " : " < " )
        << "min_num_updates_proj_start = " << min_num_updates_proj_start()
        << ( consider_switch
           ? "\nConsidering if we should switch to projected BFGS updating of superbasics ...\n"
           : "\nNot time to consider switching to projected BFGS updating of superbasics yet!" );
    }
    if( consider_switch ) {
      // 
      // Our job here is to determine if it is time to switch to projected projected
      // BFGS updating.
      //
      if( act_set_stats_(*s).updated_k(-1) ) {
        if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
          out	<< "\nDetermining if projected BFGS updating of superbasics should begin ...\n";
        }
        // Determine if the active set has calmed down enough
        const SpVector
          &nu_km1 = s->nu().get_k(-1);
        const SpVectorSlice
          nu_indep = nu_km1(s->var_indep());
        const bool 
          act_set_calmed_down
          = PBFGSPack::act_set_calmed_down(
            act_set_stats_(*s).get_k(-1)
            ,proj_bfgs_updater().act_set_frac_proj_start()
            ,olevel,out
            ),
          max_num_updates_exceeded
          = lbfgs_rHL_RR->m_bar() >= max_num_updates_proj_start();
        do_projected_rHL_RR = act_set_calmed_down || max_num_updates_exceeded;
        if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
          if( act_set_calmed_down ) {
            out << "\nThe active set has calmed down enough so lets further consider switching to\n"
              << "projected BFGS updating of superbasic variables ...\n";
          }
          else if( max_num_updates_exceeded ) {
            out << "\nThe active set has not calmed down enough but num_previous_updates = "
              << lbfgs_rHL_RR->m_bar() << " >= max_num_updates_proj_start = "	<< max_num_updates_proj_start()
              << "\nso we will further consider switching to projected BFGS updating of superbasic variables ...\n";
          }
          else {
            out << "\nIt is not time to switch to projected BFGS so just keep performing full limited memory BFGS for now ...\n";
          }
        }
        if( do_projected_rHL_RR ) {
          //
          // Determine the set of initially fixed and free independent variables.
          //
          typedef Workspace<size_type>                              i_x_t;
          typedef Workspace<ConstrainedOptPack::EBounds>   bnd_t;
          i_x_t   i_x_free(wss,n_pz);
          i_x_t   i_x_fixed(wss,n_pz);
          bnd_t   bnd_fixed(wss,n_pz);
          i_x_t   l_x_fixed_sorted(wss,n_pz);
          size_type n_pz_X = 0, n_pz_R = 0;
          // rHL = rHL_scale * I
          value_type
            rHL_scale = sTy > 0.0 ? yTy/sTy : 1.0;
          if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
            out	<< "\nScaling for diagonal intitial rHL = rHL_scale*I, rHL_scale = " << rHL_scale << std::endl;
          }
          value_type sRTBRRsR = 0.0, sRTyR = 0.0, sXTBXXsX = 0.0, sXTyX = 0.0;
          // Get the elements in i_x_free[] for variables that are definitely free
          // and initialize s_R'*B_RR*s_R and s_R'*y_R
          PBFGSPack::init_i_x_free_sRTsR_sRTyR(
            nu_indep, *s_bfgs, *y_bfgs
            , &n_pz_R, &i_x_free[0], &sRTBRRsR, &sRTyR );
          sRTBRRsR *= rHL_scale;
          Workspace<value_type> rHL_XX_diag_ws(wss,nu_indep.nz());
          DVectorSlice rHL_XX_diag(&rHL_XX_diag_ws[0],rHL_XX_diag_ws.size());
          rHL_XX_diag = rHL_scale;
          // Sort fixed variables according to |s_X(i)^2*B_XX(i,i)|/|sRTBRRsR| + |s_X(i)*y_X(i)|/|sRTyR|
          // and initialize s_X'*B_XX*s_X and s_X*y_X
          PBFGSPack::sort_fixed_max_cond_viol(
            nu_indep,*s_bfgs,*y_bfgs,rHL_XX_diag,sRTBRRsR,sRTyR
            ,&sXTBXXsX,&sXTyX,&l_x_fixed_sorted[0]);
          // Pick initial set of i_x_free[] and i_x_fixed[] (sorted!)
          PBFGSPack::choose_fixed_free(
            proj_bfgs_updater().project_error_tol()
            ,proj_bfgs_updater().super_basic_mult_drop_tol(),nu_indep
            ,*s_bfgs,*y_bfgs,rHL_XX_diag,&l_x_fixed_sorted[0]
            ,olevel,out,&sRTBRRsR,&sRTyR,&sXTBXXsX,&sXTyX
            ,&n_pz_X,&n_pz_R,&i_x_free[0],&i_x_fixed[0],&bnd_fixed[0] );
          if( n_pz_R < n_pz ) {
            //
            // We are ready to transition to projected BFGS updating!
            //
            // Determine if we are be using dense or limited memory BFGS?
            const bool
              low_num_super_basics = n_pz_R <= num_superbasics_switch_dense();
            if( static_cast<int>(olevel) >= static_cast<int>(PRINT_BASIC_ALGORITHM_INFO) ) {
              out	<< "\nSwitching to projected BFGS updating ..."
                << "\nn_pz_R = " << n_pz_R << ( low_num_super_basics ? " <= " : " > " )
                << " num_superbasics_switch_dense = " << num_superbasics_switch_dense()
                << ( low_num_super_basics
                   ? "\nThere are not too many superbasic variables so use dense projected BFGS ...\n"
                   : "\nThere are too many superbasic variables so use limited memory projected BFGS ...\n"
                  );
            }
            // Create new matrix to use for rHL_RR initialized to rHL_RR = rHL_scale*I
            RCP<MatrixSymSecant>
              rHL_RR = NULL;
            if( low_num_super_basics ) {
              rHL_RR = new MatrixSymPosDefCholFactor(
                NULL    // Let it allocate its own memory
                ,NULL   // ...
                ,n_pz   // maximum size
                ,lbfgs_rHL_RR->maintain_original()
                ,lbfgs_rHL_RR->maintain_inverse()
                );
            }
            else {
              rHL_RR = new MatrixSymPosDefLBFGS(
                n_pz, lbfgs_rHL_RR->m()
                ,lbfgs_rHL_RR->maintain_original()
                ,lbfgs_rHL_RR->maintain_inverse()
                ,lbfgs_rHL_RR->auto_rescaling()
                );
            }
            rHL_RR->init_identity( n_pz_R, rHL_scale );
            if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ITERATION_QUANTITIES) ) {
              out << "\nrHL_RR after intialized to rHL_RR = rHL_scale*I but before performing current BFGS update:\nrHL_RR =\n"
                << dynamic_cast<MatrixOp&>(*rHL_RR); // I know this is okay!
            }
            // Initialize rHL_XX = rHL_scale*I
#ifdef _WINDOWS
            MatrixSymInitDiag
              &rHL_XX = dynamic_cast<MatrixSymInitDiag&>(
                const_cast<MatrixSymOp&>(*rHL_super.B_XX_ptr()));
#else
            MatrixSymInitDiag
              &rHL_XX = dyn_cast<MatrixSymInitDiag>(
                const_cast<MatrixSymOp&>(*rHL_super.B_XX_ptr()));
#endif
            rHL_XX.init_identity( n_pz_X, rHL_scale );
            // Reinitialize rHL
            rHL_super.initialize(
              n_pz, n_pz_R, &i_x_free[0], &i_x_fixed[0], &bnd_fixed[0]
              ,Teuchos::rcp_const_cast<const MatrixSymWithOpFactorized>(
                Teuchos::rcp_dynamic_cast<MatrixSymWithOpFactorized>(rHL_RR))
              ,NULL,BLAS_Cpp::no_trans,rHL_super.B_XX_ptr()
              );
            //
            // Perform the current BFGS update first
            //
            MatrixSymOp
              &rHL_RR_op = dynamic_cast<MatrixSymOp&>(*rHL_RR);
            const GenPermMatrixSlice
              &Q_R = rHL_super.Q_R(),
              &Q_X = rHL_super.Q_X();
            // Get projected BFGS update vectors y_bfgs_R, s_bfgs_R
            Workspace<value_type>
              y_bfgs_R_ws(wss,Q_R.cols()),
              s_bfgs_R_ws(wss,Q_R.cols()),
              y_bfgs_X_ws(wss,Q_X.cols()),
              s_bfgs_X_ws(wss,Q_X.cols());
            DVectorSlice y_bfgs_R(&y_bfgs_R_ws[0],y_bfgs_R_ws.size());
            DVectorSlice s_bfgs_R(&s_bfgs_R_ws[0],s_bfgs_R_ws.size());
            DVectorSlice y_bfgs_X(&y_bfgs_X_ws[0],y_bfgs_X_ws.size());
            DVectorSlice s_bfgs_X(&s_bfgs_X_ws[0],s_bfgs_X_ws.size());
            V_MtV( &y_bfgs_R, Q_R, BLAS_Cpp::trans, *y_bfgs );  // y_bfgs_R = Q_R'*y_bfgs
            V_MtV( &s_bfgs_R, Q_R, BLAS_Cpp::trans, *s_bfgs );  // s_bfgs_R = Q_R'*s_bfgs
            V_MtV( &y_bfgs_X, Q_X, BLAS_Cpp::trans, *y_bfgs );  // y_bfgs_X = Q_X'*y_bfgs
            V_MtV( &s_bfgs_X, Q_X, BLAS_Cpp::trans, *s_bfgs );  // s_bfgs_X = Q_X'*s_bfgs
            // Update rHL_RR
            if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
              out << "\nPerform current BFGS update on " << n_pz_R << " x " << n_pz_R << " projected "
                << "reduced Hessian for the superbasic variables where B = rHL_RR...\n";
            }
            QuasiNewtonStats quasi_newton_stats;
            proj_bfgs_updater().bfgs_update().perform_update(
              &s_bfgs_R(),&y_bfgs_R(),false,out,olevel,algo->algo_cntr().check_results()
              ,&rHL_RR_op, &quasi_newton_stats );
            // Perform previous updates if possible
            if( lbfgs_rHL_RR->m_bar() ) {
              const size_type num_add_updates = std::_MIN(num_add_recent_updates(),lbfgs_rHL_RR->m_bar());
              if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
                out	<< "\nAdd the min(num_previous_updates,num_add_recent_updates) = min(" << lbfgs_rHL_RR->m_bar()
                  << "," << num_add_recent_updates() << ") = " << num_add_updates << " most recent BFGS updates if possible ...\n";
              }
              // Now add previous update vectors
              const value_type
                project_error_tol = proj_bfgs_updater().project_error_tol();
              const DMatrixSlice
                S = lbfgs_rHL_RR->S(),
                Y = lbfgs_rHL_RR->Y();
              size_type k = lbfgs_rHL_RR->k_bar();  // Location in S and Y of most recent update vectors
              for( size_type l = 1; l <= num_add_updates; ++l, --k ) {
                if(k == 0) k = lbfgs_rHL_RR->m_bar();  // see MatrixSymPosDefLBFGS
                // Check to see if this update satisfies the required conditions.
                // Start with the condition on s'*y since this are cheap to check.
                V_MtV( &s_bfgs_X(), Q_X, BLAS_Cpp::trans, S.col(k) ); // s_bfgs_X = Q_X'*s_bfgs
                V_MtV( &y_bfgs_X(), Q_X, BLAS_Cpp::trans, Y.col(k) ); // y_bfgs_X = Q_X'*y_bfgs
                sRTyR    = dot( s_bfgs_R, y_bfgs_R );
                sXTyX    = dot( s_bfgs_X, y_bfgs_X );
                bool
                  sXTyX_cond    = ::fabs(sXTyX/sRTyR) <= project_error_tol,
                  do_update     = sXTyX_cond,
                  sXTBXXsX_cond = false;
                if( sXTyX_cond ) {
                  // Check the second more expensive condition
                  V_MtV( &s_bfgs_R(), Q_R, BLAS_Cpp::trans, S.col(k) ); // s_bfgs_R = Q_R'*s_bfgs
                  V_MtV( &y_bfgs_R(), Q_R, BLAS_Cpp::trans, Y.col(k) ); // y_bfgs_R = Q_R'*y_bfgs
                  sRTBRRsR = transVtMtV( s_bfgs_R, rHL_RR_op, BLAS_Cpp::no_trans, s_bfgs_R );
                  sXTBXXsX = rHL_scale * dot( s_bfgs_X, s_bfgs_X );
                  sXTBXXsX_cond = sXTBXXsX/sRTBRRsR <= project_error_tol;
                  do_update     = sXTBXXsX_cond && sXTyX_cond;
                }
                if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
                  out << "\n---------------------------------------------------------------------"
                    << "\nprevious update " << l
                    << "\n\nChecking projection error:\n"
                    << "\n|s_X'*y_X|/|s_R'*y_R| = |" << sXTyX << "|/|" << sRTyR
                    << "| = " << ::fabs(sXTyX/sRTyR)
                    << ( sXTyX_cond ? " <= " : " > " ) << " project_error_tol = "
                    << project_error_tol;
                  if( sXTyX_cond ) {
                    out	<< "\n(s_X'*rHL_XX*s_X/s_R'*rHL_RR*s_R) = (" << sXTBXXsX
                        << ") = " << (sXTBXXsX/sRTBRRsR)
                        << ( sXTBXXsX_cond ? " <= " : " > " ) << " project_error_tol = "
                        << project_error_tol;
                  }
                  out << ( do_update
                    ? "\n\nAttemping to add this previous update where B = rHL_RR ...\n"
                    : "\n\nCan not add this previous update ...\n" );
                }
                if( do_update ) {
                  // ( rHL_RR, s_bfgs_R, y_bfgs_R ) -> rHL_RR (this should not throw an exception!)
                  try {
                    proj_bfgs_updater().bfgs_update().perform_update(
                      &s_bfgs_R(),&y_bfgs_R(),false,out,olevel,algo->algo_cntr().check_results()
                      ,&rHL_RR_op, &quasi_newton_stats );
                  }
                  catch( const MatrixSymSecant::UpdateSkippedException& excpt ) {
                    if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
                      out	<< "\nOops!  The " << l << "th most recent BFGS update was rejected?:\n"
                        << excpt.what() << std::endl;
                    }
                  }
                }
              }
              if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
                out << "\n---------------------------------------------------------------------\n";
                }
              if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ITERATION_QUANTITIES) ) {
                out << "\nrHL_RR after adding previous BFGS updates:\nrHL_BRR =\n"
                  << dynamic_cast<MatrixOp&>(*rHL_RR);
              }
            }
            else {
              if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
                out	<< "\nThere were no previous update vectors to add!\n";
              }
            }
            if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ITERATION_QUANTITIES) ) {
              out << "\nFull rHL after complete reinitialization:\nrHL =\n" << *rHL_k;
            }
            quasi_newton_stats_(*s).set_k(0).set_updated_stats(
              QuasiNewtonStats::REINITIALIZED );
            return true;
          }
          else {
            if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
              out << "\nn_pz_R == n_pz = " << n_pz_R << ", No variables would be removed from "
                << "the superbasis so just keep on performing limited memory BFGS for now ...\n";
            }
            do_projected_rHL_RR = false;
          }
        }
      }
    }
    // If we have not switched to PBFGS then just update the full limited memory BFGS matrix
    if(!do_projected_rHL_RR) {
      if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
        out << "\nPerform current BFGS update on " << n_pz << " x " << n_pz << " full "
          << "limited memory reduced Hessian B = rHL ...\n";
      }
      proj_bfgs_updater().bfgs_update().perform_update(
        s_bfgs,y_bfgs,first_update,out,olevel,algo->algo_cntr().check_results()
        ,lbfgs_rHL_RR.get()
        ,&quasi_newton_stats_(*s).set_k(0)
        );
      return true;
    }
  }
  else {
    if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
      out	<< "\nWe have already switched to projected BFGS updating ...\n";
    }
  }
  //
  // If we get here then we must have switched to
  // projected updating so lets just pass it on!
  //
  return proj_bfgs_updater().perform_update(
    s_bfgs,y_bfgs,first_update,out,olevel,algo,s,rHL_k);
}
bool TangentialStepIP_Step::do_step(
  Algorithm& _algo, poss_type step_poss, IterationPack::EDoStepType type
  ,poss_type assoc_step_poss
  )
  {
  using BLAS_Cpp::no_trans;
  using Teuchos::dyn_cast;
  using AbstractLinAlgPack::assert_print_nan_inf;
  using LinAlgOpPack::Vt_S;
  using LinAlgOpPack::Vp_StV;
  using LinAlgOpPack::V_StV;
  using LinAlgOpPack::V_MtV;
  using LinAlgOpPack::V_InvMtV;
   using LinAlgOpPack::M_StM;
  using LinAlgOpPack::Mp_StM;
  using LinAlgOpPack::assign;

  NLPAlgo	&algo	= rsqp_algo(_algo);
  IpState	    &s      = dyn_cast<IpState>(_algo.state());

  EJournalOutputLevel olevel = algo.algo_cntr().journal_output_level();
  std::ostream& out = algo.track().journal_out();

  // print step header.
  if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
    using IterationPack::print_algorithm_step;
    print_algorithm_step( algo, step_poss, type, assoc_step_poss, out );
  }

  // Compute qp_grad which is an approximation to rGf + Z'*(mu*(invXu*e-invXl*e) + no_cross_term
  // minimize round off error by calc'ing Z'*(Gf + mu*(invXu*e-invXl*e))

  // qp_grad_k = Z'*(Gf + mu*(invXu*e-invXl*e))
  const MatrixSymDiagStd  &invXu = s.invXu().get_k(0);
  const MatrixSymDiagStd  &invXl = s.invXl().get_k(0);
  const value_type            &mu    = s.barrier_parameter().get_k(0);
  const MatrixOp          &Z_k   = s.Z().get_k(0);

  Teuchos::RCP<VectorMutable> rhs = s.Gf().get_k(0).clone();
  Vp_StV( rhs.get(), mu,      invXu.diag() );
  Vp_StV( rhs.get(), -1.0*mu, invXl.diag() );
  
  if( (int)olevel >= (int)PRINT_ALGORITHM_STEPS ) 
    {
    out << "\n||Gf_k + mu_k*(invXu_k-invXl_k)||inf = " << rhs->norm_inf() << std::endl;
    }
  if( (int)olevel >= (int)PRINT_VECTORS)
    {
    out << "\nGf_k + mu_k*(invXu_k-invXl_k) =\n" << *rhs;
    }

  VectorMutable &qp_grad_k = s.qp_grad().set_k(0);
  V_MtV(&qp_grad_k, Z_k, BLAS_Cpp::trans, *rhs);
  
  if( (int)olevel >= (int)PRINT_ALGORITHM_STEPS ) 
    {
    out << "\n||Z_k'*(Gf_k + mu_k*(invXu_k-invXl_k))||inf = " << qp_grad_k.norm_inf() << std::endl;
    }
  if( (int)olevel >= (int)PRINT_VECTORS )
    {
    out << "\nZ_k'*(Gf_k + mu_k*(invXu_k-invXl_k)) =\n" << qp_grad_k;
    }

  // error check for cross term
  value_type         &zeta    = s.zeta().set_k(0);
  const Vector &w_sigma = s.w_sigma().get_k(0);
  
  // need code to calculate damping parameter
  zeta = 1.0;

  Vp_StV(&qp_grad_k, zeta, w_sigma);

  if( (int)olevel >= (int)PRINT_ALGORITHM_STEPS ) 
    {
    out << "\n||qp_grad_k||inf = " << qp_grad_k.norm_inf() << std::endl;
    }
  if( (int)olevel >= (int)PRINT_VECTORS ) 
    {
    out << "\nqp_grad_k =\n" << qp_grad_k;
    }

  // build the "Hessian" term B = rHL + rHB
  // should this be MatrixSymOpNonsing
  const MatrixSymOp      &rHL_k = s.rHL().get_k(0);
  const MatrixSymOp      &rHB_k = s.rHB().get_k(0);
  MatrixSymOpNonsing &B_k   = dyn_cast<MatrixSymOpNonsing>(s.B().set_k(0));
  if (B_k.cols() != Z_k.cols())
    {
    // Initialize space in rHB
    dyn_cast<MatrixSymInitDiag>(B_k).init_identity(Z_k.space_rows(), 0.0);
    }

  //	M_StM(&B_k, 1.0, rHL_k, no_trans);
  assign(&B_k, rHL_k, BLAS_Cpp::no_trans);
  if( (int)olevel >= (int)PRINT_VECTORS ) 
    {
    out << "\nB_k = rHL_k =\n" << B_k;
    }
  Mp_StM(&B_k, 1.0, rHB_k, BLAS_Cpp::no_trans);
  if( (int)olevel >= (int)PRINT_VECTORS ) 
    {
    out << "\nB_k = rHL_k + rHB_k =\n" << B_k;
    }

  // Solve the system pz = - inv(rHL) * qp_grad
  VectorMutable   &pz_k  = s.pz().set_k(0);
  V_InvMtV( &pz_k, B_k, no_trans, qp_grad_k );
  Vt_S( &pz_k, -1.0 );

  // Zpz = Z * pz
  V_MtV( &s.Zpz().set_k(0), s.Z().get_k(0), no_trans, pz_k );

  if( (int)olevel >= (int)PRINT_ALGORITHM_STEPS )
    {
    out	<< "\n||pz||inf   = " << s.pz().get_k(0).norm_inf()
      << "\nsum(Zpz)    = " << AbstractLinAlgPack::sum(s.Zpz().get_k(0))  << std::endl;
    }

  if( (int)olevel >= (int)PRINT_VECTORS )
    {
    out << "\npz_k = \n" << s.pz().get_k(0);
    out << "\nnu_k = \n" << s.nu().get_k(0);
    out << "\nZpz_k = \n" << s.Zpz().get_k(0);
    out << std::endl;
    }

  if(algo.algo_cntr().check_results())
    {
    assert_print_nan_inf(s.pz().get_k(0),  "pz_k",true,&out);
    assert_print_nan_inf(s.Zpz().get_k(0), "Zpz_k",true,&out);
    }

  return true;
  }
bool PostEvalNewPointBarrier_Step::do_step(
  Algorithm& _algo, poss_type step_poss, IterationPack::EDoStepType type
  ,poss_type assoc_step_poss
  )
  {
  using Teuchos::dyn_cast;
  using IterationPack::print_algorithm_step;
  using AbstractLinAlgPack::inv_of_difference;
  using AbstractLinAlgPack::correct_upper_bound_multipliers;
  using AbstractLinAlgPack::correct_lower_bound_multipliers;
  using LinAlgOpPack::Vp_StV;

  NLPAlgo            &algo   = dyn_cast<NLPAlgo>(_algo);
  IpState             &s      = dyn_cast<IpState>(_algo.state());
  NLP                 &nlp    = algo.nlp();
  
  EJournalOutputLevel olevel = algo.algo_cntr().journal_output_level();
  std::ostream& out = algo.track().journal_out();
  
  if(!nlp.is_initialized())
    nlp.initialize(algo.algo_cntr().check_results());

  // print step header.
  if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) 
    {
    using IterationPack::print_algorithm_step;
    print_algorithm_step( _algo, step_poss, type, assoc_step_poss, out );
    }

  IterQuantityAccess<VectorMutable> &x_iq = s.x();
  IterQuantityAccess<MatrixSymDiagStd> &Vl_iq = s.Vl();
  IterQuantityAccess<MatrixSymDiagStd> &Vu_iq = s.Vu();

  ///***********************************************************
  // Calculate invXl = diag(1/(x-xl)) 
  //  and invXu = diag(1/(xu-x)) matrices
  ///***********************************************************

  // get references to x, invXl, and invXu
  VectorMutable& x = x_iq.get_k(0);
  MatrixSymDiagStd& invXu = s.invXu().set_k(0);
  MatrixSymDiagStd& invXl = s.invXl().set_k(0);
  
  //std::cout << "xu=\n";
  //nlp.xu().output(std::cout);

  inv_of_difference(1.0, nlp.xu(), x, &invXu.diag());
  inv_of_difference(1.0, x, nlp.xl(), &invXl.diag());

  //std::cout << "invXu=\v";
  //invXu.output(std::cout);

  //std::cout << "\ninvXl=\v";
  //invXl.output(std::cout);
  
  // Check for divide by zeros - 
    using AbstractLinAlgPack::assert_print_nan_inf;
    assert_print_nan_inf(invXu.diag(), "invXu", true, &out); 
    assert_print_nan_inf(invXl.diag(), "invXl", true, &out); 
  // These should never go negative either - could be a useful check

  // Initialize Vu and Vl if necessary
  if ( /*!Vu_iq.updated_k(0) */ Vu_iq.last_updated() == IterQuantity::NONE_UPDATED )
    {
    VectorMutable& vu = Vu_iq.set_k(0).diag();		
    vu = 0;
    Vp_StV(&vu, s.barrier_parameter().get_k(-1), invXu.diag());

    if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) 
      {
      out << "\nInitialize Vu with barrier_parameter * invXu ...\n";
      }
    }

if ( /*!Vl_iq.updated_k(0) */ Vl_iq.last_updated() == IterQuantity::NONE_UPDATED  )
    {
    VectorMutable& vl = Vl_iq.set_k(0).diag();
    vl = 0;
    Vp_StV(&vl, s.barrier_parameter().get_k(-1), invXl.diag());

    if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) 
      {
      out << "\nInitialize Vl with barrier_parameter * invXl ...\n";
      }
    }

  if (s.num_basis().updated_k(0))
    {
    // Basis changed, reorder Vl and Vu
    if (Vu_iq.updated_k(-1))
      { Vu_iq.set_k(0,-1); }
    if (Vl_iq.updated_k(-1))
      { Vl_iq.set_k(0,-1); }
      
    VectorMutable& vu = Vu_iq.set_k(0).diag();
    VectorMutable& vl = Vl_iq.set_k(0).diag();

    s.P_var_last().permute( BLAS_Cpp::trans, &vu ); // Permute back to original order
    s.P_var_last().permute( BLAS_Cpp::trans, &vl ); // Permute back to original order

    if( (int)olevel >= (int)PRINT_VECTORS ) 
      {
      out	<< "\nx resorted vl and vu to the original order\n" << x;
      }

    s.P_var_current().permute( BLAS_Cpp::no_trans, &vu ); // Permute to new (current) order
    s.P_var_current().permute( BLAS_Cpp::no_trans, &vl ); // Permute to new (current) order

    if( (int)olevel >= (int)PRINT_VECTORS ) 
      {
      out	<< "\nx resorted to new basis \n" << x;
      }
    }

  correct_upper_bound_multipliers(nlp.xu(), +NLP::infinite_bound(), &Vu_iq.get_k(0).diag());
  correct_lower_bound_multipliers(nlp.xl(), -NLP::infinite_bound(), &Vl_iq.get_k(0).diag());
  
  if( (int)olevel >= (int)PRINT_VECTORS ) 
    {
    out << "x=\n"  << s.x().get_k(0);
    out << "xl=\n" << nlp.xl();
    out << "vl=\n" << s.Vl().get_k(0).diag();
    out << "xu=\n" << nlp.xu();
    out << "vu=\n" << s.Vu().get_k(0).diag();
    }

  // Update general algorithm bound multipliers
  VectorMutable& v = s.nu().set_k(0);
  v = Vu_iq.get_k(0).diag();
  Vp_StV(&v,-1.0,Vl_iq.get_k(0).diag());

  // Print vector information
  if( static_cast<int>(olevel) >= static_cast<int>(PRINT_VECTORS) ) 
    {
    out	<< "invXu_k.diag()=\n" << invXu.diag();
    out	<< "invXl_k.diag()=\n" << invXl.diag();
    out	<< "Vu_k.diag()=\n"    << Vu_iq.get_k(0).diag();
    out	<< "Vl_k.diag()=\n"    << Vl_iq.get_k(0).diag();
    out << "nu_k=\n"           << s.nu().get_k(0);
    }

  return true;
  }
bool CheckDescentQuasiNormalStep_Step::do_step(
  Algorithm& _algo, poss_type step_poss, IterationPack::EDoStepType type
  ,poss_type assoc_step_poss
  )
{
  using BLAS_Cpp::no_trans;
  using AbstractLinAlgPack::dot;
  using LinAlgOpPack::V_MtV;

  NLPAlgo         &algo        = rsqp_algo(_algo);
  NLPAlgoState    &s           = algo.rsqp_state();
  NLP             &nlp         = algo.nlp();
  const Range1D   equ_decomp   = s.equ_decomp();

  EJournalOutputLevel olevel = algo.algo_cntr().journal_output_level();
  std::ostream& out = algo.track().journal_out();

  // print step header.
  if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
    using IterationPack::print_algorithm_step;
    print_algorithm_step( algo, step_poss, type, assoc_step_poss, out );
  }
  
  const size_type
    nb = nlp.num_bounded_x();

  // Get iteration quantities
  IterQuantityAccess<VectorMutable>
    &c_iq   = s.c(),
    &Ypy_iq = s.Ypy();
  const Vector::vec_ptr_t
    cd_k = c_iq.get_k(0).sub_view(equ_decomp);
  const Vector
    &Ypy_k = Ypy_iq.get_k(0);
  
  value_type descent_c = -1.0;
  if( s.get_iter_quant_id( Gc_name ) != AlgorithmState::DOES_NOT_EXIST ) {
    if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
      out	<< "\nGc_k exists; compute descent_c = c_k(equ_decomp)'*Gc_k(:,equ_decomp)'*Ypy_k ...\n";
    }
    const MatrixOp::mat_ptr_t
      Gcd_k = s.Gc().get_k(0).sub_view(Range1D(),equ_decomp);
    VectorSpace::vec_mut_ptr_t
      t = cd_k->space().create_member();
    V_MtV( t.get(), *Gcd_k, BLAS_Cpp::trans, Ypy_k );
    if( static_cast<int>(olevel) >= static_cast<int>(PRINT_VECTORS) ) {
      out	<< "\nGc_k(:,equ_decomp)'*Ypy_k =\n" << *t;
    }
    descent_c = dot( *cd_k, *t );
  }
  else {
    if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
      out	<< "\nGc_k does not exist; compute descent_c = c_k(equ_decomp)'*FDGc_k(:,equ_decomp)'*Ypy_k "
        << "using finite differences ...\n";
    }
    VectorSpace::vec_mut_ptr_t
      t = nlp.space_c()->create_member();
    calc_fd_prod().calc_deriv_product(
      s.x().get_k(0),nb?&nlp.xl():NULL,nb?&nlp.xu():NULL
      ,Ypy_k,NULL,&c_iq.get_k(0),true,&nlp
      ,NULL,t.get()
      ,static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ? &out : NULL
      );
    if( static_cast<int>(olevel) >= static_cast<int>(PRINT_VECTORS) ) {
      out	<< "\nFDGc_k(:,equ_decomp)'*Ypy_k =\n" << *t->sub_view(equ_decomp);
    }
    descent_c = dot( *cd_k, *t->sub_view(equ_decomp) );
  }

  if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
    out	<< "\ndescent_c = " << descent_c << std::endl;
  }

  if( descent_c > 0.0 ) { // ToDo: add some allowance for > 0.0 for finite difference errors!
    if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
      out	<< "\nError, descent_c > 0.0; this is not a descent direction\n"
        << "Throw TestFailed and terminate the algorithm ...\n";
    }
    TEST_FOR_EXCEPTION(
      true, TestFailed
      ,"CheckDescentQuasiNormalStep_Step::do_step(...) : Error, descent for the decomposed constraints "
      "with respect to the quasi-normal step c_k(equ_decomp)'*FDGc_k(:,equ_decomp)'*Ypy_k = "
      << descent_c << " > 0.0;  This is not a descent direction!\n" );
  }

  return true;
}
void MatrixSymPosDefLBFGS::Vp_StMtV(
    VectorMutable* y, value_type alpha, BLAS_Cpp::Transp trans_rhs1
  , const Vector& x, value_type beta
  ) const
{
  using AbstractLinAlgPack::Vt_S;
  using AbstractLinAlgPack::Vp_StV;
  using AbstractLinAlgPack::Vp_StMtV;
  using LinAlgOpPack::V_StMtV;
  using LinAlgOpPack::V_MtV;
  typedef VectorDenseEncap         vde;
  typedef VectorDenseMutableEncap  vdme;
  using Teuchos::Workspace;
  Teuchos::WorkspaceStore* wss = Teuchos::get_default_workspace_store().get();

  assert_initialized();

  TEUCHOS_TEST_FOR_EXCEPT( !(  original_is_updated_  ) ); // For now just always update

  // y = b*y + Bk * x
  //
  // y = b*y + (1/gk)*x - [ (1/gk)*S  Y ] * inv(Q) * [ (1/gk)*S' ] * x
  //                                                 [     Y'    ]
  // Perform the following operations (in order):
  //
  // y = b*y
  //
  // y += (1/gk)*x
  //
  // t1 = [ (1/gk)*S'*x ]		<: R^(2*m)
  //		[      Y'*x   ]
  //
  // t2 =	inv(Q) * t1			<: R^(2*m)
  //
  // y += -(1/gk) * S * t2(1:m)
  //
  // y += -1.0 * Y * t2(m+1,2m)

  const value_type
    invgk = 1.0 / gamma_k_;

  // y = b*y
  Vt_S( y, beta );

  // y += (1/gk)*x
  Vp_StV( y, invgk, x );

  if( !m_bar_ )
    return;	// No updates have been added yet.

  const multi_vec_ptr_t
    S = this->S(),
    Y = this->Y();

  // Get workspace

  const size_type
    mb = m_bar_;

  Workspace<value_type>  t1_ws(wss,2*mb);
  DVectorSlice                 t1(&t1_ws[0],t1_ws.size());
  Workspace<value_type>  t2_ws(wss,2*mb);
  DVectorSlice                 t2(&t2_ws[0],t2_ws.size());

  VectorSpace::vec_mut_ptr_t
    t = S->space_rows().create_member();

  // t1 = [ (1/gk)*S'*x ]
  //		[      Y'*x   ]

  V_StMtV( t.get(), invgk, *S, BLAS_Cpp::trans, x );
  t1(1,mb) = vde(*t)();
  V_MtV( t.get(), *Y, BLAS_Cpp::trans, x );
  t1(mb+1,2*mb) = vde(*t)();

  // t2 =	inv(Q) * t1
  V_invQtV( &t2, t1 );

  // y += -(1/gk) * S * t2(1:m)
  (vdme(*t)() = t2(1,mb));
  Vp_StMtV( y, -invgk, *S, BLAS_Cpp::no_trans, *t );

  // y += -1.0 * Y * t2(m+1,2m
  (vdme(*t)() = t2(mb+1,2*mb));
  Vp_StMtV( y, -1.0, *Y, BLAS_Cpp::no_trans, *t );

}