bool TangentialStepWithInequStd_Step::do_step(
  Algorithm& _algo, poss_type step_poss, IterationPack::EDoStepType type
  ,poss_type assoc_step_poss
  )
{

  using Teuchos::RCP;
  using Teuchos::dyn_cast;
  using ::fabs;
  using LinAlgOpPack::Vt_S;
  using LinAlgOpPack::V_VpV;
  using LinAlgOpPack::V_VmV;
  using LinAlgOpPack::Vp_StV;
  using LinAlgOpPack::Vp_V;
  using LinAlgOpPack::V_StV;
  using LinAlgOpPack::V_MtV;
//	using ConstrainedOptPack::min_abs;
  using AbstractLinAlgPack::max_near_feas_step;
  typedef VectorMutable::vec_mut_ptr_t   vec_mut_ptr_t;

  NLPAlgo &algo = rsqp_algo(_algo);
  NLPAlgoState &s = algo.rsqp_state();
  EJournalOutputLevel olevel = algo.algo_cntr().journal_output_level();
  EJournalOutputLevel ns_olevel = algo.algo_cntr().null_space_journal_output_level();
  std::ostream &out = algo.track().journal_out();
  //const bool check_results = algo.algo_cntr().check_results();

  // print step header.
  if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
    using IterationPack::print_algorithm_step;
    print_algorithm_step( algo, step_poss, type, assoc_step_poss, out );
  }

  // problem dimensions
  const size_type
    //n  = algo.nlp().n(),
    m  = algo.nlp().m(),
    r  = s.equ_decomp().size();

  // Get the iteration quantity container objects
  IterQuantityAccess<value_type>
    &alpha_iq = s.alpha(),
    &zeta_iq  = s.zeta(),
    &eta_iq   = s.eta();
  IterQuantityAccess<VectorMutable>
    &dl_iq      = dl_iq_(s),
    &du_iq      = du_iq_(s),
    &nu_iq      = s.nu(),
    *c_iq       = m > 0  ? &s.c()       : NULL,
    *lambda_iq  = m > 0  ? &s.lambda()  : NULL,
    &rGf_iq     = s.rGf(),
    &w_iq       = s.w(),
    &qp_grad_iq = s.qp_grad(),
    &py_iq      = s.py(),
    &pz_iq      = s.pz(),
    &Ypy_iq     = s.Ypy(),
    &Zpz_iq     = s.Zpz();
  IterQuantityAccess<MatrixOp>
    &Z_iq   = s.Z(),
    //*Uz_iq  = (m > r)  ? &s.Uz() : NULL,
    *Uy_iq  = (m > r)  ? &s.Uy() : NULL;
  IterQuantityAccess<MatrixSymOp>
    &rHL_iq = s.rHL();
  IterQuantityAccess<ActSetStats>
    &act_set_stats_iq = act_set_stats_(s);
  
  // Accessed/modified/updated (just some)
  VectorMutable  *Ypy_k = (m ? &Ypy_iq.get_k(0) : NULL);
  const MatrixOp  &Z_k   = Z_iq.get_k(0);
  VectorMutable  &pz_k  = pz_iq.set_k(0);
  VectorMutable  &Zpz_k = Zpz_iq.set_k(0);

  // Comupte qp_grad which is an approximation to rGf + Z'*HL*Y*py

  // qp_grad = rGf
  VectorMutable
    &qp_grad_k = ( qp_grad_iq.set_k(0) = rGf_iq.get_k(0) );

  // qp_grad += zeta * w
  if( w_iq.updated_k(0) ) {
    if(zeta_iq.updated_k(0))
      Vp_StV( &qp_grad_k, zeta_iq.get_k(0), w_iq.get_k(0) );
    else
      Vp_V( &qp_grad_k, w_iq.get_k(0) );
  }

  //
  // Set the bounds for:
  //
  //   dl <= Z*pz + Y*py <= du  ->  dl - Ypy <= Z*pz <= du - Ypz

  vec_mut_ptr_t
    bl = s.space_x().create_member(),
    bu = s.space_x().create_member();

  if(m) {
    // bl = dl_k - Ypy_k
    V_VmV( bl.get(), dl_iq.get_k(0), *Ypy_k );
    // bu = du_k - Ypy_k
    V_VmV( bu.get(), du_iq.get_k(0), *Ypy_k );
  }
  else {
    *bl = dl_iq.get_k(0);
    *bu = du_iq.get_k(0);
  }

  // Print out the QP bounds for the constraints
  if( static_cast<int>(ns_olevel) >= static_cast<int>(PRINT_VECTORS) ) {
    out << "\nqp_grad_k = \n" << qp_grad_k;
  }
  if( static_cast<int>(olevel) >= static_cast<int>(PRINT_VECTORS) ) {
    out << "\nbl = \n" << *bl;
    out << "\nbu = \n" << *bu;
  }

  //
  // Determine if we should perform a warm start or not.
  //
  bool do_warm_start = false;
  if( act_set_stats_iq.updated_k(-1) ) {
    if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
      out	<< "\nDetermining if the QP should use a warm start ...\n";
    }
    // We need to see if we should preform a warm start for the next iteration
    ActSetStats &stats = act_set_stats_iq.get_k(-1);
    const size_type
      num_active = stats.num_active(),
      num_adds   = stats.num_adds(),
      num_drops  = stats.num_drops();
    const value_type
      frac_same
      = ( num_adds == ActSetStats::NOT_KNOWN || num_active == 0
        ? 0.0
        : my_max(((double)(num_active)-num_adds-num_drops) / num_active, 0.0 ) );
    do_warm_start = ( num_active > 0 && frac_same >= warm_start_frac() );
    if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
      out << "\nnum_active = " << num_active;
      if( num_active ) {
        out	<< "\nmax(num_active-num_adds-num_drops,0)/(num_active) = "
          << "max("<<num_active<<"-"<<num_adds<<"-"<<num_drops<<",0)/("<<num_active<<") = "
          << frac_same;
        if( do_warm_start )
          out << " >= ";
        else
          out << " < ";
        out << "warm_start_frac = " << warm_start_frac();
      }
      if( do_warm_start )
        out << "\nUse a warm start this time!\n";
      else
        out << "\nDon't use a warm start this time!\n";
    }
  }

  // Use active set from last iteration as an estimate for current active set
  // if we are to use a warm start.
  // 
  // ToDo: If the selection of dependent and independent variables changes
  // then you will have to adjust this or not perform a warm start at all!
  if( do_warm_start ) {
    nu_iq.set_k(0,-1);
  }
  else {
    nu_iq.set_k(0) = 0.0; // No guess of the active set
  }
  VectorMutable  &nu_k  = nu_iq.get_k(0);

  //
  // Setup the reduced QP subproblem
  //
  // The call to the QP is setup for the more flexible call to the QPSolverRelaxed
  // interface to deal with the three independent variabilities: using simple
  // bounds for pz or not, general inequalities included or not, and extra equality
  // constraints included or not.
  // If this method of calling the QP solver were not used then 4 separate
  // calls to solve_qp(...) would have to be included to handle the four possible
  // QP formulations.
  //

  // The numeric arguments for the QP solver (in the nomenclatrue of QPSolverRelaxed)

  const value_type  qp_bnd_inf = NLP::infinite_bound();

  const Vector            &qp_g       = qp_grad_k;
  const MatrixSymOp       &qp_G       = rHL_iq.get_k(0);
  const value_type        qp_etaL     = 0.0;
  vec_mut_ptr_t           qp_dL       = Teuchos::null;
  vec_mut_ptr_t           qp_dU       = Teuchos::null;
  Teuchos::RCP<const MatrixOp>
                          qp_E        = Teuchos::null;
  BLAS_Cpp::Transp        qp_trans_E  = BLAS_Cpp::no_trans;
  vec_mut_ptr_t           qp_b        = Teuchos::null;
  vec_mut_ptr_t           qp_eL       = Teuchos::null;
  vec_mut_ptr_t           qp_eU       = Teuchos::null;
  Teuchos::RCP<const MatrixOp>
                          qp_F        = Teuchos::null;
  BLAS_Cpp::Transp        qp_trans_F  = BLAS_Cpp::no_trans;
  vec_mut_ptr_t           qp_f        = Teuchos::null;
  value_type              qp_eta      = 0.0;
  VectorMutable           &qp_d       = pz_k;  // pz_k will be updated directly!
  vec_mut_ptr_t           qp_nu       = Teuchos::null;
  vec_mut_ptr_t           qp_mu       = Teuchos::null;
  vec_mut_ptr_t           qp_Ed       = Teuchos::null;
  vec_mut_ptr_t           qp_lambda   = Teuchos::null;

  //
  // Determine if we can use simple bounds on pz.
  // 
  // If we have a variable-reduction null-space matrix
  // (with any choice for Y) then:
  // 
  // d = Z*pz + (1-eta) * Y*py
  // 
  // [ d(var_dep)   ]  = [ D ] * pz  + (1-eta) * [ Ypy(var_dep)   ]
  // [ d(var_indep) ]    [ I ]                   [ Ypy(var_indep) ]
  // 
  // For a cooridinate decomposition (Y = [ I ; 0 ]) then Ypy(var_indep) ==
  // 0.0 and in this case the bounds on d(var_indep) become simple bounds on
  // pz even with the relaxation.  Also, if dl(var_dep) and du(var_dep) are
  // unbounded, then we can also use simple bounds since we don't need the
  // relaxation and we can set eta=0. In this case we just have to subtract
  // from the upper and lower bounds on pz!
  // 
  // Otherwise, we can not use simple variable bounds and implement the
  // relaxation properly.
  // 

  const MatrixIdentConcat
    *Zvr = dynamic_cast<const MatrixIdentConcat*>( &Z_k );
  const Range1D
    var_dep   = Zvr ? Zvr->D_rng() : Range1D::Invalid,
    var_indep = Zvr ? Zvr->I_rng() : Range1D();

  RCP<Vector> Ypy_indep;
  const value_type
    Ypy_indep_norm_inf
    = ( m ? (Ypy_indep=Ypy_k->sub_view(var_indep))->norm_inf() : 0.0);

  if( (int)olevel >= (int)PRINT_ALGORITHM_STEPS )
    out
      << "\nDetermine if we can use simple bounds on pz ...\n"
      << "    m = " << m << std::endl
      << "    dynamic_cast<const MatrixIdentConcat*>(&Z_k) = " << Zvr << std::endl
      << "    ||Ypy_k(var_indep)||inf = " << Ypy_indep_norm_inf << std::endl;

  const bool
    bounded_var_dep
    = (
      m > 0
      &&
      num_bounded( *bl->sub_view(var_dep), *bu->sub_view(var_dep), qp_bnd_inf )
      );

  const bool
    use_simple_pz_bounds
    = (
      m == 0
      ||
      ( Zvr != NULL && ( Ypy_indep_norm_inf == 0.0 || bounded_var_dep == 0 ) )
      );

  if( (int)olevel >= (int)PRINT_ALGORITHM_STEPS )
    out
      << (use_simple_pz_bounds
          ? "\nUsing simple bounds on pz ...\n"
          : "\nUsing bounds on full Z*pz ...\n")
      << (bounded_var_dep
          ? "\nThere are finite bounds on dependent variables.  Adding extra inequality constrints for D*pz ...\n" 
          : "\nThere are no finite bounds on dependent variables.  There will be no extra inequality constraints added on D*pz ...\n" ) ;

  if( use_simple_pz_bounds ) {
    // Set simple bound constraints on pz
    qp_dL = bl->sub_view(var_indep);
    qp_dU = bu->sub_view(var_indep);
    qp_nu = nu_k.sub_view(var_indep); // nu_k(var_indep) will be updated directly!
    if( m && bounded_var_dep ) {
      // Set general inequality constraints for D*pz
      qp_E   = Teuchos::rcp(&Zvr->D(),false);
      qp_b   = Ypy_k->sub_view(var_dep);
      qp_eL  = bl->sub_view(var_dep);
      qp_eU  = bu->sub_view(var_dep);
      qp_mu  = nu_k.sub_view(var_dep);  // nu_k(var_dep) will be updated directly!
      qp_Ed  = Zpz_k.sub_view(var_dep); // Zpz_k(var_dep) will be updated directly!
    }
    else {
      // Leave these as NULL since there is no extra general inequality constraints
    }
  }
  else if( !use_simple_pz_bounds ) { // ToDo: Leave out parts for unbounded dependent variables!
    // There are no simple bounds! (leave qp_dL, qp_dU and qp_nu as null)
    // Set general inequality constraints for Z*pz
    qp_E   = Teuchos::rcp(&Z_k,false);
    qp_b   = Teuchos::rcp(Ypy_k,false);
    qp_eL  = bl;
    qp_eU  = bu;
    qp_mu  = Teuchos::rcp(&nu_k,false);
    qp_Ed  = Teuchos::rcp(&Zpz_k,false); // Zpz_k will be updated directly!
  }
  else {
    TEST_FOR_EXCEPT(true);
  }

  // Set the general equality constriants (if they exist)
  Range1D equ_undecomp = s.equ_undecomp();
  if( m > r && m > 0 ) {
    // qp_f = Uy_k * py_k + c_k(equ_undecomp)
    qp_f = s.space_c().sub_space(equ_undecomp)->create_member();
    V_MtV( qp_f.get(), Uy_iq->get_k(0), BLAS_Cpp::no_trans, py_iq.get_k(0) );
    Vp_V( qp_f.get(), *c_iq->get_k(0).sub_view(equ_undecomp) );
    // Must resize for the undecomposed constriants if it has not already been
    qp_F       = Teuchos::rcp(&Uy_iq->get_k(0),false);
    qp_lambda  = lambda_iq->set_k(0).sub_view(equ_undecomp); // lambda_k(equ_undecomp), will be updated directly!
  }

  // Setup the rest of the arguments

  QPSolverRelaxed::EOutputLevel
    qp_olevel;
  switch( olevel ) {
    case PRINT_NOTHING:
      qp_olevel = QPSolverRelaxed::PRINT_NONE;
      break;
    case PRINT_BASIC_ALGORITHM_INFO:
      qp_olevel = QPSolverRelaxed::PRINT_NONE;
      break;
    case PRINT_ALGORITHM_STEPS:
      qp_olevel = QPSolverRelaxed::PRINT_BASIC_INFO;
      break;
    case PRINT_ACTIVE_SET:
      qp_olevel = QPSolverRelaxed::PRINT_ITER_SUMMARY;
      break;
    case PRINT_VECTORS:
      qp_olevel = QPSolverRelaxed::PRINT_ITER_VECTORS;
      break;
    case PRINT_ITERATION_QUANTITIES:
      qp_olevel = QPSolverRelaxed::PRINT_EVERY_THING;
      break;
    default:
      TEST_FOR_EXCEPT(true);
  }
  // ToDo: Set print options so that only vectors matrices etc
  // are only printed in the null space

  //
  // Solve the QP
  // 
  qp_solver().infinite_bound(qp_bnd_inf);
  const QPSolverStats::ESolutionType
    solution_type =
    qp_solver().solve_qp(
      int(olevel) == int(PRINT_NOTHING) ? NULL : &out
      ,qp_olevel
      ,( algo.algo_cntr().check_results()
         ? QPSolverRelaxed::RUN_TESTS :  QPSolverRelaxed::NO_TESTS )
      ,qp_g, qp_G, qp_etaL, qp_dL.get(), qp_dU.get()
      ,qp_E.get(), qp_trans_E, qp_E.get() ? qp_b.get() : NULL
      ,qp_E.get() ? qp_eL.get() : NULL, qp_E.get() ? qp_eU.get() : NULL
      ,qp_F.get(), qp_trans_F, qp_F.get() ? qp_f.get() : NULL
      ,NULL // obj_d
      ,&qp_eta, &qp_d
      ,qp_nu.get()
      ,qp_mu.get(), qp_E.get() ? qp_Ed.get() : NULL
      ,qp_F.get() ? qp_lambda.get() : NULL
      ,NULL // qp_Fd
      );

  //
  // Check the optimality conditions for the QP
  //
  std::ostringstream omsg;
  bool throw_qp_failure = false;
  if(		qp_testing() == QP_TEST
    || ( qp_testing() == QP_TEST_DEFAULT && algo.algo_cntr().check_results() )  )
  {
    if( int(olevel) >= int(PRINT_ALGORITHM_STEPS) ) {
      out	<< "\nChecking the optimality conditions of the reduced QP subproblem ...\n";
    }
    if(!qp_tester().check_optimality_conditions(
      solution_type,qp_solver().infinite_bound()
      ,int(olevel) == int(PRINT_NOTHING) ? NULL : &out
      ,int(olevel) >= int(PRINT_VECTORS) ? true : false
      ,int(olevel) >= int(PRINT_ITERATION_QUANTITIES) ? true : false
      ,qp_g, qp_G, qp_etaL, qp_dL.get(), qp_dU.get()
      ,qp_E.get(), qp_trans_E, qp_E.get() ? qp_b.get() : NULL
      ,qp_E.get() ? qp_eL.get() : NULL, qp_E.get() ? qp_eU.get() : NULL
      ,qp_F.get(), qp_trans_F, qp_F.get() ? qp_f.get() : NULL
      ,NULL // obj_d
      ,&qp_eta, &qp_d
      ,qp_nu.get()
      ,qp_mu.get(), qp_E.get() ? qp_Ed.get() : NULL
      ,qp_F.get() ? qp_lambda.get() : NULL
      ,NULL // qp_Fd
      ))
    {
      omsg << "\n*** Alert! at least one of the QP optimality conditions did not check out.\n";
      if(  static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
        out << omsg.str();
      }
      throw_qp_failure = true;
    }
  }

  //
  // Set the solution
  //
  if( !use_simple_pz_bounds ) {
    // Everything is already updated!
  }
  else if( use_simple_pz_bounds ) {
    // Just have to set Zpz_k(var_indep) = pz_k
    *Zpz_k.sub_view(var_indep) = pz_k;
    if( m && !bounded_var_dep ) {
      // Must compute Zpz_k(var_dep) = D*pz
      LinAlgOpPack::V_MtV( &*Zpz_k.sub_view(var_dep), Zvr->D(), BLAS_Cpp::no_trans, pz_k );
      // ToDo: Remove the compuation of Zpz here unless you must
    }
  }
  else {
    TEST_FOR_EXCEPT(true);
  }

  // Set the solution statistics
  qp_solver_stats_(s).set_k(0) = qp_solver().get_qp_stats();

  // Cut back Ypy_k = (1-eta) * Ypy_k
  const value_type eps = std::numeric_limits<value_type>::epsilon();
  if( fabs(qp_eta - 0.0) > eps ) {
    if(  static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
      out
        << "\n*** Alert! the QP was infeasible (eta = "<<qp_eta<<").  Cutting back Ypy_k = (1.0 - eta)*Ypy_k  ...\n";
    }
    Vt_S( Ypy_k, 1.0 - qp_eta );
  }

  // eta_k
  eta_iq.set_k(0) = qp_eta;

  //
  // Modify the solution if we have to!
  // 
  switch(solution_type) {
    case QPSolverStats::OPTIMAL_SOLUTION:
      break;	// we are good!
    case QPSolverStats::PRIMAL_FEASIBLE_POINT:
    {
      omsg
        << "\n*** Alert! the returned QP solution is PRIMAL_FEASIBLE_POINT but not optimal!\n";
      if( primal_feasible_point_error() )
        omsg
          << "\n*** primal_feasible_point_error == true, this is an error!\n";
      if(  static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
        out << omsg.str();
      }
      throw_qp_failure = primal_feasible_point_error();
      break;
    }	
    case QPSolverStats::DUAL_FEASIBLE_POINT:
    {
      omsg
        << "\n*** Alert! the returned QP solution is DUAL_FEASIBLE_POINT"
        << "\n*** but not optimal so we cut back the step ...\n";
      if( dual_feasible_point_error() )
        omsg
          << "\n*** dual_feasible_point_error == true, this is an error!\n";
      if(  static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
        out << omsg.str();
      }
      // Cut back the step to fit in the bounds
      // 
      // dl <= u*(Ypy_k+Zpz_k) <= du
      //
      vec_mut_ptr_t
        zero  = s.space_x().create_member(0.0),
        d_tmp = s.space_x().create_member();
      V_VpV( d_tmp.get(), *Ypy_k, Zpz_k );
      const std::pair<value_type,value_type>
        u_steps = max_near_feas_step( *zero, *d_tmp, dl_iq.get_k(0), du_iq.get_k(0), 0.0 );
      const value_type
        u = my_min( u_steps.first, 1.0 ); // largest positive step size
      alpha_iq.set_k(0) = u;
      if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
        out	<< "\nFinding u s.t. dl <= u*(Ypy_k+Zpz_k) <= du\n"
          << "max step length u = " << u << std::endl
          << "alpha_k = u = " << alpha_iq.get_k(0) << std::endl;
      }
      throw_qp_failure = dual_feasible_point_error();
      break;
    }	
    case QPSolverStats::SUBOPTIMAL_POINT:
    {
      omsg
        << "\n*** Alert!, the returned QP solution is SUBOPTIMAL_POINT!\n";
      if(  static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
        out << omsg.str();
      }
      throw_qp_failure = true;
      break;
    }
    default:
      TEST_FOR_EXCEPT(true);	// should not happen!
  }

  //
  // Output the final solution!
  //
  if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
    out	<< "\n||pz_k||inf    = " << s.pz().get_k(0).norm_inf()
      << "\nnu_k.nz()      = " << s.nu().get_k(0).nz()
      << "\nmax(|nu_k(i)|) = " << s.nu().get_k(0).norm_inf()
//			<< "\nmin(|nu_k(i)|) = " << min_abs( s.nu().get_k(0)() )
      ;
    if( m > r ) out << "\n||lambda_k(undecomp)||inf = " << s.lambda().get_k(0).norm_inf();
    out	<< "\n||Zpz_k||2     = " << s.Zpz().get_k(0).norm_2()
      ;
    if(qp_eta > 0.0) out << "\n||Ypy||2 = " << s.Ypy().get_k(0).norm_2();
    out << std::endl;
  }

  if( static_cast<int>(ns_olevel) >= static_cast<int>(PRINT_VECTORS) ) {
    out << "\npz_k = \n" << s.pz().get_k(0);
    if(var_indep.size())
      out << "\nnu_k(var_indep) = \n" << *s.nu().get_k(0).sub_view(var_indep);
  }

  if( static_cast<int>(ns_olevel) >= static_cast<int>(PRINT_VECTORS) ) {
    if(var_indep.size())
      out	<< "\nZpz(var_indep)_k = \n" << *s.Zpz().get_k(0).sub_view(var_indep);
    out << std::endl;
  }

  if( static_cast<int>(olevel) >= static_cast<int>(PRINT_VECTORS) ) {
    if(var_dep.size())
      out	<< "\nZpz(var_dep)_k = \n" << *s.Zpz().get_k(0).sub_view(var_dep);
    out	<< "\nZpz_k = \n" << s.Zpz().get_k(0);
    out << std::endl;
  }

  if( static_cast<int>(olevel) >= static_cast<int>(PRINT_VECTORS) ) {
    out << "\nnu_k = \n" << s.nu().get_k(0);
    if(var_dep.size())
      out << "\nnu_k(var_dep) = \n" << *s.nu().get_k(0).sub_view(var_dep);
    if( m > r )
      out << "\nlambda_k(equ_undecomp) = \n" << *s.lambda().get_k(0).sub_view(equ_undecomp);
    if(qp_eta > 0.0) out << "\nYpy = \n" << s.Ypy().get_k(0);
  }

  if( qp_eta == 1.0 ) {
    omsg
      << "TangentialStepWithInequStd_Step::do_step(...) : Error, a QP relaxation parameter\n"
      << "of eta = " << qp_eta << " was calculated and therefore it must be assumed\n"
      << "that the NLP's constraints are infeasible\n"
      << "Throwing an InfeasibleConstraints exception!\n";
    if(  static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
      out << omsg.str();
    }
    throw InfeasibleConstraints(omsg.str());
  }

  if( throw_qp_failure )
    throw QPFailure( omsg.str(), qp_solver().get_qp_stats() );

  return true;
}
void MatrixSymPosDefLBFGS::V_InvMtV(
  VectorMutable* y, BLAS_Cpp::Transp trans_rhs1
  , const Vector& x
  ) const
{
  using AbstractLinAlgPack::Vp_StMtV;
  using DenseLinAlgPack::V_InvMtV;
  using LinAlgOpPack::V_mV;
  using LinAlgOpPack::V_StV;
  using LinAlgOpPack::Vp_V;
  using LinAlgOpPack::V_MtV;
  using LinAlgOpPack::V_StMtV;
  using LinAlgOpPack::Vp_MtV;
  using DenseLinAlgPack::Vp_StMtV;
  typedef VectorDenseEncap         vde;
  typedef VectorDenseMutableEncap  vdme;
  using Teuchos::Workspace;
  Teuchos::WorkspaceStore* wss = Teuchos::get_default_workspace_store().get();

  assert_initialized();

  TEUCHOS_TEST_FOR_EXCEPT( !(  inverse_is_updated_  ) ); // For now just always update

  // y = inv(Bk) * x = Hk * x
  //
  // = gk*x + [S gk*Y] * [ inv(R')*(D+gk*Y'Y)*inv(R)     -inv(R') ] * [   S'  ] * x
  //                     [            -inv(R)                0    ]   [ gk*Y' ]
  //
  // Perform in the following (in order):
  //
  // y = gk*x
  //
  // t1 = [   S'*x  ]					<: R^(2*m)
  //      [ gk*Y'*x ]
  //
  // t2 = inv(R) * t1(1:m)			<: R^(m)
  //
  // t3 = - inv(R') * t1(m+1,2*m)		<: R^(m)
  //
  // t4 = gk * Y'Y * t2				<: R^(m)
  //
  // t4 += D*t2
  //
  // t5 = inv(R') * t4				<: R^(m)
  //
  // t5 += t3
  //
  // y += S*t5
  //
  // y += -gk*Y*t2

  // y = gk*x
  V_StV( y, gamma_k_, x );

  const size_type
    mb = m_bar_;	
  
  if( !mb )
    return;	// No updates have been performed.

  const multi_vec_ptr_t
    S = this->S(),
    Y = this->Y();

  // Get workspace

  Workspace<value_type>    t1_ws(wss,2*mb);
  DVectorSlice                   t1(&t1_ws[0],t1_ws.size());
  Workspace<value_type>    t2_ws(wss,mb);
  DVectorSlice                   t2(&t2_ws[0],t2_ws.size());
  Workspace<value_type>    t3_ws(wss,mb);
  DVectorSlice                   t3(&t3_ws[0],t3_ws.size());
  Workspace<value_type>    t4_ws(wss,mb);
  DVectorSlice                   t4(&t4_ws[0],t4_ws.size());
  Workspace<value_type>    t5_ws(wss,mb);
  DVectorSlice                   t5(&t5_ws[0],t5_ws.size());

  VectorSpace::vec_mut_ptr_t
    t = S->space_rows().create_member();

  const DMatrixSliceTri
    &R = this->R();

  const DMatrixSliceSym
    &YTY = this->YTY();

  // t1 = [   S'*x  ]
  //      [ gk*Y'*x ]
  V_MtV( t.get(), *S, BLAS_Cpp::trans, x );
  t1(1,mb) = vde(*t)();
  V_StMtV( t.get(), gamma_k_, *Y, BLAS_Cpp::trans, x );
  t1(mb+1,2*mb) = vde(*t)();

  // t2 = inv(R) * t1(1:m)
  V_InvMtV( &t2, R, BLAS_Cpp::no_trans, t1(1,mb) );

  // t3 = - inv(R') * t1(m+1,2*m)
  V_mV( &t3, t1(mb+1,2*mb) );
  V_InvMtV( &t3, R, BLAS_Cpp::trans, t3 );

  // t4 = gk * Y'Y * t2
  V_StMtV( &t4, gamma_k_, YTY, BLAS_Cpp::no_trans, t2 );

  // t4 += D*t2
  Vp_DtV( &t4, t2 );

  // t5 = inv(R') * t4
  V_InvMtV( &t5, R, BLAS_Cpp::trans, t4 );

  // t5 += t3
  Vp_V( &t5, t3 );

  // y += S*t5
  (vdme(*t)() = t5);
  Vp_MtV( y, *S, BLAS_Cpp::no_trans, *t );

  // y += -gk*Y*t2
  (vdme(*t)() = t2);
  Vp_StMtV( y, -gamma_k_, *Y, BLAS_Cpp::no_trans, *t );

}