value_type MatrixHessianRelaxed::transVtMtV(
  const SpVectorSlice& x1, BLAS_Cpp::Transp M_trans
  , const SpVectorSlice& x2 ) const
{
  using BLAS_Cpp::no_trans;
  //
  // a = x1' * M * x2
  // 
  //   = [ x11'  x12' ] * [ H   0   ] * [ x21 ]
  //                      [ 0  bigM ]   [ x22 ]
  //               
  // =>              
  //               
  // a = x11'*H*x21 + x12'*bigM*x22
  //
  DenseLinAlgPack::Vp_MtV_assert_sizes(x1.size(),rows(),cols(),M_trans,x2.size());

  if( &x1 == &x2 ) {
    // x1 and x2 are the same sparse vector
    const SpVectorSlice
      x11 = x1(1,n_);
    const SpVectorSlice::element_type
      *x12_ele = x1.lookup_element(n_+1);
    const value_type
      x12 = x12_ele ? x12_ele->value() : 0.0;
    return AbstractLinAlgPack::transVtMtV( x11, *H_, no_trans, x11) 
      + x12 * bigM_ * x12;
  }
  else {
    // x1 and x2 could be different sparse vectors
    TEUCHOS_TEST_FOR_EXCEPT(true); // ToDo: Implement this!
  }
  return 0.0; // Will never be executed!
}
void MatrixSymDiagStd::V_InvMtV(
  DVectorSlice* vs_lhs, BLAS_Cpp::Transp trans_rhs1
  , const SpVectorSlice& sv_rhs2) const
{
  const DVectorSlice diag = this->diag();
  size_type n = diag.size();

  // y = inv(op(A)) * x
  //
  // A is symmetric and diagonal A = diag(diag) so:
  //
  // y(j) = x(j) / diag(j), for j = 1...n
  //
  // x is sparse so take account of this.
  
  DenseLinAlgPack::Vp_MtV_assert_sizes( vs_lhs->size()
    , n, n, trans_rhs1, sv_rhs2.size() );

  for(   SpVectorSlice::const_iterator x_itr = sv_rhs2.begin()
     ; x_itr != sv_rhs2.end()
     ; ++x_itr )
  {
    (*vs_lhs)(x_itr->indice() + sv_rhs2.offset())
      = x_itr->value() / diag(x_itr->indice() + sv_rhs2.offset());
      // Note: The indice x(i) invocations are ranged check
      // if this is compiled into the code.
  }
}
inline
DenseLinAlgPack::DVectorSlice
AbstractLinAlgPack::dense_view( SpVectorSlice& sv_rhs )
{
  return sv_rhs.nz()
    ? DVectorSlice( &sv_rhs.begin()->value(), sv_rhs.nz(), 2 )
    : DVectorSlice( NULL, 0, 0 );
}
inline
const DenseLinAlgPack::DVectorSlice
AbstractLinAlgPack::dense_view( const SpVectorSlice& sv_rhs )
{
  return sv_rhs.nz()
    ? DVectorSlice( &const_cast<SpVectorSlice&>(sv_rhs).begin()->value(), sv_rhs.nz(), 2 )
    : DVectorSlice( NULL, 0, 0 );
}
void MatrixHessianRelaxed::Vp_StMtV(
    DVectorSlice* y, value_type a, BLAS_Cpp::Transp M_trans
  , const SpVectorSlice& x, value_type b ) const
{
  using BLAS_Cpp::no_trans;
  using BLAS_Cpp::trans;
  using AbstractLinAlgPack::Vp_StMtV;
  //
  // y = b*y + a * M * x
  // 
  //   = b*y + a * [ H  0    ] * [ x1 ]
  //               [ 0  bigM ]   [ x2 ]
  //               
  // =>              
  //               
  // y1 = b*y1 + a*H*x1
  // 
  // y2 = b*y2 + bigM * x2
  //
  LinAlgOpPack::Vp_MtV_assert_sizes(y->size(),rows(),cols(),M_trans,x.size());

  DVectorSlice
    y1 = (*y)(1,n_);
  value_type
    &y2 = (*y)(n_+1);
  const SpVectorSlice
    x1 = x(1,n_);
  const SpVectorSlice::element_type
    *x2_ele = x.lookup_element(n_+1);
  const value_type
    x2 = x2_ele ? x2_ele->value() : 0.0;

  // y1 = b*y1 + a*H*x1
  Vp_StMtV( &y1, a, *H_, no_trans, x1, b );

  // y2 = b*y2 + bigM * x2
  if( b == 0.0 )
    y2 = bigM_ * x2;
  else
    y2 = b*y2 + bigM_ * x2;
  
}
void AbstractLinAlgPack::Vp_StV(
  VectorMutable* y, const value_type& a, const SpVectorSlice& sx )
{
  Vp_V_assert_compatibility(y,sx);
  if( sx.nz() ) {
    VectorSpace::vec_mut_ptr_t
        x = y->space().create_member();
    x->set_sub_vector(sub_vec_view(sx));
    Vp_StV( y, a, *x );
  }
}
void AbstractLinAlgPack::add_elements( SpVector* sv_lhs, value_type alpha, const SpVectorSlice& sv_rhs
                   , size_type offset, bool add_zeros )
{
  typedef SpVector::element_type ele_t;
  const bool assume_sorted = ( !sv_lhs->nz() || ( sv_lhs->nz() && sv_lhs->is_sorted() ) )
    && ( !sv_rhs.nz() || ( sv_rhs.nz() || sv_rhs.is_sorted() ) );
  if(add_zeros) {
    for( SpVectorSlice::const_iterator itr = sv_rhs.begin(); itr != sv_rhs.end(); ++itr )
      sv_lhs->add_element( ele_t( itr->index() + sv_rhs.offset() + offset, alpha * (itr->value()) ) );
  }
  else {
    for( SpVectorSlice::const_iterator itr = sv_rhs.begin(); itr != sv_rhs.end(); ++itr )
      if(itr->value() != 0.0 )
        sv_lhs->add_element( ele_t( itr->index() + sv_rhs.offset() + offset, alpha * (itr->value()) ) );
  }
  sv_lhs->assume_sorted(assume_sorted);
}
void MultiVectorMutableCols::Vp_StMtV(
  VectorMutable* y, value_type a, BLAS_Cpp::Transp M_trans
  ,const SpVectorSlice& x, value_type b
  ) const
{
  using AbstractLinAlgPack::dot;

  // y = b*y
  LinAlgOpPack::Vt_S(y,b);

  if( M_trans == BLAS_Cpp::no_trans ) {
    //
    // y += a*M*x
    //
    // =>
    //
    // y += a * x(1) * M(:,1) + a * x(2) * M(:,2) + ...
    //
    SpVectorSlice::difference_type o = x.offset();
    for( SpVectorSlice::const_iterator itr = x.begin(); itr != x.end(); ++itr ) {
      const size_type j = itr->index() + o;
      LinAlgOpPack::Vp_StV( y, a * itr->value(), *col_vecs_[j-1] );
    }
  }
  else {
    //
    // y += a*M'*x
    //
    // =>
    //
    // y(1) += a M(:,1)*x
    // y(2) += a M(:,2)*x
    // ...
    //
    for( size_type j = 1; j <= col_vecs_.size(); ++j )
      y->set_ele(
        j
        ,y->get_ele(j) + a * dot(*col_vecs_[j-1],x)
        );
  }
}
AbstractLinAlgPack::value_type
AbstractLinAlgPack::dot( const Vector& v_rhs1, const SpVectorSlice& sv_rhs2 )
{
  VopV_assert_compatibility(v_rhs1,sv_rhs2 );
  if( sv_rhs2.nz() ) {
    VectorSpace::vec_mut_ptr_t
      v_rhs2 = v_rhs1.space().create_member();
    v_rhs2->set_sub_vector(sub_vec_view(sv_rhs2));
    return dot(v_rhs1,*v_rhs2);
  }
  return 0.0;
}
/** \brief Count the number of sparse bounds where at least one bound is
  * finite.
  */
AbstractLinAlgPack::size_type
AbstractLinAlgPack::num_bounds( const SpVectorSlice& bl, const SpVectorSlice& bu )
{
  SpVectorSlice::const_iterator
    bl_itr			= bl.begin(),
    bl_itr_end		= bl.end(),
    bu_itr			= bu.begin(),
    bu_itr_end		= bu.end();
  size_type num_bounds = 0;
  while( bl_itr != bl_itr_end || bu_itr != bu_itr_end ) {
    if( ( bl_itr != bl_itr_end )
      && ( bu_itr == bu_itr_end || bl_itr->indice() + bl.offset() < bu_itr->indice() + bu.offset() ) )
    {
      // Only the lower bound is finite
      ++bl_itr;
    }
    else if( ( bu_itr != bu_itr_end )
      && ( bl_itr == bl_itr_end || bu_itr->indice() + bu.offset() < bl_itr->indice() + bl.offset()) )
    {
      // Only the upper bound is finite
      ++bu_itr;
    }
    else if(bl_itr->indice() == bu_itr->indice()) {
      // Both bounds exist.
      ++bl_itr;
      ++bu_itr; 
    }
    ++num_bounds;
  }
  return num_bounds;
}
QPSolverStats::ESolutionType
QPSolverRelaxedLOQO::imp_solve_qp(
      std::ostream* out, EOutputLevel olevel, ERunTests test_what
    , const DVectorSlice& g, const MatrixOp& G
    , value_type etaL
    , const SpVectorSlice& dL, const SpVectorSlice& dU
    , const MatrixOp* E, BLAS_Cpp::Transp trans_E, const DVectorSlice* b
      , const SpVectorSlice* eL, const SpVectorSlice* eU
    , const MatrixOp* F, BLAS_Cpp::Transp trans_F, const DVectorSlice* f
    , value_type* obj_d
    , value_type* eta, DVectorSlice* d
    , SpVector* nu
    , SpVector* mu, DVectorSlice* Ed
    , DVectorSlice* lambda, DVectorSlice* Fd
  )
{
  using Teuchos::Workspace;
  Teuchos::WorkspaceStore* wss = wsp::default_workspace_store.get();

  const value_type inf_bnd  = std::numeric_limits<value_type>::max();
//	const value_type real_big = 1e+20;
  const value_type real_big = HUGE_VAL;

  const size_type
    nd   = g.size(),
    m_in = E ? b->size() : 0,
    m_eq = F ? f->size() : 0;

  //
  // Create a LOQO QP definition struct
  //

  LOQO *loqo_lp = openlp();
  TEUCHOS_TEST_FOR_EXCEPT( !(  loqo_lp  ) );

  //
  // Setup loqo_r and loqo_b and count the number of actual
  // constraints.
  //

  // LOQO's b vector storage
  MALLOC( loqo_lp->b, m_in+m_eq, double ); // May not use all of this storage
  DVectorSlice loqo_b( loqo_lp->b, m_in+m_eq );
  // LOQO's r vector storage
  MALLOC( loqo_lp->r, m_in+m_eq, double ); // May not use all of this storage
  DVectorSlice loqo_r( loqo_lp->r, m_in+m_eq );
  // Gives status of b.
  //                  /  j : if eL(j) > -inf_bnd
  // loqo_b_stat(k) = |
  //                  \ -j : if eL(j) <= -inf_bnd && eU(j) < +inf_bnd
  //
  // , for k = 1...num_inequal
  //
  Workspace<int>               loqo_b_stat_ws(wss,m_in); // May not use all of this
  DenseLinAlgPack::VectorSliceTmpl<int>  loqo_b_stat(&loqo_b_stat_ws[0],loqo_b_stat_ws.size());
  std::fill( loqo_b_stat.begin(), loqo_b_stat.end(), 0 ); // Initialize to zero

  // Fill up loqo_b, loqo_r and loqo_b_stat
  size_type num_inequal = 0; // The actual number of bouned general inequalities
  if(E) {
    // Read iterators
    AbstractLinAlgPack::sparse_bounds_itr
      eLU_itr( eL->begin(), eL->end(), eL->offset()
           , eU->begin(), eU->end(), eU->offset(), inf_bnd );
    // written iterators
    DVectorSlice::iterator
      b_itr		= loqo_b.begin(),
      r_itr		= loqo_r.begin();
    DenseLinAlgPack::VectorSliceTmpl<int>::iterator
      b_stat_itr  = loqo_b_stat.begin();
    // loop
    for( int k = 1; !eLU_itr.at_end(); ++k, ++eLU_itr, ++b_itr, ++r_itr, ++b_stat_itr, ++num_inequal )
    {
      const size_type j = eLU_itr.indice();
      if(eLU_itr.lbound() > -inf_bnd) {
        *b_itr = eLU_itr.lbound();
        *r_itr = eLU_itr.ubound() >= inf_bnd ? real_big : eLU_itr.ubound() - eLU_itr.lbound();
        *b_stat_itr = j; // We need to make A(k,:) = [ +op(E)(j,:), -b(j) ]
      }
      else {
        TEUCHOS_TEST_FOR_EXCEPT( !( eLU_itr.ubound() < +inf_bnd ) );
        *b_itr = -eLU_itr.ubound();
        *r_itr = eLU_itr.lbound() <= -inf_bnd ? real_big : - eLU_itr.lbound() + eLU_itr.ubound();
        *b_stat_itr = -j; // We need to make A(k,:) = [ -op(E)(j,:), +b(j) ]
      }
    }
  }
  if(F) {
    LinAlgOpPack::V_StV( &loqo_b(num_inequal+1,num_inequal+m_eq), -1.0, *f );
    loqo_r(num_inequal+1,num_inequal+m_eq) = 0.0;
  }

  //
  // Setup the QP dimensions
  //

  loqo_lp->n = nd+1;
  loqo_lp->m = num_inequal + m_eq;

  //
  // Setup loqo_c, loqo_l and loqo_u
  //

  // LOQO's c vector storage
  MALLOC( loqo_lp->c, nd+1, double );
  DVectorSlice loqo_c( loqo_lp->c, nd+1 );
  loqo_c(1,nd) = g;
  loqo_c(nd+1) = bigM();

  // LOQO's l vector storage
  MALLOC( loqo_lp->l, nd+1, double );
  DVectorSlice loqo_l( loqo_lp->l, nd+1 );
  std::fill( loqo_l.begin(), loqo_l.end(), -real_big );
  {
    SpVectorSlice::const_iterator
      dL_itr = dL.begin(),
      dL_end = dL.end();
    for( ; dL_itr != dL_end; ++dL_itr )
      loqo_l( dL_itr->indice() + dL.offset() ) = dL_itr->value();
  }
  loqo_l(nd+1) = etaL;

  // LOQO's u vector storage
  MALLOC( loqo_lp->u, nd+1, double );
  DVectorSlice loqo_u( loqo_lp->u, nd+1 );
  std::fill( loqo_u.begin(), loqo_u.end(), +real_big );
  {
    SpVectorSlice::const_iterator
      dU_itr = dU.begin(),
      dU_end = dU.end();
    for( ; dU_itr != dU_end; ++dU_itr )
      loqo_u( dU_itr->indice() + dU.offset() ) = dU_itr->value();
  }
  loqo_u(nd+1) = +real_big;
  
  //
  // Setup the objective and constraint matrices (using strategy interface).
  //

  init_hess_jacob().init_hess_jacob(
    G,bigM(),E,trans_E,b,&loqo_b_stat[0],num_inequal,F,trans_F,f
    ,loqo_lp);

  //
  // Setup the starting point
  //

  MALLOC( loqo_lp->x, nd+1, double );
  DVectorSlice loqo_x( loqo_lp->x, nd+1 );
  loqo_x(1,nd) = *d;
  loqo_x(nd+1) = *eta;

  //
  // Set some control parameters
  //
  
//	strcpy( loqo_lp->name, "loqo_qp" );
  loqo_lp->quadratic = 1;
  loqo_lp->convex    = 1;
  switch( olevel ) {
    case PRINT_NONE:
      loqo_lp->verbose = 0;
      break;
    case PRINT_BASIC_INFO:
      loqo_lp->verbose = 1;
      break;
    case PRINT_ITER_SUMMARY:
      loqo_lp->verbose = 2;
      break;
    case PRINT_ITER_STEPS:
      loqo_lp->verbose = 3;
      break;
    case PRINT_ITER_ACT_SET:
      loqo_lp->verbose = 4;
      break;
    case PRINT_ITER_VECTORS:
      loqo_lp->verbose = 5;
      break;
    case PRINT_EVERY_THING:
      loqo_lp->verbose = 6;
      break;
    default:
      TEUCHOS_TEST_FOR_EXCEPT(true);
  }

  //
  // Solve the QP
  //

  if( out && olevel >= PRINT_BASIC_INFO ) {
    *out << "\nSolving QP using LOQO ...\n";
    out->flush();
  }
  
  const int loqo_status = solvelp(loqo_lp);

  if( out && olevel >= PRINT_BASIC_INFO ) {
    *out << "\nLOQO returned status = " << loqo_status << "\n";
  }

  //
  // Map the solution to the output arguments
  //

  TEUCHOS_TEST_FOR_EXCEPT( !(  loqo_lp->x  ) );
  DVectorSlice loqo_x_sol( loqo_lp->x, nd+1 );

  // d
  *d    = loqo_x_sol(1,nd);

  // eta
  *eta  = loqo_x_sol(nd+1);

  // obj_d
  if(obj_d)
    *obj_d = loqo_lp->primal_obj - (*eta + 0.5 * (*eta)*(*eta)) * bigM();

  // nu
  if(nu) {
    nu->resize(nd,nd);
    TEUCHOS_TEST_FOR_EXCEPT( !(  loqo_lp->z  ) );
    TEUCHOS_TEST_FOR_EXCEPT( !(  loqo_lp->s  ) );
    const DVectorSlice
      loqo_z(loqo_lp->z,loqo_lp->n),   // Multipliers for l - x <= 0
      loqo_s(loqo_lp->s,loqo_lp->n);   // Multipliers for x - u <= 0
    DVectorSlice::const_iterator
      z_itr = loqo_z.begin(),
      s_itr = loqo_s.begin();
    typedef SpVector::element_type ele_t;
    for( size_type i = 1; i <= nd; ++i, ++z_itr, ++s_itr ) {
      if( *z_itr > *s_itr && *z_itr >= nonbinding_lag_mult() ) {
        // Lower bound is active
        nu->add_element(ele_t(i,-(*z_itr)));
      }
      else if( *s_itr > *z_itr && *s_itr >= nonbinding_lag_mult() ) {
        // Upper bound is active
        nu->add_element(ele_t(i,+(*s_itr)));
      }
    }
    // We could look at z(nd+1) and s(nd+1) for the value of kappa?
    nu->assume_sorted(true);
  }

  // mu
  if(mu) {
    mu->resize(m_in,num_inequal);
    DenseLinAlgPack::VectorSliceTmpl<int>::iterator
      b_stat_itr  = loqo_b_stat.begin();
    TEUCHOS_TEST_FOR_EXCEPT( !(  loqo_lp->v  ) );
    TEUCHOS_TEST_FOR_EXCEPT( !(  loqo_lp->q  ) );
    const DVectorSlice
      loqo_v(loqo_lp->v,loqo_lp->m),   // Multipliers for b <= A*x
      loqo_q(loqo_lp->q,loqo_lp->m);   // Multipliers for A*x <= b + r
    DVectorSlice::const_iterator
      v_itr = loqo_v.begin(),
      q_itr = loqo_q.begin();
    // loop
    typedef SpVector::element_type ele_t;
    for( size_type k = 1; k <= num_inequal; ++k, ++b_stat_itr, ++v_itr, ++q_itr ) {
      const int j = *b_stat_itr;
      if( *v_itr > *q_itr && *v_itr >= nonbinding_lag_mult() ) {
        // Lower bound is active
        if( j < 0 ) // We had to flip this since it was really and upper bound
          mu->add_element(ele_t(-j,+(*v_itr)));
        else // This really was a lower bound
          mu->add_element(ele_t(+j,-(*v_itr)));
      }
      else if( *q_itr > *v_itr && *q_itr >= nonbinding_lag_mult() ) {
        // Upper bound is active
        mu->add_element(ele_t(+j,+(*q_itr)));
      }
    }
  }

  // Ed
  if(Ed) {
    LinAlgOpPack::V_MtV( Ed, *E, trans_E, *d );
  }

  // lambda
  if(lambda) {
    TEUCHOS_TEST_FOR_EXCEPT( !(  loqo_lp->y  ) );
    const DVectorSlice
      loqo_y(loqo_lp->y,loqo_lp->m);         // Multipliers for equalities
    DVectorSlice::const_iterator
      y_itr = loqo_y.begin() + num_inequal;  // Get iterators to equalities
    DVectorSlice::iterator
      lambda_itr = lambda->begin();
    // loop
    for( size_type k = 1; k <= m_eq; ++k, ++y_itr, ++lambda_itr ) {
      *lambda_itr = -(*y_itr);
    }
  }

  // Fd
  if(Fd) {
    LinAlgOpPack::V_MtV( Fd, *F, trans_F, *d );
  }

  //
  // Setup the QP statistics
  //

  QPSolverStats::ESolutionType solution_type = QPSolverStats::OPTIMAL_SOLUTION; // Assume this?
  switch( loqo_status ) { // I had to find this out by trial and error!
      case 0:
      solution_type = QPSolverStats::OPTIMAL_SOLUTION;
      break;
    case 2:
      solution_type = QPSolverStats::DUAL_FEASIBLE_POINT;
      break;
    default:
      TEUCHOS_TEST_FOR_EXCEPT(true);
  }

  qp_stats_.set_stats(
    solution_type, QPSolverStats::CONVEX
    ,loqo_lp->iter, QPSolverStats::NOT_KNOWN, QPSolverStats::NOT_KNOWN
    ,false, *eta > 0.0 );

  //
  // Clean up dynamically allocated memory for LOQO
  //

  inv_clo();          // frees memory associated with matrix factorization
  closelp(loqo_lp);   // frees all allocated arrays with free(...).

  return qp_stats_.solution_type();

}
RTOpPack::SparseSubVector
AbstractLinAlgPack::sub_vec_view(
  const SpVectorSlice&   sv_in
  ,const Range1D&        rng_in
  )
{
  using Teuchos::null;
  const Range1D        rng = RangePack::full_range(rng_in,1,sv_in.dim());
  const SpVectorSlice  sv = sv_in(rng);

  RTOpPack::SparseSubVector  sub_vec;

  if(!sv.nz()) {
    sub_vec.initialize(
      rng.lbound()-1  // global_offset
      ,rng.size()     // sub_dim
      ,0              // nz
      ,null           // vlaues
      ,1              // values_stride
      ,null           // indices
      ,1              // indices_stride
      ,0              // local_offset
      ,1              // is_sorted
      );
  }
  else {
    SpVectorSlice::const_iterator itr = sv.begin();
    TEUCHOS_TEST_FOR_EXCEPT( !( itr != sv.end() ) );
    if( sv.dim() && sv.nz() == sv.dim() && sv.is_sorted() ) {
      const Teuchos::ArrayRCP<const value_type>  values =
        Teuchos::arcp(&itr->value(), 0, values_stride*rng.size(), false) ;
      sub_vec.initialize(
        rng.lbound()-1    // global_offset
        ,rng.size()       // sub_dim
        ,values           // values
        ,values_stride    // values_stride
        );
    }
    else {
      const Teuchos::ArrayRCP<const value_type>  values =
        Teuchos::arcp(&itr->value(), 0, values_stride*sv.nz(), false) ;
      const Teuchos::ArrayRCP<const index_type> indexes =
        Teuchos::arcp(&itr->index(), 0, indices_stride*sv.nz(), false);
      sub_vec.initialize(
        rng.lbound()-1    // global_offset
        ,sv.dim()         // sub_dim
        ,sv.nz()          // sub_nz
        ,values           // values
        ,values_stride    // values_stride
        ,indexes          // indices
        ,indices_stride   // indices_stride
        ,sv.offset()      // local_offset
        ,sv.is_sorted()   // is_sorted
        );
    }
  }
  
  return sub_vec;
}
void MatrixHessianRelaxed::Vp_StPtMtV(
  DVectorSlice* y, value_type a
  , const GenPermMatrixSlice& P, BLAS_Cpp::Transp P_trans
  , BLAS_Cpp::Transp M_trans
  , const SpVectorSlice& x, value_type b ) const
{
  using BLAS_Cpp::no_trans;
  using BLAS_Cpp::trans;
  namespace GPMSIP = AbstractLinAlgPack::GenPermMatrixSliceIteratorPack;
  //
  // y = b*y + a * op(P) * M * x
  // 
  //   = b*y + a * [ op(P1)  op(P2) ] *  [ H   0   ] * [ x1 ]
  //                                     [ 0  bigM ]   [ x2 ]
  //               
  // =>              
  //               
  // y = b*y + a*op(P1)*H*x1 + a*op(P2)*bigM*x2
  //
  LinAlgOpPack::Vp_MtV_assert_sizes(y->size(),P.rows(),P.cols(),P_trans
    , BLAS_Cpp::rows( rows(), cols(), M_trans) );
  LinAlgOpPack::Vp_MtV_assert_sizes( BLAS_Cpp::cols( P.rows(), P.cols(), P_trans)
    ,rows(),cols(),M_trans,x.size());

  // For this to work (as shown below) we need to have P sorted by
  // row if op(P) = P' or sorted by column if op(P) = P.  If
  // P is not sorted properly, we will just use the default
  // implementation of this operation.
  if( 	( P.ordered_by() == GPMSIP::BY_ROW && P_trans == no_trans )
      || 	( P.ordered_by() == GPMSIP::BY_COL && P_trans == trans ) )
  {
    // Call the default implementation
    MatrixOp::Vp_StPtMtV(y,a,P,P_trans,M_trans,x,b);
    return;
  }

  if( P.is_identity() )
    TEUCHOS_TEST_FOR_EXCEPT( !(  BLAS_Cpp::rows( P.rows(), P.cols(), P_trans ) == n_  ) );

  const GenPermMatrixSlice
    P1 = ( P.is_identity() 
         ? GenPermMatrixSlice( n_, n_, GenPermMatrixSlice::IDENTITY_MATRIX )
         : P.create_submatrix(Range1D(1,n_),P_trans==trans?GPMSIP::BY_ROW:GPMSIP::BY_COL)
      ),
    P2 = ( P.is_identity()
         ? GenPermMatrixSlice(
           P_trans == no_trans ? n_ : 1
           , P_trans == no_trans ? 1 : n_
           , GenPermMatrixSlice::ZERO_MATRIX )
         : P.create_submatrix(Range1D(n_+1,n_+1),P_trans==trans?GPMSIP::BY_ROW:GPMSIP::BY_COL)
      );

  const SpVectorSlice
    x1 = x(1,n_);
  const SpVectorSlice::element_type
    *x2_ele = x.lookup_element(n_+1);
  const value_type
    x2 = x2_ele ? x2_ele->value() : 0.0;
  // y = b*y + a*op(P1)*H*x1
  AbstractLinAlgPack::Vp_StPtMtV( y, a, P1, P_trans, *H_, no_trans, x1, b );
  // y += a*op(P2)*bigM*x2
  if( P2.nz() ){
    TEUCHOS_TEST_FOR_EXCEPT( !( P2.nz() == 1 ) );
    const size_type
      i = P_trans == no_trans ? P2.begin()->row_i() : P2.begin()->col_j();
    (*y)(i) += a * bigM_ * x2;
  }
}
bool ReducedHessianSecantUpdateLPBFGS_Strategy::perform_update(
  DVectorSlice* s_bfgs, DVectorSlice* y_bfgs, bool first_update
  ,std::ostream& out, EJournalOutputLevel olevel, NLPAlgo *algo, NLPAlgoState *s
  ,MatrixOp *rHL_k
  )
{
  using std::setw;
  using std::endl;
  using std::right;
  using Teuchos::dyn_cast;
  namespace rcp = MemMngPack;
  using Teuchos::RCP;
  using LinAlgOpPack::V_MtV;
  using DenseLinAlgPack::dot;
  using AbstractLinAlgPack::norm_inf;
  using AbstractLinAlgPack::transVtMtV;
  typedef ConstrainedOptPack::MatrixHessianSuperBasic MHSB_t;
  using Teuchos::Workspace;
  Teuchos::WorkspaceStore* wss = Teuchos::get_default_workspace_store().get();

  if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
    out << "\n*** (LPBFGS) Full limited memory BFGS to projected BFGS ...\n";
  }

#ifdef _WINDOWS
  MHSB_t &rHL_super = dynamic_cast<MHSB_t&>(*rHL_k);
#else
  MHSB_t &rHL_super = dyn_cast<MHSB_t>(*rHL_k);
#endif

  const size_type
    n    = algo->nlp().n(),
    r    = algo->nlp().r(),
    n_pz = n-r;

  bool do_projected_rHL_RR = false;

  // See if we still have a limited memory BFGS update matrix
  RCP<MatrixSymPosDefLBFGS> // We don't want this to be deleted until we are done with it
    lbfgs_rHL_RR = Teuchos::rcp_const_cast<MatrixSymPosDefLBFGS>(
      Teuchos::rcp_dynamic_cast<const MatrixSymPosDefLBFGS>(rHL_super.B_RR_ptr()) );

  if( lbfgs_rHL_RR.get() && rHL_super.Q_R().is_identity()  ) {
    //
    // We have a limited memory BFGS matrix and have not started projected BFGS updating
    // yet so lets determine if it is time to consider switching
    //
    // Check that the current update is sufficiently p.d. before we do anything
    const value_type
      sTy = dot(*s_bfgs,*y_bfgs),
      yTy = dot(*y_bfgs,*y_bfgs);
    if( !ConstrainedOptPack::BFGS_sTy_suff_p_d(
      *s_bfgs,*y_bfgs,&sTy
      ,  int(olevel) >= int(PRINT_ALGORITHM_STEPS) ? &out : NULL )
      && !proj_bfgs_updater().bfgs_update().use_dampening()
      )
    {
      if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
        out	<< "\nWarning!  use_damening == false so there is no way we can perform any"
            " kind of BFGS update (projected or not) so we will skip it!\n";
      }
      quasi_newton_stats_(*s).set_k(0).set_updated_stats(
        QuasiNewtonStats::INDEF_SKIPED );
      return true;
    }
    // Consider if we can even look at the active set yet.
    const bool consider_switch  = lbfgs_rHL_RR->num_secant_updates() >= min_num_updates_proj_start();
    if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
      out << "\nnum_previous_updates = " << lbfgs_rHL_RR->num_secant_updates()
        << ( consider_switch ? " >= " : " < " )
        << "min_num_updates_proj_start = " << min_num_updates_proj_start()
        << ( consider_switch
           ? "\nConsidering if we should switch to projected BFGS updating of superbasics ...\n"
           : "\nNot time to consider switching to projected BFGS updating of superbasics yet!" );
    }
    if( consider_switch ) {
      // 
      // Our job here is to determine if it is time to switch to projected projected
      // BFGS updating.
      //
      if( act_set_stats_(*s).updated_k(-1) ) {
        if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
          out	<< "\nDetermining if projected BFGS updating of superbasics should begin ...\n";
        }
        // Determine if the active set has calmed down enough
        const SpVector
          &nu_km1 = s->nu().get_k(-1);
        const SpVectorSlice
          nu_indep = nu_km1(s->var_indep());
        const bool 
          act_set_calmed_down
          = PBFGSPack::act_set_calmed_down(
            act_set_stats_(*s).get_k(-1)
            ,proj_bfgs_updater().act_set_frac_proj_start()
            ,olevel,out
            ),
          max_num_updates_exceeded
          = lbfgs_rHL_RR->m_bar() >= max_num_updates_proj_start();
        do_projected_rHL_RR = act_set_calmed_down || max_num_updates_exceeded;
        if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
          if( act_set_calmed_down ) {
            out << "\nThe active set has calmed down enough so lets further consider switching to\n"
              << "projected BFGS updating of superbasic variables ...\n";
          }
          else if( max_num_updates_exceeded ) {
            out << "\nThe active set has not calmed down enough but num_previous_updates = "
              << lbfgs_rHL_RR->m_bar() << " >= max_num_updates_proj_start = "	<< max_num_updates_proj_start()
              << "\nso we will further consider switching to projected BFGS updating of superbasic variables ...\n";
          }
          else {
            out << "\nIt is not time to switch to projected BFGS so just keep performing full limited memory BFGS for now ...\n";
          }
        }
        if( do_projected_rHL_RR ) {
          //
          // Determine the set of initially fixed and free independent variables.
          //
          typedef Workspace<size_type>                              i_x_t;
          typedef Workspace<ConstrainedOptPack::EBounds>   bnd_t;
          i_x_t   i_x_free(wss,n_pz);
          i_x_t   i_x_fixed(wss,n_pz);
          bnd_t   bnd_fixed(wss,n_pz);
          i_x_t   l_x_fixed_sorted(wss,n_pz);
          size_type n_pz_X = 0, n_pz_R = 0;
          // rHL = rHL_scale * I
          value_type
            rHL_scale = sTy > 0.0 ? yTy/sTy : 1.0;
          if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
            out	<< "\nScaling for diagonal intitial rHL = rHL_scale*I, rHL_scale = " << rHL_scale << std::endl;
          }
          value_type sRTBRRsR = 0.0, sRTyR = 0.0, sXTBXXsX = 0.0, sXTyX = 0.0;
          // Get the elements in i_x_free[] for variables that are definitely free
          // and initialize s_R'*B_RR*s_R and s_R'*y_R
          PBFGSPack::init_i_x_free_sRTsR_sRTyR(
            nu_indep, *s_bfgs, *y_bfgs
            , &n_pz_R, &i_x_free[0], &sRTBRRsR, &sRTyR );
          sRTBRRsR *= rHL_scale;
          Workspace<value_type> rHL_XX_diag_ws(wss,nu_indep.nz());
          DVectorSlice rHL_XX_diag(&rHL_XX_diag_ws[0],rHL_XX_diag_ws.size());
          rHL_XX_diag = rHL_scale;
          // Sort fixed variables according to |s_X(i)^2*B_XX(i,i)|/|sRTBRRsR| + |s_X(i)*y_X(i)|/|sRTyR|
          // and initialize s_X'*B_XX*s_X and s_X*y_X
          PBFGSPack::sort_fixed_max_cond_viol(
            nu_indep,*s_bfgs,*y_bfgs,rHL_XX_diag,sRTBRRsR,sRTyR
            ,&sXTBXXsX,&sXTyX,&l_x_fixed_sorted[0]);
          // Pick initial set of i_x_free[] and i_x_fixed[] (sorted!)
          PBFGSPack::choose_fixed_free(
            proj_bfgs_updater().project_error_tol()
            ,proj_bfgs_updater().super_basic_mult_drop_tol(),nu_indep
            ,*s_bfgs,*y_bfgs,rHL_XX_diag,&l_x_fixed_sorted[0]
            ,olevel,out,&sRTBRRsR,&sRTyR,&sXTBXXsX,&sXTyX
            ,&n_pz_X,&n_pz_R,&i_x_free[0],&i_x_fixed[0],&bnd_fixed[0] );
          if( n_pz_R < n_pz ) {
            //
            // We are ready to transition to projected BFGS updating!
            //
            // Determine if we are be using dense or limited memory BFGS?
            const bool
              low_num_super_basics = n_pz_R <= num_superbasics_switch_dense();
            if( static_cast<int>(olevel) >= static_cast<int>(PRINT_BASIC_ALGORITHM_INFO) ) {
              out	<< "\nSwitching to projected BFGS updating ..."
                << "\nn_pz_R = " << n_pz_R << ( low_num_super_basics ? " <= " : " > " )
                << " num_superbasics_switch_dense = " << num_superbasics_switch_dense()
                << ( low_num_super_basics
                   ? "\nThere are not too many superbasic variables so use dense projected BFGS ...\n"
                   : "\nThere are too many superbasic variables so use limited memory projected BFGS ...\n"
                  );
            }
            // Create new matrix to use for rHL_RR initialized to rHL_RR = rHL_scale*I
            RCP<MatrixSymSecant>
              rHL_RR = NULL;
            if( low_num_super_basics ) {
              rHL_RR = new MatrixSymPosDefCholFactor(
                NULL    // Let it allocate its own memory
                ,NULL   // ...
                ,n_pz   // maximum size
                ,lbfgs_rHL_RR->maintain_original()
                ,lbfgs_rHL_RR->maintain_inverse()
                );
            }
            else {
              rHL_RR = new MatrixSymPosDefLBFGS(
                n_pz, lbfgs_rHL_RR->m()
                ,lbfgs_rHL_RR->maintain_original()
                ,lbfgs_rHL_RR->maintain_inverse()
                ,lbfgs_rHL_RR->auto_rescaling()
                );
            }
            rHL_RR->init_identity( n_pz_R, rHL_scale );
            if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ITERATION_QUANTITIES) ) {
              out << "\nrHL_RR after intialized to rHL_RR = rHL_scale*I but before performing current BFGS update:\nrHL_RR =\n"
                << dynamic_cast<MatrixOp&>(*rHL_RR); // I know this is okay!
            }
            // Initialize rHL_XX = rHL_scale*I
#ifdef _WINDOWS
            MatrixSymInitDiag
              &rHL_XX = dynamic_cast<MatrixSymInitDiag&>(
                const_cast<MatrixSymOp&>(*rHL_super.B_XX_ptr()));
#else
            MatrixSymInitDiag
              &rHL_XX = dyn_cast<MatrixSymInitDiag>(
                const_cast<MatrixSymOp&>(*rHL_super.B_XX_ptr()));
#endif
            rHL_XX.init_identity( n_pz_X, rHL_scale );
            // Reinitialize rHL
            rHL_super.initialize(
              n_pz, n_pz_R, &i_x_free[0], &i_x_fixed[0], &bnd_fixed[0]
              ,Teuchos::rcp_const_cast<const MatrixSymWithOpFactorized>(
                Teuchos::rcp_dynamic_cast<MatrixSymWithOpFactorized>(rHL_RR))
              ,NULL,BLAS_Cpp::no_trans,rHL_super.B_XX_ptr()
              );
            //
            // Perform the current BFGS update first
            //
            MatrixSymOp
              &rHL_RR_op = dynamic_cast<MatrixSymOp&>(*rHL_RR);
            const GenPermMatrixSlice
              &Q_R = rHL_super.Q_R(),
              &Q_X = rHL_super.Q_X();
            // Get projected BFGS update vectors y_bfgs_R, s_bfgs_R
            Workspace<value_type>
              y_bfgs_R_ws(wss,Q_R.cols()),
              s_bfgs_R_ws(wss,Q_R.cols()),
              y_bfgs_X_ws(wss,Q_X.cols()),
              s_bfgs_X_ws(wss,Q_X.cols());
            DVectorSlice y_bfgs_R(&y_bfgs_R_ws[0],y_bfgs_R_ws.size());
            DVectorSlice s_bfgs_R(&s_bfgs_R_ws[0],s_bfgs_R_ws.size());
            DVectorSlice y_bfgs_X(&y_bfgs_X_ws[0],y_bfgs_X_ws.size());
            DVectorSlice s_bfgs_X(&s_bfgs_X_ws[0],s_bfgs_X_ws.size());
            V_MtV( &y_bfgs_R, Q_R, BLAS_Cpp::trans, *y_bfgs );  // y_bfgs_R = Q_R'*y_bfgs
            V_MtV( &s_bfgs_R, Q_R, BLAS_Cpp::trans, *s_bfgs );  // s_bfgs_R = Q_R'*s_bfgs
            V_MtV( &y_bfgs_X, Q_X, BLAS_Cpp::trans, *y_bfgs );  // y_bfgs_X = Q_X'*y_bfgs
            V_MtV( &s_bfgs_X, Q_X, BLAS_Cpp::trans, *s_bfgs );  // s_bfgs_X = Q_X'*s_bfgs
            // Update rHL_RR
            if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
              out << "\nPerform current BFGS update on " << n_pz_R << " x " << n_pz_R << " projected "
                << "reduced Hessian for the superbasic variables where B = rHL_RR...\n";
            }
            QuasiNewtonStats quasi_newton_stats;
            proj_bfgs_updater().bfgs_update().perform_update(
              &s_bfgs_R(),&y_bfgs_R(),false,out,olevel,algo->algo_cntr().check_results()
              ,&rHL_RR_op, &quasi_newton_stats );
            // Perform previous updates if possible
            if( lbfgs_rHL_RR->m_bar() ) {
              const size_type num_add_updates = std::_MIN(num_add_recent_updates(),lbfgs_rHL_RR->m_bar());
              if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
                out	<< "\nAdd the min(num_previous_updates,num_add_recent_updates) = min(" << lbfgs_rHL_RR->m_bar()
                  << "," << num_add_recent_updates() << ") = " << num_add_updates << " most recent BFGS updates if possible ...\n";
              }
              // Now add previous update vectors
              const value_type
                project_error_tol = proj_bfgs_updater().project_error_tol();
              const DMatrixSlice
                S = lbfgs_rHL_RR->S(),
                Y = lbfgs_rHL_RR->Y();
              size_type k = lbfgs_rHL_RR->k_bar();  // Location in S and Y of most recent update vectors
              for( size_type l = 1; l <= num_add_updates; ++l, --k ) {
                if(k == 0) k = lbfgs_rHL_RR->m_bar();  // see MatrixSymPosDefLBFGS
                // Check to see if this update satisfies the required conditions.
                // Start with the condition on s'*y since this are cheap to check.
                V_MtV( &s_bfgs_X(), Q_X, BLAS_Cpp::trans, S.col(k) ); // s_bfgs_X = Q_X'*s_bfgs
                V_MtV( &y_bfgs_X(), Q_X, BLAS_Cpp::trans, Y.col(k) ); // y_bfgs_X = Q_X'*y_bfgs
                sRTyR    = dot( s_bfgs_R, y_bfgs_R );
                sXTyX    = dot( s_bfgs_X, y_bfgs_X );
                bool
                  sXTyX_cond    = ::fabs(sXTyX/sRTyR) <= project_error_tol,
                  do_update     = sXTyX_cond,
                  sXTBXXsX_cond = false;
                if( sXTyX_cond ) {
                  // Check the second more expensive condition
                  V_MtV( &s_bfgs_R(), Q_R, BLAS_Cpp::trans, S.col(k) ); // s_bfgs_R = Q_R'*s_bfgs
                  V_MtV( &y_bfgs_R(), Q_R, BLAS_Cpp::trans, Y.col(k) ); // y_bfgs_R = Q_R'*y_bfgs
                  sRTBRRsR = transVtMtV( s_bfgs_R, rHL_RR_op, BLAS_Cpp::no_trans, s_bfgs_R );
                  sXTBXXsX = rHL_scale * dot( s_bfgs_X, s_bfgs_X );
                  sXTBXXsX_cond = sXTBXXsX/sRTBRRsR <= project_error_tol;
                  do_update     = sXTBXXsX_cond && sXTyX_cond;
                }
                if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
                  out << "\n---------------------------------------------------------------------"
                    << "\nprevious update " << l
                    << "\n\nChecking projection error:\n"
                    << "\n|s_X'*y_X|/|s_R'*y_R| = |" << sXTyX << "|/|" << sRTyR
                    << "| = " << ::fabs(sXTyX/sRTyR)
                    << ( sXTyX_cond ? " <= " : " > " ) << " project_error_tol = "
                    << project_error_tol;
                  if( sXTyX_cond ) {
                    out	<< "\n(s_X'*rHL_XX*s_X/s_R'*rHL_RR*s_R) = (" << sXTBXXsX
                        << ") = " << (sXTBXXsX/sRTBRRsR)
                        << ( sXTBXXsX_cond ? " <= " : " > " ) << " project_error_tol = "
                        << project_error_tol;
                  }
                  out << ( do_update
                    ? "\n\nAttemping to add this previous update where B = rHL_RR ...\n"
                    : "\n\nCan not add this previous update ...\n" );
                }
                if( do_update ) {
                  // ( rHL_RR, s_bfgs_R, y_bfgs_R ) -> rHL_RR (this should not throw an exception!)
                  try {
                    proj_bfgs_updater().bfgs_update().perform_update(
                      &s_bfgs_R(),&y_bfgs_R(),false,out,olevel,algo->algo_cntr().check_results()
                      ,&rHL_RR_op, &quasi_newton_stats );
                  }
                  catch( const MatrixSymSecant::UpdateSkippedException& excpt ) {
                    if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
                      out	<< "\nOops!  The " << l << "th most recent BFGS update was rejected?:\n"
                        << excpt.what() << std::endl;
                    }
                  }
                }
              }
              if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
                out << "\n---------------------------------------------------------------------\n";
                }
              if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ITERATION_QUANTITIES) ) {
                out << "\nrHL_RR after adding previous BFGS updates:\nrHL_BRR =\n"
                  << dynamic_cast<MatrixOp&>(*rHL_RR);
              }
            }
            else {
              if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
                out	<< "\nThere were no previous update vectors to add!\n";
              }
            }
            if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ITERATION_QUANTITIES) ) {
              out << "\nFull rHL after complete reinitialization:\nrHL =\n" << *rHL_k;
            }
            quasi_newton_stats_(*s).set_k(0).set_updated_stats(
              QuasiNewtonStats::REINITIALIZED );
            return true;
          }
          else {
            if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
              out << "\nn_pz_R == n_pz = " << n_pz_R << ", No variables would be removed from "
                << "the superbasis so just keep on performing limited memory BFGS for now ...\n";
            }
            do_projected_rHL_RR = false;
          }
        }
      }
    }
    // If we have not switched to PBFGS then just update the full limited memory BFGS matrix
    if(!do_projected_rHL_RR) {
      if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
        out << "\nPerform current BFGS update on " << n_pz << " x " << n_pz << " full "
          << "limited memory reduced Hessian B = rHL ...\n";
      }
      proj_bfgs_updater().bfgs_update().perform_update(
        s_bfgs,y_bfgs,first_update,out,olevel,algo->algo_cntr().check_results()
        ,lbfgs_rHL_RR.get()
        ,&quasi_newton_stats_(*s).set_k(0)
        );
      return true;
    }
  }
  else {
    if( static_cast<int>(olevel) >= static_cast<int>(PRINT_ALGORITHM_STEPS) ) {
      out	<< "\nWe have already switched to projected BFGS updating ...\n";
    }
  }
  //
  // If we get here then we must have switched to
  // projected updating so lets just pass it on!
  //
  return proj_bfgs_updater().perform_update(
    s_bfgs,y_bfgs,first_update,out,olevel,algo,s,rHL_k);
}