//
// Computes  y = beta*y + alpha * op(A) * x  where A = diag(this->diag()).
//
//   vs_lhs      [in/out] y; when beta == 0 its incoming content is ignored
//               and simply overwritten.
//   alpha       [in] scale factor applied to A*x.
//   trans_rhs1  [in] only forwarded to the size check; A is diagonal so
//               op(A) == A either way.
//   vs_rhs2     [in] x.
//   beta        [in] scale factor applied to the incoming y.
//
void MatrixSymDiagStd::Vp_StMtV(
  DVectorSlice* vs_lhs, value_type alpha, BLAS_Cpp::Transp trans_rhs1
  , const DVectorSlice& vs_rhs2, value_type beta) const
{
  const DVectorSlice diag = this->diag();
  size_type n = diag.size();

  //
  // y = b*y + a * op(A) * x
  //
  DenseLinAlgPack::Vp_MtV_assert_sizes(
    vs_lhs->size(), n, n, trans_rhs1, vs_rhs2.size() );
  //
  // A is symmetric and diagonal A = diag(diag) so:
  //
  // y(j) = b*y(j) + a * diag(j) * x(j), for j = 1...n
  //
  // The raw pointer walk below advances every pointer by one element, so it
  // is only valid when all three vectors (including diag) have unit stride.
  if( diag.stride() == 1 && vs_rhs2.stride() == 1 && vs_lhs->stride() == 1 ) {
    // Optimized implementation
    const value_type
      *d_itr      = diag.raw_ptr(),
      *x_itr      = vs_rhs2.raw_ptr();
    value_type
      *y_itr      = vs_lhs->raw_ptr(),
      *y_end      = y_itr + vs_lhs->size();

    if( beta == 0.0 ) {
      // Pure assignment: never read y.
      while( y_itr != y_end )
        *y_itr++ = alpha * (*d_itr++) * (*x_itr++);
    }
    else if( beta == 1.0 ) {
      // Pure accumulation: skip the multiply by beta.
      while( y_itr != y_end )
        *y_itr++ += alpha * (*d_itr++) * (*x_itr++);
    }
    else {
      for( ; y_itr != y_end; ++y_itr )
        *y_itr = beta * (*y_itr) + alpha * (*d_itr++) * (*x_itr++);
    }
  }
  else {
    // Generic implementation
    DVectorSlice::const_iterator
      d_itr = diag.begin(),
      x_itr = vs_rhs2.begin();
    DVectorSlice::iterator
      y_itr = vs_lhs->begin(),
      y_end = vs_lhs->end();
    // Same convention as the optimized path: with beta == 0 the old y is
    // not read, so NaN/Inf left in uninitialized storage cannot leak
    // into the result through 0 * y.
    const bool overwrite_y = ( beta == 0.0 );
    for( ; y_itr != y_end; ++y_itr, ++d_itr, ++x_itr ) {
#ifdef LINALGPACK_CHECK_RANGE
      TEST_FOR_EXCEPT( !(  d_itr < diag.end()  ) );
      TEST_FOR_EXCEPT( !(  x_itr < vs_rhs2.end()  ) );
      TEST_FOR_EXCEPT( !(  y_itr < vs_lhs->end()  ) );
#endif
      const value_type a_d_x = alpha * (*d_itr) * (*x_itr);
      *y_itr = overwrite_y ? a_d_x : beta * (*y_itr) + a_d_x;
    }
  }
}
//
// Computes  y = inv(op(A)) * x  for a sparse x, where A = diag(this->diag()).
//
//   vs_lhs      [out] y; every element is assigned.
//   trans_rhs1  [in] only forwarded to the size check; A is diagonal so
//               op(A) == A either way.
//   sv_rhs2     [in] sparse x; only its stored entries are visited.
//
void MatrixSymDiagStd::V_InvMtV(
  DVectorSlice* vs_lhs, BLAS_Cpp::Transp trans_rhs1
  , const SpVectorSlice& sv_rhs2) const
{
  const DVectorSlice diag = this->diag();
  size_type n = diag.size();

  // y = inv(op(A)) * x
  //
  // A is symmetric and diagonal A = diag(diag) so:
  //
  // y(j) = x(j) / diag(j), for j = 1...n
  //
  // x is sparse so take account of this.
  
  DenseLinAlgPack::Vp_MtV_assert_sizes( vs_lhs->size()
    , n, n, trans_rhs1, sv_rhs2.size() );

  // Entries of x that are not stored are zero, so the matching entries of
  // y must be zero as well.  Clear y first; otherwise whatever was in the
  // output vector on entry would survive in those positions.
  *vs_lhs = 0.0;

  for(   SpVectorSlice::const_iterator x_itr = sv_rhs2.begin()
     ; x_itr != sv_rhs2.end()
     ; ++x_itr )
  {
    // Stored indices are relative to the slice; adding offset() gives the
    // index used to address y and diag.
    (*vs_lhs)(x_itr->indice() + sv_rhs2.offset())
      = x_itr->value() / diag(x_itr->indice() + sv_rhs2.offset());
      // Note: The indexed x(i) accesses are range checked
      // if this is compiled into the code.
  }
}
//
// Computes  y = inv(op(A)) * x  for a dense x, where A = diag(this->diag()).
//
//   vs_lhs      [out] y; every element is overwritten with x(j)/diag(j).
//   trans_rhs1  [in] only forwarded to the size check; A is diagonal so
//               op(A) == A either way.
//   vs_rhs2     [in] x.
//
void MatrixSymDiagStd::V_InvMtV(
  DVectorSlice* vs_lhs, BLAS_Cpp::Transp trans_rhs1
  , const DVectorSlice& vs_rhs2) const
{
  const DVectorSlice diag = this->diag();
  size_type n = diag.size();

  // y = inv(op(A)) * x
  //
  // A is symmetric and diagonal (A = diag(diag)) so:
  //
  // y(j) = x(j) / diag(j), for j = 1...n

  DenseLinAlgPack::Vp_MtV_assert_sizes( vs_lhs->size()
    , n, n, trans_rhs1, vs_rhs2.size() );
  
  // The raw pointer walk below advances every pointer by one element, so it
  // is only valid when all three vectors (including diag) have unit stride.
  if( diag.stride() == 1 && vs_rhs2.stride() == 1 && vs_lhs->stride() == 1 ) {
    // Optimized implementation
    const value_type
      *d_itr      = diag.raw_ptr(),
      *x_itr      = vs_rhs2.raw_ptr();
    value_type
      *y_itr      = vs_lhs->raw_ptr(),
      *y_end      = y_itr + vs_lhs->size();
    while( y_itr != y_end )
      *y_itr++ = (*x_itr++) / (*d_itr++);
  }
  else {
    // Generic implementation (any stride), driven by the length of y.
    DVectorSlice::const_iterator
      d_itr = diag.begin(),
      x_itr = vs_rhs2.begin();
    DVectorSlice::iterator
      y_itr = vs_lhs->begin(),
      y_end = vs_lhs->end();
    for( ; y_itr != y_end; ++y_itr, ++d_itr, ++x_itr ) {
      // Guard against diag or x being shorter than y.
      TEST_FOR_EXCEPT( !(  d_itr < diag.end()  ) );
      TEST_FOR_EXCEPT( !(  x_itr < vs_rhs2.end()  ) );
      TEST_FOR_EXCEPT( !(  y_itr < vs_lhs->end()  ) );
      *y_itr = (*x_itr)/(*d_itr);
    }
  }
}
//
// Computes  y = b*y + a * M * x  for the relaxed Hessian
//
//   M = [ H   0    ]
//       [ 0   bigM ]
//
//   y        [in/out] vector of size n_+1; when b == 0 the incoming value
//            of the relaxation entry y(n_+1) is not read.
//   a        [in] scale factor applied to M*x (both blocks).
//   M_trans  [in] only forwarded to the size check; the product itself is
//            always formed with H untransposed.
//   x        [in] vector of size n_+1.
//   b        [in] scale factor applied to the incoming y.
//
void MatrixHessianRelaxed::Vp_StMtV(
    DVectorSlice* y, value_type a, BLAS_Cpp::Transp M_trans
  , const DVectorSlice& x, value_type b ) const
{
  using BLAS_Cpp::no_trans;
  // Needed so that the unqualified call below resolves to the free function
  // and not to this member function.
  using AbstractLinAlgPack::Vp_StMtV;
  //
  // y = b*y + a * M * x
  // 
  //   = b*y + a * [ H  0    ] * [ x1 ]
  //               [ 0  bigM ]   [ x2 ]
  //               
  // =>              
  //               
  // y1 = b*y1 + a*H*x1
  // 
  // y2 = b*y2 + a*bigM*x2
  //
  LinAlgOpPack::Vp_MtV_assert_sizes(y->size(),rows(),cols(),M_trans,x.size());

  // Partition y and x into the leading n_ entries and the single
  // trailing relaxation entry.
  DVectorSlice
    y1 = (*y)(1,n_);
  value_type
    &y2 = (*y)(n_+1);
  const DVectorSlice
    x1 = x(1,n_);
  const value_type
    x2 = x(n_+1);

  // y1 = b*y1 + a*H*x1
  Vp_StMtV( &y1, a, *H_, no_trans, x1, b );

  // y2 = b*y2 + a*bigM*x2
  //
  // The scale factor a multiplies the whole product M*x, so it has to be
  // applied to the bigM block too (it was previously dropped here).
  if( b == 0.0 )
    y2 = a * bigM_ * x2;           // do not read the incoming y2
  else
    y2 = b*y2 + a * bigM_ * x2;
  
}
//
// Solves the relaxed QP with LOQO.
//
// Steps performed here:
//   1. Open a LOQO problem and fill its b/r (constraint bounds and ranges),
//      c (objective), l/u (variable bounds) and x (initial point) arrays.
//      The LOQO problem has nd+1 variables: the nd entries of d followed by
//      the relaxation variable eta.
//   2. Let the init_hess_jacob() strategy object fill in the objective and
//      constraint matrices.
//   3. Call solvelp() and copy the solution into d, eta and the optional
//      outputs obj_d, nu, mu, Ed, lambda and Fd (each skipped when NULL).
//   4. Record the statistics in qp_stats_, release LOQO's storage and
//      return the solution type.
//
// An unrecognized LOQO return status raises an exception (after LOQO's
// storage has been released).
//
// NOTE(review): the other TEUCHOS_TEST_FOR_EXCEPT checks between openlp()
// and closelp() presumably still exit without releasing the LOQO problem;
// confirm what closelp()/inv_clo() tolerate before adding cleanup there.
//
QPSolverStats::ESolutionType
QPSolverRelaxedLOQO::imp_solve_qp(
      std::ostream* out, EOutputLevel olevel, ERunTests test_what
    , const DVectorSlice& g, const MatrixOp& G
    , value_type etaL
    , const SpVectorSlice& dL, const SpVectorSlice& dU
    , const MatrixOp* E, BLAS_Cpp::Transp trans_E, const DVectorSlice* b
      , const SpVectorSlice* eL, const SpVectorSlice* eU
    , const MatrixOp* F, BLAS_Cpp::Transp trans_F, const DVectorSlice* f
    , value_type* obj_d
    , value_type* eta, DVectorSlice* d
    , SpVector* nu
    , SpVector* mu, DVectorSlice* Ed
    , DVectorSlice* lambda, DVectorSlice* Fd
  )
{
  using Teuchos::Workspace;
  Teuchos::WorkspaceStore* wss = wsp::default_workspace_store.get();

  // inf_bnd is the value treated as "no bound" in the incoming sparse bounds;
  // real_big is the value handed to LOQO in place of a missing bound.
  const value_type inf_bnd  = std::numeric_limits<value_type>::max();
//	const value_type real_big = 1e+20;
  const value_type real_big = HUGE_VAL;

  const size_type
    nd   = g.size(),
    m_in = E ? b->size() : 0,
    m_eq = F ? f->size() : 0;

  //
  // Create a LOQO QP definition struct
  //

  LOQO *loqo_lp = openlp();
  TEUCHOS_TEST_FOR_EXCEPT( !(  loqo_lp  ) );

  //
  // Setup loqo_r and loqo_b and count the number of actual
  // constraints.
  //

  // LOQO's b vector storage
  MALLOC( loqo_lp->b, m_in+m_eq, double ); // May not use all of this storage
  DVectorSlice loqo_b( loqo_lp->b, m_in+m_eq );
  // LOQO's r vector storage
  MALLOC( loqo_lp->r, m_in+m_eq, double ); // May not use all of this storage
  DVectorSlice loqo_r( loqo_lp->r, m_in+m_eq );
  // Gives status of b.
  //                  /  j : if eL(j) > -inf_bnd
  // loqo_b_stat(k) = |
  //                  \ -j : if eL(j) <= -inf_bnd && eU(j) < +inf_bnd
  //
  // , for k = 1...num_inequal
  //
  // NOTE(review): &loqo_b_stat_ws[0] is taken even when m_in == 0; confirm
  // that Workspace tolerates this.
  Workspace<int>               loqo_b_stat_ws(wss,m_in); // May not use all of this
  DenseLinAlgPack::VectorSliceTmpl<int>  loqo_b_stat(&loqo_b_stat_ws[0],loqo_b_stat_ws.size());
  std::fill( loqo_b_stat.begin(), loqo_b_stat.end(), 0 ); // Initialize to zero

  // Fill up loqo_b, loqo_r and loqo_b_stat
  size_type num_inequal = 0; // The actual number of bounded general inequalities
  if(E) {
    // Read iterators
    AbstractLinAlgPack::sparse_bounds_itr
      eLU_itr( eL->begin(), eL->end(), eL->offset()
           , eU->begin(), eU->end(), eU->offset(), inf_bnd );
    // written iterators
    DVectorSlice::iterator
      b_itr       = loqo_b.begin(),
      r_itr       = loqo_r.begin();
    DenseLinAlgPack::VectorSliceTmpl<int>::iterator
      b_stat_itr  = loqo_b_stat.begin();
    // loop: one LOQO row per constraint visited by the bounds iterator
    for( ; !eLU_itr.at_end(); ++eLU_itr, ++b_itr, ++r_itr, ++b_stat_itr, ++num_inequal )
    {
      const size_type j = eLU_itr.indice();
      if(eLU_itr.lbound() > -inf_bnd) {
        // Finite lower bound: use it as b, the range is (upper - lower).
        *b_itr = eLU_itr.lbound();
        *r_itr = eLU_itr.ubound() >= inf_bnd ? real_big : eLU_itr.ubound() - eLU_itr.lbound();
        *b_stat_itr = j; // We need to make A(k,:) = [ +op(E)(j,:), -b(j) ]
      }
      else {
        // Only an upper bound: flip the sign of the row so that it becomes
        // a lower bound for LOQO.
        TEUCHOS_TEST_FOR_EXCEPT( !( eLU_itr.ubound() < +inf_bnd ) );
        *b_itr = -eLU_itr.ubound();
        *r_itr = eLU_itr.lbound() <= -inf_bnd ? real_big : - eLU_itr.lbound() + eLU_itr.ubound();
        *b_stat_itr = -j; // We need to make A(k,:) = [ -op(E)(j,:), +b(j) ]
      }
    }
  }
  if(F) {
    // Equalities follow the inequalities: b = -f with a zero range.
    LinAlgOpPack::V_StV( &loqo_b(num_inequal+1,num_inequal+m_eq), -1.0, *f );
    loqo_r(num_inequal+1,num_inequal+m_eq) = 0.0;
  }

  //
  // Setup the QP dimensions
  //

  loqo_lp->n = nd+1;
  loqo_lp->m = num_inequal + m_eq;

  //
  // Setup loqo_c, loqo_l and loqo_u
  //

  // LOQO's c vector storage
  MALLOC( loqo_lp->c, nd+1, double );
  DVectorSlice loqo_c( loqo_lp->c, nd+1 );
  loqo_c(1,nd) = g;
  loqo_c(nd+1) = bigM();

  // LOQO's l vector storage
  MALLOC( loqo_lp->l, nd+1, double );
  DVectorSlice loqo_l( loqo_lp->l, nd+1 );
  std::fill( loqo_l.begin(), loqo_l.end(), -real_big );
  {
    // Overwrite the default with the lower bounds that are actually stored.
    SpVectorSlice::const_iterator
      dL_itr = dL.begin(),
      dL_end = dL.end();
    for( ; dL_itr != dL_end; ++dL_itr )
      loqo_l( dL_itr->indice() + dL.offset() ) = dL_itr->value();
  }
  loqo_l(nd+1) = etaL;

  // LOQO's u vector storage
  MALLOC( loqo_lp->u, nd+1, double );
  DVectorSlice loqo_u( loqo_lp->u, nd+1 );
  std::fill( loqo_u.begin(), loqo_u.end(), +real_big );
  {
    // Overwrite the default with the upper bounds that are actually stored.
    SpVectorSlice::const_iterator
      dU_itr = dU.begin(),
      dU_end = dU.end();
    for( ; dU_itr != dU_end; ++dU_itr )
      loqo_u( dU_itr->indice() + dU.offset() ) = dU_itr->value();
  }
  loqo_u(nd+1) = +real_big;
  
  //
  // Setup the objective and constraint matrices (using strategy interface).
  //

  init_hess_jacob().init_hess_jacob(
    G,bigM(),E,trans_E,b,&loqo_b_stat[0],num_inequal,F,trans_F,f
    ,loqo_lp);

  //
  // Setup the starting point
  //

  MALLOC( loqo_lp->x, nd+1, double );
  DVectorSlice loqo_x( loqo_lp->x, nd+1 );
  loqo_x(1,nd) = *d;
  loqo_x(nd+1) = *eta;

  //
  // Set some control parameters
  //
  
//	strcpy( loqo_lp->name, "loqo_qp" );
  loqo_lp->quadratic = 1;
  loqo_lp->convex    = 1;
  // Map the requested output level onto LOQO's verbose setting.
  switch( olevel ) {
    case PRINT_NONE:
      loqo_lp->verbose = 0;
      break;
    case PRINT_BASIC_INFO:
      loqo_lp->verbose = 1;
      break;
    case PRINT_ITER_SUMMARY:
      loqo_lp->verbose = 2;
      break;
    case PRINT_ITER_STEPS:
      loqo_lp->verbose = 3;
      break;
    case PRINT_ITER_ACT_SET:
      loqo_lp->verbose = 4;
      break;
    case PRINT_ITER_VECTORS:
      loqo_lp->verbose = 5;
      break;
    case PRINT_EVERY_THING:
      loqo_lp->verbose = 6;
      break;
    default:
      TEUCHOS_TEST_FOR_EXCEPT(true);
  }

  //
  // Solve the QP
  //

  if( out && olevel >= PRINT_BASIC_INFO ) {
    *out << "\nSolving QP using LOQO ...\n";
    out->flush();
  }
  
  const int loqo_status = solvelp(loqo_lp);

  if( out && olevel >= PRINT_BASIC_INFO ) {
    *out << "\nLOQO returned status = " << loqo_status << "\n";
  }

  //
  // Map the solution to the output arguments
  //

  TEUCHOS_TEST_FOR_EXCEPT( !(  loqo_lp->x  ) );
  DVectorSlice loqo_x_sol( loqo_lp->x, nd+1 );

  // d
  *d    = loqo_x_sol(1,nd);

  // eta
  *eta  = loqo_x_sol(nd+1);

  // obj_d: LOQO's objective with the relaxation terms in eta subtracted out
  if(obj_d)
    *obj_d = loqo_lp->primal_obj - (*eta + 0.5 * (*eta)*(*eta)) * bigM();

  // nu
  if(nu) {
    nu->resize(nd,nd);
    TEUCHOS_TEST_FOR_EXCEPT( !(  loqo_lp->z  ) );
    TEUCHOS_TEST_FOR_EXCEPT( !(  loqo_lp->s  ) );
    const DVectorSlice
      loqo_z(loqo_lp->z,loqo_lp->n),   // Multipliers for l - x <= 0
      loqo_s(loqo_lp->s,loqo_lp->n);   // Multipliers for x - u <= 0
    DVectorSlice::const_iterator
      z_itr = loqo_z.begin(),
      s_itr = loqo_s.begin();
    typedef SpVector::element_type ele_t;
    // Keep only the larger of the two multipliers for each variable, and
    // only if it reaches the nonbinding_lag_mult() threshold.
    for( size_type i = 1; i <= nd; ++i, ++z_itr, ++s_itr ) {
      if( *z_itr > *s_itr && *z_itr >= nonbinding_lag_mult() ) {
        // Lower bound is active
        nu->add_element(ele_t(i,-(*z_itr)));
      }
      else if( *s_itr > *z_itr && *s_itr >= nonbinding_lag_mult() ) {
        // Upper bound is active
        nu->add_element(ele_t(i,+(*s_itr)));
      }
    }
    // We could look at z(nd+1) and s(nd+1) for the value of kappa?
    nu->assume_sorted(true);
  }

  // mu
  if(mu) {
    mu->resize(m_in,num_inequal);
    DenseLinAlgPack::VectorSliceTmpl<int>::iterator
      b_stat_itr  = loqo_b_stat.begin();
    TEUCHOS_TEST_FOR_EXCEPT( !(  loqo_lp->v  ) );
    TEUCHOS_TEST_FOR_EXCEPT( !(  loqo_lp->q  ) );
    const DVectorSlice
      loqo_v(loqo_lp->v,loqo_lp->m),   // Multipliers for b <= A*x
      loqo_q(loqo_lp->q,loqo_lp->m);   // Multipliers for A*x <= b + r
    DVectorSlice::const_iterator
      v_itr = loqo_v.begin(),
      q_itr = loqo_q.begin();
    // loop
    typedef SpVector::element_type ele_t;
    for( size_type k = 1; k <= num_inequal; ++k, ++b_stat_itr, ++v_itr, ++q_itr ) {
      // j > 0: row k is +op(E)(j,:);  j < 0: row k is -op(E)(-j,:)
      const int j = *b_stat_itr;
      if( *v_itr > *q_itr && *v_itr >= nonbinding_lag_mult() ) {
        // Lower bound is active
        if( j < 0 ) // We had to flip this since it was really an upper bound
          mu->add_element(ele_t(-j,+(*v_itr)));
        else // This really was a lower bound
          mu->add_element(ele_t(+j,-(*v_itr)));
      }
      else if( *q_itr > *v_itr && *q_itr >= nonbinding_lag_mult() ) {
        // Upper bound is active
        // NOTE(review): for a flipped row (j < 0) this would pass a negative
        // index; presumably unreachable because such rows are given the
        // range real_big -- confirm.
        mu->add_element(ele_t(+j,+(*q_itr)));
      }
    }
  }

  // Ed
  if(Ed) {
    LinAlgOpPack::V_MtV( Ed, *E, trans_E, *d );
  }

  // lambda
  if(lambda) {
    TEUCHOS_TEST_FOR_EXCEPT( !(  loqo_lp->y  ) );
    const DVectorSlice
      loqo_y(loqo_lp->y,loqo_lp->m);         // Multipliers for equalities
    DVectorSlice::const_iterator
      y_itr = loqo_y.begin() + num_inequal;  // Get iterators to equalities
    DVectorSlice::iterator
      lambda_itr = lambda->begin();
    // loop: lambda takes the negated LOQO multipliers
    for( size_type k = 1; k <= m_eq; ++k, ++y_itr, ++lambda_itr ) {
      *lambda_itr = -(*y_itr);
    }
  }

  // Fd
  if(Fd) {
    LinAlgOpPack::V_MtV( Fd, *F, trans_F, *d );
  }

  //
  // Setup the QP statistics
  //

  QPSolverStats::ESolutionType solution_type = QPSolverStats::OPTIMAL_SOLUTION; // Assume this?
  switch( loqo_status ) { // I had to find this out by trial and error!
    case 0:
      solution_type = QPSolverStats::OPTIMAL_SOLUTION;
      break;
    case 2:
      solution_type = QPSolverStats::DUAL_FEASIBLE_POINT;
      break;
    default:
      // Unknown status.  Release LOQO's storage exactly as the normal exit
      // path below does before raising the error, so that the LOQO problem
      // is not leaked.
      inv_clo();
      closelp(loqo_lp);
      TEUCHOS_TEST_FOR_EXCEPT(true);
  }

  qp_stats_.set_stats(
    solution_type, QPSolverStats::CONVEX
    ,loqo_lp->iter, QPSolverStats::NOT_KNOWN, QPSolverStats::NOT_KNOWN
    ,false, *eta > 0.0 );

  //
  // Clean up dynamically allocated memory for LOQO
  //

  inv_clo();          // frees memory associated with matrix factorization
  closelp(loqo_lp);   // frees all allocated arrays with free(...).

  return qp_stats_.solution_type();

}
void QPSchurInitKKTSystemHessianRelaxed::initialize_kkt_system(
    const DVectorSlice&    g
    ,const MatrixOp&  G
    ,value_type           etaL
    ,const SpVectorSlice& dL
    ,const SpVectorSlice& dU
    ,const MatrixOp*  F
    ,BLAS_Cpp::Transp     trans_F
    ,const DVectorSlice*   f
    ,const DVectorSlice&   d
    ,const SpVectorSlice& nu
    ,size_type*           n_R
    ,i_x_free_t*          i_x_free
    ,i_x_fixed_t*         i_x_fixed
    ,bnd_fixed_t*         bnd_fixed
    ,j_f_decomp_t*        j_f_decomp
    ,DVector*              b_X
    ,Ko_ptr_t*            Ko
    ,DVector*              fo
) const
{
    using BLAS_Cpp::trans;

    // Validate type of and convert G
    const MatrixSymHessianRelaxNonSing
    *G_relax_ptr = dynamic_cast<const MatrixSymHessianRelaxNonSing*>(&G);

    if( G_relax_ptr == NULL ) {
        init_kkt_full_.initialize_kkt_system(
            g,G,etaL,dL,dU,F,trans_F,f,d,nu,n_R,i_x_free,i_x_fixed,bnd_fixed
            ,j_f_decomp,b_X,Ko,fo);
        return;
    }

    const MatrixSymHessianRelaxNonSing
    &G_relax = *G_relax_ptr;

    // get some stuff
    const MatrixSymWithOpFactorized
    &G_orig = G_relax.G(),
     &M      = G_relax.M();
    const size_type
    nd = g.size(),
    no = G_orig.rows(),
    nr = M.rows();
    TEST_FOR_EXCEPT( !(  no + nr == nd  ) );

    // Setup output arguments

    // n_R = nd_R
    *n_R = no;
    // i_x_free.size() == 0 and i_x_free is implicitly identity
    i_x_free->resize(no);
{   for(size_type l = 1; l <= no; ++l ) {
            (*i_x_free)[l-1] = l;
        }
    }
    // i_x_fixed[]
    i_x_fixed->resize(nr+1);
    if(nr) {
        // i_x_fixed[l-1] = no + l, l = 1...nr
        for( size_type l = 1; l <= nr; ++l )
            (*i_x_fixed)[l-1] = no+l;
    }
    (*i_x_fixed)[nr] = nd+1; // extra relaxation is always initially active
    // bnd_fixed[]
    bnd_fixed->resize(nr+1);
    if(nr) {
        // bnd_fixed[l-1] = LOWER, l = 1...nr
        std::fill_n( bnd_fixed->begin(), nr, LOWER );
    }
    (*bnd_fixed)[nr] = LOWER; // relaxation is always initially active
    // j_f_decomp[]
    j_f_decomp->resize(0);
    // b_X
    b_X->resize(nr+1);
    if(nr) {
        // b_X[l-1] = dL(no+l), l = 1...nr
        LinAlgOpPack::assign( &(*b_X)(1,nr), dL(no+1,no+nr) );
    }
    (*b_X)[nr] = etaL; // relaxation is always initially active
    // Ko = G.G
    *Ko = G_relax.G_ptr(); // now B_RR is a shared object
    // fo = - *g(1:no)
    LinAlgOpPack::V_StV( fo, -1.0, g(1,no) );

}
//
// Computes  y = b*y + a * op(P) * M * x  for the relaxed Hessian
//
//   M = [ H   0    ]
//       [ 0   bigM ]
//
// by splitting P into the part acting on H (P1) and the part acting on
// the single bigM entry (P2).  When P is not ordered in the way the split
// requires, the default MatrixOp implementation is used instead.
//
void MatrixHessianRelaxed::Vp_StPtMtV(
  DVectorSlice* y, value_type a
  , const GenPermMatrixSlice& P, BLAS_Cpp::Transp P_trans
  , BLAS_Cpp::Transp M_trans
  , const DVectorSlice& x, value_type b ) const
{
  using BLAS_Cpp::no_trans;
  using BLAS_Cpp::trans;
  namespace GPMSIP = AbstractLinAlgPack::GenPermMatrixSliceIteratorPack;
  //
  // With op(P) = [ op(P1)  op(P2) ] and x = [ x1 ; x2 ]:
  //
  //   y = b*y + a*op(P1)*H*x1 + a*op(P2)*bigM*x2
  //

  // Check that y, op(P), M and x are conformable.
  LinAlgOpPack::Vp_MtV_assert_sizes(y->size(),P.rows(),P.cols(),P_trans
    , BLAS_Cpp::rows( rows(), cols(), M_trans) );
  LinAlgOpPack::Vp_MtV_assert_sizes( BLAS_Cpp::cols( P.rows(), P.cols(), P_trans)
    ,rows(),cols(),M_trans,x.size());

  // The split below needs P sorted by row when op(P) = P' and sorted by
  // column when op(P) = P.  For the opposite orderings hand the work to
  // the default implementation.
  if(    ( P.ordered_by() == GPMSIP::BY_ROW && P_trans == no_trans )
      || ( P.ordered_by() == GPMSIP::BY_COL && P_trans == trans ) )
  {
    MatrixOp::Vp_StPtMtV(y,a,P,P_trans,M_trans,x,b);
    return;
  }

  const bool P_is_identity = P.is_identity();

  if( P_is_identity ) {
    TEUCHOS_TEST_FOR_EXCEPT( !(  BLAS_Cpp::rows( P.rows(), P.cols(), P_trans ) == n_  ) );
  }

  // P1: the part of P that multiplies H (indices 1...n_).
  const GenPermMatrixSlice
    P1 = ( P_is_identity
         ? GenPermMatrixSlice( n_, n_, GenPermMatrixSlice::IDENTITY_MATRIX )
         : P.create_submatrix(
             Range1D(1,n_)
             , P_trans==trans ? GPMSIP::BY_ROW : GPMSIP::BY_COL )
      );
  // P2: the part of P that multiplies bigM (index n_+1); an identity P
  // contributes nothing here, so a zero matrix is used.
  const GenPermMatrixSlice
    P2 = ( P_is_identity
         ? GenPermMatrixSlice(
             P_trans == no_trans ? n_ : 1
             , P_trans == no_trans ? 1 : n_
             , GenPermMatrixSlice::ZERO_MATRIX )
         : P.create_submatrix(
             Range1D(n_+1,n_+1)
             , P_trans==trans ? GPMSIP::BY_ROW : GPMSIP::BY_COL )
      );

  const DVectorSlice
    x1 = x(1,n_);
  const value_type
    x2 = x(n_+1);

  // y = b*y + a*op(P1)*H*x1
  AbstractLinAlgPack::Vp_StPtMtV( y, a, P1, P_trans, *H_, no_trans, x1, b );

  // y += a*op(P2)*bigM*x2
  if( !P2.nz() )
    return; // the relaxation entry is not selected by P

  // P2 covers a single row/column, so it can hold one nonzero at most.
  TEUCHOS_TEST_FOR_EXCEPT( !( P2.nz() == 1 ) );
  const size_type
    i = ( P_trans == no_trans ? P2.begin()->row_i() : P2.begin()->col_j() );
  (*y)(i) += a * bigM_ * x2;
}