void MatrixHessianRelaxed::Vp_StMtV(
    DVectorSlice* y, value_type a, BLAS_Cpp::Transp M_trans
  , const SpVectorSlice& x, value_type b ) const
{
  using BLAS_Cpp::no_trans;
  using BLAS_Cpp::trans;
  using AbstractLinAlgPack::Vp_StMtV;
  //
  // y = b*y + a * M * x
  // 
  //   = b*y + a * [ H  0    ] * [ x1 ]
  //               [ 0  bigM ]   [ x2 ]
  //               
  // =>              
  //               
  // y1 = b*y1 + a*H*x1
  // 
  // y2 = b*y2 + bigM * x2
  //
  LinAlgOpPack::Vp_MtV_assert_sizes(y->size(),rows(),cols(),M_trans,x.size());

  DVectorSlice
    y1 = (*y)(1,n_);
  value_type
    &y2 = (*y)(n_+1);
  const SpVectorSlice
    x1 = x(1,n_);
  const SpVectorSlice::element_type
    *x2_ele = x.lookup_element(n_+1);
  const value_type
    x2 = x2_ele ? x2_ele->value() : 0.0;

  // y1 = b*y1 + a*H*x1
  Vp_StMtV( &y1, a, *H_, no_trans, x1, b );

  // y2 = b*y2 + bigM * x2
  if( b == 0.0 )
    y2 = bigM_ * x2;
  else
    y2 = b*y2 + bigM_ * x2;
  
}
void dense_Vp_StPtMtV(
  DVectorSlice                 *y
  ,value_type                 a
  ,const GenPermMatrixSlice   &P
  ,BLAS_Cpp::Transp           P_trans
  ,const M_t                  &M
  ,BLAS_Cpp::Transp           M_trans
  ,const V_t                  &x
  ,value_type                 b
  )
{
  using BLAS_Cpp::no_trans;
  using BLAS_Cpp::trans;
  using BLAS_Cpp::trans_not;
  using BLAS_Cpp::rows;
  using BLAS_Cpp::cols;
  using DenseLinAlgPack::dot;
  using DenseLinAlgPack::DVector;
  using DenseLinAlgPack::Vt_S;
  using AbstractLinAlgPack::dot;
  using AbstractLinAlgPack::Vp_StMtV;
  using AbstractLinAlgPack::GenPermMatrixSlice;
  typedef AbstractLinAlgPack::EtaVector eta;
  using Teuchos::Workspace;
  Teuchos::WorkspaceStore* wss = Teuchos::get_default_workspace_store().get();
  
  // Validate the sizes
  // 
  // y += op(P)*op(M)*x
  // 
  const DenseLinAlgPack::size_type
    ny = y->size(),
    nx = x.size(),
    opM_rows = rows( M.rows(), M.cols(), M_trans ),
    opM_cols = cols( M.rows(), M.cols(), M_trans );
  if(    ny != rows( P.rows(), P.cols(), P_trans )
       || nx != opM_cols
       || cols( P.rows(), P.cols(), P_trans ) != opM_rows )
    throw std::length_error( "MatrixOp::Vp_StPtMtV(...) : Error, "
      "sizes of arguments does not match up" );
  //
  // Compute y = b*y + a*op(P)*op(M)*x in a resonably efficient manner.  This
  // implementation will assume that M is stored as a dense matrix.  Either
  // t = op(M)*x is computed first (O(opM_rows*nx) flops) then y = b*y + a*op(P)*t
  // (O(ny) + O(P_nz) flops) or each row of t' = e(j)' * op(M) (O(nx) flops)
  // is computed one at a time and then y(i) = b*y(i) + a*t'*x (O(nx)  flops)
  // where op(P)(i,j) = 1.0.  In the latter case, there are P_nz rows
  // of op(M) that have to be generated so the total cost is O(P_nz*nx).
  // Therefore, we will do the former if opM_rows < P_nz and the latter otherwise.
  //
  if( !P.nz() ) {
    // y = b*y
    if(b==0.0)       *y = 0.0;
    else if(b!=1.0)  Vt_S(y,b);
  }
  else if( opM_rows > P.nz() || P.is_identity() ) {
    // t = op(M)*x
    Workspace<value_type> t_ws(wss,opM_rows);
    DVectorSlice t(&t_ws[0],t_ws.size());
    LinAlgOpPack::V_MtV( &t, M, M_trans, x );
    // y = b*y + a*op(P)*t
    Vp_StMtV( y, a, P, P_trans, t(), b );
  }
  else {
    // y = b*y
    if(b==0.0)       *y = 0.0;
    else if(b!=1.0)  Vt_S(y,b);
    // Compute t' = e(j)' * op(M) then y(i) += a*t'*x where op(P)(i,j) = 1.0
    Workspace<value_type> t_ws(wss,opM_cols);
    DVectorSlice t(&t_ws[0],t_ws.size());
    if( P.is_identity() ) {
      for( size_type k = 1; k <= P.nz(); ++k ) {
        const size_type
          i = k,
          j = k;
        // t = op(M') * e(j)			
        LinAlgOpPack::V_MtV( &t, M, trans_not(M_trans), eta(j,opM_rows)() );
        // y(i) += a*t'*x
        (*y)(i) += a * dot( t(), x );
      }
    }
    else {
      for( GenPermMatrixSlice::const_iterator itr = P.begin(); itr != P.end(); ++itr ) {
        const DenseLinAlgPack::size_type
          i = P_trans == no_trans ? itr->row_i() : itr->col_j(),
          j = P_trans == no_trans ? itr->col_j() : itr->row_i();
        // t = op(M') * e(j)			
        LinAlgOpPack::V_MtV( &t, M, trans_not(M_trans), eta(j,opM_rows)() );
        // y(i) += a*t'*x
        (*y)(i) += a * dot( t(), x );
      }
    }
  }
}
void MatrixSymPosDefLBFGS::V_InvMtV(
  VectorMutable* y, BLAS_Cpp::Transp trans_rhs1
  , const Vector& x
  ) const
{
  using AbstractLinAlgPack::Vp_StMtV;
  using DenseLinAlgPack::V_InvMtV;
  using LinAlgOpPack::V_mV;
  using LinAlgOpPack::V_StV;
  using LinAlgOpPack::Vp_V;
  using LinAlgOpPack::V_MtV;
  using LinAlgOpPack::V_StMtV;
  using LinAlgOpPack::Vp_MtV;
  using DenseLinAlgPack::Vp_StMtV;
  typedef VectorDenseEncap         vde;
  typedef VectorDenseMutableEncap  vdme;
  using Teuchos::Workspace;
  Teuchos::WorkspaceStore* wss = Teuchos::get_default_workspace_store().get();

  assert_initialized();

  TEUCHOS_TEST_FOR_EXCEPT( !(  inverse_is_updated_  ) ); // For now just always update

  // y = inv(Bk) * x = Hk * x
  //
  // = gk*x + [S gk*Y] * [ inv(R')*(D+gk*Y'Y)*inv(R)     -inv(R') ] * [   S'  ] * x
  //                     [            -inv(R)                0    ]   [ gk*Y' ]
  //
  // Perform in the following (in order):
  //
  // y = gk*x
  //
  // t1 = [   S'*x  ]					<: R^(2*m)
  //      [ gk*Y'*x ]
  //
  // t2 = inv(R) * t1(1:m)			<: R^(m)
  //
  // t3 = - inv(R') * t1(m+1,2*m)		<: R^(m)
  //
  // t4 = gk * Y'Y * t2				<: R^(m)
  //
  // t4 += D*t2
  //
  // t5 = inv(R') * t4				<: R^(m)
  //
  // t5 += t3
  //
  // y += S*t5
  //
  // y += -gk*Y*t2

  // y = gk*x
  V_StV( y, gamma_k_, x );

  const size_type
    mb = m_bar_;	
  
  if( !mb )
    return;	// No updates have been performed.

  const multi_vec_ptr_t
    S = this->S(),
    Y = this->Y();

  // Get workspace

  Workspace<value_type>    t1_ws(wss,2*mb);
  DVectorSlice                   t1(&t1_ws[0],t1_ws.size());
  Workspace<value_type>    t2_ws(wss,mb);
  DVectorSlice                   t2(&t2_ws[0],t2_ws.size());
  Workspace<value_type>    t3_ws(wss,mb);
  DVectorSlice                   t3(&t3_ws[0],t3_ws.size());
  Workspace<value_type>    t4_ws(wss,mb);
  DVectorSlice                   t4(&t4_ws[0],t4_ws.size());
  Workspace<value_type>    t5_ws(wss,mb);
  DVectorSlice                   t5(&t5_ws[0],t5_ws.size());

  VectorSpace::vec_mut_ptr_t
    t = S->space_rows().create_member();

  const DMatrixSliceTri
    &R = this->R();

  const DMatrixSliceSym
    &YTY = this->YTY();

  // t1 = [   S'*x  ]
  //      [ gk*Y'*x ]
  V_MtV( t.get(), *S, BLAS_Cpp::trans, x );
  t1(1,mb) = vde(*t)();
  V_StMtV( t.get(), gamma_k_, *Y, BLAS_Cpp::trans, x );
  t1(mb+1,2*mb) = vde(*t)();

  // t2 = inv(R) * t1(1:m)
  V_InvMtV( &t2, R, BLAS_Cpp::no_trans, t1(1,mb) );

  // t3 = - inv(R') * t1(m+1,2*m)
  V_mV( &t3, t1(mb+1,2*mb) );
  V_InvMtV( &t3, R, BLAS_Cpp::trans, t3 );

  // t4 = gk * Y'Y * t2
  V_StMtV( &t4, gamma_k_, YTY, BLAS_Cpp::no_trans, t2 );

  // t4 += D*t2
  Vp_DtV( &t4, t2 );

  // t5 = inv(R') * t4
  V_InvMtV( &t5, R, BLAS_Cpp::trans, t4 );

  // t5 += t3
  Vp_V( &t5, t3 );

  // y += S*t5
  (vdme(*t)() = t5);
  Vp_MtV( y, *S, BLAS_Cpp::no_trans, *t );

  // y += -gk*Y*t2
  (vdme(*t)() = t2);
  Vp_StMtV( y, -gamma_k_, *Y, BLAS_Cpp::no_trans, *t );

}
void MatrixSymPosDefLBFGS::Vp_StMtV(
    VectorMutable* y, value_type alpha, BLAS_Cpp::Transp trans_rhs1
  , const Vector& x, value_type beta
  ) const
{
  using AbstractLinAlgPack::Vt_S;
  using AbstractLinAlgPack::Vp_StV;
  using AbstractLinAlgPack::Vp_StMtV;
  using LinAlgOpPack::V_StMtV;
  using LinAlgOpPack::V_MtV;
  typedef VectorDenseEncap         vde;
  typedef VectorDenseMutableEncap  vdme;
  using Teuchos::Workspace;
  Teuchos::WorkspaceStore* wss = Teuchos::get_default_workspace_store().get();

  assert_initialized();

  TEUCHOS_TEST_FOR_EXCEPT( !(  original_is_updated_  ) ); // For now just always update

  // y = b*y + Bk * x
  //
  // y = b*y + (1/gk)*x - [ (1/gk)*S  Y ] * inv(Q) * [ (1/gk)*S' ] * x
  //                                                 [     Y'    ]
  // Perform the following operations (in order):
  //
  // y = b*y
  //
  // y += (1/gk)*x
  //
  // t1 = [ (1/gk)*S'*x ]		<: R^(2*m)
  //		[      Y'*x   ]
  //
  // t2 =	inv(Q) * t1			<: R^(2*m)
  //
  // y += -(1/gk) * S * t2(1:m)
  //
  // y += -1.0 * Y * t2(m+1,2m)

  const value_type
    invgk = 1.0 / gamma_k_;

  // y = b*y
  Vt_S( y, beta );

  // y += (1/gk)*x
  Vp_StV( y, invgk, x );

  if( !m_bar_ )
    return;	// No updates have been added yet.

  const multi_vec_ptr_t
    S = this->S(),
    Y = this->Y();

  // Get workspace

  const size_type
    mb = m_bar_;

  Workspace<value_type>  t1_ws(wss,2*mb);
  DVectorSlice                 t1(&t1_ws[0],t1_ws.size());
  Workspace<value_type>  t2_ws(wss,2*mb);
  DVectorSlice                 t2(&t2_ws[0],t2_ws.size());

  VectorSpace::vec_mut_ptr_t
    t = S->space_rows().create_member();

  // t1 = [ (1/gk)*S'*x ]
  //		[      Y'*x   ]

  V_StMtV( t.get(), invgk, *S, BLAS_Cpp::trans, x );
  t1(1,mb) = vde(*t)();
  V_MtV( t.get(), *Y, BLAS_Cpp::trans, x );
  t1(mb+1,2*mb) = vde(*t)();

  // t2 =	inv(Q) * t1
  V_invQtV( &t2, t1 );

  // y += -(1/gk) * S * t2(1:m)
  (vdme(*t)() = t2(1,mb));
  Vp_StMtV( y, -invgk, *S, BLAS_Cpp::no_trans, *t );

  // y += -1.0 * Y * t2(m+1,2m
  (vdme(*t)() = t2(mb+1,2*mb));
  Vp_StMtV( y, -1.0, *Y, BLAS_Cpp::no_trans, *t );

}