QPSolverStats::ESolutionType
QPSolverRelaxedQPKWIK::imp_solve_qp(
  std::ostream* out, EOutputLevel olevel, ERunTests test_what
  ,const Vector& g, const MatrixSymOp& G
  ,value_type etaL
  ,const Vector* dL, const Vector* dU
  ,const MatrixOp* E, BLAS_Cpp::Transp trans_E, const Vector* b
  ,const Vector* eL, const Vector* eU
  ,const MatrixOp* F, BLAS_Cpp::Transp trans_F, const Vector* f
  ,value_type* obj_d
  ,value_type* eta, VectorMutable* d
  ,VectorMutable* nu
  ,VectorMutable* mu, VectorMutable* Ed
  ,VectorMutable* lambda, VectorMutable* Fd
  )
{
  using Teuchos::dyn_cast;
  using DenseLinAlgPack::nonconst_tri_ele;
  using LinAlgOpPack::dot;
  using LinAlgOpPack::V_StV;
  using LinAlgOpPack::assign;
  using LinAlgOpPack::V_StV;
  using LinAlgOpPack::V_MtV;
  using AbstractLinAlgPack::EtaVector;
  using AbstractLinAlgPack::transVtMtV;
  using AbstractLinAlgPack::num_bounded;
  using ConstrainedOptPack::MatrixExtractInvCholFactor;

  // /////////////////////////
  // Map to QPKWIK input

  // Validate that rHL is of the proper type.
  const MatrixExtractInvCholFactor &cG
    = dyn_cast<const MatrixExtractInvCholFactor>(G);

  // Determine the number of sparse bounds on variables and inequalities.
  // By default set for the dense case
  const value_type inf = this->infinite_bound();
  const size_type
    nd              = d->dim(),
    m_in            = E  ? b->dim()                  : 0,
    m_eq            = F  ? f->dim()                  : 0,
    nvarbounds      = dL ? num_bounded(*dL,*dU,inf)  : 0,
    ninequbounds    = E  ? num_bounded(*eL,*eU,inf)  : 0,
    nequalities     = F  ? f->dim()                  : 0;

  // Determine if this is a QP with a structure different from the
  // one just solved.
  
  const bool same_qp_struct = (  N_ == nd && M1_ == nvarbounds && M2_ == ninequbounds && M3_ == nequalities );

  /////////////////////////////////////////////////////////////////
  // Set the input parameters to be sent to QPKWIKNEW

  // N
  N_ = nd;

  // M1
  M1_ = nvarbounds;

  // M2
  M2_ = ninequbounds;

  // M3
  M3_ = nequalities;

  // GRAD
  GRAD_ = VectorDenseEncap(g)();

  // UINV_AUG
  //
  // UINV_AUG = [ sqrt(bigM)  0  ]
  //            [ 0           L' ]
  //
  UINV_AUG_.resize(N_+1,N_+1);
  cG.extract_inv_chol( &nonconst_tri_ele( UINV_AUG_(2,N_+1,2,N_+1), BLAS_Cpp::upper ) );
  UINV_AUG_(1,1) = 1.0 / ::sqrt( NUMPARAM_[2] );
  UINV_AUG_.col(1)(2,N_+1) = 0.0;
  UINV_AUG_.row(1)(2,N_+1) = 0.0;

  // LDUINV_AUG
  LDUINV_AUG_ = UINV_AUG_.rows();

  // IBND, BL , BU, A, LDA, YPY

  IBND_INV_.resize( nd + m_in);
  std::fill( IBND_INV_.begin(), IBND_INV_.end(), 0 ); // Initialize the zero
  IBND_.resize( my_max( 1, M1_ + M2_ ) );
  BL_.resize( my_max( 1, M1_ + M2_ ) );
  BU_.resize( my_max( 1, M1_ + M2_ + M3_ ) );
  LDA_ = my_max( 1, M2_ + M3_ );
  A_.resize( LDA_, (  M2_ + M3_ > 0 ? N_ : 1 ) );
  YPY_.resize( my_max( 1, M1_ + M2_ ) );
  if(M1_)
    YPY_(1,M1_) = 0.0; // Must be for this QP interface

  // Initialize variable bound constraints
  if( dL ) {
    VectorDenseEncap dL_de(*dL);
    VectorDenseEncap dU_de(*dU);
    // read iterators
    AbstractLinAlgPack::sparse_bounds_itr
      dLU_itr( dL_de().begin(), dL_de().end()
          ,dU_de().begin(), dU_de().end()
          ,inf );
    // written iterators
    IBND_t::iterator
      IBND_itr = IBND_.begin(),
      IBND_end = IBND_.begin() + M1_;
    DVector::iterator
      BL_itr = BL_.begin(),
      BU_itr = BU_.begin(),
      YPY_itr = YPY_.begin();
    // Loop
    for( size_type ibnd_i = 1; IBND_itr != IBND_end; ++ibnd_i, ++dLU_itr ) {
      IBND_INV_[dLU_itr.index()-1] = ibnd_i;
      *IBND_itr++ = dLU_itr.index();
      *BL_itr++	= dLU_itr.lbound();
      *BU_itr++	= dLU_itr.ubound();
      *YPY_itr++	= 0.0; // Must be zero with this QP interface
    }
  }

  // Initialize inequality constraints
  
  if(M2_) {
    VectorDenseEncap eL_de(*eL);
    VectorDenseEncap eU_de(*eU);
    VectorDenseEncap b_de(*b);
    AbstractLinAlgPack::sparse_bounds_itr
      eLU_itr( eL_de().begin(), eL_de().end()
          ,eU_de().begin(), eU_de().end()
          ,inf );
    if( M2_ < m_in ) {
      // Initialize BL, BU, YPY and A for sparse bounds on general inequalities
      // written iterators
      DVector::iterator
        BL_itr		= BL_.begin() + M1_,
        BU_itr		= BU_.begin() + M1_,
        YPY_itr		= YPY_.begin() + M1_;
      IBND_t::iterator
        ibnds_itr	= IBND_.begin() + M1_;
      // loop
      for(size_type i = 1; i <= M2_; ++i, ++eLU_itr, ++ibnds_itr ) {
        TEST_FOR_EXCEPT( !( !eLU_itr.at_end() ) );
        const size_type k      = eLU_itr.index();
        *BL_itr++              = eLU_itr.lbound();
        *BU_itr++              = eLU_itr.ubound();
        *YPY_itr++             = b_de()(k);
        *ibnds_itr             = k;  // Only for my record, not used by QPKWIK
        IBND_INV_[nd+k-1]      = M1_ + i;
        // Add the corresponding row of op(E) to A
        // y == A.row(i)'
        // y' = e_k' * op(E) => y = op(E')*e_k
        DVectorSlice y = A_.row(i);
        EtaVector e_k(k,eL_de().dim());
        V_MtV( &y( 1, N_ ), *E, BLAS_Cpp::trans_not(trans_E), e_k() ); // op(E')*e_k
      }
    }
    else {
      //
      // Initialize BL, BU, YPY and A for dense bounds on general inequalities
      //
      // Initialize BL(M1+1:M1+M2), BU(M1+1:M1+M2)
      // and IBND(M1+1:M1+M2) = identity (only for my record, not used by QPKWIK)
      DVector::iterator
        BL_itr		= BL_.begin() + M1_,
        BU_itr		= BU_.begin() + M1_;
      IBND_t::iterator
        ibnds_itr	= IBND_.begin() + M1_;
      for(size_type i = 1; i <= m_in; ++i ) {
        if( !eLU_itr.at_end() && eLU_itr.index() == i ) {
          *BL_itr++ = eLU_itr.lbound();
          *BU_itr++ = eLU_itr.ubound();
          ++eLU_itr;
        }
        else {
          *BL_itr++ = -inf;
          *BU_itr++ = +inf;
        }
        *ibnds_itr++     = i;
        IBND_INV_[nd+i-1]= M1_ + i;
      }
      // A(1:M2,1:N) = op(E)
      assign( &A_(1,M2_,1,N_), *E, trans_E );
      // YPY
      YPY_(M1_+1,M1_+M2_) = b_de();
    }
  }

  // Initialize equalities

  if(M3_) {
    V_StV( &BU_( M1_ + M2_ + 1, M1_ + M2_ + M3_ ), -1.0, VectorDenseEncap(*f)() );
    assign( &A_( M2_ + 1, M2_ + M3_, 1, N_ ), *F, trans_F );
  }

  // IYPY
  IYPY_ = 1; // ???

  // WARM
  WARM_ = 0; // Cold start by default

  // MAX_ITER
  MAX_ITER_ = static_cast<f_int>(max_qp_iter_frac() * N_);

  // INF
  INF_ = ( same_qp_struct ? 1 : 0 );
  
  // Initilize output, internal state and workspace quantities.
  if(!same_qp_struct) {
    X_.resize(N_);
    NACTSTORE_ = 0;
    IACTSTORE_.resize(N_+1);
    IACT_.resize(N_+1);
    UR_.resize(N_+1);
    ISTATE_.resize( QPKWIKNEW_CppDecl::qpkwiknew_listate(N_,M1_,M2_,M3_) );
    LRW_ = QPKWIKNEW_CppDecl::qpkwiknew_lrw(N_,M1_,M2_,M3_);
    RW_.resize(LRW_);
  }

  // /////////////////////////////////////////////
  // Setup a warm start form the input arguments
  //
  // Interestingly enough, QPKWIK sorts all of the
  // constraints according to scaled multiplier values
  // and mixes equality with inequality constriants.
  // It seems to me that you should start with equality
  // constraints first.

  WARM_      = 0;
  NACTSTORE_ = 0;

  if( m_eq ) {
    // Add equality constraints first since we know these will
    // be part of the active set.
    for( size_type j = 1; j <= m_eq; ++j ) {
      IACTSTORE_[NACTSTORE_] = 2*M1_ + 2*M2_ + j;
      ++NACTSTORE_;
    }
  }
  if( ( nu && nu->nz() ) || ( mu && mu->nz() ) ) {
    // Add inequality constraints
    const size_type
      nu_nz = nu ? nu->nz() : 0,
      mu_nz = mu ? mu->nz() : 0;
    // Combine all the multipliers for the bound and general inequality
    // constraints and sort them from the largest to the smallest.  Hopefully
    // the constraints with the larger multiplier values will not be dropped
    // from the active set.
    SpVector gamma( nd + 1 + m_in , nu_nz + mu_nz );
    typedef SpVector::element_type ele_t;
    if(nu && nu_nz) {
      VectorDenseEncap nu_de(*nu);
      DVectorSlice::const_iterator
        nu_itr = nu_de().begin(),
        nu_end = nu_de().end();
      index_type i = 1;
      while( nu_itr != nu_end ) {
        for( ; *nu_itr == 0.0; ++nu_itr, ++i );
        gamma.add_element(ele_t(i,*nu_itr));
        ++nu_itr; ++i;
      }
    }
    if(mu && mu_nz) {
      VectorDenseEncap mu_de(*mu);
      DVectorSlice::const_iterator
        mu_itr = mu_de().begin(),
        mu_end = mu_de().end();
      index_type i = 1;
      while( mu_itr != mu_end ) {
        for( ; *mu_itr == 0.0; ++mu_itr, ++i );
        gamma.add_element(ele_t(i+nd,*mu_itr));
        ++mu_itr; ++i;
      }
    }
    std::sort( gamma.begin(), gamma.end()
      , AbstractLinAlgPack::SortByDescendingAbsValue() );
    // Now add the inequality constraints in decreasing order
    const SpVector::difference_type o = gamma.offset();
    for( SpVector::const_iterator itr = gamma.begin(); itr != gamma.end(); ++itr ) {
      const size_type  j   = itr->index() + o;
      const value_type val = itr->value();
      if( j <= nd ) { // Variable bound
        const size_type ibnd_i = IBND_INV_[j-1];
        TEST_FOR_EXCEPT( !( ibnd_i ) );
        IACTSTORE_[NACTSTORE_]
          = (val < 0.0
             ? ibnd_i               // lower bound (see IACT(*))
             : M1_ + M2_ + ibnd_i   // upper bound (see IACT(*))
            );
        ++NACTSTORE_;
      }
      else if( j <= nd + m_in ) { // General inequality constraint
        const size_type ibnd_i = IBND_INV_[j-1]; // offset into M1_ + ibnd_j
        TEST_FOR_EXCEPT( !( ibnd_i ) );
        IACTSTORE_[NACTSTORE_]
          = (val < 0.0
             ? ibnd_i               // lower bound (see IACT(*))
             : M1_ + M2_ + ibnd_i   // upper bound (see IACT(*))
            );
        ++NACTSTORE_;
      }
    }
  }
  if( NACTSTORE_ > 0 )
    WARM_ = 1;

  // /////////////////////////
  // Call QPKWIK

  if( out && olevel > PRINT_NONE ) {
    *out
      << "\nCalling QPKWIK to solve QP problem ...\n";
  }

  QPKWIKNEW_CppDecl::qpkwiknew(
    N_, M1_, M2_, M3_, &GRAD_(1), &UINV_AUG_(1,1), LDUINV_AUG_, &IBND_[0]
    ,&BL_(1), &BU_(1), &A_(1,1), LDA_, &YPY_(1), IYPY_, WARM_, NUMPARAM_, MAX_ITER_, &X_(1)
    ,&NACTSTORE_, &IACTSTORE_[0], &INF_, &NACT_, &IACT_[0], &UR_[0], &EXTRA_
    ,&ITER_, &NUM_ADDS_, &NUM_DROPS_, &ISTATE_[0], LRW_, &RW_[0]
    );

  // ////////////////////////
  // Map from QPKWIK output

  // eta
  *eta = EXTRA_;
  // d
  (VectorDenseMutableEncap(*d))() = X_();
  // nu (simple variable bounds) and mu (general inequalities)
  if(nu) *nu = 0.0;
  if(mu) *mu = 0.0;
  // ToDo: Create VectorDenseMutableEncap views for faster access!
  {for(size_type i = 1; i <= NACT_; ++i) {
    size_type j = IACT_[i-1];
    EConstraintType type = constraint_type(M1_,M2_,M3_,j);
    FortranTypes::f_int idc = constraint_index(M1_,M2_,M3_,&IBND_[0],type,j);
    switch(type) {
      case NU_L:
        nu->set_ele( idc , -UR_(i) );
        break;
      case GAMA_L:
        mu->set_ele( IBND_[ M1_ + idc - 1 ], -UR_(i) );
        break;
      case NU_U:
        nu->set_ele( idc, UR_(i)) ;
        break;
      case GAMA_U:
        mu->set_ele( IBND_[ M1_ + idc - 1 ], UR_(i) );
        break;
      case LAMBDA:
        lambda->set_ele( idc, UR_(i) );
        break;
    }
  }}
  // obj_d (This could be updated within QPKWIK in the future)
  if(obj_d) {
    // obj_d = g'*d + 1/2 * d' * G * g
    *obj_d = dot(g,*d) + 0.5 * transVtMtV(*d,G,BLAS_Cpp::no_trans,*d);
  }
  // Ed (This could be updated within QPKWIK in the future)
  if(Ed) {
    V_MtV( Ed, *E, trans_E, *d );
  }
  // Fd (This could be updated within QPKWIK in the future)
  if(Fd) {
    V_MtV( Fd, *F, trans_F, *d );
  }
  // Set the QP statistics
  QPSolverStats::ESolutionType solution_type;
  if( INF_ >= 0 ) {
    solution_type = QPSolverStats::OPTIMAL_SOLUTION;
  }
  else if( INF_ == -1 ) { // Infeasible constraints
    TEST_FOR_EXCEPTION(
      true, QPSolverRelaxed::Infeasible
      ,"QPSolverRelaxedQPKWIK::solve_qp(...) : Error, QP is infeasible" );
  }
  else if( INF_ == -2 ) { // LRW too small
    TEST_FOR_EXCEPT( !( INF_ != -2 ) );  // Local programming error?
  }
  else if( INF_ == -3 ) { // Max iterations exceeded
    solution_type = QPSolverStats::DUAL_FEASIBLE_POINT;
  }
  else {
    TEST_FOR_EXCEPT(true); // Unknown return value!
  }
  qp_stats_.set_stats(
    solution_type, QPSolverStats::CONVEX
    ,ITER_, NUM_ADDS_, NUM_DROPS_
    ,WARM_==1, *eta > 0.0 );

  return qp_stats_.solution_type();
}
bool NLPDirectTester::finite_diff_check(
  NLPDirect         *nlp
  ,const Vector     &xo
  ,const Vector     *xl
  ,const Vector     *xu
  ,const Vector     *c
  ,const Vector     *Gf
  ,const Vector     *py
  ,const Vector     *rGf
  ,const MatrixOp   *GcU
  ,const MatrixOp   *D
  ,const MatrixOp   *Uz
  ,bool             print_all_warnings
  ,std::ostream     *out
  ) const
{

  using std::setw;
  using std::endl;
  using std::right;

  using AbstractLinAlgPack::sum;
  using AbstractLinAlgPack::dot;
  using AbstractLinAlgPack::Vp_StV;
  using AbstractLinAlgPack::random_vector;
  using AbstractLinAlgPack::assert_print_nan_inf;
  using LinAlgOpPack::V_StV;
  using LinAlgOpPack::V_StMtV;
  using LinAlgOpPack::Vp_MtV;
  using LinAlgOpPack::M_StM;
  using LinAlgOpPack::M_StMtM;

  typedef VectorSpace::vec_mut_ptr_t  vec_mut_ptr_t;

//  using AbstractLinAlgPack::TestingPack::CompareDenseVectors;
//  using AbstractLinAlgPack::TestingPack::CompareDenseSparseMatrices;

  using TestingHelperPack::update_success;

  bool success = true, preformed_fd;
  if(out) {
    *out << std::boolalpha
       << std::endl
       << "*********************************************************\n"
       << "*** NLPDirectTester::finite_diff_check(...) ***\n"
       << "*********************************************************\n";
  }

  const Range1D
    var_dep      = nlp->var_dep(),
    var_indep    = nlp->var_indep(),
    con_decomp   = nlp->con_decomp(),
    con_undecomp = nlp->con_undecomp();
  NLP::vec_space_ptr_t
    space_x = nlp->space_x(),
    space_c = nlp->space_c();

  // //////////////////////////////////////////////
  // Validate the input

  TEST_FOR_EXCEPTION(
    py && !c, std::invalid_argument
    ,"NLPDirectTester::finite_diff_check(...) : "
    "Error, if py!=NULL then c!=NULL must also be true!" );

  const CalcFiniteDiffProd
    &fd_deriv_prod = this->calc_fd_prod();

  const value_type
    rand_y_l = -1.0, rand_y_u = 1.0,
    small_num = ::sqrt(std::numeric_limits<value_type>::epsilon());

  try {

  // ///////////////////////////////////////////////
  // (1) Check Gf

  if(Gf) {
    switch( Gf_testing_method() ) {
      case FD_COMPUTE_ALL: {
        // Compute FDGf outright
        TEST_FOR_EXCEPT(true); // ToDo: update above!
        break;
      }
      case FD_DIRECTIONAL: {
        // Compute FDGF'*y using random y's
        if(out)
          *out
            << "\nComparing products Gf'*y with finite difference values FDGf'*y for "
            << "random y's ...\n";
        vec_mut_ptr_t y = space_x->create_member();
        value_type max_warning_viol = 0.0;
        int num_warning_viol = 0;
        const int num_fd_directions_used = ( num_fd_directions() > 0 ? num_fd_directions() : 1 );
        for( int direc_i = 1; direc_i <= num_fd_directions_used; ++direc_i ) {
          if( num_fd_directions() > 0 ) {
            random_vector( rand_y_l, rand_y_u, y.get() );
            if(out)
              *out
                << "\n****"
                << "\n**** Random directional vector " << direc_i << " ( ||y||_1 / n = "
                << (y->norm_1() / y->dim()) << " )"
                << "\n***\n";
          }
          else {
            *y = 1.0;
            if(out)
              *out
                << "\n****"
                << "\n**** Ones vector y ( ||y||_1 / n = "<<(y->norm_1()/y->dim())<<" )"
                << "\n***\n";
          }
          value_type
            Gf_y = dot( *Gf, *y ),
            FDGf_y;
          preformed_fd = fd_deriv_prod.calc_deriv_product(
            xo,xl,xu
            ,*y,NULL,NULL,true,nlp,&FDGf_y,NULL,out,dump_all(),dump_all()
            );
          if( !preformed_fd )
            goto FD_NOT_PREFORMED;
          assert_print_nan_inf(FDGf_y, "FDGf'*y",true,out);
          const value_type
            calc_err = ::fabs( ( Gf_y - FDGf_y )/( ::fabs(Gf_y) + ::fabs(FDGf_y) + small_num ) );
          if( calc_err >= Gf_warning_tol() ) {
            max_warning_viol = my_max( max_warning_viol, calc_err );
            ++num_warning_viol;
          }
          if(out)
            *out
              << "\nrel_err(Gf'*y,FDGf'*y) = "
              << "rel_err(" << Gf_y << "," << FDGf_y << ") = "
              << calc_err << endl;
          if( calc_err >= Gf_error_tol() ) {
            if(out) {
              *out
                << "Error, above relative error exceeded Gf_error_tol = " << Gf_error_tol() << endl;
              if(dump_all()) {
                *out << "\ny =\n" << *y;
              }
            }
          }
        }
        if(out && num_warning_viol)
          *out
            << "\nThere were " << num_warning_viol << " warning tolerance "
            << "violations out of num_fd_directions = " << num_fd_directions()
            << " computations of FDGf'*y\n"
            << "and the maximum violation was " << max_warning_viol
            << " > Gf_waring_tol = " << Gf_warning_tol() << endl;
        break;
      }
      default:
        TEST_FOR_EXCEPT(true); // Invalid value
    }
  }

  // /////////////////////////////////////////////
  // (2) Check py = -inv(C)*c
  //
  // We want to check; 
  // 
  //  FDC * (inv(C)*c) \approx c
  //       \_________/
  //         -py
  //
  // We can compute this as:
  //           
  // FDC * py = [ FDC, FDN ] * [ -py ; 0 ]
  //            \__________/
  //                FDA'
  // 
  // t1 =  [ -py ; 0 ]
  // 
  // t2 = FDA'*t1
  // 
  // Compare t2 \approx c
  // 
  if(py) {
    if(out)
      *out
        << "\nComparing c with finite difference product FDA'*[ -py; 0 ] = -FDC*py ...\n";
    // t1 =  [ -py ; 0 ]
    VectorSpace::vec_mut_ptr_t
      t1 = space_x->create_member();
    V_StV( t1->sub_view(var_dep).get(), -1.0, *py );
    *t1->sub_view(var_indep) = 0.0;
    // t2 = FDA'*t1
    VectorSpace::vec_mut_ptr_t
      t2 = nlp->space_c()->create_member();
    preformed_fd = fd_deriv_prod.calc_deriv_product(
      xo,xl,xu
      ,*t1,NULL,NULL,true,nlp,NULL,t2.get(),out,dump_all(),dump_all()
      );
    if( !preformed_fd )
      goto FD_NOT_PREFORMED;
    const value_type
      sum_c  = sum(*c),
      sum_t2 = sum(*t2);
    assert_print_nan_inf(sum_t2, "sum(-FDC*py)",true,out);
    const value_type
      calc_err = ::fabs( ( sum_c - sum_t2 )/( ::fabs(sum_c) + ::fabs(sum_t2) + small_num ) );
    if(out)
      *out
        << "\nrel_err(sum(c),sum(-FDC*py)) = "
        << "rel_err(" << sum_c << "," << sum_t2 << ") = "
        << calc_err << endl;
    if( calc_err >= Gc_error_tol() ) {
      if(out)
        *out
          << "Error, above relative error exceeded Gc_error_tol = " << Gc_error_tol() << endl;
      if(print_all_warnings)
        *out << "\nt1 = [ -py; 0 ] =\n" << *t1
           << "\nt2 = FDA'*t1 = -FDC*py =\n"   << *t2;
      update_success( false, &success );
    }
    if( calc_err >= Gc_warning_tol() ) {
      if(out)
        *out
          << "\nWarning, above relative error exceeded Gc_warning_tol = " << Gc_warning_tol() << endl;
    }
  }

  // /////////////////////////////////////////////
  // (3) Check D = -inv(C)*N

  if(D) {
    switch( Gc_testing_method() ) {
      case FD_COMPUTE_ALL: {
        //
        // Compute FDN outright and check
        // -FDC * D \aprox FDN
        // 
        // We want to compute:
        // 
        // FDC * -D = [ FDC, FDN ] * [ -D; 0 ]
        //            \__________/
        //                FDA'
        // 
        // To compute the above we perform:
        // 
        // T = FDA' * [ -D; 0 ] (one column at a time)
        // 
        // Compare T \approx FDN
        //
/*
        // FDN
        DMatrix FDN(m,n-m);
        fd_deriv_computer.calc_deriv( xo, xl, xu
          , Range1D(m+1,n), nlp, NULL
          , &FDN() ,BLAS_Cpp::trans, out );

        // T = FDA' * [ -D; 0 ] (one column at a time)
        DMatrix T(m,n-m);
        DVector t(n);
        t(m+1,n) = 0.0;
        for( int s = 1; s <= n-m; ++s ) {
          // t = [ -D(:,s); 0 ]
          V_StV( &t(1,m), -1.0, D->col(s) );
          // T(:,s) =  FDA' * t
          fd_deriv_prod.calc_deriv_product(
            xo,xl,xu,t(),NULL,NULL,nlp,NULL,&T.col(s),out);
        }        

        // Compare T \approx FDN
        if(out)
          *out
            << "\nChecking the computed D = -inv(C)*N\n"
            << "where D(i,j) = (-FDC*D)(i,j), dM(i,j) = FDN(i,j) ...\n";
        result = comp_M.comp(
          T(), FDN, BLAS_Cpp::no_trans
          , CompareDenseSparseMatrices::FULL_MATRIX
          , CompareDenseSparseMatrices::REL_ERR_BY_COL
          , Gc_warning_tol(), Gc_error_tol()
          , print_all_warnings, out );
        update_success( result, &success );
        if(!result) return false;
*/
        TEST_FOR_EXCEPT(true); // Todo: Implement above!
        break;
      }
      case FD_DIRECTIONAL: {
        //
        // Compute -FDC * D * v \aprox FDN * v
        // for random v's
        //
        // We will compute this as:
        // 
        // t1 = [ 0; y ] <: R^(n)
        // 
        // t2 = FDA' * t1  (  FDN * y ) <: R^(m)
        //
        // t1 = [ -D * y ; 0 ]  <: R^(n)
        // 
        // t3 = FDA' * t1  ( -FDC * D * y ) <: R^(m)
        // 
        // Compare t2 \approx t3
        //
        if(out)
          *out
            << "\nComparing finite difference products -FDC*D*y with FDN*y for "
              "random vectors y ...\n";
        VectorSpace::vec_mut_ptr_t
          y  = space_x->sub_space(var_indep)->create_member(),
          t1 = space_x->create_member(),
          t2 = space_c->create_member(),
          t3 = space_c->create_member();
        value_type max_warning_viol = 0.0;
        int num_warning_viol = 0;
        const int num_fd_directions_used = ( num_fd_directions() > 0 ? num_fd_directions() : 1 );
        for( int direc_i = 1; direc_i <= num_fd_directions_used; ++direc_i ) {
          if( num_fd_directions() > 0 ) {
            random_vector( rand_y_l, rand_y_u, y.get() );
            if(out)
              *out
                << "\n****"
                << "\n**** Random directional vector " << direc_i << " ( ||y||_1 / n = "
                << (y->norm_1() / y->dim()) << " )"
                << "\n***\n";
          }
          else {
            *y = 1.0;
            if(out)
              *out
                << "\n****"
                << "\n**** Ones vector y ( ||y||_1 / n = "<<(y->norm_1()/y->dim())<<" )"
                << "\n***\n";
          }
          // t1 = [ 0; y ] <: R^(n)
          *t1->sub_view(var_dep)   = 0.0;
          *t1->sub_view(var_indep) = *y;
          // t2 = FDA' * t1  (  FDN * y ) <: R^(m)
          preformed_fd = fd_deriv_prod.calc_deriv_product(
            xo,xl,xu
            ,*t1,NULL,NULL,true,nlp,NULL,t2.get(),out,dump_all(),dump_all()
            );
          if( !preformed_fd )
            goto FD_NOT_PREFORMED;
          // t1 = [ -D * y ; 0 ]  <: R^(n)
          V_StMtV( t1->sub_view(var_dep).get(), -1.0, *D, BLAS_Cpp::no_trans, *y );
          *t1->sub_view(var_indep) = 0.0;
          // t3 = FDA' * t1  ( -FDC * D * y ) <: R^(m)
          preformed_fd = fd_deriv_prod.calc_deriv_product(
            xo,xl,xu
            ,*t1,NULL,NULL,true,nlp,NULL,t3.get(),out,dump_all(),dump_all()
            );
          // Compare t2 \approx t3
          const value_type
            sum_t2 = sum(*t2),
            sum_t3 = sum(*t3);
          const value_type
            calc_err = ::fabs( ( sum_t2 - sum_t3 )/( ::fabs(sum_t2) + ::fabs(sum_t3) + small_num ) );
          if(out)
            *out
              << "\nrel_err(sum(-FDC*D*y),sum(FDN*y)) = "
              << "rel_err(" << sum_t3 << "," << sum_t2 << ") = "
              << calc_err << endl;
          if( calc_err >= Gc_warning_tol() ) {
            max_warning_viol = my_max( max_warning_viol, calc_err );
            ++num_warning_viol;
          }
          if( calc_err >= Gc_error_tol() ) {
            if(out)
              *out
                << "Error, above relative error exceeded Gc_error_tol = " << Gc_error_tol() << endl
                << "Stoping the tests!\n";
            if(print_all_warnings)
              *out << "\ny =\n" << *y
                   << "\nt1 = [ -D*y; 0 ] =\n" << *t1
                   << "\nt2 =  FDA' * [ 0; y ] = FDN * y =\n" << *t2
                   << "\nt3 =  FDA' * t1 = -FDC * D * y =\n" << *t3;
            update_success( false, &success );
          }
        }
        if(out && num_warning_viol)
          *out
            << "\nThere were " << num_warning_viol << " warning tolerance "
            << "violations out of num_fd_directions = " << num_fd_directions()
            << " computations of sum(FDC*D*y) and sum(FDN*y)\n"
            << "and the maximum relative iolation was " << max_warning_viol
            << " > Gc_waring_tol = " << Gc_warning_tol() << endl;
        break;
      }
      default:
        TEST_FOR_EXCEPT(true);
    }
  }

  // ///////////////////////////////////////////////
  // (4) Check rGf = Gf(var_indep) + D'*Gf(var_dep)

  if(rGf) {
    if( Gf && D ) {
      if(out)
        *out
          << "\nComparing rGf_tmp = Gf(var_indep) - D'*Gf(var_dep) with rGf ...\n";
      VectorSpace::vec_mut_ptr_t
        rGf_tmp = space_x->sub_space(var_indep)->create_member();
      *rGf_tmp = *Gf->sub_view(var_indep);
      Vp_MtV( rGf_tmp.get(), *D, BLAS_Cpp::trans, *Gf->sub_view(var_dep) );
      const value_type
        sum_rGf_tmp  = sum(*rGf_tmp),
        sum_rGf      = sum(*rGf);
      const value_type
        calc_err = ::fabs( ( sum_rGf_tmp - sum_rGf )/( ::fabs(sum_rGf_tmp) + ::fabs(sum_rGf) + small_num ) );
      if(out)
        *out
          << "\nrel_err(sum(rGf_tmp),sum(rGf)) = "
          << "rel_err(" << sum_rGf_tmp << "," << sum_rGf << ") = "
          << calc_err << endl;
      if( calc_err >= Gc_error_tol() ) {
        if(out)
          *out
            << "Error, above relative error exceeded Gc_error_tol = " << Gc_error_tol() << endl;
        if(print_all_warnings)
          *out << "\nrGf_tmp =\n" << *rGf_tmp
             << "\nrGf =\n"   << *rGf;
        update_success( false, &success );
      }
      if( calc_err >= Gc_warning_tol() ) {
        if(out)
          *out
            << "\nWarning, above relative error exceeded Gc_warning_tol = "
            << Gc_warning_tol() << endl;
      }
    }
    else if( D ) {
      if(out)
        *out
          << "\nComparing rGf'*y with the finite difference product"
          << " fd_prod(f,[D*y;y]) for random vectors y ...\n";
      VectorSpace::vec_mut_ptr_t
        y  = space_x->sub_space(var_indep)->create_member(),
        t  = space_x->create_member();
      value_type max_warning_viol = 0.0;
      int num_warning_viol = 0;
      const int num_fd_directions_used = ( num_fd_directions() > 0 ? num_fd_directions() : 1 );
      for( int direc_i = 1; direc_i <= num_fd_directions_used; ++direc_i ) {
        if( num_fd_directions() > 0 ) {
          random_vector( rand_y_l, rand_y_u, y.get() );
          if(out)
            *out
              << "\n****"
              << "\n**** Random directional vector " << direc_i << " ( ||y||_1 / n = "
              << (y->norm_1() / y->dim()) << " )"
              << "\n***\n";
        }
        else {
          *y = 1.0;
          if(out)
            *out
              << "\n****"
              << "\n**** Ones vector y ( ||y||_1 / n = "<<(y->norm_1()/y->dim())<<" )"
              << "\n***\n";
        }
        // t = [ D*y; y ]
        LinAlgOpPack::V_MtV(&*t->sub_view(var_dep),*D,BLAS_Cpp::no_trans,*y);
        *t->sub_view(var_indep) = *y;
        value_type fd_rGf_y = 0.0;
        // fd_Gf_y
        preformed_fd = fd_deriv_prod.calc_deriv_product(
          xo,xl,xu
          ,*t,NULL,NULL,true,nlp,&fd_rGf_y,NULL,out,dump_all(),dump_all()
          );
        if( !preformed_fd )
          goto FD_NOT_PREFORMED;
        if(out) *out << "fd_prod(f,[D*y;y]) = " << fd_rGf_y << "\n";
        // rGf_y = rGf'*y
        const value_type rGf_y = dot(*rGf,*y);
        if(out) *out << "rGf'*y = " << rGf_y << "\n";
        // Compare fd_rGf_y and rGf*y
        const value_type
          calc_err = ::fabs( ( rGf_y - fd_rGf_y )/( ::fabs(rGf_y) + ::fabs(fd_rGf_y) + small_num ) );
        if( calc_err >= Gc_warning_tol() ) {
          max_warning_viol = my_max( max_warning_viol, calc_err );
          ++num_warning_viol;
        }
        if(out)
          *out
            << "\nrel_err(rGf'*y,fd_prod(f,[D*y;y])) = "
            << "rel_err(" << fd_rGf_y << "," << rGf_y << ") = "
            << calc_err << endl;
        if( calc_err >= Gf_error_tol() ) {
          if(out)
            *out << "Error, above relative error exceeded Gc_error_tol = " << Gc_error_tol() << endl;
          if(print_all_warnings)
            *out << "\ny =\n" << *y
                 << "\nt = [ D*y; y ] =\n" << *t;
          update_success( false, &success );
        }
      }
    }
    else {
      TEST_FOR_EXCEPT(true); // ToDo: Test rGf without D? (This is not going to be easy!)
    }
  }
  
  // ///////////////////////////////////////////////////
  // (5) Check GcU, and/or Uz (for undecomposed equalities)

  if(GcU || Uz) {
    TEST_FOR_EXCEPT(true); // ToDo: Implement!
  }
  
FD_NOT_PREFORMED:

  if(!preformed_fd) {
    if(out)
      *out
        << "\nError, the finite difference computation was not preformed due to cramped bounds\n"
        << "Finite difference test failed!\n" << endl;
    return false;
  }

  } // end try
  catch( const AbstractLinAlgPack::NaNInfException& except ) {
    if(out)
      *out
        << "Error, found a NaN or Inf.  Stoping tests\n";
    success = false;
  }
  
  if( out ) {
    if( success )
      *out
        << "\nCongradulations, all the finite difference errors where within the\n"
        "specified error tolerances!\n";
    else
      *out
        << "\nOh no, at least one of the above finite difference tests failed!\n";
  }

  return success;

}
void MatrixSymPosDefLBFGS::V_InvMtV(
  VectorMutable* y, BLAS_Cpp::Transp trans_rhs1
  , const Vector& x
  ) const
{
  using AbstractLinAlgPack::Vp_StMtV;
  using DenseLinAlgPack::V_InvMtV;
  using LinAlgOpPack::V_mV;
  using LinAlgOpPack::V_StV;
  using LinAlgOpPack::Vp_V;
  using LinAlgOpPack::V_MtV;
  using LinAlgOpPack::V_StMtV;
  using LinAlgOpPack::Vp_MtV;
  using DenseLinAlgPack::Vp_StMtV;
  typedef VectorDenseEncap         vde;
  typedef VectorDenseMutableEncap  vdme;
  using Teuchos::Workspace;
  Teuchos::WorkspaceStore* wss = Teuchos::get_default_workspace_store().get();

  assert_initialized();

  TEUCHOS_TEST_FOR_EXCEPT( !(  inverse_is_updated_  ) ); // For now just always update

  // y = inv(Bk) * x = Hk * x
  //
  // = gk*x + [S gk*Y] * [ inv(R')*(D+gk*Y'Y)*inv(R)     -inv(R') ] * [   S'  ] * x
  //                     [            -inv(R)                0    ]   [ gk*Y' ]
  //
  // Perform in the following (in order):
  //
  // y = gk*x
  //
  // t1 = [   S'*x  ]					<: R^(2*m)
  //      [ gk*Y'*x ]
  //
  // t2 = inv(R) * t1(1:m)			<: R^(m)
  //
  // t3 = - inv(R') * t1(m+1,2*m)		<: R^(m)
  //
  // t4 = gk * Y'Y * t2				<: R^(m)
  //
  // t4 += D*t2
  //
  // t5 = inv(R') * t4				<: R^(m)
  //
  // t5 += t3
  //
  // y += S*t5
  //
  // y += -gk*Y*t2

  // y = gk*x
  V_StV( y, gamma_k_, x );

  const size_type
    mb = m_bar_;	
  
  if( !mb )
    return;	// No updates have been performed.

  const multi_vec_ptr_t
    S = this->S(),
    Y = this->Y();

  // Get workspace

  Workspace<value_type>    t1_ws(wss,2*mb);
  DVectorSlice                   t1(&t1_ws[0],t1_ws.size());
  Workspace<value_type>    t2_ws(wss,mb);
  DVectorSlice                   t2(&t2_ws[0],t2_ws.size());
  Workspace<value_type>    t3_ws(wss,mb);
  DVectorSlice                   t3(&t3_ws[0],t3_ws.size());
  Workspace<value_type>    t4_ws(wss,mb);
  DVectorSlice                   t4(&t4_ws[0],t4_ws.size());
  Workspace<value_type>    t5_ws(wss,mb);
  DVectorSlice                   t5(&t5_ws[0],t5_ws.size());

  VectorSpace::vec_mut_ptr_t
    t = S->space_rows().create_member();

  const DMatrixSliceTri
    &R = this->R();

  const DMatrixSliceSym
    &YTY = this->YTY();

  // t1 = [   S'*x  ]
  //      [ gk*Y'*x ]
  V_MtV( t.get(), *S, BLAS_Cpp::trans, x );
  t1(1,mb) = vde(*t)();
  V_StMtV( t.get(), gamma_k_, *Y, BLAS_Cpp::trans, x );
  t1(mb+1,2*mb) = vde(*t)();

  // t2 = inv(R) * t1(1:m)
  V_InvMtV( &t2, R, BLAS_Cpp::no_trans, t1(1,mb) );

  // t3 = - inv(R') * t1(m+1,2*m)
  V_mV( &t3, t1(mb+1,2*mb) );
  V_InvMtV( &t3, R, BLAS_Cpp::trans, t3 );

  // t4 = gk * Y'Y * t2
  V_StMtV( &t4, gamma_k_, YTY, BLAS_Cpp::no_trans, t2 );

  // t4 += D*t2
  Vp_DtV( &t4, t2 );

  // t5 = inv(R') * t4
  V_InvMtV( &t5, R, BLAS_Cpp::trans, t4 );

  // t5 += t3
  Vp_V( &t5, t3 );

  // y += S*t5
  (vdme(*t)() = t5);
  Vp_MtV( y, *S, BLAS_Cpp::no_trans, *t );

  // y += -gk*Y*t2
  (vdme(*t)() = t2);
  Vp_StMtV( y, -gamma_k_, *Y, BLAS_Cpp::no_trans, *t );

}