/// Solves the Anitescu-Potra model LCP
void ImpactConstraintHandler::apply_ap_model(UnilateralConstraintProblemData& q)
{
  /// Matrices and vectors for solving LCP
  Ravelin::MatrixNd _UL, _LL, _MM, _UR, _workM;
  Ravelin::VectorNd _qq, _workv;

  FILE_LOG(LOG_CONSTRAINT) << "ImpactConstraintHandler::apply_ap_model() entered" << std::endl;

  unsigned NC = q.N_CONTACTS;

  // Num joint limit constraints
  unsigned N_LIMIT = q.N_LIMITS;

  // Num friction directions + num normal directions
  unsigned N_FRICT = NC*4 + NC;

  // Total constraints
  unsigned N_CONST = N_FRICT + N_LIMIT;

  // Num friction constraints
  unsigned NK_DIRS = 0;
  for(unsigned i=0,j=0,r=0;i<NC;i++){
    if (q.contact_constraints[i]->contact_NK > 4)
      NK_DIRS+=(q.contact_constraints[i]->contact_NK+4)/4;
    else
      NK_DIRS+=1;
  }

  // setup sizes
  _UL.set_zero(N_CONST, N_CONST);
  _UR.set_zero(N_CONST, NK_DIRS);
  _LL.set_zero(NK_DIRS, N_CONST);
  _MM.set_zero(_UL.rows() + _LL.rows(), _UL.columns() + _UR.columns());

  MatrixNd Cs_iM_CnT,Ct_iM_CnT,Ct_iM_CsT,L_iM_CnT,L_iM_CsT,L_iM_CtT;
  Ravelin::MatrixNd::transpose(q.Cn_iM_LT,L_iM_CnT);
  Ravelin::MatrixNd::transpose(q.Cs_iM_LT,L_iM_CsT);
  Ravelin::MatrixNd::transpose(q.Ct_iM_LT,L_iM_CtT);
  Ravelin::MatrixNd::transpose(q.Cn_iM_CsT,Cs_iM_CnT);
  Ravelin::MatrixNd::transpose(q.Cn_iM_CtT,Ct_iM_CnT);
  Ravelin::MatrixNd::transpose(q.Cs_iM_CtT,Ct_iM_CsT);
  /*     n          r          r           r           r
  n  Cn_iM_CnT  Cn_iM_CsT  -Cn_iM_CsT   Cn_iM_CtT  -Cn_iM_CtT
  r  Cs_iM_CnT  Cs_iM_CsT  -Cs_iM_CsT   Cs_iM_CtT  -Cs_iM_CtT
  r -Cs_iM_CnT -Cs_iM_CsT   Cs_iM_CsT  -Cs_iM_CtT   Cs_iM_CtT
  r  Ct_iM_CnT  Ct_iM_CsT  -Ct_iM_CsT   Ct_iM_CtT  -Ct_iM_CtT
  r -Ct_iM_CnT -Ct_iM_CsT   Ct_iM_CsT  -Ct_iM_CtT   Ct_iM_CtT
  */
  FILE_LOG(LOG_CONSTRAINT) << "Cn*inv(M)*Cn': " << std::endl << q.Cn_iM_CnT;
  FILE_LOG(LOG_CONSTRAINT) << "Cn*inv(M)*Cs': " << std::endl << q.Cn_iM_CsT;
  FILE_LOG(LOG_CONSTRAINT) << "Cn*inv(M)*Ct': " << std::endl << q.Cn_iM_CtT;
  
  FILE_LOG(LOG_CONSTRAINT) << "Cs*inv(M)*Cn': " << std::endl << Cs_iM_CnT;
  FILE_LOG(LOG_CONSTRAINT) << "Cs*inv(M)*Cs': " << std::endl << q.Cs_iM_CsT;
  FILE_LOG(LOG_CONSTRAINT) << "Cs*inv(M)*Ct': " << std::endl << q.Cs_iM_CsT;
  
  FILE_LOG(LOG_CONSTRAINT) << "Ct*inv(M)*Cn': " << std::endl << Ct_iM_CnT;
  FILE_LOG(LOG_CONSTRAINT) << "Ct*inv(M)*Cs': " << std::endl << Ct_iM_CsT;

  FILE_LOG(LOG_CONSTRAINT) << "L*inv(M)*L': " << std::endl << q.L_iM_LT;
  FILE_LOG(LOG_CONSTRAINT) << "Cn*inv(M)*L': " << std::endl << q.Cn_iM_LT;
  FILE_LOG(LOG_CONSTRAINT) << "L*inv(M)*Cn': " << std::endl << L_iM_CnT;

  FILE_LOG(LOG_CONSTRAINT) << "Cs*inv(M)*L': " << std::endl << q.Cs_iM_LT;
  FILE_LOG(LOG_CONSTRAINT) << "Ct*inv(M)*L': " << std::endl << q.Ct_iM_LT;
  FILE_LOG(LOG_CONSTRAINT) << "L*inv(M)*Cs': " << std::endl << L_iM_CsT;
  FILE_LOG(LOG_CONSTRAINT) << "L*inv(M)*Ct': " << std::endl << L_iM_CtT;
  // Set positive submatrices
  /*
          n          r          r           r           r
  n  Cn_iM_CnT  Cn_iM_CsT               Cn_iM_CtT
  r  Cs_iM_CnT  Cs_iM_CsT               Cs_iM_CtT
  r                         Cs_iM_CsT               Cs_iM_CtT
  r  Ct_iM_CnT  Ct_iM_CsT               Ct_iM_CtT
  r                         Ct_iM_CsT               Ct_iM_CtT
  */
  _UL.set_sub_mat(0,0,q.Cn_iM_CnT);
  // setup the LCP matrix

  // setup the LCP vector
  _qq.set_zero(_MM.rows());
  _qq.set_sub_vec(0,q.Cn_v);

  _UL.set_sub_mat(NC,NC,q.Cs_iM_CsT);
  _UL.set_sub_mat(NC,0,Cs_iM_CnT);
  _UL.set_sub_mat(0,NC,q.Cn_iM_CsT);
  _UL.set_sub_mat(NC+NC,NC+NC,q.Cs_iM_CsT);
  _UL.set_sub_mat(NC+NC*2,0,Ct_iM_CnT);
  _UL.set_sub_mat(0,NC+NC*2,q.Cn_iM_CtT);
  _UL.set_sub_mat(NC+NC*2,NC,Ct_iM_CsT);
  _UL.set_sub_mat(NC+NC*3,NC+NC,Ct_iM_CsT);
  _UL.set_sub_mat(NC,NC+NC*2,q.Cs_iM_CtT);
  _UL.set_sub_mat(NC+NC,NC+NC*3,q.Cs_iM_CtT);
  _UL.set_sub_mat(NC+NC*2,NC+NC*2,q.Ct_iM_CtT);
  _UL.set_sub_mat(NC+NC*3,NC+NC*3,q.Ct_iM_CtT);

  // Joint Limits
  _UL.set_sub_mat(N_FRICT,N_FRICT,q.L_iM_LT);
  _UL.set_sub_mat(N_FRICT,0,L_iM_CnT);
  _UL.set_sub_mat(0,N_FRICT,q.Cn_iM_LT);
  _UL.set_sub_mat(NC,N_FRICT,q.Cs_iM_LT);
  _UL.set_sub_mat(NC+NC*2,N_FRICT,q.Ct_iM_LT);
  _UL.set_sub_mat(N_FRICT,NC,L_iM_CsT);
  _UL.set_sub_mat(N_FRICT,NC+NC*2,L_iM_CtT);


  // Set negative submatrices
  /*     n          r          r           r           r
  n                        -Cn_iM_CsT              -Cn_iM_CtT
  r                        -Cs_iM_CsT              -Cs_iM_CtT
  r -Cs_iM_CnT -Cs_iM_CsT              -Cs_iM_CtT
  r                        -Ct_iM_CsT              -Ct_iM_CtT
  r -Ct_iM_CnT -Ct_iM_CsT              -Ct_iM_CtT
    */

  q.Cn_iM_CsT.negate();
  q.Cn_iM_CtT.negate();
  Cs_iM_CnT.negate();
  q.Cs_iM_CsT.negate();
  q.Cs_iM_CtT.negate();
  Ct_iM_CnT.negate();
  Ct_iM_CsT.negate();
  q.Ct_iM_CtT.negate();

  q.Cs_iM_LT.negate();
  q.Ct_iM_LT.negate();
  L_iM_CsT.negate();
  L_iM_CtT.negate();

  _UL.set_sub_mat(NC+NC,0,Cs_iM_CnT);
  _UL.set_sub_mat(0,NC+NC,q.Cn_iM_CsT);

  _UL.set_sub_mat(NC,NC+NC,q.Cs_iM_CsT);
  _UL.set_sub_mat(NC+NC,NC,q.Cs_iM_CsT);

  _UL.set_sub_mat(NC+NC*3,0,Ct_iM_CnT);
  _UL.set_sub_mat(0,NC+NC*3,q.Cn_iM_CtT);

  _UL.set_sub_mat(NC+NC*3,NC,Ct_iM_CsT);
  _UL.set_sub_mat(NC+NC*2,NC+NC,Ct_iM_CsT);
  _UL.set_sub_mat(NC+NC,NC+NC*2,q.Cs_iM_CtT);
  _UL.set_sub_mat(NC,NC+NC*3,q.Cs_iM_CtT);

  _UL.set_sub_mat(NC+NC*2,NC+NC*3,q.Ct_iM_CtT);
  _UL.set_sub_mat(NC+NC*3,NC+NC*2,q.Ct_iM_CtT);

  // Joint limits
  _UL.set_sub_mat(NC+NC,N_FRICT,q.Cs_iM_LT);
  _UL.set_sub_mat(NC+NC*3,N_FRICT,q.Ct_iM_LT);
  _UL.set_sub_mat(N_FRICT,NC+NC,L_iM_CsT);
  _UL.set_sub_mat(N_FRICT,NC+NC*3,L_iM_CtT);

  // lower left & upper right block of matrix
  for(unsigned i=0,j=0,r=0;i<NC;i++)
  {
    const UnilateralConstraint* ci =  q.contact_constraints[i];
    if (ci->contact_NK > 4)
    {
      int nk4 = ( ci->contact_NK+4)/4;
      for(unsigned k=0;k<nk4;k++)
      {
        FILE_LOG(LOG_CONSTRAINT) << "mu_{"<< k<< ","<< i <<"}: " << ci->contact_mu_coulomb << std::endl;

        // muK
        _LL(r+k,i)         = ci->contact_mu_coulomb;
        // Xe
        _LL(r+k,NC+j)      = -cos((M_PI*k)/(2.0*nk4));
        _LL(r+k,NC+NC+j)   = -cos((M_PI*k)/(2.0*nk4));
        // Xf
        _LL(r+k,NC+NC*2+j) = -sin((M_PI*k)/(2.0*nk4));
        _LL(r+k,NC+NC*3+j) = -sin((M_PI*k)/(2.0*nk4));
        // XeT
        _UR(NC+j,r+k)      =  cos((M_PI*k)/(2.0*nk4));
        _UR(NC+NC+j,r+k)   =  cos((M_PI*k)/(2.0*nk4));
        // XfT
        _UR(NC+NC*2+j,r+k) =  sin((M_PI*k)/(2.0*nk4));
        _UR(NC+NC*3+j,r+k) =  sin((M_PI*k)/(2.0*nk4));
      }
      r+=nk4;
      j++;
    }
    else
    {
      FILE_LOG(LOG_CONSTRAINT) << "mu_{"<< i <<"}: " << ci->contact_mu_coulomb << std::endl;
      // muK
      _LL(r,i) = ci->contact_mu_coulomb;
      // Xe
      _LL(r,NC+j)      = -1.0;
      _LL(r,NC+NC+j)   = -1.0;
      // Xf
      _LL(r,NC+NC*2+j) = -1.0;
      _LL(r,NC+NC*3+j) = -1.0;
      // XeT
      _UR(NC+j,r)      =  1.0;
      _UR(NC+NC+j,r)   =  1.0;
      // XfT
      _UR(NC+NC*2+j,r) =  1.0;
      _UR(NC+NC*3+j,r) =  1.0;
      r += 1;
      j++;
    }
  }

  // setup the LCP matrix
  _MM.set_sub_mat(0, _UL.columns(), _UR);
  _MM.set_sub_mat(_UL.rows(), 0, _LL);

  // setup the LCP vector
  _qq.set_sub_vec(NC,q.Cs_v);
  _qq.set_sub_vec(NC+NC*2,q.Ct_v);
  q.Cs_v.negate();
  q.Ct_v.negate();
  _qq.set_sub_vec(NC+NC,q.Cs_v);
  _qq.set_sub_vec(NC+NC*3,q.Ct_v);
  _qq.set_sub_vec(N_FRICT,q.L_v);

  _MM.set_sub_mat(0, 0, _UL);

  FILE_LOG(LOG_CONSTRAINT) << " LCP matrix: " << std::endl << _MM;
  FILE_LOG(LOG_CONSTRAINT) << " LCP vector: " << _qq << std::endl;

  // Fix Negations
  q.Cn_iM_CsT.negate();
  q.Cn_iM_CtT.negate();
//  Cs_iM_CnT.negate();
  q.Cs_iM_CsT.negate();
  q.Cs_iM_CtT.negate();
//  Ct_iM_CnT.negate();
//  Ct_iM_CsT.negate();
  q.Ct_iM_CtT.negate();
  q.Cs_iM_LT.negate();
  q.Ct_iM_LT.negate();
  //L_iM_CsT.negate();
  //L_iM_CtT.negate();
  q.Cs_v.negate();
  q.Ct_v.negate();

  // solve the LCP
  VectorNd z;
  if (!_lcp.lcp_lemke_regularized(_MM, _qq, z, -20, 1, -2))
    throw std::exception();

  for(unsigned i=0,j=0;i<NC;i++)
  {
    q.cn[i] = z[i];
    q.cs[i] = z[NC+j] - z[NC+NC+j];
    q.ct[i] = z[NC+NC*2+j] - z[NC+NC*3+j];
    j++;
  }

  q.l = z.segment(N_FRICT,N_FRICT+N_LIMIT);

  // setup a temporary frame
  shared_ptr<Pose3d> P(new Pose3d);

  // propagate the impulse data
  propagate_impulse_data(q); 

  if (LOGGING(LOG_CONSTRAINT))
  {
    // compute LCP 'w' vector
    VectorNd w;
    _MM.mult(z, w) += _qq;

    // output new acceleration
    FILE_LOG(LOG_CONSTRAINT) << "new normal v: " << w.segment(0, q.contact_constraints.size()) << std::endl;
  }

  FILE_LOG(LOG_CONSTRAINT) << "cn " << q.cn << std::endl;
  FILE_LOG(LOG_CONSTRAINT) << "cs " << q.cs << std::endl;
  FILE_LOG(LOG_CONSTRAINT) << "ct " << q.ct << std::endl;

  FILE_LOG(LOG_CONSTRAINT) << "l " << q.l << std::endl;

  FILE_LOG(LOG_CONSTRAINT) << " LCP result : " << z << std::endl;
  FILE_LOG(LOG_CONSTRAINT) << "ImpactConstraintHandler::apply_ap_model() exited" << std::endl;
}
/**
 * \param M the generalized inertia matrix of the humanoid
 * \param vminus the generalized velocity of the humanoid at the current time
 * \param fext the generalized external force vector of the humanoid at the
 *             current time
 * \param N the generalized normal contact wrench for the humanoid 
 * \param D the generalized tangent contact wrench for the humanoid 
 * \param R the generalized constraint wrench for the humanoid
 * \param J the generalized constraint wrench for the robot
 * \param lolimit the lower (negative) actuator limits imposed on the robot
 * \param hilimit the upper (positive) actuator limits imposed on the robot
 * \param fr contains, on return, the force that the robot should apply to the human at the point of contact
 */
void controller(const MatrixNd& M, const VectorNd& v, const VectorNd& fext, const MatrixNd& N, const MatrixNd& D, const MatrixNd& R, const MatrixNd& J, const VectorNd& lolimit, const VectorNd& hilimit, VectorNd& fr)
{
  const double INF = std::numeric_limits<double>::max();
  const double DT = 1e-3;  // default value for delta t

  // setup variable sizes
  const unsigned N_ACT = J.columns();    // number of actuators robot commands
  const unsigned N_CONTACTS = N.rows();
  const unsigned N_SIZE = N_CONTACTS;
  const unsigned D_SIZE = N_CONTACTS*2;  // two tangent directions 
  const unsigned R_SIZE = R.rows();      // 6 x X robot/humanoid constraints 

  // setup indices for variables
  const unsigned R_INDEX = 0;
  const unsigned D_INDEX = R_INDEX + R_SIZE;
  const unsigned N_INDEX = D_INDEX + D_SIZE;

  // setup the number of QP variables
  const unsigned NVARS = N_SIZE + R_SIZE + D_SIZE;

  // do a Cholesky factorization on M so that we can solve with it quickly
  MatrixNd cholM = M;
  if (!LinAlgd::factor_chol(cholM))
    throw std::runtime_error("Unexpectedly unable to factorize generalized inertia matrix!");
 
  // compute k = v^- + inv(M)*fext*DT
  VectorNd k = fext;
  k *= DT;
  LinAlgd::solve_chol_fast(cholM, k);
  k += v; 

  // compute matrices
  MatrixNd workM, RiMRT, RiMDT, RiMNT, DiMRT, DiMDT, DiMNT, NiMRT, NiMDT, NiMNT;

  // first compute inv(M)*R'
  MatrixNd::transpose(R, workM);
  LinAlgd::solve_chol_fast(cholM, workM);

  // now compute R*inv(M)*R', D*inv(M)*R' (this is identical to R*inv(M)*D'),
  // and N*inv(M)*R' (this is identical to R*inv(M)*N')
  R.mult(workM, RiMRT);
  D.mult(workM, DiMRT);
  N.mult(workM, NiMRT);
  MatrixNd::transpose(DiMRT, RiMDT);
  MatrixNd::transpose(NiMRT, RiMNT);

  // now compute inv(M)*D'
  MatrixNd::transpose(D, workM);
  LinAlgd::solve_chol_fast(cholM, workM);

  // now compute D*inv(M)*D' and N*inv(M)*D' (this is identical to D*inv(M)*N')
  D.mult(workM, DiMDT);
  N.mult(workM, NiMDT);
  MatrixNd::transpose(NiMDT, DiMNT);

  // finally, compute N*inv(M)*N'
  MatrixNd::transpose(N, workM);
  LinAlgd::solve_chol_fast(cholM, workM);
  N.mult(workM, NiMNT);

  // setup the G matrix
  MatrixNd G(NVARS, NVARS);
  G.block(R_INDEX, R_INDEX+R_SIZE, R_INDEX, R_INDEX+R_SIZE) = RiMRT;
  G.block(R_INDEX, R_INDEX+R_SIZE, D_INDEX, D_INDEX+D_SIZE) = RiMDT;
  G.block(R_INDEX, R_INDEX+R_SIZE, N_INDEX, N_INDEX+N_SIZE) = RiMNT;
  G.block(D_INDEX, D_INDEX+D_SIZE, R_INDEX, R_INDEX+R_SIZE) = DiMRT; 
  G.block(D_INDEX, D_INDEX+D_SIZE, D_INDEX, D_INDEX+D_SIZE) = DiMDT;
  G.block(D_INDEX, D_INDEX+D_SIZE, N_INDEX, N_INDEX+N_SIZE) = DiMNT;
  G.block(N_INDEX, N_INDEX+N_SIZE, R_INDEX, R_INDEX+R_SIZE) = NiMRT; 
  G.block(N_INDEX, N_INDEX+N_SIZE, D_INDEX, D_INDEX+D_SIZE) = NiMDT; 
  G.block(N_INDEX, N_INDEX+N_SIZE, N_INDEX, N_INDEX+N_SIZE) = NiMNT;

  // setup the c vector
  VectorNd workv;
  VectorNd c(NVARS);
  c.segment(R_INDEX, R_INDEX+R_SIZE) = R.mult(k, workv);
  c.segment(D_INDEX, D_INDEX+D_SIZE) = R.mult(k, workv);
  c.segment(N_INDEX, N_INDEX+N_SIZE) = R.mult(k, workv);

  // setup the inequality constraint matrix (P)
  MatrixNd JT;
  MatrixNd::transpose(J, JT);
  MatrixNd P(N_CONTACTS+N_ACT*2,NVARS);
  P.block(0, N_CONTACTS, R_INDEX, R_INDEX+R_SIZE) = RiMRT;
  P.block(0, N_CONTACTS, R_INDEX, D_INDEX+D_SIZE) = RiMDT;
  P.block(0, N_CONTACTS, R_INDEX, N_INDEX+N_SIZE) = RiMNT;
  P.block(N_CONTACTS, P.rows(), 0, NVARS).set_zero();
  P.block(N_CONTACTS, N_CONTACTS+J.columns(), R_INDEX, R_INDEX+R_SIZE) = JT;
  P.block(N_CONTACTS+J.columns(), P.rows(), R_INDEX, R_INDEX+R_SIZE) = JT;
  P.block(N_CONTACTS+J.columns(), P.rows(), R_INDEX, R_INDEX+R_SIZE).negate();

  // setup the inequality constraint vector (p)
  VectorNd p(N_CONTACTS+N_ACT*2);
  p.segment(0, N_CONTACTS) = c.segment(N_INDEX, N_INDEX+N_SIZE);
  p.segment(N_CONTACTS, J.columns()) = lolimit;
  p.segment(N_CONTACTS+J.columns(), N_CONTACTS+J.columns()*2) = hilimit;
  p.segment(N_CONTACTS+J.columns(), N_CONTACTS+J.columns()*2).negate();

  // setup the equality constraint matrix (Q)
  const MatrixNd& Q = D;

  // setup the equality constraint vector (q)
  VectorNd q(D.rows());
  q.set_zero();

  // setup bounds on variables -- only N variables have lower bounds
  VectorNd lb(NVARS), ub(NVARS);
  lb.set_one() *= -INF;
  ub.set_one() *= INF;
  lb.segment(N_INDEX, N_INDEX+N_SIZE).set_zero();

  // solve the QP using QLCPD
  Opt::QPActiveSet qp;
  VectorNd z;
  bool success = qp.qp_activeset(G, c, lb, ub, P, p, Q, q, z);

  // verify that we have success
  if (!success)
    throw std::runtime_error("Unexpected QP solver failure!");
  else
    fr = z.segment(R_INDEX, R_INDEX+R_SIZE); 
}