Example #1
0
  // add a node at a pose
  // <pos> is x,y,th, with th in radians
  // returns node position 
  int SysSPA2d::addNode(const Vector3d &pos, int id)
  {
    Node2d nd;
    nd.nodeId = id;

    nd.arot = pos(2);
    nd.trans.head(2) = pos.head(2);
    nd.trans(2) = 1.0;

    // add in to system
    nd.setTransform();          // set up world2node transform
    nd.setDr();
    int ndi = nodes.size();
    nodes.push_back(nd);
    return ndi;
  }
Example #2
0
  // add a constraint
  // <nd0>, <nd1> are node id's
  // <mean> is x,y,th, with th in radians
  // <prec> is a 3x3 precision matrix (inverse covariance
  // returns true if nodes are found
  // TODO: make node lookup more efficient
  bool SysSPA2d::addConstraint(int ndi0, int ndi1, const Vector3d &mean, 
                                 const Matrix3d &prec)
  {
    int ni0 = -1, ni1 = -1;
    for (int i=0; i<(int)nodes.size(); i++)
      {
        if (nodes[i].nodeId == ndi0)
          ni0 = i;
        if (nodes[i].nodeId == ndi1)
          ni1 = i;
      }
    if (ni0 < 0 || ni1 < 0) return false;
    
    Con2dP2 con;
    con.ndr = ni0;
    con.nd1 = ni1;

    con.tmean = mean.head(2);
    con.amean = mean(2);
    con.prec = prec;
    p2cons.push_back(con);
    return true;
  }
Example #3
0
  // Set up sparse linear system for Delayed Sparse Information Filter
  void SysSPA2d::setupSparseDSIF(int newnode)
  {
    cout << "[SetupDSIF] at " << newnode << endl;

    // set matrix sizes and clear
    // assumes scales vars are all free
    int nFree = nodes.size() - nFixed;

    //    long long t0, t1, t2, t3;
    //    t0 = utime();

    // don't erase old stuff here, the delayed filter just grows in size
    csp.setupBlockStructure(nFree,false); // initialize CSparse structures

    //    t1 = utime();

    // loop over P2 constraints
    for(size_t pi=0; pi<p2cons.size(); pi++)
      {
        Con2dP2 &con = p2cons[pi];

        // don't consider old constraints
        if (con.ndr < newnode && con.nd1 < newnode)
          continue;

        con.setJacobians(nodes);

        // add in 4 blocks of A; actually just need upper triangular
        int i0 = con.ndr-nFixed; // will be negative if fixed
        int i1 = con.nd1-nFixed; // will be negative if fixed
        
        if (i0 != i1-1) continue; // just sequential nodes for now

        if (i0>=0)
          {
            Matrix<double,3,3> m = con.J0t*con.prec*con.J0;
            csp.addDiagBlock(m,i0);
          }
        if (i1>=0)
          {
            Matrix<double,3,3> m = con.J1t*con.prec*con.J1;
            csp.addDiagBlock(m,i1);

            if (i0>=0)
              {
                Matrix<double,3,3> m2 = con.J0t*con.prec*con.J1;
                if (i1 < i0)
                  {
                    m = m2.transpose();
                    csp.addOffdiagBlock(m,i1,i0);
                  }
                else
                  {
                    csp.addOffdiagBlock(m2,i0,i1);
                  }
              }
          }

        // add in 2 blocks of B
        // (J u + e)^T G J

        if (i0>=0)
          {
            Vector3d u;
            u.head(2) = nodes[con.ndr].trans.head(2);
            u(2) = nodes[con.ndr].arot;
            Vector3d bm = con.err + con.J0 * u;
            csp.B.block<3,1>(i0*3,0) += (bm.transpose() * con.prec * con.J0).transpose();
          }
        if (i1>=0)
          {
            Vector3d u;
            u.head(2) = nodes[con.nd1].trans.head(2);
            u(2) = nodes[con.nd1].arot;
            Vector3d bm = con.err + con.J1 * u;
            csp.B.block<3,1>(i1*3,0) += (bm.transpose() * con.prec * con.J1).transpose();
          }

      } // finish P2 constraints

    //    t2 = utime();

    csp.Bprev = csp.B;          // save for next iteration

    // set up sparse matrix structure from blocks
    csp.setupCSstructure(1.0,true); 

    //    t3 = utime();

    //    printf("\n[SetupSparseSys] Block: %0.1f   Cons: %0.1f  CS: %0.1f\n",
    //           (t1-t0)*.001, (t2-t1)*.001, (t3-t2)*.001);

  }
Example #4
0
  /// Run the Delayed Sparse Information Filter (Eustice et al.)
  /// <newnode> is the index of the first new node added since the last iteration
  /// <useCSparse> is true for sparse Cholesky.
  void SysSPA2d::doDSIF(int newnode, bool useCSparse)
  {
    // number of nodes
    int nnodes = nodes.size();

    // set number of constraints
    int ncons = p2cons.size();

    // check for fixed frames
    if (nFixed <= 0)
      {
        cout << "[doDSIF] No fixed frames" << endl;
        return;
      }

    // check for newnode being ok
    if (newnode >= nnodes)
      {
        cout << "[doDSIF] no new nodes to add" << endl;
        return;
      }

    nFixed = 0;

    for (int i=0; i<nnodes; i++)
      {
        Node2d &nd = nodes[i];
        if (i >= nFixed)
          nd.isFixed = false;
        else 
          nd.isFixed = true;
        nd.setTransform();      // set up world-to-node transform for cost calculation
        nd.setDr();             // always use local angles
      }

    // initialize vars
    double cost = calcCost();
    if (verbose)
      cout << " Initial squared cost: " << cost << " which is " 
           << sqrt(cost/ncons) << " rms error" << endl;

    if (newnode == 1)
      {
        // set up first system with node 0
        csp.setupBlockStructure(1,false); // initialize CSparse structures
        Matrix3d m;
        m.setIdentity();
        m = m*1000000;
        csp.addDiagBlock(m,0);
        Vector3d u;
        u.head(2) = nodes[0].trans.head(2);
        u(2) = nodes[0].arot;
        csp.B.head(3) = u*1000000;
        csp.Bprev = csp.B;              // save for next iteration
        cout << "[doDSIF] B = " << csp.B.transpose() << endl;    
      }

    // set up and solve linear system
    // NOTE: shouldn't need to redo all calcs in setupSys if we 
    //   got here from a bad update

    long long t0, t1, t2, t3;
    t0 = utime();
    if (useCSparse)
      setupSparseDSIF(newnode); // set up sparse linear system
    else
      {}

#if 1
    cout << "[doDSIF] B = " << csp.B.transpose() << endl;
    csp.uncompress(A);
    cout << "[doDSIF] A = " << endl << A << endl;
#endif

    //        cout << "[SPA] Solving...";
    t1 = utime();
    if (useCSparse)
      {
        bool ok = csp.doChol();
        if (!ok)
          cout << "[doDSIF] Sparse Cholesky failed!" << endl;
      }
    else
      A.ldlt().solveInPlace(B); // Cholesky decomposition and solution
    t2 = utime();
    //        cout << "solved" << endl;

    // get correct result vector
    VectorXd &BB = useCSparse ? csp.B : B;

    cout << "[doDSIF] RES  = " << BB.transpose() << endl;

    // update the frames
    int ci = 0;
    for(int i=0; i < nnodes; i++)
      {
        Node2d &nd = nodes[i];
        if (nd.isFixed) continue; // not to be updated
        nd.trans.head<2>() = BB.segment<2>(ci);
        nd.arot = BB(ci+2); 
        nd.normArot();
        nd.setTransform();  // set up projection matrix for cost calculation
        nd.setDr();         // set rotational derivatives
        ci += 3;            // advance B index
      }

    // new cost
    double newcost = calcCost();
    if (verbose)
      cout << " Updated squared cost: " << newcost << " which is " 
           << sqrt(newcost/ncons) << " rms error" << endl;
        
    t3 = utime();
  }
Example #5
0
  // Set up sparse linear system for Delayed Sparse Information Filter
  void SysSPA2d::setupSparseDSIF(int newnode)
  {
    cout << "[SetupDSIF] at " << newnode << endl;

    // set matrix sizes and clear
    // assumes scales vars are all free
    int nFree = nodes.size() - nFixed;

    //    long long t0, t1, t2, t3;
    //    t0 = utime();

    // don't erase old stuff here, the delayed filter just grows in size
    csp.setupBlockStructure(nFree,false); // initialize CSparse structures

    //    t1 = utime();

    // loop over P2 constraints
    for(size_t pi=0; pi<p2cons.size(); pi++)
      {
        Con2dP2 &con = p2cons[pi];

        // don't consider old constraints
        if (con.ndr < newnode && con.nd1 < newnode)
          continue;

        con.setJacobians(nodes);
        Matrix3d J;
        J.setIdentity();
        Vector2d dRdth = nodes[con.ndr].dRdx.transpose() * con.tmean;
        J(0,2) = dRdth(0);
        J(1,2) = dRdth(1);
        Matrix3d Jt = J.transpose();
        Vector3d u;
        u.head(2) = nodes[con.ndr].trans.head(2);
        u(2) = nodes[con.ndr].arot;
        Vector3d f;
        f.head(2) = u.head(2) + nodes[con.ndr].w2n.transpose().block<2,2>(0,0) * con.tmean;
        f(2) = u(2) + con.amean;
        if (f(2) > M_PI) f(2) -= 2.0*M_PI;
        if (f(2) < M_PI) f(2) += 2.0*M_PI;

        
        cout << "[SetupDSIF] u  = " << u.transpose() << endl;
        cout << "[SetupDSIF] f  = " << f.transpose() << endl;
        cout << "[SetupDSIF] fo = " << nodes[con.nd1].trans.transpose() << endl;


        // add in 4 blocks of A; actually just need upper triangular
        int i0 = con.ndr-nFixed; // will be negative if fixed
        int i1 = con.nd1-nFixed; // will be negative if fixed
        
        if (i0 != i1-1) continue; // just sequential nodes for now

        if (i0>=0)
          {
            Matrix<double,3,3> m = Jt*con.prec*J;
            csp.addDiagBlock(m,i0);
          }
        if (i1>=0)
          {
            Matrix<double,3,3> m = con.prec;
            csp.addDiagBlock(m,i1);

            if (i0>=0)
              {
                Matrix<double,3,3> m2 = -con.prec * J;


                if (i1 < i0)
                  {
                    m = m2.transpose();
                    csp.addOffdiagBlock(m,i1,i0);
                  }
                else
                  {
                    csp.addOffdiagBlock(m2,i0,i1);
                  }
              }
          }

        // add in 2 blocks of B
        // Jt Gamma (J u - e)
        Vector3d Juf = J*u - f;
        if (Juf(2) > M_PI) Juf(2) -= 2.0*M_PI;
        if (Juf(2) < M_PI) Juf(2) += 2.0*M_PI;
        if (i0>=0)
          {
            csp.B.block<3,1>(i0*3,0) += Jt * con.prec * Juf;
          }
        if (i1>=0)
          {
            csp.B.block<3,1>(i1*3,0) -= con.prec * Juf;
          }

      } // finish P2 constraints

    //    t2 = utime();

    csp.Bprev = csp.B;          // save for next iteration

    // set up sparse matrix structure from blocks
    csp.setupCSstructure(1.0,true); 

    //    t3 = utime();

    //    printf("\n[SetupSparseSys] Block: %0.1f   Cons: %0.1f  CS: %0.1f\n",
    //           (t1-t0)*.001, (t2-t1)*.001, (t3-t2)*.001);

  }
Example #6
0
int main(int argc, char **argv)
{
  char *fin;

  if (argc < 2)
    {
      cout << "Arguments are:  <input filename> [<number of nodes to use>]" << endl;
      return -1;
    }

  // number of nodes to use
  int nnodes = 0;

  if (argc > 2)
    nnodes = atoi(argv[2]);

  int doiters = 10;
  if (argc > 3)
    doiters = atoi(argv[3]);

  fin = argv[1];

  // node translation
  std::vector< Eigen::Vector2d, Eigen::aligned_allocator<Eigen::Vector2d> > ntrans;
  // node rotation
  std::vector< double > arots;
  // constraint indices
  std::vector< Eigen::Vector2i, Eigen::aligned_allocator<Eigen::Vector2i> > cind;
  // constraint local translation 
  std::vector< Eigen::Vector2d, Eigen::aligned_allocator<Eigen::Vector2d> > ctrans;
  // constraint local rotation as quaternion
  std::vector< double > carot;
  // constraint precision
  std::vector< Eigen::Matrix<double,3,3>, Eigen::aligned_allocator<Eigen::Matrix<double,3,3> > > cvar;
  // scans
  std::vector< std::vector< Eigen::Vector2d, Eigen::aligned_allocator<Eigen::Vector2d> > > scans;

  ReadSPA2dFile(fin,ntrans,arots,cind,ctrans,carot,cvar,scans);

  cout << "# [ReadSPA2dFile] Found " << (int)ntrans.size() << " nodes and " 
       << (int)cind.size() << " constraints" << endl;


  // system
  SysSPA2d spa;
  spa.verbose=false;
  spa.print_iros_stats=false;
  //  spa.useCholmod(false);
  spa.useCholmod(true);


  // use max nodes if we haven't specified it
  if (nnodes == 0) nnodes = ntrans.size();
  if (nnodes > (int)ntrans.size()) nnodes = ntrans.size();

  // add first node
  Node2d nd;

  // rotation
  nd.arot = arots[0];
  // translation
  Vector3d v;
  v.head(2) = ntrans[0];
  v(2) = 1.0;
  nd.trans = v;

  double cumtime = 0.0;
  //cout << nd.trans.transpose() << endl << endl;

  // add to system
  nd.setTransform();            // set up world2node transform
  nd.setDr();
  spa.nodes.push_back(nd);
  // add in nodes
  for (int i=0; i<nnodes-1; i+=doiters)
    {
      for (int j=0; j<doiters; j++)
        addnode(spa, i+j+1, ntrans, arots, cind, ctrans, carot, cvar);

      // cout << "[SysSPA2d] Using " << (int)spa.nodes.size() << " nodes and " 
      //      << (int)spa.p2cons.size() << " constraints" << endl;

      long long t0, t1;

      spa.nFixed = 1;           // one fixed frame

      t0 = utime();
      //      spa.doSPA(1,1.0e-4,SBA_SPARSE_CHOLESKY);
      spa.doSPA(1,1.0e-4,SBA_BLOCK_JACOBIAN_PCG,1.0e-8,15);
      t1 = utime();
      cumtime += t1 - t0;
      if (i%100 == 0) 
        {
          cout << "[SPA2D] iteration: " << i << " squared cost " << spa.errcost << endl;
          cerr << i << " " << cumtime*.001 << " " << spa.errcost << endl;
        }
    }



  printf("[TestSPA2D] Compute took %0.2f ms/node, total time %0.2f ms; error %0.2f\n", 0.001*(double)cumtime/(double)nnodes, cumtime*0.001, spa.errcost);
  // printf("[TestSPA] Accepted iterations: %d\n", niters);
  // printf("[TestSPA] Distance cost: %0.3f m rms\n", sqrt(spa.calcCost(true)/(double)spa.p2cons.size()));

    // if (verbose()){
    //   cerr << "iteration= " << niters 
    // 	   << "\t chi2= " << spa.calcCost();
    //     << "\t time= " << 0.0
    //     << "\t cumTime= " << 0.0
    //     << "\t kurtChi2= " << this->kurtChi2()
    //     << endl;
    // }



#if 0
  ofstream ofs("opt2d-ground.txt");
  for (int i=0; i<(int)ntrans.size(); i++)
    ofs << ntrans[i].transpose() << endl;
  ofs.close();

  ofstream ofs2("opt2d-opt.txt");
  for (int i=0; i<(int)spa.nodes.size(); i++)
    ofs2 << spa.nodes[i].trans.transpose().head(2) << endl;
  ofs2.close();
#endif

  //  spa.writeSparseA("sphere-sparse.txt",true);

  return 0;
}
Example #7
0
RigidBodySystem::StateVector<double> RigidBodySystem::dynamics(const double& t, const RigidBodySystem::StateVector<double>& x, const RigidBodySystem::InputVector<double>& u) const {
  using namespace std;
  using namespace Eigen;
  eigen_aligned_unordered_map<const RigidBody *, Matrix<double, 6, 1> > f_ext;

  // todo: make kinematics cache once and re-use it (but have to make one per type)
  auto nq = tree->num_positions;
  auto nv = tree->num_velocities;
  auto num_actuators = tree->actuators.size();
  auto q = x.topRows(nq);
  auto v = x.bottomRows(nv);
  auto kinsol = tree->doKinematics(q,v);

  // todo: preallocate the optimization problem and constraints, and simply update them then solve on each function eval.
  // happily, this clunkier version seems fast enough for now
  // the optimization framework should support this (though it has not been tested thoroughly yet)
  OptimizationProblem prog;
  auto const & vdot = prog.addContinuousVariables(nv,"vdot");

  auto H = tree->massMatrix(kinsol);
  Eigen::MatrixXd H_and_neg_JT = H;

  { // loop through rigid body force elements and accumulate f_ext

    // todo: distinguish between AppliedForce and ConstraintForce
    // todo: have AppliedForce output tau (instead of f_ext).  it's more direct, and just as easy to compute.

    int u_index = 0;
    const NullVector<double> force_state;  // todo:  will have to handle this case
    for (auto const & prop : props) {
      RigidBodyFrame* frame = prop->getFrame();
      RigidBody* body = frame->body.get();
      int num_inputs = 1;  // todo: generalize this
      RigidBodyPropellor::InputVector<double> u_i(u.middleRows(u_index,num_inputs));
      // todo: push the frame to body transform into the dynamicsBias method?
      Matrix<double,6,1> f_ext_i = transformSpatialForce(frame->transform_to_body,prop->output(t,force_state,u_i,kinsol));
      if (f_ext.find(body)==f_ext.end()) f_ext[body] = f_ext_i;
      else f_ext[body] = f_ext[body]+f_ext_i;
      u_index += num_inputs;
    }
  }

  VectorXd C = tree->dynamicsBiasTerm(kinsol,f_ext);
  if (num_actuators > 0) C -= tree->B*u.topRows(num_actuators);

  { // apply contact forces
    const bool use_multi_contact = false;
    VectorXd phi;
    Matrix3Xd normal, xA, xB;
    vector<int> bodyA_idx, bodyB_idx;
    if (use_multi_contact)
      tree->potentialCollisions(kinsol,phi,normal,xA,xB,bodyA_idx,bodyB_idx);
    else
      tree->collisionDetect(kinsol,phi,normal,xA,xB,bodyA_idx,bodyB_idx);

    for (int i=0; i<phi.rows(); i++) {
      if (phi(i)<0.0) { // then i have contact
        double mu = 1.0; // todo: make this a parameter

        // todo: move this entire block to a shared an updated "contactJacobian" method in RigidBodyTree
        auto JA = tree->transformPointsJacobian(kinsol, xA.col(i), bodyA_idx[i], 0, false);
        auto JB = tree->transformPointsJacobian(kinsol, xB.col(i), bodyB_idx[i], 0, false);
        Vector3d this_normal = normal.col(i);

        // compute the surface tangent basis
        Vector3d tangent1;
        if (1.0 - this_normal(2) < EPSILON) { // handle the unit-normal case (since it's unit length, just check z)
          tangent1 << 1.0, 0.0, 0.0;
        } else if (1 + this_normal(2) < EPSILON) {
          tangent1 << -1.0, 0.0, 0.0;  //same for the reflected case
        } else {// now the general case
          tangent1 << this_normal(1), -this_normal(0), 0.0;
          tangent1 /= sqrt(this_normal(1)*this_normal(1) + this_normal(0)*this_normal(0));
        }
        Vector3d tangent2 = tangent1.cross(this_normal);
        Matrix3d R;  // rotation into normal coordinates
        R << tangent1, tangent2, this_normal;
        auto J = R*(JA-JB);  // J = [ D1; D2; n ]
        auto relative_velocity = J*v;  // [ tangent1dot; tangent2dot; phidot ]

        if (false) {
          // spring law for normal force:  fA_normal = -k*phi - b*phidot
          // and damping for tangential force:  fA_tangent = -b*tangentdot (bounded by the friction cone)
          double k = 150, b = k / 10;  // todo: put these somewhere better... or make them parameters?
          Vector3d fA;
          fA(2) = -k * phi(i) - b * relative_velocity(2);
          fA.head(2) = -std::min(b, mu*fA(2)/(relative_velocity.head(2).norm()+EPSILON)) * relative_velocity.head(2);  // epsilon to avoid divide by zero

          // equal and opposite: fB = -fA.
          // tau = (R*JA)^T fA + (R*JB)^T fB = J^T fA
          C -= J.transpose()*fA;
        } else { // linear acceleration constraints (more expensive, but less tuning required for robot mass, etc)
          // note: this does not work for the multi-contact case (it overly constrains the motion of the link).  Perhaps if I made them inequality constraints...
          static_assert(!use_multi_contact, "The acceleration contact constraints do not play well with multi-contact");

          // phiddot = -2*alpha*phidot - alpha^2*phi   // critically damped response
          // tangential_velocity_dot = -2*alpha*tangential_velocity
          double alpha = 20;  // todo: put this somewhere better... or make them parameters?
          Vector3d desired_relative_acceleration = -2*alpha*relative_velocity;
          desired_relative_acceleration(2) += -alpha*alpha*phi(i);
          // relative_acceleration = J*vdot + R*(JAdotv - JBdotv) // uses the standard dnormal/dq = 0 assumption

          cout << "phi = " << phi << endl;
          cout << "desired acceleration = " << desired_relative_acceleration.transpose() << endl;
//          cout << "acceleration = " << (J*vdot + R*(JAdotv - JBdotv)).transpose() << endl;

          prog.addContinuousVariables(3,"contact normal force");
          auto JAdotv = tree->transformPointsJacobianDotTimesV(kinsol, xA.col(i).eval(), bodyA_idx[i], 0);
          auto JBdotv = tree->transformPointsJacobianDotTimesV(kinsol, xB.col(i).eval(), bodyB_idx[i], 0);

          prog.addLinearEqualityConstraint(J,desired_relative_acceleration - R*(JAdotv - JBdotv),{vdot});
          H_and_neg_JT.conservativeResize(NoChange,H_and_neg_JT.cols()+3);
          H_and_neg_JT.rightCols(3) = -J.transpose();
        }
      }
    }
  }

  if (tree->getNumPositionConstraints()) {
    int nc = tree->getNumPositionConstraints();
    const double alpha = 5.0;  // 1/time constant of position constraint satisfaction (see my latex rigid body notes)

    prog.addContinuousVariables(nc,"position constraint force");  // don't actually need to use the decision variable reference that would be returned...

    // then compute the constraint force
    auto phi = tree->positionConstraints(kinsol);
    auto J = tree->positionConstraintsJacobian(kinsol,false);
    auto Jdotv = tree->positionConstraintsJacDotTimesV(kinsol);

    // phiddot = -2 alpha phidot - alpha^2 phi  (0 + critically damped stabilization term)
    prog.addLinearEqualityConstraint(J,-(Jdotv + 2*alpha*J*v + alpha*alpha*phi),{vdot});
    H_and_neg_JT.conservativeResize(NoChange,H_and_neg_JT.cols()+J.rows());
    H_and_neg_JT.rightCols(J.rows()) = -J.transpose();
  }

  // add [H,-J^T]*[vdot;f] = -C
  prog.addLinearEqualityConstraint(H_and_neg_JT,-C);

  prog.solve();
  //      prog.printSolution();

  StateVector<double> dot(nq+nv);
  dot << kinsol.transformPositionDotMappingToVelocityMapping(Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>::Identity(nq, nq))*v, vdot.value();
  return dot;
}
RigidBodySystem::StateVector<double> RigidBodySystem::dynamics(
    const double& t, const RigidBodySystem::StateVector<double>& x,
    const RigidBodySystem::InputVector<double>& u) const {
  using namespace std;
  using namespace Eigen;
  eigen_aligned_unordered_map<const RigidBody*, Matrix<double, 6, 1> > f_ext;

  // todo: make kinematics cache once and re-use it (but have to make one per
  // type)
  auto nq = tree->num_positions;
  auto nv = tree->num_velocities;
  auto num_actuators = tree->actuators.size();
  auto q = x.topRows(nq);
  auto v = x.bottomRows(nv);
  auto kinsol = tree->doKinematics(q, v);

  // todo: preallocate the optimization problem and constraints, and simply
  // update them then solve on each function eval.
  // happily, this clunkier version seems fast enough for now
  // the optimization framework should support this (though it has not been
  // tested thoroughly yet)
  OptimizationProblem prog;
  auto const& vdot = prog.AddContinuousVariables(nv, "vdot");

  auto H = tree->massMatrix(kinsol);
  Eigen::MatrixXd H_and_neg_JT = H;

  VectorXd C = tree->dynamicsBiasTerm(kinsol, f_ext);
  if (num_actuators > 0) C -= tree->B * u.topRows(num_actuators);

  // loop through rigid body force elements
  {
    // todo: distinguish between AppliedForce and ConstraintForce

    size_t u_index = 0;
    for (auto const& f : force_elements) {
      size_t num_inputs = f->getNumInputs();
      VectorXd force_input(u.middleRows(u_index, num_inputs));
      C -= f->output(t, force_input, kinsol);
      u_index += num_inputs;
    }
  }

  // apply joint limit forces
  {
    for (auto const& b : tree->bodies) {
      if (!b->hasParent()) continue;
      auto const& joint = b->getJoint();
      if (joint.getNumPositions() == 1 &&
          joint.getNumVelocities() ==
              1) {  // taking advantage of only single-axis joints having joint
                    // limits makes things easier/faster here
        double qmin = joint.getJointLimitMin()(0),
               qmax = joint.getJointLimitMax()(0);
        // tau = k*(qlimit-q) - b(qdot)
        if (q(b->position_num_start) < qmin)
          C(b->velocity_num_start) -=
              penetration_stiffness * (qmin - q(b->position_num_start)) -
              penetration_damping * v(b->velocity_num_start);
        else if (q(b->position_num_start) > qmax)
          C(b->velocity_num_start) -=
              penetration_stiffness * (qmax - q(b->position_num_start)) -
              penetration_damping * v(b->velocity_num_start);
      }
    }
  }

  // apply contact forces
  {
    VectorXd phi;
    Matrix3Xd normal, xA, xB;
    vector<int> bodyA_idx, bodyB_idx;
    if (use_multi_contact)
      tree->potentialCollisions(kinsol, phi, normal, xA, xB, bodyA_idx,
                                bodyB_idx);
    else
      tree->collisionDetect(kinsol, phi, normal, xA, xB, bodyA_idx, bodyB_idx);

    for (int i = 0; i < phi.rows(); i++) {
      if (phi(i) < 0.0) {  // then i have contact
        // todo: move this entire block to a shared an updated "contactJacobian"
        // method in RigidBodyTree
        auto JA = tree->transformPointsJacobian(kinsol, xA.col(i), bodyA_idx[i],
                                                0, false);
        auto JB = tree->transformPointsJacobian(kinsol, xB.col(i), bodyB_idx[i],
                                                0, false);
        Vector3d this_normal = normal.col(i);

        // compute the surface tangent basis
        Vector3d tangent1;
        if (1.0 - this_normal(2) < EPSILON) {  // handle the unit-normal case
                                               // (since it's unit length, just
                                               // check z)
          tangent1 << 1.0, 0.0, 0.0;
        } else if (1 + this_normal(2) < EPSILON) {
          tangent1 << -1.0, 0.0, 0.0;  // same for the reflected case
        } else {                       // now the general case
          tangent1 << this_normal(1), -this_normal(0), 0.0;
          tangent1 /= sqrt(this_normal(1) * this_normal(1) +
                           this_normal(0) * this_normal(0));
        }
        Vector3d tangent2 = this_normal.cross(tangent1);
        Matrix3d R;  // rotation into normal coordinates
        R.row(0) = tangent1;
        R.row(1) = tangent2;
        R.row(2) = this_normal;
        auto J = R * (JA - JB);          // J = [ D1; D2; n ]
        auto relative_velocity = J * v;  // [ tangent1dot; tangent2dot; phidot ]

        {
          // spring law for normal force:  fA_normal = -k*phi - b*phidot
          // and damping for tangential force:  fA_tangent = -b*tangentdot
          // (bounded by the friction cone)
          Vector3d fA;
          fA(2) =
              std::max<double>(-penetration_stiffness * phi(i) -
                                   penetration_damping * relative_velocity(2),
                               0.0);
          fA.head(2) =
              -std::min<double>(
                  penetration_damping,
                  friction_coefficient * fA(2) /
                      (relative_velocity.head(2).norm() + EPSILON)) *
              relative_velocity.head(2);  // epsilon to avoid divide by zero

          // equal and opposite: fB = -fA.
          // tau = (R*JA)^T fA + (R*JB)^T fB = J^T fA
          C -= J.transpose() * fA;
        }
      }
    }
  }

  if (tree->getNumPositionConstraints()) {
    int nc = tree->getNumPositionConstraints();
    const double alpha = 5.0;  // 1/time constant of position constraint
                               // satisfaction (see my latex rigid body notes)

    prog.AddContinuousVariables(
        nc, "position constraint force");  // don't actually need to use the
                                           // decision variable reference that
                                           // would be returned...

    // then compute the constraint force
    auto phi = tree->positionConstraints(kinsol);
    auto J = tree->positionConstraintsJacobian(kinsol, false);
    auto Jdotv = tree->positionConstraintsJacDotTimesV(kinsol);

    // phiddot = -2 alpha phidot - alpha^2 phi  (0 + critically damped
    // stabilization term)
    prog.AddLinearEqualityConstraint(
        J, -(Jdotv + 2 * alpha * J * v + alpha * alpha * phi), {vdot});
    H_and_neg_JT.conservativeResize(NoChange, H_and_neg_JT.cols() + J.rows());
    H_and_neg_JT.rightCols(J.rows()) = -J.transpose();
  }

  // add [H,-J^T]*[vdot;f] = -C
  prog.AddLinearEqualityConstraint(H_and_neg_JT, -C);

  prog.Solve();
  //      prog.PrintSolution();

  StateVector<double> dot(nq + nv);
  dot << kinsol.transformPositionDotMappingToVelocityMapping(
             Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>::Identity(
                 nq, nq)) *
             v,
      vdot.value();
  return dot;
}