void btMultiBodyConstraintSolver::resolveSingleConstraintRowGenericMultiBody(const btMultiBodySolverConstraint& c)
{

	btScalar deltaImpulse = c.m_rhs-btScalar(c.m_appliedImpulse)*c.m_cfm;
	btScalar deltaVelADotn=0;
	btScalar deltaVelBDotn=0;
	int ndofA=0;
	int ndofB=0;

	if (c.m_multiBodyA)
	{
		ndofA  = (c.m_multiBodyA->isMultiDof() ? c.m_multiBodyA->getNumDofs() : c.m_multiBodyA->getNumLinks()) + 6;
		for (int i = 0; i < ndofA; ++i) 
			deltaVelADotn += m_data.m_jacobians[c.m_jacAindex+i] * m_data.m_deltaVelocities[c.m_deltaVelAindex+i];
	}

	if (c.m_multiBodyB)
	{
		ndofB  = (c.m_multiBodyB->isMultiDof() ? c.m_multiBodyB->getNumDofs() : c.m_multiBodyB->getNumLinks()) + 6;
		for (int i = 0; i < ndofB; ++i) 
			deltaVelBDotn += m_data.m_jacobians[c.m_jacBindex+i] * m_data.m_deltaVelocities[c.m_deltaVelBindex+i];
	}

	
	deltaImpulse	-=	deltaVelADotn*c.m_jacDiagABInv;//m_jacDiagABInv = 1./denom
	deltaImpulse	-=	deltaVelBDotn*c.m_jacDiagABInv;
	const btScalar sum = btScalar(c.m_appliedImpulse) + deltaImpulse;
	
	if (sum < c.m_lowerLimit)
	{
		deltaImpulse = c.m_lowerLimit-c.m_appliedImpulse;
		c.m_appliedImpulse = c.m_lowerLimit;
	}
	else if (sum > c.m_upperLimit) 
	{
		deltaImpulse = c.m_upperLimit-c.m_appliedImpulse;
		c.m_appliedImpulse = c.m_upperLimit;
	}
	else
	{
		c.m_appliedImpulse = sum;
	}

	if (c.m_multiBodyA)
	{
		applyDeltaVee(&m_data.m_deltaVelocitiesUnitImpulse[c.m_jacAindex],deltaImpulse,c.m_deltaVelAindex,ndofA);
		c.m_multiBodyA->applyDeltaVee(&m_data.m_deltaVelocitiesUnitImpulse[c.m_jacAindex],deltaImpulse);
	}
	if (c.m_multiBodyB)
	{
		applyDeltaVee(&m_data.m_deltaVelocitiesUnitImpulse[c.m_jacBindex],deltaImpulse,c.m_deltaVelBindex,ndofB);
		c.m_multiBodyB->applyDeltaVee(&m_data.m_deltaVelocitiesUnitImpulse[c.m_jacBindex],deltaImpulse);
	}
}
btScalar btMultiBodyMLCPConstraintSolver::solveGroupCacheFriendlyIterations(btCollisionObject** bodies, int numBodies, btPersistentManifold** manifoldPtr, int numManifolds, btTypedConstraint** constraints, int numConstraints, const btContactSolverInfo& infoGlobal, btIDebugDraw* debugDrawer)
{
	bool result = true;
	{
		BT_PROFILE("solveMLCP");
		result = solveMLCP(infoGlobal);
	}

	// Fallback to btSequentialImpulseConstraintSolver::solveGroupCacheFriendlyIterations if the solution isn't valid.
	if (!result)
	{
		m_fallback++;
		return btMultiBodyConstraintSolver::solveGroupCacheFriendlyIterations(bodies, numBodies, manifoldPtr, numManifolds, constraints, numConstraints, infoGlobal, debugDrawer);
	}

	{
		BT_PROFILE("process MLCP results");

		for (int i = 0; i < m_allConstraintPtrArray.size(); ++i)
		{
			const btSolverConstraint& c = *m_allConstraintPtrArray[i];

			const btScalar deltaImpulse = m_x[i] - c.m_appliedImpulse;
			c.m_appliedImpulse = m_x[i];

			int sbA = c.m_solverBodyIdA;
			int sbB = c.m_solverBodyIdB;

			btSolverBody& solverBodyA = m_tmpSolverBodyPool[sbA];
			btSolverBody& solverBodyB = m_tmpSolverBodyPool[sbB];

			solverBodyA.internalApplyImpulse(c.m_contactNormal1 * solverBodyA.internalGetInvMass(), c.m_angularComponentA, deltaImpulse);
			solverBodyB.internalApplyImpulse(c.m_contactNormal2 * solverBodyB.internalGetInvMass(), c.m_angularComponentB, deltaImpulse);

			if (infoGlobal.m_splitImpulse)
			{
				const btScalar deltaPushImpulse = m_xSplit[i] - c.m_appliedPushImpulse;
				solverBodyA.internalApplyPushImpulse(c.m_contactNormal1 * solverBodyA.internalGetInvMass(), c.m_angularComponentA, deltaPushImpulse);
				solverBodyB.internalApplyPushImpulse(c.m_contactNormal2 * solverBodyB.internalGetInvMass(), c.m_angularComponentB, deltaPushImpulse);
				c.m_appliedPushImpulse = m_xSplit[i];
			}
		}

		for (int i = 0; i < m_multiBodyAllConstraintPtrArray.size(); ++i)
		{
			btMultiBodySolverConstraint& c = *m_multiBodyAllConstraintPtrArray[i];

			const btScalar deltaImpulse = m_multiBodyX[i] - c.m_appliedImpulse;
			c.m_appliedImpulse = m_multiBodyX[i];

			btMultiBody* multiBodyA = c.m_multiBodyA;
			if (multiBodyA)
			{
				const int ndofA = multiBodyA->getNumDofs() + 6;
				applyDeltaVee(&m_data.m_deltaVelocitiesUnitImpulse[c.m_jacAindex], deltaImpulse, c.m_deltaVelAindex, ndofA);
#ifdef DIRECTLY_UPDATE_VELOCITY_DURING_SOLVER_ITERATIONS
				//note: update of the actual velocities (below) in the multibody does not have to happen now since m_deltaVelocities can be applied after all iterations
				//it would make the multibody solver more like the regular one with m_deltaVelocities being equivalent to btSolverBody::m_deltaLinearVelocity/m_deltaAngularVelocity
				multiBodyA->applyDeltaVeeMultiDof2(&m_data.m_deltaVelocitiesUnitImpulse[c.m_jacAindex], deltaImpulse);
#endif  // DIRECTLY_UPDATE_VELOCITY_DURING_SOLVER_ITERATIONS
			}
			else
			{
				const int sbA = c.m_solverBodyIdA;
				btSolverBody& solverBodyA = m_tmpSolverBodyPool[sbA];
				solverBodyA.internalApplyImpulse(c.m_contactNormal1 * solverBodyA.internalGetInvMass(), c.m_angularComponentA, deltaImpulse);
			}

			btMultiBody* multiBodyB = c.m_multiBodyB;
			if (multiBodyB)
			{
				const int ndofB = multiBodyB->getNumDofs() + 6;
				applyDeltaVee(&m_data.m_deltaVelocitiesUnitImpulse[c.m_jacBindex], deltaImpulse, c.m_deltaVelBindex, ndofB);
#ifdef DIRECTLY_UPDATE_VELOCITY_DURING_SOLVER_ITERATIONS
				//note: update of the actual velocities (below) in the multibody does not have to happen now since m_deltaVelocities can be applied after all iterations
				//it would make the multibody solver more like the regular one with m_deltaVelocities being equivalent to btSolverBody::m_deltaLinearVelocity/m_deltaAngularVelocity
				multiBodyB->applyDeltaVeeMultiDof2(&m_data.m_deltaVelocitiesUnitImpulse[c.m_jacBindex], deltaImpulse);
#endif  // DIRECTLY_UPDATE_VELOCITY_DURING_SOLVER_ITERATIONS
			}
			else
			{
				const int sbB = c.m_solverBodyIdB;
				btSolverBody& solverBodyB = m_tmpSolverBodyPool[sbB];
				solverBodyB.internalApplyImpulse(c.m_contactNormal2 * solverBodyB.internalGetInvMass(), c.m_angularComponentB, deltaImpulse);
			}
		}
	}

	return btScalar(0);
}
void btMultiBody::stepVelocities(btScalar dt,
                               btAlignedObjectArray<btScalar> &scratch_r,
                               btAlignedObjectArray<btVector3> &scratch_v,
                               btAlignedObjectArray<btMatrix3x3> &scratch_m)
{
    // Implement Featherstone's algorithm to calculate joint accelerations (q_double_dot)
    // and the base linear & angular accelerations.

    // We apply damping forces in this routine as well as any external forces specified by the 
    // caller (via addBaseForce etc).

    // output should point to an array of 6 + num_links reals.
    // Format is: 3 angular accelerations (in world frame), 3 linear accelerations (in world frame),
    // num_links joint acceleration values.
    
	int num_links = getNumLinks();

    const btScalar DAMPING_K1_LINEAR = m_linearDamping;
	const btScalar DAMPING_K2_LINEAR = m_linearDamping;

	const btScalar DAMPING_K1_ANGULAR = m_angularDamping;
	const btScalar DAMPING_K2_ANGULAR= m_angularDamping;

    btVector3 base_vel = getBaseVel();
    btVector3 base_omega = getBaseOmega();

    // Temporary matrices/vectors -- use scratch space from caller
    // so that we don't have to keep reallocating every frame

    scratch_r.resize(2*num_links + 6);
    scratch_v.resize(8*num_links + 6);
    scratch_m.resize(4*num_links + 4);

    btScalar * r_ptr = &scratch_r[0];
    btScalar * output = &scratch_r[num_links];  // "output" holds the q_double_dot results
    btVector3 * v_ptr = &scratch_v[0];
    
    // vhat_i  (top = angular, bottom = linear part)
    btVector3 * vel_top_angular = v_ptr; v_ptr += num_links + 1;
    btVector3 * vel_bottom_linear = v_ptr; v_ptr += num_links + 1;

    // zhat_i^A
    btVector3 * zero_acc_top_angular = v_ptr; v_ptr += num_links + 1;
    btVector3 * zero_acc_bottom_linear = v_ptr; v_ptr += num_links + 1;

    // chat_i  (note NOT defined for the base)
    btVector3 * coriolis_top_angular = v_ptr; v_ptr += num_links;
    btVector3 * coriolis_bottom_linear = v_ptr; v_ptr += num_links;

    // top left, top right and bottom left blocks of Ihat_i^A.
    // bottom right block = transpose of top left block and is not stored.
    // Note: the top right and bottom left blocks are always symmetric matrices, but we don't make use of this fact currently.
    btMatrix3x3 * inertia_top_left = &scratch_m[num_links + 1];
    btMatrix3x3 * inertia_top_right = &scratch_m[2*num_links + 2];
    btMatrix3x3 * inertia_bottom_left = &scratch_m[3*num_links + 3];

    // Cached 3x3 rotation matrices from parent frame to this frame.
    btMatrix3x3 * rot_from_parent = &matrix_buf[0];
    btMatrix3x3 * rot_from_world = &scratch_m[0];

    // hhat_i, ahat_i
    // hhat is NOT stored for the base (but ahat is)
    btVector3 * h_top = num_links > 0 ? &vector_buf[0] : 0;
    btVector3 * h_bottom = num_links > 0 ? &vector_buf[num_links] : 0;
    btVector3 * accel_top = v_ptr; v_ptr += num_links + 1;
    btVector3 * accel_bottom = v_ptr; v_ptr += num_links + 1;

    // Y_i, D_i
    btScalar * Y = r_ptr; r_ptr += num_links;
    btScalar * D = num_links > 0 ? &m_real_buf[6 + num_links] : 0;

    // ptr to the joint accel part of the output
    btScalar * joint_accel = output + 6;


    // Start of the algorithm proper.
    
    // First 'upward' loop.
    // Combines CompTreeLinkVelocities and InitTreeLinks from Mirtich.

    rot_from_parent[0] = btMatrix3x3(base_quat);

    vel_top_angular[0] = rot_from_parent[0] * base_omega;
    vel_bottom_linear[0] = rot_from_parent[0] * base_vel;

    if (fixed_base) {
        zero_acc_top_angular[0] = zero_acc_bottom_linear[0] = btVector3(0,0,0);
    } else {
        zero_acc_top_angular[0] = - (rot_from_parent[0] * (base_force 
                                                   - base_mass*(DAMPING_K1_LINEAR+DAMPING_K2_LINEAR*base_vel.norm())*base_vel));
		
        zero_acc_bottom_linear[0] =
            - (rot_from_parent[0] * base_torque);

		if (m_useGyroTerm)
			zero_acc_bottom_linear[0]+=vel_top_angular[0].cross( base_inertia * vel_top_angular[0] );

        zero_acc_bottom_linear[0] += base_inertia * vel_top_angular[0] * (DAMPING_K1_ANGULAR + DAMPING_K2_ANGULAR*vel_top_angular[0].norm());

    }



    inertia_top_left[0] = btMatrix3x3(0,0,0,0,0,0,0,0,0);//::Zero();
	
	
    inertia_top_right[0].setValue(base_mass, 0, 0,
                            0, base_mass, 0,
                            0, 0, base_mass);
    inertia_bottom_left[0].setValue(base_inertia[0], 0, 0,
                              0, base_inertia[1], 0,
                              0, 0, base_inertia[2]);

    rot_from_world[0] = rot_from_parent[0];

    for (int i = 0; i < num_links; ++i) {
        const int parent = links[i].parent;
        rot_from_parent[i+1] = btMatrix3x3(links[i].cached_rot_parent_to_this);
		

        rot_from_world[i+1] = rot_from_parent[i+1] * rot_from_world[parent+1];
        
        // vhat_i = i_xhat_p(i) * vhat_p(i)
        SpatialTransform(rot_from_parent[i+1], links[i].cached_r_vector,
                         vel_top_angular[parent+1], vel_bottom_linear[parent+1],
                         vel_top_angular[i+1], vel_bottom_linear[i+1]);

        // we can now calculate chat_i
        // remember vhat_i is really vhat_p(i) (but in current frame) at this point
        coriolis_bottom_linear[i] = vel_top_angular[i+1].cross(vel_top_angular[i+1].cross(links[i].cached_r_vector))
            + 2 * vel_top_angular[i+1].cross(links[i].axis_bottom) * getJointVel(i);
        if (links[i].is_revolute) {
            coriolis_top_angular[i] = vel_top_angular[i+1].cross(links[i].axis_top) * getJointVel(i);
            coriolis_bottom_linear[i] += (getJointVel(i) * getJointVel(i)) * links[i].axis_top.cross(links[i].axis_bottom);
        } else {
            coriolis_top_angular[i] = btVector3(0,0,0);
        }
        
        // now set vhat_i to its true value by doing
        // vhat_i += qidot * shat_i
        vel_top_angular[i+1] += getJointVel(i) * links[i].axis_top;
        vel_bottom_linear[i+1] += getJointVel(i) * links[i].axis_bottom;

        // calculate zhat_i^A
        zero_acc_top_angular[i+1] = - (rot_from_world[i+1] * (links[i].applied_force));
        zero_acc_top_angular[i+1] += links[i].mass * (DAMPING_K1_LINEAR + DAMPING_K2_LINEAR*vel_bottom_linear[i+1].norm()) * vel_bottom_linear[i+1];

        zero_acc_bottom_linear[i+1] =
            - (rot_from_world[i+1] * links[i].applied_torque);
		if (m_useGyroTerm)
		{
			zero_acc_bottom_linear[i+1] += vel_top_angular[i+1].cross( links[i].inertia * vel_top_angular[i+1] );
		}

        zero_acc_bottom_linear[i+1] += links[i].inertia * vel_top_angular[i+1] * (DAMPING_K1_ANGULAR + DAMPING_K2_ANGULAR*vel_top_angular[i+1].norm());

        // calculate Ihat_i^A
        inertia_top_left[i+1] = btMatrix3x3(0,0,0,0,0,0,0,0,0);//::Zero();
        inertia_top_right[i+1].setValue(links[i].mass, 0, 0,
                                  0, links[i].mass, 0,
                                  0, 0, links[i].mass);
        inertia_bottom_left[i+1].setValue(links[i].inertia[0], 0, 0,
                                    0, links[i].inertia[1], 0,
                                    0, 0, links[i].inertia[2]);
    }


    // 'Downward' loop.
    // (part of TreeForwardDynamics in Mirtich.)
    for (int i = num_links - 1; i >= 0; --i) {

        h_top[i] = inertia_top_left[i+1] * links[i].axis_top + inertia_top_right[i+1] * links[i].axis_bottom;
        h_bottom[i] = inertia_bottom_left[i+1] * links[i].axis_top + inertia_top_left[i+1].transpose() * links[i].axis_bottom;
		btScalar val = SpatialDotProduct(links[i].axis_top, links[i].axis_bottom, h_top[i], h_bottom[i]);
        D[i] = val;
		Y[i] = links[i].joint_torque
            - SpatialDotProduct(links[i].axis_top, links[i].axis_bottom, zero_acc_top_angular[i+1], zero_acc_bottom_linear[i+1])
            - SpatialDotProduct(h_top[i], h_bottom[i], coriolis_top_angular[i], coriolis_bottom_linear[i]);

        const int parent = links[i].parent;

        
        // Ip += pXi * (Ii - hi hi' / Di) * iXp
        const btScalar one_over_di = 1.0f / D[i];

		


		const btMatrix3x3 TL = inertia_top_left[i+1]   - vecMulVecTranspose(one_over_di * h_top[i] , h_bottom[i]);
        const btMatrix3x3 TR = inertia_top_right[i+1]  - vecMulVecTranspose(one_over_di * h_top[i] , h_top[i]);
        const btMatrix3x3 BL = inertia_bottom_left[i+1]- vecMulVecTranspose(one_over_di * h_bottom[i] , h_bottom[i]);


        btMatrix3x3 r_cross;
        r_cross.setValue(
            0, -links[i].cached_r_vector[2], links[i].cached_r_vector[1],
            links[i].cached_r_vector[2], 0, -links[i].cached_r_vector[0],
            -links[i].cached_r_vector[1], links[i].cached_r_vector[0], 0);
        
        inertia_top_left[parent+1] += rot_from_parent[i+1].transpose() * ( TL - TR * r_cross ) * rot_from_parent[i+1];
        inertia_top_right[parent+1] += rot_from_parent[i+1].transpose() * TR * rot_from_parent[i+1];
        inertia_bottom_left[parent+1] += rot_from_parent[i+1].transpose() *
            (r_cross * (TL - TR * r_cross) + BL - TL.transpose() * r_cross) * rot_from_parent[i+1];
        
        
        // Zp += pXi * (Zi + Ii*ci + hi*Yi/Di)
        btVector3 in_top, in_bottom, out_top, out_bottom;
        const btScalar Y_over_D = Y[i] * one_over_di;
        in_top = zero_acc_top_angular[i+1]
            + inertia_top_left[i+1] * coriolis_top_angular[i]
            + inertia_top_right[i+1] * coriolis_bottom_linear[i]
            + Y_over_D * h_top[i];
        in_bottom = zero_acc_bottom_linear[i+1]
            + inertia_bottom_left[i+1] * coriolis_top_angular[i]
            + inertia_top_left[i+1].transpose() * coriolis_bottom_linear[i]
            + Y_over_D * h_bottom[i];
        InverseSpatialTransform(rot_from_parent[i+1], links[i].cached_r_vector,
                                in_top, in_bottom, out_top, out_bottom);
        zero_acc_top_angular[parent+1] += out_top;
        zero_acc_bottom_linear[parent+1] += out_bottom;
    }


    // Second 'upward' loop
    // (part of TreeForwardDynamics in Mirtich)

    if (fixed_base) 
	{
        accel_top[0] = accel_bottom[0] = btVector3(0,0,0);
    } 
	else 
	{
        if (num_links > 0) 
		{
            //Matrix<btScalar, 6, 6> Imatrix;
            //Imatrix.block<3,3>(0,0) = inertia_top_left[0];
            //Imatrix.block<3,3>(3,0) = inertia_bottom_left[0];
            //Imatrix.block<3,3>(0,3) = inertia_top_right[0];
            //Imatrix.block<3,3>(3,3) = inertia_top_left[0].transpose();
            //cached_imatrix_lu.reset(new Eigen::LU<Matrix<btScalar, 6, 6> >(Imatrix));  // TODO: Avoid memory allocation here?

			cached_inertia_top_left = inertia_top_left[0];
			cached_inertia_top_right = inertia_top_right[0];
			cached_inertia_lower_left = inertia_bottom_left[0];
			cached_inertia_lower_right= inertia_top_left[0].transpose();

        }
		btVector3 rhs_top (zero_acc_top_angular[0][0], zero_acc_top_angular[0][1], zero_acc_top_angular[0][2]);
		btVector3 rhs_bot (zero_acc_bottom_linear[0][0], zero_acc_bottom_linear[0][1], zero_acc_bottom_linear[0][2]);
        float result[6];

		solveImatrix(rhs_top, rhs_bot, result);
//		printf("result=%f,%f,%f,%f,%f,%f\n",result[0],result[0],result[0],result[0],result[0],result[0]);
        for (int i = 0; i < 3; ++i) {
            accel_top[0][i] = -result[i];
            accel_bottom[0][i] = -result[i+3];
        }

    }

    // now do the loop over the links
    for (int i = 0; i < num_links; ++i) {
        const int parent = links[i].parent;
        SpatialTransform(rot_from_parent[i+1], links[i].cached_r_vector,
                         accel_top[parent+1], accel_bottom[parent+1],
                         accel_top[i+1], accel_bottom[i+1]);
        joint_accel[i] = (Y[i] - SpatialDotProduct(h_top[i], h_bottom[i], accel_top[i+1], accel_bottom[i+1])) / D[i];
        accel_top[i+1] += coriolis_top_angular[i] + joint_accel[i] * links[i].axis_top;
        accel_bottom[i+1] += coriolis_bottom_linear[i] + joint_accel[i] * links[i].axis_bottom;
    }

    // transform base accelerations back to the world frame.
    btVector3 omegadot_out = rot_from_parent[0].transpose() * accel_top[0];
	output[0] = omegadot_out[0];
	output[1] = omegadot_out[1];
	output[2] = omegadot_out[2];

    btVector3 vdot_out = rot_from_parent[0].transpose() * accel_bottom[0];
	output[3] = vdot_out[0];
	output[4] = vdot_out[1];
	output[5] = vdot_out[2];
    // Final step: add the accelerations (times dt) to the velocities.
    applyDeltaVee(output, dt);

	
}
void btMultiBodyConstraintSolver::setupMultiBodyContactConstraint(btMultiBodySolverConstraint& solverConstraint, 
																 const btVector3& contactNormal,
																 btManifoldPoint& cp, const btContactSolverInfo& infoGlobal,
																 btScalar& relaxation,
																 bool isFriction, btScalar desiredVelocity, btScalar cfmSlip)
{
			
	BT_PROFILE("setupMultiBodyContactConstraint");
	btVector3 rel_pos1;
	btVector3 rel_pos2;

	btMultiBody* multiBodyA = solverConstraint.m_multiBodyA;
	btMultiBody* multiBodyB = solverConstraint.m_multiBodyB;

	const btVector3& pos1 = cp.getPositionWorldOnA();
	const btVector3& pos2 = cp.getPositionWorldOnB();

	btSolverBody* bodyA = multiBodyA ? 0 : &m_tmpSolverBodyPool[solverConstraint.m_solverBodyIdA];
	btSolverBody* bodyB = multiBodyB ? 0 : &m_tmpSolverBodyPool[solverConstraint.m_solverBodyIdB];

	btRigidBody* rb0 = multiBodyA ? 0 : bodyA->m_originalBody;
	btRigidBody* rb1 = multiBodyB ? 0 : bodyB->m_originalBody;

	if (bodyA)
		rel_pos1 = pos1 - bodyA->getWorldTransform().getOrigin(); 
	if (bodyB)
		rel_pos2 = pos2 - bodyB->getWorldTransform().getOrigin();

	relaxation = 1.f;

	


	if (multiBodyA)
	{
		if (solverConstraint.m_linkA<0)
		{
			rel_pos1 = pos1 - multiBodyA->getBasePos();
		} else
		{
			rel_pos1 = pos1 - multiBodyA->getLink(solverConstraint.m_linkA).m_cachedWorldTransform.getOrigin();
		}
		const int ndofA  = (multiBodyA->isMultiDof() ? multiBodyA->getNumDofs() : multiBodyA->getNumLinks()) + 6;

		solverConstraint.m_deltaVelAindex = multiBodyA->getCompanionId();

		if (solverConstraint.m_deltaVelAindex <0)
		{
			solverConstraint.m_deltaVelAindex = m_data.m_deltaVelocities.size();
			multiBodyA->setCompanionId(solverConstraint.m_deltaVelAindex);
			m_data.m_deltaVelocities.resize(m_data.m_deltaVelocities.size()+ndofA);
		} else
		{
			btAssert(m_data.m_deltaVelocities.size() >= solverConstraint.m_deltaVelAindex+ndofA);
		}

		solverConstraint.m_jacAindex = m_data.m_jacobians.size();
		m_data.m_jacobians.resize(m_data.m_jacobians.size()+ndofA);
		m_data.m_deltaVelocitiesUnitImpulse.resize(m_data.m_deltaVelocitiesUnitImpulse.size()+ndofA);
		btAssert(m_data.m_jacobians.size() == m_data.m_deltaVelocitiesUnitImpulse.size());

		btScalar* jac1=&m_data.m_jacobians[solverConstraint.m_jacAindex];
		if(multiBodyA->isMultiDof())
			multiBodyA->fillContactJacobianMultiDof(solverConstraint.m_linkA, cp.getPositionWorldOnA(), contactNormal, jac1, m_data.scratch_r, m_data.scratch_v, m_data.scratch_m);
		else
			multiBodyA->fillContactJacobian(solverConstraint.m_linkA, cp.getPositionWorldOnA(), contactNormal, jac1, m_data.scratch_r, m_data.scratch_v, m_data.scratch_m);
		btScalar* delta = &m_data.m_deltaVelocitiesUnitImpulse[solverConstraint.m_jacAindex];
		if(multiBodyA->isMultiDof())
			multiBodyA->calcAccelerationDeltasMultiDof(&m_data.m_jacobians[solverConstraint.m_jacAindex],delta,m_data.scratch_r, m_data.scratch_v);
		else
			multiBodyA->calcAccelerationDeltas(&m_data.m_jacobians[solverConstraint.m_jacAindex],delta,m_data.scratch_r, m_data.scratch_v);

		btVector3 torqueAxis0 = rel_pos1.cross(contactNormal);
		solverConstraint.m_relpos1CrossNormal = torqueAxis0;
		solverConstraint.m_contactNormal1 = contactNormal;
	} else
	{
		btVector3 torqueAxis0 = rel_pos1.cross(contactNormal);
		solverConstraint.m_relpos1CrossNormal = torqueAxis0;
		solverConstraint.m_contactNormal1 = contactNormal;
		solverConstraint.m_angularComponentA = rb0 ? rb0->getInvInertiaTensorWorld()*torqueAxis0*rb0->getAngularFactor() : btVector3(0,0,0);
	}

	

	if (multiBodyB)
	{
		if (solverConstraint.m_linkB<0)
		{
			rel_pos2 = pos2 - multiBodyB->getBasePos();
		} else
		{
			rel_pos2 = pos2 - multiBodyB->getLink(solverConstraint.m_linkB).m_cachedWorldTransform.getOrigin();
		}

		const int ndofB  = (multiBodyB->isMultiDof() ? multiBodyB->getNumDofs() : multiBodyB->getNumLinks()) + 6;

		solverConstraint.m_deltaVelBindex = multiBodyB->getCompanionId();
		if (solverConstraint.m_deltaVelBindex <0)
		{
			solverConstraint.m_deltaVelBindex = m_data.m_deltaVelocities.size();
			multiBodyB->setCompanionId(solverConstraint.m_deltaVelBindex);
			m_data.m_deltaVelocities.resize(m_data.m_deltaVelocities.size()+ndofB);
		}

		solverConstraint.m_jacBindex = m_data.m_jacobians.size();

		m_data.m_jacobians.resize(m_data.m_jacobians.size()+ndofB);
		m_data.m_deltaVelocitiesUnitImpulse.resize(m_data.m_deltaVelocitiesUnitImpulse.size()+ndofB);
		btAssert(m_data.m_jacobians.size() == m_data.m_deltaVelocitiesUnitImpulse.size());

		if(multiBodyB->isMultiDof())
			multiBodyB->fillContactJacobianMultiDof(solverConstraint.m_linkB, cp.getPositionWorldOnB(), -contactNormal, &m_data.m_jacobians[solverConstraint.m_jacBindex], m_data.scratch_r, m_data.scratch_v, m_data.scratch_m);
		else
			multiBodyB->fillContactJacobian(solverConstraint.m_linkB, cp.getPositionWorldOnB(), -contactNormal, &m_data.m_jacobians[solverConstraint.m_jacBindex], m_data.scratch_r, m_data.scratch_v, m_data.scratch_m);
		if(multiBodyB->isMultiDof())
			multiBodyB->calcAccelerationDeltasMultiDof(&m_data.m_jacobians[solverConstraint.m_jacBindex],&m_data.m_deltaVelocitiesUnitImpulse[solverConstraint.m_jacBindex],m_data.scratch_r, m_data.scratch_v);
		else
			multiBodyB->calcAccelerationDeltas(&m_data.m_jacobians[solverConstraint.m_jacBindex],&m_data.m_deltaVelocitiesUnitImpulse[solverConstraint.m_jacBindex],m_data.scratch_r, m_data.scratch_v);
		
		btVector3 torqueAxis1 = rel_pos2.cross(contactNormal);		
		solverConstraint.m_relpos2CrossNormal = -torqueAxis1;
		solverConstraint.m_contactNormal2 = -contactNormal;
	
	} else
	{
		btVector3 torqueAxis1 = rel_pos2.cross(contactNormal);		
		solverConstraint.m_relpos2CrossNormal = -torqueAxis1;
		solverConstraint.m_contactNormal2 = -contactNormal;
	
		solverConstraint.m_angularComponentB = rb1 ? rb1->getInvInertiaTensorWorld()*-torqueAxis1*rb1->getAngularFactor() : btVector3(0,0,0);
	}

	{
						
		btVector3 vec;
		btScalar denom0 = 0.f;
		btScalar denom1 = 0.f;
		btScalar* jacB = 0;
		btScalar* jacA = 0;
		btScalar* lambdaA =0;
		btScalar* lambdaB =0;
		int ndofA  = 0;
		if (multiBodyA)
		{
			ndofA  = (multiBodyA->isMultiDof() ? multiBodyA->getNumDofs() : multiBodyA->getNumLinks()) + 6;
			jacA = &m_data.m_jacobians[solverConstraint.m_jacAindex];
			lambdaA = &m_data.m_deltaVelocitiesUnitImpulse[solverConstraint.m_jacAindex];
			for (int i = 0; i < ndofA; ++i)
			{
				btScalar j = jacA[i] ;
				btScalar l =lambdaA[i];
				denom0 += j*l;
			}
		} else
		{
			if (rb0)
			{
				vec = ( solverConstraint.m_angularComponentA).cross(rel_pos1);
				denom0 = rb0->getInvMass() + contactNormal.dot(vec);
			}
		}
		if (multiBodyB)
		{
			const int ndofB  = (multiBodyB->isMultiDof() ? multiBodyB->getNumDofs() : multiBodyB->getNumLinks()) + 6;
			jacB = &m_data.m_jacobians[solverConstraint.m_jacBindex];
			lambdaB = &m_data.m_deltaVelocitiesUnitImpulse[solverConstraint.m_jacBindex];
			for (int i = 0; i < ndofB; ++i)
			{
				btScalar j = jacB[i] ;
				btScalar l =lambdaB[i];
				denom1 += j*l;
			}

		} else
		{
			if (rb1)
			{
				vec = ( -solverConstraint.m_angularComponentB).cross(rel_pos2);
				denom1 = rb1->getInvMass() + contactNormal.dot(vec);
			}
		}

		 

		 btScalar d = denom0+denom1;
		 if (d>SIMD_EPSILON)
		 {
			solverConstraint.m_jacDiagABInv = relaxation/(d);
		 } else
		 {
			//disable the constraint row to handle singularity/redundant constraint
			solverConstraint.m_jacDiagABInv  = 0.f;
		 }
		
	}

	
	//compute rhs and remaining solverConstraint fields

	

	btScalar restitution = 0.f;
	btScalar penetration = isFriction? 0 : cp.getDistance()+infoGlobal.m_linearSlop;

	btScalar rel_vel = 0.f;
	int ndofA  = 0;
	int ndofB  = 0;
	{

		btVector3 vel1,vel2;
		if (multiBodyA)
		{
			ndofA  = (multiBodyA->isMultiDof() ? multiBodyA->getNumDofs() : multiBodyA->getNumLinks()) + 6;
			btScalar* jacA = &m_data.m_jacobians[solverConstraint.m_jacAindex];
			for (int i = 0; i < ndofA ; ++i) 
				rel_vel += multiBodyA->getVelocityVector()[i] * jacA[i];
		} else
		{
			if (rb0)
			{
				rel_vel += rb0->getVelocityInLocalPoint(rel_pos1).dot(solverConstraint.m_contactNormal1);
			}
		}
		if (multiBodyB)
		{
			ndofB  = (multiBodyB->isMultiDof() ? multiBodyB->getNumDofs() : multiBodyB->getNumLinks()) + 6;
			btScalar* jacB = &m_data.m_jacobians[solverConstraint.m_jacBindex];
			for (int i = 0; i < ndofB ; ++i) 
				rel_vel += multiBodyB->getVelocityVector()[i] * jacB[i];

		} else
		{
			if (rb1)
			{
				rel_vel += rb1->getVelocityInLocalPoint(rel_pos2).dot(solverConstraint.m_contactNormal2);
			}
		}

		solverConstraint.m_friction = cp.m_combinedFriction;

		if(!isFriction)
		{
			restitution =  restitutionCurve(rel_vel, cp.m_combinedRestitution);	
			if (restitution <= btScalar(0.))
			{
				restitution = 0.f;
			}
		}
	}


	///warm starting (or zero if disabled)
	//disable warmstarting for btMultiBody, it has issues gaining energy (==explosion)
	if (0)//infoGlobal.m_solverMode & SOLVER_USE_WARMSTARTING)
	{
		solverConstraint.m_appliedImpulse = isFriction ? 0 : cp.m_appliedImpulse * infoGlobal.m_warmstartingFactor;

		if (solverConstraint.m_appliedImpulse)
		{
			if (multiBodyA)
			{
				btScalar impulse = solverConstraint.m_appliedImpulse;
				btScalar* deltaV = &m_data.m_deltaVelocitiesUnitImpulse[solverConstraint.m_jacAindex];
				if(multiBodyA->isMultiDof())
					multiBodyA->applyDeltaVeeMultiDof(deltaV,impulse);
				else
					multiBodyA->applyDeltaVee(deltaV,impulse);
				applyDeltaVee(deltaV,impulse,solverConstraint.m_deltaVelAindex,ndofA);
			} else
			{
				if (rb0)
					bodyA->internalApplyImpulse(solverConstraint.m_contactNormal1*bodyA->internalGetInvMass()*rb0->getLinearFactor(),solverConstraint.m_angularComponentA,solverConstraint.m_appliedImpulse);
			}
			if (multiBodyB)
			{
				btScalar impulse = solverConstraint.m_appliedImpulse;
				btScalar* deltaV = &m_data.m_deltaVelocitiesUnitImpulse[solverConstraint.m_jacBindex];
				if(multiBodyB->isMultiDof())
					multiBodyB->applyDeltaVeeMultiDof(deltaV,impulse);
				else
					multiBodyB->applyDeltaVee(deltaV,impulse);
				applyDeltaVee(deltaV,impulse,solverConstraint.m_deltaVelBindex,ndofB);
			} else
			{
				if (rb1)
					bodyB->internalApplyImpulse(-solverConstraint.m_contactNormal2*bodyB->internalGetInvMass()*rb1->getLinearFactor(),-solverConstraint.m_angularComponentB,-(btScalar)solverConstraint.m_appliedImpulse);
			}
		}
	} else
	{
		solverConstraint.m_appliedImpulse = 0.f;
	}

	solverConstraint.m_appliedPushImpulse = 0.f;

	{

		btScalar positionalError = 0.f;
		btScalar velocityError = restitution - rel_vel;// * damping;	//note for friction restitution is always set to 0 (check above) so it is acutally velocityError = -rel_vel for friction

		btScalar erp = infoGlobal.m_erp2;
		if (!infoGlobal.m_splitImpulse || (penetration > infoGlobal.m_splitImpulsePenetrationThreshold))
		{
			erp = infoGlobal.m_erp;
		}

		if (penetration>0)
		{
			positionalError = 0;
			velocityError -= penetration / infoGlobal.m_timeStep;

		} else
		{
			positionalError = -penetration * erp/infoGlobal.m_timeStep;
		}

		btScalar  penetrationImpulse = positionalError*solverConstraint.m_jacDiagABInv;
		btScalar velocityImpulse = velocityError *solverConstraint.m_jacDiagABInv;

		if(!isFriction)
		{
			if (!infoGlobal.m_splitImpulse || (penetration > infoGlobal.m_splitImpulsePenetrationThreshold))
			{
				//combine position and velocity into rhs
				solverConstraint.m_rhs = penetrationImpulse+velocityImpulse;
				solverConstraint.m_rhsPenetration = 0.f;

			} else
			{
				//split position and velocity into rhs and m_rhsPenetration
				solverConstraint.m_rhs = velocityImpulse;
				solverConstraint.m_rhsPenetration = penetrationImpulse;
			}

			solverConstraint.m_lowerLimit = 0;
			solverConstraint.m_upperLimit = 1e10f;
		}
		else
		{
			solverConstraint.m_rhs = velocityImpulse;
			solverConstraint.m_rhsPenetration = 0.f;
			solverConstraint.m_lowerLimit = -solverConstraint.m_friction;
			solverConstraint.m_upperLimit = solverConstraint.m_friction;
		}

		solverConstraint.m_cfm = 0.f;			//why not use cfmSlip?
	}

}
void btMultiBodyConstraintSolver::resolveSingleConstraintRowGeneric(const btMultiBodySolverConstraint& c)
{

	btScalar deltaImpulse = c.m_rhs-btScalar(c.m_appliedImpulse)*c.m_cfm;
	btScalar deltaVelADotn=0;
	btScalar deltaVelBDotn=0;
	btSolverBody* bodyA = 0;
	btSolverBody* bodyB = 0;
	int ndofA=0;
	int ndofB=0;

	if (c.m_multiBodyA)
	{
		ndofA  = (c.m_multiBodyA->isMultiDof() ? c.m_multiBodyA->getNumDofs() : c.m_multiBodyA->getNumLinks()) + 6;
		for (int i = 0; i < ndofA; ++i) 
			deltaVelADotn += m_data.m_jacobians[c.m_jacAindex+i] * m_data.m_deltaVelocities[c.m_deltaVelAindex+i];
	} else if(c.m_solverBodyIdA >= 0)
	{
		bodyA = &m_tmpSolverBodyPool[c.m_solverBodyIdA];
		deltaVelADotn += c.m_contactNormal1.dot(bodyA->internalGetDeltaLinearVelocity()) 	+ c.m_relpos1CrossNormal.dot(bodyA->internalGetDeltaAngularVelocity());
	}

	if (c.m_multiBodyB)
	{
		ndofB  = (c.m_multiBodyB->isMultiDof() ? c.m_multiBodyB->getNumDofs() : c.m_multiBodyB->getNumLinks()) + 6;
		for (int i = 0; i < ndofB; ++i) 
			deltaVelBDotn += m_data.m_jacobians[c.m_jacBindex+i] * m_data.m_deltaVelocities[c.m_deltaVelBindex+i];
	} else if(c.m_solverBodyIdB >= 0)
	{
		bodyB = &m_tmpSolverBodyPool[c.m_solverBodyIdB];
		deltaVelBDotn += c.m_contactNormal2.dot(bodyB->internalGetDeltaLinearVelocity())  + c.m_relpos2CrossNormal.dot(bodyB->internalGetDeltaAngularVelocity());
	}

	
	deltaImpulse	-=	deltaVelADotn*c.m_jacDiagABInv;//m_jacDiagABInv = 1./denom
	deltaImpulse	-=	deltaVelBDotn*c.m_jacDiagABInv;
	const btScalar sum = btScalar(c.m_appliedImpulse) + deltaImpulse;
	
	if (sum < c.m_lowerLimit)
	{
		deltaImpulse = c.m_lowerLimit-c.m_appliedImpulse;
		c.m_appliedImpulse = c.m_lowerLimit;
	}
	else if (sum > c.m_upperLimit) 
	{
		deltaImpulse = c.m_upperLimit-c.m_appliedImpulse;
		c.m_appliedImpulse = c.m_upperLimit;
	}
	else
	{
		c.m_appliedImpulse = sum;
	}

	if (c.m_multiBodyA)
	{
		applyDeltaVee(&m_data.m_deltaVelocitiesUnitImpulse[c.m_jacAindex],deltaImpulse,c.m_deltaVelAindex,ndofA);
#ifdef DIRECTLY_UPDATE_VELOCITY_DURING_SOLVER_ITERATIONS
		//note: update of the actual velocities (below) in the multibody does not have to happen now since m_deltaVelocities can be applied after all iterations
		//it would make the multibody solver more like the regular one with m_deltaVelocities being equivalent to btSolverBody::m_deltaLinearVelocity/m_deltaAngularVelocity
		if(c.m_multiBodyA->isMultiDof())
			c.m_multiBodyA->applyDeltaVeeMultiDof2(&m_data.m_deltaVelocitiesUnitImpulse[c.m_jacAindex],deltaImpulse);
		else
			c.m_multiBodyA->applyDeltaVee(&m_data.m_deltaVelocitiesUnitImpulse[c.m_jacAindex],deltaImpulse);
#endif //DIRECTLY_UPDATE_VELOCITY_DURING_SOLVER_ITERATIONS
	} else if(c.m_solverBodyIdA >= 0)
	{
		bodyA->internalApplyImpulse(c.m_contactNormal1*bodyA->internalGetInvMass(),c.m_angularComponentA,deltaImpulse);

	}
	if (c.m_multiBodyB)
	{
		applyDeltaVee(&m_data.m_deltaVelocitiesUnitImpulse[c.m_jacBindex],deltaImpulse,c.m_deltaVelBindex,ndofB);
#ifdef DIRECTLY_UPDATE_VELOCITY_DURING_SOLVER_ITERATIONS
		//note: update of the actual velocities (below) in the multibody does not have to happen now since m_deltaVelocities can be applied after all iterations
		//it would make the multibody solver more like the regular one with m_deltaVelocities being equivalent to btSolverBody::m_deltaLinearVelocity/m_deltaAngularVelocity
		if(c.m_multiBodyB->isMultiDof())
			c.m_multiBodyB->applyDeltaVeeMultiDof2(&m_data.m_deltaVelocitiesUnitImpulse[c.m_jacBindex],deltaImpulse);
		else
			c.m_multiBodyB->applyDeltaVee(&m_data.m_deltaVelocitiesUnitImpulse[c.m_jacBindex],deltaImpulse);
#endif //DIRECTLY_UPDATE_VELOCITY_DURING_SOLVER_ITERATIONS
	} else if(c.m_solverBodyIdB >= 0)
	{
		bodyB->internalApplyImpulse(c.m_contactNormal2*bodyB->internalGetInvMass(),c.m_angularComponentB,deltaImpulse);
	}

}
void btMultiBodyConstraintSolver::resolveSingleConstraintRowGeneric(const btMultiBodySolverConstraint& c)
{

	btScalar deltaImpulse = c.m_rhs-btScalar(c.m_appliedImpulse)*c.m_cfm;
	btScalar deltaVelADotn=0;
	btScalar deltaVelBDotn=0;
	btSolverBody* bodyA = 0;
	btSolverBody* bodyB = 0;
	int ndofA=0;
	int ndofB=0;

	if (c.m_multiBodyA)
	{
		ndofA  = c.m_multiBodyA->getNumLinks() + 6;
		for (int i = 0; i < ndofA; ++i) 
			deltaVelADotn += m_data.m_jacobians[c.m_jacAindex+i] * m_data.m_deltaVelocities[c.m_deltaVelAindex+i];
	} else
	{
		bodyA = &m_tmpSolverBodyPool[c.m_solverBodyIdA];
		deltaVelADotn += c.m_contactNormal1.dot(bodyA->internalGetDeltaLinearVelocity()) 	+ c.m_relpos1CrossNormal.dot(bodyA->internalGetDeltaAngularVelocity());
	}

	if (c.m_multiBodyB)
	{
		ndofB = c.m_multiBodyB->getNumLinks() + 6;
		for (int i = 0; i < ndofB; ++i) 
			deltaVelBDotn += m_data.m_jacobians[c.m_jacBindex+i] * m_data.m_deltaVelocities[c.m_deltaVelBindex+i];
	} else
	{
		bodyB = &m_tmpSolverBodyPool[c.m_solverBodyIdB];
		deltaVelBDotn += c.m_contactNormal2.dot(bodyB->internalGetDeltaLinearVelocity())  + c.m_relpos2CrossNormal.dot(bodyB->internalGetDeltaAngularVelocity());
	}

	
	deltaImpulse	-=	deltaVelADotn*c.m_jacDiagABInv;//m_jacDiagABInv = 1./denom
	deltaImpulse	-=	deltaVelBDotn*c.m_jacDiagABInv;
	const btScalar sum = btScalar(c.m_appliedImpulse) + deltaImpulse;
	
	if (sum < c.m_lowerLimit)
	{
		deltaImpulse = c.m_lowerLimit-c.m_appliedImpulse;
		c.m_appliedImpulse = c.m_lowerLimit;
	}
	else if (sum > c.m_upperLimit) 
	{
		deltaImpulse = c.m_upperLimit-c.m_appliedImpulse;
		c.m_appliedImpulse = c.m_upperLimit;
	}
	else
	{
		c.m_appliedImpulse = sum;
	}

	if (c.m_multiBodyA)
	{
		applyDeltaVee(&m_data.m_deltaVelocitiesUnitImpulse[c.m_jacAindex],deltaImpulse,c.m_deltaVelAindex,ndofA);
		c.m_multiBodyA->applyDeltaVee(&m_data.m_deltaVelocitiesUnitImpulse[c.m_jacAindex],deltaImpulse);
	} else
	{
		bodyA->internalApplyImpulse(c.m_contactNormal1*bodyA->internalGetInvMass(),c.m_angularComponentA,deltaImpulse);

	}
	if (c.m_multiBodyB)
	{
		applyDeltaVee(&m_data.m_deltaVelocitiesUnitImpulse[c.m_jacBindex],deltaImpulse,c.m_deltaVelBindex,ndofB);
		c.m_multiBodyB->applyDeltaVee(&m_data.m_deltaVelocitiesUnitImpulse[c.m_jacBindex],deltaImpulse);
	} else
	{
		bodyB->internalApplyImpulse(c.m_contactNormal2*bodyB->internalGetInvMass(),c.m_angularComponentB,deltaImpulse);
	}

}