void PxcFsPropagateDrivenInertiaSimd(PxcFsData& matrix,
									 const PxcFsInertia* baseInertia,
									 const PxReal* isf,
									 const Mat33V* load,
									 PxcFsScratchAllocator allocator)
{
	typedef PxcArticulationFnsSimd<PxcArticulationFnsSimdBase> Fns;

	PxcSIMDSpatial IS[3];

	PxcFsRow* rows = getFsRows(matrix);
	const PxcFsRowAux* aux = getAux(matrix);
	const PxcFsJointVectors* jointVectors = getJointVectors(matrix);

	PxcFsInertia *inertia = allocator.alloc<PxcFsInertia>(matrix.linkCount);
	PxMemCopy(inertia, baseInertia, matrix.linkCount*sizeof(PxcFsInertia));

	for(PxU32 i=matrix.linkCount; --i>0;)
	{
		PxcFsRow& r = rows[i];
		const PxcFsRowAux& a = aux[i];
		const PxcFsJointVectors& jv = jointVectors[i];

		Mat33V m = Fns::computeSIS(inertia[i], a.S, IS);
		FloatV f = FLoad(isf[i]);

		Mat33V D = Fns::invertSym33(Mat33V(V3ScaleAdd(load[i].col0, f, m.col0),
										   V3ScaleAdd(load[i].col1, f, m.col1),
										   V3ScaleAdd(load[i].col2, f, m.col2)));
		r.D = D;

		inertia[matrix.parent[i]] = Fns::addInertia(inertia[matrix.parent[i]], 
													Fns::translateInertia(jv.parentOffset, Fns::multiplySubtract(inertia[i], D,  IS,  r.DSI)));
	}

	getRootInverseInertia(matrix) = Fns::invertInertia(inertia[0]);
}
Ejemplo n.º 2
0
void solveFriction_BStatic(const PxcSolverConstraintDesc& desc, PxcSolverContext& /*cache*/)
{
	PxcSolverBody& b0 = *desc.bodyA;

	Vec3V linVel0 = V3LoadA(b0.linearVelocity);
	Vec3V angVel0 = V3LoadA(b0.angularVelocity);

	const PxU8* PX_RESTRICT currPtr = desc.constraint;

	const PxU8* PX_RESTRICT last = currPtr + getConstraintLength(desc);

	//hopefully pointer aliasing doesn't bite.

	//PxVec3 l0, a0;
	//PxVec3_From_Vec3V(linVel0, l0);
	//PxVec3_From_Vec3V(angVel0, a0);

	//PX_ASSERT(l0.isFinite());
	//PX_ASSERT(a0.isFinite());
	

	while(currPtr < last)
	{

		const PxcSolverFrictionHeader* PX_RESTRICT frictionHeader = (PxcSolverFrictionHeader*)currPtr;
		const PxU32 numFrictionConstr = frictionHeader->numFrictionConstr;
		currPtr +=sizeof(PxcSolverFrictionHeader);
		PxF32* appliedImpulse = (PxF32*)currPtr;
		currPtr +=frictionHeader->getAppliedForcePaddingSize();

		PxcSolverFriction* PX_RESTRICT frictions = (PxcSolverFriction*)currPtr;
		currPtr += numFrictionConstr * sizeof(PxcSolverFriction);


		const FloatV staticFriction = frictionHeader->getStaticFriction();

		for(PxU32 i=0;i<numFrictionConstr;i++)   
		{
			PxcSolverFriction& f = frictions[i];
			Ps::prefetchLine(&frictions[i+1]);

			const Vec3V t0 = Vec3V_From_Vec4V(f.normalXYZ_appliedForceW);
			const Vec3V raXt0 = Vec3V_From_Vec4V(f.raXnXYZ_velMultiplierW);

			const FloatV appliedForce = V4GetW(f.normalXYZ_appliedForceW);
			const FloatV velMultiplier = V4GetW(f.raXnXYZ_velMultiplierW);

			const FloatV targetVel = V4GetW(f.rbXnXYZ_targetVelocityW);
			
			//const FloatV normalImpulse = contacts[f.contactIndex].getAppliedForce();
			const FloatV normalImpulse = FLoad(appliedImpulse[f.contactIndex]);
			const FloatV maxFriction = FMul(staticFriction, normalImpulse);
			const FloatV nMaxFriction = FNeg(maxFriction);

			//Compute the normal velocity of the constraint.

			const FloatV t0Vel1 = V3Dot(t0, linVel0);
			const FloatV t0Vel2 = V3Dot(raXt0, angVel0);

			//const FloatV unbiasedErr = FMul(targetVel, velMultiplier);
			//const FloatV biasedErr = FMulAdd(targetVel, velMultiplier, nScaledBias);

			const FloatV t0Vel = FAdd(t0Vel1, t0Vel2);

			const Vec3V delAngVel0 = Vec3V_From_Vec4V(f.delAngVel0_InvMassADom);
			const Vec3V delLinVel0 = V3Scale(t0, V4GetW(f.delAngVel0_InvMassADom));

			// still lots to do here: using loop pipelining we can interweave this code with the
			// above - the code here has a lot of stalls that we would thereby eliminate

				//FloatV deltaF = FSub(scaledBias, FMul(t0Vel, velMultiplier));//FNeg(FMul(t0Vel, velMultiplier));
			//FloatV deltaF = FMul(t0Vel, velMultiplier);
			//FloatV newForce = FMulAdd(t0Vel, velMultiplier, appliedForce);

			const FloatV tmp = FNegMulSub(targetVel,velMultiplier,appliedForce);
			FloatV newForce = FMulAdd(t0Vel, velMultiplier, tmp);
			newForce = FClamp(newForce, nMaxFriction, maxFriction);
			const FloatV deltaF = FSub(newForce, appliedForce);

			linVel0 = V3ScaleAdd(delLinVel0, deltaF, linVel0);
			angVel0 = V3ScaleAdd(delAngVel0, deltaF, angVel0);

			f.setAppliedForce(newForce);
		}
	}

	//PxVec3_From_Vec3V(linVel0, l0);
	//PxVec3_From_Vec3V(angVel0, a0);

	//PX_ASSERT(l0.isFinite());
	//PX_ASSERT(a0.isFinite());

	// Write back
	V3StoreU(linVel0, b0.linearVelocity);
	V3StoreU(angVel0, b0.angularVelocity);

	PX_ASSERT(currPtr == last);
}
Ejemplo n.º 3
0
void solveContactCoulomb_BStatic(const PxcSolverConstraintDesc& desc, PxcSolverContext& /*cache*/)
{
	PxcSolverBody& b0 = *desc.bodyA;


	Vec3V linVel0 = V3LoadA(b0.linearVelocity);
	Vec3V angVel0 = V3LoadA(b0.angularVelocity);

	PxcSolverContactCoulombHeader* firstHeader = (PxcSolverContactCoulombHeader*)desc.constraint;
	const PxU8* PX_RESTRICT last = desc.constraint + firstHeader->frictionOffset;//getConstraintLength(desc);

	//hopefully pointer aliasing doesn't bite.
	const PxU8* PX_RESTRICT currPtr = desc.constraint;

	const FloatV zero = FZero();

	while(currPtr < last)
	{
		PxcSolverContactCoulombHeader* PX_RESTRICT hdr = (PxcSolverContactCoulombHeader*)currPtr;
		currPtr += sizeof(PxcSolverContactCoulombHeader);

		const PxU32 numNormalConstr = hdr->numNormalConstr;

		PxcSolverContact* PX_RESTRICT contacts = (PxcSolverContact*)currPtr;
		Ps::prefetchLine(contacts);
		currPtr += numNormalConstr * sizeof(PxcSolverContact);

		PxF32* appliedImpulse = (PxF32*) (((PxU8*)hdr) + hdr->frictionOffset + sizeof(PxcSolverFrictionHeader));
		Ps::prefetchLine(appliedImpulse);

		const Vec3V normal = hdr->getNormal();

		const FloatV invMassDom0 = FLoad(hdr->dominance0);

		FloatV normalVel1 = V3Dot(normal, linVel0);

		const Vec3V delLinVel0 = V3Scale(normal, invMassDom0);
		FloatV accumDeltaF = zero;
		//FloatV accumImpulse = zero;

		for(PxU32 i=0;i<numNormalConstr;i++)
		{
			PxcSolverContact& c = contacts[i];
			Ps::prefetchLine(&contacts[i+1]);

			//const Vec4V normalXYZ_velMultiplierW = c.normalXYZ_velMultiplierW;
			const Vec4V raXnXYZ_appliedForceW = c.raXnXYZ_appliedForceW;
			const Vec4V rbXnXYZ_velMultiplierW = c.rbXnXYZ_velMultiplierW;

			//const Vec3V normal = c.normal;
			//const Vec3V normal = Vec3V_From_Vec4V(normalXYZ_velMultiplierW);
			const Vec3V raXn = Vec3V_From_Vec4V(raXnXYZ_appliedForceW);

			const FloatV appliedForce = V4GetW(raXnXYZ_appliedForceW);
			const FloatV velMultiplier = V4GetW(rbXnXYZ_velMultiplierW);
			//const FloatV velMultiplier = V4GetW(normalXYZ_velMultiplierW);

			const Vec3V delAngVel0 = Vec3V_From_Vec4V(c.delAngVel0_InvMassADom);

			const FloatV targetVel = c.getTargetVelocity();
			const FloatV nScaledBias = FNeg(c.getScaledBias());
			const FloatV maxImpulse = c.getMaxImpulse();

			//Compute the normal velocity of the constraint.

			//const FloatV normalVel1 = V3Dot(normal, linVel0);
			const FloatV normalVel2 = V3Dot(raXn, angVel0);
			const FloatV normalVel =  FAdd(normalVel1, normalVel2);

			//const FloatV unbiasedErr = FMul(targetVel, velMultiplier);
			const FloatV biasedErr = FMulAdd(targetVel, velMultiplier, nScaledBias);

			// still lots to do here: using loop pipelining we can interweave this code with the
			// above - the code here has a lot of stalls that we would thereby eliminate

			const FloatV _deltaF = FMax(FNegMulSub(normalVel, velMultiplier, biasedErr), FNeg(appliedForce));
			const FloatV _newForce = FAdd(appliedForce, _deltaF);
			const FloatV newForce = FMin(_newForce, maxImpulse);
			const FloatV deltaF = FSub(newForce, appliedForce);

			//linVel0 = V3MulAdd(delLinVel0, deltaF, linVel0);
			normalVel1 = FScaleAdd(invMassDom0, deltaF, normalVel1);
			angVel0 = V3ScaleAdd(delAngVel0, deltaF, angVel0);

			accumDeltaF = FAdd(accumDeltaF, deltaF);

			c.setAppliedForce(newForce);
			Ps::aos::FStore(newForce, &appliedImpulse[i]);
			Ps::prefetchLine(&appliedImpulse[i], 128);

			//accumImpulse = FAdd(accumImpulse, newAppliedForce);
		}
		linVel0 = V3ScaleAdd(delLinVel0, accumDeltaF, linVel0);
		//hdr->setAccumlatedForce(accumImpulse);
	}

	// Write back
	V3StoreU(linVel0, b0.linearVelocity);
	V3StoreU(angVel0, b0.angularVelocity);

	PX_ASSERT(currPtr == last);
}
void PxcArticulationHelper::getImpulseSelfResponse(const PxcFsData& matrix,
												   PxU32 linkID0,
												   const PxcSIMDSpatial& impulse0,
												   PxcSIMDSpatial& deltaV0,
												   PxU32 linkID1,
												   const PxcSIMDSpatial& impulse1,
												   PxcSIMDSpatial& deltaV1)
{
	PX_ASSERT(linkID0 != linkID1);

	const PxcFsRow* rows = getFsRows(matrix);
	const PxcFsRowAux* aux = getAux(matrix);
	const PxcFsJointVectors* jointVectors = getJointVectors(matrix);

	PX_UNUSED(aux);

	PxcSIMDSpatial& dV0 = deltaV0, 
				  & dV1 = deltaV1;

	// standard case: parent-child limit
	if(matrix.parent[linkID1] == linkID0)
	{
		const PxcFsRow& r = rows[linkID1];
		const PxcFsJointVectors& j = jointVectors[linkID1];

		Vec3V lZ = V3Neg(impulse1.linear),
			  aZ = V3Neg(impulse1.angular);

		Vec3V sz = V3Add(aZ, V3Cross(lZ, j.jointOffset));
		
		lZ = V3Sub(lZ, V3ScaleAdd(r.DSI[0].linear, V3GetX(sz), V3ScaleAdd(r.DSI[1].linear, V3GetY(sz), V3Scale(r.DSI[2].linear, V3GetZ(sz)))));
		aZ = V3Sub(aZ, V3ScaleAdd(r.DSI[0].angular, V3GetX(sz), V3ScaleAdd(r.DSI[1].angular, V3GetY(sz), V3Scale(r.DSI[2].angular, V3GetZ(sz)))));

		aZ = V3Add(aZ, V3Cross(j.parentOffset, lZ));

		lZ = V3Sub(impulse0.linear, lZ);
		aZ = V3Sub(impulse0.angular, aZ);

		dV0 = getImpulseResponseSimd(matrix, linkID0, lZ, aZ);

		Vec3V aV = dV0.angular;
		Vec3V lV = V3Sub(dV0.linear, V3Cross(j.parentOffset, aV));

		Vec3V n = V3Add(V3Merge(V3Dot(r.DSI[0].linear, lV),  V3Dot(r.DSI[1].linear, lV),  V3Dot(r.DSI[2].linear, lV)),
						V3Merge(V3Dot(r.DSI[0].angular, aV), V3Dot(r.DSI[1].angular, aV), V3Dot(r.DSI[2].angular, aV)));

		n = V3Add(n, M33MulV3(r.D, sz));
		lV = V3Sub(lV, V3Cross(j.jointOffset, n));
		aV = V3Sub(aV, n);

		dV1 = PxcSIMDSpatial(lV, aV);
	}
	else
		getImpulseResponseSlow(matrix, linkID0, impulse0, deltaV0, linkID1, impulse1, deltaV1);

#if PXC_ARTICULATION_DEBUG_VERIFY
	PxcSIMDSpatial dV0_, dV1_;
	PxcFsGetImpulseSelfResponse(matrix, linkID0, impulse0, dV0_, linkID1, impulse1, dV1_);

	PX_ASSERT(almostEqual(dV0_, dV0, 1e-3f));
	PX_ASSERT(almostEqual(dV1_, dV1, 1e-3f));
#endif
}
void solveFriction_BStatic(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/)
{
	PxSolverBody& b0 = *desc.bodyA;

	Vec3V linVel0 = V3LoadA(b0.linearVelocity);
	Vec3V angState0 = V3LoadA(b0.angularState);

	PxU8* PX_RESTRICT currPtr = desc.constraint;

	const PxU8* PX_RESTRICT last = currPtr + getConstraintLength(desc);

	while(currPtr < last)
	{

		const SolverFrictionHeader* PX_RESTRICT frictionHeader = reinterpret_cast<SolverFrictionHeader*>(currPtr);
		const PxU32 numFrictionConstr = frictionHeader->numFrictionConstr;
		const PxU32 numNormalConstr = frictionHeader->numNormalConstr;
		const PxU32 numFrictionPerPoint = numFrictionConstr/numNormalConstr;
		currPtr +=sizeof(SolverFrictionHeader);
		PxF32* appliedImpulse = reinterpret_cast<PxF32*>(currPtr);
		currPtr +=frictionHeader->getAppliedForcePaddingSize();

		SolverContactFriction* PX_RESTRICT frictions = reinterpret_cast<SolverContactFriction*>(currPtr);
		currPtr += numFrictionConstr * sizeof(SolverContactFriction);

		const FloatV invMass0 = FLoad(frictionHeader->invMass0D0);
		const FloatV angD0 = FLoad(frictionHeader->angDom0);
		//const FloatV angD1 = FLoad(frictionHeader->angDom1);


		const FloatV staticFriction = frictionHeader->getStaticFriction();

		for(PxU32 i=0, j = 0;i<numFrictionConstr;j++)
		{
			for(PxU32 p = 0; p < numFrictionPerPoint; p++, i++)
			{
				SolverContactFriction& f = frictions[i];
				Ps::prefetchLine(&frictions[i+1]);

				const Vec3V t0 = Vec3V_From_Vec4V(f.normalXYZ_appliedForceW);
				const Vec3V raXt0 = Vec3V_From_Vec4V(f.raXnXYZ_velMultiplierW);

				const FloatV appliedForce = V4GetW(f.normalXYZ_appliedForceW);
				const FloatV velMultiplier = V4GetW(f.raXnXYZ_velMultiplierW);

				const FloatV targetVel = FLoad(f.targetVel);
				
				//const FloatV normalImpulse = contacts[f.contactIndex].getAppliedForce();
				const FloatV normalImpulse = FLoad(appliedImpulse[j]);
				const FloatV maxFriction = FMul(staticFriction, normalImpulse);
				const FloatV nMaxFriction = FNeg(maxFriction);

				//Compute the normal velocity of the constraint.

				const FloatV t0Vel1 = V3Dot(t0, linVel0);
				const FloatV t0Vel2 = V3Dot(raXt0, angState0);

				const FloatV t0Vel = FAdd(t0Vel1, t0Vel2);

				const Vec3V delangState0 = V3Scale(raXt0, angD0);
				const Vec3V delLinVel0 = V3Scale(t0, invMass0);

				// still lots to do here: using loop pipelining we can interweave this code with the
				// above - the code here has a lot of stalls that we would thereby eliminate

				const FloatV tmp = FNegScaleSub(targetVel,velMultiplier,appliedForce);
				FloatV newForce = FScaleAdd(t0Vel, velMultiplier, tmp);
				newForce = FClamp(newForce, nMaxFriction, maxFriction);
				const FloatV deltaF = FSub(newForce, appliedForce);

				linVel0 = V3ScaleAdd(delLinVel0, deltaF, linVel0);
				angState0 = V3ScaleAdd(delangState0, deltaF, angState0);

				f.setAppliedForce(newForce);
			}
		}
	}

	// Write back
	V3StoreA(linVel0, b0.linearVelocity);
	V3StoreA(angState0, b0.angularState);

	PX_ASSERT(currPtr == last);
}