void solveContactCoulomb_BStatic(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/)
{
	PxSolverBody& b0 = *desc.bodyA;


	Vec3V linVel0 = V3LoadA(b0.linearVelocity);
	Vec3V angState0 = V3LoadA(b0.angularState);

	SolverContactCoulombHeader* firstHeader = reinterpret_cast<SolverContactCoulombHeader*>(desc.constraint);
	const PxU8* PX_RESTRICT last = desc.constraint + firstHeader->frictionOffset;//getConstraintLength(desc);

	//hopefully pointer aliasing doesn't bite.
	PxU8* PX_RESTRICT currPtr = desc.constraint;

	//const FloatV zero = FZero();

	while(currPtr < last)
	{
		SolverContactCoulombHeader* PX_RESTRICT hdr = reinterpret_cast<SolverContactCoulombHeader*>(currPtr);
		currPtr += sizeof(SolverContactCoulombHeader);

		const PxU32 numNormalConstr = hdr->numNormalConstr;

		SolverContactPoint* PX_RESTRICT contacts = reinterpret_cast<SolverContactPoint*>(currPtr);
		Ps::prefetchLine(contacts);
		currPtr += numNormalConstr * sizeof(SolverContactPoint);

		PxF32* appliedImpulse = reinterpret_cast<PxF32*> ((reinterpret_cast<PxU8*>(hdr)) + hdr->frictionOffset + sizeof(SolverFrictionHeader));
		Ps::prefetchLine(appliedImpulse);

		const Vec3V normal = hdr->getNormal();

		const FloatV invMassDom0 = FLoad(hdr->dominance0);

		const FloatV angD0 = FLoad(hdr->angDom0);
		
		solveStaticContacts(contacts, numNormalConstr, normal, invMassDom0, 
			angD0, linVel0, angState0, appliedImpulse); 
	}

	// Write back
	V3StoreA(linVel0, b0.linearVelocity);
	V3StoreA(angState0, b0.angularState);

	PX_ASSERT(currPtr == last);
}
bool setupFinalizeExtSolverContactsCoulomb(
						    const ContactBuffer& buffer,
							const CorrelationBuffer& c,
							const PxTransform& bodyFrame0,
							const PxTransform& bodyFrame1,
							PxU8* workspace,
							PxReal invDt,
							PxReal bounceThresholdF32,
							const SolverExtBody& b0,
							const SolverExtBody& b1,
							PxU32 frictionCountPerPoint,
							PxReal invMassScale0, PxReal invInertiaScale0, 
							PxReal invMassScale1, PxReal invInertiaScale1,
							PxReal restDist,
							PxReal ccdMaxDistance)	
{
	// NOTE II: the friction patches are sparse (some of them have no contact patches, and
	// therefore did not get written back to the cache) but the patch addresses are dense,
	// corresponding to valid patches

	const FloatV ccdMaxSeparation = FLoad(ccdMaxDistance);

	PxU8* PX_RESTRICT ptr = workspace;

	//KS - TODO - this should all be done in SIMD to avoid LHS
	const PxF32 maxPenBias0 = b0.mLinkIndex == PxSolverConstraintDesc::NO_LINK ? b0.mBodyData->penBiasClamp : getMaxPenBias(*b0.mFsData)[b0.mLinkIndex];
	const PxF32 maxPenBias1 = b1.mLinkIndex == PxSolverConstraintDesc::NO_LINK ? b1.mBodyData->penBiasClamp : getMaxPenBias(*b1.mFsData)[b1.mLinkIndex];

	const FloatV maxPenBias = FLoad(PxMax(maxPenBias0, maxPenBias1)/invDt);

	const FloatV restDistance = FLoad(restDist); 
	const FloatV bounceThreshold = FLoad(bounceThresholdF32);

	const FloatV invDtV = FLoad(invDt);
	const FloatV pt8 = FLoad(0.8f);

	const FloatV invDtp8 = FMul(invDtV, pt8);

	Ps::prefetchLine(c.contactID);
	Ps::prefetchLine(c.contactID, 128);

	const PxU32 frictionPatchCount = c.frictionPatchCount;

	const PxU32 pointStride = sizeof(SolverContactPointExt);
	const PxU32 frictionStride = sizeof(SolverContactFrictionExt);
	const PxU8 pointHeaderType = DY_SC_TYPE_EXT_CONTACT;
	const PxU8 frictionHeaderType = DY_SC_TYPE_EXT_FRICTION;

	PxReal d0 = invMassScale0;
	PxReal d1 = invMassScale1;
	PxReal angD0 = invInertiaScale0;
	PxReal angD1 = invInertiaScale1;

	PxU8 flags = 0;

	for(PxU32 i=0;i< frictionPatchCount;i++)
	{
		const PxU32 contactCount = c.frictionPatchContactCounts[i];
		if(contactCount == 0)
			continue;

		const Gu::ContactPoint* contactBase0 = buffer.contacts + c.contactPatches[c.correlationListHeads[i]].start;

		const Vec3V normalV = Ps::aos::V3LoadA(contactBase0->normal);
		const Vec3V normal = V3LoadA(contactBase0->normal);

		const PxReal combinedRestitution = contactBase0->restitution;
	
		
		SolverContactCoulombHeader* PX_RESTRICT header = reinterpret_cast<SolverContactCoulombHeader*>(ptr);
		ptr += sizeof(SolverContactCoulombHeader);

		Ps::prefetchLine(ptr, 128);
		Ps::prefetchLine(ptr, 256);
		Ps::prefetchLine(ptr, 384);

		const FloatV restitution = FLoad(combinedRestitution);


		header->numNormalConstr		= PxU8(contactCount);
		header->type				= pointHeaderType;
		//header->setRestitution(combinedRestitution);

		header->setDominance0(d0);
		header->setDominance1(d1);
		header->angDom0 = angD0;
		header->angDom1 = angD1;
		header->flags = flags;
		
		header->setNormal(normalV);
		
		for(PxU32 patch=c.correlationListHeads[i]; 
			patch!=CorrelationBuffer::LIST_END; 
			patch = c.contactPatches[patch].next)
		{
			const PxU32 count = c.contactPatches[patch].count;
			const Gu::ContactPoint* contactBase = buffer.contacts + c.contactPatches[patch].start;
				
			PxU8* p = ptr;
			for(PxU32 j=0;j<count;j++)
			{
				const Gu::ContactPoint& contact = contactBase[j];

				SolverContactPointExt* PX_RESTRICT solverContact = reinterpret_cast<SolverContactPointExt*>(p);
				p += pointStride;

				setupExtSolverContact(b0, b1, d0, d1, angD0, angD1, bodyFrame0, bodyFrame1, normal, invDtV, invDtp8, restDistance, maxPenBias, restitution,
					bounceThreshold, contact, *solverContact, ccdMaxSeparation);
			}			
			ptr = p;
		}
	}

	//construct all the frictions

	PxU8* PX_RESTRICT ptr2 = workspace;

	const PxF32 orthoThreshold = 0.70710678f;
	const PxF32 eps = 0.00001f;
	bool hasFriction = false;

	for(PxU32 i=0;i< frictionPatchCount;i++)
	{
		const PxU32 contactCount = c.frictionPatchContactCounts[i];
		if(contactCount == 0)
			continue;

		SolverContactCoulombHeader* header = reinterpret_cast<SolverContactCoulombHeader*>(ptr2); 
		header->frictionOffset = PxU16(ptr - ptr2);
		ptr2 += sizeof(SolverContactCoulombHeader) + header->numNormalConstr * pointStride;

		const Gu::ContactPoint* contactBase0 = buffer.contacts + c.contactPatches[c.correlationListHeads[i]].start;

		PxVec3 normal = contactBase0->normal;

		const PxReal staticFriction = contactBase0->staticFriction;
		const bool disableStrongFriction = !!(contactBase0->materialFlags & PxMaterialFlag::eDISABLE_FRICTION);
		const bool haveFriction = (disableStrongFriction == 0);
	
		SolverFrictionHeader* frictionHeader = reinterpret_cast<SolverFrictionHeader*>(ptr);
		frictionHeader->numNormalConstr = Ps::to8(c.frictionPatchContactCounts[i]);
		frictionHeader->numFrictionConstr = Ps::to8(haveFriction ? c.frictionPatchContactCounts[i] * frictionCountPerPoint : 0);
		frictionHeader->flags = flags;
		ptr += sizeof(SolverFrictionHeader);
		PxF32* forceBuffer = reinterpret_cast<PxF32*>(ptr);
		ptr += frictionHeader->getAppliedForcePaddingSize(c.frictionPatchContactCounts[i]);
		PxMemZero(forceBuffer, sizeof(PxF32) * c.frictionPatchContactCounts[i]);
		Ps::prefetchLine(ptr, 128);
		Ps::prefetchLine(ptr, 256);
		Ps::prefetchLine(ptr, 384);


		const PxVec3 t0Fallback1(0.f, -normal.z, normal.y);
		const PxVec3 t0Fallback2(-normal.y, normal.x, 0.f) ;
		const PxVec3 tFallback1 = orthoThreshold > PxAbs(normal.x) ? t0Fallback1 : t0Fallback2;
		const PxVec3 vrel = b0.getLinVel() - b1.getLinVel();
		const PxVec3 t0_ = vrel - normal * (normal.dot(vrel));
		const PxReal sqDist = t0_.dot(t0_);
		const PxVec3 tDir0 = (sqDist > eps ? t0_: tFallback1).getNormalized();
		const PxVec3 tDir1 = tDir0.cross(normal);
		PxVec3 tFallback[2] = {tDir0, tDir1};

		PxU32 ind = 0;

		if(haveFriction)
		{
			hasFriction = true;
			frictionHeader->setStaticFriction(staticFriction);
			frictionHeader->invMass0D0 = d0;
			frictionHeader->invMass1D1 = d1;
			frictionHeader->angDom0 = angD0;
			frictionHeader->angDom1 = angD1;
			frictionHeader->type			= frictionHeaderType;
			
			PxU32 totalPatchContactCount = 0;
		
			for(PxU32 patch=c.correlationListHeads[i]; 
				patch!=CorrelationBuffer::LIST_END; 
				patch = c.contactPatches[patch].next)
			{
				const PxU32 count = c.contactPatches[patch].count;
				const PxU32 start = c.contactPatches[patch].start;
				const Gu::ContactPoint* contactBase = buffer.contacts + start;
					
				PxU8* p = ptr;

				for(PxU32 j =0; j < count; j++)
				{
					const Gu::ContactPoint& contact = contactBase[j];
					const PxVec3 ra = contact.point - bodyFrame0.p;
					const PxVec3 rb = contact.point - bodyFrame1.p;
						
					const PxVec3 targetVel = contact.targetVel;
					const PxVec3 pVRa = b0.getLinVel() + b0.getAngVel().cross(ra);
					const PxVec3 pVRb = b1.getLinVel() + b1.getAngVel().cross(rb);
					//const PxVec3 vrel = pVRa - pVRb;

					for(PxU32 k = 0; k < frictionCountPerPoint; ++k)
					{
						SolverContactFrictionExt* PX_RESTRICT f0 = reinterpret_cast<SolverContactFrictionExt*>(p);
						p += frictionStride;

						PxVec3 t0 = tFallback[ind];
						ind = 1 - ind;
						PxVec3 raXn = ra.cross(t0); 
						PxVec3 rbXn = rb.cross(t0); 
						Cm::SpatialVector deltaV0, deltaV1;

						const Cm::SpatialVector resp0 = createImpulseResponseVector(t0, raXn, b0);
						const Cm::SpatialVector resp1 = createImpulseResponseVector(-t0, -rbXn, b1);

						PxReal unitResponse = getImpulseResponse(b0, resp0, deltaV0, d0, angD0,
																 b1, resp1, deltaV1, d1, angD1);

						PxReal tv = targetVel.dot(t0);
						if(b0.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
							tv += pVRa.dot(t0);
						else if(b1.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
							tv -= pVRb.dot(t0);


						f0->setVelMultiplier(FLoad(unitResponse>0.0f ? 1.f/unitResponse : 0.0f));
						f0->setRaXn(resp0.angular);
						f0->setRbXn(-resp1.angular);
						f0->targetVel = tv;
						f0->setNormal(t0);
						f0->setAppliedForce(0.0f);
						f0->linDeltaVA = V3LoadA(deltaV0.linear);
						f0->angDeltaVA = V3LoadA(deltaV0.angular);
						f0->linDeltaVB = V3LoadA(deltaV1.linear);
						f0->angDeltaVB = V3LoadA(deltaV1.angular);
					}					
				}

				totalPatchContactCount += c.contactPatches[patch].count;
				
				ptr = p;	
			}
		}
	}
	//PX_ASSERT(ptr - workspace == n.solverConstraintSize);
	return hasFriction;
}
Ejemplo n.º 3
0
void solveFriction_BStatic(const PxcSolverConstraintDesc& desc, PxcSolverContext& /*cache*/)
{
	PxcSolverBody& b0 = *desc.bodyA;

	Vec3V linVel0 = V3LoadA(b0.linearVelocity);
	Vec3V angVel0 = V3LoadA(b0.angularVelocity);

	const PxU8* PX_RESTRICT currPtr = desc.constraint;

	const PxU8* PX_RESTRICT last = currPtr + getConstraintLength(desc);

	//hopefully pointer aliasing doesn't bite.

	//PxVec3 l0, a0;
	//PxVec3_From_Vec3V(linVel0, l0);
	//PxVec3_From_Vec3V(angVel0, a0);

	//PX_ASSERT(l0.isFinite());
	//PX_ASSERT(a0.isFinite());
	

	while(currPtr < last)
	{

		const PxcSolverFrictionHeader* PX_RESTRICT frictionHeader = (PxcSolverFrictionHeader*)currPtr;
		const PxU32 numFrictionConstr = frictionHeader->numFrictionConstr;
		currPtr +=sizeof(PxcSolverFrictionHeader);
		PxF32* appliedImpulse = (PxF32*)currPtr;
		currPtr +=frictionHeader->getAppliedForcePaddingSize();

		PxcSolverFriction* PX_RESTRICT frictions = (PxcSolverFriction*)currPtr;
		currPtr += numFrictionConstr * sizeof(PxcSolverFriction);


		const FloatV staticFriction = frictionHeader->getStaticFriction();

		for(PxU32 i=0;i<numFrictionConstr;i++)   
		{
			PxcSolverFriction& f = frictions[i];
			Ps::prefetchLine(&frictions[i+1]);

			const Vec3V t0 = Vec3V_From_Vec4V(f.normalXYZ_appliedForceW);
			const Vec3V raXt0 = Vec3V_From_Vec4V(f.raXnXYZ_velMultiplierW);

			const FloatV appliedForce = V4GetW(f.normalXYZ_appliedForceW);
			const FloatV velMultiplier = V4GetW(f.raXnXYZ_velMultiplierW);

			const FloatV targetVel = V4GetW(f.rbXnXYZ_targetVelocityW);
			
			//const FloatV normalImpulse = contacts[f.contactIndex].getAppliedForce();
			const FloatV normalImpulse = FLoad(appliedImpulse[f.contactIndex]);
			const FloatV maxFriction = FMul(staticFriction, normalImpulse);
			const FloatV nMaxFriction = FNeg(maxFriction);

			//Compute the normal velocity of the constraint.

			const FloatV t0Vel1 = V3Dot(t0, linVel0);
			const FloatV t0Vel2 = V3Dot(raXt0, angVel0);

			//const FloatV unbiasedErr = FMul(targetVel, velMultiplier);
			//const FloatV biasedErr = FMulAdd(targetVel, velMultiplier, nScaledBias);

			const FloatV t0Vel = FAdd(t0Vel1, t0Vel2);

			const Vec3V delAngVel0 = Vec3V_From_Vec4V(f.delAngVel0_InvMassADom);
			const Vec3V delLinVel0 = V3Scale(t0, V4GetW(f.delAngVel0_InvMassADom));

			// still lots to do here: using loop pipelining we can interweave this code with the
			// above - the code here has a lot of stalls that we would thereby eliminate

				//FloatV deltaF = FSub(scaledBias, FMul(t0Vel, velMultiplier));//FNeg(FMul(t0Vel, velMultiplier));
			//FloatV deltaF = FMul(t0Vel, velMultiplier);
			//FloatV newForce = FMulAdd(t0Vel, velMultiplier, appliedForce);

			const FloatV tmp = FNegMulSub(targetVel,velMultiplier,appliedForce);
			FloatV newForce = FMulAdd(t0Vel, velMultiplier, tmp);
			newForce = FClamp(newForce, nMaxFriction, maxFriction);
			const FloatV deltaF = FSub(newForce, appliedForce);

			linVel0 = V3ScaleAdd(delLinVel0, deltaF, linVel0);
			angVel0 = V3ScaleAdd(delAngVel0, deltaF, angVel0);

			f.setAppliedForce(newForce);
		}
	}

	//PxVec3_From_Vec3V(linVel0, l0);
	//PxVec3_From_Vec3V(angVel0, a0);

	//PX_ASSERT(l0.isFinite());
	//PX_ASSERT(a0.isFinite());

	// Write back
	V3StoreU(linVel0, b0.linearVelocity);
	V3StoreU(angVel0, b0.angularVelocity);

	PX_ASSERT(currPtr == last);
}
Ejemplo n.º 4
0
void solveContactCoulomb_BStatic(const PxcSolverConstraintDesc& desc, PxcSolverContext& /*cache*/)
{
	PxcSolverBody& b0 = *desc.bodyA;


	Vec3V linVel0 = V3LoadA(b0.linearVelocity);
	Vec3V angVel0 = V3LoadA(b0.angularVelocity);

	PxcSolverContactCoulombHeader* firstHeader = (PxcSolverContactCoulombHeader*)desc.constraint;
	const PxU8* PX_RESTRICT last = desc.constraint + firstHeader->frictionOffset;//getConstraintLength(desc);

	//hopefully pointer aliasing doesn't bite.
	const PxU8* PX_RESTRICT currPtr = desc.constraint;

	const FloatV zero = FZero();

	while(currPtr < last)
	{
		PxcSolverContactCoulombHeader* PX_RESTRICT hdr = (PxcSolverContactCoulombHeader*)currPtr;
		currPtr += sizeof(PxcSolverContactCoulombHeader);

		const PxU32 numNormalConstr = hdr->numNormalConstr;

		PxcSolverContact* PX_RESTRICT contacts = (PxcSolverContact*)currPtr;
		Ps::prefetchLine(contacts);
		currPtr += numNormalConstr * sizeof(PxcSolverContact);

		PxF32* appliedImpulse = (PxF32*) (((PxU8*)hdr) + hdr->frictionOffset + sizeof(PxcSolverFrictionHeader));
		Ps::prefetchLine(appliedImpulse);

		const Vec3V normal = hdr->getNormal();

		const FloatV invMassDom0 = FLoad(hdr->dominance0);

		FloatV normalVel1 = V3Dot(normal, linVel0);

		const Vec3V delLinVel0 = V3Scale(normal, invMassDom0);
		FloatV accumDeltaF = zero;
		//FloatV accumImpulse = zero;

		for(PxU32 i=0;i<numNormalConstr;i++)
		{
			PxcSolverContact& c = contacts[i];
			Ps::prefetchLine(&contacts[i+1]);

			//const Vec4V normalXYZ_velMultiplierW = c.normalXYZ_velMultiplierW;
			const Vec4V raXnXYZ_appliedForceW = c.raXnXYZ_appliedForceW;
			const Vec4V rbXnXYZ_velMultiplierW = c.rbXnXYZ_velMultiplierW;

			//const Vec3V normal = c.normal;
			//const Vec3V normal = Vec3V_From_Vec4V(normalXYZ_velMultiplierW);
			const Vec3V raXn = Vec3V_From_Vec4V(raXnXYZ_appliedForceW);

			const FloatV appliedForce = V4GetW(raXnXYZ_appliedForceW);
			const FloatV velMultiplier = V4GetW(rbXnXYZ_velMultiplierW);
			//const FloatV velMultiplier = V4GetW(normalXYZ_velMultiplierW);

			const Vec3V delAngVel0 = Vec3V_From_Vec4V(c.delAngVel0_InvMassADom);

			const FloatV targetVel = c.getTargetVelocity();
			const FloatV nScaledBias = FNeg(c.getScaledBias());
			const FloatV maxImpulse = c.getMaxImpulse();

			//Compute the normal velocity of the constraint.

			//const FloatV normalVel1 = V3Dot(normal, linVel0);
			const FloatV normalVel2 = V3Dot(raXn, angVel0);
			const FloatV normalVel =  FAdd(normalVel1, normalVel2);

			//const FloatV unbiasedErr = FMul(targetVel, velMultiplier);
			const FloatV biasedErr = FMulAdd(targetVel, velMultiplier, nScaledBias);

			// still lots to do here: using loop pipelining we can interweave this code with the
			// above - the code here has a lot of stalls that we would thereby eliminate

			const FloatV _deltaF = FMax(FNegMulSub(normalVel, velMultiplier, biasedErr), FNeg(appliedForce));
			const FloatV _newForce = FAdd(appliedForce, _deltaF);
			const FloatV newForce = FMin(_newForce, maxImpulse);
			const FloatV deltaF = FSub(newForce, appliedForce);

			//linVel0 = V3MulAdd(delLinVel0, deltaF, linVel0);
			normalVel1 = FScaleAdd(invMassDom0, deltaF, normalVel1);
			angVel0 = V3ScaleAdd(delAngVel0, deltaF, angVel0);

			accumDeltaF = FAdd(accumDeltaF, deltaF);

			c.setAppliedForce(newForce);
			Ps::aos::FStore(newForce, &appliedImpulse[i]);
			Ps::prefetchLine(&appliedImpulse[i], 128);

			//accumImpulse = FAdd(accumImpulse, newAppliedForce);
		}
		linVel0 = V3ScaleAdd(delLinVel0, accumDeltaF, linVel0);
		//hdr->setAccumlatedForce(accumImpulse);
	}

	// Write back
	V3StoreU(linVel0, b0.linearVelocity);
	V3StoreU(angVel0, b0.angularVelocity);

	PX_ASSERT(currPtr == last);
}
	void setupFinalizeExtSolverContacts(
						    const ContactPoint* buffer,
							const CorrelationBuffer& c,
							const PxTransform& bodyFrame0,
							const PxTransform& bodyFrame1,
							PxU8* workspace,
							const SolverExtBody& b0,
							const SolverExtBody& b1,
							const PxReal invDtF32,
							PxReal bounceThresholdF32,
							PxReal invMassScale0, PxReal invInertiaScale0, 
							PxReal invMassScale1, PxReal invInertiaScale1,
							const PxReal restDist,
							PxU8* frictionDataPtr,
							PxReal ccdMaxContactDist)	
{
	// NOTE II: the friction patches are sparse (some of them have no contact patches, and
	// therefore did not get written back to the cache) but the patch addresses are dense,
	// corresponding to valid patches

	/*const bool haveFriction = PX_IR(n.staticFriction) > 0 || PX_IR(n.dynamicFriction) > 0;*/

	const FloatV ccdMaxSeparation = FLoad(ccdMaxContactDist);

	PxU8* PX_RESTRICT ptr = workspace;

	const FloatV zero=FZero();

	//KS - TODO - this should all be done in SIMD to avoid LHS
	const PxF32 maxPenBias0 = b0.mLinkIndex == PxSolverConstraintDesc::NO_LINK ? b0.mBodyData->penBiasClamp : getMaxPenBias(*b0.mFsData)[b0.mLinkIndex];
	const PxF32 maxPenBias1 = b1.mLinkIndex == PxSolverConstraintDesc::NO_LINK ? b1.mBodyData->penBiasClamp : getMaxPenBias(*b1.mFsData)[b1.mLinkIndex];

	const FloatV maxPenBias = FLoad(PxMax(maxPenBias0, maxPenBias1));


	const PxReal d0 = invMassScale0;
	const PxReal d1 = invMassScale1;

	const PxReal angD0 = invInertiaScale0;
	const PxReal angD1 = invInertiaScale1;

	Vec4V staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W = V4Zero();
	staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetZ(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, FLoad(d0));
	staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetW(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, FLoad(d1));

	const FloatV restDistance = FLoad(restDist); 

	PxU32 frictionPatchWritebackAddrIndex = 0;
	PxU32 contactWritebackCount = 0;

	Ps::prefetchLine(c.contactID);
	Ps::prefetchLine(c.contactID, 128);

	const FloatV invDt = FLoad(invDtF32);
	const FloatV p8 = FLoad(0.8f);
	const FloatV bounceThreshold = FLoad(bounceThresholdF32);

	const FloatV invDtp8 = FMul(invDt, p8);

	PxU8 flags = 0;

	for(PxU32 i=0;i<c.frictionPatchCount;i++)
	{
		PxU32 contactCount = c.frictionPatchContactCounts[i];
		if(contactCount == 0)
			continue;

		const FrictionPatch& frictionPatch = c.frictionPatches[i];
		PX_ASSERT(frictionPatch.anchorCount <= 2);  //0==anchorCount is allowed if all the contacts in the manifold have a large offset. 

		const Gu::ContactPoint* contactBase0 = buffer + c.contactPatches[c.correlationListHeads[i]].start;
		const PxReal combinedRestitution = contactBase0->restitution;

		const PxReal staticFriction = contactBase0->staticFriction;
		const PxReal dynamicFriction = contactBase0->dynamicFriction;
		const bool disableStrongFriction = !!(contactBase0->materialFlags & PxMaterialFlag::eDISABLE_FRICTION);
		staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetX(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, FLoad(staticFriction));
		staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetY(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, FLoad(dynamicFriction));
	
		SolverContactHeader* PX_RESTRICT header = reinterpret_cast<SolverContactHeader*>(ptr);
		ptr += sizeof(SolverContactHeader);		


		Ps::prefetchLine(ptr + 128);
		Ps::prefetchLine(ptr + 256);
		Ps::prefetchLine(ptr + 384);
		
		const bool haveFriction = (disableStrongFriction == 0) ;//PX_IR(n.staticFriction) > 0 || PX_IR(n.dynamicFriction) > 0;
		header->numNormalConstr		= Ps::to8(contactCount);
		header->numFrictionConstr	= Ps::to8(haveFriction ? frictionPatch.anchorCount*2 : 0);
	
		header->type				= Ps::to8(DY_SC_TYPE_EXT_CONTACT);

		header->flags = flags;

		const FloatV restitution = FLoad(combinedRestitution);
	
		header->staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W = staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W;

		header->angDom0 = angD0;
		header->angDom1 = angD1;
	
		const PxU32 pointStride = sizeof(SolverContactPointExt);
		const PxU32 frictionStride = sizeof(SolverContactFrictionExt);

		const Vec3V normal = V3LoadU(buffer[c.contactPatches[c.correlationListHeads[i]].start].normal);

		header->normal = normal;
		
		for(PxU32 patch=c.correlationListHeads[i]; 
			patch!=CorrelationBuffer::LIST_END; 
			patch = c.contactPatches[patch].next)
		{
			const PxU32 count = c.contactPatches[patch].count;
			const Gu::ContactPoint* contactBase = buffer + c.contactPatches[patch].start;
				
			PxU8* p = ptr;
			for(PxU32 j=0;j<count;j++)
			{
				const Gu::ContactPoint& contact = contactBase[j];

				SolverContactPointExt* PX_RESTRICT solverContact = reinterpret_cast<SolverContactPointExt*>(p);
				p += pointStride;

				setupExtSolverContact(b0, b1, d0, d1, angD0, angD1, bodyFrame0, bodyFrame1, normal, invDt, invDtp8, restDistance, maxPenBias, restitution,
					bounceThreshold, contact, *solverContact, ccdMaxSeparation);
			
			}

			ptr = p;
		}
		contactWritebackCount += contactCount;

		PxF32* forceBuffer = reinterpret_cast<PxF32*>(ptr);
		PxMemZero(forceBuffer, sizeof(PxF32) * contactCount);
		ptr += sizeof(PxF32) * ((contactCount + 3) & (~3));

		header->broken = 0;

		if(haveFriction)
		{
			//const Vec3V normal = Vec3V_From_PxVec3(buffer.contacts[c.contactPatches[c.correlationListHeads[i]].start].normal);
			PxVec3 normalS = buffer[c.contactPatches[c.correlationListHeads[i]].start].normal;

			PxVec3 t0, t1;
			computeFrictionTangents(b0.getLinVel() - b1.getLinVel(), normalS, t0, t1);

			Vec3V vT0 = V3LoadU(t0);
			Vec3V vT1 = V3LoadU(t1);
			
			//We want to set the writeBack ptr to point to the broken flag of the friction patch.
			//On spu we have a slight problem here because the friction patch array is 
			//in local store rather than in main memory. The good news is that the address of the friction 
			//patch array in main memory is stored in the work unit. These two addresses will be equal 
			//except on spu where one is local store memory and the other is the effective address in main memory.
			//Using the value stored in the work unit guarantees that the main memory address is used on all platforms.
			PxU8* PX_RESTRICT writeback = frictionDataPtr + frictionPatchWritebackAddrIndex*sizeof(FrictionPatch);

			header->frictionBrokenWritebackByte = writeback;			

			for(PxU32 j = 0; j < frictionPatch.anchorCount; j++)
			{
				SolverContactFrictionExt* PX_RESTRICT f0 = reinterpret_cast<SolverContactFrictionExt*>(ptr);
				ptr += frictionStride;
				SolverContactFrictionExt* PX_RESTRICT f1 = reinterpret_cast<SolverContactFrictionExt*>(ptr);
				ptr += frictionStride;

				PxVec3 ra = bodyFrame0.q.rotate(frictionPatch.body0Anchors[j]);
				PxVec3 rb = bodyFrame1.q.rotate(frictionPatch.body1Anchors[j]);
				PxVec3 error = (ra + bodyFrame0.p) - (rb + bodyFrame1.p);

				{
					const PxVec3 raXn = ra.cross(t0);
					const PxVec3 rbXn = rb.cross(t0);

					Cm::SpatialVector deltaV0, deltaV1;

					const Cm::SpatialVector resp0 = createImpulseResponseVector(t0, raXn, b0);
					const Cm::SpatialVector resp1 = createImpulseResponseVector(-t1, -rbXn, b1);
					FloatV resp = FLoad(getImpulseResponse(b0, resp0, deltaV0, d0, angD0,
															 b1, resp1, deltaV1, d1, angD1));

					const FloatV velMultiplier = FSel(FIsGrtr(resp, zero), FMul(p8, FRecip(resp)), zero);

					PxU32 index = c.contactPatches[c.correlationListHeads[i]].start;
					PxF32 targetVel = buffer[index].targetVel.dot(t0);

					if(b0.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
						targetVel -= b0.projectVelocity(t0, raXn);
					else if(b1.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
						targetVel += b1.projectVelocity(t0, rbXn);

					f0->normalXYZ_appliedForceW = V4SetW(vT0, zero);
					f0->raXnXYZ_velMultiplierW = V4SetW(V4LoadA(&resp0.angular.x), velMultiplier);
					f0->rbXnXYZ_biasW = V4SetW(V4Neg(V4LoadA(&resp1.angular.x)), FLoad(t0.dot(error) * invDtF32));
					f0->linDeltaVA = V3LoadA(deltaV0.linear);
					f0->angDeltaVA = V3LoadA(deltaV0.angular);
					f0->linDeltaVB = V3LoadA(deltaV1.linear);
					f0->angDeltaVB = V3LoadA(deltaV1.angular);
					f0->targetVel = targetVel;
				}

				{

					const PxVec3 raXn = ra.cross(t1);
					const PxVec3 rbXn = rb.cross(t1);

					Cm::SpatialVector deltaV0, deltaV1;


					const Cm::SpatialVector resp0 = createImpulseResponseVector(t1, raXn, b0);
					const Cm::SpatialVector resp1 = createImpulseResponseVector(-t1, -rbXn, b1);

					FloatV resp = FLoad(getImpulseResponse(b0, resp0, deltaV0, d0, angD0,
														   b1, resp1, deltaV1, d1, angD1));

					const FloatV velMultiplier = FSel(FIsGrtr(resp, zero), FMul(p8, FRecip(resp)), zero);

					PxU32 index = c.contactPatches[c.correlationListHeads[i]].start;
					PxF32 targetVel = buffer[index].targetVel.dot(t0);

					if(b0.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
						targetVel -= b0.projectVelocity(t1, raXn);
					else if(b1.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
						targetVel += b1.projectVelocity(t1, rbXn);

					f1->normalXYZ_appliedForceW = V4SetW(vT1, zero);
					f1->raXnXYZ_velMultiplierW = V4SetW(V4LoadA(&resp0.angular.x), velMultiplier);
					f1->rbXnXYZ_biasW = V4SetW(V4Neg(V4LoadA(&resp1.angular.x)), FLoad(t1.dot(error) * invDtF32));
					f1->linDeltaVA = V3LoadA(deltaV0.linear);
					f1->angDeltaVA = V3LoadA(deltaV0.angular);
					f1->linDeltaVB = V3LoadA(deltaV1.linear);
					f1->angDeltaVB = V3LoadA(deltaV1.angular);
					f1->targetVel = targetVel;
				}
			}
		}

		frictionPatchWritebackAddrIndex++;
	}
}
void solveFriction_BStatic(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/)
{
	PxSolverBody& b0 = *desc.bodyA;

	Vec3V linVel0 = V3LoadA(b0.linearVelocity);
	Vec3V angState0 = V3LoadA(b0.angularState);

	PxU8* PX_RESTRICT currPtr = desc.constraint;

	const PxU8* PX_RESTRICT last = currPtr + getConstraintLength(desc);

	while(currPtr < last)
	{

		const SolverFrictionHeader* PX_RESTRICT frictionHeader = reinterpret_cast<SolverFrictionHeader*>(currPtr);
		const PxU32 numFrictionConstr = frictionHeader->numFrictionConstr;
		const PxU32 numNormalConstr = frictionHeader->numNormalConstr;
		const PxU32 numFrictionPerPoint = numFrictionConstr/numNormalConstr;
		currPtr +=sizeof(SolverFrictionHeader);
		PxF32* appliedImpulse = reinterpret_cast<PxF32*>(currPtr);
		currPtr +=frictionHeader->getAppliedForcePaddingSize();

		SolverContactFriction* PX_RESTRICT frictions = reinterpret_cast<SolverContactFriction*>(currPtr);
		currPtr += numFrictionConstr * sizeof(SolverContactFriction);

		const FloatV invMass0 = FLoad(frictionHeader->invMass0D0);
		const FloatV angD0 = FLoad(frictionHeader->angDom0);
		//const FloatV angD1 = FLoad(frictionHeader->angDom1);


		const FloatV staticFriction = frictionHeader->getStaticFriction();

		for(PxU32 i=0, j = 0;i<numFrictionConstr;j++)
		{
			for(PxU32 p = 0; p < numFrictionPerPoint; p++, i++)
			{
				SolverContactFriction& f = frictions[i];
				Ps::prefetchLine(&frictions[i+1]);

				const Vec3V t0 = Vec3V_From_Vec4V(f.normalXYZ_appliedForceW);
				const Vec3V raXt0 = Vec3V_From_Vec4V(f.raXnXYZ_velMultiplierW);

				const FloatV appliedForce = V4GetW(f.normalXYZ_appliedForceW);
				const FloatV velMultiplier = V4GetW(f.raXnXYZ_velMultiplierW);

				const FloatV targetVel = FLoad(f.targetVel);
				
				//const FloatV normalImpulse = contacts[f.contactIndex].getAppliedForce();
				const FloatV normalImpulse = FLoad(appliedImpulse[j]);
				const FloatV maxFriction = FMul(staticFriction, normalImpulse);
				const FloatV nMaxFriction = FNeg(maxFriction);

				//Compute the normal velocity of the constraint.

				const FloatV t0Vel1 = V3Dot(t0, linVel0);
				const FloatV t0Vel2 = V3Dot(raXt0, angState0);

				const FloatV t0Vel = FAdd(t0Vel1, t0Vel2);

				const Vec3V delangState0 = V3Scale(raXt0, angD0);
				const Vec3V delLinVel0 = V3Scale(t0, invMass0);

				// still lots to do here: using loop pipelining we can interweave this code with the
				// above - the code here has a lot of stalls that we would thereby eliminate

				const FloatV tmp = FNegScaleSub(targetVel,velMultiplier,appliedForce);
				FloatV newForce = FScaleAdd(t0Vel, velMultiplier, tmp);
				newForce = FClamp(newForce, nMaxFriction, maxFriction);
				const FloatV deltaF = FSub(newForce, appliedForce);

				linVel0 = V3ScaleAdd(delLinVel0, deltaF, linVel0);
				angState0 = V3ScaleAdd(delangState0, deltaF, angState0);

				f.setAppliedForce(newForce);
			}
		}
	}

	// Write back
	V3StoreA(linVel0, b0.linearVelocity);
	V3StoreA(angState0, b0.angularState);

	PX_ASSERT(currPtr == last);
}