void solveContactCoulomb_BStatic(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/) { PxSolverBody& b0 = *desc.bodyA; Vec3V linVel0 = V3LoadA(b0.linearVelocity); Vec3V angState0 = V3LoadA(b0.angularState); SolverContactCoulombHeader* firstHeader = reinterpret_cast<SolverContactCoulombHeader*>(desc.constraint); const PxU8* PX_RESTRICT last = desc.constraint + firstHeader->frictionOffset;//getConstraintLength(desc); //hopefully pointer aliasing doesn't bite. PxU8* PX_RESTRICT currPtr = desc.constraint; //const FloatV zero = FZero(); while(currPtr < last) { SolverContactCoulombHeader* PX_RESTRICT hdr = reinterpret_cast<SolverContactCoulombHeader*>(currPtr); currPtr += sizeof(SolverContactCoulombHeader); const PxU32 numNormalConstr = hdr->numNormalConstr; SolverContactPoint* PX_RESTRICT contacts = reinterpret_cast<SolverContactPoint*>(currPtr); Ps::prefetchLine(contacts); currPtr += numNormalConstr * sizeof(SolverContactPoint); PxF32* appliedImpulse = reinterpret_cast<PxF32*> ((reinterpret_cast<PxU8*>(hdr)) + hdr->frictionOffset + sizeof(SolverFrictionHeader)); Ps::prefetchLine(appliedImpulse); const Vec3V normal = hdr->getNormal(); const FloatV invMassDom0 = FLoad(hdr->dominance0); const FloatV angD0 = FLoad(hdr->angDom0); solveStaticContacts(contacts, numNormalConstr, normal, invMassDom0, angD0, linVel0, angState0, appliedImpulse); } // Write back V3StoreA(linVel0, b0.linearVelocity); V3StoreA(angState0, b0.angularState); PX_ASSERT(currPtr == last); }
bool setupFinalizeExtSolverContactsCoulomb( const ContactBuffer& buffer, const CorrelationBuffer& c, const PxTransform& bodyFrame0, const PxTransform& bodyFrame1, PxU8* workspace, PxReal invDt, PxReal bounceThresholdF32, const SolverExtBody& b0, const SolverExtBody& b1, PxU32 frictionCountPerPoint, PxReal invMassScale0, PxReal invInertiaScale0, PxReal invMassScale1, PxReal invInertiaScale1, PxReal restDist, PxReal ccdMaxDistance) { // NOTE II: the friction patches are sparse (some of them have no contact patches, and // therefore did not get written back to the cache) but the patch addresses are dense, // corresponding to valid patches const FloatV ccdMaxSeparation = FLoad(ccdMaxDistance); PxU8* PX_RESTRICT ptr = workspace; //KS - TODO - this should all be done in SIMD to avoid LHS const PxF32 maxPenBias0 = b0.mLinkIndex == PxSolverConstraintDesc::NO_LINK ? b0.mBodyData->penBiasClamp : getMaxPenBias(*b0.mFsData)[b0.mLinkIndex]; const PxF32 maxPenBias1 = b1.mLinkIndex == PxSolverConstraintDesc::NO_LINK ? b1.mBodyData->penBiasClamp : getMaxPenBias(*b1.mFsData)[b1.mLinkIndex]; const FloatV maxPenBias = FLoad(PxMax(maxPenBias0, maxPenBias1)/invDt); const FloatV restDistance = FLoad(restDist); const FloatV bounceThreshold = FLoad(bounceThresholdF32); const FloatV invDtV = FLoad(invDt); const FloatV pt8 = FLoad(0.8f); const FloatV invDtp8 = FMul(invDtV, pt8); Ps::prefetchLine(c.contactID); Ps::prefetchLine(c.contactID, 128); const PxU32 frictionPatchCount = c.frictionPatchCount; const PxU32 pointStride = sizeof(SolverContactPointExt); const PxU32 frictionStride = sizeof(SolverContactFrictionExt); const PxU8 pointHeaderType = DY_SC_TYPE_EXT_CONTACT; const PxU8 frictionHeaderType = DY_SC_TYPE_EXT_FRICTION; PxReal d0 = invMassScale0; PxReal d1 = invMassScale1; PxReal angD0 = invInertiaScale0; PxReal angD1 = invInertiaScale1; PxU8 flags = 0; for(PxU32 i=0;i< frictionPatchCount;i++) { const PxU32 contactCount = c.frictionPatchContactCounts[i]; if(contactCount == 0) continue; const Gu::ContactPoint* contactBase0 = buffer.contacts + c.contactPatches[c.correlationListHeads[i]].start; const Vec3V normalV = Ps::aos::V3LoadA(contactBase0->normal); const Vec3V normal = V3LoadA(contactBase0->normal); const PxReal combinedRestitution = contactBase0->restitution; SolverContactCoulombHeader* PX_RESTRICT header = reinterpret_cast<SolverContactCoulombHeader*>(ptr); ptr += sizeof(SolverContactCoulombHeader); Ps::prefetchLine(ptr, 128); Ps::prefetchLine(ptr, 256); Ps::prefetchLine(ptr, 384); const FloatV restitution = FLoad(combinedRestitution); header->numNormalConstr = PxU8(contactCount); header->type = pointHeaderType; //header->setRestitution(combinedRestitution); header->setDominance0(d0); header->setDominance1(d1); header->angDom0 = angD0; header->angDom1 = angD1; header->flags = flags; header->setNormal(normalV); for(PxU32 patch=c.correlationListHeads[i]; patch!=CorrelationBuffer::LIST_END; patch = c.contactPatches[patch].next) { const PxU32 count = c.contactPatches[patch].count; const Gu::ContactPoint* contactBase = buffer.contacts + c.contactPatches[patch].start; PxU8* p = ptr; for(PxU32 j=0;j<count;j++) { const Gu::ContactPoint& contact = contactBase[j]; SolverContactPointExt* PX_RESTRICT solverContact = reinterpret_cast<SolverContactPointExt*>(p); p += pointStride; setupExtSolverContact(b0, b1, d0, d1, angD0, angD1, bodyFrame0, bodyFrame1, normal, invDtV, invDtp8, restDistance, maxPenBias, restitution, bounceThreshold, contact, *solverContact, ccdMaxSeparation); } ptr = p; } } //construct all the frictions PxU8* PX_RESTRICT ptr2 = workspace; const PxF32 orthoThreshold = 0.70710678f; const PxF32 eps = 0.00001f; bool hasFriction = false; for(PxU32 i=0;i< frictionPatchCount;i++) { const PxU32 contactCount = c.frictionPatchContactCounts[i]; if(contactCount == 0) continue; SolverContactCoulombHeader* header = reinterpret_cast<SolverContactCoulombHeader*>(ptr2); header->frictionOffset = PxU16(ptr - ptr2); ptr2 += sizeof(SolverContactCoulombHeader) + header->numNormalConstr * pointStride; const Gu::ContactPoint* contactBase0 = buffer.contacts + c.contactPatches[c.correlationListHeads[i]].start; PxVec3 normal = contactBase0->normal; const PxReal staticFriction = contactBase0->staticFriction; const bool disableStrongFriction = !!(contactBase0->materialFlags & PxMaterialFlag::eDISABLE_FRICTION); const bool haveFriction = (disableStrongFriction == 0); SolverFrictionHeader* frictionHeader = reinterpret_cast<SolverFrictionHeader*>(ptr); frictionHeader->numNormalConstr = Ps::to8(c.frictionPatchContactCounts[i]); frictionHeader->numFrictionConstr = Ps::to8(haveFriction ? c.frictionPatchContactCounts[i] * frictionCountPerPoint : 0); frictionHeader->flags = flags; ptr += sizeof(SolverFrictionHeader); PxF32* forceBuffer = reinterpret_cast<PxF32*>(ptr); ptr += frictionHeader->getAppliedForcePaddingSize(c.frictionPatchContactCounts[i]); PxMemZero(forceBuffer, sizeof(PxF32) * c.frictionPatchContactCounts[i]); Ps::prefetchLine(ptr, 128); Ps::prefetchLine(ptr, 256); Ps::prefetchLine(ptr, 384); const PxVec3 t0Fallback1(0.f, -normal.z, normal.y); const PxVec3 t0Fallback2(-normal.y, normal.x, 0.f) ; const PxVec3 tFallback1 = orthoThreshold > PxAbs(normal.x) ? t0Fallback1 : t0Fallback2; const PxVec3 vrel = b0.getLinVel() - b1.getLinVel(); const PxVec3 t0_ = vrel - normal * (normal.dot(vrel)); const PxReal sqDist = t0_.dot(t0_); const PxVec3 tDir0 = (sqDist > eps ? t0_: tFallback1).getNormalized(); const PxVec3 tDir1 = tDir0.cross(normal); PxVec3 tFallback[2] = {tDir0, tDir1}; PxU32 ind = 0; if(haveFriction) { hasFriction = true; frictionHeader->setStaticFriction(staticFriction); frictionHeader->invMass0D0 = d0; frictionHeader->invMass1D1 = d1; frictionHeader->angDom0 = angD0; frictionHeader->angDom1 = angD1; frictionHeader->type = frictionHeaderType; PxU32 totalPatchContactCount = 0; for(PxU32 patch=c.correlationListHeads[i]; patch!=CorrelationBuffer::LIST_END; patch = c.contactPatches[patch].next) { const PxU32 count = c.contactPatches[patch].count; const PxU32 start = c.contactPatches[patch].start; const Gu::ContactPoint* contactBase = buffer.contacts + start; PxU8* p = ptr; for(PxU32 j =0; j < count; j++) { const Gu::ContactPoint& contact = contactBase[j]; const PxVec3 ra = contact.point - bodyFrame0.p; const PxVec3 rb = contact.point - bodyFrame1.p; const PxVec3 targetVel = contact.targetVel; const PxVec3 pVRa = b0.getLinVel() + b0.getAngVel().cross(ra); const PxVec3 pVRb = b1.getLinVel() + b1.getAngVel().cross(rb); //const PxVec3 vrel = pVRa - pVRb; for(PxU32 k = 0; k < frictionCountPerPoint; ++k) { SolverContactFrictionExt* PX_RESTRICT f0 = reinterpret_cast<SolverContactFrictionExt*>(p); p += frictionStride; PxVec3 t0 = tFallback[ind]; ind = 1 - ind; PxVec3 raXn = ra.cross(t0); PxVec3 rbXn = rb.cross(t0); Cm::SpatialVector deltaV0, deltaV1; const Cm::SpatialVector resp0 = createImpulseResponseVector(t0, raXn, b0); const Cm::SpatialVector resp1 = createImpulseResponseVector(-t0, -rbXn, b1); PxReal unitResponse = getImpulseResponse(b0, resp0, deltaV0, d0, angD0, b1, resp1, deltaV1, d1, angD1); PxReal tv = targetVel.dot(t0); if(b0.mLinkIndex == PxSolverConstraintDesc::NO_LINK) tv += pVRa.dot(t0); else if(b1.mLinkIndex == PxSolverConstraintDesc::NO_LINK) tv -= pVRb.dot(t0); f0->setVelMultiplier(FLoad(unitResponse>0.0f ? 1.f/unitResponse : 0.0f)); f0->setRaXn(resp0.angular); f0->setRbXn(-resp1.angular); f0->targetVel = tv; f0->setNormal(t0); f0->setAppliedForce(0.0f); f0->linDeltaVA = V3LoadA(deltaV0.linear); f0->angDeltaVA = V3LoadA(deltaV0.angular); f0->linDeltaVB = V3LoadA(deltaV1.linear); f0->angDeltaVB = V3LoadA(deltaV1.angular); } } totalPatchContactCount += c.contactPatches[patch].count; ptr = p; } } } //PX_ASSERT(ptr - workspace == n.solverConstraintSize); return hasFriction; }
void solveFriction_BStatic(const PxcSolverConstraintDesc& desc, PxcSolverContext& /*cache*/) { PxcSolverBody& b0 = *desc.bodyA; Vec3V linVel0 = V3LoadA(b0.linearVelocity); Vec3V angVel0 = V3LoadA(b0.angularVelocity); const PxU8* PX_RESTRICT currPtr = desc.constraint; const PxU8* PX_RESTRICT last = currPtr + getConstraintLength(desc); //hopefully pointer aliasing doesn't bite. //PxVec3 l0, a0; //PxVec3_From_Vec3V(linVel0, l0); //PxVec3_From_Vec3V(angVel0, a0); //PX_ASSERT(l0.isFinite()); //PX_ASSERT(a0.isFinite()); while(currPtr < last) { const PxcSolverFrictionHeader* PX_RESTRICT frictionHeader = (PxcSolverFrictionHeader*)currPtr; const PxU32 numFrictionConstr = frictionHeader->numFrictionConstr; currPtr +=sizeof(PxcSolverFrictionHeader); PxF32* appliedImpulse = (PxF32*)currPtr; currPtr +=frictionHeader->getAppliedForcePaddingSize(); PxcSolverFriction* PX_RESTRICT frictions = (PxcSolverFriction*)currPtr; currPtr += numFrictionConstr * sizeof(PxcSolverFriction); const FloatV staticFriction = frictionHeader->getStaticFriction(); for(PxU32 i=0;i<numFrictionConstr;i++) { PxcSolverFriction& f = frictions[i]; Ps::prefetchLine(&frictions[i+1]); const Vec3V t0 = Vec3V_From_Vec4V(f.normalXYZ_appliedForceW); const Vec3V raXt0 = Vec3V_From_Vec4V(f.raXnXYZ_velMultiplierW); const FloatV appliedForce = V4GetW(f.normalXYZ_appliedForceW); const FloatV velMultiplier = V4GetW(f.raXnXYZ_velMultiplierW); const FloatV targetVel = V4GetW(f.rbXnXYZ_targetVelocityW); //const FloatV normalImpulse = contacts[f.contactIndex].getAppliedForce(); const FloatV normalImpulse = FLoad(appliedImpulse[f.contactIndex]); const FloatV maxFriction = FMul(staticFriction, normalImpulse); const FloatV nMaxFriction = FNeg(maxFriction); //Compute the normal velocity of the constraint. const FloatV t0Vel1 = V3Dot(t0, linVel0); const FloatV t0Vel2 = V3Dot(raXt0, angVel0); //const FloatV unbiasedErr = FMul(targetVel, velMultiplier); //const FloatV biasedErr = FMulAdd(targetVel, velMultiplier, nScaledBias); const FloatV t0Vel = FAdd(t0Vel1, t0Vel2); const Vec3V delAngVel0 = Vec3V_From_Vec4V(f.delAngVel0_InvMassADom); const Vec3V delLinVel0 = V3Scale(t0, V4GetW(f.delAngVel0_InvMassADom)); // still lots to do here: using loop pipelining we can interweave this code with the // above - the code here has a lot of stalls that we would thereby eliminate //FloatV deltaF = FSub(scaledBias, FMul(t0Vel, velMultiplier));//FNeg(FMul(t0Vel, velMultiplier)); //FloatV deltaF = FMul(t0Vel, velMultiplier); //FloatV newForce = FMulAdd(t0Vel, velMultiplier, appliedForce); const FloatV tmp = FNegMulSub(targetVel,velMultiplier,appliedForce); FloatV newForce = FMulAdd(t0Vel, velMultiplier, tmp); newForce = FClamp(newForce, nMaxFriction, maxFriction); const FloatV deltaF = FSub(newForce, appliedForce); linVel0 = V3ScaleAdd(delLinVel0, deltaF, linVel0); angVel0 = V3ScaleAdd(delAngVel0, deltaF, angVel0); f.setAppliedForce(newForce); } } //PxVec3_From_Vec3V(linVel0, l0); //PxVec3_From_Vec3V(angVel0, a0); //PX_ASSERT(l0.isFinite()); //PX_ASSERT(a0.isFinite()); // Write back V3StoreU(linVel0, b0.linearVelocity); V3StoreU(angVel0, b0.angularVelocity); PX_ASSERT(currPtr == last); }
void solveContactCoulomb_BStatic(const PxcSolverConstraintDesc& desc, PxcSolverContext& /*cache*/) { PxcSolverBody& b0 = *desc.bodyA; Vec3V linVel0 = V3LoadA(b0.linearVelocity); Vec3V angVel0 = V3LoadA(b0.angularVelocity); PxcSolverContactCoulombHeader* firstHeader = (PxcSolverContactCoulombHeader*)desc.constraint; const PxU8* PX_RESTRICT last = desc.constraint + firstHeader->frictionOffset;//getConstraintLength(desc); //hopefully pointer aliasing doesn't bite. const PxU8* PX_RESTRICT currPtr = desc.constraint; const FloatV zero = FZero(); while(currPtr < last) { PxcSolverContactCoulombHeader* PX_RESTRICT hdr = (PxcSolverContactCoulombHeader*)currPtr; currPtr += sizeof(PxcSolverContactCoulombHeader); const PxU32 numNormalConstr = hdr->numNormalConstr; PxcSolverContact* PX_RESTRICT contacts = (PxcSolverContact*)currPtr; Ps::prefetchLine(contacts); currPtr += numNormalConstr * sizeof(PxcSolverContact); PxF32* appliedImpulse = (PxF32*) (((PxU8*)hdr) + hdr->frictionOffset + sizeof(PxcSolverFrictionHeader)); Ps::prefetchLine(appliedImpulse); const Vec3V normal = hdr->getNormal(); const FloatV invMassDom0 = FLoad(hdr->dominance0); FloatV normalVel1 = V3Dot(normal, linVel0); const Vec3V delLinVel0 = V3Scale(normal, invMassDom0); FloatV accumDeltaF = zero; //FloatV accumImpulse = zero; for(PxU32 i=0;i<numNormalConstr;i++) { PxcSolverContact& c = contacts[i]; Ps::prefetchLine(&contacts[i+1]); //const Vec4V normalXYZ_velMultiplierW = c.normalXYZ_velMultiplierW; const Vec4V raXnXYZ_appliedForceW = c.raXnXYZ_appliedForceW; const Vec4V rbXnXYZ_velMultiplierW = c.rbXnXYZ_velMultiplierW; //const Vec3V normal = c.normal; //const Vec3V normal = Vec3V_From_Vec4V(normalXYZ_velMultiplierW); const Vec3V raXn = Vec3V_From_Vec4V(raXnXYZ_appliedForceW); const FloatV appliedForce = V4GetW(raXnXYZ_appliedForceW); const FloatV velMultiplier = V4GetW(rbXnXYZ_velMultiplierW); //const FloatV velMultiplier = V4GetW(normalXYZ_velMultiplierW); const Vec3V delAngVel0 = Vec3V_From_Vec4V(c.delAngVel0_InvMassADom); const FloatV targetVel = c.getTargetVelocity(); const FloatV nScaledBias = FNeg(c.getScaledBias()); const FloatV maxImpulse = c.getMaxImpulse(); //Compute the normal velocity of the constraint. //const FloatV normalVel1 = V3Dot(normal, linVel0); const FloatV normalVel2 = V3Dot(raXn, angVel0); const FloatV normalVel = FAdd(normalVel1, normalVel2); //const FloatV unbiasedErr = FMul(targetVel, velMultiplier); const FloatV biasedErr = FMulAdd(targetVel, velMultiplier, nScaledBias); // still lots to do here: using loop pipelining we can interweave this code with the // above - the code here has a lot of stalls that we would thereby eliminate const FloatV _deltaF = FMax(FNegMulSub(normalVel, velMultiplier, biasedErr), FNeg(appliedForce)); const FloatV _newForce = FAdd(appliedForce, _deltaF); const FloatV newForce = FMin(_newForce, maxImpulse); const FloatV deltaF = FSub(newForce, appliedForce); //linVel0 = V3MulAdd(delLinVel0, deltaF, linVel0); normalVel1 = FScaleAdd(invMassDom0, deltaF, normalVel1); angVel0 = V3ScaleAdd(delAngVel0, deltaF, angVel0); accumDeltaF = FAdd(accumDeltaF, deltaF); c.setAppliedForce(newForce); Ps::aos::FStore(newForce, &appliedImpulse[i]); Ps::prefetchLine(&appliedImpulse[i], 128); //accumImpulse = FAdd(accumImpulse, newAppliedForce); } linVel0 = V3ScaleAdd(delLinVel0, accumDeltaF, linVel0); //hdr->setAccumlatedForce(accumImpulse); } // Write back V3StoreU(linVel0, b0.linearVelocity); V3StoreU(angVel0, b0.angularVelocity); PX_ASSERT(currPtr == last); }
void setupFinalizeExtSolverContacts( const ContactPoint* buffer, const CorrelationBuffer& c, const PxTransform& bodyFrame0, const PxTransform& bodyFrame1, PxU8* workspace, const SolverExtBody& b0, const SolverExtBody& b1, const PxReal invDtF32, PxReal bounceThresholdF32, PxReal invMassScale0, PxReal invInertiaScale0, PxReal invMassScale1, PxReal invInertiaScale1, const PxReal restDist, PxU8* frictionDataPtr, PxReal ccdMaxContactDist) { // NOTE II: the friction patches are sparse (some of them have no contact patches, and // therefore did not get written back to the cache) but the patch addresses are dense, // corresponding to valid patches /*const bool haveFriction = PX_IR(n.staticFriction) > 0 || PX_IR(n.dynamicFriction) > 0;*/ const FloatV ccdMaxSeparation = FLoad(ccdMaxContactDist); PxU8* PX_RESTRICT ptr = workspace; const FloatV zero=FZero(); //KS - TODO - this should all be done in SIMD to avoid LHS const PxF32 maxPenBias0 = b0.mLinkIndex == PxSolverConstraintDesc::NO_LINK ? b0.mBodyData->penBiasClamp : getMaxPenBias(*b0.mFsData)[b0.mLinkIndex]; const PxF32 maxPenBias1 = b1.mLinkIndex == PxSolverConstraintDesc::NO_LINK ? b1.mBodyData->penBiasClamp : getMaxPenBias(*b1.mFsData)[b1.mLinkIndex]; const FloatV maxPenBias = FLoad(PxMax(maxPenBias0, maxPenBias1)); const PxReal d0 = invMassScale0; const PxReal d1 = invMassScale1; const PxReal angD0 = invInertiaScale0; const PxReal angD1 = invInertiaScale1; Vec4V staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W = V4Zero(); staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetZ(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, FLoad(d0)); staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetW(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, FLoad(d1)); const FloatV restDistance = FLoad(restDist); PxU32 frictionPatchWritebackAddrIndex = 0; PxU32 contactWritebackCount = 0; Ps::prefetchLine(c.contactID); Ps::prefetchLine(c.contactID, 128); const FloatV invDt = FLoad(invDtF32); const FloatV p8 = FLoad(0.8f); const FloatV bounceThreshold = FLoad(bounceThresholdF32); const FloatV invDtp8 = FMul(invDt, p8); PxU8 flags = 0; for(PxU32 i=0;i<c.frictionPatchCount;i++) { PxU32 contactCount = c.frictionPatchContactCounts[i]; if(contactCount == 0) continue; const FrictionPatch& frictionPatch = c.frictionPatches[i]; PX_ASSERT(frictionPatch.anchorCount <= 2); //0==anchorCount is allowed if all the contacts in the manifold have a large offset. const Gu::ContactPoint* contactBase0 = buffer + c.contactPatches[c.correlationListHeads[i]].start; const PxReal combinedRestitution = contactBase0->restitution; const PxReal staticFriction = contactBase0->staticFriction; const PxReal dynamicFriction = contactBase0->dynamicFriction; const bool disableStrongFriction = !!(contactBase0->materialFlags & PxMaterialFlag::eDISABLE_FRICTION); staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetX(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, FLoad(staticFriction)); staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetY(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, FLoad(dynamicFriction)); SolverContactHeader* PX_RESTRICT header = reinterpret_cast<SolverContactHeader*>(ptr); ptr += sizeof(SolverContactHeader); Ps::prefetchLine(ptr + 128); Ps::prefetchLine(ptr + 256); Ps::prefetchLine(ptr + 384); const bool haveFriction = (disableStrongFriction == 0) ;//PX_IR(n.staticFriction) > 0 || PX_IR(n.dynamicFriction) > 0; header->numNormalConstr = Ps::to8(contactCount); header->numFrictionConstr = Ps::to8(haveFriction ? frictionPatch.anchorCount*2 : 0); header->type = Ps::to8(DY_SC_TYPE_EXT_CONTACT); header->flags = flags; const FloatV restitution = FLoad(combinedRestitution); header->staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W = staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W; header->angDom0 = angD0; header->angDom1 = angD1; const PxU32 pointStride = sizeof(SolverContactPointExt); const PxU32 frictionStride = sizeof(SolverContactFrictionExt); const Vec3V normal = V3LoadU(buffer[c.contactPatches[c.correlationListHeads[i]].start].normal); header->normal = normal; for(PxU32 patch=c.correlationListHeads[i]; patch!=CorrelationBuffer::LIST_END; patch = c.contactPatches[patch].next) { const PxU32 count = c.contactPatches[patch].count; const Gu::ContactPoint* contactBase = buffer + c.contactPatches[patch].start; PxU8* p = ptr; for(PxU32 j=0;j<count;j++) { const Gu::ContactPoint& contact = contactBase[j]; SolverContactPointExt* PX_RESTRICT solverContact = reinterpret_cast<SolverContactPointExt*>(p); p += pointStride; setupExtSolverContact(b0, b1, d0, d1, angD0, angD1, bodyFrame0, bodyFrame1, normal, invDt, invDtp8, restDistance, maxPenBias, restitution, bounceThreshold, contact, *solverContact, ccdMaxSeparation); } ptr = p; } contactWritebackCount += contactCount; PxF32* forceBuffer = reinterpret_cast<PxF32*>(ptr); PxMemZero(forceBuffer, sizeof(PxF32) * contactCount); ptr += sizeof(PxF32) * ((contactCount + 3) & (~3)); header->broken = 0; if(haveFriction) { //const Vec3V normal = Vec3V_From_PxVec3(buffer.contacts[c.contactPatches[c.correlationListHeads[i]].start].normal); PxVec3 normalS = buffer[c.contactPatches[c.correlationListHeads[i]].start].normal; PxVec3 t0, t1; computeFrictionTangents(b0.getLinVel() - b1.getLinVel(), normalS, t0, t1); Vec3V vT0 = V3LoadU(t0); Vec3V vT1 = V3LoadU(t1); //We want to set the writeBack ptr to point to the broken flag of the friction patch. //On spu we have a slight problem here because the friction patch array is //in local store rather than in main memory. The good news is that the address of the friction //patch array in main memory is stored in the work unit. These two addresses will be equal //except on spu where one is local store memory and the other is the effective address in main memory. //Using the value stored in the work unit guarantees that the main memory address is used on all platforms. PxU8* PX_RESTRICT writeback = frictionDataPtr + frictionPatchWritebackAddrIndex*sizeof(FrictionPatch); header->frictionBrokenWritebackByte = writeback; for(PxU32 j = 0; j < frictionPatch.anchorCount; j++) { SolverContactFrictionExt* PX_RESTRICT f0 = reinterpret_cast<SolverContactFrictionExt*>(ptr); ptr += frictionStride; SolverContactFrictionExt* PX_RESTRICT f1 = reinterpret_cast<SolverContactFrictionExt*>(ptr); ptr += frictionStride; PxVec3 ra = bodyFrame0.q.rotate(frictionPatch.body0Anchors[j]); PxVec3 rb = bodyFrame1.q.rotate(frictionPatch.body1Anchors[j]); PxVec3 error = (ra + bodyFrame0.p) - (rb + bodyFrame1.p); { const PxVec3 raXn = ra.cross(t0); const PxVec3 rbXn = rb.cross(t0); Cm::SpatialVector deltaV0, deltaV1; const Cm::SpatialVector resp0 = createImpulseResponseVector(t0, raXn, b0); const Cm::SpatialVector resp1 = createImpulseResponseVector(-t1, -rbXn, b1); FloatV resp = FLoad(getImpulseResponse(b0, resp0, deltaV0, d0, angD0, b1, resp1, deltaV1, d1, angD1)); const FloatV velMultiplier = FSel(FIsGrtr(resp, zero), FMul(p8, FRecip(resp)), zero); PxU32 index = c.contactPatches[c.correlationListHeads[i]].start; PxF32 targetVel = buffer[index].targetVel.dot(t0); if(b0.mLinkIndex == PxSolverConstraintDesc::NO_LINK) targetVel -= b0.projectVelocity(t0, raXn); else if(b1.mLinkIndex == PxSolverConstraintDesc::NO_LINK) targetVel += b1.projectVelocity(t0, rbXn); f0->normalXYZ_appliedForceW = V4SetW(vT0, zero); f0->raXnXYZ_velMultiplierW = V4SetW(V4LoadA(&resp0.angular.x), velMultiplier); f0->rbXnXYZ_biasW = V4SetW(V4Neg(V4LoadA(&resp1.angular.x)), FLoad(t0.dot(error) * invDtF32)); f0->linDeltaVA = V3LoadA(deltaV0.linear); f0->angDeltaVA = V3LoadA(deltaV0.angular); f0->linDeltaVB = V3LoadA(deltaV1.linear); f0->angDeltaVB = V3LoadA(deltaV1.angular); f0->targetVel = targetVel; } { const PxVec3 raXn = ra.cross(t1); const PxVec3 rbXn = rb.cross(t1); Cm::SpatialVector deltaV0, deltaV1; const Cm::SpatialVector resp0 = createImpulseResponseVector(t1, raXn, b0); const Cm::SpatialVector resp1 = createImpulseResponseVector(-t1, -rbXn, b1); FloatV resp = FLoad(getImpulseResponse(b0, resp0, deltaV0, d0, angD0, b1, resp1, deltaV1, d1, angD1)); const FloatV velMultiplier = FSel(FIsGrtr(resp, zero), FMul(p8, FRecip(resp)), zero); PxU32 index = c.contactPatches[c.correlationListHeads[i]].start; PxF32 targetVel = buffer[index].targetVel.dot(t0); if(b0.mLinkIndex == PxSolverConstraintDesc::NO_LINK) targetVel -= b0.projectVelocity(t1, raXn); else if(b1.mLinkIndex == PxSolverConstraintDesc::NO_LINK) targetVel += b1.projectVelocity(t1, rbXn); f1->normalXYZ_appliedForceW = V4SetW(vT1, zero); f1->raXnXYZ_velMultiplierW = V4SetW(V4LoadA(&resp0.angular.x), velMultiplier); f1->rbXnXYZ_biasW = V4SetW(V4Neg(V4LoadA(&resp1.angular.x)), FLoad(t1.dot(error) * invDtF32)); f1->linDeltaVA = V3LoadA(deltaV0.linear); f1->angDeltaVA = V3LoadA(deltaV0.angular); f1->linDeltaVB = V3LoadA(deltaV1.linear); f1->angDeltaVB = V3LoadA(deltaV1.angular); f1->targetVel = targetVel; } } } frictionPatchWritebackAddrIndex++; } }
void solveFriction_BStatic(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/) { PxSolverBody& b0 = *desc.bodyA; Vec3V linVel0 = V3LoadA(b0.linearVelocity); Vec3V angState0 = V3LoadA(b0.angularState); PxU8* PX_RESTRICT currPtr = desc.constraint; const PxU8* PX_RESTRICT last = currPtr + getConstraintLength(desc); while(currPtr < last) { const SolverFrictionHeader* PX_RESTRICT frictionHeader = reinterpret_cast<SolverFrictionHeader*>(currPtr); const PxU32 numFrictionConstr = frictionHeader->numFrictionConstr; const PxU32 numNormalConstr = frictionHeader->numNormalConstr; const PxU32 numFrictionPerPoint = numFrictionConstr/numNormalConstr; currPtr +=sizeof(SolverFrictionHeader); PxF32* appliedImpulse = reinterpret_cast<PxF32*>(currPtr); currPtr +=frictionHeader->getAppliedForcePaddingSize(); SolverContactFriction* PX_RESTRICT frictions = reinterpret_cast<SolverContactFriction*>(currPtr); currPtr += numFrictionConstr * sizeof(SolverContactFriction); const FloatV invMass0 = FLoad(frictionHeader->invMass0D0); const FloatV angD0 = FLoad(frictionHeader->angDom0); //const FloatV angD1 = FLoad(frictionHeader->angDom1); const FloatV staticFriction = frictionHeader->getStaticFriction(); for(PxU32 i=0, j = 0;i<numFrictionConstr;j++) { for(PxU32 p = 0; p < numFrictionPerPoint; p++, i++) { SolverContactFriction& f = frictions[i]; Ps::prefetchLine(&frictions[i+1]); const Vec3V t0 = Vec3V_From_Vec4V(f.normalXYZ_appliedForceW); const Vec3V raXt0 = Vec3V_From_Vec4V(f.raXnXYZ_velMultiplierW); const FloatV appliedForce = V4GetW(f.normalXYZ_appliedForceW); const FloatV velMultiplier = V4GetW(f.raXnXYZ_velMultiplierW); const FloatV targetVel = FLoad(f.targetVel); //const FloatV normalImpulse = contacts[f.contactIndex].getAppliedForce(); const FloatV normalImpulse = FLoad(appliedImpulse[j]); const FloatV maxFriction = FMul(staticFriction, normalImpulse); const FloatV nMaxFriction = FNeg(maxFriction); //Compute the normal velocity of the constraint. const FloatV t0Vel1 = V3Dot(t0, linVel0); const FloatV t0Vel2 = V3Dot(raXt0, angState0); const FloatV t0Vel = FAdd(t0Vel1, t0Vel2); const Vec3V delangState0 = V3Scale(raXt0, angD0); const Vec3V delLinVel0 = V3Scale(t0, invMass0); // still lots to do here: using loop pipelining we can interweave this code with the // above - the code here has a lot of stalls that we would thereby eliminate const FloatV tmp = FNegScaleSub(targetVel,velMultiplier,appliedForce); FloatV newForce = FScaleAdd(t0Vel, velMultiplier, tmp); newForce = FClamp(newForce, nMaxFriction, maxFriction); const FloatV deltaF = FSub(newForce, appliedForce); linVel0 = V3ScaleAdd(delLinVel0, deltaF, linVel0); angState0 = V3ScaleAdd(delangState0, deltaF, angState0); f.setAppliedForce(newForce); } } } // Write back V3StoreA(linVel0, b0.linearVelocity); V3StoreA(angState0, b0.angularState); PX_ASSERT(currPtr == last); }