/**
\brief Accumulates link inertias towards link 0 with a per-link load term and stores the root inverse inertia.

The work is done on a scratch copy of baseInertia obtained from allocator; baseInertia is only read.
Link indices are visited from linkCount-1 down to 1. For every visited link the row's D and DSI are
written, and the result of multiplySubtract is translated by the joint's parentOffset and added to
the parent's scratch inertia. Finally the scratch inertia of link 0 is inverted into the root
inverse inertia of matrix.

\param matrix		articulation data; rows[i].D, rows[i].DSI and the root inverse inertia are written
\param baseInertia	array of matrix.linkCount inertias used to initialise the scratch copy
\param isf			per-link scalar; load[i] is scaled by isf[i] before being added to the computeSIS result
\param load			per-link 3x3 matrix
\param allocator	scratch allocator providing the working inertia array
*/
void PxcFsPropagateDrivenInertiaSimd(PxcFsData& matrix,
									 const PxcFsInertia* baseInertia,
									 const PxReal* isf,
									 const Mat33V* load,
									 PxcFsScratchAllocator allocator)
{
	typedef PxcArticulationFnsSimd<PxcArticulationFnsSimdBase> SimdFns;

	// Filled by computeSIS and consumed by multiplySubtract within the same iteration.
	PxcSIMDSpatial IS[3];

	PxcFsRow* fsRows = getFsRows(matrix);
	const PxcFsRowAux* rowAux = getAux(matrix);
	const PxcFsJointVectors* jointVecs = getJointVectors(matrix);

	// Private working copy so the caller's inertia array is left as it was.
	PxcFsInertia* workInertia = allocator.alloc<PxcFsInertia>(matrix.linkCount);
	PxMemCopy(workInertia, baseInertia, matrix.linkCount * sizeof(PxcFsInertia));

	// Link 0 is excluded here; it is handled after the loop.
	// NOTE(review): presumably matrix.parent[link] < link, so a parent is finalised only after all
	// of its children have been folded in - confirm against the articulation builder.
	for(PxU32 link = matrix.linkCount - 1; link > 0; --link)
	{
		PxcFsRow& row = fsRows[link];
		const PxcFsRowAux& auxRow = rowAux[link];
		const PxcFsJointVectors& joint = jointVecs[link];
		const Mat33V& linkLoad = load[link];

		const Mat33V sis = SimdFns::computeSIS(workInertia[link], auxRow.S, IS);
		const FloatV loadScale = FLoad(isf[link]);

		// Column-wise: load * isf + computeSIS result.
		const Vec3V drivenCol0 = V3ScaleAdd(linkLoad.col0, loadScale, sis.col0);
		const Vec3V drivenCol1 = V3ScaleAdd(linkLoad.col1, loadScale, sis.col1);
		const Vec3V drivenCol2 = V3ScaleAdd(linkLoad.col2, loadScale, sis.col2);

		Mat33V D = SimdFns::invertSym33(Mat33V(drivenCol0, drivenCol1, drivenCol2));
		row.D = D;

		// multiplySubtract also receives row.DSI; its result is moved to the parent by parentOffset
		// and summed into the parent's working inertia.
		PxcFsInertia& parentInertia = workInertia[matrix.parent[link]];
		parentInertia = SimdFns::addInertia(
			parentInertia,
			SimdFns::translateInertia(
				joint.parentOffset,
				SimdFns::multiplySubtract(workInertia[link], D, IS, row.DSI)));
	}

	getRootInverseInertia(matrix) = SimdFns::invertInertia(workInertia[0]);
}
void solveFriction_BStatic(const PxcSolverConstraintDesc& desc, PxcSolverContext& /*cache*/) { PxcSolverBody& b0 = *desc.bodyA; Vec3V linVel0 = V3LoadA(b0.linearVelocity); Vec3V angVel0 = V3LoadA(b0.angularVelocity); const PxU8* PX_RESTRICT currPtr = desc.constraint; const PxU8* PX_RESTRICT last = currPtr + getConstraintLength(desc); //hopefully pointer aliasing doesn't bite. //PxVec3 l0, a0; //PxVec3_From_Vec3V(linVel0, l0); //PxVec3_From_Vec3V(angVel0, a0); //PX_ASSERT(l0.isFinite()); //PX_ASSERT(a0.isFinite()); while(currPtr < last) { const PxcSolverFrictionHeader* PX_RESTRICT frictionHeader = (PxcSolverFrictionHeader*)currPtr; const PxU32 numFrictionConstr = frictionHeader->numFrictionConstr; currPtr +=sizeof(PxcSolverFrictionHeader); PxF32* appliedImpulse = (PxF32*)currPtr; currPtr +=frictionHeader->getAppliedForcePaddingSize(); PxcSolverFriction* PX_RESTRICT frictions = (PxcSolverFriction*)currPtr; currPtr += numFrictionConstr * sizeof(PxcSolverFriction); const FloatV staticFriction = frictionHeader->getStaticFriction(); for(PxU32 i=0;i<numFrictionConstr;i++) { PxcSolverFriction& f = frictions[i]; Ps::prefetchLine(&frictions[i+1]); const Vec3V t0 = Vec3V_From_Vec4V(f.normalXYZ_appliedForceW); const Vec3V raXt0 = Vec3V_From_Vec4V(f.raXnXYZ_velMultiplierW); const FloatV appliedForce = V4GetW(f.normalXYZ_appliedForceW); const FloatV velMultiplier = V4GetW(f.raXnXYZ_velMultiplierW); const FloatV targetVel = V4GetW(f.rbXnXYZ_targetVelocityW); //const FloatV normalImpulse = contacts[f.contactIndex].getAppliedForce(); const FloatV normalImpulse = FLoad(appliedImpulse[f.contactIndex]); const FloatV maxFriction = FMul(staticFriction, normalImpulse); const FloatV nMaxFriction = FNeg(maxFriction); //Compute the normal velocity of the constraint. 
const FloatV t0Vel1 = V3Dot(t0, linVel0); const FloatV t0Vel2 = V3Dot(raXt0, angVel0); //const FloatV unbiasedErr = FMul(targetVel, velMultiplier); //const FloatV biasedErr = FMulAdd(targetVel, velMultiplier, nScaledBias); const FloatV t0Vel = FAdd(t0Vel1, t0Vel2); const Vec3V delAngVel0 = Vec3V_From_Vec4V(f.delAngVel0_InvMassADom); const Vec3V delLinVel0 = V3Scale(t0, V4GetW(f.delAngVel0_InvMassADom)); // still lots to do here: using loop pipelining we can interweave this code with the // above - the code here has a lot of stalls that we would thereby eliminate //FloatV deltaF = FSub(scaledBias, FMul(t0Vel, velMultiplier));//FNeg(FMul(t0Vel, velMultiplier)); //FloatV deltaF = FMul(t0Vel, velMultiplier); //FloatV newForce = FMulAdd(t0Vel, velMultiplier, appliedForce); const FloatV tmp = FNegMulSub(targetVel,velMultiplier,appliedForce); FloatV newForce = FMulAdd(t0Vel, velMultiplier, tmp); newForce = FClamp(newForce, nMaxFriction, maxFriction); const FloatV deltaF = FSub(newForce, appliedForce); linVel0 = V3ScaleAdd(delLinVel0, deltaF, linVel0); angVel0 = V3ScaleAdd(delAngVel0, deltaF, angVel0); f.setAppliedForce(newForce); } } //PxVec3_From_Vec3V(linVel0, l0); //PxVec3_From_Vec3V(angVel0, a0); //PX_ASSERT(l0.isFinite()); //PX_ASSERT(a0.isFinite()); // Write back V3StoreU(linVel0, b0.linearVelocity); V3StoreU(angVel0, b0.angularVelocity); PX_ASSERT(currPtr == last); }
void solveContactCoulomb_BStatic(const PxcSolverConstraintDesc& desc, PxcSolverContext& /*cache*/) { PxcSolverBody& b0 = *desc.bodyA; Vec3V linVel0 = V3LoadA(b0.linearVelocity); Vec3V angVel0 = V3LoadA(b0.angularVelocity); PxcSolverContactCoulombHeader* firstHeader = (PxcSolverContactCoulombHeader*)desc.constraint; const PxU8* PX_RESTRICT last = desc.constraint + firstHeader->frictionOffset;//getConstraintLength(desc); //hopefully pointer aliasing doesn't bite. const PxU8* PX_RESTRICT currPtr = desc.constraint; const FloatV zero = FZero(); while(currPtr < last) { PxcSolverContactCoulombHeader* PX_RESTRICT hdr = (PxcSolverContactCoulombHeader*)currPtr; currPtr += sizeof(PxcSolverContactCoulombHeader); const PxU32 numNormalConstr = hdr->numNormalConstr; PxcSolverContact* PX_RESTRICT contacts = (PxcSolverContact*)currPtr; Ps::prefetchLine(contacts); currPtr += numNormalConstr * sizeof(PxcSolverContact); PxF32* appliedImpulse = (PxF32*) (((PxU8*)hdr) + hdr->frictionOffset + sizeof(PxcSolverFrictionHeader)); Ps::prefetchLine(appliedImpulse); const Vec3V normal = hdr->getNormal(); const FloatV invMassDom0 = FLoad(hdr->dominance0); FloatV normalVel1 = V3Dot(normal, linVel0); const Vec3V delLinVel0 = V3Scale(normal, invMassDom0); FloatV accumDeltaF = zero; //FloatV accumImpulse = zero; for(PxU32 i=0;i<numNormalConstr;i++) { PxcSolverContact& c = contacts[i]; Ps::prefetchLine(&contacts[i+1]); //const Vec4V normalXYZ_velMultiplierW = c.normalXYZ_velMultiplierW; const Vec4V raXnXYZ_appliedForceW = c.raXnXYZ_appliedForceW; const Vec4V rbXnXYZ_velMultiplierW = c.rbXnXYZ_velMultiplierW; //const Vec3V normal = c.normal; //const Vec3V normal = Vec3V_From_Vec4V(normalXYZ_velMultiplierW); const Vec3V raXn = Vec3V_From_Vec4V(raXnXYZ_appliedForceW); const FloatV appliedForce = V4GetW(raXnXYZ_appliedForceW); const FloatV velMultiplier = V4GetW(rbXnXYZ_velMultiplierW); //const FloatV velMultiplier = V4GetW(normalXYZ_velMultiplierW); const Vec3V delAngVel0 = 
Vec3V_From_Vec4V(c.delAngVel0_InvMassADom); const FloatV targetVel = c.getTargetVelocity(); const FloatV nScaledBias = FNeg(c.getScaledBias()); const FloatV maxImpulse = c.getMaxImpulse(); //Compute the normal velocity of the constraint. //const FloatV normalVel1 = V3Dot(normal, linVel0); const FloatV normalVel2 = V3Dot(raXn, angVel0); const FloatV normalVel = FAdd(normalVel1, normalVel2); //const FloatV unbiasedErr = FMul(targetVel, velMultiplier); const FloatV biasedErr = FMulAdd(targetVel, velMultiplier, nScaledBias); // still lots to do here: using loop pipelining we can interweave this code with the // above - the code here has a lot of stalls that we would thereby eliminate const FloatV _deltaF = FMax(FNegMulSub(normalVel, velMultiplier, biasedErr), FNeg(appliedForce)); const FloatV _newForce = FAdd(appliedForce, _deltaF); const FloatV newForce = FMin(_newForce, maxImpulse); const FloatV deltaF = FSub(newForce, appliedForce); //linVel0 = V3MulAdd(delLinVel0, deltaF, linVel0); normalVel1 = FScaleAdd(invMassDom0, deltaF, normalVel1); angVel0 = V3ScaleAdd(delAngVel0, deltaF, angVel0); accumDeltaF = FAdd(accumDeltaF, deltaF); c.setAppliedForce(newForce); Ps::aos::FStore(newForce, &appliedImpulse[i]); Ps::prefetchLine(&appliedImpulse[i], 128); //accumImpulse = FAdd(accumImpulse, newAppliedForce); } linVel0 = V3ScaleAdd(delLinVel0, accumDeltaF, linVel0); //hdr->setAccumlatedForce(accumImpulse); } // Write back V3StoreU(linVel0, b0.linearVelocity); V3StoreU(angVel0, b0.angularVelocity); PX_ASSERT(currPtr == last); }
void PxcArticulationHelper::getImpulseSelfResponse(const PxcFsData& matrix, PxU32 linkID0, const PxcSIMDSpatial& impulse0, PxcSIMDSpatial& deltaV0, PxU32 linkID1, const PxcSIMDSpatial& impulse1, PxcSIMDSpatial& deltaV1) { PX_ASSERT(linkID0 != linkID1); const PxcFsRow* rows = getFsRows(matrix); const PxcFsRowAux* aux = getAux(matrix); const PxcFsJointVectors* jointVectors = getJointVectors(matrix); PX_UNUSED(aux); PxcSIMDSpatial& dV0 = deltaV0, & dV1 = deltaV1; // standard case: parent-child limit if(matrix.parent[linkID1] == linkID0) { const PxcFsRow& r = rows[linkID1]; const PxcFsJointVectors& j = jointVectors[linkID1]; Vec3V lZ = V3Neg(impulse1.linear), aZ = V3Neg(impulse1.angular); Vec3V sz = V3Add(aZ, V3Cross(lZ, j.jointOffset)); lZ = V3Sub(lZ, V3ScaleAdd(r.DSI[0].linear, V3GetX(sz), V3ScaleAdd(r.DSI[1].linear, V3GetY(sz), V3Scale(r.DSI[2].linear, V3GetZ(sz))))); aZ = V3Sub(aZ, V3ScaleAdd(r.DSI[0].angular, V3GetX(sz), V3ScaleAdd(r.DSI[1].angular, V3GetY(sz), V3Scale(r.DSI[2].angular, V3GetZ(sz))))); aZ = V3Add(aZ, V3Cross(j.parentOffset, lZ)); lZ = V3Sub(impulse0.linear, lZ); aZ = V3Sub(impulse0.angular, aZ); dV0 = getImpulseResponseSimd(matrix, linkID0, lZ, aZ); Vec3V aV = dV0.angular; Vec3V lV = V3Sub(dV0.linear, V3Cross(j.parentOffset, aV)); Vec3V n = V3Add(V3Merge(V3Dot(r.DSI[0].linear, lV), V3Dot(r.DSI[1].linear, lV), V3Dot(r.DSI[2].linear, lV)), V3Merge(V3Dot(r.DSI[0].angular, aV), V3Dot(r.DSI[1].angular, aV), V3Dot(r.DSI[2].angular, aV))); n = V3Add(n, M33MulV3(r.D, sz)); lV = V3Sub(lV, V3Cross(j.jointOffset, n)); aV = V3Sub(aV, n); dV1 = PxcSIMDSpatial(lV, aV); } else getImpulseResponseSlow(matrix, linkID0, impulse0, deltaV0, linkID1, impulse1, deltaV1); #if PXC_ARTICULATION_DEBUG_VERIFY PxcSIMDSpatial dV0_, dV1_; PxcFsGetImpulseSelfResponse(matrix, linkID0, impulse0, dV0_, linkID1, impulse1, dV1_); PX_ASSERT(almostEqual(dV0_, dV0, 1e-3f)); PX_ASSERT(almostEqual(dV1_, dV1, 1e-3f)); #endif }
void solveFriction_BStatic(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/) { PxSolverBody& b0 = *desc.bodyA; Vec3V linVel0 = V3LoadA(b0.linearVelocity); Vec3V angState0 = V3LoadA(b0.angularState); PxU8* PX_RESTRICT currPtr = desc.constraint; const PxU8* PX_RESTRICT last = currPtr + getConstraintLength(desc); while(currPtr < last) { const SolverFrictionHeader* PX_RESTRICT frictionHeader = reinterpret_cast<SolverFrictionHeader*>(currPtr); const PxU32 numFrictionConstr = frictionHeader->numFrictionConstr; const PxU32 numNormalConstr = frictionHeader->numNormalConstr; const PxU32 numFrictionPerPoint = numFrictionConstr/numNormalConstr; currPtr +=sizeof(SolverFrictionHeader); PxF32* appliedImpulse = reinterpret_cast<PxF32*>(currPtr); currPtr +=frictionHeader->getAppliedForcePaddingSize(); SolverContactFriction* PX_RESTRICT frictions = reinterpret_cast<SolverContactFriction*>(currPtr); currPtr += numFrictionConstr * sizeof(SolverContactFriction); const FloatV invMass0 = FLoad(frictionHeader->invMass0D0); const FloatV angD0 = FLoad(frictionHeader->angDom0); //const FloatV angD1 = FLoad(frictionHeader->angDom1); const FloatV staticFriction = frictionHeader->getStaticFriction(); for(PxU32 i=0, j = 0;i<numFrictionConstr;j++) { for(PxU32 p = 0; p < numFrictionPerPoint; p++, i++) { SolverContactFriction& f = frictions[i]; Ps::prefetchLine(&frictions[i+1]); const Vec3V t0 = Vec3V_From_Vec4V(f.normalXYZ_appliedForceW); const Vec3V raXt0 = Vec3V_From_Vec4V(f.raXnXYZ_velMultiplierW); const FloatV appliedForce = V4GetW(f.normalXYZ_appliedForceW); const FloatV velMultiplier = V4GetW(f.raXnXYZ_velMultiplierW); const FloatV targetVel = FLoad(f.targetVel); //const FloatV normalImpulse = contacts[f.contactIndex].getAppliedForce(); const FloatV normalImpulse = FLoad(appliedImpulse[j]); const FloatV maxFriction = FMul(staticFriction, normalImpulse); const FloatV nMaxFriction = FNeg(maxFriction); //Compute the normal velocity of the constraint. 
const FloatV t0Vel1 = V3Dot(t0, linVel0); const FloatV t0Vel2 = V3Dot(raXt0, angState0); const FloatV t0Vel = FAdd(t0Vel1, t0Vel2); const Vec3V delangState0 = V3Scale(raXt0, angD0); const Vec3V delLinVel0 = V3Scale(t0, invMass0); // still lots to do here: using loop pipelining we can interweave this code with the // above - the code here has a lot of stalls that we would thereby eliminate const FloatV tmp = FNegScaleSub(targetVel,velMultiplier,appliedForce); FloatV newForce = FScaleAdd(t0Vel, velMultiplier, tmp); newForce = FClamp(newForce, nMaxFriction, maxFriction); const FloatV deltaF = FSub(newForce, appliedForce); linVel0 = V3ScaleAdd(delLinVel0, deltaF, linVel0); angState0 = V3ScaleAdd(delangState0, deltaF, angState0); f.setAppliedForce(newForce); } } } // Write back V3StoreA(linVel0, b0.linearVelocity); V3StoreA(angState0, b0.angularState); PX_ASSERT(currPtr == last); }