void b3GpuPgsContactSolver::solveContactConstraintBatchSizes( const b3OpenCLArray<b3RigidBodyData>* bodyBuf, const b3OpenCLArray<b3InertiaData>* shapeBuf, b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches,int numIterations, const b3AlignedObjectArray<int>* batchSizes)//const b3OpenCLArray<int>* gpuBatchSizes) { B3_PROFILE("solveContactConstraintBatchSizes"); int numBatches = batchSizes->size()/B3_MAX_NUM_BATCHES; for(int iter=0; iter<numIterations; iter++) { for (int cellId=0;cellId<numBatches;cellId++) { int offset = 0; for (int ii=0;ii<B3_MAX_NUM_BATCHES;ii++) { int numInBatch = batchSizes->at(cellId*B3_MAX_NUM_BATCHES+ii); if (!numInBatch) break; { b3LauncherCL launcher( m_data->m_queue, m_data->m_solveSingleContactKernel,"m_solveSingleContactKernel" ); launcher.setBuffer(bodyBuf->getBufferCL() ); launcher.setBuffer(shapeBuf->getBufferCL() ); launcher.setBuffer( constraint->getBufferCL() ); launcher.setConst(cellId); launcher.setConst(offset); launcher.setConst(numInBatch); launcher.launch1D(numInBatch); offset+=numInBatch; } } } } for(int iter=0; iter<numIterations; iter++) { for (int cellId=0;cellId<numBatches;cellId++) { int offset = 0; for (int ii=0;ii<B3_MAX_NUM_BATCHES;ii++) { int numInBatch = batchSizes->at(cellId*B3_MAX_NUM_BATCHES+ii); if (!numInBatch) break; { b3LauncherCL launcher( m_data->m_queue, m_data->m_solveSingleFrictionKernel,"m_solveSingleFrictionKernel" ); launcher.setBuffer(bodyBuf->getBufferCL() ); launcher.setBuffer(shapeBuf->getBufferCL() ); launcher.setBuffer( constraint->getBufferCL() ); launcher.setConst(cellId); launcher.setConst(offset); launcher.setConst(numInBatch); launcher.launch1D(numInBatch); offset+=numInBatch; } } } } }
void TinyRenderObjectData::registerMeshShape(const float* vertices, int numVertices, const int* indices, int numIndices, const float rgbaColor[4], unsigned char* textureImage, int textureWidth, int textureHeight) { if (0 == m_model) { { B3_PROFILE("setColorRGBA"); m_model = new Model(); m_model->setColorRGBA(rgbaColor); } if (textureImage) { { B3_PROFILE("setDiffuseTextureFromData"); m_model->setDiffuseTextureFromData(textureImage, textureWidth, textureHeight); } } else { /*char relativeFileName[1024]; if (b3ResourcePath::findResourcePath("floor_diffuse.tga", relativeFileName, 1024)) { m_model->loadDiffuseTexture(relativeFileName); } */ } { B3_PROFILE("reserveMemory"); m_model->reserveMemory(numVertices, numIndices); } { B3_PROFILE("addVertex"); for (int i = 0; i < numVertices; i++) { m_model->addVertex(vertices[i * 9], vertices[i * 9 + 1], vertices[i * 9 + 2], vertices[i * 9 + 4], vertices[i * 9 + 5], vertices[i * 9 + 6], vertices[i * 9 + 7], vertices[i * 9 + 8]); } } { B3_PROFILE("addTriangle"); for (int i = 0; i < numIndices; i += 3) { m_model->addTriangle(indices[i], indices[i], indices[i], indices[i + 1], indices[i + 1], indices[i + 1], indices[i + 2], indices[i + 2], indices[i + 2]); } } } }
///todo: add some acceleration structure (AABBs, tree etc) void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults, int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables, const struct b3GpuNarrowPhaseInternalData* narrowphaseData) { //castRaysHost(rays,hitResults,numBodies,bodies,numCollidables,collidables,narrowphaseData); B3_PROFILE("castRaysGPU"); b3OpenCLArray<b3RayInfo> gpuRays(m_data->m_context,m_data->m_q); b3OpenCLArray<b3RayHit> gpuHitResults(m_data->m_context,m_data->m_q); { B3_PROFILE("raycast copyFromHost"); gpuRays.copyFromHost(rays); gpuHitResults.resize(hitResults.size()); gpuHitResults.copyFromHost(hitResults); } //run kernel { B3_PROFILE("raycast launch1D"); b3LauncherCL launcher(m_data->m_q,m_data->m_raytraceKernel,"m_raytraceKernel"); int numRays = rays.size(); launcher.setConst(numRays); launcher.setBuffer(gpuRays.getBufferCL()); launcher.setBuffer(gpuHitResults.getBufferCL()); launcher.setConst(numBodies); launcher.setBuffer(narrowphaseData->m_bodyBufferGPU->getBufferCL()); launcher.setBuffer(narrowphaseData->m_collidablesGPU->getBufferCL()); launcher.setBuffer(narrowphaseData->m_convexFacesGPU->getBufferCL()); launcher.setBuffer(narrowphaseData->m_convexPolyhedraGPU->getBufferCL()); launcher.launch1D(numRays); clFinish(m_data->m_q); } //copy results { B3_PROFILE("raycast copyToHost"); gpuHitResults.copyToHost(hitResults); } }
void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults, int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables,const struct b3Collidable* collidables) { // return castRays(rays,hitResults,numBodies,bodies,numCollidables,collidables); B3_PROFILE("castRaysHost"); for (int r=0;r<rays.size();r++) { b3Vector3 rayFrom = rays[r].m_from; b3Vector3 rayTo = rays[r].m_to; //if there is a hit, color the pixels bool hits = false; for (int b=0;b<numBodies && !hits;b++) { const b3Vector3& pos = bodies[b].m_pos; const b3Quaternion& orn = bodies[b].m_quat; b3Scalar radius = 1; if (sphere_intersect(pos, radius, rayFrom, rayTo)) hits = true; } if (hits) hitResults[r].m_hitFraction = 0.f; } }
// return non-null if there is a status, nullptr otherwise virtual const struct SharedMemoryStatus* processServerStatus() { { if (btIsExampleBrowserMainThreadTerminated(m_data)) { PhysicsClientSharedMemory::disconnectSharedMemory(); } } { unsigned long int ms = m_clock.getTimeMilliseconds(); if (ms>2) { B3_PROFILE("m_clock.reset()"); btUpdateInProcessExampleBrowserMainThread(m_data); m_clock.reset(); } } { b3Clock::usleep(0); } const SharedMemoryStatus* stat = 0; { stat = PhysicsClientSharedMemory::processServerStatus(); } return stat; }
void ExplicitEuler::integrateExplicitEuler(struct CpuSoftClothDemoInternalData* clothData, char* vertexPositions, int vertexStride,float deltaTime) { B3_PROFILE("integrateEuler"); b3Vector3 deltaTimeVec = b3MakeVector3(deltaTime,deltaTime,deltaTime,0); int numPoints = clothData->m_particleMasses.size(); for (int i=0;i<numPoints;i++) { float mass = clothData->m_particleMasses[i]; if (mass) { b3Vector3 dv = (clothData->m_forces[i]/mass)*deltaTimeVec; clothData->m_velocities[i]+= dv; clothData->m_velocities[i]*=0.999; b3Vector3& pos = (b3Vector3&)vertexPositions[i*vertexStride]; pos += clothData->m_velocities[i]*deltaTimeVec; } } }
void ConvertURDF2Bullet( const URDFImporterInterface& u2b, MultiBodyCreationInterface& creation, const btTransform& rootTransformInWorldSpace, btMultiBodyDynamicsWorld* world1, bool createMultiBody, const char* pathPrefix, int flags, UrdfVisualShapeCache* cachedLinkGraphicsShapes) { URDF2BulletCachedData cache; InitURDF2BulletCache(u2b,cache); int urdfLinkIndex = u2b.getRootLinkIndex(); B3_PROFILE("ConvertURDF2Bullet"); UrdfVisualShapeCache cachedLinkGraphicsShapesOut; ConvertURDF2BulletInternal(u2b, creation, cache, urdfLinkIndex,rootTransformInWorldSpace,world1,createMultiBody,pathPrefix,flags, cachedLinkGraphicsShapes, &cachedLinkGraphicsShapesOut); if (cachedLinkGraphicsShapes && cachedLinkGraphicsShapesOut.m_cachedUrdfLinkVisualShapeIndices.size() > cachedLinkGraphicsShapes->m_cachedUrdfLinkVisualShapeIndices.size()) { *cachedLinkGraphicsShapes = cachedLinkGraphicsShapesOut; } if (world1 && cache.m_bulletMultiBody) { B3_PROFILE("Post process"); btMultiBody* mb = cache.m_bulletMultiBody; mb->setHasSelfCollision((flags&CUF_USE_SELF_COLLISION)!=0); mb->finalizeMultiDof(); btTransform localInertialFrameRoot = cache.m_urdfLinkLocalInertialFrames[urdfLinkIndex]; if (flags & CUF_USE_MJCF) { } else { mb->setBaseWorldTransform(rootTransformInWorldSpace*localInertialFrameRoot); } btAlignedObjectArray<btQuaternion> scratch_q; btAlignedObjectArray<btVector3> scratch_m; mb->forwardKinematics(scratch_q,scratch_m); mb->updateCollisionObjectWorldTransforms(scratch_q,scratch_m); world1->addMultiBody(mb); } }
void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults, int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables) { B3_PROFILE("castRaysGPU"); b3OpenCLArray<b3RayInfo> gpuRays(m_data->m_context,m_data->m_q); gpuRays.copyFromHost(rays); b3OpenCLArray<b3RayHit> gpuHitResults(m_data->m_context,m_data->m_q); gpuHitResults.resize(hitResults.size()); b3OpenCLArray<b3RigidBodyCL> gpuBodies(m_data->m_context,m_data->m_q); gpuBodies.resize(numBodies); gpuBodies.copyFromHostPointer(bodies,numBodies); b3OpenCLArray<b3Collidable> gpuCollidables(m_data->m_context,m_data->m_q); gpuCollidables.resize(numCollidables); gpuCollidables.copyFromHostPointer(collidables,numCollidables); //run kernel { B3_PROFILE("raycast launch1D"); b3LauncherCL launcher(m_data->m_q,m_data->m_raytraceKernel); int numRays = rays.size(); launcher.setConst(numRays); launcher.setBuffer(gpuRays.getBufferCL()); launcher.setBuffer(gpuHitResults.getBufferCL()); launcher.setConst(numBodies); launcher.setBuffer(gpuBodies.getBufferCL()); launcher.setBuffer(gpuCollidables.getBufferCL()); launcher.launch1D(numRays); clFinish(m_data->m_q); } //copy results gpuHitResults.copyToHost(hitResults); }
void OpenGLExampleBrowser::updateGraphics() { if (sCurrentDemo) { if (!pauseSimulation || singleStepSimulation) { B3_PROFILE("sCurrentDemo->updateGraphics"); sCurrentDemo->updateGraphics(); } } }
void RigidBodyDemo::renderScene() { { B3_PROFILE("writeSingleInstanceTransformToCPU"); const b3RigidBodyData* bodies = m_rb->getBodyBuffer(); //sync transforms int numBodies = m_rb->getNumBodies(); for (int i=0;i<numBodies;i++) { m_instancingRenderer->writeSingleInstanceTransformToCPU(&bodies[i].m_pos.x,bodies[i].m_quat,i); } } { B3_PROFILE("writeTransforms"); m_instancingRenderer->writeTransforms(); } { B3_PROFILE("renderScene"); m_instancingRenderer->renderScene(); } }
/// b3PgsJacobiSolver Sequentially applies impulses b3Scalar b3GpuPgsConstraintSolver::solveGroup(b3OpenCLArray<b3RigidBodyData>* gpuBodies, b3OpenCLArray<b3InertiaData>* gpuInertias, int numBodies, b3OpenCLArray<b3GpuGenericConstraint>* gpuConstraints, int numConstraints, const b3ContactSolverInfo& infoGlobal) { B3_PROFILE("solveJoints"); //you need to provide at least some bodies solveGroupCacheFriendlySetup(gpuBodies, gpuInertias, numBodies, gpuConstraints, numConstraints, infoGlobal); solveGroupCacheFriendlyIterations(gpuConstraints, numConstraints, infoGlobal); solveGroupCacheFriendlyFinish(gpuBodies, gpuInertias, numBodies, gpuConstraints, numConstraints, infoGlobal); return 0.f; }
void ExplicitEuler::computeGravityForces(struct CpuSoftClothDemoInternalData* clothData, char* vertexPositions, int vertexStride, float dt) { B3_PROFILE("computeForces"); int numPoints = clothData->m_particleMasses.size(); b3Vector3 gravityAcceleration = b3MakeVector3(0,-9.8,0); //f=m*a for (int i=0;i<numPoints;i++) { { float particleMass = clothData->m_particleMasses[i]; b3Vector3 particleMassVec = b3MakeVector3(particleMass,particleMass,particleMass,0); clothData->m_forces[i] = gravityAcceleration*particleMass; } } }
void b3Solver::convertToConstraints( const b3OpenCLArray<b3RigidBodyCL>* bodyBuf, const b3OpenCLArray<b3InertiaCL>* shapeBuf, b3OpenCLArray<b3Contact4>* contactsIn, b3OpenCLArray<b3GpuConstraint4>* contactCOut, void* additionalData, int nContacts, const ConstraintCfg& cfg ) { b3OpenCLArray<b3GpuConstraint4>* constraintNative =0; struct CB { int m_nContacts; float m_dt; float m_positionDrift; float m_positionConstraintCoeff; }; { B3_PROFILE("m_contactToConstraintKernel"); CB cdata; cdata.m_nContacts = nContacts; cdata.m_dt = cfg.m_dt; cdata.m_positionDrift = cfg.m_positionDrift; cdata.m_positionConstraintCoeff = cfg.m_positionConstraintCoeff; b3BufferInfoCL bInfo[] = { b3BufferInfoCL( contactsIn->getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL() ), b3BufferInfoCL( shapeBuf->getBufferCL()), b3BufferInfoCL( contactCOut->getBufferCL() ) }; b3LauncherCL launcher( m_queue, m_contactToConstraintKernel ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); //launcher.setConst( cdata ); launcher.setConst(cdata.m_nContacts); launcher.setConst(cdata.m_dt); launcher.setConst(cdata.m_positionDrift); launcher.setConst(cdata.m_positionConstraintCoeff); launcher.launch1D( nContacts, 64 ); clFinish(m_queue); } contactCOut->resize(nContacts); }
inline int b3GpuPgsConstraintSolver::sortConstraintByBatch3(b3BatchConstraint* cs, int numConstraints, int simdWidth, int staticIdx, int numBodies) { //int sz = sizeof(b3BatchConstraint); B3_PROFILE("sortConstraintByBatch3"); static int maxSwaps = 0; int numSwaps = 0; curUsed.resize(2 * simdWidth); static int maxNumConstraints = 0; if (maxNumConstraints < numConstraints) { maxNumConstraints = numConstraints; //printf("maxNumConstraints = %d\n",maxNumConstraints ); } int numUsedArray = numBodies / 32 + 1; bodyUsed.resize(numUsedArray); for (int q = 0; q < numUsedArray; q++) bodyUsed[q] = 0; int curBodyUsed = 0; int numIter = 0; #if defined(_DEBUG) for (int i = 0; i < numConstraints; i++) cs[i].m_batchId = -1; #endif int numValidConstraints = 0; // int unprocessedConstraintIndex = 0; int batchIdx = 0; { B3_PROFILE("cpu batch innerloop"); while (numValidConstraints < numConstraints) { numIter++; int nCurrentBatch = 0; // clear flag for (int i = 0; i < curBodyUsed; i++) bodyUsed[curUsed[i] / 32] = 0; curBodyUsed = 0; for (int i = numValidConstraints; i < numConstraints; i++) { int idx = i; b3Assert(idx < numConstraints); // check if it can go int bodyAS = cs[idx].m_bodyAPtrAndSignBit; int bodyBS = cs[idx].m_bodyBPtrAndSignBit; int bodyA = abs(bodyAS); int bodyB = abs(bodyBS); bool aIsStatic = (bodyAS < 0) || bodyAS == staticIdx; bool bIsStatic = (bodyBS < 0) || bodyBS == staticIdx; int aUnavailable = 0; int bUnavailable = 0; if (!aIsStatic) { aUnavailable = bodyUsed[bodyA / 32] & (1 << (bodyA & 31)); } if (!aUnavailable) if (!bIsStatic) { bUnavailable = bodyUsed[bodyB / 32] & (1 << (bodyB & 31)); } if (aUnavailable == 0 && bUnavailable == 0) // ok { if (!aIsStatic) { bodyUsed[bodyA / 32] |= (1 << (bodyA & 31)); curUsed[curBodyUsed++] = bodyA; } if (!bIsStatic) { bodyUsed[bodyB / 32] |= (1 << (bodyB & 31)); curUsed[curBodyUsed++] = bodyB; } cs[idx].m_batchId = batchIdx; if (i != numValidConstraints) { b3Swap(cs[i], cs[numValidConstraints]); numSwaps++; } numValidConstraints++; { nCurrentBatch++; if (nCurrentBatch == simdWidth) { nCurrentBatch = 0; for (int i = 0; i < curBodyUsed; i++) bodyUsed[curUsed[i] / 32] = 0; curBodyUsed = 0; } } } } m_gpuData->m_batchSizes.push_back(nCurrentBatch); batchIdx++; } } #if defined(_DEBUG) // debugPrintf( "nBatches: %d\n", batchIdx ); for (int i = 0; i < numConstraints; i++) { b3Assert(cs[i].m_batchId != -1); } #endif if (maxSwaps < numSwaps) { maxSwaps = numSwaps; //printf("maxSwaps = %d\n", maxSwaps); } return batchIdx; }
b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlyIterations(b3OpenCLArray<b3GpuGenericConstraint>* gpuConstraints1, int numConstraints, const b3ContactSolverInfo& infoGlobal) { //only create the batches once. //@todo: incrementally update batches when constraints are added/activated and/or removed/deactivated B3_PROFILE("GpuSolveGroupCacheFriendlyIterations"); bool createBatches = m_gpuData->m_batchSizes.size() == 0; { if (createBatches) { m_gpuData->m_batchSizes.resize(0); { m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints); B3_PROFILE("batch joints"); b3Assert(batchConstraints.size() == numConstraints); int simdWidth = numConstraints + 1; int numBodies = m_tmpSolverBodyPool.size(); sortConstraintByBatch3(&batchConstraints[0], numConstraints, simdWidth, m_staticIdx, numBodies); m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints); } } else { /*b3AlignedObjectArray<b3BatchConstraint> cpuCheckBatches; m_gpuData->m_gpuBatchConstraints->copyToHost(cpuCheckBatches); b3Assert(cpuCheckBatches.size()==batchConstraints.size()); printf(".\n"); */ //>copyFromHost(batchConstraints); } int maxIterations = infoGlobal.m_numIterations; bool useBatching = true; if (useBatching) { if (!useGpuSolveJointConstraintRows) { B3_PROFILE("copy to host"); m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool); m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints); m_gpuData->m_gpuConstraintRows->copyToHost(m_tmpSolverNonContactConstraintPool); m_gpuData->m_gpuConstraintInfo1->copyToHost(m_gpuData->m_cpuConstraintInfo1); m_gpuData->m_gpuConstraintRowOffsets->copyToHost(m_gpuData->m_cpuConstraintRowOffsets); gpuConstraints1->copyToHost(m_gpuData->m_cpuConstraints); } for (int iteration = 0; iteration < maxIterations; iteration++) { int batchOffset = 0; int constraintOffset = 0; int numBatches = m_gpuData->m_batchSizes.size(); for (int bb = 0; bb < numBatches; bb++) { int numConstraintsInBatch = m_gpuData->m_batchSizes[bb]; if (useGpuSolveJointConstraintRows) { B3_PROFILE("solveJointConstraintRowsKernels"); /* __kernel void solveJointConstraintRows(__global b3GpuSolverBody* solverBodies, __global b3BatchConstraint* batchConstraints, __global b3SolverConstraint* rows, __global unsigned int* numConstraintRowsInfo1, __global unsigned int* rowOffsets, __global b3GpuGenericConstraint* constraints, int batchOffset, int numConstraintsInBatch*/ b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_solveJointConstraintRowsKernels, "m_solveJointConstraintRowsKernels"); launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuConstraintRows->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL()); launcher.setBuffer(gpuConstraints1->getBufferCL()); //to detect disabled constraints launcher.setConst(batchOffset); launcher.setConst(numConstraintsInBatch); launcher.launch1D(numConstraintsInBatch); } else //useGpu { for (int b = 0; b < numConstraintsInBatch; b++) { const b3BatchConstraint& c = batchConstraints[batchOffset + b]; /*printf("-----------\n"); printf("bb=%d\n",bb); printf("c.batchId = %d\n", c.m_batchId); */ b3Assert(c.m_batchId == bb); b3GpuGenericConstraint* constraint = &m_gpuData->m_cpuConstraints[c.m_originalConstraintIndex]; if (constraint->m_flags & B3_CONSTRAINT_FLAG_ENABLED) { int numConstraintRows = m_gpuData->m_cpuConstraintInfo1[c.m_originalConstraintIndex]; int constraintOffset = m_gpuData->m_cpuConstraintRowOffsets[c.m_originalConstraintIndex]; for (int jj = 0; jj < numConstraintRows; jj++) { // b3GpuSolverConstraint& constraint = m_tmpSolverNonContactConstraintPool[constraintOffset + jj]; //resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[constraint.m_solverBodyIdA],m_tmpSolverBodyPool[constraint.m_solverBodyIdB],constraint); resolveSingleConstraintRowGeneric2(&m_tmpSolverBodyPool[constraint.m_solverBodyIdA], &m_tmpSolverBodyPool[constraint.m_solverBodyIdB], &constraint); } } } } //useGpu batchOffset += numConstraintsInBatch; constraintOffset += numConstraintsInBatch; } } //for (int iteration... if (!useGpuSolveJointConstraintRows) { { B3_PROFILE("copy from host"); m_gpuData->m_gpuSolverBodies->copyFromHost(m_tmpSolverBodyPool); m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints); m_gpuData->m_gpuConstraintRows->copyFromHost(m_tmpSolverNonContactConstraintPool); } //B3_PROFILE("copy to host"); //m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool); } //int sz = sizeof(b3GpuSolverBody); //printf("cpu sizeof(b3GpuSolverBody)=%d\n",sz); } else { for (int iteration = 0; iteration < maxIterations; iteration++) { int numJoints = m_tmpSolverNonContactConstraintPool.size(); for (int j = 0; j < numJoints; j++) { b3GpuSolverConstraint& constraint = m_tmpSolverNonContactConstraintPool[j]; resolveSingleConstraintRowGeneric2(&m_tmpSolverBodyPool[constraint.m_solverBodyIdA], &m_tmpSolverBodyPool[constraint.m_solverBodyIdB], &constraint); } if (!m_usePgs) { averageVelocities(); } } } } clFinish(m_gpuData->m_queue); return 0.f; }
b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlySetup(b3OpenCLArray<b3RigidBodyData>* gpuBodies, b3OpenCLArray<b3InertiaData>* gpuInertias, int numBodies, b3OpenCLArray<b3GpuGenericConstraint>* gpuConstraints, int numConstraints, const b3ContactSolverInfo& infoGlobal) { B3_PROFILE("GPU solveGroupCacheFriendlySetup"); batchConstraints.resize(numConstraints); m_gpuData->m_gpuBatchConstraints->resize(numConstraints); m_staticIdx = -1; m_maxOverrideNumSolverIterations = 0; /* m_gpuData->m_gpuBodies->resize(numBodies); m_gpuData->m_gpuBodies->copyFromHostPointer(bodies,numBodies); b3OpenCLArray<b3InertiaData> gpuInertias(m_gpuData->m_context,m_gpuData->m_queue); gpuInertias.resize(numBodies); gpuInertias.copyFromHostPointer(inertias,numBodies); */ m_gpuData->m_gpuSolverBodies->resize(numBodies); m_tmpSolverBodyPool.resize(numBodies); { if (useGpuInitSolverBodies) { B3_PROFILE("m_initSolverBodiesKernel"); b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_initSolverBodiesKernel, "m_initSolverBodiesKernel"); launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL()); launcher.setBuffer(gpuBodies->getBufferCL()); launcher.setConst(numBodies); launcher.launch1D(numBodies); clFinish(m_gpuData->m_queue); // m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool); } else { gpuBodies->copyToHost(m_gpuData->m_cpuBodies); for (int i = 0; i < numBodies; i++) { b3RigidBodyData& body = m_gpuData->m_cpuBodies[i]; b3GpuSolverBody& solverBody = m_tmpSolverBodyPool[i]; initSolverBody(i, &solverBody, &body); solverBody.m_originalBodyIndex = i; } m_gpuData->m_gpuSolverBodies->copyFromHost(m_tmpSolverBodyPool); } } // int totalBodies = 0; int totalNumRows = 0; //b3RigidBody* rb0=0,*rb1=0; //if (1) { { // int i; m_tmpConstraintSizesPool.resizeNoInitialize(numConstraints); // b3OpenCLArray<b3GpuGenericConstraint> gpuConstraints(m_gpuData->m_context,m_gpuData->m_queue); if (useGpuInfo1) { B3_PROFILE("info1 and init batchConstraint"); m_gpuData->m_gpuConstraintInfo1->resize(numConstraints); if (1) { B3_PROFILE("getInfo1Kernel"); b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_getInfo1Kernel, "m_getInfo1Kernel"); launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); launcher.setBuffer(gpuConstraints->getBufferCL()); launcher.setConst(numConstraints); launcher.launch1D(numConstraints); clFinish(m_gpuData->m_queue); } if (m_gpuData->m_batchSizes.size() == 0) { B3_PROFILE("initBatchConstraintsKernel"); m_gpuData->m_gpuConstraintRowOffsets->resize(numConstraints); unsigned int total = 0; m_gpuData->m_prefixScan->execute(*m_gpuData->m_gpuConstraintInfo1, *m_gpuData->m_gpuConstraintRowOffsets, numConstraints, &total); unsigned int lastElem = m_gpuData->m_gpuConstraintInfo1->at(numConstraints - 1); totalNumRows = total + lastElem; { B3_PROFILE("init batch constraints"); b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_initBatchConstraintsKernel, "m_initBatchConstraintsKernel"); launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL()); launcher.setBuffer(gpuConstraints->getBufferCL()); launcher.setBuffer(gpuBodies->getBufferCL()); launcher.setConst(numConstraints); launcher.launch1D(numConstraints); clFinish(m_gpuData->m_queue); } //assume the batching happens on CPU, so copy the data m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints); } } else { totalNumRows = 0; gpuConstraints->copyToHost(m_gpuData->m_cpuConstraints); //calculate the total number of contraint rows for (int i = 0; i < numConstraints; i++) { unsigned int& info1 = m_tmpConstraintSizesPool[i]; // unsigned int info1; if (m_gpuData->m_cpuConstraints[i].isEnabled()) { m_gpuData->m_cpuConstraints[i].getInfo1(&info1, &m_gpuData->m_cpuBodies[0]); } else { info1 = 0; } totalNumRows += info1; } m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints); m_gpuData->m_gpuConstraintInfo1->copyFromHost(m_tmpConstraintSizesPool); } m_tmpSolverNonContactConstraintPool.resizeNoInitialize(totalNumRows); m_gpuData->m_gpuConstraintRows->resize(totalNumRows); // b3GpuConstraintArray verify; if (useGpuInfo2) { { B3_PROFILE("getInfo2Kernel"); b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_getInfo2Kernel, "m_getInfo2Kernel"); launcher.setBuffer(m_gpuData->m_gpuConstraintRows->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL()); launcher.setBuffer(gpuConstraints->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL()); launcher.setBuffer(gpuBodies->getBufferCL()); launcher.setBuffer(gpuInertias->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL()); launcher.setConst(infoGlobal.m_timeStep); launcher.setConst(infoGlobal.m_erp); launcher.setConst(infoGlobal.m_globalCfm); launcher.setConst(infoGlobal.m_damping); launcher.setConst(infoGlobal.m_numIterations); launcher.setConst(numConstraints); launcher.launch1D(numConstraints); clFinish(m_gpuData->m_queue); if (m_gpuData->m_batchSizes.size() == 0) m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints); //m_gpuData->m_gpuConstraintRows->copyToHost(verify); //m_gpuData->m_gpuConstraintRows->copyToHost(m_tmpSolverNonContactConstraintPool); } } else { gpuInertias->copyToHost(m_gpuData->m_cpuInertias); ///setup the b3SolverConstraints for (int i = 0; i < numConstraints; i++) { const int& info1 = m_tmpConstraintSizesPool[i]; if (info1) { int constraintIndex = batchConstraints[i].m_originalConstraintIndex; int constraintRowOffset = m_gpuData->m_cpuConstraintRowOffsets[constraintIndex]; b3GpuSolverConstraint* currentConstraintRow = &m_tmpSolverNonContactConstraintPool[constraintRowOffset]; b3GpuGenericConstraint& constraint = m_gpuData->m_cpuConstraints[i]; b3RigidBodyData& rbA = m_gpuData->m_cpuBodies[constraint.getRigidBodyA()]; //b3RigidBody& rbA = constraint.getRigidBodyA(); // b3RigidBody& rbB = constraint.getRigidBodyB(); b3RigidBodyData& rbB = m_gpuData->m_cpuBodies[constraint.getRigidBodyB()]; int solverBodyIdA = constraint.getRigidBodyA(); //getOrInitSolverBody(constraint.getRigidBodyA(),bodies,inertias); int solverBodyIdB = constraint.getRigidBodyB(); //getOrInitSolverBody(constraint.getRigidBodyB(),bodies,inertias); b3GpuSolverBody* bodyAPtr = &m_tmpSolverBodyPool[solverBodyIdA]; b3GpuSolverBody* bodyBPtr = &m_tmpSolverBodyPool[solverBodyIdB]; if (rbA.m_invMass) { batchConstraints[i].m_bodyAPtrAndSignBit = solverBodyIdA; } else { if (!solverBodyIdA) m_staticIdx = 0; batchConstraints[i].m_bodyAPtrAndSignBit = -solverBodyIdA; } if (rbB.m_invMass) { batchConstraints[i].m_bodyBPtrAndSignBit = solverBodyIdB; } else { if (!solverBodyIdB) m_staticIdx = 0; batchConstraints[i].m_bodyBPtrAndSignBit = -solverBodyIdB; } int overrideNumSolverIterations = 0; //constraint->getOverrideNumSolverIterations() > 0 ? constraint->getOverrideNumSolverIterations() : infoGlobal.m_numIterations; if (overrideNumSolverIterations > m_maxOverrideNumSolverIterations) m_maxOverrideNumSolverIterations = overrideNumSolverIterations; int j; for (j = 0; j < info1; j++) { memset(¤tConstraintRow[j], 0, sizeof(b3GpuSolverConstraint)); currentConstraintRow[j].m_angularComponentA.setValue(0, 0, 0); currentConstraintRow[j].m_angularComponentB.setValue(0, 0, 0); currentConstraintRow[j].m_appliedImpulse = 0.f; currentConstraintRow[j].m_appliedPushImpulse = 0.f; currentConstraintRow[j].m_cfm = 0.f; currentConstraintRow[j].m_contactNormal.setValue(0, 0, 0); currentConstraintRow[j].m_friction = 0.f; currentConstraintRow[j].m_frictionIndex = 0; currentConstraintRow[j].m_jacDiagABInv = 0.f; currentConstraintRow[j].m_lowerLimit = 0.f; currentConstraintRow[j].m_upperLimit = 0.f; currentConstraintRow[j].m_originalContactPoint = 0; currentConstraintRow[j].m_overrideNumSolverIterations = 0; currentConstraintRow[j].m_relpos1CrossNormal.setValue(0, 0, 0); currentConstraintRow[j].m_relpos2CrossNormal.setValue(0, 0, 0); currentConstraintRow[j].m_rhs = 0.f; currentConstraintRow[j].m_rhsPenetration = 0.f; currentConstraintRow[j].m_solverBodyIdA = 0; currentConstraintRow[j].m_solverBodyIdB = 0; currentConstraintRow[j].m_lowerLimit = -B3_INFINITY; currentConstraintRow[j].m_upperLimit = B3_INFINITY; currentConstraintRow[j].m_appliedImpulse = 0.f; currentConstraintRow[j].m_appliedPushImpulse = 0.f; currentConstraintRow[j].m_solverBodyIdA = solverBodyIdA; currentConstraintRow[j].m_solverBodyIdB = solverBodyIdB; currentConstraintRow[j].m_overrideNumSolverIterations = overrideNumSolverIterations; } bodyAPtr->internalGetDeltaLinearVelocity().setValue(0.f, 0.f, 0.f); bodyAPtr->internalGetDeltaAngularVelocity().setValue(0.f, 0.f, 0.f); bodyAPtr->internalGetPushVelocity().setValue(0.f, 0.f, 0.f); bodyAPtr->internalGetTurnVelocity().setValue(0.f, 0.f, 0.f); bodyBPtr->internalGetDeltaLinearVelocity().setValue(0.f, 0.f, 0.f); bodyBPtr->internalGetDeltaAngularVelocity().setValue(0.f, 0.f, 0.f); bodyBPtr->internalGetPushVelocity().setValue(0.f, 0.f, 0.f); bodyBPtr->internalGetTurnVelocity().setValue(0.f, 0.f, 0.f); b3GpuConstraintInfo2 info2; info2.fps = 1.f / infoGlobal.m_timeStep; info2.erp = infoGlobal.m_erp; info2.m_J1linearAxis = currentConstraintRow->m_contactNormal; info2.m_J1angularAxis = currentConstraintRow->m_relpos1CrossNormal; info2.m_J2linearAxis = 0; info2.m_J2angularAxis = currentConstraintRow->m_relpos2CrossNormal; info2.rowskip = sizeof(b3GpuSolverConstraint) / sizeof(b3Scalar); //check this ///the size of b3GpuSolverConstraint needs be a multiple of b3Scalar b3Assert(info2.rowskip * sizeof(b3Scalar) == sizeof(b3GpuSolverConstraint)); info2.m_constraintError = ¤tConstraintRow->m_rhs; currentConstraintRow->m_cfm = infoGlobal.m_globalCfm; info2.m_damping = infoGlobal.m_damping; info2.cfm = ¤tConstraintRow->m_cfm; info2.m_lowerLimit = ¤tConstraintRow->m_lowerLimit; info2.m_upperLimit = ¤tConstraintRow->m_upperLimit; info2.m_numIterations = infoGlobal.m_numIterations; m_gpuData->m_cpuConstraints[i].getInfo2(&info2, &m_gpuData->m_cpuBodies[0]); ///finalize the constraint setup for (j = 0; j < info1; j++) { b3GpuSolverConstraint& solverConstraint = currentConstraintRow[j]; if (solverConstraint.m_upperLimit >= m_gpuData->m_cpuConstraints[i].getBreakingImpulseThreshold()) { solverConstraint.m_upperLimit = m_gpuData->m_cpuConstraints[i].getBreakingImpulseThreshold(); } if (solverConstraint.m_lowerLimit <= -m_gpuData->m_cpuConstraints[i].getBreakingImpulseThreshold()) { solverConstraint.m_lowerLimit = -m_gpuData->m_cpuConstraints[i].getBreakingImpulseThreshold(); } // solverConstraint.m_originalContactPoint = constraint; b3Matrix3x3& invInertiaWorldA = m_gpuData->m_cpuInertias[constraint.getRigidBodyA()].m_invInertiaWorld; { //b3Vector3 angularFactorA(1,1,1); const b3Vector3& ftorqueAxis1 = solverConstraint.m_relpos1CrossNormal; solverConstraint.m_angularComponentA = invInertiaWorldA * ftorqueAxis1; //*angularFactorA; } b3Matrix3x3& invInertiaWorldB = m_gpuData->m_cpuInertias[constraint.getRigidBodyB()].m_invInertiaWorld; { const b3Vector3& ftorqueAxis2 = solverConstraint.m_relpos2CrossNormal; solverConstraint.m_angularComponentB = invInertiaWorldB * ftorqueAxis2; //*constraint.getRigidBodyB().getAngularFactor(); } { //it is ok to use solverConstraint.m_contactNormal instead of -solverConstraint.m_contactNormal //because it gets multiplied iMJlB b3Vector3 iMJlA = solverConstraint.m_contactNormal * rbA.m_invMass; b3Vector3 iMJaA = invInertiaWorldA * solverConstraint.m_relpos1CrossNormal; b3Vector3 iMJlB = solverConstraint.m_contactNormal * rbB.m_invMass; //sign of normal? b3Vector3 iMJaB = invInertiaWorldB * solverConstraint.m_relpos2CrossNormal; b3Scalar sum = iMJlA.dot(solverConstraint.m_contactNormal); sum += iMJaA.dot(solverConstraint.m_relpos1CrossNormal); sum += iMJlB.dot(solverConstraint.m_contactNormal); sum += iMJaB.dot(solverConstraint.m_relpos2CrossNormal); b3Scalar fsum = b3Fabs(sum); b3Assert(fsum > B3_EPSILON); solverConstraint.m_jacDiagABInv = fsum > B3_EPSILON ? b3Scalar(1.) / sum : 0.f; } ///fix rhs ///todo: add force/torque accelerators { b3Scalar rel_vel; b3Scalar vel1Dotn = solverConstraint.m_contactNormal.dot(rbA.m_linVel) + solverConstraint.m_relpos1CrossNormal.dot(rbA.m_angVel); b3Scalar vel2Dotn = -solverConstraint.m_contactNormal.dot(rbB.m_linVel) + solverConstraint.m_relpos2CrossNormal.dot(rbB.m_angVel); rel_vel = vel1Dotn + vel2Dotn; b3Scalar restitution = 0.f; b3Scalar positionalError = solverConstraint.m_rhs; //already filled in by getConstraintInfo2 b3Scalar velocityError = restitution - rel_vel * info2.m_damping; b3Scalar penetrationImpulse = positionalError * solverConstraint.m_jacDiagABInv; b3Scalar velocityImpulse = velocityError * solverConstraint.m_jacDiagABInv; solverConstraint.m_rhs = penetrationImpulse + velocityImpulse; solverConstraint.m_appliedImpulse = 0.f; } } } } m_gpuData->m_gpuConstraintRows->copyFromHost(m_tmpSolverNonContactConstraintPool); m_gpuData->m_gpuConstraintInfo1->copyFromHost(m_tmpConstraintSizesPool); if (m_gpuData->m_batchSizes.size() == 0) m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints); else m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints); m_gpuData->m_gpuSolverBodies->copyFromHost(m_tmpSolverBodyPool); } //end useGpuInfo2 } #ifdef B3_SUPPORT_CONTACT_CONSTRAINTS { int i; for (i = 0; i < numManifolds; i++) { b3Contact4& manifold = manifoldPtr[i]; convertContact(bodies, inertias, &manifold, infoGlobal); } } #endif //B3_SUPPORT_CONTACT_CONSTRAINTS } // b3ContactSolverInfo info = infoGlobal; // int numNonContactPool = m_tmpSolverNonContactConstraintPool.size(); // int numConstraintPool = m_tmpSolverContactConstraintPool.size(); // int numFrictionPool = m_tmpSolverContactFrictionConstraintPool.size(); return 0.f; }
void GLInstancingRenderer::init() { b3Assert(glGetError() ==GL_NO_ERROR); glEnable(GL_DEPTH_TEST); glDepthFunc(GL_LESS); b3Assert(glGetError() ==GL_NO_ERROR); // glClearColor(float(0.),float(0.),float(0.4),float(0)); b3Assert(glGetError() ==GL_NO_ERROR); b3Assert(glGetError() ==GL_NO_ERROR); { B3_PROFILE("texture"); if(m_textureenabled) { if(!m_textureinitialized) { glActiveTexture(GL_TEXTURE0); GLubyte* image=new GLubyte[256*256*3]; for(int y=0;y<256;++y) { // const int t=y>>5; GLubyte* pi=image+y*256*3; for(int x=0;x<256;++x) { if (x<2||y<2||x>253||y>253) { pi[0]=255;//0; pi[1]=255;//0; pi[2]=255;//0; } else { pi[0]=255; pi[1]=255; pi[2]=255; } /* const int s=x>>5; const GLubyte b=180; GLubyte c=b+((s+t&1)&1)*(255-b); pi[0]=c; pi[1]=c; pi[2]=c; */ pi+=3; } } glGenTextures(1,(GLuint*)&m_data->m_defaultTexturehandle); glBindTexture(GL_TEXTURE_2D,m_data->m_defaultTexturehandle); b3Assert(glGetError() ==GL_NO_ERROR); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 256,256,0,GL_RGB,GL_UNSIGNED_BYTE,image); glGenerateMipmap(GL_TEXTURE_2D); b3Assert(glGetError() ==GL_NO_ERROR); delete[] image; m_textureinitialized=true; } b3Assert(glGetError() ==GL_NO_ERROR); glBindTexture(GL_TEXTURE_2D,m_data->m_defaultTexturehandle); b3Assert(glGetError() ==GL_NO_ERROR); } else { glDisable(GL_TEXTURE_2D); b3Assert(glGetError() ==GL_NO_ERROR); } } //glEnable(GL_COLOR_MATERIAL); b3Assert(glGetError() ==GL_NO_ERROR); // glEnable(GL_CULL_FACE); // glCullFace(GL_BACK); }
void GLInstancingRenderer::renderSceneInternal(int renderMode) { // glEnable(GL_DEPTH_TEST); GLint dims[4]; glGetIntegerv(GL_VIEWPORT, dims); //we need to get the viewport dims, because on Apple Retina the viewport dimension is different from screenWidth //printf("dims=%d,%d,%d,%d\n",dims[0],dims[1],dims[2],dims[3]); // Accept fragment if it closer to the camera than the former one //glDepthFunc(GL_LESS); // Cull triangles which normal is not towards the camera glEnable(GL_CULL_FACE); B3_PROFILE("GLInstancingRenderer::RenderScene"); { B3_PROFILE("init"); init(); } b3Assert(glGetError() ==GL_NO_ERROR); float depthProjectionMatrix[4][4]; GLfloat depthModelViewMatrix[4][4]; //GLfloat depthModelViewMatrix2[4][4]; // Compute the MVP matrix from the light's point of view if (renderMode==B3_CREATE_SHADOWMAP_RENDERMODE) { glEnable(GL_CULL_FACE); glCullFace(GL_FRONT); if (!m_data->m_shadowMap) { glActiveTexture(GL_TEXTURE0); glGenTextures(1,&m_data->m_shadowTexture); glBindTexture(GL_TEXTURE_2D,m_data->m_shadowTexture); //glTexImage2D(GL_TEXTURE_2D,0,GL_DEPTH_COMPONENT16,m_screenWidth,m_screenHeight,0,GL_DEPTH_COMPONENT,GL_FLOAT,0); //glTexImage2D(GL_TEXTURE_2D,0,GL_DEPTH_COMPONENT32,m_screenWidth,m_screenHeight,0,GL_DEPTH_COMPONENT,GL_FLOAT,0); #ifdef OLD_SHADOWMAP_INIT glTexImage2D(GL_TEXTURE_2D, 0,GL_DEPTH_COMPONENT16, shadowMapWidth, shadowMapHeight, 0,GL_DEPTH_COMPONENT, GL_FLOAT, 0); #else//OLD_SHADOWMAP_INIT //Reduce size of shadowMap if glTexImage2D call fails as may happen in some cases //https://github.com/bulletphysics/bullet3/issues/40 int size; glGetIntegerv(GL_MAX_TEXTURE_SIZE, &size); if (size < shadowMapWidth){ shadowMapWidth = size; } if (size < shadowMapHeight){ shadowMapHeight = size; } GLuint err; do { glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT16, shadowMapWidth, shadowMapHeight, 0, GL_DEPTH_COMPONENT, GL_FLOAT, 0); err = glGetError(); if (err!=GL_NO_ERROR){ shadowMapHeight >>= 1; shadowMapWidth >>= 1; } } while (err != GL_NO_ERROR && shadowMapWidth > 0); #endif//OLD_SHADOWMAP_INIT glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); float l_ClampColor[] = {1.0, 1.0, 1.0, 1.0}; glTexParameterfv(GL_TEXTURE_2D, GL_TEXTURE_BORDER_COLOR, l_ClampColor); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER); // glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); // glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_FUNC, GL_LEQUAL); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_MODE, GL_COMPARE_REF_TO_TEXTURE); m_data->m_shadowMap=new GLRenderToTexture(); m_data->m_shadowMap->init(shadowMapWidth, shadowMapHeight,m_data->m_shadowTexture,RENDERTEXTURE_DEPTH); }
///todo: add some acceleration structure (AABBs, tree etc) void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults, int numBodies,const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables, const struct b3GpuNarrowPhaseInternalData* narrowphaseData, class b3GpuBroadphaseInterface* broadphase) { //castRaysHost(rays,hitResults,numBodies,bodies,numCollidables,collidables,narrowphaseData); B3_PROFILE("castRaysGPU"); { B3_PROFILE("raycast copyFromHost"); m_data->m_gpuRays->copyFromHost(rays); m_data->m_gpuHitResults->copyFromHost(hitResults); } int numRays = hitResults.size(); { m_data->m_firstRayRigidPairIndexPerRay->resize(numRays); m_data->m_numRayRigidPairsPerRay->resize(numRays); m_data->m_gpuNumRayRigidPairs->resize(1); m_data->m_gpuRayRigidPairs->resize(numRays * 16); } //run kernel const bool USE_BRUTE_FORCE_RAYCAST = false; if(USE_BRUTE_FORCE_RAYCAST) { B3_PROFILE("raycast launch1D"); b3LauncherCL launcher(m_data->m_q,m_data->m_raytraceKernel,"m_raytraceKernel"); int numRays = rays.size(); launcher.setConst(numRays); launcher.setBuffer(m_data->m_gpuRays->getBufferCL()); launcher.setBuffer(m_data->m_gpuHitResults->getBufferCL()); launcher.setConst(numBodies); launcher.setBuffer(narrowphaseData->m_bodyBufferGPU->getBufferCL()); launcher.setBuffer(narrowphaseData->m_collidablesGPU->getBufferCL()); launcher.setBuffer(narrowphaseData->m_convexFacesGPU->getBufferCL()); launcher.setBuffer(narrowphaseData->m_convexPolyhedraGPU->getBufferCL()); launcher.launch1D(numRays); clFinish(m_data->m_q); } else { m_data->m_plbvh->build( broadphase->getAllAabbsGPU(), broadphase->getSmallAabbIndicesGPU(), broadphase->getLargeAabbIndicesGPU() ); m_data->m_plbvh->testRaysAgainstBvhAabbs(*m_data->m_gpuRays, *m_data->m_gpuNumRayRigidPairs, *m_data->m_gpuRayRigidPairs); int numRayRigidPairs = -1; m_data->m_gpuNumRayRigidPairs->copyToHostPointer(&numRayRigidPairs, 1); if( numRayRigidPairs > m_data->m_gpuRayRigidPairs->size() ) { numRayRigidPairs = m_data->m_gpuRayRigidPairs->size(); m_data->m_gpuNumRayRigidPairs->copyFromHostPointer(&numRayRigidPairs, 1); } m_data->m_gpuRayRigidPairs->resize(numRayRigidPairs); //Radix sort needs b3OpenCLArray::size() to be correct //Sort ray-rigid pairs by ray index { B3_PROFILE("sort ray-rigid pairs"); m_data->m_radixSorter->execute( *reinterpret_cast< b3OpenCLArray<b3SortData>* >(m_data->m_gpuRayRigidPairs) ); } //detect start,count of each ray pair { B3_PROFILE("detect ray-rigid pair index ranges"); { B3_PROFILE("reset ray-rigid pair index ranges"); m_data->m_fill->execute(*m_data->m_firstRayRigidPairIndexPerRay, numRayRigidPairs, numRays); //atomic_min used to find first index m_data->m_fill->execute(*m_data->m_numRayRigidPairsPerRay, 0, numRays); clFinish(m_data->m_q); } b3BufferInfoCL bufferInfo[] = { b3BufferInfoCL( m_data->m_gpuRayRigidPairs->getBufferCL() ), b3BufferInfoCL( m_data->m_firstRayRigidPairIndexPerRay->getBufferCL() ), b3BufferInfoCL( m_data->m_numRayRigidPairsPerRay->getBufferCL() ) }; b3LauncherCL launcher(m_data->m_q, m_data->m_findRayRigidPairIndexRanges, "m_findRayRigidPairIndexRanges"); launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst(numRayRigidPairs); launcher.launch1D(numRayRigidPairs); clFinish(m_data->m_q); } { B3_PROFILE("ray-rigid intersection"); b3BufferInfoCL bufferInfo[] = { b3BufferInfoCL( m_data->m_gpuRays->getBufferCL() ), b3BufferInfoCL( m_data->m_gpuHitResults->getBufferCL() ), b3BufferInfoCL( m_data->m_firstRayRigidPairIndexPerRay->getBufferCL() ), b3BufferInfoCL( m_data->m_numRayRigidPairsPerRay->getBufferCL() ), b3BufferInfoCL( narrowphaseData->m_bodyBufferGPU->getBufferCL() ), b3BufferInfoCL( narrowphaseData->m_collidablesGPU->getBufferCL() ), b3BufferInfoCL( narrowphaseData->m_convexFacesGPU->getBufferCL() ), b3BufferInfoCL( narrowphaseData->m_convexPolyhedraGPU->getBufferCL() ), b3BufferInfoCL( m_data->m_gpuRayRigidPairs->getBufferCL() ) }; b3LauncherCL launcher(m_data->m_q, m_data->m_raytracePairsKernel, "m_raytracePairsKernel"); launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst(numRays); launcher.launch1D(numRays); clFinish(m_data->m_q); } } //copy results { B3_PROFILE("raycast copyToHost"); m_data->m_gpuHitResults->copyToHost(hitResults); } }
int main(int argc, char* argv[]) { int sz = sizeof(b3Generic6DofConstraint); int sz2 = sizeof(b3Point2PointConstraint); int sz3 = sizeof(b3TypedConstraint); int sz4 = sizeof(b3TranslationalLimitMotor); int sz5 = sizeof(b3RotationalLimitMotor); int sz6 = sizeof(b3Transform); //b3OpenCLUtils::setCachePath("/Users/erwincoumans/develop/mycache"); b3SetCustomEnterProfileZoneFunc(b3ProfileManager::Start_Profile); b3SetCustomLeaveProfileZoneFunc(b3ProfileManager::Stop_Profile); b3SetCustomPrintfFunc(myprintf); b3Vector3 test=b3MakeVector3(1,2,3); test.x = 1; test.y = 4; printf("main start"); b3CommandLineArgs args(argc,argv); ParticleDemo::ConstructionInfo ci; if (args.CheckCmdLineFlag("help")) { Usage(); return 0; } selectedDemo = loadCurrentDemoEntry(sStartFileName); args.GetCmdLineArgument("selected_demo",selectedDemo); if (args.CheckCmdLineFlag("new_batching")) { useNewBatchingKernel = true; } bool benchmark=args.CheckCmdLineFlag("benchmark"); args.GetCmdLineArgument("max_framecount",maxFrameCount); args.GetCmdLineArgument("shadowmap_size",shadowMapWorldSize); args.GetCmdLineArgument("shadowmap_resolution",shadowMapWidth); shadowMapHeight=shadowMapWidth; if (args.CheckCmdLineFlag("disable_shadowmap")) { useShadowMap = false; } args.GetCmdLineArgument("pair_benchmark_file",gPairBenchFileName); gDebugLauncherCL = args.CheckCmdLineFlag("debug_kernel_launch"); dump_timings=args.CheckCmdLineFlag("dump_timings"); ci.useOpenCL = !args.CheckCmdLineFlag("disable_opencl"); ci.m_useConcaveMesh = true;//args.CheckCmdLineFlag("use_concave_mesh"); if (ci.m_useConcaveMesh) { enableExperimentalCpuConcaveCollision = true; } ci.m_useInstancedCollisionShapes = !args.CheckCmdLineFlag("no_instanced_collision_shapes"); args.GetCmdLineArgument("cl_device", ci.preferredOpenCLDeviceIndex); args.GetCmdLineArgument("cl_platform", ci.preferredOpenCLPlatformIndex); gAllowCpuOpenCL = args.CheckCmdLineFlag("allow_opencl_cpu"); gUseLargeBatches = args.CheckCmdLineFlag("use_large_batches"); gUseJacobi = args.CheckCmdLineFlag("use_jacobi"); gUseDbvt = args.CheckCmdLineFlag("use_dbvt"); gDumpContactStats = args.CheckCmdLineFlag("dump_contact_stats"); gCalcWorldSpaceAabbOnCpu = args.CheckCmdLineFlag("calc_aabb_cpu"); gUseCalculateOverlappingPairsHost = args.CheckCmdLineFlag("calc_pairs_cpu"); gIntegrateOnCpu = args.CheckCmdLineFlag("integrate_cpu"); gConvertConstraintOnCpu = args.CheckCmdLineFlag("convert_constraints_cpu"); useUniformGrid = args.CheckCmdLineFlag("use_uniform_grid"); args.GetCmdLineArgument("x_dim", ci.arraySizeX); args.GetCmdLineArgument("y_dim", ci.arraySizeY); args.GetCmdLineArgument("z_dim", ci.arraySizeZ); args.GetCmdLineArgument("x_gap", ci.gapX); args.GetCmdLineArgument("y_gap", ci.gapY); args.GetCmdLineArgument("z_gap", ci.gapZ); gPause = args.CheckCmdLineFlag("paused"); gDebugForceLoadingFromSource = args.CheckCmdLineFlag("load_cl_kernels_from_disk"); gDebugSkipLoadingBinary = args.CheckCmdLineFlag("disable_cached_cl_kernels"); #ifndef B3_NO_PROFILE b3ProfileManager::Reset(); #endif //B3_NO_PROFILE window = new b3gDefaultOpenGLWindow(); b3gWindowConstructionInfo wci(g_OpenGLWidth,g_OpenGLHeight); window->createWindow(wci); window->setResizeCallback(MyResizeCallback); window->setMouseMoveCallback(MyMouseMoveCallback); window->setMouseButtonCallback(MyMouseButtonCallback); window->setKeyboardCallback(MyKeyboardCallback); window->setWindowTitle("Bullet 3.x GPU Rigid Body http://bulletphysics.org"); printf("-----------------------------------------------------\n"); #ifndef __APPLE__ glewInit(); #endif gui = new GwenUserInterface(); printf("started GwenUserInterface"); GLPrimitiveRenderer prim(g_OpenGLWidth,g_OpenGLHeight); stash = initFont(&prim); if (gui) { gui->init(g_OpenGLWidth,g_OpenGLHeight,stash,window->getRetinaScale()); printf("init fonts"); gui->setToggleButtonCallback(MyButtonCallback); gui->registerToggleButton(MYPAUSE,"Pause"); gui->registerToggleButton(MYPROFILE,"Profile"); gui->registerToggleButton(MYRESET,"Reset"); int numItems = sizeof(allDemos)/sizeof(ParticleDemo::CreateFunc*); demoNames.clear(); for (int i=0;i<numItems;i++) { GpuDemo* demo = allDemos[i](); demoNames.push_back(demo->getName()); delete demo; } gui->registerComboBox(MYCOMBOBOX1,numItems,&demoNames[0],selectedDemo); gui->setComboBoxCallback(MyComboBoxCallback); } do { bool syncOnly = false; gReset = false; { GLint err; glEnable(GL_BLEND); err = glGetError(); b3Assert(err==GL_NO_ERROR); glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); glDisable(GL_DEPTH_TEST); err = glGetError(); b3Assert(err==GL_NO_ERROR); window->startRendering(); glClearColor(1,1,1,1); glClear(GL_COLOR_BUFFER_BIT| GL_DEPTH_BUFFER_BIT);//|GL_STENCIL_BUFFER_BIT); glEnable(GL_DEPTH_TEST); sth_begin_draw(stash); //sth_draw_text(stash, droidRegular,12.f, dx, dy-50, "How does this OpenGL True Type font look? ", &dx,width,height); int spacing = 0;//g_OpenGLHeight; float sx,sy,dx,dy,lh; sx = 0; sy = g_OpenGLHeight; dx = sx; dy = sy; //if (1) const char* msg[] = {"Please wait, initializing the OpenCL demo", "Please make sure to run the demo on a high-end discrete GPU with OpenCL support", "The first time it can take a bit longer to compile the OpenCL kernels.", "Check the console if it takes longer than 1 minute or if a demos has issues.", "Please share the full commandline output when reporting issues:", "App_Bullet3_OpenCL_Demos_* >> error.log", "", "", #ifdef _DEBUG "Some of the demos load a large .obj file,", "please use an optimized build of this app for faster parsing", "", "", #endif "You can press F1 to create a single screenshot,", "or press F2 toggle screenshot (useful to create movies)", "", "", "There are various command-line options such as --benchmark", "See http://github.com/erwincoumans/bullet3 for more information" }; int fontSize = 68; int nummsg = sizeof(msg)/sizeof(const char*); for (int i=0;i<nummsg;i++) { char txt[512]; sprintf(txt,"%s",msg[i]); //sth_draw_text(stash, droidRegular,i, 10, dy-spacing, txt, &dx,g_OpenGLWidth,g_OpenGLHeight); sth_draw_text(stash, droidRegular,fontSize, 10, spacing, txt, &dx,g_OpenGLWidth,g_OpenGLHeight); spacing+=fontSize; fontSize = 32; } sth_end_draw(stash); sth_flush_draw(stash); window->endRendering(); } static bool once=true; //glClearColor(0.3f, 0.3f, 0.3f, 1.0f); glClearColor(1,1,1,1); glClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT); window->setWheelCallback(b3DefaultWheelCallback); { GpuDemo* demo = allDemos[selectedDemo](); sDemo = demo; // demo->myinit(); bool useGpu = false; //int maxObjectCapacity=128*1024; int maxObjectCapacity=1024*1024; maxObjectCapacity = b3Max(maxObjectCapacity,ci.arraySizeX*ci.arraySizeX*ci.arraySizeX+10); { ci.m_instancingRenderer = new GLInstancingRenderer(maxObjectCapacity);//render.getInstancingRenderer(); ci.m_window = window; ci.m_gui = gui; ci.m_instancingRenderer->init(); ci.m_instancingRenderer->resize(g_OpenGLWidth,g_OpenGLHeight); ci.m_instancingRenderer->InitShaders(); ci.m_primRenderer = &prim; // render.init(); } { demo->initPhysics(ci); } printf("-----------------------------------------------------\n"); FILE* csvFile = 0; FILE* detailsFile = 0; if (benchmark) { char prefixFileName[1024]; char csvFileName[1024]; char detailsFileName[1024]; b3OpenCLDeviceInfo info; b3OpenCLUtils::getDeviceInfo(demo->getInternalData()->m_clDevice,&info); //todo: move this time stuff into the Platform/Window class #ifdef _WIN32 SYSTEMTIME time; GetLocalTime(&time); char buf[1024]; DWORD dwCompNameLen = 1024; if (0 != GetComputerName(buf, &dwCompNameLen)) { printf("%s", buf); } else { printf("unknown", buf); } sprintf(prefixFileName,"%s_%s_%s_%d_%d_%d_date_%d-%d-%d_time_%d-%d-%d",info.m_deviceName,buf,demoNames[selectedDemo],ci.arraySizeX,ci.arraySizeY,ci.arraySizeZ,time.wDay,time.wMonth,time.wYear,time.wHour,time.wMinute,time.wSecond); #else timeval now; gettimeofday(&now,0); struct tm* ptm; ptm = localtime (&now.tv_sec); char buf[1024]; #ifdef __APPLE__ sprintf(buf,"MacOSX"); #else sprintf(buf,"Unix"); #endif sprintf(prefixFileName,"%s_%s_%s_%d_%d_%d_date_%d-%d-%d_time_%d-%d-%d",info.m_deviceName,buf,demoNames[selectedDemo],ci.arraySizeX,ci.arraySizeY,ci.arraySizeZ, ptm->tm_mday, ptm->tm_mon+1, ptm->tm_year+1900, ptm->tm_hour, ptm->tm_min, ptm->tm_sec); #endif sprintf(csvFileName,"%s.csv",prefixFileName); sprintf(detailsFileName,"%s.txt",prefixFileName); printf("Open csv file %s and details file %s\n", csvFileName,detailsFileName); //GetSystemTime(&time2); csvFile=fopen(csvFileName,"w"); detailsFile = fopen(detailsFileName,"w"); if (detailsFile) defaultOutput = detailsFile; //if (f) // fprintf(f,"%s (%dx%dx%d=%d),\n", g_deviceName,ci.arraySizeX,ci.arraySizeY,ci.arraySizeZ,ci.arraySizeX*ci.arraySizeY*ci.arraySizeZ); } fprintf(defaultOutput,"Demo settings:\n"); fprintf(defaultOutput," SelectedDemo=%d, demoname = %s\n", selectedDemo, demo->getName()); fprintf(defaultOutput," x_dim=%d, y_dim=%d, z_dim=%d\n",ci.arraySizeX,ci.arraySizeY,ci.arraySizeZ); fprintf(defaultOutput," x_gap=%f, y_gap=%f, z_gap=%f\n",ci.gapX,ci.gapY,ci.gapZ); fprintf(defaultOutput,"\nOpenCL settings:\n"); fprintf(defaultOutput," Preferred cl_device index %d\n", ci.preferredOpenCLDeviceIndex); fprintf(defaultOutput," Preferred cl_platform index%d\n", ci.preferredOpenCLPlatformIndex); fprintf(defaultOutput,"\n"); if (demo->getInternalData()->m_platformId) { b3OpenCLUtils::printPlatformInfo( demo->getInternalData()->m_platformId); fprintf(defaultOutput,"\n"); b3OpenCLUtils::printDeviceInfo( demo->getInternalData()->m_clDevice); fprintf(defaultOutput,"\n"); } do { GLint err = glGetError(); assert(err==GL_NO_ERROR); if (exportFrame || exportMovie) { if (!renderTexture) { renderTexture = new GLRenderToTexture(); GLuint renderTextureId; glGenTextures(1, &renderTextureId); // "Bind" the newly created texture : all future texture functions will modify this texture glBindTexture(GL_TEXTURE_2D, renderTextureId); // Give an empty image to OpenGL ( the last "0" ) //glTexImage2D(GL_TEXTURE_2D, 0,GL_RGB, g_OpenGLWidth,g_OpenGLHeight, 0,GL_RGBA, GL_UNSIGNED_BYTE, 0); //glTexImage2D(GL_TEXTURE_2D, 0,GL_RGBA32F, g_OpenGLWidth,g_OpenGLHeight, 0,GL_RGBA, GL_FLOAT, 0); glTexImage2D(GL_TEXTURE_2D, 0,GL_RGBA32F, g_OpenGLWidth,g_OpenGLHeight, 0,GL_RGBA, GL_FLOAT, 0); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); //glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); //glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); renderTexture->init(g_OpenGLWidth,g_OpenGLHeight,renderTextureId, RENDERTEXTURE_COLOR); } bool result = renderTexture->enable(); } err = glGetError(); assert(err==GL_NO_ERROR); b3ProfileManager::Reset(); b3ProfileManager::Increment_Frame_Counter(); // render.reshape(g_OpenGLWidth,g_OpenGLHeight); ci.m_instancingRenderer->resize(g_OpenGLWidth,g_OpenGLHeight); prim.setScreenSize(g_OpenGLWidth,g_OpenGLHeight); err = glGetError(); assert(err==GL_NO_ERROR); window->startRendering(); err = glGetError(); assert(err==GL_NO_ERROR); glClear(GL_COLOR_BUFFER_BIT| GL_DEPTH_BUFFER_BIT);//|GL_STENCIL_BUFFER_BIT); glEnable(GL_DEPTH_TEST); err = glGetError(); assert(err==GL_NO_ERROR); if (!gPause) { B3_PROFILE("clientMoveAndDisplay"); demo->clientMoveAndDisplay(); } else { } { B3_PROFILE("renderScene"); demo->renderScene(); } err = glGetError(); assert(err==GL_NO_ERROR); /*if (demo->getDynamicsWorld() && demo->getDynamicsWorld()->getNumCollisionObjects()) { B3_PROFILE("renderPhysicsWorld"); b3AlignedObjectArray<b3CollisionObject*> arr = demo->getDynamicsWorld()->getCollisionObjectArray(); b3CollisionObject** colObjArray = &arr[0]; render.renderPhysicsWorld(demo->getDynamicsWorld()->getNumCollisionObjects(),colObjArray, syncOnly); syncOnly = true; } */ if (exportFrame || exportMovie) { char fileName[1024]; sprintf(fileName,"screenShot%d.png",frameIndex++); writeTextureToPng(g_OpenGLWidth,g_OpenGLHeight,fileName); exportFrame = false; renderTexture->disable(); } { B3_PROFILE("gui->draw"); if (gui && gDrawGui) gui->draw(g_OpenGLWidth,g_OpenGLHeight); } err = glGetError(); assert(err==GL_NO_ERROR); { B3_PROFILE("window->endRendering"); window->endRendering(); } err = glGetError(); assert(err==GL_NO_ERROR); { B3_PROFILE("glFinish"); } if (dump_timings) { b3ProfileManager::dumpAll(stdout); } if (csvFile) { static int frameCount=0; if (frameCount>0) { DumpSimulationTime(csvFile); if (detailsFile) { fprintf(detailsFile,"\n==================================\nFrame %d:\n", frameCount); b3ProfileManager::dumpAll(detailsFile); } } if (frameCount>=maxFrameCount) window->setRequestExit(); frameCount++; } if (gStep) gPause=true; } while (!window->requestedExit() && !gReset); demo->exitPhysics(); b3ProfileManager::CleanupMemory(); delete ci.m_instancingRenderer; delete demo; sDemo = 0; if (detailsFile) { fclose(detailsFile); detailsFile=0; } if (csvFile) { fclose(csvFile); csvFile=0; } } } while (gReset); if (gui) gui->setComboBoxCallback(0); { delete gui; gui=0; exitFont(); window->closeWindow(); delete window; window = 0; } return 0; }
inline int b3GpuPgsContactSolver::sortConstraintByBatch2( b3Contact4* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies) { B3_PROFILE("sortConstraintByBatch2"); bodyUsed2.resize(2*simdWidth); for (int q=0;q<2*simdWidth;q++) bodyUsed2[q]=0; int curBodyUsed = 0; int numIter = 0; m_data->m_sortData.resize(numConstraints); m_data->m_idxBuffer.resize(numConstraints); m_data->m_old.resize(numConstraints); unsigned int* idxSrc = &m_data->m_idxBuffer[0]; #if defined(_DEBUG) for(int i=0; i<numConstraints; i++) cs[i].getBatchIdx() = -1; #endif for(int i=0; i<numConstraints; i++) idxSrc[i] = i; int numValidConstraints = 0; int unprocessedConstraintIndex = 0; int batchIdx = 0; { B3_PROFILE("cpu batch innerloop"); while( numValidConstraints < numConstraints) { numIter++; int nCurrentBatch = 0; // clear flag for(int i=0; i<curBodyUsed; i++) bodyUsed2[i] = 0; curBodyUsed = 0; for(int i=numValidConstraints; i<numConstraints; i++) { int idx = idxSrc[i]; b3Assert( idx < numConstraints ); // check if it can go int bodyAS = cs[idx].m_bodyAPtrAndSignBit; int bodyBS = cs[idx].m_bodyBPtrAndSignBit; int bodyA = abs(bodyAS); int bodyB = abs(bodyBS); bool aIsStatic = (bodyAS<0) || bodyAS==staticIdx; bool bIsStatic = (bodyBS<0) || bodyBS==staticIdx; int aUnavailable = 0; int bUnavailable = 0; if (!aIsStatic) { for (int j=0;j<curBodyUsed;j++) { if (bodyA == bodyUsed2[j]) { aUnavailable=1; break; } } } if (!aUnavailable) if (!bIsStatic) { for (int j=0;j<curBodyUsed;j++) { if (bodyB == bodyUsed2[j]) { bUnavailable=1; break; } } } if( aUnavailable==0 && bUnavailable==0 ) // ok { if (!aIsStatic) { bodyUsed2[curBodyUsed++] = bodyA; } if (!bIsStatic) { bodyUsed2[curBodyUsed++] = bodyB; } cs[idx].getBatchIdx() = batchIdx; m_data->m_sortData[idx].m_key = batchIdx; m_data->m_sortData[idx].m_value = idx; if (i!=numValidConstraints) { b3Swap(idxSrc[i], idxSrc[numValidConstraints]); } numValidConstraints++; { nCurrentBatch++; if( nCurrentBatch == simdWidth ) { nCurrentBatch = 0; for(int i=0; i<curBodyUsed; i++) bodyUsed2[i] = 0; curBodyUsed = 0; } } } } batchIdx ++; } } { B3_PROFILE("quickSort"); //m_data->m_sortData.quickSort(sortfnc); } { B3_PROFILE("reorder"); // reorder memcpy( &m_data->m_old[0], cs, sizeof(b3Contact4)*numConstraints); for(int i=0; i<numConstraints; i++) { b3Assert(m_data->m_sortData[idxSrc[i]].m_value == idxSrc[i]); int idx = m_data->m_sortData[idxSrc[i]].m_value; cs[i] = m_data->m_old[idx]; } } #if defined(_DEBUG) // debugPrintf( "nBatches: %d\n", batchIdx ); for(int i=0; i<numConstraints; i++) { b3Assert( cs[i].getBatchIdx() != -1 ); } #endif return batchIdx; }
inline int b3GpuPgsContactSolver::sortConstraintByBatch( b3Contact4* cs, int n, int simdWidth , int staticIdx, int numBodies) { B3_PROFILE("sortConstraintByBatch"); int numIter = 0; sortData.resize(n); idxBuffer.resize(n); old.resize(n); unsigned int* idxSrc = &idxBuffer[0]; unsigned int* idxDst = &idxBuffer[0]; int nIdxSrc, nIdxDst; const int N_FLG = 256; const int FLG_MASK = N_FLG-1; unsigned int flg[N_FLG/32]; #if defined(_DEBUG) for(int i=0; i<n; i++) cs[i].getBatchIdx() = -1; #endif for(int i=0; i<n; i++) idxSrc[i] = i; nIdxSrc = n; int batchIdx = 0; { B3_PROFILE("cpu batch innerloop"); while( nIdxSrc ) { numIter++; nIdxDst = 0; int nCurrentBatch = 0; // clear flag for(int i=0; i<N_FLG/32; i++) flg[i] = 0; for(int i=0; i<nIdxSrc; i++) { int idx = idxSrc[i]; b3Assert( idx < n ); // check if it can go int bodyAS = cs[idx].m_bodyAPtrAndSignBit; int bodyBS = cs[idx].m_bodyBPtrAndSignBit; int bodyA = abs(bodyAS); int bodyB = abs(bodyBS); int aIdx = bodyA & FLG_MASK; int bIdx = bodyB & FLG_MASK; unsigned int aUnavailable = flg[ aIdx/32 ] & (1<<(aIdx&31)); unsigned int bUnavailable = flg[ bIdx/32 ] & (1<<(bIdx&31)); bool aIsStatic = (bodyAS<0) || bodyAS==staticIdx; bool bIsStatic = (bodyBS<0) || bodyBS==staticIdx; //use inv_mass! aUnavailable = !aIsStatic? aUnavailable:0;// bUnavailable = !bIsStatic? bUnavailable:0; if( aUnavailable==0 && bUnavailable==0 ) // ok { if (!aIsStatic) flg[ aIdx/32 ] |= (1<<(aIdx&31)); if (!bIsStatic) flg[ bIdx/32 ] |= (1<<(bIdx&31)); cs[idx].getBatchIdx() = batchIdx; sortData[idx].m_key = batchIdx; sortData[idx].m_value = idx; { nCurrentBatch++; if( nCurrentBatch == simdWidth ) { nCurrentBatch = 0; for(int i=0; i<N_FLG/32; i++) flg[i] = 0; } } } else { idxDst[nIdxDst++] = idx; } } b3Swap( idxSrc, idxDst ); b3Swap( nIdxSrc, nIdxDst ); batchIdx ++; } } { B3_PROFILE("quickSort"); sortData.quickSort(sortfnc); } { B3_PROFILE("reorder"); // reorder memcpy( &old[0], cs, sizeof(b3Contact4)*n); for(int i=0; i<n; i++) { int idx = sortData[i].m_value; cs[i] = old[idx]; } } #if defined(_DEBUG) // debugPrintf( "nBatches: %d\n", batchIdx ); for(int i=0; i<n; i++) { b3Assert( cs[i].getBatchIdx() != -1 ); } #endif return batchIdx; }
void OpenGLExampleBrowser::update(float deltaTime) { b3ChromeUtilsEnableProfiling(); B3_PROFILE("OpenGLExampleBrowser::update"); assert(glGetError()==GL_NO_ERROR); s_instancingRenderer->init(); DrawGridData dg; dg.upAxis = s_app->getUpAxis(); { BT_PROFILE("Update Camera and Light"); s_instancingRenderer->updateCamera(dg.upAxis); } static int frameCount = 0; frameCount++; if (0) { BT_PROFILE("Draw frame counter"); char bla[1024]; sprintf(bla,"Frame %d", frameCount); s_app->drawText(bla,10,10); } if (gPngFileName) { static int skip = 0; skip--; if (skip<0) { skip=gPngSkipFrames; //printf("gPngFileName=%s\n",gPngFileName); static int s_frameCount = 100; sprintf(staticPngFileName,"%s%d.png",gPngFileName,s_frameCount++); //b3Printf("Made screenshot %s",staticPngFileName); s_app->dumpNextFrameToPng(staticPngFileName); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); } } if (sCurrentDemo) { if (!pauseSimulation || singleStepSimulation) { //printf("---------------------------------------------------\n"); //printf("Framecount = %d\n",frameCount); B3_PROFILE("sCurrentDemo->stepSimulation"); if (gFixedTimeStep>0) { sCurrentDemo->stepSimulation(gFixedTimeStep); } else { sCurrentDemo->stepSimulation(deltaTime);//1./60.f); } } if (renderGrid) { BT_PROFILE("Draw Grid"); glPolygonOffset(3.0, 3); glEnable(GL_POLYGON_OFFSET_FILL); s_app->drawGrid(dg); } if (renderVisualGeometry && ((gDebugDrawFlags&btIDebugDraw::DBG_DrawWireframe)==0)) { if (visualWireframe) { glPolygonMode( GL_FRONT_AND_BACK, GL_LINE ); } BT_PROFILE("Render Scene"); sCurrentDemo->renderScene(); } //else { B3_PROFILE("physicsDebugDraw"); glPolygonMode( GL_FRONT_AND_BACK, GL_FILL ); sCurrentDemo->physicsDebugDraw(gDebugDrawFlags); } } { if (gui2 && s_guiHelper && s_guiHelper->getRenderInterface() && s_guiHelper->getRenderInterface()->getActiveCamera()) { B3_PROFILE("setStatusBarMessage"); char msg[1024]; float camDist = s_guiHelper->getRenderInterface()->getActiveCamera()->getCameraDistance(); float pitch = s_guiHelper->getRenderInterface()->getActiveCamera()->getCameraPitch(); float yaw = s_guiHelper->getRenderInterface()->getActiveCamera()->getCameraYaw(); float camTarget[3]; s_guiHelper->getRenderInterface()->getActiveCamera()->getCameraTargetPosition(camTarget); sprintf(msg,"dist=%f, pitch=%f, yaw=%f,target=%f,%f,%f", camDist,pitch,yaw,camTarget[0],camTarget[1],camTarget[2]); gui2->setStatusBarMessage(msg, true); } } static int toggle = 1; if (renderGui) { B3_PROFILE("renderGui"); #ifndef BT_NO_PROFILE if (!pauseSimulation || singleStepSimulation) { if (isProfileWindowVisible(s_profWindow)) { processProfileData(s_profWindow,false); } } #endif //#ifndef BT_NO_PROFILE if (sUseOpenGL2) { saveOpenGLState(s_instancingRenderer->getScreenWidth(), s_instancingRenderer->getScreenHeight()); } if (m_internalData->m_gui) { gBlockGuiMessages = true; m_internalData->m_gui->draw(s_instancingRenderer->getScreenWidth(), s_instancingRenderer->getScreenHeight()); gBlockGuiMessages = false; } if (sUseOpenGL2) { restoreOpenGLState(); } } singleStepSimulation = false; toggle=1-toggle; { BT_PROFILE("Sync Parameters"); if (s_parameterInterface) { s_parameterInterface->syncParameters(); } } { BT_PROFILE("Swap Buffers"); s_app->swapBuffer(); } if (gui2) { B3_PROFILE("forceUpdateScrollBars"); gui2->forceUpdateScrollBars(); } }
int main(int argc, char* argv[]) { b3SetCustomEnterProfileZoneFunc(b3ProfileManager::Start_Profile); b3SetCustomLeaveProfileZoneFunc(b3ProfileManager::Stop_Profile); b3SetCustomPrintfFunc(myprintf); b3Vector3 test=b3MakeVector3(1,2,3); test.x = 1; test.y = 4; b3Printf("main start"); b3CommandLineArgs args(argc,argv); if (args.CheckCmdLineFlag("help")) { Usage(); return 0; } args.GetCmdLineArgument("selected_demo",selectedDemo); bool benchmark=args.CheckCmdLineFlag("benchmark"); args.GetCmdLineArgument("max_framecount",maxFrameCount); dump_timings=args.CheckCmdLineFlag("dump_timings"); #ifndef B3_NO_PROFILE b3ProfileManager::Reset(); #endif //B3_NO_PROFILE window = new b3gDefaultOpenGLWindow(); b3gWindowConstructionInfo wci(g_OpenGLWidth,g_OpenGLHeight); window->createWindow(wci); window->setResizeCallback(MyResizeCallback); window->setMouseMoveCallback(MyMouseMoveCallback); window->setMouseButtonCallback(MyMouseButtonCallback); window->setKeyboardCallback(MyKeyboardCallback); window->setWindowTitle("Bullet 3.x GPU Rigid Body http://bulletphysics.org"); #ifndef __APPLE__ glewInit(); #endif gui = new GwenUserInterface(); b3Printf("started GwenUserInterface\n"); GLPrimitiveRenderer prim(g_OpenGLWidth,g_OpenGLHeight); stash = initFont(&prim); if (gui) { gui->init(g_OpenGLWidth,g_OpenGLHeight,stash,window->getRetinaScale()); b3Printf("init fonts\n"); gui->setToggleButtonCallback(MyButtonCallback); gui->registerToggleButton(MYPAUSE,"Pause"); gui->registerToggleButton(MYPROFILE,"Profile"); gui->registerToggleButton(MYRESET,"Reset"); int numItems = sizeof(allDemos)/sizeof(CpuDemo::CreateFunc*); demoNames.clear(); for (int i=0;i<numItems;i++) { CpuDemo* demo = allDemos[i](); demoNames.push_back(demo->getName()); delete demo; } gui->registerComboBox(MYCOMBOBOX1,numItems,&demoNames[0]); gui->setComboBoxCallback(MyComboBoxCallback); } do { bool syncOnly = false; gReset = false; static bool once=true; //glClearColor(0.3f, 0.3f, 0.3f, 1.0f); glClearColor(1,1,1,1); glClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT); window->setWheelCallback(b3DefaultWheelCallback); { CpuDemo* demo = allDemos[selectedDemo](); sDemo = demo; // demo->myinit(); bool useGpu = false; int maxObjectCapacity=1024*1024;//128*1024; int maxShapeCapacityInBytes=10*1024*1024; //maxObjectCapacity = b3Max(maxObjectCapacity,ci.arraySizeX*ci.arraySizeX*ci.arraySizeX+10); CpuDemo::ConstructionInfo ci; ci.m_instancingRenderer = new GLInstancingRenderer(maxObjectCapacity,maxShapeCapacityInBytes); ci.m_window = window; ci.m_gui = gui; ci.m_instancingRenderer->init(); ci.m_instancingRenderer->resize(g_OpenGLWidth,g_OpenGLHeight); ci.m_instancingRenderer->InitShaders(); ci.m_primRenderer = &prim; // render.init(); { demo->initPhysics(ci); } FILE* csvFile = 0; FILE* detailsFile = 0; if (benchmark) { gPause = false; char prefixFileName[1024]; char csvFileName[1024]; char detailsFileName[1024]; //todo: move this time stuff into the Platform/Window class #ifdef _WIN32 SYSTEMTIME time; GetLocalTime(&time); char buf[1024]; DWORD dwCompNameLen = 1024; if (0 != GetComputerName(buf, &dwCompNameLen)) { printf("%s", buf); } else { printf("unknown", buf); } sprintf(prefixFileName,"%s_%s_%s_date_%d-%d-%d_time_%d-%d-%d","CPU",buf,demoNames[selectedDemo],time.wDay,time.wMonth,time.wYear,time.wHour,time.wMinute,time.wSecond); #else timeval now; gettimeofday(&now,0); struct tm* ptm; ptm = localtime (&now.tv_sec); char buf[1024]; #ifdef __APPLE__ sprintf(buf,"MacOSX"); #else sprintf(buf,"Unix"); #endif sprintf(prefixFileName,"%s_%s_%s_%d_%d_%d_date_%d-%d-%d_time_%d-%d-%d",info.m_deviceName,buf,demoNames[selectedDemo],ci.arraySizeX,ci.arraySizeY,ci.arraySizeZ, ptm->tm_mday, ptm->tm_mon+1, ptm->tm_year+1900, ptm->tm_hour, ptm->tm_min, ptm->tm_sec); #endif sprintf(csvFileName,"%s.csv",prefixFileName); sprintf(detailsFileName,"%s.txt",prefixFileName); printf("Open csv file %s and details file %s\n", csvFileName,detailsFileName); //GetSystemTime(&time2); csvFile=fopen(csvFileName,"w"); detailsFile = fopen(detailsFileName,"w"); if (detailsFile) defaultOutput = detailsFile; //if (f) // fprintf(f,"%s (%dx%dx%d=%d),\n", g_deviceName,ci.arraySizeX,ci.arraySizeY,ci.arraySizeZ,ci.arraySizeX*ci.arraySizeY*ci.arraySizeZ); } do { GLint err = glGetError(); assert(err==GL_NO_ERROR); if (exportFrame || exportMovie) { if (!renderTexture) { renderTexture = new GLRenderToTexture(); GLuint renderTextureId; glGenTextures(1, &renderTextureId); // "Bind" the newly created texture : all future texture functions will modify this texture glBindTexture(GL_TEXTURE_2D, renderTextureId); // Give an empty image to OpenGL ( the last "0" ) //glTexImage2D(GL_TEXTURE_2D, 0,GL_RGB, g_OpenGLWidth,g_OpenGLHeight, 0,GL_RGBA, GL_UNSIGNED_BYTE, 0); //glTexImage2D(GL_TEXTURE_2D, 0,GL_RGBA32F, g_OpenGLWidth,g_OpenGLHeight, 0,GL_RGBA, GL_FLOAT, 0); glTexImage2D(GL_TEXTURE_2D, 0,GL_RGBA32F, g_OpenGLWidth,g_OpenGLHeight, 0,GL_RGBA, GL_FLOAT, 0); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); //glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); //glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); renderTexture->init(g_OpenGLWidth,g_OpenGLHeight,renderTextureId, RENDERTEXTURE_COLOR); } bool result = renderTexture->enable(); } err = glGetError(); assert(err==GL_NO_ERROR); b3ProfileManager::Reset(); b3ProfileManager::Increment_Frame_Counter(); // render.reshape(g_OpenGLWidth,g_OpenGLHeight); ci.m_instancingRenderer->resize(g_OpenGLWidth,g_OpenGLHeight); prim.setScreenSize(g_OpenGLWidth,g_OpenGLHeight); err = glGetError(); assert(err==GL_NO_ERROR); window->startRendering(); err = glGetError(); assert(err==GL_NO_ERROR); glClear(GL_COLOR_BUFFER_BIT| GL_DEPTH_BUFFER_BIT);//|GL_STENCIL_BUFFER_BIT); glEnable(GL_DEPTH_TEST); err = glGetError(); assert(err==GL_NO_ERROR); if (!gPause) { B3_PROFILE("clientMoveAndDisplay"); demo->clientMoveAndDisplay(); } else { } { B3_PROFILE("renderScene"); demo->renderScene(); } err = glGetError(); assert(err==GL_NO_ERROR); /*if (demo->getDynamicsWorld() && demo->getDynamicsWorld()->getNumCollisionObjects()) { B3_PROFILE("renderPhysicsWorld"); b3AlignedObjectArray<b3CollisionObject*> arr = demo->getDynamicsWorld()->getCollisionObjectArray(); b3CollisionObject** colObjArray = &arr[0]; render.renderPhysicsWorld(demo->getDynamicsWorld()->getNumCollisionObjects(),colObjArray, syncOnly); syncOnly = true; } */ if (exportFrame || exportMovie) { char fileName[1024]; sprintf(fileName,"screenShot%d.png",frameIndex++); writeTextureToPng(g_OpenGLWidth,g_OpenGLHeight,fileName); exportFrame = false; renderTexture->disable(); } { B3_PROFILE("gui->draw"); if (gui && gDrawGui) gui->draw(g_OpenGLWidth,g_OpenGLHeight); } err = glGetError(); assert(err==GL_NO_ERROR); { B3_PROFILE("window->endRendering"); window->endRendering(); } err = glGetError(); assert(err==GL_NO_ERROR); { B3_PROFILE("glFinish"); } if (dump_timings) { b3ProfileManager::dumpAll(stdout); } if (csvFile) { static int frameCount=0; if (frameCount>0) { DumpSimulationTime(csvFile); if (detailsFile) { fprintf(detailsFile,"\n==================================\nFrame %d:\n", frameCount); b3ProfileManager::dumpAll(detailsFile); } } if (frameCount>=maxFrameCount) window->setRequestExit(); frameCount++; } if (gStep) gPause=true; } while (!window->requestedExit() && !gReset); demo->exitPhysics(); b3ProfileManager::CleanupMemory(); delete ci.m_instancingRenderer; delete demo; sDemo = 0; if (detailsFile) { fclose(detailsFile); detailsFile=0; } if (csvFile) { fclose(csvFile); csvFile=0; } } } while (gReset); if (gui) gui->setComboBoxCallback(0); { delete gui; gui=0; exitFont(); window->closeWindow(); delete window; window = 0; } return 0; }
b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlyFinish(b3OpenCLArray<b3RigidBodyData>* gpuBodies, b3OpenCLArray<b3InertiaData>* gpuInertias, int numBodies, b3OpenCLArray<b3GpuGenericConstraint>* gpuConstraints, int numConstraints, const b3ContactSolverInfo& infoGlobal) { B3_PROFILE("solveGroupCacheFriendlyFinish"); // int numPoolConstraints = m_tmpSolverContactConstraintPool.size(); // int i,j; { if (gpuBreakConstraints) { B3_PROFILE("breakViolatedConstraintsKernel"); b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_breakViolatedConstraintsKernel, "m_breakViolatedConstraintsKernel"); launcher.setBuffer(gpuConstraints->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuConstraintRows->getBufferCL()); launcher.setConst(numConstraints); launcher.launch1D(numConstraints); } else { gpuConstraints->copyToHost(m_gpuData->m_cpuConstraints); m_gpuData->m_gpuBatchConstraints->copyToHost(m_gpuData->m_cpuBatchConstraints); m_gpuData->m_gpuConstraintRows->copyToHost(m_gpuData->m_cpuConstraintRows); gpuConstraints->copyToHost(m_gpuData->m_cpuConstraints); m_gpuData->m_gpuConstraintInfo1->copyToHost(m_gpuData->m_cpuConstraintInfo1); m_gpuData->m_gpuConstraintRowOffsets->copyToHost(m_gpuData->m_cpuConstraintRowOffsets); for (int cid = 0; cid < numConstraints; cid++) { int originalConstraintIndex = batchConstraints[cid].m_originalConstraintIndex; int constraintRowOffset = m_gpuData->m_cpuConstraintRowOffsets[originalConstraintIndex]; int numRows = m_gpuData->m_cpuConstraintInfo1[originalConstraintIndex]; if (numRows) { // printf("cid=%d, breakingThreshold =%f\n",cid,breakingThreshold); for (int i = 0; i < numRows; i++) { int rowIndex = constraintRowOffset + i; int orgConstraintIndex = m_gpuData->m_cpuConstraintRows[rowIndex].m_originalConstraintIndex; float breakingThreshold = m_gpuData->m_cpuConstraints[orgConstraintIndex].m_breakingImpulseThreshold; // printf("rows[%d].m_appliedImpulse=%f\n",rowIndex,rows[rowIndex].m_appliedImpulse); if (b3Fabs(m_gpuData->m_cpuConstraintRows[rowIndex].m_appliedImpulse) >= breakingThreshold) { m_gpuData->m_cpuConstraints[orgConstraintIndex].m_flags = 0; //&= ~B3_CONSTRAINT_FLAG_ENABLED; } } } } gpuConstraints->copyFromHost(m_gpuData->m_cpuConstraints); } } { if (useGpuWriteBackVelocities) { B3_PROFILE("GPU write back velocities and transforms"); b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_writeBackVelocitiesKernel, "m_writeBackVelocitiesKernel"); launcher.setBuffer(gpuBodies->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL()); launcher.setConst(numBodies); launcher.launch1D(numBodies); clFinish(m_gpuData->m_queue); // m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool); // m_gpuData->m_gpuBodies->copyToHostPointer(bodies,numBodies); //m_gpuData->m_gpuBodies->copyToHost(testBodies); } else { B3_PROFILE("CPU write back velocities and transforms"); m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool); gpuBodies->copyToHost(m_gpuData->m_cpuBodies); for (int i = 0; i < m_tmpSolverBodyPool.size(); i++) { int bodyIndex = m_tmpSolverBodyPool[i].m_originalBodyIndex; //printf("bodyIndex=%d\n",bodyIndex); b3Assert(i == bodyIndex); b3RigidBodyData* body = &m_gpuData->m_cpuBodies[bodyIndex]; if (body->m_invMass) { if (infoGlobal.m_splitImpulse) m_tmpSolverBodyPool[i].writebackVelocityAndTransform(infoGlobal.m_timeStep, infoGlobal.m_splitImpulseTurnErp); else m_tmpSolverBodyPool[i].writebackVelocity(); if (m_usePgs) { body->m_linVel = m_tmpSolverBodyPool[i].m_linearVelocity; body->m_angVel = m_tmpSolverBodyPool[i].m_angularVelocity; } else { b3Assert(0); } /* if (infoGlobal.m_splitImpulse) { body->m_pos = m_tmpSolverBodyPool[i].m_worldTransform.getOrigin(); b3Quaternion orn; orn = m_tmpSolverBodyPool[i].m_worldTransform.getRotation(); body->m_quat = orn; } */ } } //for gpuBodies->copyFromHost(m_gpuData->m_cpuBodies); } } clFinish(m_gpuData->m_queue); m_tmpSolverContactConstraintPool.resizeNoInitialize(0); m_tmpSolverNonContactConstraintPool.resizeNoInitialize(0); m_tmpSolverContactFrictionConstraintPool.resizeNoInitialize(0); m_tmpSolverContactRollingFrictionConstraintPool.resizeNoInitialize(0); m_tmpSolverBodyPool.resizeNoInitialize(0); return 0.f; }
void PhysicsServerExample::renderScene() { B3_PROFILE("PhysicsServerExample::RenderScene"); static char line0[1024]; static char line1[1024]; if (gEnableRealTimeSimVR) { static int frameCount=0; static btScalar prevTime = m_clock.getTimeSeconds(); frameCount++; static btScalar worseFps = 1000000; int numFrames = 200; static int count = 0; count++; if (0 == (count & 1)) { btScalar curTime = m_clock.getTimeSeconds(); btScalar fps = 1. / (curTime - prevTime); prevTime = curTime; if (fps < worseFps) { worseFps = fps; } if (count > numFrames) { count = 0; sprintf(line0, "fps:%f frame:%d", worseFps, frameCount / 2); sprintf(line1, "drop:%d tscale:%f dt:%f, substep %f)", gDroppedSimulationSteps, simTimeScalingFactor,gDtInSec, gSubStep); gDroppedSimulationSteps = 0; worseFps = 1000000; } } #ifdef BT_ENABLE_VR if ((gInternalSimFlags&2 ) && m_tinyVrGui==0) { ComboBoxParams comboParams; comboParams.m_comboboxId = 0; comboParams.m_numItems = 0; comboParams.m_startItem = 0; comboParams.m_callback = 0;//MyComboBoxCallback; comboParams.m_userPointer = 0;//this; m_tinyVrGui = new TinyVRGui(comboParams,this->m_multiThreadedHelper->m_childGuiHelper->getRenderInterface()); m_tinyVrGui->init(); } if (m_tinyVrGui) { b3Transform tr;tr.setIdentity(); tr.setOrigin(b3MakeVector3(gVRController2Pos[0],gVRController2Pos[1],gVRController2Pos[2])); tr.setRotation(b3Quaternion(gVRController2Orn[0],gVRController2Orn[1],gVRController2Orn[2],gVRController2Orn[3])); tr = tr*b3Transform(b3Quaternion(0,0,-SIMD_HALF_PI),b3MakeVector3(0,0,0)); b3Scalar dt = 0.01; m_tinyVrGui->clearTextArea(); m_tinyVrGui->grapicalPrintf(line0,0,0,0,0,0,255); m_tinyVrGui->grapicalPrintf(line1,0,16,255,255,255,255); m_tinyVrGui->tick(dt,tr); } #endif//BT_ENABLE_VR } ///debug rendering //m_args[0].m_cs->lock(); //gVRTeleportPos[0] += 0.01; vrOffset[12]=-gVRTeleportPos[0]; vrOffset[13]=-gVRTeleportPos[1]; vrOffset[14]=-gVRTeleportPos[2]; this->m_multiThreadedHelper->m_childGuiHelper->getRenderInterface()-> getActiveCamera()->setVRCameraOffsetTransform(vrOffset); m_physicsServer.renderScene(); for (int i=0;i<MAX_VR_CONTROLLERS;i++) { if (m_args[0].m_isVrControllerPicking[i] || m_args[0].m_isVrControllerDragging[i]) { btVector3 from = m_args[0].m_vrControllerPos[i]; btMatrix3x3 mat(m_args[0].m_vrControllerOrn[i]); btVector3 toX = from+mat.getColumn(0); btVector3 toY = from+mat.getColumn(1); btVector3 toZ = from+mat.getColumn(2); int width = 2; btVector4 color; color=btVector4(1,0,0,1); m_guiHelper->getAppInterface()->m_renderer->drawLine(from,toX,color,width); color=btVector4(0,1,0,1); m_guiHelper->getAppInterface()->m_renderer->drawLine(from,toY,color,width); color=btVector4(0,0,1,1); m_guiHelper->getAppInterface()->m_renderer->drawLine(from,toZ,color,width); } } if (m_guiHelper->getAppInterface()->m_renderer->getActiveCamera()->isVRCamera()) { gEnableRealTimeSimVR = true; } if (gDebugRenderToggle) if (m_guiHelper->getAppInterface()->m_renderer->getActiveCamera()->isVRCamera()) { B3_PROFILE("Draw Debug HUD"); //some little experiment to add text/HUD to a VR camera (HTC Vive/Oculus Rift) float pos[4]; m_guiHelper->getAppInterface()->m_renderer->getActiveCamera()->getCameraTargetPosition(pos); pos[0]+=gVRTeleportPos[0]; pos[1]+=gVRTeleportPos[1]; pos[2]+=gVRTeleportPos[2]; btTransform viewTr; btScalar m[16]; float mf[16]; m_guiHelper->getAppInterface()->m_renderer->getActiveCamera()->getCameraViewMatrix(mf); for (int i=0;i<16;i++) { m[i] = mf[i]; } m[12]=+gVRTeleportPos[0]; m[13]=+gVRTeleportPos[1]; m[14]=+gVRTeleportPos[2]; viewTr.setFromOpenGLMatrix(m); btTransform viewTrInv = viewTr.inverse(); btVector3 side = viewTrInv.getBasis().getColumn(0); btVector3 up = viewTrInv.getBasis().getColumn(1); btVector3 fwd = viewTrInv.getBasis().getColumn(2); float upMag = 0; float sideMag = 2.2; float fwdMag = -4; m_guiHelper->getAppInterface()->drawText3D(line0,pos[0]+upMag*up[0]-sideMag*side[0]+fwdMag*fwd[0],pos[1]+upMag*up[1]-sideMag*side[1]+fwdMag*fwd[1],pos[2]+upMag*up[2]-sideMag*side[2]+fwdMag*fwd[2],1); //btVector3 fwd = viewTrInv.getBasis().getColumn(2); up = viewTrInv.getBasis().getColumn(1); upMag = -0.3; m_guiHelper->getAppInterface()->drawText3D(line1,pos[0]+upMag*up[0]-sideMag*side[0]+fwdMag*fwd[0],pos[1]+upMag*up[1]-sideMag*side[1]+fwdMag*fwd[1],pos[2]+upMag*up[2]-sideMag*side[2]+fwdMag*fwd[2],1); } //m_args[0].m_cs->unlock(); }
void b3Solver::solveContactConstraint( const b3OpenCLArray<b3RigidBodyCL>* bodyBuf, const b3OpenCLArray<b3InertiaCL>* shapeBuf, b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches) { b3Int4 cdata = b3MakeInt4( n, 0, 0, 0 ); { const int nn = N_SPLIT*N_SPLIT; cdata.x = 0; cdata.y = maxNumBatches;//250; int numWorkItems = 64*nn/N_BATCHES; #ifdef DEBUG_ME SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems]; adl::b3OpenCLArray<SolverDebugInfo> gpuDebugInfo(data->m_device,numWorkItems); #endif { B3_PROFILE("m_batchSolveKernel iterations"); for(int iter=0; iter<m_nIterations; iter++) { for(int ib=0; ib<N_BATCHES; ib++) { if (verify) { checkConstraintBatch(bodyBuf,shapeBuf,constraint,m_numConstraints,m_offsets,ib); } #ifdef DEBUG_ME memset(debugInfo,0,sizeof(SolverDebugInfo)*numWorkItems); gpuDebugInfo.write(debugInfo,numWorkItems); #endif cdata.z = ib; cdata.w = N_SPLIT; b3LauncherCL launcher( m_queue, m_solveContactKernel ); #if 1 b3BufferInfoCL bInfo[] = { b3BufferInfoCL( bodyBuf->getBufferCL() ), b3BufferInfoCL( shapeBuf->getBufferCL() ), b3BufferInfoCL( constraint->getBufferCL() ), b3BufferInfoCL( m_numConstraints->getBufferCL() ), b3BufferInfoCL( m_offsets->getBufferCL() ) #ifdef DEBUG_ME , b3BufferInfoCL(&gpuDebugInfo) #endif }; launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); //launcher.setConst( cdata.x ); launcher.setConst( cdata.y ); launcher.setConst( cdata.z ); launcher.setConst( cdata.w ); launcher.launch1D( numWorkItems, 64 ); #else const char* fileName = "m_batchSolveKernel.bin"; FILE* f = fopen(fileName,"rb"); if (f) { int sizeInBytes=0; if (fseek(f, 0, SEEK_END) || (sizeInBytes = ftell(f)) == EOF || fseek(f, 0, SEEK_SET)) { printf("error, cannot get file size\n"); exit(0); } unsigned char* buf = (unsigned char*) malloc(sizeInBytes); fread(buf,sizeInBytes,1,f); int serializedBytes = launcher.deserializeArgs(buf, sizeInBytes,m_context); int num = *(int*)&buf[serializedBytes]; launcher.launch1D( num); //this clFinish is for testing on errors clFinish(m_queue); } #endif #ifdef DEBUG_ME clFinish(m_queue); gpuDebugInfo.read(debugInfo,numWorkItems); clFinish(m_queue); for (int i=0; i<numWorkItems; i++) { if (debugInfo[i].m_valInt2>0) { printf("debugInfo[i].m_valInt2 = %d\n",i,debugInfo[i].m_valInt2); } if (debugInfo[i].m_valInt3>0) { printf("debugInfo[i].m_valInt3 = %d\n",i,debugInfo[i].m_valInt3); } } #endif //DEBUG_ME } } clFinish(m_queue); } cdata.x = 1; bool applyFriction=true; if (applyFriction) { B3_PROFILE("m_batchSolveKernel iterations2"); for(int iter=0; iter<m_nIterations; iter++) { for(int ib=0; ib<N_BATCHES; ib++) { cdata.z = ib; cdata.w = N_SPLIT; b3BufferInfoCL bInfo[] = { b3BufferInfoCL( bodyBuf->getBufferCL() ), b3BufferInfoCL( shapeBuf->getBufferCL() ), b3BufferInfoCL( constraint->getBufferCL() ), b3BufferInfoCL( m_numConstraints->getBufferCL() ), b3BufferInfoCL( m_offsets->getBufferCL() ) #ifdef DEBUG_ME ,b3BufferInfoCL(&gpuDebugInfo) #endif //DEBUG_ME }; b3LauncherCL launcher( m_queue, m_solveFrictionKernel ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); //launcher.setConst( cdata.x ); launcher.setConst( cdata.y ); launcher.setConst( cdata.z ); launcher.setConst( cdata.w ); launcher.launch1D( 64*nn/N_BATCHES, 64 ); } } clFinish(m_queue); } #ifdef DEBUG_ME delete[] debugInfo; #endif //DEBUG_ME } }
void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const b3Config& config, int static0Index) { B3_PROFILE("solveContacts"); m_data->m_bodyBufferGPU->setFromOpenCLBuffer(bodyBuf,numBodies); m_data->m_inertiaBufferGPU->setFromOpenCLBuffer(inertiaBuf,numBodies); m_data->m_pBufContactOutGPU->setFromOpenCLBuffer(contactBuf,numContacts); if (optionalSortContactsDeterminism) { if (!gCpuSortContactsDeterminism) { B3_PROFILE("GPU Sort contact constraints (determinism)"); m_data->m_pBufContactOutGPUCopy->resize(numContacts); m_data->m_contactKeyValues->resize(numContacts); m_data->m_pBufContactOutGPU->copyToCL(m_data->m_pBufContactOutGPUCopy->getBufferCL(),numContacts,0,0); { b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataChildShapeBKernel,"m_setDeterminismSortDataChildShapeBKernel"); launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); launcher.setConst(numContacts); launcher.launch1D( numContacts, 64 ); } m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues); { b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataChildShapeAKernel,"m_setDeterminismSortDataChildShapeAKernel"); launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); launcher.setConst(numContacts); launcher.launch1D( numContacts, 64 ); } m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues); { b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyBKernel,"m_setDeterminismSortDataBodyBKernel"); launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); launcher.setConst(numContacts); launcher.launch1D( numContacts, 64 ); } m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues); { b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyAKernel,"m_setDeterminismSortDataBodyAKernel"); launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); launcher.setConst(numContacts); launcher.launch1D( numContacts, 64 ); } m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues); { B3_PROFILE("gpu reorderContactKernel (determinism)"); b3Int4 cdata; cdata.x = numContacts; //b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL()) // , b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) }; b3LauncherCL launcher(m_data->m_queue,m_data->m_solverGPU->m_reorderContactKernel,"m_reorderContactKernel"); launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); launcher.setBuffer(m_data->m_pBufContactOutGPU->getBufferCL()); launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); launcher.setConst( cdata ); launcher.launch1D( numContacts, 64 ); } } else { B3_PROFILE("CPU Sort contact constraints (determinism)"); b3AlignedObjectArray<b3Contact4> cpuConstraints; m_data->m_pBufContactOutGPU->copyToHost(cpuConstraints); bool sort = true; if (sort) { cpuConstraints.quickSort(b3ContactCmp); for (int i=0;i<cpuConstraints.size();i++) { cpuConstraints[i].m_batchIdx = i; } } m_data->m_pBufContactOutGPU->copyFromHost(cpuConstraints); if (m_debugOutput==100) { for (int i=0;i<cpuConstraints.size();i++) { printf("c[%d].m_bodyA = %d, m_bodyB = %d, batchId = %d\n",i,cpuConstraints[i].m_bodyAPtrAndSignBit,cpuConstraints[i].m_bodyBPtrAndSignBit, cpuConstraints[i].m_batchIdx); } } m_debugOutput++; } } int nContactOut = m_data->m_pBufContactOutGPU->size(); bool useSolver = true; if (useSolver) { float dt=1./60.; b3ConstraintCfg csCfg( dt ); csCfg.m_enableParallelSolve = true; csCfg.m_batchCellSize = 6; csCfg.m_staticIdx = static0Index; b3OpenCLArray<b3RigidBodyData>* bodyBuf = m_data->m_bodyBufferGPU; void* additionalData = 0;//m_data->m_frictionCGPU; const b3OpenCLArray<b3InertiaData>* shapeBuf = m_data->m_inertiaBufferGPU; b3OpenCLArray<b3GpuConstraint4>* contactConstraintOut = m_data->m_contactCGPU; int nContacts = nContactOut; int maxNumBatches = 0; if (!gUseLargeBatches) { if( m_data->m_solverGPU->m_contactBuffer2) { m_data->m_solverGPU->m_contactBuffer2->resize(nContacts); } if( m_data->m_solverGPU->m_contactBuffer2 == 0 ) { m_data->m_solverGPU->m_contactBuffer2 = new b3OpenCLArray<b3Contact4>(m_data->m_context,m_data->m_queue, nContacts ); m_data->m_solverGPU->m_contactBuffer2->resize(nContacts); } //clFinish(m_data->m_queue); { B3_PROFILE("batching"); //@todo: just reserve it, without copy of original contact (unless we use warmstarting) const b3OpenCLArray<b3RigidBodyData>* bodyNative = bodyBuf; { //b3OpenCLArray<b3RigidBodyData>* bodyNative = b3OpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, bodyBuf ); //b3OpenCLArray<b3Contact4>* contactNative = b3OpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, contactsIn ); const int sortAlignment = 512; // todo. get this out of sort if( csCfg.m_enableParallelSolve ) { int sortSize = B3NEXTMULTIPLEOF( nContacts, sortAlignment ); b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints; b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets; if (!gCpuSetSortData) { // 2. set cell idx B3_PROFILE("GPU set cell idx"); struct CB { int m_nContacts; int m_staticIdx; float m_scale; b3Int4 m_nSplit; }; b3Assert( sortSize%64 == 0 ); CB cdata; cdata.m_nContacts = nContacts; cdata.m_staticIdx = csCfg.m_staticIdx; cdata.m_scale = 1.f/csCfg.m_batchCellSize; cdata.m_nSplit.x = B3_SOLVER_N_SPLIT_X; cdata.m_nSplit.y = B3_SOLVER_N_SPLIT_Y; cdata.m_nSplit.z = B3_SOLVER_N_SPLIT_Z; m_data->m_solverGPU->m_sortDataBuffer->resize(nContacts); b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL()), b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) }; b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_setSortDataKernel,"m_setSortDataKernel" ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( cdata.m_nContacts ); launcher.setConst( cdata.m_scale ); launcher.setConst(cdata.m_nSplit); launcher.setConst(cdata.m_staticIdx); launcher.launch1D( sortSize, 64 ); } else { m_data->m_solverGPU->m_sortDataBuffer->resize(nContacts); b3AlignedObjectArray<b3SortData> sortDataCPU; m_data->m_solverGPU->m_sortDataBuffer->copyToHost(sortDataCPU); b3AlignedObjectArray<b3Contact4> contactCPU; m_data->m_pBufContactOutGPU->copyToHost(contactCPU); b3AlignedObjectArray<b3RigidBodyData> bodiesCPU; bodyBuf->copyToHost(bodiesCPU); float scale = 1.f/csCfg.m_batchCellSize; b3Int4 nSplit; nSplit.x = B3_SOLVER_N_SPLIT_X; nSplit.y = B3_SOLVER_N_SPLIT_Y; nSplit.z = B3_SOLVER_N_SPLIT_Z; SetSortDataCPU(&contactCPU[0], &bodiesCPU[0], &sortDataCPU[0], nContacts,scale,nSplit,csCfg.m_staticIdx); m_data->m_solverGPU->m_sortDataBuffer->copyFromHost(sortDataCPU); } if (!gCpuRadixSort) { // 3. sort by cell idx B3_PROFILE("gpuRadixSort"); //int n = B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT; //int sortBit = 32; //if( n <= 0xffff ) sortBit = 16; //if( n <= 0xff ) sortBit = 8; //adl::RadixSort<adl::TYPE_CL>::execute( data->m_sort, *data->m_sortDataBuffer, sortSize ); //adl::RadixSort32<adl::TYPE_CL>::execute( data->m_sort32, *data->m_sortDataBuffer, sortSize ); b3OpenCLArray<b3SortData>& keyValuesInOut = *(m_data->m_solverGPU->m_sortDataBuffer); this->m_data->m_solverGPU->m_sort32->execute(keyValuesInOut); } else { b3OpenCLArray<b3SortData>& keyValuesInOut = *(m_data->m_solverGPU->m_sortDataBuffer); b3AlignedObjectArray<b3SortData> hostValues; keyValuesInOut.copyToHost(hostValues); hostValues.quickSort(sortfnc); keyValuesInOut.copyFromHost(hostValues); } if (gUseScanHost) { // 4. find entries B3_PROFILE("cpuBoundSearch"); b3AlignedObjectArray<unsigned int> countsHost; countsNative->copyToHost(countsHost); b3AlignedObjectArray<b3SortData> sortDataHost; m_data->m_solverGPU->m_sortDataBuffer->copyToHost(sortDataHost); //m_data->m_solverGPU->m_search->executeHost(*m_data->m_solverGPU->m_sortDataBuffer,nContacts,*countsNative,B3_SOLVER_N_CELLS,b3BoundSearchCL::COUNT); m_data->m_solverGPU->m_search->executeHost(sortDataHost,nContacts,countsHost,B3_SOLVER_N_CELLS,b3BoundSearchCL::COUNT); countsNative->copyFromHost(countsHost); //adl::BoundSearch<adl::TYPE_CL>::execute( data->m_search, *data->m_sortDataBuffer, nContacts, *countsNative, // B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT, adl::BoundSearchBase::COUNT ); //unsigned int sum; //m_data->m_solverGPU->m_scan->execute(*countsNative,*offsetsNative, B3_SOLVER_N_CELLS);//,&sum ); b3AlignedObjectArray<unsigned int> offsetsHost; offsetsHost.resize(offsetsNative->size()); m_data->m_solverGPU->m_scan->executeHost(countsHost,offsetsHost, B3_SOLVER_N_CELLS);//,&sum ); offsetsNative->copyFromHost(offsetsHost); //printf("sum = %d\n",sum); } else { // 4. find entries B3_PROFILE("gpuBoundSearch"); m_data->m_solverGPU->m_search->execute(*m_data->m_solverGPU->m_sortDataBuffer,nContacts,*countsNative,B3_SOLVER_N_CELLS,b3BoundSearchCL::COUNT); m_data->m_solverGPU->m_scan->execute(*countsNative,*offsetsNative, B3_SOLVER_N_CELLS);//,&sum ); } if (nContacts) { // 5. sort constraints by cellIdx if (gReorderContactsOnCpu) { B3_PROFILE("cpu m_reorderContactKernel"); b3AlignedObjectArray<b3SortData> sortDataHost; m_data->m_solverGPU->m_sortDataBuffer->copyToHost(sortDataHost); b3AlignedObjectArray<b3Contact4> inContacts; b3AlignedObjectArray<b3Contact4> outContacts; m_data->m_pBufContactOutGPU->copyToHost(inContacts); outContacts.resize(inContacts.size()); for (int i=0;i<nContacts;i++) { int srcIdx = sortDataHost[i].y; outContacts[i] = inContacts[srcIdx]; } m_data->m_solverGPU->m_contactBuffer2->copyFromHost(outContacts); /* "void ReorderContactKernel(__global struct b3Contact4Data* in, __global struct b3Contact4Data* out, __global int2* sortData, int4 cb )\n" "{\n" " int nContacts = cb.x;\n" " int gIdx = GET_GLOBAL_IDX;\n" " if( gIdx < nContacts )\n" " {\n" " int srcIdx = sortData[gIdx].y;\n" " out[gIdx] = in[srcIdx];\n" " }\n" "}\n" */ } else { B3_PROFILE("gpu m_reorderContactKernel"); b3Int4 cdata; cdata.x = nContacts; b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL()) , b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) }; b3LauncherCL launcher(m_data->m_queue,m_data->m_solverGPU->m_reorderContactKernel,"m_reorderContactKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( cdata ); launcher.launch1D( nContacts, 64 ); } } } } //clFinish(m_data->m_queue); // { // b3AlignedObjectArray<unsigned int> histogram; // m_data->m_solverGPU->m_numConstraints->copyToHost(histogram); // printf(",,,\n"); // } if (nContacts) { if (gUseCpuCopyConstraints) { for (int i=0;i<nContacts;i++) { m_data->m_pBufContactOutGPU->copyFromOpenCLArray(*m_data->m_solverGPU->m_contactBuffer2); // m_data->m_solverGPU->m_contactBuffer2->getBufferCL(); // m_data->m_pBufContactOutGPU->getBufferCL() } } else { B3_PROFILE("gpu m_copyConstraintKernel"); b3Int4 cdata; cdata.x = nContacts; b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL() ), b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ) }; b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_copyConstraintKernel,"m_copyConstraintKernel" ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( cdata ); launcher.launch1D( nContacts, 64 ); //we use the clFinish for proper benchmark/profile clFinish(m_data->m_queue); } } bool compareGPU = false; if (nContacts) { if (!gCpuBatchContacts) { B3_PROFILE("gpu batchContacts"); maxNumBatches = 150;//250; m_data->m_solverGPU->batchContacts( m_data->m_pBufContactOutGPU, nContacts, m_data->m_solverGPU->m_numConstraints, m_data->m_solverGPU->m_offsets, csCfg.m_staticIdx ); clFinish(m_data->m_queue); } else { B3_PROFILE("cpu batchContacts"); static b3AlignedObjectArray<b3Contact4> cpuContacts; b3OpenCLArray<b3Contact4>* contactsIn = m_data->m_solverGPU->m_contactBuffer2; { B3_PROFILE("copyToHost"); contactsIn->copyToHost(cpuContacts); } b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints; b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets; b3AlignedObjectArray<unsigned int> nNativeHost; b3AlignedObjectArray<unsigned int> offsetsNativeHost; { B3_PROFILE("countsNative/offsetsNative copyToHost"); countsNative->copyToHost(nNativeHost); offsetsNative->copyToHost(offsetsNativeHost); } int numNonzeroGrid=0; if (gUseLargeBatches) { m_data->m_batchSizes.resize(B3_MAX_NUM_BATCHES); int totalNumConstraints = cpuContacts.size(); int simdWidth =numBodies+1;//-1;//64;//-1;//32; int numBatches = sortConstraintByBatch3( &cpuContacts[0], totalNumConstraints, totalNumConstraints+1,csCfg.m_staticIdx ,numBodies,&m_data->m_batchSizes[0]); // on GPU maxNumBatches = b3Max(numBatches,maxNumBatches); static int globalMaxBatch = 0; if (maxNumBatches>globalMaxBatch ) { globalMaxBatch = maxNumBatches; b3Printf("maxNumBatches = %d\n",maxNumBatches); } } else { m_data->m_batchSizes.resize(B3_SOLVER_N_CELLS*B3_MAX_NUM_BATCHES); B3_PROFILE("cpu batch grid"); for(int i=0; i<B3_SOLVER_N_CELLS; i++) { int n = (nNativeHost)[i]; int offset = (offsetsNativeHost)[i]; if( n ) { numNonzeroGrid++; int simdWidth =numBodies+1;//-1;//64;//-1;//32; int numBatches = sortConstraintByBatch3( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies,&m_data->m_batchSizes[i*B3_MAX_NUM_BATCHES]); // on GPU maxNumBatches = b3Max(numBatches,maxNumBatches); static int globalMaxBatch = 0; if (maxNumBatches>globalMaxBatch ) { globalMaxBatch = maxNumBatches; b3Printf("maxNumBatches = %d\n",maxNumBatches); } //we use the clFinish for proper benchmark/profile } } //clFinish(m_data->m_queue); } { B3_PROFILE("m_contactBuffer->copyFromHost"); m_data->m_solverGPU->m_contactBuffer2->copyFromHost((b3AlignedObjectArray<b3Contact4>&)cpuContacts); } } } } } //printf("maxNumBatches = %d\n", maxNumBatches); if (gUseLargeBatches) { if (nContacts) { B3_PROFILE("cpu batchContacts"); static b3AlignedObjectArray<b3Contact4> cpuContacts; // b3OpenCLArray<b3Contact4>* contactsIn = m_data->m_solverGPU->m_contactBuffer2; { B3_PROFILE("copyToHost"); m_data->m_pBufContactOutGPU->copyToHost(cpuContacts); } b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints; b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets; int numNonzeroGrid=0; { m_data->m_batchSizes.resize(B3_MAX_NUM_BATCHES); int totalNumConstraints = cpuContacts.size(); int simdWidth =numBodies+1;//-1;//64;//-1;//32; int numBatches = sortConstraintByBatch3( &cpuContacts[0], totalNumConstraints, totalNumConstraints+1,csCfg.m_staticIdx ,numBodies,&m_data->m_batchSizes[0]); // on GPU maxNumBatches = b3Max(numBatches,maxNumBatches); static int globalMaxBatch = 0; if (maxNumBatches>globalMaxBatch ) { globalMaxBatch = maxNumBatches; b3Printf("maxNumBatches = %d\n",maxNumBatches); } } { B3_PROFILE("m_contactBuffer->copyFromHost"); m_data->m_solverGPU->m_contactBuffer2->copyFromHost((b3AlignedObjectArray<b3Contact4>&)cpuContacts); } } } if (nContacts) { B3_PROFILE("gpu convertToConstraints"); m_data->m_solverGPU->convertToConstraints( bodyBuf, shapeBuf, m_data->m_solverGPU->m_contactBuffer2, contactConstraintOut, additionalData, nContacts, (b3SolverBase::ConstraintCfg&) csCfg ); clFinish(m_data->m_queue); } if (1) { int numIter = 4; m_data->m_solverGPU->m_nIterations = numIter;//10 if (!gCpuSolveConstraint) { B3_PROFILE("GPU solveContactConstraint"); /*m_data->m_solverGPU->solveContactConstraint( m_data->m_bodyBufferGPU, m_data->m_inertiaBufferGPU, m_data->m_contactCGPU,0, nContactOut , maxNumBatches); */ //m_data->m_batchSizesGpu->copyFromHost(m_data->m_batchSizes); if (gUseLargeBatches) { solveContactConstraintBatchSizes(m_data->m_bodyBufferGPU, m_data->m_inertiaBufferGPU, m_data->m_contactCGPU,0, nContactOut , maxNumBatches,numIter,&m_data->m_batchSizes); } else { solveContactConstraint( m_data->m_bodyBufferGPU, m_data->m_inertiaBufferGPU, m_data->m_contactCGPU,0, nContactOut , maxNumBatches,numIter,&m_data->m_batchSizes);//m_data->m_batchSizesGpu); } } else { B3_PROFILE("Host solveContactConstraint"); m_data->m_solverGPU->solveContactConstraintHost(m_data->m_bodyBufferGPU, m_data->m_inertiaBufferGPU, m_data->m_contactCGPU,0, nContactOut ,maxNumBatches,&m_data->m_batchSizes); } } #if 0 if (0) { B3_PROFILE("read body velocities back to CPU"); //read body updated linear/angular velocities back to CPU m_data->m_bodyBufferGPU->read( m_data->m_bodyBufferCPU->m_ptr,numOfConvexRBodies); adl::DeviceUtils::waitForCompletion( m_data->m_deviceCL ); } #endif } }
void b3Solver::batchContacts( b3OpenCLArray<b3Contact4>* contacts, int nContacts, b3OpenCLArray<unsigned int>* nNative, b3OpenCLArray<unsigned int>* offsetsNative, int staticIdx ) { int numWorkItems = 64*N_SPLIT*N_SPLIT; { B3_PROFILE("batch generation"); b3Int4 cdata; cdata.x = nContacts; cdata.y = 0; cdata.z = staticIdx; #ifdef BATCH_DEBUG SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems]; adl::b3OpenCLArray<SolverDebugInfo> gpuDebugInfo(data->m_device,numWorkItems); memset(debugInfo,0,sizeof(SolverDebugInfo)*numWorkItems); gpuDebugInfo.write(debugInfo,numWorkItems); #endif b3BufferInfoCL bInfo[] = { b3BufferInfoCL( contacts->getBufferCL() ), b3BufferInfoCL( m_contactBuffer2->getBufferCL()), b3BufferInfoCL( nNative->getBufferCL() ), b3BufferInfoCL( offsetsNative->getBufferCL() ), #ifdef BATCH_DEBUG , b3BufferInfoCL(&gpuDebugInfo) #endif }; { B3_PROFILE("batchingKernel"); //b3LauncherCL launcher( m_queue, m_batchingKernel); cl_kernel k = useNewBatchingKernel ? m_batchingKernelNew : m_batchingKernel; b3LauncherCL launcher( m_queue, k); if (!useNewBatchingKernel ) { launcher.setBuffer( contacts->getBufferCL() ); } launcher.setBuffer( m_contactBuffer2->getBufferCL() ); launcher.setBuffer( nNative->getBufferCL()); launcher.setBuffer( offsetsNative->getBufferCL()); //launcher.setConst( cdata ); launcher.setConst(staticIdx); launcher.launch1D( numWorkItems, 64 ); clFinish(m_queue); } #ifdef BATCH_DEBUG aaaa b3Contact4* hostContacts = new b3Contact4[nContacts]; m_contactBuffer->read(hostContacts,nContacts); clFinish(m_queue); gpuDebugInfo.read(debugInfo,numWorkItems); clFinish(m_queue); for (int i=0; i<numWorkItems; i++) { if (debugInfo[i].m_valInt1>0) { printf("catch\n"); } if (debugInfo[i].m_valInt2>0) { printf("catch22\n"); } if (debugInfo[i].m_valInt3>0) { printf("catch666\n"); } if (debugInfo[i].m_valInt4>0) { printf("catch777\n"); } } delete[] debugInfo; #endif //BATCH_DEBUG } // copy buffer to buffer //b3Assert(m_contactBuffer->size()==nContacts); //contacts->copyFromOpenCLArray( *m_contactBuffer); //clFinish(m_queue);//needed? }
void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults, int numBodies,const struct b3RigidBodyData* bodies, int numCollidables,const struct b3Collidable* collidables, const struct b3GpuNarrowPhaseInternalData* narrowphaseData) { // return castRays(rays,hitResults,numBodies,bodies,numCollidables,collidables); B3_PROFILE("castRaysHost"); for (int r=0;r<rays.size();r++) { b3Vector3 rayFrom = rays[r].m_from; b3Vector3 rayTo = rays[r].m_to; float hitFraction = hitResults[r].m_hitFraction; int hitBodyIndex= -1; b3Vector3 hitNormal; for (int b=0;b<numBodies;b++) { const b3Vector3& pos = bodies[b].m_pos; const b3Quaternion& orn = bodies[b].m_quat; switch (collidables[bodies[b].m_collidableIdx].m_shapeType) { case SHAPE_SPHERE: { b3Scalar radius = collidables[bodies[b].m_collidableIdx].m_radius; if (sphere_intersect(pos, radius, rayFrom, rayTo,hitFraction)) { hitBodyIndex = b; b3Vector3 hitPoint; hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to,hitFraction); hitNormal = (hitPoint-bodies[b].m_pos).normalize(); } } case SHAPE_CONVEX_HULL: { b3Transform convexWorldTransform; convexWorldTransform.setIdentity(); convexWorldTransform.setOrigin(bodies[b].m_pos); convexWorldTransform.setRotation(bodies[b].m_quat); b3Transform convexWorld2Local = convexWorldTransform.inverse(); b3Vector3 rayFromLocal = convexWorld2Local(rayFrom); b3Vector3 rayToLocal = convexWorld2Local(rayTo); int shapeIndex = collidables[bodies[b].m_collidableIdx].m_shapeIndex; const b3ConvexPolyhedronData& poly = narrowphaseData->m_convexPolyhedra[shapeIndex]; if (rayConvex(rayFromLocal, rayToLocal,poly,narrowphaseData->m_convexFaces, hitFraction, hitNormal)) { hitBodyIndex = b; } break; } default: { static bool once=true; if (once) { once=false; b3Warning("Raytest: unsupported shape type\n"); } } } } if (hitBodyIndex>=0) { hitResults[r].m_hitFraction = hitFraction; hitResults[r].m_hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to,hitFraction); hitResults[r].m_hitNormal = hitNormal; hitResults[r].m_hitBody = hitBodyIndex; } } }