void CLPhysicsDemo::stepSimulation() { int sz = sizeof(ConvexPolyhedronCL2); int sz1 = sizeof(ConvexPolyhedronCL); btAssert(sz==sz1); int b1 = sizeof(Body2); int b2 = sizeof(RigidBodyBase::Body); btAssert(b1==b2); int ciErrNum=CL_SUCCESS; if (1 && m_numPhysicsInstances) { gFpIO.m_numObjects = m_numPhysicsInstances; if (useSapGpuBroadphase) { gFpIO.m_dAABB = m_data->m_BroadphaseSap->getAabbBuffer(); } else { gFpIO.m_dAABB = m_data->m_BroadphaseGrid->getAabbBuffer(); } gFpIO.m_dlocalShapeAABB = (cl_mem)m_data->m_localShapeAABBGPU->getBufferCL(); gFpIO.m_numOverlap = 0; { BT_PROFILE("setupGpuAabbs"); setupGpuAabbsFull(gFpIO,m_narrowphaseAndSolver->getBodiesGpu(), m_narrowphaseAndSolver->getCollidablesGpu() ); // setupGpuAabbsSimple(gFpIO); } { } if (1) { BT_PROFILE("calculateOverlappingPairs"); if (useSapGpuBroadphase) { m_data->m_BroadphaseSap->calculateOverlappingPairs(); gFpIO.m_numOverlap = m_data->m_BroadphaseSap->getNumOverlap(); gFpIO.m_dAllOverlappingPairs = m_data->m_BroadphaseSap->getOverlappingPairBuffer(); } else { m_data->m_BroadphaseGrid->calculateOverlappingPairs(); gFpIO.m_dAllOverlappingPairs = m_data->m_BroadphaseGrid->getOverlappingPairBuffer(); gFpIO.m_numOverlap = m_data->m_BroadphaseGrid->getNumOverlap(); } } //printf("gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap ); if (gFpIO.m_numOverlap>=0 && gFpIO.m_numOverlap<MAX_BROADPHASE_COLLISION_CL) { //colorPairsOpenCL(gFpIO); if (runOpenCLKernels) { { BT_PROFILE("setupBodies"); if (m_narrowphaseAndSolver) setupBodies(gFpIO, m_data->m_linVelBuf->getBufferCL(), m_data->m_angVelBuf->getBufferCL(), m_narrowphaseAndSolver->getBodiesGpu(), m_narrowphaseAndSolver->getBodyInertiasGpu()); } { BT_PROFILE("computeContacts"); if (m_narrowphaseAndSolver) m_narrowphaseAndSolver->computeContacts(gFpIO.m_dAllOverlappingPairs,gFpIO.m_numOverlap, gFpIO.m_dAABB,gFpIO.m_numObjects); } bool useOpenCLSolver = true; if (useOpenCLSolver) { BT_PROFILE("solve Contact Constraints (OpenCL)"); if (m_narrowphaseAndSolver) m_narrowphaseAndSolver->solveContacts(); } else { BT_PROFILE("solve Contact Constraints CPU/serial"); if (m_narrowphaseAndSolver && m_data->m_pgsSolver && m_narrowphaseAndSolver->getNumContactsGpu()) { btGpuNarrowphaseAndSolver* np = m_narrowphaseAndSolver; btAlignedObjectArray<RigidBodyBase::Body> hostBodies; btOpenCLArray<RigidBodyBase::Body> gpuBodies(g_cxMainContext,g_cqCommandQue,0,true); gpuBodies.setFromOpenCLBuffer(np->getBodiesGpu(),np->getNumBodiesGpu()); gpuBodies.copyToHost(hostBodies); btAlignedObjectArray<RigidBodyBase::Inertia> hostInertias; btOpenCLArray<RigidBodyBase::Inertia> gpuInertias(g_cxMainContext,g_cqCommandQue,0,true); gpuInertias.setFromOpenCLBuffer(np->getBodyInertiasGpu(),np->getNumBodiesGpu()); gpuInertias.copyToHost(hostInertias); btAlignedObjectArray<Contact4> hostContacts; btOpenCLArray<Contact4> gpuContacts(g_cxMainContext,g_cqCommandQue,0,true); gpuContacts.setFromOpenCLBuffer(np->getContactsGpu(),np->getNumContactsGpu()); gpuContacts.copyToHost(hostContacts); { BT_PROFILE("pgsSolver::solveContacts"); m_data->m_pgsSolver->solveContacts(np->getNumBodiesGpu(),&hostBodies[0],&hostInertias[0],np->getNumContactsGpu(),&hostContacts[0]); } gpuBodies.copyFromHost(hostBodies); } } { BT_PROFILE("copyBodyVelocities"); if (m_narrowphaseAndSolver) copyBodyVelocities(gFpIO, m_data->m_linVelBuf->getBufferCL(), m_data->m_angVelBuf->getBufferCL(), m_narrowphaseAndSolver->getBodiesGpu()); } } } else { printf("error, gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap); btAssert(0); } { BT_PROFILE("integrateTransforms"); if (runOpenCLKernels) { bool integrateOnGpu = true; if (integrateOnGpu) { int numObjects = m_numPhysicsInstances; int offset = m_maxShapeBufferCapacityInBytes/4; ciErrNum = clSetKernelArg(g_integrateTransformsKernel2, 0, sizeof(int), &offset); ciErrNum = clSetKernelArg(g_integrateTransformsKernel2, 1, sizeof(int), &numObjects); cl_mem bodyGpuBuffer = m_narrowphaseAndSolver->getBodiesGpu(); ciErrNum = clSetKernelArg(g_integrateTransformsKernel2, 2, sizeof(cl_mem), (void*)&bodyGpuBuffer ); cl_mem lv = m_data->m_linVelBuf->getBufferCL(); cl_mem av = m_data->m_angVelBuf->getBufferCL(); cl_mem btimes = m_data->m_bodyTimes->getBufferCL(); ciErrNum = clSetKernelArg(g_integrateTransformsKernel2, 3, sizeof(cl_mem), (void*)&lv); ciErrNum = clSetKernelArg(g_integrateTransformsKernel2, 4, sizeof(cl_mem), (void*)&av); ciErrNum = clSetKernelArg(g_integrateTransformsKernel2, 5, sizeof(cl_mem), (void*)&btimes); size_t workGroupSize = 64; size_t numWorkItems = workGroupSize*((m_numPhysicsInstances + (workGroupSize)) / workGroupSize); if (workGroupSize>numWorkItems) workGroupSize=numWorkItems; ciErrNum = clEnqueueNDRangeKernel(g_cqCommandQue, g_integrateTransformsKernel2, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0); oclCHECKERROR(ciErrNum, CL_SUCCESS); } else { #ifdef _WIN32 //debug velocity btAlignedObjectArray<btVector3> linvel; m_data->m_linVelBuf->copyToHost(linvel); for (int i=0;i<linvel.size();i++) { btAssert(_finite(linvel[i].x())); } #endif btAssert(0); } } } } oclCHECKERROR(ciErrNum, CL_SUCCESS); if (runOpenCLKernels) { BT_PROFILE("clFinish"); clFinish(g_cqCommandQue); } }
void CLPhysicsDemo::stepSimulation() { int sz = sizeof(ConvexPolyhedronCL2); int sz1 = sizeof(ConvexPolyhedronCL); btAssert(sz==sz1); int b1 = sizeof(Body2); int b2 = sizeof(RigidBodyBase::Body); btAssert(b1==b2); BT_PROFILE("simulationLoop"); cl_int ciErrNum = CL_SUCCESS; if(m_data->m_useInterop) { #ifndef __APPLE__ clBuffer = g_interopBuffer->getCLBUffer(); BT_PROFILE("clEnqueueAcquireGLObjects"); { BT_PROFILE("clEnqueueAcquireGLObjects"); ciErrNum = clEnqueueAcquireGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, NULL); clFinish(g_cqCommandQue); } #else assert(0); #endif } else { glBindBuffer(GL_ARRAY_BUFFER, cube_vbo); glFlush(); BT_PROFILE("glMapBuffer and clEnqueueWriteBuffer"); blocking= CL_TRUE; hostPtr= (char*)glMapBuffer( GL_ARRAY_BUFFER,GL_READ_WRITE);//GL_WRITE_ONLY if (!clBuffer) { clBuffer = clCreateBuffer(g_cxMainContext, CL_MEM_READ_WRITE, VBOsize, 0, &ciErrNum); } clFinish(g_cqCommandQue); oclCHECKERROR(ciErrNum, CL_SUCCESS); ciErrNum = clEnqueueWriteBuffer ( g_cqCommandQue, clBuffer, blocking, 0, VBOsize, hostPtr,0,0,0 ); clFinish(g_cqCommandQue); } oclCHECKERROR(ciErrNum, CL_SUCCESS); if (1 && m_numPhysicsInstances) { gFpIO.m_numObjects = m_numPhysicsInstances; gFpIO.m_positionOffset = SHAPE_VERTEX_BUFFER_SIZE/4; gFpIO.m_clObjectsBuffer = clBuffer; if (useSapGpuBroadphase) { gFpIO.m_dAABB = m_data->m_BroadphaseSap->getAabbBuffer(); } else { gFpIO.m_dAABB = m_data->m_BroadphaseGrid->getAabbBuffer(); } gFpIO.m_dlocalShapeAABB = (cl_mem)m_data->m_localShapeAABBGPU->getBufferCL(); gFpIO.m_numOverlap = 0; { BT_PROFILE("setupGpuAabbs"); setupGpuAabbsFull(gFpIO,narrowphaseAndSolver->getBodiesGpu(), narrowphaseAndSolver->getCollidablesGpu() ); // setupGpuAabbsSimple(gFpIO); } { } if (1) { BT_PROFILE("calculateOverlappingPairs"); if (useSapGpuBroadphase) { m_data->m_BroadphaseSap->calculateOverlappingPairs(); gFpIO.m_dAllOverlappingPairs = m_data->m_BroadphaseSap->getOverlappingPairBuffer(); gFpIO.m_numOverlap = m_data->m_BroadphaseSap->getNumOverlap(); } else { m_data->m_BroadphaseGrid->calculateOverlappingPairs(); gFpIO.m_dAllOverlappingPairs = m_data->m_BroadphaseGrid->getOverlappingPairBuffer(); gFpIO.m_numOverlap = m_data->m_BroadphaseGrid->getNumOverlap(); } } //printf("gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap ); if (gFpIO.m_numOverlap>=0 && gFpIO.m_numOverlap<MAX_BROADPHASE_COLLISION_CL) { colorPairsOpenCL(gFpIO); if (runOpenCLKernels) { { //BT_PROFILE("setupBodies"); if (narrowphaseAndSolver) setupBodies(gFpIO, m_data->m_linVelBuf->getBufferCL(), m_data->m_angVelBuf->getBufferCL(), narrowphaseAndSolver->getBodiesGpu(), narrowphaseAndSolver->getBodyInertiasGpu()); } { BT_PROFILE("computeContactsAndSolver"); if (narrowphaseAndSolver) narrowphaseAndSolver->computeContactsAndSolver(gFpIO.m_dAllOverlappingPairs,gFpIO.m_numOverlap, gFpIO.m_dAABB,gFpIO.m_numObjects); } { BT_PROFILE("copyBodyVelocities"); if (narrowphaseAndSolver) copyBodyVelocities(gFpIO, m_data->m_linVelBuf->getBufferCL(), m_data->m_angVelBuf->getBufferCL(), narrowphaseAndSolver->getBodiesGpu(), narrowphaseAndSolver->getBodyInertiasGpu()); } } } else { printf("error, gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap); btAssert(0); } { BT_PROFILE("integrateTransforms"); if (runOpenCLKernels) { bool integrateOnGpu = true; if (integrateOnGpu) { int numObjects = m_numPhysicsInstances; int offset = SHAPE_VERTEX_BUFFER_SIZE/4; ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 0, sizeof(int), &offset); ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 1, sizeof(int), &numObjects); ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 2, sizeof(cl_mem), (void*)&clBuffer ); cl_mem lv = m_data->m_linVelBuf->getBufferCL(); cl_mem av = m_data->m_angVelBuf->getBufferCL(); cl_mem btimes = m_data->m_bodyTimes->getBufferCL(); ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 3, sizeof(cl_mem), (void*)&lv); ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 4, sizeof(cl_mem), (void*)&av); ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 5, sizeof(cl_mem), (void*)&btimes); size_t workGroupSize = 64; size_t numWorkItems = workGroupSize*((m_numPhysicsInstances + (workGroupSize)) / workGroupSize); if (workGroupSize>numWorkItems) workGroupSize=numWorkItems; ciErrNum = clEnqueueNDRangeKernel(g_cqCommandQue, g_integrateTransformsKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0); oclCHECKERROR(ciErrNum, CL_SUCCESS); } else { //debug velocity btAlignedObjectArray<btVector3> linvel; m_data->m_linVelBuf->copyToHost(linvel); for (int i=0;i<linvel.size();i++) { btAssert(_finite(linvel[i].x())); } btAssert(0); } } } } if(m_data->m_useInterop) { #ifndef __APPLE__ BT_PROFILE("clEnqueueReleaseGLObjects"); ciErrNum = clEnqueueReleaseGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, 0); clFinish(g_cqCommandQue); #endif } else { BT_PROFILE("clEnqueueReadBuffer clReleaseMemObject and glUnmapBuffer"); ciErrNum = clEnqueueReadBuffer ( g_cqCommandQue, clBuffer, blocking, 0, VBOsize, hostPtr,0,0,0); //clReleaseMemObject(clBuffer); clFinish(g_cqCommandQue); glUnmapBuffer( GL_ARRAY_BUFFER); glFlush(); } oclCHECKERROR(ciErrNum, CL_SUCCESS); if (runOpenCLKernels) { BT_PROFILE("clFinish"); clFinish(g_cqCommandQue); } }
void CLPhysicsDemo::stepSimulation() { BT_PROFILE("simulationLoop"); { BT_PROFILE("glFinish"); glFinish(); } cl_int ciErrNum = CL_SUCCESS; if(m_data->m_useInterop) { clBuffer = g_interopBuffer->getCLBUffer(); BT_PROFILE("clEnqueueAcquireGLObjects"); ciErrNum = clEnqueueAcquireGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, NULL); adl::DeviceUtils::waitForCompletion( g_deviceCL ); } else { glBindBuffer(GL_ARRAY_BUFFER, cube_vbo); glFlush(); BT_PROFILE("glMapBuffer and clEnqueueWriteBuffer"); blocking= CL_TRUE; hostPtr= (char*)glMapBuffer( GL_ARRAY_BUFFER,GL_READ_WRITE);//GL_WRITE_ONLY if (!clBuffer) { clBuffer = clCreateBuffer(g_cxMainContext, CL_MEM_READ_WRITE, VBOsize, 0, &ciErrNum); } adl::DeviceUtils::waitForCompletion( g_deviceCL ); oclCHECKERROR(ciErrNum, CL_SUCCESS); ciErrNum = clEnqueueWriteBuffer ( g_cqCommandQue, clBuffer, blocking, 0, VBOsize, hostPtr,0,0,0 ); adl::DeviceUtils::waitForCompletion( g_deviceCL ); } oclCHECKERROR(ciErrNum, CL_SUCCESS); if (runOpenCLKernels && m_numPhysicsInstances) { gFpIO.m_numObjects = m_numPhysicsInstances; gFpIO.m_positionOffset = SHAPE_VERTEX_BUFFER_SIZE/4; gFpIO.m_clObjectsBuffer = clBuffer; gFpIO.m_dAABB = m_data->m_Broadphase->m_dAABB; gFpIO.m_dlocalShapeAABB = (cl_mem)m_data->m_localShapeAABB->m_ptr; gFpIO.m_numOverlap = 0; { BT_PROFILE("setupGpuAabbs"); setupGpuAabbsFull(gFpIO,narrowphaseAndSolver->getBodiesGpu() ); } if (1) { BT_PROFILE("calculateOverlappingPairs"); m_data->m_Broadphase->calculateOverlappingPairs(0, m_numPhysicsInstances); gFpIO.m_dAllOverlappingPairs = m_data->m_Broadphase->m_dAllOverlappingPairs; gFpIO.m_numOverlap = m_data->m_Broadphase->m_numPrefixSum; } //printf("gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap ); if (gFpIO.m_numOverlap>=0 && gFpIO.m_numOverlap<MAX_BROADPHASE_COLLISION_CL) { colorPairsOpenCL(gFpIO); if (1) { { //BT_PROFILE("setupBodies"); if (narrowphaseAndSolver) setupBodies(gFpIO, gLinVelMem, gAngVelMem, narrowphaseAndSolver->getBodiesGpu(), narrowphaseAndSolver->getBodyInertiasGpu()); } if (gFpIO.m_numOverlap) { BT_PROFILE("computeContactsAndSolver"); if (narrowphaseAndSolver) narrowphaseAndSolver->computeContactsAndSolver(gFpIO.m_dAllOverlappingPairs,gFpIO.m_numOverlap); } { BT_PROFILE("copyBodyVelocities"); if (narrowphaseAndSolver) copyBodyVelocities(gFpIO, gLinVelMem, gAngVelMem, narrowphaseAndSolver->getBodiesGpu(), narrowphaseAndSolver->getBodyInertiasGpu()); } } } else { printf("error, gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap); btAssert(0); } { BT_PROFILE("integrateTransforms"); if (runOpenCLKernels) { int numObjects = m_numPhysicsInstances; int offset = SHAPE_VERTEX_BUFFER_SIZE/4; ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 0, sizeof(int), &offset); ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 1, sizeof(int), &numObjects); ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 2, sizeof(cl_mem), (void*)&clBuffer ); ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 3, sizeof(cl_mem), (void*)&gLinVelMem); ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 4, sizeof(cl_mem), (void*)&gAngVelMem); ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 5, sizeof(cl_mem), (void*)&gBodyTimes); size_t workGroupSize = 64; size_t numWorkItems = workGroupSize*((m_numPhysicsInstances + (workGroupSize)) / workGroupSize); if (workGroupSize>numWorkItems) workGroupSize=numWorkItems; ciErrNum = clEnqueueNDRangeKernel(g_cqCommandQue, g_integrateTransformsKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0); oclCHECKERROR(ciErrNum, CL_SUCCESS); } } } if(m_data->m_useInterop) { BT_PROFILE("clEnqueueReleaseGLObjects"); ciErrNum = clEnqueueReleaseGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, 0); adl::DeviceUtils::waitForCompletion( g_deviceCL ); } else { BT_PROFILE("clEnqueueReadBuffer clReleaseMemObject and glUnmapBuffer"); ciErrNum = clEnqueueReadBuffer ( g_cqCommandQue, clBuffer, blocking, 0, VBOsize, hostPtr,0,0,0); //clReleaseMemObject(clBuffer); adl::DeviceUtils::waitForCompletion( g_deviceCL ); glUnmapBuffer( GL_ARRAY_BUFFER); glFlush(); } oclCHECKERROR(ciErrNum, CL_SUCCESS); if (runOpenCLKernels) { BT_PROFILE("clFinish"); clFinish(g_cqCommandQue); } }