void initCL( void* glCtx, void* glDC ) { int ciErrNum = 0; #if defined(CL_PLATFORM_MINI_CL) cl_device_type deviceType = CL_DEVICE_TYPE_CPU; #elif defined(CL_PLATFORM_AMD) cl_device_type deviceType = CL_DEVICE_TYPE_GPU; #elif defined(CL_PLATFORM_NVIDIA) cl_device_type deviceType = CL_DEVICE_TYPE_GPU; #else cl_device_type deviceType = CL_DEVICE_TYPE_CPU; #endif //g_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_ALL, &ciErrNum); //g_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_GPU, &ciErrNum); //g_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_CPU, &ciErrNum); //try CL_DEVICE_TYPE_DEBUG for sequential, non-threaded execution, when using MiniCL on CPU, it gives a full callstack at the crash in the kernel //#ifdef USE_MINICL // g_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_DEBUG, &ciErrNum); //#else g_cxMainContext = btOclCommon::createContextFromType(deviceType, &ciErrNum, (intptr_t)glCtx, (intptr_t)glDC); //#endif oclCHECKERROR(ciErrNum, CL_SUCCESS); g_cdDevice = btOclGetMaxFlopsDev(g_cxMainContext); btOclPrintDevInfo(g_cdDevice); // create a command-queue g_cqCommandQue = clCreateCommandQueue(g_cxMainContext, g_cdDevice, 0, &ciErrNum); oclCHECKERROR(ciErrNum, CL_SUCCESS); }
void InitCL() { void* glCtx=0; void* glDC = 0; #ifdef _WIN32 glCtx = wglGetCurrentContext(); #else //!_WIN32 GLXContext glCtx = glXGetCurrentContext(); #endif //!_WIN32 glDC = wglGetCurrentDC(); int ciErrNum = 0; cl_device_type deviceType = CL_DEVICE_TYPE_ALL;//GPU; g_cxMainContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, glCtx, glDC); oclCHECKERROR(ciErrNum, CL_SUCCESS); int numDev = btOpenCLUtils::getNumDevices(g_cxMainContext); if (numDev>0) { g_device= btOpenCLUtils::getDevice(g_cxMainContext,0); btOpenCLUtils::printDeviceInfo(g_device); // create a command-queue g_cqCommandQue = clCreateCommandQueue(g_cxMainContext, g_device, 0, &ciErrNum); oclCHECKERROR(ciErrNum, CL_SUCCESS); //normally you would create and execute kernels using this command queue } }
void btParticlesDynamicsWorld::runComputeCellIdKernel() { cl_int ciErrNum; #if 0 if(m_useCpuControls[SIMSTAGE_COMPUTE_CELL_ID]->m_active) { // CPU version unsigned int memSize = sizeof(btVector3) * m_numParticles; ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dPos, CL_TRUE, 0, memSize, &(m_hPos[0]), 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); for(int index = 0; index < m_numParticles; index++) { btVector3 pos = m_hPos[index]; btInt4 gridPos = cpu_getGridPos(pos, &m_simParams); unsigned int hash = cpu_getPosHash(gridPos, &m_simParams); m_hPosHash[index].x = hash; m_hPosHash[index].y = index; } memSize = sizeof(btInt2) * m_numParticles; ciErrNum = clEnqueueWriteBuffer(m_cqCommandQue, m_dPosHash, CL_TRUE, 0, memSize, &(m_hPosHash[0]), 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); } else #endif { BT_PROFILE("ComputeCellId"); runKernelWithWorkgroupSize(PARTICLES_KERNEL_COMPUTE_CELL_ID, m_numParticles); ciErrNum = clFinish(m_cqCommandQue); oclCHECKERROR(ciErrNum, CL_SUCCESS); } /* // check int memSize = sizeof(btInt2) * m_hashSize; ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dPosHash, CL_TRUE, 0, memSize, &(m_hPosHash[0]), 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); memSize = sizeof(float) * 4 * m_numParticles; ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dPos, CL_TRUE, 0, memSize, &(m_hPos[0]), 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); */ { BT_PROFILE("Copy VBO"); // Explicit Copy (until OpenGL interop will work) // map the PBO to copy data from the CL buffer via host glBindBufferARB(GL_ARRAY_BUFFER, m_vbo); // map the buffer object into client's memory void* ptr = glMapBufferARB(GL_ARRAY_BUFFER, GL_WRITE_ONLY_ARB); ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dPos, CL_TRUE, 0, sizeof(float) * 4 * m_numParticles, ptr, 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); glUnmapBufferARB(GL_ARRAY_BUFFER); glBindBufferARB(GL_ARRAY_BUFFER,0); } }
void InitCL(int preferredDeviceIndex, int preferredPlatformIndex, bool useInterop) { void* glCtx=0; void* glDC = 0; #ifdef _WIN32 glCtx = wglGetCurrentContext(); glDC = wglGetCurrentDC(); #else //!_WIN32 #ifndef __APPLE__ GLXContext glCtx = glXGetCurrentContext(); glDC = wglGetCurrentDC();//?? #endif #endif //!_WIN32 int ciErrNum = 0; //#ifdef CL_PLATFORM_INTEL // cl_device_type deviceType = CL_DEVICE_TYPE_ALL; //#else cl_device_type deviceType = CL_DEVICE_TYPE_GPU; //#endif if (useInterop) { g_cxMainContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, glCtx, glDC); } else { g_cxMainContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, 0,0,preferredDeviceIndex, preferredPlatformIndex); } oclCHECKERROR(ciErrNum, CL_SUCCESS); int numDev = btOpenCLUtils::getNumDevices(g_cxMainContext); if (numDev>0) { g_device= btOpenCLUtils::getDevice(g_cxMainContext,0); g_cqCommandQue = clCreateCommandQueue(g_cxMainContext, g_device, 0, &ciErrNum); oclCHECKERROR(ciErrNum, CL_SUCCESS); btOpenCLUtils::printDeviceInfo(g_device); btOpenCLUtils::getDeviceInfo(g_device,&info); g_deviceName = info.m_deviceName; } }
void initCL( void* glCtx, void* glDC ) { int ciErrNum = 0; #if defined(CL_PLATFORM_MINI_CL) cl_device_type deviceType = CL_DEVICE_TYPE_CPU;//or use CL_DEVICE_TYPE_DEBUG to debug MiniCL #elif defined(CL_PLATFORM_INTEL) cl_device_type deviceType = CL_DEVICE_TYPE_CPU; #elif defined(CL_PLATFORM_AMD) cl_device_type deviceType = CL_DEVICE_TYPE_GPU; #elif defined(CL_PLATFORM_NVIDIA) cl_device_type deviceType = CL_DEVICE_TYPE_GPU; #else #ifdef __APPLE__ cl_device_type deviceType = CL_DEVICE_TYPE_ALL;//GPU; #else cl_device_type deviceType = CL_DEVICE_TYPE_CPU;//CL_DEVICE_TYPE_ALL #endif//__APPLE__ #endif g_cxMainContext = btOclCommon::createContextFromType(deviceType, &ciErrNum, glCtx, glDC); switch (deviceType) { case CL_DEVICE_TYPE_GPU: printf("createContextFromType(CL_DEVICE_TYPE_GPU)\n"); break; case CL_DEVICE_TYPE_CPU: printf("createContextFromType(CL_DEVICE_TYPE_CPU)\n"); break; case CL_DEVICE_TYPE_ALL: printf("createContextFromType(CL_DEVICE_TYPE_ALL)\n"); break; default: printf("createContextFromType(unknown device type %d\n",(int)deviceType); }; //#endif oclCHECKERROR(ciErrNum, CL_SUCCESS); g_cdDevice = btOclGetMaxFlopsDev(g_cxMainContext); btOclPrintDevInfo(g_cdDevice); // create a command-queue g_cqCommandQue = clCreateCommandQueue(g_cxMainContext, g_cdDevice, 0, &ciErrNum); oclCHECKERROR(ciErrNum, CL_SUCCESS); }
void InitCL(int preferredDeviceIndex, int preferredPlatformIndex) { void* glCtx=0; void* glDC = 0; #ifdef _WIN32 glCtx = wglGetCurrentContext(); #else //!_WIN32 GLXContext glCtx = glXGetCurrentContext(); #endif //!_WIN32 glDC = wglGetCurrentDC(); int ciErrNum = 0; #ifdef CL_PLATFORM_INTEL cl_device_type deviceType = CL_DEVICE_TYPE_ALL; #else cl_device_type deviceType = CL_DEVICE_TYPE_GPU; #endif if (USE_GL_CL_INTEROP) { g_cxMainContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, glCtx, glDC); } else { g_cxMainContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, 0,0,preferredDeviceIndex, preferredPlatformIndex); } oclCHECKERROR(ciErrNum, CL_SUCCESS); int numDev = btOpenCLUtils::getNumDevices(g_cxMainContext); if (numDev>0) { g_device= btOpenCLUtils::getDevice(g_cxMainContext,0); btOpenCLDeviceInfo clInfo; btOpenCLUtils::getDeviceInfo(g_device,clInfo); btOpenCLUtils::printDeviceInfo(g_device); // create a command-queue g_cqCommandQue = clCreateCommandQueue(g_cxMainContext, g_device, 0, &ciErrNum); oclCHECKERROR(ciErrNum, CL_SUCCESS); //normally you would create and execute kernels using this command queue } }
void btGpuDemo3dOCLWrap::initKernel(int kernelId, char* pName) { cl_int ciErrNum; cl_kernel kernel = clCreateKernel(m_cpProgram, pName, &ciErrNum); oclCHECKERROR(ciErrNum, CL_SUCCESS); size_t wgSize; ciErrNum = clGetKernelWorkGroupInfo(kernel, m_cdDevice, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wgSize, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); m_kernels[kernelId].m_Id = kernelId; m_kernels[kernelId].m_kernel = kernel; m_kernels[kernelId].m_name = pName; m_kernels[kernelId].m_workgroupSize = (int)wgSize; return; }
void CL2GL(CLPhysicsDemo& demo) { int VBOsize = demo.m_maxShapeBufferCapacityInBytes+demo.m_numPhysicsInstances*(4+4+4+3)*sizeof(float); int ciErrNum; if(useInterop) { #ifndef __APPLE__ BT_PROFILE("clEnqueueReleaseGLObjects"); ciErrNum = clEnqueueReleaseGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, 0); clFinish(g_cqCommandQue); #endif } else { BT_PROFILE("clEnqueueReadBuffer clReleaseMemObject and glUnmapBuffer"); ciErrNum = clEnqueueReadBuffer ( g_cqCommandQue, clBuffer, blocking, 0, VBOsize, hostPtr,0,0,0); //clReleaseMemObject(clBuffer); clFinish(g_cqCommandQue); glUnmapBuffer( GL_ARRAY_BUFFER); glFlush(); } oclCHECKERROR(ciErrNum, CL_SUCCESS); }
void btParticlesDynamicsWorld::runSortHashKernel() { cl_int ciErrNum; int memSize = m_numParticles * sizeof(btInt2); if(m_useCpuControls[SIMSTAGE_SORT_CELL_ID]->m_active) { // CPU version // get hash from GPU ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dPosHash, CL_TRUE, 0, memSize, &(m_hPosHash[0]), 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); // sort class btHashPosKey { public: unsigned int hash; unsigned int index; void quickSort(btHashPosKey* pData, int lo, int hi) { int i=lo, j=hi; btHashPosKey x = pData[(lo+hi)/2]; do { while(pData[i].hash < x.hash) i++; while(x.hash < pData[j].hash) j--; if(i <= j) { btHashPosKey t = pData[i]; pData[i] = pData[j]; pData[j] = t; i++; j--; } } while(i <= j); if(lo < j) pData->quickSort(pData, lo, j); if(i < hi) pData->quickSort(pData, i, hi); }
void btGpuDemo3dOCLWrap::copyArrayFromDevice(void* host, const cl_mem device, unsigned int size, int hostOffs, int devOffs) { cl_int ciErrNum; char* pHost = (char*)host + hostOffs; ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, device, CL_TRUE, devOffs, size, pHost, 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); }
void InitCL(int preferredDeviceIndex, int preferredPlatformIndex) { bool useInterop = false; void* glCtx=0; void* glDC = 0; int ciErrNum = 0; //#ifdef CL_PLATFORM_INTEL // cl_device_type deviceType = CL_DEVICE_TYPE_ALL; //#else cl_device_type deviceType = CL_DEVICE_TYPE_GPU; //#endif if (useInterop) { // g_cxMainContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, glCtx, glDC); } else { cl_platform_id platform; g_cxMainContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, 0,0,preferredDeviceIndex, preferredPlatformIndex, &platform); if (g_cxMainContext && platform) { btOpenCLUtils::printPlatformInfo(platform); } } oclCHECKERROR(ciErrNum, CL_SUCCESS); int numDev = btOpenCLUtils::getNumDevices(g_cxMainContext); if (numDev>0) { g_device= btOpenCLUtils::getDevice(g_cxMainContext,0); g_cqCommandQueue = clCreateCommandQueue(g_cxMainContext, g_device, 0, &ciErrNum); oclCHECKERROR(ciErrNum, CL_SUCCESS); btOpenCLUtils::printDeviceInfo(g_device); } }
void BasicGpuDemo::initCL(int preferredDeviceIndex, int preferredPlatformIndex) { void* glCtx=0; void* glDC = 0; int ciErrNum = 0; //#ifdef CL_PLATFORM_INTEL //cl_device_type deviceType = CL_DEVICE_TYPE_ALL; //#else cl_device_type deviceType = CL_DEVICE_TYPE_GPU; //#endif cl_platform_id platformId; // if (useInterop) // { // m_data->m_clContext = b3OpenCLUtils::createContextFromType(deviceType, &ciErrNum, glCtx, glDC); // } else { m_clData->m_clContext = b3OpenCLUtils::createContextFromType(deviceType, &ciErrNum, 0,0,preferredDeviceIndex, preferredPlatformIndex,&platformId); b3OpenCLUtils::printPlatformInfo(platformId); } oclCHECKERROR(ciErrNum, CL_SUCCESS); int numDev = b3OpenCLUtils::getNumDevices(m_clData->m_clContext); if (numDev>0) { m_clData->m_clDevice= b3OpenCLUtils::getDevice(m_clData->m_clContext,0); m_clData->m_clQueue = clCreateCommandQueue(m_clData->m_clContext, m_clData->m_clDevice, 0, &ciErrNum); oclCHECKERROR(ciErrNum, CL_SUCCESS); b3OpenCLUtils::printDeviceInfo(m_clData->m_clDevice); b3OpenCLDeviceInfo info; b3OpenCLUtils::getDeviceInfo(m_clData->m_clDevice,&info); m_clData->m_clDeviceName = info.m_deviceName; m_clData->m_clInitialized = true; } }
void GL2CL(CLPhysicsDemo& demo, GLInstancingRenderer& render) { BT_PROFILE("simulationLoop"); int VBOsize = demo.m_maxShapeBufferCapacityInBytes+demo.m_numPhysicsInstances*(4+4+4+3)*sizeof(float); cl_int ciErrNum = CL_SUCCESS; if(useInterop) { #ifndef __APPLE__ clBuffer = g_interopBuffer->getCLBUffer(); BT_PROFILE("clEnqueueAcquireGLObjects"); { BT_PROFILE("clEnqueueAcquireGLObjects"); ciErrNum = clEnqueueAcquireGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, NULL); clFinish(g_cqCommandQue); } #else assert(0); #endif } else { glBindBuffer(GL_ARRAY_BUFFER, render.getInternalData()->m_vbo); glFlush(); BT_PROFILE("glMapBuffer and clEnqueueWriteBuffer"); blocking= CL_TRUE; hostPtr= (char*)glMapBuffer( GL_ARRAY_BUFFER,GL_READ_WRITE);//GL_WRITE_ONLY if (!clBuffer) { int maxVBOsize = demo.m_maxShapeBufferCapacityInBytes+MAX_CONVEX_BODIES_CL*(4+4+4+3)*sizeof(float); clBuffer = clCreateBuffer(g_cxMainContext, CL_MEM_READ_WRITE,maxVBOsize, 0, &ciErrNum); clFinish(g_cqCommandQue); oclCHECKERROR(ciErrNum, CL_SUCCESS); ciErrNum = clEnqueueWriteBuffer ( g_cqCommandQue, clBuffer, blocking, 0, VBOsize, hostPtr,0,0,0 ); clFinish(g_cqCommandQue); } } gFpIO.m_clObjectsBuffer = clBuffer; gFpIO.m_positionOffset = demo.m_maxShapeBufferCapacityInBytes/4; }
void btParticlesDynamicsWorld::grabSimulationData() { // const btVector3& gravity = getGravity(); //btVector3 gravity(0., -0.06, 0.); //btVector3 gravity(0., -0.0003f, 0.); btVector3 gravity(0,-0.0003,0); m_simParams.m_gravity[0] = gravity[0]; m_simParams.m_gravity[1] = gravity[1]; m_simParams.m_gravity[2] = gravity[2]; m_simParams.m_particleRad = m_particleRad; m_simParams.m_globalDamping = 1.0f; m_simParams.m_boundaryDamping = -0.5f; // m_simParams.m_collisionDamping = 0.02f; // m_simParams.m_spring = 0.5f; // m_simParams.m_shear = 0.1f; // m_simParams.m_attraction = 0.0f; m_simParams.m_collisionDamping = 0.025f;//0.02f; m_simParams.m_spring = 0.5f; m_simParams.m_shear = 0.1f; m_simParams.m_attraction = 0.001f; // copy data to GPU cl_int ciErrNum; unsigned int memSize = sizeof(btVector3) * m_numParticles; ciErrNum = clEnqueueWriteBuffer(m_cqCommandQue, m_dPos, CL_TRUE, 0, memSize, &(m_hPos[0]), 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); ciErrNum = clEnqueueWriteBuffer(m_cqCommandQue, m_dVel, CL_TRUE, 0, memSize, &(m_hVel[0]), 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); memSize = sizeof(btSimParams); ciErrNum = clEnqueueWriteBuffer(m_cqCommandQue, m_dSimParams, CL_TRUE, 0, memSize, &m_simParams, 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); memSize = m_hashSize * sizeof(btInt2); ciErrNum = clEnqueueWriteBuffer(m_cqCommandQue, m_dPosHash, CL_TRUE, 0, memSize, &(m_hPosHash[0]), 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); }
void btParticlesDynamicsWorld::allocateBuffers() { cl_int ciErrNum; // positions of spheres m_hPos.resize(m_numParticles); m_hVel.resize(m_numParticles); m_hSortedPos.resize(m_numParticles); m_hSortedVel.resize(m_numParticles); m_hPosHash.resize(m_hashSize); for(int i = 0; i < m_hashSize; i++) { m_hPosHash[i].x = 0x7FFFFFFF; m_hPosHash[i].y = 0; } unsigned int memSize = sizeof(btVector3) * m_numParticles; m_dPos = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum); oclCHECKERROR(ciErrNum, CL_SUCCESS); m_dVel = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum); oclCHECKERROR(ciErrNum, CL_SUCCESS); m_dSortedPos = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum); oclCHECKERROR(ciErrNum, CL_SUCCESS); m_dSortedVel = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum); oclCHECKERROR(ciErrNum, CL_SUCCESS); memSize = m_hashSize * sizeof(btInt2); m_dPosHash = clCreateBuffer(m_cxMainContext, CL_MEM_READ_ONLY, memSize, NULL, &ciErrNum); oclCHECKERROR(ciErrNum, CL_SUCCESS); // global simulation parameters memSize = sizeof(btSimParams); m_dSimParams = clCreateBuffer(m_cxMainContext, CL_MEM_READ_ONLY, memSize, NULL, &ciErrNum); oclCHECKERROR(ciErrNum, CL_SUCCESS); }
btParallelConstraintSolver::btParallelConstraintSolver() { //initialize MiniCL here cl_int ciErrNum; s_cxMainContext = clCreateContextFromType(0, CL_DEVICE_TYPE_ALL, NULL, NULL, &ciErrNum); oclCHECKERROR(ciErrNum, CL_SUCCESS); size_t dataSzBytes; cl_device_id* clDevices; clGetContextInfo(s_cxMainContext, CL_CONTEXT_DEVICES, 0, NULL, &dataSzBytes); clDevices = (cl_device_id*) malloc(dataSzBytes); clGetContextInfo(s_cxMainContext, CL_CONTEXT_DEVICES, dataSzBytes, clDevices, NULL); s_cdDevice = clDevices[0]; free(clDevices); // create a command-queue s_cqCommandQue = clCreateCommandQueue(s_cxMainContext, s_cdDevice, 0, &ciErrNum); oclCHECKERROR(ciErrNum, CL_SUCCESS); size_t program_length = 0; s_cpProgram = clCreateProgramWithSource(s_cxMainContext, 1, NULL, &program_length, &ciErrNum); oclCHECKERROR(ciErrNum, CL_SUCCESS); // Create kernels s_setupContactKernel = clCreateKernel(s_cpProgram, "kSetupContact", &ciErrNum); oclCHECKERROR(ciErrNum, CL_SUCCESS); size_t wgSize; ciErrNum = clGetKernelWorkGroupInfo(s_setupContactKernel, s_cdDevice, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wgSize, NULL); m_localGroupSize = wgSize; s_solveContactKernel = clCreateKernel(s_cpProgram, "kSolveContact", &ciErrNum); oclCHECKERROR(ciErrNum, CL_SUCCESS); }
void initCL(int preferredDeviceIndex, int preferredPlatformIndex) { //void* glCtx=0; //void* glDC = 0; int ciErrNum = 0; //bound search and radix sort only work on GPU right now (assume 32 or 64 width workgroup without barriers) cl_device_type deviceType = CL_DEVICE_TYPE_ALL; g_context = b3OpenCLUtils::createContextFromType(deviceType, &ciErrNum, 0,0,preferredDeviceIndex, preferredPlatformIndex); oclCHECKERROR(ciErrNum, CL_SUCCESS); int numDev = b3OpenCLUtils::getNumDevices(g_context); if (numDev>0) { b3OpenCLDeviceInfo info; g_device= b3OpenCLUtils::getDevice(g_context,0); g_queue = clCreateCommandQueue(g_context, g_device, 0, &ciErrNum); oclCHECKERROR(ciErrNum, CL_SUCCESS); b3OpenCLUtils::printDeviceInfo(g_device); b3OpenCLUtils::getDeviceInfo(g_device,&info); g_deviceName = info.m_deviceName; } }
void btGpuDemo3dOCLWrap::runKernelWithWorkgroupSize(int kernelId, int globalSize) { if(globalSize <= 0) { return; } cl_kernel kernelFunc = m_kernels[kernelId].m_kernel; cl_int ciErrNum = clSetKernelArg(kernelFunc, 0, sizeof(int), (void*)&globalSize); oclCHECKERROR(ciErrNum, CL_SUCCESS); int workgroupSize = m_kernels[kernelId].m_workgroupSize; if(workgroupSize <= 0) { // let OpenCL library calculate workgroup size size_t globalWorkSize[2]; globalWorkSize[0] = globalSize; globalWorkSize[1] = 1; ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, kernelFunc, 1, NULL, globalWorkSize, NULL, 0,0,0 ); } else { size_t localWorkSize[2], globalWorkSize[2]; workgroupSize = btMin(workgroupSize, globalSize); int num_t = globalSize / workgroupSize; int num_g = num_t * workgroupSize; if(num_g < globalSize) { num_t++; } localWorkSize[0] = workgroupSize; globalWorkSize[0] = num_t * workgroupSize; localWorkSize[1] = 1; globalWorkSize[1] = 1; ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, kernelFunc, 1, NULL, globalWorkSize, localWorkSize, 0,0,0 ); } oclCHECKERROR(ciErrNum, CL_SUCCESS); ciErrNum = clFlush(m_cqCommandQue); oclCHECKERROR(ciErrNum, CL_SUCCESS); }
btScalar btParallelConstraintSolver::solveGroupCacheFriendlyIterations(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer,btStackAlloc* stackAlloc) { BT_PROFILE("runIterations"); #if USE_SEQUENTIAL_SOLVER return btSequentialImpulseConstraintSolver::solveGroupCacheFriendlyIterations(bodies, numBodies, manifoldPtr, numManifolds, constraints, numConstraints, infoGlobal, debugDrawer, stackAlloc); #else int iteration; #if RUN_KERNELS_DIRECTLY for ( iteration = 0;iteration<infoGlobal.m_numIterations;iteration++) { for(int i = 0; i < m_localGroupSize; i++) { kSolveContact(this, m_taskParams, (btContactSolverInfo*)&infoGlobal, i); } } #else cl_int ciErrNum; btParallelConstraintSolver* pSolver = this; btParallelConstraintSolverSetupTaskParams* pTaskParams = m_taskParams; btContactSolverInfo* pInfoGlobal = (btContactSolverInfo*)&infoGlobal; ciErrNum = clSetKernelArg(s_solveContactKernel, 0, sizeof(cl_mem), (void*)&pSolver); ciErrNum |= clSetKernelArg(s_solveContactKernel, 1, sizeof(cl_mem), (void*)&pTaskParams); ciErrNum |= clSetKernelArg(s_solveContactKernel, 2, sizeof(cl_mem), (void*)&pInfoGlobal); oclCHECKERROR(ciErrNum, CL_SUCCESS); size_t szWorkSize[1]; szWorkSize[0] = m_localGroupSize; for ( iteration = 0;iteration<infoGlobal.m_numIterations;iteration++) { ciErrNum = clEnqueueNDRangeKernel(s_cqCommandQue, s_solveContactKernel, 1, NULL, szWorkSize, szWorkSize, 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); clFlush(s_cqCommandQue); } #endif return 0.f; #endif }
btGridBroadphaseCl::btGridBroadphaseCl( btOverlappingPairCache* overlappingPairCache, const btVector3& cellSize, int gridSizeX, int gridSizeY, int gridSizeZ, int maxSmallProxies, int maxLargeProxies, int maxPairsPerSmallProxy, btScalar maxSmallProxySize, int maxSmallProxiesPerCell, cl_context context, cl_device_id device, cl_command_queue queue, adl::DeviceCL* deviceCL) :bt3dGridBroadphaseOCL(overlappingPairCache,cellSize, gridSizeX, gridSizeY, gridSizeZ, maxSmallProxies, maxLargeProxies, maxPairsPerSmallProxy, maxSmallProxySize,maxSmallProxiesPerCell, context,device,queue,deviceCL) { m_computeAabbKernel = m_deviceCL->getKernel(COMPUTE_AABB_KERNEL_PATH,"computeAabb","",spComputeAabbSource); m_countOverlappingPairs = m_deviceCL->getKernel(COMPUTE_AABB_KERNEL_PATH,"countOverlappingpairs","",spComputeAabbSource); m_squeezePairCaches = m_deviceCL->getKernel(COMPUTE_AABB_KERNEL_PATH,"squeezePairCaches","",spComputeAabbSource); m_aabbConstBuffer = new adl::Buffer<MyAabbConstData >(m_deviceCL,1,adl::BufferBase::BUFFER_CONST); size_t memSize = m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int)*2; cl_int ciErrNum=0; m_dAllOverlappingPairs = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum); memset(m_hAllOverlappingPairs, 0x00, sizeof(MyUint2)*m_maxHandles * m_maxPairsPerBody); copyArrayToDevice(m_dAllOverlappingPairs, m_hAllOverlappingPairs, m_maxHandles * m_maxPairsPerBody * sizeof(MyUint2)); oclCHECKERROR(ciErrNum, CL_SUCCESS); }
void btGridBroadphaseCl::calculateOverlappingPairs(float* positions, int numObjects) { btDispatcher* dispatcher=0; // update constants { BT_PROFILE("setParameters"); setParameters(&m_params); } // prepare AABB array { BT_PROFILE("prepareAABB"); prepareAABB(positions, numObjects); } // calculate hash { BT_PROFILE("calcHashAABB"); calcHashAABB(); } { BT_PROFILE("sortHash"); // sort bodies based on hash sortHash(); } // find start of each cell { BT_PROFILE("findCellStart"); findCellStart(); } { BT_PROFILE("findOverlappingPairs"); // findOverlappingPairs (small/small) findOverlappingPairs(); } // add pairs to CPU cache { BT_PROFILE("computePairCacheChanges"); #if 0 computePairCacheChanges(); #else int ciErrNum=0; ciErrNum=clSetKernelArg((cl_kernel)m_countOverlappingPairs->m_kernel, 0, sizeof(int), (void*)&numObjects); ciErrNum=clSetKernelArg((cl_kernel)m_countOverlappingPairs->m_kernel, 1, sizeof(cl_mem),(void*)&m_dPairBuff); ciErrNum=clSetKernelArg((cl_kernel)m_countOverlappingPairs->m_kernel, 2, sizeof(cl_mem),(void*)&m_dPairBuffStartCurr); ciErrNum=clSetKernelArg((cl_kernel)m_countOverlappingPairs->m_kernel, 3, sizeof(cl_mem),(void*)&m_dPairScanChanged); ciErrNum=clSetKernelArg((cl_kernel)m_countOverlappingPairs->m_kernel, 4, sizeof(cl_mem),(void*)&m_dAABB); size_t localWorkSize=64; size_t numWorkItems = localWorkSize*((numObjects+ (localWorkSize)) / localWorkSize); ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, (cl_kernel)m_countOverlappingPairs->m_kernel, 1, NULL, &numWorkItems, &localWorkSize, 0,0,0 ); oclCHECKERROR(ciErrNum, CL_SUCCESS); ciErrNum = clFlush(m_cqCommandQue); #endif } { BT_PROFILE("scanOverlappingPairBuff"); scanOverlappingPairBuff(false); } { BT_PROFILE("squeezeOverlappingPairBuff"); //#define FORCE_CPU #ifdef FORCE_CPU bt3dGridBroadphaseOCL::squeezeOverlappingPairBuff(); copyArrayToDevice(m_dPairsChangedXY, m_hPairsChangedXY, sizeof( MyUint2) * m_numPrefixSum); //gSum #else //squeezeOverlappingPairBuff(); int ciErrNum = 0; ciErrNum=clSetKernelArg((cl_kernel)m_squeezePairCaches->m_kernel, 0, sizeof(int), (void*)&numObjects); ciErrNum=clSetKernelArg((cl_kernel)m_squeezePairCaches->m_kernel, 1, sizeof(cl_mem),(void*)&m_dPairBuff); ciErrNum=clSetKernelArg((cl_kernel)m_squeezePairCaches->m_kernel, 2, sizeof(cl_mem),(void*)&m_dPairBuffStartCurr); ciErrNum=clSetKernelArg((cl_kernel)m_squeezePairCaches->m_kernel, 3, sizeof(cl_mem),(void*)&m_dPairScanChanged); ciErrNum=clSetKernelArg((cl_kernel)m_squeezePairCaches->m_kernel, 4, sizeof(cl_mem),(void*)&m_dAllOverlappingPairs); ciErrNum=clSetKernelArg((cl_kernel)m_squeezePairCaches->m_kernel, 5, sizeof(cl_mem),(void*)&m_dAABB); size_t workGroupSize = 64; size_t numWorkItems = workGroupSize*((numObjects+ (workGroupSize)) / workGroupSize); ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, (cl_kernel)m_squeezePairCaches->m_kernel, 1, NULL, &numWorkItems, &workGroupSize, 0,0,0 ); oclCHECKERROR(ciErrNum, CL_SUCCESS); // copyArrayFromDevice(m_hAllOverlappingPairs, m_dAllOverlappingPairs, sizeof(unsigned int) * m_numPrefixSum*2); //gSum // clFinish(m_cqCommandQue); #endif } return; }
void CLPhysicsDemo::stepSimulation() { int sz = sizeof(ConvexPolyhedronCL2); int sz1 = sizeof(ConvexPolyhedronCL); btAssert(sz==sz1); int b1 = sizeof(Body2); int b2 = sizeof(RigidBodyBase::Body); btAssert(b1==b2); BT_PROFILE("simulationLoop"); cl_int ciErrNum = CL_SUCCESS; if(m_data->m_useInterop) { #ifndef __APPLE__ clBuffer = g_interopBuffer->getCLBUffer(); BT_PROFILE("clEnqueueAcquireGLObjects"); { BT_PROFILE("clEnqueueAcquireGLObjects"); ciErrNum = clEnqueueAcquireGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, NULL); clFinish(g_cqCommandQue); } #else assert(0); #endif } else { glBindBuffer(GL_ARRAY_BUFFER, cube_vbo); glFlush(); BT_PROFILE("glMapBuffer and clEnqueueWriteBuffer"); blocking= CL_TRUE; hostPtr= (char*)glMapBuffer( GL_ARRAY_BUFFER,GL_READ_WRITE);//GL_WRITE_ONLY if (!clBuffer) { clBuffer = clCreateBuffer(g_cxMainContext, CL_MEM_READ_WRITE, VBOsize, 0, &ciErrNum); } clFinish(g_cqCommandQue); oclCHECKERROR(ciErrNum, CL_SUCCESS); ciErrNum = clEnqueueWriteBuffer ( g_cqCommandQue, clBuffer, blocking, 0, VBOsize, hostPtr,0,0,0 ); clFinish(g_cqCommandQue); } oclCHECKERROR(ciErrNum, CL_SUCCESS); if (1 && m_numPhysicsInstances) { gFpIO.m_numObjects = m_numPhysicsInstances; gFpIO.m_positionOffset = SHAPE_VERTEX_BUFFER_SIZE/4; gFpIO.m_clObjectsBuffer = clBuffer; if (useSapGpuBroadphase) { gFpIO.m_dAABB = m_data->m_BroadphaseSap->getAabbBuffer(); } else { gFpIO.m_dAABB = m_data->m_BroadphaseGrid->getAabbBuffer(); } gFpIO.m_dlocalShapeAABB = (cl_mem)m_data->m_localShapeAABBGPU->getBufferCL(); gFpIO.m_numOverlap = 0; { BT_PROFILE("setupGpuAabbs"); setupGpuAabbsFull(gFpIO,narrowphaseAndSolver->getBodiesGpu(), narrowphaseAndSolver->getCollidablesGpu() ); // setupGpuAabbsSimple(gFpIO); } { } if (1) { BT_PROFILE("calculateOverlappingPairs"); if (useSapGpuBroadphase) { m_data->m_BroadphaseSap->calculateOverlappingPairs(); gFpIO.m_dAllOverlappingPairs = m_data->m_BroadphaseSap->getOverlappingPairBuffer(); gFpIO.m_numOverlap = m_data->m_BroadphaseSap->getNumOverlap(); } else { m_data->m_BroadphaseGrid->calculateOverlappingPairs(); gFpIO.m_dAllOverlappingPairs = m_data->m_BroadphaseGrid->getOverlappingPairBuffer(); gFpIO.m_numOverlap = m_data->m_BroadphaseGrid->getNumOverlap(); } } //printf("gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap ); if (gFpIO.m_numOverlap>=0 && gFpIO.m_numOverlap<MAX_BROADPHASE_COLLISION_CL) { colorPairsOpenCL(gFpIO); if (runOpenCLKernels) { { //BT_PROFILE("setupBodies"); if (narrowphaseAndSolver) setupBodies(gFpIO, m_data->m_linVelBuf->getBufferCL(), m_data->m_angVelBuf->getBufferCL(), narrowphaseAndSolver->getBodiesGpu(), narrowphaseAndSolver->getBodyInertiasGpu()); } { BT_PROFILE("computeContactsAndSolver"); if (narrowphaseAndSolver) narrowphaseAndSolver->computeContactsAndSolver(gFpIO.m_dAllOverlappingPairs,gFpIO.m_numOverlap, gFpIO.m_dAABB,gFpIO.m_numObjects); } { BT_PROFILE("copyBodyVelocities"); if (narrowphaseAndSolver) copyBodyVelocities(gFpIO, m_data->m_linVelBuf->getBufferCL(), m_data->m_angVelBuf->getBufferCL(), narrowphaseAndSolver->getBodiesGpu(), narrowphaseAndSolver->getBodyInertiasGpu()); } } } else { printf("error, gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap); btAssert(0); } { BT_PROFILE("integrateTransforms"); if (runOpenCLKernels) { bool integrateOnGpu = true; if (integrateOnGpu) { int numObjects = m_numPhysicsInstances; int offset = SHAPE_VERTEX_BUFFER_SIZE/4; ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 0, sizeof(int), &offset); ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 1, sizeof(int), &numObjects); ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 2, sizeof(cl_mem), (void*)&clBuffer ); cl_mem lv = m_data->m_linVelBuf->getBufferCL(); cl_mem av = m_data->m_angVelBuf->getBufferCL(); cl_mem btimes = m_data->m_bodyTimes->getBufferCL(); ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 3, sizeof(cl_mem), (void*)&lv); ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 4, sizeof(cl_mem), (void*)&av); ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 5, sizeof(cl_mem), (void*)&btimes); size_t workGroupSize = 64; size_t numWorkItems = workGroupSize*((m_numPhysicsInstances + (workGroupSize)) / workGroupSize); if (workGroupSize>numWorkItems) workGroupSize=numWorkItems; ciErrNum = clEnqueueNDRangeKernel(g_cqCommandQue, g_integrateTransformsKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0); oclCHECKERROR(ciErrNum, CL_SUCCESS); } else { //debug velocity btAlignedObjectArray<btVector3> linvel; m_data->m_linVelBuf->copyToHost(linvel); for (int i=0;i<linvel.size();i++) { btAssert(_finite(linvel[i].x())); } btAssert(0); } } } } if(m_data->m_useInterop) { #ifndef __APPLE__ BT_PROFILE("clEnqueueReleaseGLObjects"); ciErrNum = clEnqueueReleaseGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, 0); clFinish(g_cqCommandQue); #endif } else { BT_PROFILE("clEnqueueReadBuffer clReleaseMemObject and glUnmapBuffer"); ciErrNum = clEnqueueReadBuffer ( g_cqCommandQue, clBuffer, blocking, 0, VBOsize, hostPtr,0,0,0); //clReleaseMemObject(clBuffer); clFinish(g_cqCommandQue); glUnmapBuffer( GL_ARRAY_BUFFER); glFlush(); } oclCHECKERROR(ciErrNum, CL_SUCCESS); if (runOpenCLKernels) { BT_PROFILE("clFinish"); clFinish(g_cqCommandQue); } }
void ParticleDemo::clientMoveAndDisplay() { int numParticles = NUM_PARTICLES_X*NUM_PARTICLES_Y*NUM_PARTICLES_Z; GLuint vbo = m_instancingRenderer->getInternalData()->m_vbo; glBindBuffer(GL_ARRAY_BUFFER, vbo); glFlush(); int posArraySize = numParticles*sizeof(float)*4; cl_bool blocking= CL_TRUE; char* hostPtr= (char*)glMapBufferRange( GL_ARRAY_BUFFER,m_instancingRenderer->getMaxShapeCapacity(),posArraySize, GL_MAP_WRITE_BIT|GL_MAP_READ_BIT );//GL_READ_WRITE);//GL_WRITE_ONLY GLint err = glGetError(); assert(err==GL_NO_ERROR); glFinish(); #if 1 //do some stuff using the OpenCL buffer bool useCpu = false; if (useCpu) { float* posBuffer = (float*)hostPtr; for (int i=0;i<numParticles;i++) { posBuffer[i*4+1] += 0.1; } } else { cl_int ciErrNum; if (!m_data->m_clPositionBuffer) { m_data->m_clPositionBuffer = clCreateBuffer(m_clData->m_clContext, CL_MEM_READ_WRITE, posArraySize, 0, &ciErrNum); clFinish(m_clData->m_clQueue); oclCHECKERROR(ciErrNum, CL_SUCCESS); ciErrNum = clEnqueueWriteBuffer ( m_clData->m_clQueue,m_data->m_clPositionBuffer, blocking,0,posArraySize,hostPtr,0,0,0 ); clFinish(m_clData->m_clQueue); } if (0) { b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_velocitiesGPU->getBufferCL(), true ), b3BufferInfoCL( m_data->m_clPositionBuffer) }; b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_updatePositionsKernel ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( numParticles); launcher.launch1D( numParticles); clFinish(m_clData->m_clQueue); } if (1) { b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_clPositionBuffer), b3BufferInfoCL( m_data->m_velocitiesGPU->getBufferCL() ), b3BufferInfoCL( m_data->m_simParamGPU->getBufferCL(),true) }; b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_updatePositionsKernel2 ); launcher.setConst( numParticles); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); float timeStep = 1.f/60.f; launcher.setConst( timeStep); launcher.launch1D( numParticles); clFinish(m_clData->m_clQueue); } { b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_clPositionBuffer), b3BufferInfoCL( m_data->m_broadphaseGPU->getAabbBufferWS()), }; b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_updateAabbsKernel ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( m_data->m_simParamCPU[0].m_particleRad); launcher.setConst( numParticles); launcher.launch1D( numParticles); clFinish(m_clData->m_clQueue); } //broadphase int numPairsGPU=0; cl_mem pairsGPU = 0; { m_data->m_broadphaseGPU->calculateOverlappingPairs(64*numParticles); pairsGPU = m_data->m_broadphaseGPU->getOverlappingPairBuffer(); numPairsGPU = m_data->m_broadphaseGPU->getNumOverlap(); } if (numPairsGPU) { b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_clPositionBuffer), b3BufferInfoCL( m_data->m_velocitiesGPU->getBufferCL() ), b3BufferInfoCL( m_data->m_broadphaseGPU->getOverlappingPairBuffer(),true), }; b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_collideParticlesKernel); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( numPairsGPU); launcher.launch1D( numPairsGPU); clFinish(m_clData->m_clQueue); //__kernel void collideParticlesKernel( __global float4* pPos, __global float4* pVel, __global int2* pairs, const int numPairs) } if (1) { ciErrNum = clEnqueueReadBuffer ( m_clData->m_clQueue, m_data->m_clPositionBuffer, blocking, 0, posArraySize, hostPtr,0,0,0); //clReleaseMemObject(clBuffer); clFinish(m_clData->m_clQueue); } } #endif glUnmapBuffer( GL_ARRAY_BUFFER); glFlush(); /* int numParticles = NUM_PARTICLES_X*NUM_PARTICLES_Y*NUM_PARTICLES_Z; for (int objectIndex=0;objectIndex<numParticles;objectIndex++) { float pos[4]={0,0,0,0}; float orn[4]={0,0,0,1}; // m_instancingRenderer->writeSingleInstanceTransformToGPU(pos,orn,i); { glBindBuffer(GL_ARRAY_BUFFER, m_instancingRenderer->getInternalData()->m_vbo); glFlush(); char* orgBase = (char*)glMapBuffer( GL_ARRAY_BUFFER,GL_READ_WRITE); //b3GraphicsInstance* gfxObj = m_graphicsInstances[k]; int totalNumInstances= numParticles; int POSITION_BUFFER_SIZE = (totalNumInstances*sizeof(float)*4); char* base = orgBase; int capInBytes = m_instancingRenderer->getMaxShapeCapacity(); float* positions = (float*)(base+capInBytes); float* orientations = (float*)(base+capInBytes+ POSITION_BUFFER_SIZE); positions[objectIndex*4+1] += 0.1f; glUnmapBuffer( GL_ARRAY_BUFFER); glFlush(); } } */ }
void ParticleDemo::setupScene(const ConstructionInfo& ci) { initCL(ci.preferredOpenCLDeviceIndex,ci.preferredOpenCLPlatformIndex); int numParticles = NUM_PARTICLES_X*NUM_PARTICLES_Y*NUM_PARTICLES_Z; int maxObjects = NUM_PARTICLES_X*NUM_PARTICLES_Y*NUM_PARTICLES_Z+1024; int maxPairsSmallProxy = 32; float radius = 3.f*m_data->m_simParamCPU[0].m_particleRad; m_data->m_broadphaseGPU = new b3GpuSapBroadphase(m_clData->m_clContext ,m_clData->m_clDevice,m_clData->m_clQueue);//overlappingPairCache,b3Vector3(4.f, 4.f, 4.f), 128, 128, 128,maxObjects, maxObjects, maxPairsSmallProxy, 100.f, 128, /*m_data->m_broadphaseGPU = new b3GridBroadphaseCl(overlappingPairCache,b3Vector3(radius,radius,radius), 128, 128, 128, maxObjects, maxObjects, maxPairsSmallProxy, 100.f, 128, m_clData->m_clContext ,m_clData->m_clDevice,m_clData->m_clQueue); */ m_data->m_velocitiesGPU = new b3OpenCLArray<b3Vector3>(m_clData->m_clContext,m_clData->m_clQueue,numParticles); m_data->m_velocitiesCPU.resize(numParticles); for (int i=0;i<numParticles;i++) { m_data->m_velocitiesCPU[i].setValue(0,0,0); } m_data->m_velocitiesGPU->copyFromHost(m_data->m_velocitiesCPU); m_data->m_simParamGPU = new b3OpenCLArray<b3SimParams>(m_clData->m_clContext,m_clData->m_clQueue,1,false); m_data->m_simParamGPU->copyFromHost(m_data->m_simParamCPU); cl_int pErrNum; cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_clData->m_clContext,m_clData->m_clDevice,particleKernelsString,0,"",INTEROPKERNEL_SRC_PATH); m_data->m_updatePositionsKernel = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext, m_clData->m_clDevice,particleKernelsString, "updatePositionsKernel" ,&pErrNum,prog); oclCHECKERROR(pErrNum, CL_SUCCESS); m_data->m_updatePositionsKernel2 = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext, m_clData->m_clDevice,particleKernelsString, "integrateMotionKernel" ,&pErrNum,prog); oclCHECKERROR(pErrNum, CL_SUCCESS); m_data->m_updateAabbsKernel= b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext, m_clData->m_clDevice,particleKernelsString, "updateAabbsKernel" ,&pErrNum,prog); oclCHECKERROR(pErrNum, CL_SUCCESS); m_data->m_collideParticlesKernel = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext, m_clData->m_clDevice,particleKernelsString, "collideParticlesKernel" ,&pErrNum,prog); oclCHECKERROR(pErrNum, CL_SUCCESS); m_instancingRenderer = ci.m_instancingRenderer; int strideInBytes = 9*sizeof(float); bool pointSprite = true; int shapeId =-1; if (pointSprite) { int numVertices = sizeof(point_sphere_vertices)/strideInBytes; int numIndices = sizeof(point_sphere_indices)/sizeof(int); shapeId = m_instancingRenderer->registerShape(&point_sphere_vertices[0],numVertices,point_sphere_indices,numIndices,B3_GL_POINTS); } else { int numVertices = sizeof(low_sphere_vertices)/strideInBytes; int numIndices = sizeof(low_sphere_indices)/sizeof(int); shapeId = m_instancingRenderer->registerShape(&low_sphere_vertices[0],numVertices,low_sphere_indices,numIndices); } float position[4] = {0,0,0,0}; float quaternion[4] = {0,0,0,1}; float color[4]={1,0,0,1}; float scaling[4] = {0.023,0.023,0.023,1}; int userIndex = 0; for (int x=0;x<NUM_PARTICLES_X;x++) { for (int y=0;y<NUM_PARTICLES_Y;y++) { for (int z=0;z<NUM_PARTICLES_Z;z++) { float rad = m_data->m_simParamCPU[0].m_particleRad; position[0] = x*(rad*3); position[1] = y*(rad*3); position[2] = z*(rad*3); color[0] = float(x)/float(NUM_PARTICLES_X); color[1] = float(y)/float(NUM_PARTICLES_Y); color[2] = float(z)/float(NUM_PARTICLES_Z); int id = m_instancingRenderer->registerGraphicsInstance(shapeId,position,quaternion,color,scaling); void* userPtr = (void*)userIndex; int collidableIndex = userIndex; b3Vector3 aabbMin,aabbMax; b3Vector3 particleRadius(rad,rad,rad); aabbMin = b3Vector3(position[0],position[1],position[2])-particleRadius; aabbMax = b3Vector3(position[0],position[1],position[2])+particleRadius; m_data->m_broadphaseGPU->createProxy(aabbMin,aabbMax,collidableIndex,1,1); userIndex++; } } } m_data->m_broadphaseGPU->writeAabbsToGpu(); float camPos[4]={1.5,0.5,2.5,0}; m_instancingRenderer->setCameraTargetPosition(camPos); m_instancingRenderer->setCameraDistance(4); m_instancingRenderer->writeTransforms(); }
btScalar btParallelConstraintSolver::solveGroupCacheFriendlySetup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer,btStackAlloc* stackAlloc) { BT_PROFILE("solveGroupCacheFriendlySetup"); #if USE_SEQUENTIAL_SOLVER return btSequentialImpulseConstraintSolver::solveGroupCacheFriendlySetup(bodies, numBodies, manifoldPtr, numManifolds, constraints, numConstraints, infoGlobal, debugDrawer, stackAlloc); #else (void)stackAlloc; (void)debugDrawer; // nCall++; // printf("Call : %d\n", nCall); // if(nCall == 18) // { // printf("========= HIT : %d\n", nCall); // } if (!(numConstraints + numManifolds)) { // printf("empty\n"); return 0.f; } int numConstr; { BT_PROFILE("prepareBatches"); numConstr = prepareBatches(manifoldPtr, numManifolds, infoGlobal); } if(!numConstr) { return 0.f; } { BT_PROFILE("runSetupKernel"); #if RUN_KERNELS_DIRECTLY for(int i = 0; i < m_localGroupSize; i++) { kSetupContact(this, m_taskParams, (btContactSolverInfo*)&infoGlobal, i); } #else // Set the Argument values cl_int ciErrNum; btParallelConstraintSolver* pSolver = this; btParallelConstraintSolverSetupTaskParams* pTaskParams = m_taskParams; btContactSolverInfo* pInfoGlobal = (btContactSolverInfo*)&infoGlobal; ciErrNum = clSetKernelArg(s_setupContactKernel, 0, sizeof(cl_mem), (void*)&pSolver); ciErrNum |= clSetKernelArg(s_setupContactKernel, 1, sizeof(cl_mem), (void*)&pTaskParams); ciErrNum |= clSetKernelArg(s_setupContactKernel, 2, sizeof(cl_mem), (void*)&pInfoGlobal); oclCHECKERROR(ciErrNum, CL_SUCCESS); size_t szWorkSize[1]; szWorkSize[0] = m_localGroupSize; ciErrNum = clEnqueueNDRangeKernel(s_cqCommandQue, s_setupContactKernel, 1, NULL, szWorkSize, szWorkSize, 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); clFlush(s_cqCommandQue); #endif } #if 0 { FILE* ff = fopen("C:\\ttt\\theadstat.txt", "wt"); int totalCont = m_tmpSolverContactConstraintPool.size(); int nRows = (totalCont + m_localGroupSize - 1)/ m_localGroupSize; for(int n = 0; n < nRows; n++) { for(int i = 0; i < m_localGroupSize; i++) { int indx = m_taskParams[i].m_startIndex + n; if(indx < totalCont) { int curr = m_tmpSolverContactConstraintPool[indx].m_numConsecutiveRowsPerKernel; int delta; if(n) { delta = curr - m_tmpSolverContactConstraintPool[indx-1].m_numConsecutiveRowsPerKernel; } else delta = 0; fprintf(ff, "%10d(%6d)\t", curr, delta); } } fprintf(ff, "\n"); } fclose(ff); } #endif { BT_PROFILE("setOrder"); ///@todo: use stack allocator for such temporarily memory, same for solver bodies/constraints int numConstraintPool = m_tmpSolverContactConstraintPool.size(); int numFrictionPool = m_tmpSolverContactFrictionConstraintPool.size(); m_orderTmpConstraintPool.resize(numConstraintPool); m_orderFrictionConstraintPool.resize(numFrictionPool); { int i; for (i=0;i<numConstraintPool;i++) { m_orderTmpConstraintPool[i] = i; } for (i=0;i<numFrictionPool;i++) { m_orderFrictionConstraintPool[i] = i; } } } return 0.f; #endif }
void CLPhysicsDemo::stepSimulation() { int sz = sizeof(ConvexPolyhedronCL2); int sz1 = sizeof(ConvexPolyhedronCL); btAssert(sz==sz1); int b1 = sizeof(Body2); int b2 = sizeof(RigidBodyBase::Body); btAssert(b1==b2); int ciErrNum=CL_SUCCESS; if (1 && m_numPhysicsInstances) { gFpIO.m_numObjects = m_numPhysicsInstances; if (useSapGpuBroadphase) { gFpIO.m_dAABB = m_data->m_BroadphaseSap->getAabbBuffer(); } else { gFpIO.m_dAABB = m_data->m_BroadphaseGrid->getAabbBuffer(); } gFpIO.m_dlocalShapeAABB = (cl_mem)m_data->m_localShapeAABBGPU->getBufferCL(); gFpIO.m_numOverlap = 0; { BT_PROFILE("setupGpuAabbs"); setupGpuAabbsFull(gFpIO,m_narrowphaseAndSolver->getBodiesGpu(), m_narrowphaseAndSolver->getCollidablesGpu() ); // setupGpuAabbsSimple(gFpIO); } { } if (1) { BT_PROFILE("calculateOverlappingPairs"); if (useSapGpuBroadphase) { m_data->m_BroadphaseSap->calculateOverlappingPairs(); gFpIO.m_numOverlap = m_data->m_BroadphaseSap->getNumOverlap(); gFpIO.m_dAllOverlappingPairs = m_data->m_BroadphaseSap->getOverlappingPairBuffer(); } else { m_data->m_BroadphaseGrid->calculateOverlappingPairs(); gFpIO.m_dAllOverlappingPairs = m_data->m_BroadphaseGrid->getOverlappingPairBuffer(); gFpIO.m_numOverlap = m_data->m_BroadphaseGrid->getNumOverlap(); } } //printf("gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap ); if (gFpIO.m_numOverlap>=0 && gFpIO.m_numOverlap<MAX_BROADPHASE_COLLISION_CL) { //colorPairsOpenCL(gFpIO); if (runOpenCLKernels) { { BT_PROFILE("setupBodies"); if (m_narrowphaseAndSolver) setupBodies(gFpIO, m_data->m_linVelBuf->getBufferCL(), m_data->m_angVelBuf->getBufferCL(), m_narrowphaseAndSolver->getBodiesGpu(), m_narrowphaseAndSolver->getBodyInertiasGpu()); } { BT_PROFILE("computeContacts"); if (m_narrowphaseAndSolver) m_narrowphaseAndSolver->computeContacts(gFpIO.m_dAllOverlappingPairs,gFpIO.m_numOverlap, gFpIO.m_dAABB,gFpIO.m_numObjects); } bool useOpenCLSolver = true; if (useOpenCLSolver) { BT_PROFILE("solve Contact Constraints (OpenCL)"); if (m_narrowphaseAndSolver) m_narrowphaseAndSolver->solveContacts(); } else { BT_PROFILE("solve Contact Constraints CPU/serial"); if (m_narrowphaseAndSolver && m_data->m_pgsSolver && m_narrowphaseAndSolver->getNumContactsGpu()) { btGpuNarrowphaseAndSolver* np = m_narrowphaseAndSolver; btAlignedObjectArray<RigidBodyBase::Body> hostBodies; btOpenCLArray<RigidBodyBase::Body> gpuBodies(g_cxMainContext,g_cqCommandQue,0,true); gpuBodies.setFromOpenCLBuffer(np->getBodiesGpu(),np->getNumBodiesGpu()); gpuBodies.copyToHost(hostBodies); btAlignedObjectArray<RigidBodyBase::Inertia> hostInertias; btOpenCLArray<RigidBodyBase::Inertia> gpuInertias(g_cxMainContext,g_cqCommandQue,0,true); gpuInertias.setFromOpenCLBuffer(np->getBodyInertiasGpu(),np->getNumBodiesGpu()); gpuInertias.copyToHost(hostInertias); btAlignedObjectArray<Contact4> hostContacts; btOpenCLArray<Contact4> gpuContacts(g_cxMainContext,g_cqCommandQue,0,true); gpuContacts.setFromOpenCLBuffer(np->getContactsGpu(),np->getNumContactsGpu()); gpuContacts.copyToHost(hostContacts); { BT_PROFILE("pgsSolver::solveContacts"); m_data->m_pgsSolver->solveContacts(np->getNumBodiesGpu(),&hostBodies[0],&hostInertias[0],np->getNumContactsGpu(),&hostContacts[0]); } gpuBodies.copyFromHost(hostBodies); } } { BT_PROFILE("copyBodyVelocities"); if (m_narrowphaseAndSolver) copyBodyVelocities(gFpIO, m_data->m_linVelBuf->getBufferCL(), m_data->m_angVelBuf->getBufferCL(), m_narrowphaseAndSolver->getBodiesGpu()); } } } else { printf("error, gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap); btAssert(0); } { BT_PROFILE("integrateTransforms"); if (runOpenCLKernels) { bool integrateOnGpu = true; if (integrateOnGpu) { int numObjects = m_numPhysicsInstances; int offset = m_maxShapeBufferCapacityInBytes/4; ciErrNum = clSetKernelArg(g_integrateTransformsKernel2, 0, sizeof(int), &offset); ciErrNum = clSetKernelArg(g_integrateTransformsKernel2, 1, sizeof(int), &numObjects); cl_mem bodyGpuBuffer = m_narrowphaseAndSolver->getBodiesGpu(); ciErrNum = clSetKernelArg(g_integrateTransformsKernel2, 2, sizeof(cl_mem), (void*)&bodyGpuBuffer ); cl_mem lv = m_data->m_linVelBuf->getBufferCL(); cl_mem av = m_data->m_angVelBuf->getBufferCL(); cl_mem btimes = m_data->m_bodyTimes->getBufferCL(); ciErrNum = clSetKernelArg(g_integrateTransformsKernel2, 3, sizeof(cl_mem), (void*)&lv); ciErrNum = clSetKernelArg(g_integrateTransformsKernel2, 4, sizeof(cl_mem), (void*)&av); ciErrNum = clSetKernelArg(g_integrateTransformsKernel2, 5, sizeof(cl_mem), (void*)&btimes); size_t workGroupSize = 64; size_t numWorkItems = workGroupSize*((m_numPhysicsInstances + (workGroupSize)) / workGroupSize); if (workGroupSize>numWorkItems) workGroupSize=numWorkItems; ciErrNum = clEnqueueNDRangeKernel(g_cqCommandQue, g_integrateTransformsKernel2, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0); oclCHECKERROR(ciErrNum, CL_SUCCESS); } else { #ifdef _WIN32 //debug velocity btAlignedObjectArray<btVector3> linvel; m_data->m_linVelBuf->copyToHost(linvel); for (int i=0;i<linvel.size();i++) { btAssert(_finite(linvel[i].x())); } #endif btAssert(0); } } } } oclCHECKERROR(ciErrNum, CL_SUCCESS); if (runOpenCLKernels) { BT_PROFILE("clFinish"); clFinish(g_cqCommandQue); } }
void CLPhysicsDemo::stepSimulation() { BT_PROFILE("simulationLoop"); { BT_PROFILE("glFinish"); glFinish(); } cl_int ciErrNum = CL_SUCCESS; if(m_data->m_useInterop) { clBuffer = g_interopBuffer->getCLBUffer(); BT_PROFILE("clEnqueueAcquireGLObjects"); ciErrNum = clEnqueueAcquireGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, NULL); adl::DeviceUtils::waitForCompletion( g_deviceCL ); } else { glBindBuffer(GL_ARRAY_BUFFER, cube_vbo); glFlush(); BT_PROFILE("glMapBuffer and clEnqueueWriteBuffer"); blocking= CL_TRUE; hostPtr= (char*)glMapBuffer( GL_ARRAY_BUFFER,GL_READ_WRITE);//GL_WRITE_ONLY if (!clBuffer) { clBuffer = clCreateBuffer(g_cxMainContext, CL_MEM_READ_WRITE, VBOsize, 0, &ciErrNum); } adl::DeviceUtils::waitForCompletion( g_deviceCL ); oclCHECKERROR(ciErrNum, CL_SUCCESS); ciErrNum = clEnqueueWriteBuffer ( g_cqCommandQue, clBuffer, blocking, 0, VBOsize, hostPtr,0,0,0 ); adl::DeviceUtils::waitForCompletion( g_deviceCL ); } oclCHECKERROR(ciErrNum, CL_SUCCESS); if (runOpenCLKernels && m_numPhysicsInstances) { gFpIO.m_numObjects = m_numPhysicsInstances; gFpIO.m_positionOffset = SHAPE_VERTEX_BUFFER_SIZE/4; gFpIO.m_clObjectsBuffer = clBuffer; gFpIO.m_dAABB = m_data->m_Broadphase->m_dAABB; gFpIO.m_dlocalShapeAABB = (cl_mem)m_data->m_localShapeAABB->m_ptr; gFpIO.m_numOverlap = 0; { BT_PROFILE("setupGpuAabbs"); setupGpuAabbsFull(gFpIO,narrowphaseAndSolver->getBodiesGpu() ); } if (1) { BT_PROFILE("calculateOverlappingPairs"); m_data->m_Broadphase->calculateOverlappingPairs(0, m_numPhysicsInstances); gFpIO.m_dAllOverlappingPairs = m_data->m_Broadphase->m_dAllOverlappingPairs; gFpIO.m_numOverlap = m_data->m_Broadphase->m_numPrefixSum; } //printf("gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap ); if (gFpIO.m_numOverlap>=0 && gFpIO.m_numOverlap<MAX_BROADPHASE_COLLISION_CL) { colorPairsOpenCL(gFpIO); if (1) { { //BT_PROFILE("setupBodies"); if (narrowphaseAndSolver) setupBodies(gFpIO, gLinVelMem, gAngVelMem, narrowphaseAndSolver->getBodiesGpu(), narrowphaseAndSolver->getBodyInertiasGpu()); } if (gFpIO.m_numOverlap) { BT_PROFILE("computeContactsAndSolver"); if (narrowphaseAndSolver) narrowphaseAndSolver->computeContactsAndSolver(gFpIO.m_dAllOverlappingPairs,gFpIO.m_numOverlap); } { BT_PROFILE("copyBodyVelocities"); if (narrowphaseAndSolver) copyBodyVelocities(gFpIO, gLinVelMem, gAngVelMem, narrowphaseAndSolver->getBodiesGpu(), narrowphaseAndSolver->getBodyInertiasGpu()); } } } else { printf("error, gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap); btAssert(0); } { BT_PROFILE("integrateTransforms"); if (runOpenCLKernels) { int numObjects = m_numPhysicsInstances; int offset = SHAPE_VERTEX_BUFFER_SIZE/4; ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 0, sizeof(int), &offset); ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 1, sizeof(int), &numObjects); ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 2, sizeof(cl_mem), (void*)&clBuffer ); ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 3, sizeof(cl_mem), (void*)&gLinVelMem); ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 4, sizeof(cl_mem), (void*)&gAngVelMem); ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 5, sizeof(cl_mem), (void*)&gBodyTimes); size_t workGroupSize = 64; size_t numWorkItems = workGroupSize*((m_numPhysicsInstances + (workGroupSize)) / workGroupSize); if (workGroupSize>numWorkItems) workGroupSize=numWorkItems; ciErrNum = clEnqueueNDRangeKernel(g_cqCommandQue, g_integrateTransformsKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0); oclCHECKERROR(ciErrNum, CL_SUCCESS); } } } if(m_data->m_useInterop) { BT_PROFILE("clEnqueueReleaseGLObjects"); ciErrNum = clEnqueueReleaseGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, 0); adl::DeviceUtils::waitForCompletion( g_deviceCL ); } else { BT_PROFILE("clEnqueueReadBuffer clReleaseMemObject and glUnmapBuffer"); ciErrNum = clEnqueueReadBuffer ( g_cqCommandQue, clBuffer, blocking, 0, VBOsize, hostPtr,0,0,0); //clReleaseMemObject(clBuffer); adl::DeviceUtils::waitForCompletion( g_deviceCL ); glUnmapBuffer( GL_ARRAY_BUFFER); glFlush(); } oclCHECKERROR(ciErrNum, CL_SUCCESS); if (runOpenCLKernels) { BT_PROFILE("clFinish"); clFinish(g_cqCommandQue); } }
void btGpuDemo3dOCLWrap::initCL(int argc, char** argv) { cl_int ciErrNum; // m_cxMainContext = clCreateContextFromType(0, CL_DEVICE_TYPE_ALL, NULL, NULL, &ciErrNum); m_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_ALL, &ciErrNum); oclCHECKERROR(ciErrNum, CL_SUCCESS); m_cdDevice = btOclGetMaxFlopsDev(m_cxMainContext); // create a command-queue m_cqCommandQue = clCreateCommandQueue(m_cxMainContext, m_cdDevice, 0, &ciErrNum); oclCHECKERROR(ciErrNum, CL_SUCCESS); // Program Setup size_t program_length; char* fileName = "Gpu3dDemoOCL.cl"; FILE * fp = fopen(fileName, "rb"); char newFileName[512]; if (fp == NULL) { sprintf(newFileName,"..//%s",fileName); fp = fopen(newFileName, "rb"); if (fp) fileName = newFileName; } if (fp == NULL) { sprintf(newFileName,"Demos//Gpu3dDemo//%s",fileName); fp = fopen(newFileName, "rb"); if (fp) fileName = newFileName; } if (fp == NULL) { sprintf(newFileName,"..//..//..//..//..//Demos//Gpu3dDemo//%s",fileName); fp = fopen(newFileName, "rb"); if (fp) fileName = newFileName; else { printf("cannot find %s\n",newFileName); exit(0); } } char *source = btOclLoadProgSource(fileName, "", &program_length); if(source == NULL) { printf("ERROR : OpenCL can't load file %s\n", fileName); } btAssert(source != NULL); // create the program printf("OpenCL compiles %s ...", fileName); m_cpProgram = clCreateProgramWithSource(m_cxMainContext, 1, (const char**)&source, &program_length, &ciErrNum); oclCHECKERROR(ciErrNum, CL_SUCCESS); free(source); // build the program ciErrNum = clBuildProgram(m_cpProgram, 0, NULL, "-I .", NULL, NULL); if(ciErrNum != CL_SUCCESS) { // write out standard error char cBuildLog[10240]; clGetProgramBuildInfo(m_cpProgram, btOclGetFirstDev(m_cxMainContext), CL_PROGRAM_BUILD_LOG, sizeof(cBuildLog), cBuildLog, NULL ); printf("\n\n%s\n\n\n", cBuildLog); printf("Press ENTER key to terminate the program\n"); getchar(); exit(-1); } printf("OK\n"); }
void btGpuDemo3dOCLWrap::allocateBuffers(int maxObjs, int maxConstr, int maxPointsPerConstr, int maxBatches) { cl_int ciErrNum; m_maxObj = maxObjs; m_maxConstr = maxConstr; m_maxPointsPerConstr = maxPointsPerConstr; unsigned int memSize = sizeof(float)* 4 * m_maxObj * 4; m_dTrans = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum); oclCHECKERROR(ciErrNum, CL_SUCCESS); memSize = sizeof(float)* 4 * m_maxObj; m_dVel = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum); oclCHECKERROR(ciErrNum, CL_SUCCESS); m_dAngVel = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum); oclCHECKERROR(ciErrNum, CL_SUCCESS); memSize = sizeof(int) * m_maxConstr; m_dIds = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum); oclCHECKERROR(ciErrNum, CL_SUCCESS); memSize = sizeof(int) * 2 * m_maxConstr; m_dBatchIds = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum); oclCHECKERROR(ciErrNum, CL_SUCCESS); memSize = sizeof(float) * m_maxConstr * m_maxPointsPerConstr; m_dLambdaDtBox = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum); oclCHECKERROR(ciErrNum, CL_SUCCESS); m_dPositionConstraint = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum); oclCHECKERROR(ciErrNum, CL_SUCCESS); memSize = sizeof(float) * 4 * m_maxConstr * m_maxPointsPerConstr; m_dNormal = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum); oclCHECKERROR(ciErrNum, CL_SUCCESS); m_dContact = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum); oclCHECKERROR(ciErrNum, CL_SUCCESS); memSize = sizeof(float) * m_maxObj * 4 * 2; m_dForceTorqueDamp = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum); oclCHECKERROR(ciErrNum, CL_SUCCESS); memSize = sizeof(float) * m_maxObj * 4 * 3; m_dInvInertiaMass = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum); oclCHECKERROR(ciErrNum, CL_SUCCESS); memSize = sizeof(float) * 4 * 2; m_dParams = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum); oclCHECKERROR(ciErrNum, CL_SUCCESS); }
void btGpuDemo3dOCLWrap::setKernelArg(int kernelId, int argNum, int argSize, void* argPtr) { cl_int ciErrNum; ciErrNum = clSetKernelArg(m_kernels[kernelId].m_kernel, argNum, argSize, argPtr); oclCHECKERROR(ciErrNum, CL_SUCCESS); }