void b3Solver::solveContactConstraint( const b3OpenCLArray<b3RigidBodyCL>* bodyBuf, const b3OpenCLArray<b3InertiaCL>* shapeBuf, b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches) { b3Int4 cdata = b3MakeInt4( n, 0, 0, 0 ); { const int nn = N_SPLIT*N_SPLIT; cdata.x = 0; cdata.y = maxNumBatches;//250; int numWorkItems = 64*nn/N_BATCHES; #ifdef DEBUG_ME SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems]; adl::b3OpenCLArray<SolverDebugInfo> gpuDebugInfo(data->m_device,numWorkItems); #endif { B3_PROFILE("m_batchSolveKernel iterations"); for(int iter=0; iter<m_nIterations; iter++) { for(int ib=0; ib<N_BATCHES; ib++) { if (verify) { checkConstraintBatch(bodyBuf,shapeBuf,constraint,m_numConstraints,m_offsets,ib); } #ifdef DEBUG_ME memset(debugInfo,0,sizeof(SolverDebugInfo)*numWorkItems); gpuDebugInfo.write(debugInfo,numWorkItems); #endif cdata.z = ib; cdata.w = N_SPLIT; b3LauncherCL launcher( m_queue, m_solveContactKernel ); #if 1 b3BufferInfoCL bInfo[] = { b3BufferInfoCL( bodyBuf->getBufferCL() ), b3BufferInfoCL( shapeBuf->getBufferCL() ), b3BufferInfoCL( constraint->getBufferCL() ), b3BufferInfoCL( m_numConstraints->getBufferCL() ), b3BufferInfoCL( m_offsets->getBufferCL() ) #ifdef DEBUG_ME , b3BufferInfoCL(&gpuDebugInfo) #endif }; launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); //launcher.setConst( cdata.x ); launcher.setConst( cdata.y ); launcher.setConst( cdata.z ); launcher.setConst( cdata.w ); launcher.launch1D( numWorkItems, 64 ); #else const char* fileName = "m_batchSolveKernel.bin"; FILE* f = fopen(fileName,"rb"); if (f) { int sizeInBytes=0; if (fseek(f, 0, SEEK_END) || (sizeInBytes = ftell(f)) == EOF || fseek(f, 0, SEEK_SET)) { printf("error, cannot get file size\n"); exit(0); } unsigned char* buf = (unsigned char*) malloc(sizeInBytes); fread(buf,sizeInBytes,1,f); int serializedBytes = launcher.deserializeArgs(buf, sizeInBytes,m_context); int num = *(int*)&buf[serializedBytes]; launcher.launch1D( num); //this clFinish is for testing on errors clFinish(m_queue); } #endif #ifdef DEBUG_ME clFinish(m_queue); gpuDebugInfo.read(debugInfo,numWorkItems); clFinish(m_queue); for (int i=0; i<numWorkItems; i++) { if (debugInfo[i].m_valInt2>0) { printf("debugInfo[i].m_valInt2 = %d\n",i,debugInfo[i].m_valInt2); } if (debugInfo[i].m_valInt3>0) { printf("debugInfo[i].m_valInt3 = %d\n",i,debugInfo[i].m_valInt3); } } #endif //DEBUG_ME } } clFinish(m_queue); } cdata.x = 1; bool applyFriction=true; if (applyFriction) { B3_PROFILE("m_batchSolveKernel iterations2"); for(int iter=0; iter<m_nIterations; iter++) { for(int ib=0; ib<N_BATCHES; ib++) { cdata.z = ib; cdata.w = N_SPLIT; b3BufferInfoCL bInfo[] = { b3BufferInfoCL( bodyBuf->getBufferCL() ), b3BufferInfoCL( shapeBuf->getBufferCL() ), b3BufferInfoCL( constraint->getBufferCL() ), b3BufferInfoCL( m_numConstraints->getBufferCL() ), b3BufferInfoCL( m_offsets->getBufferCL() ) #ifdef DEBUG_ME ,b3BufferInfoCL(&gpuDebugInfo) #endif //DEBUG_ME }; b3LauncherCL launcher( m_queue, m_solveFrictionKernel ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); //launcher.setConst( cdata.x ); launcher.setConst( cdata.y ); launcher.setConst( cdata.z ); launcher.setConst( cdata.w ); launcher.launch1D( 64*nn/N_BATCHES, 64 ); } } clFinish(m_queue); } #ifdef DEBUG_ME delete[] debugInfo; #endif //DEBUG_ME } }
void b3Solver::batchContacts( b3OpenCLArray<b3Contact4>* contacts, int nContacts, b3OpenCLArray<unsigned int>* nNative, b3OpenCLArray<unsigned int>* offsetsNative, int staticIdx ) { int numWorkItems = 64*N_SPLIT*N_SPLIT; { B3_PROFILE("batch generation"); b3Int4 cdata; cdata.x = nContacts; cdata.y = 0; cdata.z = staticIdx; #ifdef BATCH_DEBUG SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems]; adl::b3OpenCLArray<SolverDebugInfo> gpuDebugInfo(data->m_device,numWorkItems); memset(debugInfo,0,sizeof(SolverDebugInfo)*numWorkItems); gpuDebugInfo.write(debugInfo,numWorkItems); #endif b3BufferInfoCL bInfo[] = { b3BufferInfoCL( contacts->getBufferCL() ), b3BufferInfoCL( m_contactBuffer2->getBufferCL()), b3BufferInfoCL( nNative->getBufferCL() ), b3BufferInfoCL( offsetsNative->getBufferCL() ), #ifdef BATCH_DEBUG , b3BufferInfoCL(&gpuDebugInfo) #endif }; { B3_PROFILE("batchingKernel"); //b3LauncherCL launcher( m_queue, m_batchingKernel); cl_kernel k = useNewBatchingKernel ? m_batchingKernelNew : m_batchingKernel; b3LauncherCL launcher( m_queue, k); if (!useNewBatchingKernel ) { launcher.setBuffer( contacts->getBufferCL() ); } launcher.setBuffer( m_contactBuffer2->getBufferCL() ); launcher.setBuffer( nNative->getBufferCL()); launcher.setBuffer( offsetsNative->getBufferCL()); //launcher.setConst( cdata ); launcher.setConst(staticIdx); launcher.launch1D( numWorkItems, 64 ); clFinish(m_queue); } #ifdef BATCH_DEBUG aaaa b3Contact4* hostContacts = new b3Contact4[nContacts]; m_contactBuffer->read(hostContacts,nContacts); clFinish(m_queue); gpuDebugInfo.read(debugInfo,numWorkItems); clFinish(m_queue); for (int i=0; i<numWorkItems; i++) { if (debugInfo[i].m_valInt1>0) { printf("catch\n"); } if (debugInfo[i].m_valInt2>0) { printf("catch22\n"); } if (debugInfo[i].m_valInt3>0) { printf("catch666\n"); } if (debugInfo[i].m_valInt4>0) { printf("catch777\n"); } } delete[] debugInfo; #endif //BATCH_DEBUG } // copy buffer to buffer //b3Assert(m_contactBuffer->size()==nContacts); //contacts->copyFromOpenCLArray( *m_contactBuffer); //clFinish(m_queue);//needed? }
void Solver::batchContacts( btOpenCLArray<Contact4>* contacts, int nContacts, btOpenCLArray<u32>* nNative, btOpenCLArray<u32>* offsetsNative, int staticIdx ) { { BT_PROFILE("batch generation"); btInt4 cdata; cdata.x = nContacts; cdata.y = 0; cdata.z = staticIdx; int numWorkItems = 64*N_SPLIT*N_SPLIT; #ifdef BATCH_DEBUG SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems]; adl::btOpenCLArray<SolverDebugInfo> gpuDebugInfo(data->m_device,numWorkItems); memset(debugInfo,0,sizeof(SolverDebugInfo)*numWorkItems); gpuDebugInfo.write(debugInfo,numWorkItems); #endif btBufferInfoCL bInfo[] = { btBufferInfoCL( contacts->getBufferCL() ), btBufferInfoCL( m_contactBuffer->getBufferCL() ), btBufferInfoCL( nNative->getBufferCL() ), btBufferInfoCL( offsetsNative->getBufferCL() ) #ifdef BATCH_DEBUG , btBufferInfoCL(&gpuDebugInfo) #endif }; { BT_PROFILE("batchingKernel"); btLauncherCL launcher( m_queue, m_batchingKernel); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) ); //launcher.setConst( cdata ); launcher.setConst(staticIdx); launcher.launch1D( numWorkItems, 64 ); clFinish(m_queue); } #ifdef BATCH_DEBUG aaaa Contact4* hostContacts = new Contact4[nContacts]; m_contactBuffer->read(hostContacts,nContacts); clFinish(m_queue); gpuDebugInfo.read(debugInfo,numWorkItems); clFinish(m_queue); for (int i=0; i<numWorkItems; i++) { if (debugInfo[i].m_valInt1>0) { printf("catch\n"); } if (debugInfo[i].m_valInt2>0) { printf("catch22\n"); } if (debugInfo[i].m_valInt3>0) { printf("catch666\n"); } if (debugInfo[i].m_valInt4>0) { printf("catch777\n"); } } delete[] debugInfo; #endif //BATCH_DEBUG } // copy buffer to buffer btAssert(m_contactBuffer->size()==nContacts); //contacts->copyFromOpenCLArray( *m_contactBuffer); //clFinish(m_queue);//needed? }