void btParticlesDynamicsWorld::runComputeCellIdKernel() { cl_int ciErrNum; #if 0 if(m_useCpuControls[SIMSTAGE_COMPUTE_CELL_ID]->m_active) { // CPU version unsigned int memSize = sizeof(btVector3) * m_numParticles; ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dPos, CL_TRUE, 0, memSize, &(m_hPos[0]), 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); for(int index = 0; index < m_numParticles; index++) { btVector3 pos = m_hPos[index]; btInt4 gridPos = cpu_getGridPos(pos, &m_simParams); unsigned int hash = cpu_getPosHash(gridPos, &m_simParams); m_hPosHash[index].x = hash; m_hPosHash[index].y = index; } memSize = sizeof(btInt2) * m_numParticles; ciErrNum = clEnqueueWriteBuffer(m_cqCommandQue, m_dPosHash, CL_TRUE, 0, memSize, &(m_hPosHash[0]), 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); } else #endif { BT_PROFILE("ComputeCellId"); runKernelWithWorkgroupSize(PARTICLES_KERNEL_COMPUTE_CELL_ID, m_numParticles); ciErrNum = clFinish(m_cqCommandQue); oclCHECKERROR(ciErrNum, CL_SUCCESS); } /* // check int memSize = sizeof(btInt2) * m_hashSize; ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dPosHash, CL_TRUE, 0, memSize, &(m_hPosHash[0]), 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); memSize = sizeof(float) * 4 * m_numParticles; ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dPos, CL_TRUE, 0, memSize, &(m_hPos[0]), 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); */ { BT_PROFILE("Copy VBO"); // Explicit Copy (until OpenGL interop will work) // map the PBO to copy data from the CL buffer via host glBindBufferARB(GL_ARRAY_BUFFER, m_vbo); // map the buffer object into client's memory void* ptr = glMapBufferARB(GL_ARRAY_BUFFER, GL_WRITE_ONLY_ARB); ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dPos, CL_TRUE, 0, sizeof(float) * 4 * m_numParticles, ptr, 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); glUnmapBufferARB(GL_ARRAY_BUFFER); glBindBufferARB(GL_ARRAY_BUFFER,0); } }
void btParticlesDynamicsWorld::runIntegrateMotionKernel() { cl_int ciErrNum; if(m_useCpuControls[SIMSTAGE_INTEGRATE_MOTION]->m_active) { // CPU version #if 1 // read from GPU unsigned int memSize = sizeof(btVector3) * m_numParticles; ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dPos, CL_TRUE, 0, memSize, &(m_hPos[0]), 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dVel, CL_TRUE, 0, memSize, &(m_hVel[0]), 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); for(int index = 0; index < m_numParticles; index++) { btVector3 pos = m_hPos[index]; btVector3 vel = m_hVel[index]; pos[3] = 1.0f; vel[3] = 0.0f; // apply gravity btVector3 gravity = *((btVector3*)(m_simParams.m_gravity)); float particleRad = m_simParams.m_particleRad; float globalDamping = m_simParams.m_globalDamping; float boundaryDamping = m_simParams.m_boundaryDamping; vel += gravity * m_timeStep; vel *= globalDamping; // integrate position pos += vel * m_timeStep; // collide with world boundaries btVector3 worldMin = *((btVector3*)(m_simParams.m_worldMin)); btVector3 worldMax = *((btVector3*)(m_simParams.m_worldMax)); for(int j = 0; j < 3; j++) { if(pos[j] < (worldMin[j] + particleRad)) { pos[j] = worldMin[j] + particleRad; vel[j] *= boundaryDamping; } if(pos[j] > (worldMax[j] - particleRad)) { pos[j] = worldMax[j] - particleRad; vel[j] *= boundaryDamping; } } // write back position and velocity m_hPos[index] = pos; m_hVel[index] = vel; } #endif // write back to GPU memSize = sizeof(btVector3) * m_numParticles; ciErrNum = clEnqueueWriteBuffer(m_cqCommandQue, m_dPos, CL_TRUE, 0, memSize, &(m_hPos[0]), 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); ciErrNum = clEnqueueWriteBuffer(m_cqCommandQue, m_dVel, CL_TRUE, 0, memSize, &(m_hVel[0]), 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); } else { // Set work size and execute the kernel ciErrNum = clSetKernelArg(m_kernels[PARTICLES_KERNEL_INTEGRATE_MOTION].m_kernel, 4, sizeof(float), &m_timeStep); oclCHECKERROR(ciErrNum, CL_SUCCESS); runKernelWithWorkgroupSize(PARTICLES_KERNEL_INTEGRATE_MOTION, m_numParticles); ciErrNum = clFinish(m_cqCommandQue); oclCHECKERROR(ciErrNum, CL_SUCCESS); } }
void btParticlesDynamicsWorld::runCollideParticlesKernel() { cl_int ciErrNum; if(m_useCpuControls[SIMSTAGE_COLLIDE_PARTICLES]->m_active) { // CPU version int memSize = sizeof(btVector3) * m_numParticles; { BT_PROFILE("Copy from GPU"); ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dSortedPos, CL_TRUE, 0, memSize, &(m_hSortedPos[0]), 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dSortedVel, CL_TRUE, 0, memSize, &(m_hSortedVel[0]), 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); memSize = sizeof(btInt2) * m_numParticles; ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dPosHash, CL_TRUE, 0, memSize, &(m_hPosHash[0]), 0, NULL, NULL); memSize = m_numGridCells * sizeof(int); ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dCellStart, CL_TRUE, 0, memSize, &(m_hCellStart[0]), 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); } for(int index = 0; index < m_numParticles; index++) { btVector3 posA = m_hSortedPos[index]; btVector3 velA = m_hSortedVel[index]; btVector3 force = btVector3(0, 0, 0); float particleRad = m_simParams.m_particleRad; float collisionDamping = m_simParams.m_collisionDamping; float spring = m_simParams.m_spring; float shear = m_simParams.m_shear; float attraction = m_simParams.m_attraction; int unsortedIndex = m_hPosHash[index].y; //Get address in grid btInt4 gridPosA = cpu_getGridPos(posA, &m_simParams); //Accumulate surrounding cells btInt4 gridPosB; for(int z = -1; z <= 1; z++) { gridPosB.z = gridPosA.z + z; for(int y = -1; y <= 1; y++) { gridPosB.y = gridPosA.y + y; for(int x = -1; x <= 1; x++) { gridPosB.x = gridPosA.x + x; //Get start particle index for this cell unsigned int hashB = cpu_getPosHash(gridPosB, &m_simParams); int startI = m_hCellStart[hashB]; //Skip empty cell if(startI < 0) { continue; } //Iterate over particles in this cell int endI = startI + 8; for(int j = startI; j < endI; j++) { unsigned int hashC = m_hPosHash[j].x; if(hashC != hashB) { break; } if(j == index) { continue; } btVector3 posB = m_hSortedPos[j]; btVector3 velB = m_hSortedVel[j]; //Collide two spheres force += cpu_collideTwoParticles( posA, posB, velA, velB, particleRad, particleRad, spring, collisionDamping, shear, attraction); } } } } //Write new velocity back to original unsorted location m_hVel[unsortedIndex] = velA + force; } memSize = sizeof(btVector3) * m_numParticles; ciErrNum = clEnqueueWriteBuffer(m_cqCommandQue, m_dVel, CL_TRUE, 0, memSize, &(m_hVel[0]), 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); } else { runKernelWithWorkgroupSize(PARTICLES_KERNEL_COLLIDE_PARTICLES, m_numParticles); cl_int ciErrNum = clFinish(m_cqCommandQue); oclCHECKERROR(ciErrNum, CL_SUCCESS); } }
void btParticlesDynamicsWorld::runCollideParticlesKernel() { btAlignedObjectArray<int> pairs; float particleRad = m_simParams.m_particleRad; float collideDist2 = (particleRad + particleRad)*(particleRad + particleRad); cl_int ciErrNum; if(m_useCpuControls[SIMSTAGE_COLLIDE_PARTICLES]->m_active) { // CPU version int memSize = sizeof(btVector3) * m_numParticles; { BT_PROFILE("Copy from GPU"); ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dSortedPos, CL_TRUE, 0, memSize, &(m_hSortedPos[0]), 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dSortedVel, CL_TRUE, 0, memSize, &(m_hSortedVel[0]), 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); memSize = sizeof(btInt2) * m_numParticles; ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dPosHash, CL_TRUE, 0, memSize, &(m_hPosHash[0]), 0, NULL, NULL); memSize = m_numGridCells * sizeof(int); ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dCellStart, CL_TRUE, 0, memSize, &(m_hCellStart[0]), 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); } for(int index = 0; index < m_numParticles; index++) { btVector3 posA = m_hSortedPos[index]; btVector3 velA = m_hSortedVel[index]; btVector3 force = btVector3(0, 0, 0); float particleRad = m_simParams.m_particleRad; float collisionDamping = m_simParams.m_collisionDamping; float spring = m_simParams.m_spring; float shear = m_simParams.m_shear; float attraction = m_simParams.m_attraction; int unsortedIndex = m_hPosHash[index].y; //Get address in grid btInt4 gridPosA = cpu_getGridPos(posA, &m_simParams); //Accumulate surrounding cells btInt4 gridPosB; for(int z = -1; z <= 1; z++) { gridPosB.z = gridPosA.z + z; for(int y = -1; y <= 1; y++) { gridPosB.y = gridPosA.y + y; for(int x = -1; x <= 1; x++) { gridPosB.x = gridPosA.x + x; //Get start particle index for this cell unsigned int hashB = cpu_getPosHash(gridPosB, &m_simParams); int startI = m_hCellStart[hashB]; //Skip empty cell if(startI < 0) { continue; } //Iterate over particles in this cell int endI = startI + 32; if(endI > m_numParticles) endI = m_numParticles; for(int j = startI; j < endI; j++) { unsigned int hashC = m_hPosHash[j].x; if(hashC != hashB) { break; } if(j == index) { continue; } btPair pair; pair.v0[0] = index; pair.v0[1] = j; pairs.push_back(pair.value); // printf("index=%d, j=%d\n",index,j); // printf("(index=%d, j=%d) ",index,j); btVector3 posB = m_hSortedPos[j]; btVector3 velB = m_hSortedVel[j]; //Collide two spheres force += cpu_collideTwoParticles( posA, posB, velA, velB, particleRad, particleRad, spring, collisionDamping, shear, attraction); } } } } //Write new velocity back to original unsorted location m_hVel[unsortedIndex] = velA + force; } //#define BRUTE_FORCE_CHECK 1 #ifdef BRUTE_FORCE_CHECK for(int index = 0; index < m_numParticles; index++) { btVector3 posA = m_hSortedPos[index]; btVector3 velA = m_hSortedVel[index]; btVector3 force = btVector3(0, 0, 0); int unsortedIndex = m_hPosHash[index].y; float collisionDamping = m_simParams.m_collisionDamping; float spring = m_simParams.m_spring; float shear = m_simParams.m_shear; float attraction = m_simParams.m_attraction; for(int j = 0 ; j < m_numParticles; j++) { if (index!=j) { btVector3 posB = m_hSortedPos[j]; btVector3 velB = m_hSortedVel[j]; btVector3 relPos = posB - posA; relPos[3] = 0.f; float dist2 = (relPos[0] * relPos[0] + relPos[1] * relPos[1] + relPos[2] * relPos[2]); if(dist2 < collideDist2) { //Collide two spheres // force += cpu_collideTwoParticles( posA, posB, velA, velB, particleRad, particleRad, // spring, collisionDamping, shear, attraction); btPair pair; pair.v0[0] = index; pair.v0[1] = j; if (pairs.findLinearSearch(pair.value)==pairs.size()) { printf("not found index=%d, j=%d\n",index,j); } } } } //Write new velocity back to original unsorted location //m_hVel[unsortedIndex] = velA + force; } #endif //BRUTE_FORCE_CHECK memSize = sizeof(btVector3) * m_numParticles; ciErrNum = clEnqueueWriteBuffer(m_cqCommandQue, m_dVel, CL_TRUE, 0, memSize, &(m_hVel[0]), 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); } else { runKernelWithWorkgroupSize(PARTICLES_KERNEL_COLLIDE_PARTICLES, m_numParticles); cl_int ciErrNum = clFinish(m_cqCommandQue); oclCHECKERROR(ciErrNum, CL_SUCCESS); } }