void btParticlesDynamicsWorld::runComputeCellIdKernel() { cl_int ciErrNum; #if 0 if(m_useCpuControls[SIMSTAGE_COMPUTE_CELL_ID]->m_active) { // CPU version unsigned int memSize = sizeof(btVector3) * m_numParticles; ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dPos, CL_TRUE, 0, memSize, &(m_hPos[0]), 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); for(int index = 0; index < m_numParticles; index++) { btVector3 pos = m_hPos[index]; btInt4 gridPos = cpu_getGridPos(pos, &m_simParams); unsigned int hash = cpu_getPosHash(gridPos, &m_simParams); m_hPosHash[index].x = hash; m_hPosHash[index].y = index; } memSize = sizeof(btInt2) * m_numParticles; ciErrNum = clEnqueueWriteBuffer(m_cqCommandQue, m_dPosHash, CL_TRUE, 0, memSize, &(m_hPosHash[0]), 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); } else #endif { BT_PROFILE("ComputeCellId"); runKernelWithWorkgroupSize(PARTICLES_KERNEL_COMPUTE_CELL_ID, m_numParticles); ciErrNum = clFinish(m_cqCommandQue); oclCHECKERROR(ciErrNum, CL_SUCCESS); } /* // check int memSize = sizeof(btInt2) * m_hashSize; ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dPosHash, CL_TRUE, 0, memSize, &(m_hPosHash[0]), 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); memSize = sizeof(float) * 4 * m_numParticles; ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dPos, CL_TRUE, 0, memSize, &(m_hPos[0]), 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); */ { BT_PROFILE("Copy VBO"); // Explicit Copy (until OpenGL interop will work) // map the PBO to copy data from the CL buffer via host glBindBufferARB(GL_ARRAY_BUFFER, m_vbo); // map the buffer object into client's memory void* ptr = glMapBufferARB(GL_ARRAY_BUFFER, GL_WRITE_ONLY_ARB); ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dPos, CL_TRUE, 0, sizeof(float) * 4 * m_numParticles, ptr, 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); glUnmapBufferARB(GL_ARRAY_BUFFER); glBindBufferARB(GL_ARRAY_BUFFER,0); } }
void btParticlesDynamicsWorld::runCollideParticlesKernel() { cl_int ciErrNum; if(m_useCpuControls[SIMSTAGE_COLLIDE_PARTICLES]->m_active) { // CPU version int memSize = sizeof(btVector3) * m_numParticles; { BT_PROFILE("Copy from GPU"); ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dSortedPos, CL_TRUE, 0, memSize, &(m_hSortedPos[0]), 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dSortedVel, CL_TRUE, 0, memSize, &(m_hSortedVel[0]), 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); memSize = sizeof(btInt2) * m_numParticles; ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dPosHash, CL_TRUE, 0, memSize, &(m_hPosHash[0]), 0, NULL, NULL); memSize = m_numGridCells * sizeof(int); ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dCellStart, CL_TRUE, 0, memSize, &(m_hCellStart[0]), 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); } for(int index = 0; index < m_numParticles; index++) { btVector3 posA = m_hSortedPos[index]; btVector3 velA = m_hSortedVel[index]; btVector3 force = btVector3(0, 0, 0); float particleRad = m_simParams.m_particleRad; float collisionDamping = m_simParams.m_collisionDamping; float spring = m_simParams.m_spring; float shear = m_simParams.m_shear; float attraction = m_simParams.m_attraction; int unsortedIndex = m_hPosHash[index].y; //Get address in grid btInt4 gridPosA = cpu_getGridPos(posA, &m_simParams); //Accumulate surrounding cells btInt4 gridPosB; for(int z = -1; z <= 1; z++) { gridPosB.z = gridPosA.z + z; for(int y = -1; y <= 1; y++) { gridPosB.y = gridPosA.y + y; for(int x = -1; x <= 1; x++) { gridPosB.x = gridPosA.x + x; //Get start particle index for this cell unsigned int hashB = cpu_getPosHash(gridPosB, &m_simParams); int startI = m_hCellStart[hashB]; //Skip empty cell if(startI < 0) { continue; } //Iterate over particles in this cell int endI = startI + 8; for(int j = startI; j < endI; j++) { unsigned int hashC = m_hPosHash[j].x; if(hashC != hashB) { break; } if(j == index) { continue; } btVector3 posB = m_hSortedPos[j]; btVector3 velB = m_hSortedVel[j]; //Collide two spheres force += cpu_collideTwoParticles( posA, posB, velA, velB, particleRad, particleRad, spring, collisionDamping, shear, attraction); } } } } //Write new velocity back to original unsorted location m_hVel[unsortedIndex] = velA + force; } memSize = sizeof(btVector3) * m_numParticles; ciErrNum = clEnqueueWriteBuffer(m_cqCommandQue, m_dVel, CL_TRUE, 0, memSize, &(m_hVel[0]), 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); } else { runKernelWithWorkgroupSize(PARTICLES_KERNEL_COLLIDE_PARTICLES, m_numParticles); cl_int ciErrNum = clFinish(m_cqCommandQue); oclCHECKERROR(ciErrNum, CL_SUCCESS); } }
void btParticlesDynamicsWorld::runCollideParticlesKernel() { btAlignedObjectArray<int> pairs; float particleRad = m_simParams.m_particleRad; float collideDist2 = (particleRad + particleRad)*(particleRad + particleRad); cl_int ciErrNum; if(m_useCpuControls[SIMSTAGE_COLLIDE_PARTICLES]->m_active) { // CPU version int memSize = sizeof(btVector3) * m_numParticles; { BT_PROFILE("Copy from GPU"); ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dSortedPos, CL_TRUE, 0, memSize, &(m_hSortedPos[0]), 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dSortedVel, CL_TRUE, 0, memSize, &(m_hSortedVel[0]), 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); memSize = sizeof(btInt2) * m_numParticles; ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dPosHash, CL_TRUE, 0, memSize, &(m_hPosHash[0]), 0, NULL, NULL); memSize = m_numGridCells * sizeof(int); ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dCellStart, CL_TRUE, 0, memSize, &(m_hCellStart[0]), 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); } for(int index = 0; index < m_numParticles; index++) { btVector3 posA = m_hSortedPos[index]; btVector3 velA = m_hSortedVel[index]; btVector3 force = btVector3(0, 0, 0); float particleRad = m_simParams.m_particleRad; float collisionDamping = m_simParams.m_collisionDamping; float spring = m_simParams.m_spring; float shear = m_simParams.m_shear; float attraction = m_simParams.m_attraction; int unsortedIndex = m_hPosHash[index].y; //Get address in grid btInt4 gridPosA = cpu_getGridPos(posA, &m_simParams); //Accumulate surrounding cells btInt4 gridPosB; for(int z = -1; z <= 1; z++) { gridPosB.z = gridPosA.z + z; for(int y = -1; y <= 1; y++) { gridPosB.y = gridPosA.y + y; for(int x = -1; x <= 1; x++) { gridPosB.x = gridPosA.x + x; //Get start particle index for this cell unsigned int hashB = cpu_getPosHash(gridPosB, &m_simParams); int startI = m_hCellStart[hashB]; //Skip empty cell if(startI < 0) { continue; } //Iterate over particles in this cell int endI = startI + 32; if(endI > m_numParticles) endI = m_numParticles; for(int j = startI; j < endI; j++) { unsigned int hashC = m_hPosHash[j].x; if(hashC != hashB) { break; } if(j == index) { continue; } btPair pair; pair.v0[0] = index; pair.v0[1] = j; pairs.push_back(pair.value); // printf("index=%d, j=%d\n",index,j); // printf("(index=%d, j=%d) ",index,j); btVector3 posB = m_hSortedPos[j]; btVector3 velB = m_hSortedVel[j]; //Collide two spheres force += cpu_collideTwoParticles( posA, posB, velA, velB, particleRad, particleRad, spring, collisionDamping, shear, attraction); } } } } //Write new velocity back to original unsorted location m_hVel[unsortedIndex] = velA + force; } //#define BRUTE_FORCE_CHECK 1 #ifdef BRUTE_FORCE_CHECK for(int index = 0; index < m_numParticles; index++) { btVector3 posA = m_hSortedPos[index]; btVector3 velA = m_hSortedVel[index]; btVector3 force = btVector3(0, 0, 0); int unsortedIndex = m_hPosHash[index].y; float collisionDamping = m_simParams.m_collisionDamping; float spring = m_simParams.m_spring; float shear = m_simParams.m_shear; float attraction = m_simParams.m_attraction; for(int j = 0 ; j < m_numParticles; j++) { if (index!=j) { btVector3 posB = m_hSortedPos[j]; btVector3 velB = m_hSortedVel[j]; btVector3 relPos = posB - posA; relPos[3] = 0.f; float dist2 = (relPos[0] * relPos[0] + relPos[1] * relPos[1] + relPos[2] * relPos[2]); if(dist2 < collideDist2) { //Collide two spheres // force += cpu_collideTwoParticles( posA, posB, velA, velB, particleRad, particleRad, // spring, collisionDamping, shear, attraction); btPair pair; pair.v0[0] = index; pair.v0[1] = j; if (pairs.findLinearSearch(pair.value)==pairs.size()) { printf("not found index=%d, j=%d\n",index,j); } } } } //Write new velocity back to original unsorted location //m_hVel[unsortedIndex] = velA + force; } #endif //BRUTE_FORCE_CHECK memSize = sizeof(btVector3) * m_numParticles; ciErrNum = clEnqueueWriteBuffer(m_cqCommandQue, m_dVel, CL_TRUE, 0, memSize, &(m_hVel[0]), 0, NULL, NULL); oclCHECKERROR(ciErrNum, CL_SUCCESS); } else { runKernelWithWorkgroupSize(PARTICLES_KERNEL_COLLIDE_PARTICLES, m_numParticles); cl_int ciErrNum = clFinish(m_cqCommandQue); oclCHECKERROR(ciErrNum, CL_SUCCESS); } }