///todo: add some acceleration structure (AABBs, tree etc) void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults, int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables, const struct b3GpuNarrowPhaseInternalData* narrowphaseData) { //castRaysHost(rays,hitResults,numBodies,bodies,numCollidables,collidables,narrowphaseData); B3_PROFILE("castRaysGPU"); b3OpenCLArray<b3RayInfo> gpuRays(m_data->m_context,m_data->m_q); b3OpenCLArray<b3RayHit> gpuHitResults(m_data->m_context,m_data->m_q); { B3_PROFILE("raycast copyFromHost"); gpuRays.copyFromHost(rays); gpuHitResults.resize(hitResults.size()); gpuHitResults.copyFromHost(hitResults); } //run kernel { B3_PROFILE("raycast launch1D"); b3LauncherCL launcher(m_data->m_q,m_data->m_raytraceKernel,"m_raytraceKernel"); int numRays = rays.size(); launcher.setConst(numRays); launcher.setBuffer(gpuRays.getBufferCL()); launcher.setBuffer(gpuHitResults.getBufferCL()); launcher.setConst(numBodies); launcher.setBuffer(narrowphaseData->m_bodyBufferGPU->getBufferCL()); launcher.setBuffer(narrowphaseData->m_collidablesGPU->getBufferCL()); launcher.setBuffer(narrowphaseData->m_convexFacesGPU->getBufferCL()); launcher.setBuffer(narrowphaseData->m_convexPolyhedraGPU->getBufferCL()); launcher.launch1D(numRays); clFinish(m_data->m_q); } //copy results { B3_PROFILE("raycast copyToHost"); gpuHitResults.copyToHost(hitResults); } }
void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults, int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables,const struct b3Collidable* collidables) { // return castRays(rays,hitResults,numBodies,bodies,numCollidables,collidables); B3_PROFILE("castRaysHost"); for (int r=0;r<rays.size();r++) { b3Vector3 rayFrom = rays[r].m_from; b3Vector3 rayTo = rays[r].m_to; //if there is a hit, color the pixels bool hits = false; for (int b=0;b<numBodies && !hits;b++) { const b3Vector3& pos = bodies[b].m_pos; const b3Quaternion& orn = bodies[b].m_quat; b3Scalar radius = 1; if (sphere_intersect(pos, radius, rayFrom, rayTo)) hits = true; } if (hits) hitResults[r].m_hitFraction = 0.f; } }
void b3GeometryUtil::getVerticesFromPlaneEquations(const b3AlignedObjectArray<b3Vector3>& planeEquations , b3AlignedObjectArray<b3Vector3>& verticesOut ) { const int numbrushes = planeEquations.size(); // brute force: for (int i=0;i<numbrushes;i++) { const b3Vector3& N1 = planeEquations[i]; for (int j=i+1;j<numbrushes;j++) { const b3Vector3& N2 = planeEquations[j]; for (int k=j+1;k<numbrushes;k++) { const b3Vector3& N3 = planeEquations[k]; b3Vector3 n2n3; n2n3 = N2.cross(N3); b3Vector3 n3n1; n3n1 = N3.cross(N1); b3Vector3 n1n2; n1n2 = N1.cross(N2); if ( ( n2n3.length2() > b3Scalar(0.0001) ) && ( n3n1.length2() > b3Scalar(0.0001) ) && ( n1n2.length2() > b3Scalar(0.0001) ) ) { //point P out of 3 plane equations: // d1 ( N2 * N3 ) + d2 ( N3 * N1 ) + d3 ( N1 * N2 ) //P = ------------------------------------------------------------------------- // N1 . ( N2 * N3 ) b3Scalar quotient = (N1.dot(n2n3)); if (b3Fabs(quotient) > b3Scalar(0.000001)) { quotient = b3Scalar(-1.) / quotient; n2n3 *= N1[3]; n3n1 *= N2[3]; n1n2 *= N3[3]; b3Vector3 potentialVertex = n2n3; potentialVertex += n3n1; potentialVertex += n1n2; potentialVertex *= quotient; //check if inside, and replace supportingVertexOut if needed if (isPointInsidePlanes(planeEquations,potentialVertex,b3Scalar(0.01))) { verticesOut.push_back(potentialVertex); } } } } } } }
void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults, int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables) { B3_PROFILE("castRaysGPU"); b3OpenCLArray<b3RayInfo> gpuRays(m_data->m_context,m_data->m_q); gpuRays.copyFromHost(rays); b3OpenCLArray<b3RayHit> gpuHitResults(m_data->m_context,m_data->m_q); gpuHitResults.resize(hitResults.size()); b3OpenCLArray<b3RigidBodyCL> gpuBodies(m_data->m_context,m_data->m_q); gpuBodies.resize(numBodies); gpuBodies.copyFromHostPointer(bodies,numBodies); b3OpenCLArray<b3Collidable> gpuCollidables(m_data->m_context,m_data->m_q); gpuCollidables.resize(numCollidables); gpuCollidables.copyFromHostPointer(collidables,numCollidables); //run kernel { B3_PROFILE("raycast launch1D"); b3LauncherCL launcher(m_data->m_q,m_data->m_raytraceKernel); int numRays = rays.size(); launcher.setConst(numRays); launcher.setBuffer(gpuRays.getBufferCL()); launcher.setBuffer(gpuHitResults.getBufferCL()); launcher.setConst(numBodies); launcher.setBuffer(gpuBodies.getBufferCL()); launcher.setBuffer(gpuCollidables.getBufferCL()); launcher.launch1D(numRays); clFinish(m_data->m_q); } //copy results gpuHitResults.copyToHost(hitResults); }
bool notExist(const b3Vector3& planeEquation,const b3AlignedObjectArray<b3Vector3>& planeEquations) { int numbrushes = planeEquations.size(); for (int i=0;i<numbrushes;i++) { const b3Vector3& N1 = planeEquations[i]; if (planeEquation.dot(N1) > b3Scalar(0.999)) { return false; } } return true; }
static void clipEdge(const mat<4, 3, float>& triangleIn, int vertexIndexA, int vertexIndexB, b3AlignedObjectArray<Vec4f>& vertices) { Vec4f v0New = triangleIn.col(vertexIndexA); Vec4f v1New = triangleIn.col(vertexIndexB); bool v0Inside = v0New[3] > 0.f && v0New[2] > -v0New[3]; bool v1Inside = v1New[3] > 0.f && v1New[2] > -v1New[3]; if (v0Inside && v1Inside) { } else if (v0Inside || v1Inside) { float d0 = v0New[2] + v0New[3]; float d1 = v1New[2] + v1New[3]; float factor = 1.0 / (d1 - d0); Vec4f newVertex = (v0New * d1 - v1New * d0) * factor; if (v0Inside) { v1New = newVertex; } else { v0New = newVertex; } } else { return; } if (vertices.size() == 0 || !(equals(vertices[vertices.size() - 1], v0New))) { vertices.push_back(v0New); } vertices.push_back(v1New); }
bool b3GeometryUtil::areVerticesBehindPlane(const b3Vector3& planeNormal, const b3AlignedObjectArray<b3Vector3>& vertices, b3Scalar margin) { int numvertices = vertices.size(); for (int i=0;i<numvertices;i++) { const b3Vector3& N1 = vertices[i]; b3Scalar dist = b3Scalar(planeNormal.dot(N1))+b3Scalar(planeNormal[3])-margin; if (dist>b3Scalar(0.)) { return false; } } return true; }
bool b3GeometryUtil::isPointInsidePlanes(const b3AlignedObjectArray<b3Vector3>& planeEquations, const b3Vector3& point, b3Scalar margin) { int numbrushes = planeEquations.size(); for (int i=0;i<numbrushes;i++) { const b3Vector3& N1 = planeEquations[i]; b3Scalar dist = b3Scalar(N1.dot(point))+b3Scalar(N1[3])-margin; if (dist>b3Scalar(0.)) { return false; } } return true; }
void b3GeometryUtil::getPlaneEquationsFromVertices(b3AlignedObjectArray<b3Vector3>& vertices, b3AlignedObjectArray<b3Vector3>& planeEquationsOut ) { const int numvertices = vertices.size(); // brute force: for (int i=0;i<numvertices;i++) { const b3Vector3& N1 = vertices[i]; for (int j=i+1;j<numvertices;j++) { const b3Vector3& N2 = vertices[j]; for (int k=j+1;k<numvertices;k++) { const b3Vector3& N3 = vertices[k]; b3Vector3 planeEquation,edge0,edge1; edge0 = N2-N1; edge1 = N3-N1; b3Scalar normalSign = b3Scalar(1.); for (int ww=0;ww<2;ww++) { planeEquation = normalSign * edge0.cross(edge1); if (planeEquation.length2() > b3Scalar(0.0001)) { planeEquation.normalize(); if (notExist(planeEquation,planeEquationsOut)) { planeEquation[3] = -planeEquation.dot(N1); //check if inside, and replace supportingVertexOut if needed if (areVerticesBehindPlane(planeEquation,vertices,b3Scalar(0.01))) { planeEquationsOut.push_back(planeEquation); } } } normalSign = b3Scalar(-1.); } } } } }
void MyComboBoxCallback(int comboId, const char* item) { //printf("comboId = %d, item = %s\n",comboId, item); if (comboId==DEMO_SELECTION_COMBOBOX) { //find selected item for (int i=0;i<allNames.size();i++) { if (strcmp(item,allNames[i])==0) { selectDemo(i); saveCurrentSettings(sCurrentDemoIndex,startFileName); break; } } } }
void MyComboBoxCallback(int comboId, const char* item) { int numDemos = demoNames.size(); for (int i=0;i<numDemos;i++) { if (!strcmp(demoNames[i],item)) { if (selectedDemo != i) { gReset = true; selectedDemo = i; printf("selected demo %s!\n", item); } } } }
///todo: add some acceleration structure (AABBs, tree etc) void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults, int numBodies,const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables, const struct b3GpuNarrowPhaseInternalData* narrowphaseData, class b3GpuBroadphaseInterface* broadphase) { //castRaysHost(rays,hitResults,numBodies,bodies,numCollidables,collidables,narrowphaseData); B3_PROFILE("castRaysGPU"); { B3_PROFILE("raycast copyFromHost"); m_data->m_gpuRays->copyFromHost(rays); m_data->m_gpuHitResults->copyFromHost(hitResults); } int numRays = hitResults.size(); { m_data->m_firstRayRigidPairIndexPerRay->resize(numRays); m_data->m_numRayRigidPairsPerRay->resize(numRays); m_data->m_gpuNumRayRigidPairs->resize(1); m_data->m_gpuRayRigidPairs->resize(numRays * 16); } //run kernel const bool USE_BRUTE_FORCE_RAYCAST = false; if(USE_BRUTE_FORCE_RAYCAST) { B3_PROFILE("raycast launch1D"); b3LauncherCL launcher(m_data->m_q,m_data->m_raytraceKernel,"m_raytraceKernel"); int numRays = rays.size(); launcher.setConst(numRays); launcher.setBuffer(m_data->m_gpuRays->getBufferCL()); launcher.setBuffer(m_data->m_gpuHitResults->getBufferCL()); launcher.setConst(numBodies); launcher.setBuffer(narrowphaseData->m_bodyBufferGPU->getBufferCL()); launcher.setBuffer(narrowphaseData->m_collidablesGPU->getBufferCL()); launcher.setBuffer(narrowphaseData->m_convexFacesGPU->getBufferCL()); launcher.setBuffer(narrowphaseData->m_convexPolyhedraGPU->getBufferCL()); launcher.launch1D(numRays); clFinish(m_data->m_q); } else { m_data->m_plbvh->build( broadphase->getAllAabbsGPU(), broadphase->getSmallAabbIndicesGPU(), broadphase->getLargeAabbIndicesGPU() ); m_data->m_plbvh->testRaysAgainstBvhAabbs(*m_data->m_gpuRays, *m_data->m_gpuNumRayRigidPairs, *m_data->m_gpuRayRigidPairs); int numRayRigidPairs = -1; m_data->m_gpuNumRayRigidPairs->copyToHostPointer(&numRayRigidPairs, 1); if( numRayRigidPairs > m_data->m_gpuRayRigidPairs->size() ) { numRayRigidPairs = m_data->m_gpuRayRigidPairs->size(); m_data->m_gpuNumRayRigidPairs->copyFromHostPointer(&numRayRigidPairs, 1); } m_data->m_gpuRayRigidPairs->resize(numRayRigidPairs); //Radix sort needs b3OpenCLArray::size() to be correct //Sort ray-rigid pairs by ray index { B3_PROFILE("sort ray-rigid pairs"); m_data->m_radixSorter->execute( *reinterpret_cast< b3OpenCLArray<b3SortData>* >(m_data->m_gpuRayRigidPairs) ); } //detect start,count of each ray pair { B3_PROFILE("detect ray-rigid pair index ranges"); { B3_PROFILE("reset ray-rigid pair index ranges"); m_data->m_fill->execute(*m_data->m_firstRayRigidPairIndexPerRay, numRayRigidPairs, numRays); //atomic_min used to find first index m_data->m_fill->execute(*m_data->m_numRayRigidPairsPerRay, 0, numRays); clFinish(m_data->m_q); } b3BufferInfoCL bufferInfo[] = { b3BufferInfoCL( m_data->m_gpuRayRigidPairs->getBufferCL() ), b3BufferInfoCL( m_data->m_firstRayRigidPairIndexPerRay->getBufferCL() ), b3BufferInfoCL( m_data->m_numRayRigidPairsPerRay->getBufferCL() ) }; b3LauncherCL launcher(m_data->m_q, m_data->m_findRayRigidPairIndexRanges, "m_findRayRigidPairIndexRanges"); launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst(numRayRigidPairs); launcher.launch1D(numRayRigidPairs); clFinish(m_data->m_q); } { B3_PROFILE("ray-rigid intersection"); b3BufferInfoCL bufferInfo[] = { b3BufferInfoCL( m_data->m_gpuRays->getBufferCL() ), b3BufferInfoCL( m_data->m_gpuHitResults->getBufferCL() ), b3BufferInfoCL( m_data->m_firstRayRigidPairIndexPerRay->getBufferCL() ), b3BufferInfoCL( m_data->m_numRayRigidPairsPerRay->getBufferCL() ), b3BufferInfoCL( narrowphaseData->m_bodyBufferGPU->getBufferCL() ), b3BufferInfoCL( narrowphaseData->m_collidablesGPU->getBufferCL() ), b3BufferInfoCL( narrowphaseData->m_convexFacesGPU->getBufferCL() ), b3BufferInfoCL( narrowphaseData->m_convexPolyhedraGPU->getBufferCL() ), b3BufferInfoCL( m_data->m_gpuRayRigidPairs->getBufferCL() ) }; b3LauncherCL launcher(m_data->m_q, m_data->m_raytracePairsKernel, "m_raytracePairsKernel"); launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst(numRays); launcher.launch1D(numRays); clFinish(m_data->m_q); } } //copy results { B3_PROFILE("raycast copyToHost"); m_data->m_gpuHitResults->copyToHost(hitResults); } }
void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults, int numBodies,const struct b3RigidBodyData* bodies, int numCollidables,const struct b3Collidable* collidables, const struct b3GpuNarrowPhaseInternalData* narrowphaseData) { // return castRays(rays,hitResults,numBodies,bodies,numCollidables,collidables); B3_PROFILE("castRaysHost"); for (int r=0;r<rays.size();r++) { b3Vector3 rayFrom = rays[r].m_from; b3Vector3 rayTo = rays[r].m_to; float hitFraction = hitResults[r].m_hitFraction; int hitBodyIndex= -1; b3Vector3 hitNormal; for (int b=0;b<numBodies;b++) { const b3Vector3& pos = bodies[b].m_pos; const b3Quaternion& orn = bodies[b].m_quat; switch (collidables[bodies[b].m_collidableIdx].m_shapeType) { case SHAPE_SPHERE: { b3Scalar radius = collidables[bodies[b].m_collidableIdx].m_radius; if (sphere_intersect(pos, radius, rayFrom, rayTo,hitFraction)) { hitBodyIndex = b; b3Vector3 hitPoint; hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to,hitFraction); hitNormal = (hitPoint-bodies[b].m_pos).normalize(); } } case SHAPE_CONVEX_HULL: { b3Transform convexWorldTransform; convexWorldTransform.setIdentity(); convexWorldTransform.setOrigin(bodies[b].m_pos); convexWorldTransform.setRotation(bodies[b].m_quat); b3Transform convexWorld2Local = convexWorldTransform.inverse(); b3Vector3 rayFromLocal = convexWorld2Local(rayFrom); b3Vector3 rayToLocal = convexWorld2Local(rayTo); int shapeIndex = collidables[bodies[b].m_collidableIdx].m_shapeIndex; const b3ConvexPolyhedronData& poly = narrowphaseData->m_convexPolyhedra[shapeIndex]; if (rayConvex(rayFromLocal, rayToLocal,poly,narrowphaseData->m_convexFaces, hitFraction, hitNormal)) { hitBodyIndex = b; } break; } default: { static bool once=true; if (once) { once=false; b3Warning("Raytest: unsupported shape type\n"); } } } } if (hitBodyIndex>=0) { hitResults[r].m_hitFraction = hitFraction; hitResults[r].m_hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to,hitFraction); hitResults[r].m_hitNormal = hitNormal; hitResults[r].m_hitBody = hitBodyIndex; } } }
void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const b3Config& config, int static0Index) { B3_PROFILE("solveContacts"); m_data->m_bodyBufferGPU->setFromOpenCLBuffer(bodyBuf,numBodies); m_data->m_inertiaBufferGPU->setFromOpenCLBuffer(inertiaBuf,numBodies); m_data->m_pBufContactOutGPU->setFromOpenCLBuffer(contactBuf,numContacts); if (optionalSortContactsDeterminism) { if (!gCpuSortContactsDeterminism) { B3_PROFILE("GPU Sort contact constraints (determinism)"); m_data->m_pBufContactOutGPUCopy->resize(numContacts); m_data->m_contactKeyValues->resize(numContacts); m_data->m_pBufContactOutGPU->copyToCL(m_data->m_pBufContactOutGPUCopy->getBufferCL(),numContacts,0,0); { b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataChildShapeBKernel,"m_setDeterminismSortDataChildShapeBKernel"); launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); launcher.setConst(numContacts); launcher.launch1D( numContacts, 64 ); } m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues); { b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataChildShapeAKernel,"m_setDeterminismSortDataChildShapeAKernel"); launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); launcher.setConst(numContacts); launcher.launch1D( numContacts, 64 ); } m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues); { b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyBKernel,"m_setDeterminismSortDataBodyBKernel"); launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); launcher.setConst(numContacts); launcher.launch1D( numContacts, 64 ); } m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues); { b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyAKernel,"m_setDeterminismSortDataBodyAKernel"); launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); launcher.setConst(numContacts); launcher.launch1D( numContacts, 64 ); } m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues); { B3_PROFILE("gpu reorderContactKernel (determinism)"); b3Int4 cdata; cdata.x = numContacts; //b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL()) // , b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) }; b3LauncherCL launcher(m_data->m_queue,m_data->m_solverGPU->m_reorderContactKernel,"m_reorderContactKernel"); launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); launcher.setBuffer(m_data->m_pBufContactOutGPU->getBufferCL()); launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); launcher.setConst( cdata ); launcher.launch1D( numContacts, 64 ); } } else { B3_PROFILE("CPU Sort contact constraints (determinism)"); b3AlignedObjectArray<b3Contact4> cpuConstraints; m_data->m_pBufContactOutGPU->copyToHost(cpuConstraints); bool sort = true; if (sort) { cpuConstraints.quickSort(b3ContactCmp); for (int i=0;i<cpuConstraints.size();i++) { cpuConstraints[i].m_batchIdx = i; } } m_data->m_pBufContactOutGPU->copyFromHost(cpuConstraints); if (m_debugOutput==100) { for (int i=0;i<cpuConstraints.size();i++) { printf("c[%d].m_bodyA = %d, m_bodyB = %d, batchId = %d\n",i,cpuConstraints[i].m_bodyAPtrAndSignBit,cpuConstraints[i].m_bodyBPtrAndSignBit, cpuConstraints[i].m_batchIdx); } } m_debugOutput++; } } int nContactOut = m_data->m_pBufContactOutGPU->size(); bool useSolver = true; if (useSolver) { float dt=1./60.; b3ConstraintCfg csCfg( dt ); csCfg.m_enableParallelSolve = true; csCfg.m_batchCellSize = 6; csCfg.m_staticIdx = static0Index; b3OpenCLArray<b3RigidBodyData>* bodyBuf = m_data->m_bodyBufferGPU; void* additionalData = 0;//m_data->m_frictionCGPU; const b3OpenCLArray<b3InertiaData>* shapeBuf = m_data->m_inertiaBufferGPU; b3OpenCLArray<b3GpuConstraint4>* contactConstraintOut = m_data->m_contactCGPU; int nContacts = nContactOut; int maxNumBatches = 0; if (!gUseLargeBatches) { if( m_data->m_solverGPU->m_contactBuffer2) { m_data->m_solverGPU->m_contactBuffer2->resize(nContacts); } if( m_data->m_solverGPU->m_contactBuffer2 == 0 ) { m_data->m_solverGPU->m_contactBuffer2 = new b3OpenCLArray<b3Contact4>(m_data->m_context,m_data->m_queue, nContacts ); m_data->m_solverGPU->m_contactBuffer2->resize(nContacts); } //clFinish(m_data->m_queue); { B3_PROFILE("batching"); //@todo: just reserve it, without copy of original contact (unless we use warmstarting) const b3OpenCLArray<b3RigidBodyData>* bodyNative = bodyBuf; { //b3OpenCLArray<b3RigidBodyData>* bodyNative = b3OpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, bodyBuf ); //b3OpenCLArray<b3Contact4>* contactNative = b3OpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, contactsIn ); const int sortAlignment = 512; // todo. get this out of sort if( csCfg.m_enableParallelSolve ) { int sortSize = B3NEXTMULTIPLEOF( nContacts, sortAlignment ); b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints; b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets; if (!gCpuSetSortData) { // 2. set cell idx B3_PROFILE("GPU set cell idx"); struct CB { int m_nContacts; int m_staticIdx; float m_scale; b3Int4 m_nSplit; }; b3Assert( sortSize%64 == 0 ); CB cdata; cdata.m_nContacts = nContacts; cdata.m_staticIdx = csCfg.m_staticIdx; cdata.m_scale = 1.f/csCfg.m_batchCellSize; cdata.m_nSplit.x = B3_SOLVER_N_SPLIT_X; cdata.m_nSplit.y = B3_SOLVER_N_SPLIT_Y; cdata.m_nSplit.z = B3_SOLVER_N_SPLIT_Z; m_data->m_solverGPU->m_sortDataBuffer->resize(nContacts); b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL()), b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) }; b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_setSortDataKernel,"m_setSortDataKernel" ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( cdata.m_nContacts ); launcher.setConst( cdata.m_scale ); launcher.setConst(cdata.m_nSplit); launcher.setConst(cdata.m_staticIdx); launcher.launch1D( sortSize, 64 ); } else { m_data->m_solverGPU->m_sortDataBuffer->resize(nContacts); b3AlignedObjectArray<b3SortData> sortDataCPU; m_data->m_solverGPU->m_sortDataBuffer->copyToHost(sortDataCPU); b3AlignedObjectArray<b3Contact4> contactCPU; m_data->m_pBufContactOutGPU->copyToHost(contactCPU); b3AlignedObjectArray<b3RigidBodyData> bodiesCPU; bodyBuf->copyToHost(bodiesCPU); float scale = 1.f/csCfg.m_batchCellSize; b3Int4 nSplit; nSplit.x = B3_SOLVER_N_SPLIT_X; nSplit.y = B3_SOLVER_N_SPLIT_Y; nSplit.z = B3_SOLVER_N_SPLIT_Z; SetSortDataCPU(&contactCPU[0], &bodiesCPU[0], &sortDataCPU[0], nContacts,scale,nSplit,csCfg.m_staticIdx); m_data->m_solverGPU->m_sortDataBuffer->copyFromHost(sortDataCPU); } if (!gCpuRadixSort) { // 3. sort by cell idx B3_PROFILE("gpuRadixSort"); //int n = B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT; //int sortBit = 32; //if( n <= 0xffff ) sortBit = 16; //if( n <= 0xff ) sortBit = 8; //adl::RadixSort<adl::TYPE_CL>::execute( data->m_sort, *data->m_sortDataBuffer, sortSize ); //adl::RadixSort32<adl::TYPE_CL>::execute( data->m_sort32, *data->m_sortDataBuffer, sortSize ); b3OpenCLArray<b3SortData>& keyValuesInOut = *(m_data->m_solverGPU->m_sortDataBuffer); this->m_data->m_solverGPU->m_sort32->execute(keyValuesInOut); } else { b3OpenCLArray<b3SortData>& keyValuesInOut = *(m_data->m_solverGPU->m_sortDataBuffer); b3AlignedObjectArray<b3SortData> hostValues; keyValuesInOut.copyToHost(hostValues); hostValues.quickSort(sortfnc); keyValuesInOut.copyFromHost(hostValues); } if (gUseScanHost) { // 4. find entries B3_PROFILE("cpuBoundSearch"); b3AlignedObjectArray<unsigned int> countsHost; countsNative->copyToHost(countsHost); b3AlignedObjectArray<b3SortData> sortDataHost; m_data->m_solverGPU->m_sortDataBuffer->copyToHost(sortDataHost); //m_data->m_solverGPU->m_search->executeHost(*m_data->m_solverGPU->m_sortDataBuffer,nContacts,*countsNative,B3_SOLVER_N_CELLS,b3BoundSearchCL::COUNT); m_data->m_solverGPU->m_search->executeHost(sortDataHost,nContacts,countsHost,B3_SOLVER_N_CELLS,b3BoundSearchCL::COUNT); countsNative->copyFromHost(countsHost); //adl::BoundSearch<adl::TYPE_CL>::execute( data->m_search, *data->m_sortDataBuffer, nContacts, *countsNative, // B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT, adl::BoundSearchBase::COUNT ); //unsigned int sum; //m_data->m_solverGPU->m_scan->execute(*countsNative,*offsetsNative, B3_SOLVER_N_CELLS);//,&sum ); b3AlignedObjectArray<unsigned int> offsetsHost; offsetsHost.resize(offsetsNative->size()); m_data->m_solverGPU->m_scan->executeHost(countsHost,offsetsHost, B3_SOLVER_N_CELLS);//,&sum ); offsetsNative->copyFromHost(offsetsHost); //printf("sum = %d\n",sum); } else { // 4. find entries B3_PROFILE("gpuBoundSearch"); m_data->m_solverGPU->m_search->execute(*m_data->m_solverGPU->m_sortDataBuffer,nContacts,*countsNative,B3_SOLVER_N_CELLS,b3BoundSearchCL::COUNT); m_data->m_solverGPU->m_scan->execute(*countsNative,*offsetsNative, B3_SOLVER_N_CELLS);//,&sum ); } if (nContacts) { // 5. sort constraints by cellIdx if (gReorderContactsOnCpu) { B3_PROFILE("cpu m_reorderContactKernel"); b3AlignedObjectArray<b3SortData> sortDataHost; m_data->m_solverGPU->m_sortDataBuffer->copyToHost(sortDataHost); b3AlignedObjectArray<b3Contact4> inContacts; b3AlignedObjectArray<b3Contact4> outContacts; m_data->m_pBufContactOutGPU->copyToHost(inContacts); outContacts.resize(inContacts.size()); for (int i=0;i<nContacts;i++) { int srcIdx = sortDataHost[i].y; outContacts[i] = inContacts[srcIdx]; } m_data->m_solverGPU->m_contactBuffer2->copyFromHost(outContacts); /* "void ReorderContactKernel(__global struct b3Contact4Data* in, __global struct b3Contact4Data* out, __global int2* sortData, int4 cb )\n" "{\n" " int nContacts = cb.x;\n" " int gIdx = GET_GLOBAL_IDX;\n" " if( gIdx < nContacts )\n" " {\n" " int srcIdx = sortData[gIdx].y;\n" " out[gIdx] = in[srcIdx];\n" " }\n" "}\n" */ } else { B3_PROFILE("gpu m_reorderContactKernel"); b3Int4 cdata; cdata.x = nContacts; b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL()) , b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) }; b3LauncherCL launcher(m_data->m_queue,m_data->m_solverGPU->m_reorderContactKernel,"m_reorderContactKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( cdata ); launcher.launch1D( nContacts, 64 ); } } } } //clFinish(m_data->m_queue); // { // b3AlignedObjectArray<unsigned int> histogram; // m_data->m_solverGPU->m_numConstraints->copyToHost(histogram); // printf(",,,\n"); // } if (nContacts) { if (gUseCpuCopyConstraints) { for (int i=0;i<nContacts;i++) { m_data->m_pBufContactOutGPU->copyFromOpenCLArray(*m_data->m_solverGPU->m_contactBuffer2); // m_data->m_solverGPU->m_contactBuffer2->getBufferCL(); // m_data->m_pBufContactOutGPU->getBufferCL() } } else { B3_PROFILE("gpu m_copyConstraintKernel"); b3Int4 cdata; cdata.x = nContacts; b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL() ), b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ) }; b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_copyConstraintKernel,"m_copyConstraintKernel" ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( cdata ); launcher.launch1D( nContacts, 64 ); //we use the clFinish for proper benchmark/profile clFinish(m_data->m_queue); } } bool compareGPU = false; if (nContacts) { if (!gCpuBatchContacts) { B3_PROFILE("gpu batchContacts"); maxNumBatches = 150;//250; m_data->m_solverGPU->batchContacts( m_data->m_pBufContactOutGPU, nContacts, m_data->m_solverGPU->m_numConstraints, m_data->m_solverGPU->m_offsets, csCfg.m_staticIdx ); clFinish(m_data->m_queue); } else { B3_PROFILE("cpu batchContacts"); static b3AlignedObjectArray<b3Contact4> cpuContacts; b3OpenCLArray<b3Contact4>* contactsIn = m_data->m_solverGPU->m_contactBuffer2; { B3_PROFILE("copyToHost"); contactsIn->copyToHost(cpuContacts); } b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints; b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets; b3AlignedObjectArray<unsigned int> nNativeHost; b3AlignedObjectArray<unsigned int> offsetsNativeHost; { B3_PROFILE("countsNative/offsetsNative copyToHost"); countsNative->copyToHost(nNativeHost); offsetsNative->copyToHost(offsetsNativeHost); } int numNonzeroGrid=0; if (gUseLargeBatches) { m_data->m_batchSizes.resize(B3_MAX_NUM_BATCHES); int totalNumConstraints = cpuContacts.size(); int simdWidth =numBodies+1;//-1;//64;//-1;//32; int numBatches = sortConstraintByBatch3( &cpuContacts[0], totalNumConstraints, totalNumConstraints+1,csCfg.m_staticIdx ,numBodies,&m_data->m_batchSizes[0]); // on GPU maxNumBatches = b3Max(numBatches,maxNumBatches); static int globalMaxBatch = 0; if (maxNumBatches>globalMaxBatch ) { globalMaxBatch = maxNumBatches; b3Printf("maxNumBatches = %d\n",maxNumBatches); } } else { m_data->m_batchSizes.resize(B3_SOLVER_N_CELLS*B3_MAX_NUM_BATCHES); B3_PROFILE("cpu batch grid"); for(int i=0; i<B3_SOLVER_N_CELLS; i++) { int n = (nNativeHost)[i]; int offset = (offsetsNativeHost)[i]; if( n ) { numNonzeroGrid++; int simdWidth =numBodies+1;//-1;//64;//-1;//32; int numBatches = sortConstraintByBatch3( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies,&m_data->m_batchSizes[i*B3_MAX_NUM_BATCHES]); // on GPU maxNumBatches = b3Max(numBatches,maxNumBatches); static int globalMaxBatch = 0; if (maxNumBatches>globalMaxBatch ) { globalMaxBatch = maxNumBatches; b3Printf("maxNumBatches = %d\n",maxNumBatches); } //we use the clFinish for proper benchmark/profile } } //clFinish(m_data->m_queue); } { B3_PROFILE("m_contactBuffer->copyFromHost"); m_data->m_solverGPU->m_contactBuffer2->copyFromHost((b3AlignedObjectArray<b3Contact4>&)cpuContacts); } } } } } //printf("maxNumBatches = %d\n", maxNumBatches); if (gUseLargeBatches) { if (nContacts) { B3_PROFILE("cpu batchContacts"); static b3AlignedObjectArray<b3Contact4> cpuContacts; // b3OpenCLArray<b3Contact4>* contactsIn = m_data->m_solverGPU->m_contactBuffer2; { B3_PROFILE("copyToHost"); m_data->m_pBufContactOutGPU->copyToHost(cpuContacts); } b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints; b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets; int numNonzeroGrid=0; { m_data->m_batchSizes.resize(B3_MAX_NUM_BATCHES); int totalNumConstraints = cpuContacts.size(); int simdWidth =numBodies+1;//-1;//64;//-1;//32; int numBatches = sortConstraintByBatch3( &cpuContacts[0], totalNumConstraints, totalNumConstraints+1,csCfg.m_staticIdx ,numBodies,&m_data->m_batchSizes[0]); // on GPU maxNumBatches = b3Max(numBatches,maxNumBatches); static int globalMaxBatch = 0; if (maxNumBatches>globalMaxBatch ) { globalMaxBatch = maxNumBatches; b3Printf("maxNumBatches = %d\n",maxNumBatches); } } { B3_PROFILE("m_contactBuffer->copyFromHost"); m_data->m_solverGPU->m_contactBuffer2->copyFromHost((b3AlignedObjectArray<b3Contact4>&)cpuContacts); } } } if (nContacts) { B3_PROFILE("gpu convertToConstraints"); m_data->m_solverGPU->convertToConstraints( bodyBuf, shapeBuf, m_data->m_solverGPU->m_contactBuffer2, contactConstraintOut, additionalData, nContacts, (b3SolverBase::ConstraintCfg&) csCfg ); clFinish(m_data->m_queue); } if (1) { int numIter = 4; m_data->m_solverGPU->m_nIterations = numIter;//10 if (!gCpuSolveConstraint) { B3_PROFILE("GPU solveContactConstraint"); /*m_data->m_solverGPU->solveContactConstraint( m_data->m_bodyBufferGPU, m_data->m_inertiaBufferGPU, m_data->m_contactCGPU,0, nContactOut , maxNumBatches); */ //m_data->m_batchSizesGpu->copyFromHost(m_data->m_batchSizes); if (gUseLargeBatches) { solveContactConstraintBatchSizes(m_data->m_bodyBufferGPU, m_data->m_inertiaBufferGPU, m_data->m_contactCGPU,0, nContactOut , maxNumBatches,numIter,&m_data->m_batchSizes); } else { solveContactConstraint( m_data->m_bodyBufferGPU, m_data->m_inertiaBufferGPU, m_data->m_contactCGPU,0, nContactOut , maxNumBatches,numIter,&m_data->m_batchSizes);//m_data->m_batchSizesGpu); } } else { B3_PROFILE("Host solveContactConstraint"); m_data->m_solverGPU->solveContactConstraintHost(m_data->m_bodyBufferGPU, m_data->m_inertiaBufferGPU, m_data->m_contactCGPU,0, nContactOut ,maxNumBatches,&m_data->m_batchSizes); } } #if 0 if (0) { B3_PROFILE("read body velocities back to CPU"); //read body updated linear/angular velocities back to CPU m_data->m_bodyBufferGPU->read( m_data->m_bodyBufferCPU->m_ptr,numOfConvexRBodies); adl::DeviceUtils::waitForCompletion( m_data->m_deviceCL ); } #endif } }
void b3CpuNarrowPhase::computeContacts(b3AlignedObjectArray<b3Int4>& pairs, b3AlignedObjectArray<b3Aabb>& aabbsWorldSpace, b3AlignedObjectArray<b3RigidBodyData>& bodies) { int nPairs = pairs.size(); int numContacts = 0; int maxContactCapacity = m_data->m_config.m_maxContactCapacity; m_data->m_contacts.resize(maxContactCapacity); for (int i = 0; i < nPairs; i++) { int bodyIndexA = pairs[i].x; int bodyIndexB = pairs[i].y; int collidableIndexA = bodies[bodyIndexA].m_collidableIdx; int collidableIndexB = bodies[bodyIndexB].m_collidableIdx; if (m_data->m_collidablesCPU[collidableIndexA].m_shapeType == SHAPE_SPHERE && m_data->m_collidablesCPU[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL) { // computeContactSphereConvex(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,&bodies[0], // &m_data->m_collidablesCPU[0],&hostConvexData[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity); } if (m_data->m_collidablesCPU[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL && m_data->m_collidablesCPU[collidableIndexB].m_shapeType == SHAPE_SPHERE) { // computeContactSphereConvex(i,bodyIndexB,bodyIndexA,collidableIndexB,collidableIndexA,&bodies[0], // &m_data->m_collidablesCPU[0],&hostConvexData[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity); //printf("convex-sphere\n"); } if (m_data->m_collidablesCPU[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL && m_data->m_collidablesCPU[collidableIndexB].m_shapeType == SHAPE_PLANE) { // computeContactPlaneConvex(i,bodyIndexB,bodyIndexA,collidableIndexB,collidableIndexA,&bodies[0], // &m_data->m_collidablesCPU[0],&hostConvexData[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity); // printf("convex-plane\n"); } if (m_data->m_collidablesCPU[collidableIndexA].m_shapeType == SHAPE_PLANE && m_data->m_collidablesCPU[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL) { // computeContactPlaneConvex(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,&bodies[0], // &m_data->m_collidablesCPU[0],&hostConvexData[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity); // printf("plane-convex\n"); } if (m_data->m_collidablesCPU[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS && m_data->m_collidablesCPU[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS) { // computeContactCompoundCompound(i,bodyIndexB,bodyIndexA,collidableIndexB,collidableIndexA,&bodies[0], // &m_data->m_collidablesCPU[0],&hostConvexData[0],&cpuChildShapes[0], hostAabbsWorldSpace,hostAabbsLocalSpace,hostVertices,hostUniqueEdges,hostIndices,hostFaces,&hostContacts[0], // nContacts,maxContactCapacity,treeNodesCPU,subTreesCPU,bvhInfoCPU); // printf("convex-plane\n"); } if (m_data->m_collidablesCPU[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS && m_data->m_collidablesCPU[collidableIndexB].m_shapeType == SHAPE_PLANE) { // computeContactPlaneCompound(i,bodyIndexB,bodyIndexA,collidableIndexB,collidableIndexA,&bodies[0], // &m_data->m_collidablesCPU[0],&hostConvexData[0],&cpuChildShapes[0], &hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity); // printf("convex-plane\n"); } if (m_data->m_collidablesCPU[collidableIndexA].m_shapeType == SHAPE_PLANE && m_data->m_collidablesCPU[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS) { // computeContactPlaneCompound(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,&bodies[0], // &m_data->m_collidablesCPU[0],&hostConvexData[0],&cpuChildShapes[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity); // printf("plane-convex\n"); } if (m_data->m_collidablesCPU[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL && m_data->m_collidablesCPU[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL) { //printf("pairs[i].z=%d\n",pairs[i].z); //int contactIndex = computeContactConvexConvex2(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,bodies, // m_data->m_collidablesCPU,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts); int contactIndex = b3ContactConvexConvexSAT(i, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, bodies, m_data->m_collidablesCPU, m_data->m_convexPolyhedra, m_data->m_convexVertices, m_data->m_uniqueEdges, m_data->m_convexIndices, m_data->m_convexFaces, m_data->m_contacts, numContacts, maxContactCapacity); if (contactIndex >= 0) { pairs[i].z = contactIndex; } // printf("plane-convex\n"); } } m_data->m_contacts.resize(numContacts); }
void b3RadixSort32CL::executeHost(b3AlignedObjectArray<b3SortData>& inout, int sortBits /* = 32 */) { int n = inout.size(); const int BITS_PER_PASS = 8; const int NUM_TABLES = (1<<BITS_PER_PASS); int tables[NUM_TABLES]; int counter[NUM_TABLES]; b3SortData* src = &inout[0]; b3AlignedObjectArray<b3SortData> workbuffer; workbuffer.resize(inout.size()); b3SortData* dst = &workbuffer[0]; int count=0; for(int startBit=0; startBit<sortBits; startBit+=BITS_PER_PASS) { for(int i=0; i<NUM_TABLES; i++) { tables[i] = 0; } for(int i=0; i<n; i++) { int tableIdx = (src[i].m_key >> startBit) & (NUM_TABLES-1); tables[tableIdx]++; } //#define TEST #ifdef TEST printf("histogram size=%d\n",NUM_TABLES); for (int i=0;i<NUM_TABLES;i++) { if (tables[i]!=0) { printf("tables[%d]=%d]\n",i,tables[i]); } } #endif //TEST // prefix scan int sum = 0; for(int i=0; i<NUM_TABLES; i++) { int iData = tables[i]; tables[i] = sum; sum += iData; counter[i] = 0; } // distribute for(int i=0; i<n; i++) { int tableIdx = (src[i].m_key >> startBit) & (NUM_TABLES-1); dst[tables[tableIdx] + counter[tableIdx]] = src[i]; counter[tableIdx] ++; } b3Swap( src, dst ); count++; } if (count&1) { b3Assert(0);//need to copy } }
b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlyIterations(b3OpenCLArray<b3GpuGenericConstraint>* gpuConstraints1, int numConstraints, const b3ContactSolverInfo& infoGlobal) { //only create the batches once. //@todo: incrementally update batches when constraints are added/activated and/or removed/deactivated B3_PROFILE("GpuSolveGroupCacheFriendlyIterations"); bool createBatches = m_gpuData->m_batchSizes.size() == 0; { if (createBatches) { m_gpuData->m_batchSizes.resize(0); { m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints); B3_PROFILE("batch joints"); b3Assert(batchConstraints.size() == numConstraints); int simdWidth = numConstraints + 1; int numBodies = m_tmpSolverBodyPool.size(); sortConstraintByBatch3(&batchConstraints[0], numConstraints, simdWidth, m_staticIdx, numBodies); m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints); } } else { /*b3AlignedObjectArray<b3BatchConstraint> cpuCheckBatches; m_gpuData->m_gpuBatchConstraints->copyToHost(cpuCheckBatches); b3Assert(cpuCheckBatches.size()==batchConstraints.size()); printf(".\n"); */ //>copyFromHost(batchConstraints); } int maxIterations = infoGlobal.m_numIterations; bool useBatching = true; if (useBatching) { if (!useGpuSolveJointConstraintRows) { B3_PROFILE("copy to host"); m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool); m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints); m_gpuData->m_gpuConstraintRows->copyToHost(m_tmpSolverNonContactConstraintPool); m_gpuData->m_gpuConstraintInfo1->copyToHost(m_gpuData->m_cpuConstraintInfo1); m_gpuData->m_gpuConstraintRowOffsets->copyToHost(m_gpuData->m_cpuConstraintRowOffsets); gpuConstraints1->copyToHost(m_gpuData->m_cpuConstraints); } for (int iteration = 0; iteration < maxIterations; iteration++) { int batchOffset = 0; int constraintOffset = 0; int numBatches = m_gpuData->m_batchSizes.size(); for (int bb = 0; bb < numBatches; bb++) { int numConstraintsInBatch = m_gpuData->m_batchSizes[bb]; if (useGpuSolveJointConstraintRows) { B3_PROFILE("solveJointConstraintRowsKernels"); /* __kernel void solveJointConstraintRows(__global b3GpuSolverBody* solverBodies, __global b3BatchConstraint* batchConstraints, __global b3SolverConstraint* rows, __global unsigned int* numConstraintRowsInfo1, __global unsigned int* rowOffsets, __global b3GpuGenericConstraint* constraints, int batchOffset, int numConstraintsInBatch*/ b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_solveJointConstraintRowsKernels, "m_solveJointConstraintRowsKernels"); launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuConstraintRows->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL()); launcher.setBuffer(gpuConstraints1->getBufferCL()); //to detect disabled constraints launcher.setConst(batchOffset); launcher.setConst(numConstraintsInBatch); launcher.launch1D(numConstraintsInBatch); } else //useGpu { for (int b = 0; b < numConstraintsInBatch; b++) { const b3BatchConstraint& c = batchConstraints[batchOffset + b]; /*printf("-----------\n"); printf("bb=%d\n",bb); printf("c.batchId = %d\n", c.m_batchId); */ b3Assert(c.m_batchId == bb); b3GpuGenericConstraint* constraint = &m_gpuData->m_cpuConstraints[c.m_originalConstraintIndex]; if (constraint->m_flags & B3_CONSTRAINT_FLAG_ENABLED) { int numConstraintRows = m_gpuData->m_cpuConstraintInfo1[c.m_originalConstraintIndex]; int constraintOffset = m_gpuData->m_cpuConstraintRowOffsets[c.m_originalConstraintIndex]; for (int jj = 0; jj < numConstraintRows; jj++) { // b3GpuSolverConstraint& constraint = m_tmpSolverNonContactConstraintPool[constraintOffset + jj]; //resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[constraint.m_solverBodyIdA],m_tmpSolverBodyPool[constraint.m_solverBodyIdB],constraint); resolveSingleConstraintRowGeneric2(&m_tmpSolverBodyPool[constraint.m_solverBodyIdA], &m_tmpSolverBodyPool[constraint.m_solverBodyIdB], &constraint); } } } } //useGpu batchOffset += numConstraintsInBatch; constraintOffset += numConstraintsInBatch; } } //for (int iteration... if (!useGpuSolveJointConstraintRows) { { B3_PROFILE("copy from host"); m_gpuData->m_gpuSolverBodies->copyFromHost(m_tmpSolverBodyPool); m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints); m_gpuData->m_gpuConstraintRows->copyFromHost(m_tmpSolverNonContactConstraintPool); } //B3_PROFILE("copy to host"); //m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool); } //int sz = sizeof(b3GpuSolverBody); //printf("cpu sizeof(b3GpuSolverBody)=%d\n",sz); } else { for (int iteration = 0; iteration < maxIterations; iteration++) { int numJoints = m_tmpSolverNonContactConstraintPool.size(); for (int j = 0; j < numJoints; j++) { b3GpuSolverConstraint& constraint = m_tmpSolverNonContactConstraintPool[j]; resolveSingleConstraintRowGeneric2(&m_tmpSolverBodyPool[constraint.m_solverBodyIdA], &m_tmpSolverBodyPool[constraint.m_solverBodyIdB], &constraint); } if (!m_usePgs) { averageVelocities(); } } } } clFinish(m_gpuData->m_queue); return 0.f; }
int main(int argc, char* argv[]) { sOpenGLVerbose = false; float dt = 1./120.f; int width = 1024; int height=768; app = new SimpleOpenGL3App("AllBullet2Demos",width,height); app->m_instancingRenderer->setCameraDistance(13); app->m_instancingRenderer->setCameraPitch(0); app->m_instancingRenderer->setCameraTargetPosition(b3MakeVector3(0,0,0)); app->m_window->setMouseMoveCallback(MyMouseMoveCallback); app->m_window->setMouseButtonCallback(MyMouseButtonCallback); app->m_window->setKeyboardCallback(MyKeyboardCallback); GLint err = glGetError(); assert(err==GL_NO_ERROR); sth_stash* fontstash=app->getFontStash(); gui = new GwenUserInterface; gui->init(width,height,fontstash,app->m_window->getRetinaScale()); int numDemos = sizeof(allDemos)/sizeof(BulletDemoEntry); for (int i=0;i<numDemos;i++) { allNames.push_back(allDemos[i].m_name); } selectDemo(loadCurrentDemoEntry(startFileName)); gui->registerComboBox(DEMO_SELECTION_COMBOBOX,allNames.size(),&allNames[0],sCurrentDemoIndex); //const char* names2[] = {"comboF", "comboG","comboH"}; //gui->registerComboBox(2,3,&names2[0],0); gui->setComboBoxCallback(MyComboBoxCallback); do { GLint err = glGetError(); assert(err==GL_NO_ERROR); app->m_instancingRenderer->init(); app->m_instancingRenderer->updateCamera(); app->drawGrid(); if (0) { char bla[1024]; static int frameCount = 0; frameCount++; sprintf(bla,"Simple test frame %d", frameCount); app->drawText(bla,10,10); } if (sCurrentDemo) { if (!pauseSimulation) sCurrentDemo->stepSimulation(1./60.f); sCurrentDemo->renderScene(); } static int toggle = 1; if (1) { gui->draw(app->m_instancingRenderer->getScreenWidth(),app->m_instancingRenderer->getScreenHeight()); } toggle=1-toggle; app->swapBuffer(); } while (!app->m_window->requestedExit()); selectDemo(0); delete gui; delete app; return 0; }