Beispiel #1
0
void	CLPhysicsDemo::stepSimulation()
{
	int sz = sizeof(ConvexPolyhedronCL2);
	int sz1 = sizeof(ConvexPolyhedronCL);
	btAssert(sz==sz1);

	int b1 = sizeof(Body2);
	int b2 = sizeof(RigidBodyBase::Body);
	btAssert(b1==b2);

	int ciErrNum=CL_SUCCESS;


	
	if (1 && m_numPhysicsInstances)
	{

		gFpIO.m_numObjects = m_numPhysicsInstances;
		
		
		if (useSapGpuBroadphase)
		{
			gFpIO.m_dAABB = m_data->m_BroadphaseSap->getAabbBuffer();
		} else
		{
			gFpIO.m_dAABB = m_data->m_BroadphaseGrid->getAabbBuffer();
		}
		gFpIO.m_dlocalShapeAABB = (cl_mem)m_data->m_localShapeAABBGPU->getBufferCL();
		gFpIO.m_numOverlap = 0;


		{
			BT_PROFILE("setupGpuAabbs");
			setupGpuAabbsFull(gFpIO,m_narrowphaseAndSolver->getBodiesGpu(), m_narrowphaseAndSolver->getCollidablesGpu() );
        //    setupGpuAabbsSimple(gFpIO);
		}

		{

		}

		if (1)
		{
			BT_PROFILE("calculateOverlappingPairs");
			
			if (useSapGpuBroadphase)
			{
				m_data->m_BroadphaseSap->calculateOverlappingPairs();
				gFpIO.m_numOverlap = m_data->m_BroadphaseSap->getNumOverlap();
				gFpIO.m_dAllOverlappingPairs = m_data->m_BroadphaseSap->getOverlappingPairBuffer();
			}
			else
			{
				m_data->m_BroadphaseGrid->calculateOverlappingPairs();
				gFpIO.m_dAllOverlappingPairs = m_data->m_BroadphaseGrid->getOverlappingPairBuffer();
				gFpIO.m_numOverlap = m_data->m_BroadphaseGrid->getNumOverlap();
			}
		}
		
		//printf("gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap );
		if (gFpIO.m_numOverlap>=0 && gFpIO.m_numOverlap<MAX_BROADPHASE_COLLISION_CL)
		{
			//colorPairsOpenCL(gFpIO);

			if (runOpenCLKernels)
			{
				{
					BT_PROFILE("setupBodies");
					if (m_narrowphaseAndSolver)
						setupBodies(gFpIO, m_data->m_linVelBuf->getBufferCL(), m_data->m_angVelBuf->getBufferCL(), m_narrowphaseAndSolver->getBodiesGpu(), m_narrowphaseAndSolver->getBodyInertiasGpu());
				}
				
				{
					BT_PROFILE("computeContacts");
					if (m_narrowphaseAndSolver)
						m_narrowphaseAndSolver->computeContacts(gFpIO.m_dAllOverlappingPairs,gFpIO.m_numOverlap, gFpIO.m_dAABB,gFpIO.m_numObjects);
				}

				bool useOpenCLSolver = true;
				if (useOpenCLSolver)
				{
					BT_PROFILE("solve Contact Constraints (OpenCL)");
					if (m_narrowphaseAndSolver)
						m_narrowphaseAndSolver->solveContacts();
				} else
				{
					BT_PROFILE("solve Contact Constraints CPU/serial");
					if (m_narrowphaseAndSolver && m_data->m_pgsSolver && m_narrowphaseAndSolver->getNumContactsGpu())
					{
						btGpuNarrowphaseAndSolver* np = m_narrowphaseAndSolver;
						
						btAlignedObjectArray<RigidBodyBase::Body> hostBodies;
						btOpenCLArray<RigidBodyBase::Body> gpuBodies(g_cxMainContext,g_cqCommandQue,0,true);
						gpuBodies.setFromOpenCLBuffer(np->getBodiesGpu(),np->getNumBodiesGpu());
						gpuBodies.copyToHost(hostBodies);

						btAlignedObjectArray<RigidBodyBase::Inertia> hostInertias;
						btOpenCLArray<RigidBodyBase::Inertia> gpuInertias(g_cxMainContext,g_cqCommandQue,0,true);
						gpuInertias.setFromOpenCLBuffer(np->getBodyInertiasGpu(),np->getNumBodiesGpu());
						gpuInertias.copyToHost(hostInertias);

						btAlignedObjectArray<Contact4> hostContacts;
						btOpenCLArray<Contact4> gpuContacts(g_cxMainContext,g_cqCommandQue,0,true);
						gpuContacts.setFromOpenCLBuffer(np->getContactsGpu(),np->getNumContactsGpu());
						gpuContacts.copyToHost(hostContacts);

						{
							BT_PROFILE("pgsSolver::solveContacts");
							m_data->m_pgsSolver->solveContacts(np->getNumBodiesGpu(),&hostBodies[0],&hostInertias[0],np->getNumContactsGpu(),&hostContacts[0]);
						}
						
						gpuBodies.copyFromHost(hostBodies);


					}
					
					
				}
				
				{
					BT_PROFILE("copyBodyVelocities");
					if (m_narrowphaseAndSolver)
						copyBodyVelocities(gFpIO, m_data->m_linVelBuf->getBufferCL(), m_data->m_angVelBuf->getBufferCL(), m_narrowphaseAndSolver->getBodiesGpu());

				}
			}

		} else
		{
			printf("error, gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap);
			btAssert(0);
		}


		{
			BT_PROFILE("integrateTransforms");

			if (runOpenCLKernels)
			{
				bool integrateOnGpu = true;
				if (integrateOnGpu)
				{
					int numObjects = m_numPhysicsInstances;
					int offset = m_maxShapeBufferCapacityInBytes/4;

					ciErrNum = clSetKernelArg(g_integrateTransformsKernel2, 0, sizeof(int), &offset);
					ciErrNum = clSetKernelArg(g_integrateTransformsKernel2, 1, sizeof(int), &numObjects);
					cl_mem bodyGpuBuffer = m_narrowphaseAndSolver->getBodiesGpu();
					ciErrNum = clSetKernelArg(g_integrateTransformsKernel2, 2, sizeof(cl_mem), (void*)&bodyGpuBuffer );
	
					cl_mem lv = m_data->m_linVelBuf->getBufferCL();
					cl_mem av = m_data->m_angVelBuf->getBufferCL();
					cl_mem btimes = m_data->m_bodyTimes->getBufferCL();

					ciErrNum = clSetKernelArg(g_integrateTransformsKernel2, 3, sizeof(cl_mem), (void*)&lv);
					ciErrNum = clSetKernelArg(g_integrateTransformsKernel2, 4, sizeof(cl_mem), (void*)&av);
					ciErrNum = clSetKernelArg(g_integrateTransformsKernel2, 5, sizeof(cl_mem), (void*)&btimes);
					
					
					

					size_t workGroupSize = 64;
					size_t	numWorkItems = workGroupSize*((m_numPhysicsInstances + (workGroupSize)) / workGroupSize);
				
					if (workGroupSize>numWorkItems)
						workGroupSize=numWorkItems;

					ciErrNum = clEnqueueNDRangeKernel(g_cqCommandQue, g_integrateTransformsKernel2, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);
					oclCHECKERROR(ciErrNum, CL_SUCCESS);
				} else
				{
#ifdef _WIN32
					//debug velocity
					btAlignedObjectArray<btVector3> linvel;
					m_data->m_linVelBuf->copyToHost(linvel);
					for (int i=0;i<linvel.size();i++)
					{
						btAssert(_finite(linvel[i].x()));
					}
#endif
                    btAssert(0);

				}
			} 
		}

		
			

	}

	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	

	if (runOpenCLKernels)
	{
		BT_PROFILE("clFinish");
		clFinish(g_cqCommandQue);
	}

	
}
Beispiel #2
0
void	CLPhysicsDemo::stepSimulation()
{
	int sz = sizeof(ConvexPolyhedronCL2);
	int sz1 = sizeof(ConvexPolyhedronCL);
	btAssert(sz==sz1);

	int b1 = sizeof(Body2);
	int b2 = sizeof(RigidBodyBase::Body);
	btAssert(b1==b2);

	BT_PROFILE("simulationLoop");
	
	
	cl_int ciErrNum = CL_SUCCESS;


	if(m_data->m_useInterop)
	{
#ifndef __APPLE__
		clBuffer = g_interopBuffer->getCLBUffer();
		BT_PROFILE("clEnqueueAcquireGLObjects");
		{
			BT_PROFILE("clEnqueueAcquireGLObjects");
			ciErrNum = clEnqueueAcquireGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, NULL);
			clFinish(g_cqCommandQue);
		}

#else
        assert(0);

#endif
	} else
	{

		glBindBuffer(GL_ARRAY_BUFFER, cube_vbo);
		glFlush();

		BT_PROFILE("glMapBuffer and clEnqueueWriteBuffer");

		blocking=  CL_TRUE;
		hostPtr=  (char*)glMapBuffer( GL_ARRAY_BUFFER,GL_READ_WRITE);//GL_WRITE_ONLY
		if (!clBuffer)
		{
			clBuffer = clCreateBuffer(g_cxMainContext, CL_MEM_READ_WRITE, VBOsize, 0, &ciErrNum);
		} 
		clFinish(g_cqCommandQue);
			oclCHECKERROR(ciErrNum, CL_SUCCESS);

		ciErrNum = clEnqueueWriteBuffer (	g_cqCommandQue,
 			clBuffer,
 			blocking,
 			0,
 			VBOsize,
 			hostPtr,0,0,0
		);
		clFinish(g_cqCommandQue);
	}



	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	if (1 && m_numPhysicsInstances)
	{

		gFpIO.m_numObjects = m_numPhysicsInstances;
		gFpIO.m_positionOffset = SHAPE_VERTEX_BUFFER_SIZE/4;
		gFpIO.m_clObjectsBuffer = clBuffer;
		if (useSapGpuBroadphase)
		{
			gFpIO.m_dAABB = m_data->m_BroadphaseSap->getAabbBuffer();
		} else
		{
			gFpIO.m_dAABB = m_data->m_BroadphaseGrid->getAabbBuffer();
		}
		gFpIO.m_dlocalShapeAABB = (cl_mem)m_data->m_localShapeAABBGPU->getBufferCL();
		gFpIO.m_numOverlap = 0;


		{
			BT_PROFILE("setupGpuAabbs");
			setupGpuAabbsFull(gFpIO,narrowphaseAndSolver->getBodiesGpu(), narrowphaseAndSolver->getCollidablesGpu() );
        //    setupGpuAabbsSimple(gFpIO);
		}

		{

		}

		if (1)
		{
			BT_PROFILE("calculateOverlappingPairs");
			
			if (useSapGpuBroadphase)
			{
				m_data->m_BroadphaseSap->calculateOverlappingPairs();
				gFpIO.m_dAllOverlappingPairs = m_data->m_BroadphaseSap->getOverlappingPairBuffer();
				gFpIO.m_numOverlap = m_data->m_BroadphaseSap->getNumOverlap();
			}
			else
			{
				m_data->m_BroadphaseGrid->calculateOverlappingPairs();
				gFpIO.m_dAllOverlappingPairs = m_data->m_BroadphaseGrid->getOverlappingPairBuffer();
				gFpIO.m_numOverlap = m_data->m_BroadphaseGrid->getNumOverlap();
			}
		}
		
		//printf("gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap );
		if (gFpIO.m_numOverlap>=0 && gFpIO.m_numOverlap<MAX_BROADPHASE_COLLISION_CL)
		{
			colorPairsOpenCL(gFpIO);

			if (runOpenCLKernels)
			{
				{
					//BT_PROFILE("setupBodies");
					if (narrowphaseAndSolver)
						setupBodies(gFpIO, m_data->m_linVelBuf->getBufferCL(), m_data->m_angVelBuf->getBufferCL(), narrowphaseAndSolver->getBodiesGpu(), narrowphaseAndSolver->getBodyInertiasGpu());
				}
				
				{
					BT_PROFILE("computeContactsAndSolver");
					if (narrowphaseAndSolver)
						narrowphaseAndSolver->computeContactsAndSolver(gFpIO.m_dAllOverlappingPairs,gFpIO.m_numOverlap, gFpIO.m_dAABB,gFpIO.m_numObjects);
				}

				
				{
					BT_PROFILE("copyBodyVelocities");
					if (narrowphaseAndSolver)
						copyBodyVelocities(gFpIO, m_data->m_linVelBuf->getBufferCL(), m_data->m_angVelBuf->getBufferCL(), narrowphaseAndSolver->getBodiesGpu(), narrowphaseAndSolver->getBodyInertiasGpu());
				}
			}

		} else
		{
			printf("error, gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap);
			btAssert(0);
		}


		{
			BT_PROFILE("integrateTransforms");

			if (runOpenCLKernels)
			{
				bool integrateOnGpu = true;
				if (integrateOnGpu)
				{
					int numObjects = m_numPhysicsInstances;
					int offset = SHAPE_VERTEX_BUFFER_SIZE/4;

					ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 0, sizeof(int), &offset);
					ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 1, sizeof(int), &numObjects);
					ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 2, sizeof(cl_mem), (void*)&clBuffer );
	
					cl_mem lv = m_data->m_linVelBuf->getBufferCL();
					cl_mem av = m_data->m_angVelBuf->getBufferCL();
					cl_mem btimes = m_data->m_bodyTimes->getBufferCL();

					ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 3, sizeof(cl_mem), (void*)&lv);
					ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 4, sizeof(cl_mem), (void*)&av);
					ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 5, sizeof(cl_mem), (void*)&btimes);
					
					
					

					size_t workGroupSize = 64;
					size_t	numWorkItems = workGroupSize*((m_numPhysicsInstances + (workGroupSize)) / workGroupSize);
				
					if (workGroupSize>numWorkItems)
						workGroupSize=numWorkItems;

					ciErrNum = clEnqueueNDRangeKernel(g_cqCommandQue, g_integrateTransformsKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);
					oclCHECKERROR(ciErrNum, CL_SUCCESS);
				} else
				{
					//debug velocity
					btAlignedObjectArray<btVector3> linvel;
					m_data->m_linVelBuf->copyToHost(linvel);
					for (int i=0;i<linvel.size();i++)
					{
						btAssert(_finite(linvel[i].x()));
					}
					btAssert(0);

				}
			} 
		}
			

	}

	if(m_data->m_useInterop)
	{
#ifndef __APPLE__
		BT_PROFILE("clEnqueueReleaseGLObjects");
		ciErrNum = clEnqueueReleaseGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, 0);
		clFinish(g_cqCommandQue);
#endif
	}
	else
	{
		BT_PROFILE("clEnqueueReadBuffer clReleaseMemObject and glUnmapBuffer");
		ciErrNum = clEnqueueReadBuffer (	g_cqCommandQue,
 		clBuffer,
 		blocking,
 		0,
 		VBOsize,
 		hostPtr,0,0,0);

		//clReleaseMemObject(clBuffer);
		clFinish(g_cqCommandQue);
		glUnmapBuffer( GL_ARRAY_BUFFER);
		glFlush();
	}

	oclCHECKERROR(ciErrNum, CL_SUCCESS);


	if (runOpenCLKernels)
	{
		BT_PROFILE("clFinish");
		clFinish(g_cqCommandQue);
	}

	
}
void	CLPhysicsDemo::stepSimulation()
{
	BT_PROFILE("simulationLoop");
	
	{
		BT_PROFILE("glFinish");
		glFinish();
	}
	cl_int ciErrNum = CL_SUCCESS;


	if(m_data->m_useInterop)
	{
		clBuffer = g_interopBuffer->getCLBUffer();
		BT_PROFILE("clEnqueueAcquireGLObjects");
		ciErrNum = clEnqueueAcquireGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, NULL);
		adl::DeviceUtils::waitForCompletion( g_deviceCL );
	} else
	{

		glBindBuffer(GL_ARRAY_BUFFER, cube_vbo);
		glFlush();

		BT_PROFILE("glMapBuffer and clEnqueueWriteBuffer");

		blocking=  CL_TRUE;
		hostPtr=  (char*)glMapBuffer( GL_ARRAY_BUFFER,GL_READ_WRITE);//GL_WRITE_ONLY
		if (!clBuffer)
		{
			clBuffer = clCreateBuffer(g_cxMainContext, CL_MEM_READ_WRITE, VBOsize, 0, &ciErrNum);
		} 
		adl::DeviceUtils::waitForCompletion( g_deviceCL );
			oclCHECKERROR(ciErrNum, CL_SUCCESS);

		ciErrNum = clEnqueueWriteBuffer (	g_cqCommandQue,
 			clBuffer,
 			blocking,
 			0,
 			VBOsize,
 			hostPtr,0,0,0
		);
		adl::DeviceUtils::waitForCompletion( g_deviceCL );
	}



	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	if (runOpenCLKernels && m_numPhysicsInstances)
	{

		gFpIO.m_numObjects = m_numPhysicsInstances;
		gFpIO.m_positionOffset = SHAPE_VERTEX_BUFFER_SIZE/4;
		gFpIO.m_clObjectsBuffer = clBuffer;
		gFpIO.m_dAABB = m_data->m_Broadphase->m_dAABB;
		gFpIO.m_dlocalShapeAABB = (cl_mem)m_data->m_localShapeAABB->m_ptr;
		gFpIO.m_numOverlap = 0;
		{
			BT_PROFILE("setupGpuAabbs");
			setupGpuAabbsFull(gFpIO,narrowphaseAndSolver->getBodiesGpu() );
		}
		if (1)
		{
			BT_PROFILE("calculateOverlappingPairs");
			m_data->m_Broadphase->calculateOverlappingPairs(0, m_numPhysicsInstances);
			gFpIO.m_dAllOverlappingPairs = m_data->m_Broadphase->m_dAllOverlappingPairs;
			gFpIO.m_numOverlap = m_data->m_Broadphase->m_numPrefixSum;
		}
		
		//printf("gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap );
		if (gFpIO.m_numOverlap>=0 && gFpIO.m_numOverlap<MAX_BROADPHASE_COLLISION_CL)
		{
			colorPairsOpenCL(gFpIO);

			if (1)
			{
				{
					//BT_PROFILE("setupBodies");
					if (narrowphaseAndSolver)
						setupBodies(gFpIO, gLinVelMem, gAngVelMem, narrowphaseAndSolver->getBodiesGpu(), narrowphaseAndSolver->getBodyInertiasGpu());
				}
				if (gFpIO.m_numOverlap)
				{
					BT_PROFILE("computeContactsAndSolver");
					if (narrowphaseAndSolver)
						narrowphaseAndSolver->computeContactsAndSolver(gFpIO.m_dAllOverlappingPairs,gFpIO.m_numOverlap);
				}

				{
					BT_PROFILE("copyBodyVelocities");
					if (narrowphaseAndSolver)
						copyBodyVelocities(gFpIO, gLinVelMem, gAngVelMem, narrowphaseAndSolver->getBodiesGpu(), narrowphaseAndSolver->getBodyInertiasGpu());
				}
			}

		} else
		{
			printf("error, gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap);
			btAssert(0);
		}


		{
			BT_PROFILE("integrateTransforms");

			if (runOpenCLKernels)
			{
				int numObjects = m_numPhysicsInstances;
				int offset = SHAPE_VERTEX_BUFFER_SIZE/4;

				ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 0, sizeof(int), &offset);
				ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 1, sizeof(int), &numObjects);
				ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 2, sizeof(cl_mem), (void*)&clBuffer );

				ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 3, sizeof(cl_mem), (void*)&gLinVelMem);
				ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 4, sizeof(cl_mem), (void*)&gAngVelMem);
				ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 5, sizeof(cl_mem), (void*)&gBodyTimes);
					
					
					

				size_t workGroupSize = 64;
				size_t	numWorkItems = workGroupSize*((m_numPhysicsInstances + (workGroupSize)) / workGroupSize);
				
				if (workGroupSize>numWorkItems)
					workGroupSize=numWorkItems;

				ciErrNum = clEnqueueNDRangeKernel(g_cqCommandQue, g_integrateTransformsKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);
				oclCHECKERROR(ciErrNum, CL_SUCCESS);
			}
		}
			

	}

	if(m_data->m_useInterop)
	{
		BT_PROFILE("clEnqueueReleaseGLObjects");
		ciErrNum = clEnqueueReleaseGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, 0);
		adl::DeviceUtils::waitForCompletion( g_deviceCL );
	}
	else
	{
		BT_PROFILE("clEnqueueReadBuffer clReleaseMemObject and glUnmapBuffer");
		ciErrNum = clEnqueueReadBuffer (	g_cqCommandQue,
 		clBuffer,
 		blocking,
 		0,
 		VBOsize,
 		hostPtr,0,0,0);

		//clReleaseMemObject(clBuffer);
		adl::DeviceUtils::waitForCompletion( g_deviceCL );
		glUnmapBuffer( GL_ARRAY_BUFFER);
		glFlush();
	}

	oclCHECKERROR(ciErrNum, CL_SUCCESS);


	if (runOpenCLKernels)
	{
		BT_PROFILE("clFinish");
		clFinish(g_cqCommandQue);
	}

	
}