Ejemplo n.º 1
0
void	CLPhysicsDemo::stepSimulation()
{
	int sz = sizeof(ConvexPolyhedronCL2);
	int sz1 = sizeof(ConvexPolyhedronCL);
	btAssert(sz==sz1);

	int b1 = sizeof(Body2);
	int b2 = sizeof(RigidBodyBase::Body);
	btAssert(b1==b2);

	BT_PROFILE("simulationLoop");
	
	
	cl_int ciErrNum = CL_SUCCESS;


	if(m_data->m_useInterop)
	{
#ifndef __APPLE__
		clBuffer = g_interopBuffer->getCLBUffer();
		BT_PROFILE("clEnqueueAcquireGLObjects");
		{
			BT_PROFILE("clEnqueueAcquireGLObjects");
			ciErrNum = clEnqueueAcquireGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, NULL);
			clFinish(g_cqCommandQue);
		}

#else
        assert(0);

#endif
	} else
	{

		glBindBuffer(GL_ARRAY_BUFFER, cube_vbo);
		glFlush();

		BT_PROFILE("glMapBuffer and clEnqueueWriteBuffer");

		blocking=  CL_TRUE;
		hostPtr=  (char*)glMapBuffer( GL_ARRAY_BUFFER,GL_READ_WRITE);//GL_WRITE_ONLY
		if (!clBuffer)
		{
			clBuffer = clCreateBuffer(g_cxMainContext, CL_MEM_READ_WRITE, VBOsize, 0, &ciErrNum);
		} 
		clFinish(g_cqCommandQue);
			oclCHECKERROR(ciErrNum, CL_SUCCESS);

		ciErrNum = clEnqueueWriteBuffer (	g_cqCommandQue,
 			clBuffer,
 			blocking,
 			0,
 			VBOsize,
 			hostPtr,0,0,0
		);
		clFinish(g_cqCommandQue);
	}



	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	if (1 && m_numPhysicsInstances)
	{

		gFpIO.m_numObjects = m_numPhysicsInstances;
		gFpIO.m_positionOffset = SHAPE_VERTEX_BUFFER_SIZE/4;
		gFpIO.m_clObjectsBuffer = clBuffer;
		if (useSapGpuBroadphase)
		{
			gFpIO.m_dAABB = m_data->m_BroadphaseSap->getAabbBuffer();
		} else
		{
			gFpIO.m_dAABB = m_data->m_BroadphaseGrid->getAabbBuffer();
		}
		gFpIO.m_dlocalShapeAABB = (cl_mem)m_data->m_localShapeAABBGPU->getBufferCL();
		gFpIO.m_numOverlap = 0;


		{
			BT_PROFILE("setupGpuAabbs");
			setupGpuAabbsFull(gFpIO,narrowphaseAndSolver->getBodiesGpu(), narrowphaseAndSolver->getCollidablesGpu() );
        //    setupGpuAabbsSimple(gFpIO);
		}

		{

		}

		if (1)
		{
			BT_PROFILE("calculateOverlappingPairs");
			
			if (useSapGpuBroadphase)
			{
				m_data->m_BroadphaseSap->calculateOverlappingPairs();
				gFpIO.m_dAllOverlappingPairs = m_data->m_BroadphaseSap->getOverlappingPairBuffer();
				gFpIO.m_numOverlap = m_data->m_BroadphaseSap->getNumOverlap();
			}
			else
			{
				m_data->m_BroadphaseGrid->calculateOverlappingPairs();
				gFpIO.m_dAllOverlappingPairs = m_data->m_BroadphaseGrid->getOverlappingPairBuffer();
				gFpIO.m_numOverlap = m_data->m_BroadphaseGrid->getNumOverlap();
			}
		}
		
		//printf("gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap );
		if (gFpIO.m_numOverlap>=0 && gFpIO.m_numOverlap<MAX_BROADPHASE_COLLISION_CL)
		{
			colorPairsOpenCL(gFpIO);

			if (runOpenCLKernels)
			{
				{
					//BT_PROFILE("setupBodies");
					if (narrowphaseAndSolver)
						setupBodies(gFpIO, m_data->m_linVelBuf->getBufferCL(), m_data->m_angVelBuf->getBufferCL(), narrowphaseAndSolver->getBodiesGpu(), narrowphaseAndSolver->getBodyInertiasGpu());
				}
				
				{
					BT_PROFILE("computeContactsAndSolver");
					if (narrowphaseAndSolver)
						narrowphaseAndSolver->computeContactsAndSolver(gFpIO.m_dAllOverlappingPairs,gFpIO.m_numOverlap, gFpIO.m_dAABB,gFpIO.m_numObjects);
				}

				
				{
					BT_PROFILE("copyBodyVelocities");
					if (narrowphaseAndSolver)
						copyBodyVelocities(gFpIO, m_data->m_linVelBuf->getBufferCL(), m_data->m_angVelBuf->getBufferCL(), narrowphaseAndSolver->getBodiesGpu(), narrowphaseAndSolver->getBodyInertiasGpu());
				}
			}

		} else
		{
			printf("error, gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap);
			btAssert(0);
		}


		{
			BT_PROFILE("integrateTransforms");

			if (runOpenCLKernels)
			{
				bool integrateOnGpu = true;
				if (integrateOnGpu)
				{
					int numObjects = m_numPhysicsInstances;
					int offset = SHAPE_VERTEX_BUFFER_SIZE/4;

					ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 0, sizeof(int), &offset);
					ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 1, sizeof(int), &numObjects);
					ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 2, sizeof(cl_mem), (void*)&clBuffer );
	
					cl_mem lv = m_data->m_linVelBuf->getBufferCL();
					cl_mem av = m_data->m_angVelBuf->getBufferCL();
					cl_mem btimes = m_data->m_bodyTimes->getBufferCL();

					ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 3, sizeof(cl_mem), (void*)&lv);
					ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 4, sizeof(cl_mem), (void*)&av);
					ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 5, sizeof(cl_mem), (void*)&btimes);
					
					
					

					size_t workGroupSize = 64;
					size_t	numWorkItems = workGroupSize*((m_numPhysicsInstances + (workGroupSize)) / workGroupSize);
				
					if (workGroupSize>numWorkItems)
						workGroupSize=numWorkItems;

					ciErrNum = clEnqueueNDRangeKernel(g_cqCommandQue, g_integrateTransformsKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);
					oclCHECKERROR(ciErrNum, CL_SUCCESS);
				} else
				{
					//debug velocity
					btAlignedObjectArray<btVector3> linvel;
					m_data->m_linVelBuf->copyToHost(linvel);
					for (int i=0;i<linvel.size();i++)
					{
						btAssert(_finite(linvel[i].x()));
					}
					btAssert(0);

				}
			} 
		}
			

	}

	if(m_data->m_useInterop)
	{
#ifndef __APPLE__
		BT_PROFILE("clEnqueueReleaseGLObjects");
		ciErrNum = clEnqueueReleaseGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, 0);
		clFinish(g_cqCommandQue);
#endif
	}
	else
	{
		BT_PROFILE("clEnqueueReadBuffer clReleaseMemObject and glUnmapBuffer");
		ciErrNum = clEnqueueReadBuffer (	g_cqCommandQue,
 		clBuffer,
 		blocking,
 		0,
 		VBOsize,
 		hostPtr,0,0,0);

		//clReleaseMemObject(clBuffer);
		clFinish(g_cqCommandQue);
		glUnmapBuffer( GL_ARRAY_BUFFER);
		glFlush();
	}

	oclCHECKERROR(ciErrNum, CL_SUCCESS);


	if (runOpenCLKernels)
	{
		BT_PROFILE("clFinish");
		clFinish(g_cqCommandQue);
	}

	
}
Ejemplo n.º 2
0
void	CLPhysicsDemo::stepSimulation()
{
	BT_PROFILE("simulationLoop");
	
	{
		BT_PROFILE("glFinish");
		glFinish();
	}
	cl_int ciErrNum = CL_SUCCESS;


	if(m_data->m_useInterop)
	{
		clBuffer = g_interopBuffer->getCLBUffer();
		BT_PROFILE("clEnqueueAcquireGLObjects");
		ciErrNum = clEnqueueAcquireGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, NULL);
		adl::DeviceUtils::waitForCompletion( g_deviceCL );
	} else
	{

		glBindBuffer(GL_ARRAY_BUFFER, cube_vbo);
		glFlush();

		BT_PROFILE("glMapBuffer and clEnqueueWriteBuffer");

		blocking=  CL_TRUE;
		hostPtr=  (char*)glMapBuffer( GL_ARRAY_BUFFER,GL_READ_WRITE);//GL_WRITE_ONLY
		if (!clBuffer)
		{
			clBuffer = clCreateBuffer(g_cxMainContext, CL_MEM_READ_WRITE, VBOsize, 0, &ciErrNum);
		} 
		adl::DeviceUtils::waitForCompletion( g_deviceCL );
			oclCHECKERROR(ciErrNum, CL_SUCCESS);

		ciErrNum = clEnqueueWriteBuffer (	g_cqCommandQue,
 			clBuffer,
 			blocking,
 			0,
 			VBOsize,
 			hostPtr,0,0,0
		);
		adl::DeviceUtils::waitForCompletion( g_deviceCL );
	}



	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	if (runOpenCLKernels && m_numPhysicsInstances)
	{

		gFpIO.m_numObjects = m_numPhysicsInstances;
		gFpIO.m_positionOffset = SHAPE_VERTEX_BUFFER_SIZE/4;
		gFpIO.m_clObjectsBuffer = clBuffer;
		gFpIO.m_dAABB = m_data->m_Broadphase->m_dAABB;
		gFpIO.m_dlocalShapeAABB = (cl_mem)m_data->m_localShapeAABB->m_ptr;
		gFpIO.m_numOverlap = 0;
		{
			BT_PROFILE("setupGpuAabbs");
			setupGpuAabbsFull(gFpIO,narrowphaseAndSolver->getBodiesGpu() );
		}
		if (1)
		{
			BT_PROFILE("calculateOverlappingPairs");
			m_data->m_Broadphase->calculateOverlappingPairs(0, m_numPhysicsInstances);
			gFpIO.m_dAllOverlappingPairs = m_data->m_Broadphase->m_dAllOverlappingPairs;
			gFpIO.m_numOverlap = m_data->m_Broadphase->m_numPrefixSum;
		}
		
		//printf("gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap );
		if (gFpIO.m_numOverlap>=0 && gFpIO.m_numOverlap<MAX_BROADPHASE_COLLISION_CL)
		{
			colorPairsOpenCL(gFpIO);

			if (1)
			{
				{
					//BT_PROFILE("setupBodies");
					if (narrowphaseAndSolver)
						setupBodies(gFpIO, gLinVelMem, gAngVelMem, narrowphaseAndSolver->getBodiesGpu(), narrowphaseAndSolver->getBodyInertiasGpu());
				}
				if (gFpIO.m_numOverlap)
				{
					BT_PROFILE("computeContactsAndSolver");
					if (narrowphaseAndSolver)
						narrowphaseAndSolver->computeContactsAndSolver(gFpIO.m_dAllOverlappingPairs,gFpIO.m_numOverlap);
				}

				{
					BT_PROFILE("copyBodyVelocities");
					if (narrowphaseAndSolver)
						copyBodyVelocities(gFpIO, gLinVelMem, gAngVelMem, narrowphaseAndSolver->getBodiesGpu(), narrowphaseAndSolver->getBodyInertiasGpu());
				}
			}

		} else
		{
			printf("error, gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap);
			btAssert(0);
		}


		{
			BT_PROFILE("integrateTransforms");

			if (runOpenCLKernels)
			{
				int numObjects = m_numPhysicsInstances;
				int offset = SHAPE_VERTEX_BUFFER_SIZE/4;

				ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 0, sizeof(int), &offset);
				ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 1, sizeof(int), &numObjects);
				ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 2, sizeof(cl_mem), (void*)&clBuffer );

				ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 3, sizeof(cl_mem), (void*)&gLinVelMem);
				ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 4, sizeof(cl_mem), (void*)&gAngVelMem);
				ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 5, sizeof(cl_mem), (void*)&gBodyTimes);
					
					
					

				size_t workGroupSize = 64;
				size_t	numWorkItems = workGroupSize*((m_numPhysicsInstances + (workGroupSize)) / workGroupSize);
				
				if (workGroupSize>numWorkItems)
					workGroupSize=numWorkItems;

				ciErrNum = clEnqueueNDRangeKernel(g_cqCommandQue, g_integrateTransformsKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);
				oclCHECKERROR(ciErrNum, CL_SUCCESS);
			}
		}
			

	}

	if(m_data->m_useInterop)
	{
		BT_PROFILE("clEnqueueReleaseGLObjects");
		ciErrNum = clEnqueueReleaseGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, 0);
		adl::DeviceUtils::waitForCompletion( g_deviceCL );
	}
	else
	{
		BT_PROFILE("clEnqueueReadBuffer clReleaseMemObject and glUnmapBuffer");
		ciErrNum = clEnqueueReadBuffer (	g_cqCommandQue,
 		clBuffer,
 		blocking,
 		0,
 		VBOsize,
 		hostPtr,0,0,0);

		//clReleaseMemObject(clBuffer);
		adl::DeviceUtils::waitForCompletion( g_deviceCL );
		glUnmapBuffer( GL_ARRAY_BUFFER);
		glFlush();
	}

	oclCHECKERROR(ciErrNum, CL_SUCCESS);


	if (runOpenCLKernels)
	{
		BT_PROFILE("clFinish");
		clFinish(g_cqCommandQue);
	}

	
}