예제 #1
0
void initCL( void* glCtx, void* glDC )
{
	int ciErrNum = 0;

#if defined(CL_PLATFORM_MINI_CL)
	cl_device_type deviceType = CL_DEVICE_TYPE_CPU;
#elif defined(CL_PLATFORM_AMD)
	cl_device_type deviceType = CL_DEVICE_TYPE_GPU;
#elif defined(CL_PLATFORM_NVIDIA)
	cl_device_type deviceType = CL_DEVICE_TYPE_GPU;
#else
	cl_device_type deviceType = CL_DEVICE_TYPE_CPU;
#endif

    //g_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_ALL, &ciErrNum);
	//g_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_GPU, &ciErrNum);
	//g_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_CPU, &ciErrNum);
	//try CL_DEVICE_TYPE_DEBUG for sequential, non-threaded execution, when using MiniCL on CPU, it gives a full callstack at the crash in the kernel
//#ifdef USE_MINICL
//	g_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_DEBUG, &ciErrNum);
//#else
	g_cxMainContext = btOclCommon::createContextFromType(deviceType, &ciErrNum, (intptr_t)glCtx, (intptr_t)glDC);
//#endif
	
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	g_cdDevice = btOclGetMaxFlopsDev(g_cxMainContext);
	
	btOclPrintDevInfo(g_cdDevice);

	// create a command-queue
	g_cqCommandQue = clCreateCommandQueue(g_cxMainContext, g_cdDevice, 0, &ciErrNum);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
}
예제 #2
0
void InitCL()
{
	void* glCtx=0;
	void* glDC = 0;

#ifdef _WIN32
	glCtx = wglGetCurrentContext();
#else //!_WIN32
	GLXContext glCtx = glXGetCurrentContext();
#endif //!_WIN32
	glDC = wglGetCurrentDC();

	int ciErrNum = 0;
	cl_device_type deviceType = CL_DEVICE_TYPE_ALL;//GPU;
	g_cxMainContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, glCtx, glDC);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	int numDev = btOpenCLUtils::getNumDevices(g_cxMainContext);

	if (numDev>0)
	{
		g_device= btOpenCLUtils::getDevice(g_cxMainContext,0);
		btOpenCLUtils::printDeviceInfo(g_device);
		// create a command-queue
		g_cqCommandQue = clCreateCommandQueue(g_cxMainContext, g_device, 0, &ciErrNum);
		oclCHECKERROR(ciErrNum, CL_SUCCESS);
		//normally you would create and execute kernels using this command queue

	}


}
void btParticlesDynamicsWorld::runComputeCellIdKernel()
{
    cl_int ciErrNum;
#if 0
	if(m_useCpuControls[SIMSTAGE_COMPUTE_CELL_ID]->m_active)
	{	// CPU version
		unsigned int memSize = sizeof(btVector3) * m_numParticles;
		ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dPos, CL_TRUE, 0, memSize, &(m_hPos[0]), 0, NULL, NULL);
		oclCHECKERROR(ciErrNum, CL_SUCCESS);
		for(int index = 0; index < m_numParticles; index++)
		{
			btVector3 pos = m_hPos[index];
			btInt4 gridPos = cpu_getGridPos(pos, &m_simParams);
			unsigned int hash = cpu_getPosHash(gridPos, &m_simParams);
			m_hPosHash[index].x = hash;
			m_hPosHash[index].y = index;
		}
		memSize = sizeof(btInt2) * m_numParticles;
		ciErrNum = clEnqueueWriteBuffer(m_cqCommandQue, m_dPosHash, CL_TRUE, 0, memSize, &(m_hPosHash[0]), 0, NULL, NULL);
		oclCHECKERROR(ciErrNum, CL_SUCCESS);
	}
	else
#endif
	{
		BT_PROFILE("ComputeCellId");
		runKernelWithWorkgroupSize(PARTICLES_KERNEL_COMPUTE_CELL_ID, m_numParticles);
		ciErrNum = clFinish(m_cqCommandQue);
		oclCHECKERROR(ciErrNum, CL_SUCCESS);
	}
/*
	// check
	int memSize = sizeof(btInt2) * m_hashSize;
    ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dPosHash, CL_TRUE, 0, memSize, &(m_hPosHash[0]), 0, NULL, NULL);
    oclCHECKERROR(ciErrNum, CL_SUCCESS);

	memSize = sizeof(float) * 4 * m_numParticles;
    ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dPos, CL_TRUE, 0, memSize, &(m_hPos[0]), 0, NULL, NULL);
    oclCHECKERROR(ciErrNum, CL_SUCCESS);
*/

	{
		BT_PROFILE("Copy VBO");
		// Explicit Copy (until OpenGL interop will work)
		// map the PBO to copy data from the CL buffer via host
		glBindBufferARB(GL_ARRAY_BUFFER, m_vbo);    
		// map the buffer object into client's memory
		void* ptr = glMapBufferARB(GL_ARRAY_BUFFER, GL_WRITE_ONLY_ARB);
		ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dPos, CL_TRUE, 0, sizeof(float) * 4 * m_numParticles, ptr, 0, NULL, NULL);
		oclCHECKERROR(ciErrNum, CL_SUCCESS);
		glUnmapBufferARB(GL_ARRAY_BUFFER); 
		glBindBufferARB(GL_ARRAY_BUFFER,0);
	}
}
예제 #4
0
void InitCL(int preferredDeviceIndex, int preferredPlatformIndex, bool useInterop)
{
	void* glCtx=0;
	void* glDC = 0;

#ifdef _WIN32
	glCtx = wglGetCurrentContext();
	glDC = wglGetCurrentDC();
#else //!_WIN32
#ifndef __APPLE__
    GLXContext glCtx = glXGetCurrentContext();
    glDC = wglGetCurrentDC();//??
#endif
#endif //!_WIN32

    
	int ciErrNum = 0;
//#ifdef CL_PLATFORM_INTEL
//	cl_device_type deviceType = CL_DEVICE_TYPE_ALL;
//#else
	cl_device_type deviceType = CL_DEVICE_TYPE_GPU;
//#endif

	

	if (useInterop)
	{
		g_cxMainContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, glCtx, glDC);
	} else
	{
		g_cxMainContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, 0,0,preferredDeviceIndex, preferredPlatformIndex);
	}


	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	int numDev = btOpenCLUtils::getNumDevices(g_cxMainContext);

	if (numDev>0)
	{
		g_device= btOpenCLUtils::getDevice(g_cxMainContext,0);
		g_cqCommandQue = clCreateCommandQueue(g_cxMainContext, g_device, 0, &ciErrNum);
		oclCHECKERROR(ciErrNum, CL_SUCCESS);
        
        btOpenCLUtils::printDeviceInfo(g_device);
		
		btOpenCLUtils::getDeviceInfo(g_device,&info);
		g_deviceName = info.m_deviceName;

	}

}
예제 #5
0
void initCL( void* glCtx, void* glDC )
{
	int ciErrNum = 0;

#if defined(CL_PLATFORM_MINI_CL)
	cl_device_type deviceType = CL_DEVICE_TYPE_CPU;//or use CL_DEVICE_TYPE_DEBUG to debug MiniCL
#elif defined(CL_PLATFORM_INTEL)
	cl_device_type deviceType = CL_DEVICE_TYPE_CPU;
#elif defined(CL_PLATFORM_AMD)
	cl_device_type deviceType = CL_DEVICE_TYPE_GPU;
#elif defined(CL_PLATFORM_NVIDIA)
	cl_device_type deviceType = CL_DEVICE_TYPE_GPU;
#else
#ifdef __APPLE__
	cl_device_type deviceType = CL_DEVICE_TYPE_ALL;//GPU;
#else
	cl_device_type deviceType = CL_DEVICE_TYPE_CPU;//CL_DEVICE_TYPE_ALL
#endif//__APPLE__
#endif
	
	g_cxMainContext = btOclCommon::createContextFromType(deviceType, &ciErrNum, glCtx, glDC);
	
	switch (deviceType)
	{
		case CL_DEVICE_TYPE_GPU:
			printf("createContextFromType(CL_DEVICE_TYPE_GPU)\n");
			break;
		case CL_DEVICE_TYPE_CPU:
			printf("createContextFromType(CL_DEVICE_TYPE_CPU)\n");
			break;
		case CL_DEVICE_TYPE_ALL:
			printf("createContextFromType(CL_DEVICE_TYPE_ALL)\n");
			break;
			
		default:
			printf("createContextFromType(unknown device type %d\n",(int)deviceType);
	};	

	//#endif


	
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	g_cdDevice = btOclGetMaxFlopsDev(g_cxMainContext);
	
	btOclPrintDevInfo(g_cdDevice);

	// create a command-queue
	g_cqCommandQue = clCreateCommandQueue(g_cxMainContext, g_cdDevice, 0, &ciErrNum);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
}
예제 #6
0
void InitCL(int preferredDeviceIndex, int preferredPlatformIndex)
{
	void* glCtx=0;
	void* glDC = 0;

#ifdef _WIN32
	glCtx = wglGetCurrentContext();
#else //!_WIN32
	GLXContext glCtx = glXGetCurrentContext();
#endif //!_WIN32
	glDC = wglGetCurrentDC();

	int ciErrNum = 0;
#ifdef CL_PLATFORM_INTEL
	cl_device_type deviceType = CL_DEVICE_TYPE_ALL;
#else
	cl_device_type deviceType = CL_DEVICE_TYPE_GPU;
#endif

	

	if (USE_GL_CL_INTEROP)
	{
		g_cxMainContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, glCtx, glDC);
	} else
	{
		g_cxMainContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, 0,0,preferredDeviceIndex, preferredPlatformIndex);
	}


	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	int numDev = btOpenCLUtils::getNumDevices(g_cxMainContext);

	if (numDev>0)
	{
		g_device= btOpenCLUtils::getDevice(g_cxMainContext,0);
		btOpenCLDeviceInfo clInfo;
		btOpenCLUtils::getDeviceInfo(g_device,clInfo);
		btOpenCLUtils::printDeviceInfo(g_device);
		// create a command-queue
		g_cqCommandQue = clCreateCommandQueue(g_cxMainContext, g_device, 0, &ciErrNum);
		oclCHECKERROR(ciErrNum, CL_SUCCESS);
		//normally you would create and execute kernels using this command queue

	}


}
예제 #7
0
void btGpuDemo3dOCLWrap::initKernel(int kernelId, char* pName)
{
	
	cl_int ciErrNum;
	cl_kernel kernel = clCreateKernel(m_cpProgram, pName, &ciErrNum);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	size_t wgSize;
	ciErrNum = clGetKernelWorkGroupInfo(kernel, m_cdDevice, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wgSize, NULL);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	m_kernels[kernelId].m_Id = kernelId;
	m_kernels[kernelId].m_kernel = kernel;
	m_kernels[kernelId].m_name = pName;
	m_kernels[kernelId].m_workgroupSize = (int)wgSize;
	return;
}
예제 #8
0
void CL2GL(CLPhysicsDemo& demo)
{
	int VBOsize = demo.m_maxShapeBufferCapacityInBytes+demo.m_numPhysicsInstances*(4+4+4+3)*sizeof(float);

	int ciErrNum;
	if(useInterop)
	{
#ifndef __APPLE__
		BT_PROFILE("clEnqueueReleaseGLObjects");
		ciErrNum = clEnqueueReleaseGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, 0);
		clFinish(g_cqCommandQue);
#endif
	}
	else
	{
		BT_PROFILE("clEnqueueReadBuffer clReleaseMemObject and glUnmapBuffer");
		ciErrNum = clEnqueueReadBuffer (	g_cqCommandQue,
 		clBuffer,
 		blocking,
 		0,
 		VBOsize,
 		hostPtr,0,0,0);

		//clReleaseMemObject(clBuffer);
		clFinish(g_cqCommandQue);
		glUnmapBuffer( GL_ARRAY_BUFFER);
		glFlush();
	}

	oclCHECKERROR(ciErrNum, CL_SUCCESS);

}
void btParticlesDynamicsWorld::runSortHashKernel()
{
	cl_int ciErrNum;
	int memSize = m_numParticles * sizeof(btInt2);
	if(m_useCpuControls[SIMSTAGE_SORT_CELL_ID]->m_active)
	{
		// CPU version
		// get hash from GPU
		ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, m_dPosHash, CL_TRUE, 0, memSize, &(m_hPosHash[0]), 0, NULL, NULL);
		oclCHECKERROR(ciErrNum, CL_SUCCESS);
		// sort
		class btHashPosKey
		{
		public:
			unsigned int hash;
			unsigned int index;
			void quickSort(btHashPosKey* pData, int lo, int hi)
			{
				int i=lo, j=hi;
				btHashPosKey x = pData[(lo+hi)/2];
				do
				{    
					while(pData[i].hash < x.hash) i++; 
					while(x.hash < pData[j].hash) j--;
					if(i <= j)
					{
						btHashPosKey t = pData[i];
						pData[i] = pData[j];
						pData[j] = t;
						i++; j--;
					}
				} while(i <= j);
				if(lo < j) pData->quickSort(pData, lo, j);
				if(i < hi) pData->quickSort(pData, i, hi);
			}
예제 #10
0
void btGpuDemo3dOCLWrap::copyArrayFromDevice(void* host, const cl_mem device, unsigned int size, int hostOffs, int devOffs)
{
    cl_int ciErrNum;
	char* pHost = (char*)host + hostOffs;
    ciErrNum = clEnqueueReadBuffer(m_cqCommandQue, device, CL_TRUE, devOffs, size, pHost, 0, NULL, NULL);
    oclCHECKERROR(ciErrNum, CL_SUCCESS);
}
예제 #11
0
void InitCL(int preferredDeviceIndex, int preferredPlatformIndex)
{
	bool useInterop = false;

	void* glCtx=0;
	void* glDC = 0;

	int ciErrNum = 0;
//#ifdef CL_PLATFORM_INTEL
//	cl_device_type deviceType = CL_DEVICE_TYPE_ALL;
//#else
	cl_device_type deviceType = CL_DEVICE_TYPE_GPU;
//#endif

	

	if (useInterop)
	{
	//	g_cxMainContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, glCtx, glDC);
	} else
	{
		cl_platform_id platform;
		g_cxMainContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, 0,0,preferredDeviceIndex, preferredPlatformIndex, &platform);
		if (g_cxMainContext && platform)
		{
			btOpenCLUtils::printPlatformInfo(platform);
		}
	}


	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	int numDev = btOpenCLUtils::getNumDevices(g_cxMainContext);

	if (numDev>0)
	{
		g_device= btOpenCLUtils::getDevice(g_cxMainContext,0);
		g_cqCommandQueue = clCreateCommandQueue(g_cxMainContext, g_device, 0, &ciErrNum);
		oclCHECKERROR(ciErrNum, CL_SUCCESS);
        
		
        btOpenCLUtils::printDeviceInfo(g_device);

	}

}
예제 #12
0
void BasicGpuDemo::initCL(int preferredDeviceIndex, int preferredPlatformIndex)
{
	void* glCtx=0;
	void* glDC = 0;
	
	
    
	int ciErrNum = 0;
	//#ifdef CL_PLATFORM_INTEL
	//cl_device_type deviceType = CL_DEVICE_TYPE_ALL;
	//#else
	cl_device_type deviceType = CL_DEVICE_TYPE_GPU;
	//#endif
	
	cl_platform_id platformId;
	
	//	if (useInterop)
	//	{
	//		m_data->m_clContext = b3OpenCLUtils::createContextFromType(deviceType, &ciErrNum, glCtx, glDC);
	//	} else
	{
		m_clData->m_clContext = b3OpenCLUtils::createContextFromType(deviceType, &ciErrNum, 0,0,preferredDeviceIndex, preferredPlatformIndex,&platformId);
		b3OpenCLUtils::printPlatformInfo(platformId);
	}
	
	
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	
	int numDev = b3OpenCLUtils::getNumDevices(m_clData->m_clContext);
	
	if (numDev>0)
	{
		m_clData->m_clDevice= b3OpenCLUtils::getDevice(m_clData->m_clContext,0);
		m_clData->m_clQueue = clCreateCommandQueue(m_clData->m_clContext, m_clData->m_clDevice, 0, &ciErrNum);
		oclCHECKERROR(ciErrNum, CL_SUCCESS);
        
        b3OpenCLUtils::printDeviceInfo(m_clData->m_clDevice);
		b3OpenCLDeviceInfo info;
		b3OpenCLUtils::getDeviceInfo(m_clData->m_clDevice,&info);
		m_clData->m_clDeviceName = info.m_deviceName;
		m_clData->m_clInitialized = true;
		
	}
	
}
예제 #13
0
void GL2CL(CLPhysicsDemo& demo, GLInstancingRenderer& render)
{
	BT_PROFILE("simulationLoop");
	int VBOsize = demo.m_maxShapeBufferCapacityInBytes+demo.m_numPhysicsInstances*(4+4+4+3)*sizeof(float);
	
	cl_int ciErrNum = CL_SUCCESS;


	if(useInterop)
	{
#ifndef __APPLE__
		clBuffer = g_interopBuffer->getCLBUffer();
		BT_PROFILE("clEnqueueAcquireGLObjects");
		{
			BT_PROFILE("clEnqueueAcquireGLObjects");
			ciErrNum = clEnqueueAcquireGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, NULL);
			clFinish(g_cqCommandQue);
		}

#else
        assert(0);

#endif
	} else
	{

		glBindBuffer(GL_ARRAY_BUFFER, render.getInternalData()->m_vbo);
		glFlush();

		BT_PROFILE("glMapBuffer and clEnqueueWriteBuffer");

		blocking=  CL_TRUE;
		hostPtr=  (char*)glMapBuffer( GL_ARRAY_BUFFER,GL_READ_WRITE);//GL_WRITE_ONLY
		

		if (!clBuffer)
		{
			int maxVBOsize = demo.m_maxShapeBufferCapacityInBytes+MAX_CONVEX_BODIES_CL*(4+4+4+3)*sizeof(float);
			clBuffer = clCreateBuffer(g_cxMainContext, CL_MEM_READ_WRITE,maxVBOsize, 0, &ciErrNum);
			clFinish(g_cqCommandQue);
			oclCHECKERROR(ciErrNum, CL_SUCCESS);

		ciErrNum = clEnqueueWriteBuffer (	g_cqCommandQue,
 			clBuffer,
 			blocking,
 			0,
 			VBOsize,
 			hostPtr,0,0,0
		);
		clFinish(g_cqCommandQue);
		} 
		
	}

	gFpIO.m_clObjectsBuffer = clBuffer;
	gFpIO.m_positionOffset = demo.m_maxShapeBufferCapacityInBytes/4;
}
void btParticlesDynamicsWorld::grabSimulationData()
{
//	const btVector3& gravity = getGravity();
	//btVector3 gravity(0., -0.06, 0.);
	//btVector3 gravity(0., -0.0003f, 0.);
	btVector3 gravity(0,-0.0003,0);

	

	m_simParams.m_gravity[0] = gravity[0];
	m_simParams.m_gravity[1] = gravity[1];
	m_simParams.m_gravity[2] = gravity[2];
	m_simParams.m_particleRad = m_particleRad;
	m_simParams.m_globalDamping = 1.0f;
	m_simParams.m_boundaryDamping = -0.5f;

//	m_simParams.m_collisionDamping = 0.02f;
//	m_simParams.m_spring = 0.5f;
//	m_simParams.m_shear = 0.1f;
//	m_simParams.m_attraction = 0.0f;
	m_simParams.m_collisionDamping = 0.025f;//0.02f;
	m_simParams.m_spring = 0.5f;
	m_simParams.m_shear = 0.1f;
	m_simParams.m_attraction = 0.001f;



	// copy data to GPU
    cl_int ciErrNum;
	unsigned int memSize = sizeof(btVector3) * m_numParticles;
    ciErrNum = clEnqueueWriteBuffer(m_cqCommandQue, m_dPos, CL_TRUE, 0, memSize, &(m_hPos[0]), 0, NULL, NULL);
    oclCHECKERROR(ciErrNum, CL_SUCCESS);
    ciErrNum = clEnqueueWriteBuffer(m_cqCommandQue, m_dVel, CL_TRUE, 0, memSize, &(m_hVel[0]), 0, NULL, NULL);
    oclCHECKERROR(ciErrNum, CL_SUCCESS);
	memSize = sizeof(btSimParams);
    ciErrNum = clEnqueueWriteBuffer(m_cqCommandQue, m_dSimParams, CL_TRUE, 0, memSize, &m_simParams, 0, NULL, NULL);
    oclCHECKERROR(ciErrNum, CL_SUCCESS);
	memSize = m_hashSize * sizeof(btInt2);
	ciErrNum = clEnqueueWriteBuffer(m_cqCommandQue, m_dPosHash, CL_TRUE, 0, memSize, &(m_hPosHash[0]), 0, NULL, NULL);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
}
void btParticlesDynamicsWorld::allocateBuffers()
{
    cl_int ciErrNum;
	// positions of spheres
	m_hPos.resize(m_numParticles);
	m_hVel.resize(m_numParticles);
	m_hSortedPos.resize(m_numParticles);
	m_hSortedVel.resize(m_numParticles);
	m_hPosHash.resize(m_hashSize); 
	for(int i = 0; i < m_hashSize; i++) { m_hPosHash[i].x = 0x7FFFFFFF; m_hPosHash[i].y = 0; }
    unsigned int memSize = sizeof(btVector3) *  m_numParticles;
    m_dPos = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
    oclCHECKERROR(ciErrNum, CL_SUCCESS);
    m_dVel = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
    oclCHECKERROR(ciErrNum, CL_SUCCESS);
    m_dSortedPos = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
    oclCHECKERROR(ciErrNum, CL_SUCCESS);
    m_dSortedVel = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
    oclCHECKERROR(ciErrNum, CL_SUCCESS);
	memSize = m_hashSize * sizeof(btInt2);
	m_dPosHash = clCreateBuffer(m_cxMainContext, CL_MEM_READ_ONLY, memSize, NULL, &ciErrNum);
    oclCHECKERROR(ciErrNum, CL_SUCCESS);

	// global simulation parameters
	memSize = sizeof(btSimParams);
	m_dSimParams = clCreateBuffer(m_cxMainContext, CL_MEM_READ_ONLY, memSize, NULL, &ciErrNum);
    oclCHECKERROR(ciErrNum, CL_SUCCESS);
}
btParallelConstraintSolver::btParallelConstraintSolver()
{
	//initialize MiniCL here
    cl_int ciErrNum;

	s_cxMainContext = clCreateContextFromType(0, CL_DEVICE_TYPE_ALL, NULL, NULL, &ciErrNum);
    oclCHECKERROR(ciErrNum, CL_SUCCESS);

    size_t dataSzBytes;
    cl_device_id* clDevices;
    clGetContextInfo(s_cxMainContext, CL_CONTEXT_DEVICES, 0, NULL, &dataSzBytes);
    clDevices = (cl_device_id*) malloc(dataSzBytes);

    clGetContextInfo(s_cxMainContext, CL_CONTEXT_DEVICES, dataSzBytes, clDevices, NULL);

    s_cdDevice = clDevices[0];

	free(clDevices);

	// create a command-queue
	s_cqCommandQue = clCreateCommandQueue(s_cxMainContext, s_cdDevice, 0, &ciErrNum);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	size_t program_length = 0;
	s_cpProgram = clCreateProgramWithSource(s_cxMainContext, 1, NULL, &program_length, &ciErrNum);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);

    // Create kernels
    s_setupContactKernel = clCreateKernel(s_cpProgram, "kSetupContact", &ciErrNum);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	size_t wgSize;
	ciErrNum = clGetKernelWorkGroupInfo(s_setupContactKernel, s_cdDevice, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wgSize, NULL);
	m_localGroupSize = wgSize; 

    s_solveContactKernel = clCreateKernel(s_cpProgram, "kSolveContact", &ciErrNum);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);

}
예제 #17
0
파일: main.cpp 프로젝트: 20-sim/bullet3
void initCL(int preferredDeviceIndex, int preferredPlatformIndex)
{
	//void* glCtx=0;
	//void* glDC = 0;
	int ciErrNum = 0;
	//bound search and radix sort only work on GPU right now (assume 32 or 64 width workgroup without barriers)

	cl_device_type deviceType = CL_DEVICE_TYPE_ALL;

	g_context = b3OpenCLUtils::createContextFromType(deviceType, &ciErrNum, 0,0,preferredDeviceIndex, preferredPlatformIndex);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	int numDev = b3OpenCLUtils::getNumDevices(g_context);
	if (numDev>0)
	{
		b3OpenCLDeviceInfo info;
		g_device= b3OpenCLUtils::getDevice(g_context,0);
		g_queue = clCreateCommandQueue(g_context, g_device, 0, &ciErrNum);
		oclCHECKERROR(ciErrNum, CL_SUCCESS);
        b3OpenCLUtils::printDeviceInfo(g_device);
		b3OpenCLUtils::getDeviceInfo(g_device,&info);
		g_deviceName = info.m_deviceName;
	}
}
예제 #18
0
void btGpuDemo3dOCLWrap::runKernelWithWorkgroupSize(int kernelId, int globalSize)
{
	if(globalSize <= 0)
	{
		return;
	}
	cl_kernel kernelFunc = m_kernels[kernelId].m_kernel;
	cl_int ciErrNum = clSetKernelArg(kernelFunc, 0, sizeof(int), (void*)&globalSize);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	int workgroupSize = m_kernels[kernelId].m_workgroupSize;
	if(workgroupSize <= 0)
	{ // let OpenCL library calculate workgroup size
		size_t globalWorkSize[2];
		globalWorkSize[0] = globalSize;
		globalWorkSize[1] = 1;
		ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, kernelFunc, 1, NULL, globalWorkSize, NULL, 0,0,0 );
	}
	else
	{
		size_t localWorkSize[2], globalWorkSize[2];
		workgroupSize = btMin(workgroupSize, globalSize);
		int num_t = globalSize / workgroupSize;
		int num_g = num_t * workgroupSize;
		if(num_g < globalSize)
		{
			num_t++;
		}
		localWorkSize[0]  = workgroupSize;
		globalWorkSize[0] = num_t * workgroupSize;
		localWorkSize[1] = 1;
		globalWorkSize[1] = 1;
		ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, kernelFunc, 1, NULL, globalWorkSize, localWorkSize, 0,0,0 );
	}
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	ciErrNum = clFlush(m_cqCommandQue);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
}
btScalar btParallelConstraintSolver::solveGroupCacheFriendlyIterations(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer,btStackAlloc* stackAlloc)
{
	BT_PROFILE("runIterations");
#if USE_SEQUENTIAL_SOLVER
	return btSequentialImpulseConstraintSolver::solveGroupCacheFriendlyIterations(bodies, numBodies, manifoldPtr, numManifolds, constraints, numConstraints, infoGlobal, debugDrawer, stackAlloc);
#else
	int iteration;
	#if RUN_KERNELS_DIRECTLY
		for ( iteration = 0;iteration<infoGlobal.m_numIterations;iteration++)
		{
			for(int i = 0; i < m_localGroupSize; i++)
			{
				kSolveContact(this, m_taskParams, (btContactSolverInfo*)&infoGlobal, i);
			}
		}
	#else
		cl_int ciErrNum;
		btParallelConstraintSolver* pSolver = this;
		btParallelConstraintSolverSetupTaskParams* pTaskParams = m_taskParams;
		btContactSolverInfo* pInfoGlobal = (btContactSolverInfo*)&infoGlobal;
		ciErrNum  = clSetKernelArg(s_solveContactKernel, 0, sizeof(cl_mem), (void*)&pSolver);
		ciErrNum |= clSetKernelArg(s_solveContactKernel, 1, sizeof(cl_mem), (void*)&pTaskParams);
		ciErrNum |= clSetKernelArg(s_solveContactKernel, 2, sizeof(cl_mem), (void*)&pInfoGlobal);
		oclCHECKERROR(ciErrNum, CL_SUCCESS);
		size_t szWorkSize[1];
		szWorkSize[0] = m_localGroupSize;
		for ( iteration = 0;iteration<infoGlobal.m_numIterations;iteration++)
		{
			ciErrNum = clEnqueueNDRangeKernel(s_cqCommandQue, s_solveContactKernel, 1, NULL, szWorkSize, szWorkSize, 0, NULL, NULL);
			oclCHECKERROR(ciErrNum, CL_SUCCESS);
			clFlush(s_cqCommandQue);
		}
	#endif
	return 0.f;
#endif
}
예제 #20
0
btGridBroadphaseCl::btGridBroadphaseCl(	btOverlappingPairCache* overlappingPairCache,
							const btVector3& cellSize, 
							int gridSizeX, int gridSizeY, int gridSizeZ, 
							int maxSmallProxies, int maxLargeProxies, int maxPairsPerSmallProxy,
							btScalar maxSmallProxySize,
							int maxSmallProxiesPerCell,
							cl_context context,
							cl_device_id device,
							cl_command_queue queue,
							adl::DeviceCL* deviceCL)
:bt3dGridBroadphaseOCL(overlappingPairCache,cellSize,
				gridSizeX, gridSizeY, gridSizeZ, 
						maxSmallProxies, maxLargeProxies, maxPairsPerSmallProxy,
						maxSmallProxySize,maxSmallProxiesPerCell,
						context,device,queue,deviceCL)			
{
	m_computeAabbKernel = m_deviceCL->getKernel(COMPUTE_AABB_KERNEL_PATH,"computeAabb","",spComputeAabbSource);

	m_countOverlappingPairs = m_deviceCL->getKernel(COMPUTE_AABB_KERNEL_PATH,"countOverlappingpairs","",spComputeAabbSource);

	m_squeezePairCaches = m_deviceCL->getKernel(COMPUTE_AABB_KERNEL_PATH,"squeezePairCaches","",spComputeAabbSource);

	m_aabbConstBuffer = new adl::Buffer<MyAabbConstData >(m_deviceCL,1,adl::BufferBase::BUFFER_CONST);

	size_t memSize = m_maxHandles * m_maxPairsPerBody * sizeof(unsigned int)*2;
	cl_int ciErrNum=0;
	m_dAllOverlappingPairs = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);

	memset(m_hAllOverlappingPairs, 0x00, sizeof(MyUint2)*m_maxHandles * m_maxPairsPerBody);
	copyArrayToDevice(m_dAllOverlappingPairs, m_hAllOverlappingPairs, m_maxHandles * m_maxPairsPerBody * sizeof(MyUint2));

	
	
	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	

}
예제 #21
0
void btGridBroadphaseCl::calculateOverlappingPairs(float* positions, int numObjects)
{
	btDispatcher* dispatcher=0;

	// update constants
	{
		BT_PROFILE("setParameters");
		setParameters(&m_params);
	}

	// prepare AABB array
	{
		BT_PROFILE("prepareAABB");
		prepareAABB(positions, numObjects);
	}
	// calculate hash
	{
		BT_PROFILE("calcHashAABB");
		calcHashAABB();
	}

	{
		BT_PROFILE("sortHash");
		// sort bodies based on hash
		sortHash();
	}

	// find start of each cell
	{
		BT_PROFILE("findCellStart");
		findCellStart();
	}
	
	{
		BT_PROFILE("findOverlappingPairs");
		// findOverlappingPairs (small/small)
		findOverlappingPairs();
	}

	// add pairs to CPU cache
	{
		BT_PROFILE("computePairCacheChanges");
#if 0
		computePairCacheChanges();
#else
		int ciErrNum=0;

		ciErrNum=clSetKernelArg((cl_kernel)m_countOverlappingPairs->m_kernel, 0, sizeof(int), (void*)&numObjects);
		ciErrNum=clSetKernelArg((cl_kernel)m_countOverlappingPairs->m_kernel, 1, sizeof(cl_mem),(void*)&m_dPairBuff);
		ciErrNum=clSetKernelArg((cl_kernel)m_countOverlappingPairs->m_kernel, 2, sizeof(cl_mem),(void*)&m_dPairBuffStartCurr);
		ciErrNum=clSetKernelArg((cl_kernel)m_countOverlappingPairs->m_kernel, 3, sizeof(cl_mem),(void*)&m_dPairScanChanged);
		ciErrNum=clSetKernelArg((cl_kernel)m_countOverlappingPairs->m_kernel, 4, sizeof(cl_mem),(void*)&m_dAABB);


		size_t localWorkSize=64;
		size_t numWorkItems = localWorkSize*((numObjects+ (localWorkSize)) / localWorkSize);

	
		ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, (cl_kernel)m_countOverlappingPairs->m_kernel, 1, NULL, &numWorkItems, &localWorkSize, 0,0,0 );
oclCHECKERROR(ciErrNum, CL_SUCCESS);
		ciErrNum = clFlush(m_cqCommandQue);
#endif


	}
	{
		BT_PROFILE("scanOverlappingPairBuff");
		scanOverlappingPairBuff(false);
	}
	{
		BT_PROFILE("squeezeOverlappingPairBuff");
//#define FORCE_CPU
#ifdef FORCE_CPU
		bt3dGridBroadphaseOCL::squeezeOverlappingPairBuff();
		copyArrayToDevice(m_dPairsChangedXY, m_hPairsChangedXY, sizeof( MyUint2) * m_numPrefixSum); //gSum
#else
		//squeezeOverlappingPairBuff();
		int ciErrNum = 0;
		ciErrNum=clSetKernelArg((cl_kernel)m_squeezePairCaches->m_kernel, 0, sizeof(int), (void*)&numObjects);
		ciErrNum=clSetKernelArg((cl_kernel)m_squeezePairCaches->m_kernel, 1, sizeof(cl_mem),(void*)&m_dPairBuff);
		ciErrNum=clSetKernelArg((cl_kernel)m_squeezePairCaches->m_kernel, 2, sizeof(cl_mem),(void*)&m_dPairBuffStartCurr);
		ciErrNum=clSetKernelArg((cl_kernel)m_squeezePairCaches->m_kernel, 3, sizeof(cl_mem),(void*)&m_dPairScanChanged);
		ciErrNum=clSetKernelArg((cl_kernel)m_squeezePairCaches->m_kernel, 4, sizeof(cl_mem),(void*)&m_dAllOverlappingPairs);
		ciErrNum=clSetKernelArg((cl_kernel)m_squeezePairCaches->m_kernel, 5, sizeof(cl_mem),(void*)&m_dAABB);

		size_t workGroupSize = 64;
		size_t numWorkItems = workGroupSize*((numObjects+ (workGroupSize)) / workGroupSize);

	
		ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, (cl_kernel)m_squeezePairCaches->m_kernel, 1, NULL, &numWorkItems, &workGroupSize, 0,0,0 );
		oclCHECKERROR(ciErrNum, CL_SUCCESS);
		

//		copyArrayFromDevice(m_hAllOverlappingPairs, m_dAllOverlappingPairs, sizeof(unsigned int) * m_numPrefixSum*2); //gSum
//		clFinish(m_cqCommandQue);
#endif

	}


	return;
}
예제 #22
0
void	CLPhysicsDemo::stepSimulation()
{
	int sz = sizeof(ConvexPolyhedronCL2);
	int sz1 = sizeof(ConvexPolyhedronCL);
	btAssert(sz==sz1);

	int b1 = sizeof(Body2);
	int b2 = sizeof(RigidBodyBase::Body);
	btAssert(b1==b2);

	BT_PROFILE("simulationLoop");
	
	
	cl_int ciErrNum = CL_SUCCESS;


	if(m_data->m_useInterop)
	{
#ifndef __APPLE__
		clBuffer = g_interopBuffer->getCLBUffer();
		BT_PROFILE("clEnqueueAcquireGLObjects");
		{
			BT_PROFILE("clEnqueueAcquireGLObjects");
			ciErrNum = clEnqueueAcquireGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, NULL);
			clFinish(g_cqCommandQue);
		}

#else
        assert(0);

#endif
	} else
	{

		glBindBuffer(GL_ARRAY_BUFFER, cube_vbo);
		glFlush();

		BT_PROFILE("glMapBuffer and clEnqueueWriteBuffer");

		blocking=  CL_TRUE;
		hostPtr=  (char*)glMapBuffer( GL_ARRAY_BUFFER,GL_READ_WRITE);//GL_WRITE_ONLY
		if (!clBuffer)
		{
			clBuffer = clCreateBuffer(g_cxMainContext, CL_MEM_READ_WRITE, VBOsize, 0, &ciErrNum);
		} 
		clFinish(g_cqCommandQue);
			oclCHECKERROR(ciErrNum, CL_SUCCESS);

		ciErrNum = clEnqueueWriteBuffer (	g_cqCommandQue,
 			clBuffer,
 			blocking,
 			0,
 			VBOsize,
 			hostPtr,0,0,0
		);
		clFinish(g_cqCommandQue);
	}



	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	if (1 && m_numPhysicsInstances)
	{

		gFpIO.m_numObjects = m_numPhysicsInstances;
		gFpIO.m_positionOffset = SHAPE_VERTEX_BUFFER_SIZE/4;
		gFpIO.m_clObjectsBuffer = clBuffer;
		if (useSapGpuBroadphase)
		{
			gFpIO.m_dAABB = m_data->m_BroadphaseSap->getAabbBuffer();
		} else
		{
			gFpIO.m_dAABB = m_data->m_BroadphaseGrid->getAabbBuffer();
		}
		gFpIO.m_dlocalShapeAABB = (cl_mem)m_data->m_localShapeAABBGPU->getBufferCL();
		gFpIO.m_numOverlap = 0;


		{
			BT_PROFILE("setupGpuAabbs");
			setupGpuAabbsFull(gFpIO,narrowphaseAndSolver->getBodiesGpu(), narrowphaseAndSolver->getCollidablesGpu() );
        //    setupGpuAabbsSimple(gFpIO);
		}

		{

		}

		if (1)
		{
			BT_PROFILE("calculateOverlappingPairs");
			
			if (useSapGpuBroadphase)
			{
				m_data->m_BroadphaseSap->calculateOverlappingPairs();
				gFpIO.m_dAllOverlappingPairs = m_data->m_BroadphaseSap->getOverlappingPairBuffer();
				gFpIO.m_numOverlap = m_data->m_BroadphaseSap->getNumOverlap();
			}
			else
			{
				m_data->m_BroadphaseGrid->calculateOverlappingPairs();
				gFpIO.m_dAllOverlappingPairs = m_data->m_BroadphaseGrid->getOverlappingPairBuffer();
				gFpIO.m_numOverlap = m_data->m_BroadphaseGrid->getNumOverlap();
			}
		}
		
		//printf("gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap );
		if (gFpIO.m_numOverlap>=0 && gFpIO.m_numOverlap<MAX_BROADPHASE_COLLISION_CL)
		{
			colorPairsOpenCL(gFpIO);

			if (runOpenCLKernels)
			{
				{
					//BT_PROFILE("setupBodies");
					if (narrowphaseAndSolver)
						setupBodies(gFpIO, m_data->m_linVelBuf->getBufferCL(), m_data->m_angVelBuf->getBufferCL(), narrowphaseAndSolver->getBodiesGpu(), narrowphaseAndSolver->getBodyInertiasGpu());
				}
				
				{
					BT_PROFILE("computeContactsAndSolver");
					if (narrowphaseAndSolver)
						narrowphaseAndSolver->computeContactsAndSolver(gFpIO.m_dAllOverlappingPairs,gFpIO.m_numOverlap, gFpIO.m_dAABB,gFpIO.m_numObjects);
				}

				
				{
					BT_PROFILE("copyBodyVelocities");
					if (narrowphaseAndSolver)
						copyBodyVelocities(gFpIO, m_data->m_linVelBuf->getBufferCL(), m_data->m_angVelBuf->getBufferCL(), narrowphaseAndSolver->getBodiesGpu(), narrowphaseAndSolver->getBodyInertiasGpu());
				}
			}

		} else
		{
			printf("error, gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap);
			btAssert(0);
		}


		{
			BT_PROFILE("integrateTransforms");

			if (runOpenCLKernels)
			{
				bool integrateOnGpu = true;
				if (integrateOnGpu)
				{
					int numObjects = m_numPhysicsInstances;
					int offset = SHAPE_VERTEX_BUFFER_SIZE/4;

					ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 0, sizeof(int), &offset);
					ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 1, sizeof(int), &numObjects);
					ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 2, sizeof(cl_mem), (void*)&clBuffer );
	
					cl_mem lv = m_data->m_linVelBuf->getBufferCL();
					cl_mem av = m_data->m_angVelBuf->getBufferCL();
					cl_mem btimes = m_data->m_bodyTimes->getBufferCL();

					ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 3, sizeof(cl_mem), (void*)&lv);
					ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 4, sizeof(cl_mem), (void*)&av);
					ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 5, sizeof(cl_mem), (void*)&btimes);
					
					
					

					size_t workGroupSize = 64;
					size_t	numWorkItems = workGroupSize*((m_numPhysicsInstances + (workGroupSize)) / workGroupSize);
				
					if (workGroupSize>numWorkItems)
						workGroupSize=numWorkItems;

					ciErrNum = clEnqueueNDRangeKernel(g_cqCommandQue, g_integrateTransformsKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);
					oclCHECKERROR(ciErrNum, CL_SUCCESS);
				} else
				{
					//debug velocity
					btAlignedObjectArray<btVector3> linvel;
					m_data->m_linVelBuf->copyToHost(linvel);
					for (int i=0;i<linvel.size();i++)
					{
						btAssert(_finite(linvel[i].x()));
					}
					btAssert(0);

				}
			} 
		}
			

	}

	if(m_data->m_useInterop)
	{
#ifndef __APPLE__
		BT_PROFILE("clEnqueueReleaseGLObjects");
		ciErrNum = clEnqueueReleaseGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, 0);
		clFinish(g_cqCommandQue);
#endif
	}
	else
	{
		BT_PROFILE("clEnqueueReadBuffer clReleaseMemObject and glUnmapBuffer");
		ciErrNum = clEnqueueReadBuffer (	g_cqCommandQue,
 		clBuffer,
 		blocking,
 		0,
 		VBOsize,
 		hostPtr,0,0,0);

		//clReleaseMemObject(clBuffer);
		clFinish(g_cqCommandQue);
		glUnmapBuffer( GL_ARRAY_BUFFER);
		glFlush();
	}

	oclCHECKERROR(ciErrNum, CL_SUCCESS);


	if (runOpenCLKernels)
	{
		BT_PROFILE("clFinish");
		clFinish(g_cqCommandQue);
	}

	
}
예제 #23
0
void ParticleDemo::clientMoveAndDisplay()
{
	int numParticles = NUM_PARTICLES_X*NUM_PARTICLES_Y*NUM_PARTICLES_Z;
	GLuint vbo = m_instancingRenderer->getInternalData()->m_vbo;
	glBindBuffer(GL_ARRAY_BUFFER, vbo);
	glFlush();

	int posArraySize = numParticles*sizeof(float)*4;

	cl_bool blocking=  CL_TRUE;
	char* hostPtr=  (char*)glMapBufferRange( GL_ARRAY_BUFFER,m_instancingRenderer->getMaxShapeCapacity(),posArraySize, GL_MAP_WRITE_BIT|GL_MAP_READ_BIT );//GL_READ_WRITE);//GL_WRITE_ONLY
		GLint err = glGetError();
    assert(err==GL_NO_ERROR);
	glFinish();

	

#if 1



	//do some stuff using the OpenCL buffer

	bool useCpu = false;
	if (useCpu)
	{
		

		float* posBuffer = (float*)hostPtr;
		
		for (int i=0;i<numParticles;i++)
		{
			posBuffer[i*4+1] += 0.1;
		}
	}
	else
	{
		cl_int ciErrNum;
		if (!m_data->m_clPositionBuffer)
		{
			m_data->m_clPositionBuffer = clCreateBuffer(m_clData->m_clContext, CL_MEM_READ_WRITE,
				posArraySize, 0, &ciErrNum);

			clFinish(m_clData->m_clQueue);
			oclCHECKERROR(ciErrNum, CL_SUCCESS);
			ciErrNum = clEnqueueWriteBuffer (	m_clData->m_clQueue,m_data->m_clPositionBuffer,
 				blocking,0,posArraySize,hostPtr,0,0,0
			);
			clFinish(m_clData->m_clQueue);
		}
	

		



		if (0)
		{
			b3BufferInfoCL bInfo[] = { 
				b3BufferInfoCL( m_data->m_velocitiesGPU->getBufferCL(), true ),
				b3BufferInfoCL( m_data->m_clPositionBuffer)
			};
			
			b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_updatePositionsKernel );

			launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
			launcher.setConst( numParticles);

			launcher.launch1D( numParticles);
			clFinish(m_clData->m_clQueue);
	
		}


		if (1)
		{
			b3BufferInfoCL bInfo[] = { 
				b3BufferInfoCL( m_data->m_clPositionBuffer),
				b3BufferInfoCL( m_data->m_velocitiesGPU->getBufferCL() ),
				b3BufferInfoCL( m_data->m_simParamGPU->getBufferCL(),true)
			};
			
			b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_updatePositionsKernel2 );

			launcher.setConst( numParticles);
			launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
			float timeStep = 1.f/60.f;
			launcher.setConst( timeStep);

			launcher.launch1D( numParticles);
			clFinish(m_clData->m_clQueue);
	
		}

		{
			b3BufferInfoCL bInfo[] = { 
				b3BufferInfoCL( m_data->m_clPositionBuffer),
				b3BufferInfoCL( m_data->m_broadphaseGPU->getAabbBufferWS()),
			};
			
			b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_updateAabbsKernel );
			launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
			launcher.setConst( m_data->m_simParamCPU[0].m_particleRad);
			launcher.setConst( numParticles);
			
			launcher.launch1D( numParticles);
			clFinish(m_clData->m_clQueue);
		}

		//broadphase
		int numPairsGPU=0;
		cl_mem pairsGPU  = 0;

		{
			m_data->m_broadphaseGPU->calculateOverlappingPairs(64*numParticles);
			pairsGPU = m_data->m_broadphaseGPU->getOverlappingPairBuffer();
			numPairsGPU = m_data->m_broadphaseGPU->getNumOverlap();
		}

		if (numPairsGPU)
		{
			b3BufferInfoCL bInfo[] = { 
				b3BufferInfoCL( m_data->m_clPositionBuffer),
				b3BufferInfoCL( m_data->m_velocitiesGPU->getBufferCL() ),
				b3BufferInfoCL( m_data->m_broadphaseGPU->getOverlappingPairBuffer(),true),
			};
			
			b3LauncherCL launcher(m_clData->m_clQueue, m_data->m_collideParticlesKernel);
			launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
			launcher.setConst( numPairsGPU);
			launcher.launch1D( numPairsGPU);
			clFinish(m_clData->m_clQueue);

			//__kernel void collideParticlesKernel(  __global float4* pPos, __global float4* pVel, __global int2* pairs, const int numPairs)
		}


		if (1)
		{
			ciErrNum = clEnqueueReadBuffer (	m_clData->m_clQueue,
				m_data->m_clPositionBuffer,
	 			blocking,
 				0,
 				posArraySize,
 			hostPtr,0,0,0);

			//clReleaseMemObject(clBuffer);
			clFinish(m_clData->m_clQueue);

			
		}
	}
	
#endif

	glUnmapBuffer( GL_ARRAY_BUFFER);
	glFlush();

	/*
	int numParticles = NUM_PARTICLES_X*NUM_PARTICLES_Y*NUM_PARTICLES_Z;
	for (int objectIndex=0;objectIndex<numParticles;objectIndex++)
	{
		float pos[4]={0,0,0,0};
		float orn[4]={0,0,0,1};

//		m_instancingRenderer->writeSingleInstanceTransformToGPU(pos,orn,i);
		{
			glBindBuffer(GL_ARRAY_BUFFER, m_instancingRenderer->getInternalData()->m_vbo);
			glFlush();

			char* orgBase =  (char*)glMapBuffer( GL_ARRAY_BUFFER,GL_READ_WRITE);
			//b3GraphicsInstance* gfxObj = m_graphicsInstances[k];
			int totalNumInstances= numParticles;
	

			int POSITION_BUFFER_SIZE = (totalNumInstances*sizeof(float)*4);

			char* base = orgBase;
			int capInBytes = m_instancingRenderer->getMaxShapeCapacity();

			float* positions = (float*)(base+capInBytes);
			float* orientations = (float*)(base+capInBytes+ POSITION_BUFFER_SIZE);

			positions[objectIndex*4+1] += 0.1f;
			glUnmapBuffer( GL_ARRAY_BUFFER);
			glFlush();
		}
	}
	*/

	
}
예제 #24
0
void ParticleDemo::setupScene(const ConstructionInfo& ci)
{

	initCL(ci.preferredOpenCLDeviceIndex,ci.preferredOpenCLPlatformIndex);
	
	int numParticles = NUM_PARTICLES_X*NUM_PARTICLES_Y*NUM_PARTICLES_Z;

	
	int maxObjects = NUM_PARTICLES_X*NUM_PARTICLES_Y*NUM_PARTICLES_Z+1024;
	
	int maxPairsSmallProxy = 32;
	float radius = 3.f*m_data->m_simParamCPU[0].m_particleRad;

	m_data->m_broadphaseGPU = new b3GpuSapBroadphase(m_clData->m_clContext ,m_clData->m_clDevice,m_clData->m_clQueue);//overlappingPairCache,b3Vector3(4.f, 4.f, 4.f), 128, 128, 128,maxObjects, maxObjects, maxPairsSmallProxy, 100.f, 128,

	/*m_data->m_broadphaseGPU = new b3GridBroadphaseCl(overlappingPairCache,b3Vector3(radius,radius,radius), 128, 128, 128,
		maxObjects, maxObjects, maxPairsSmallProxy, 100.f, 128,
			m_clData->m_clContext ,m_clData->m_clDevice,m_clData->m_clQueue);
			*/

	m_data->m_velocitiesGPU = new b3OpenCLArray<b3Vector3>(m_clData->m_clContext,m_clData->m_clQueue,numParticles);
	m_data->m_velocitiesCPU.resize(numParticles);
	for (int i=0;i<numParticles;i++)
	{
		m_data->m_velocitiesCPU[i].setValue(0,0,0);
	}
	m_data->m_velocitiesGPU->copyFromHost(m_data->m_velocitiesCPU);

	m_data->m_simParamGPU = new b3OpenCLArray<b3SimParams>(m_clData->m_clContext,m_clData->m_clQueue,1,false);
	m_data->m_simParamGPU->copyFromHost(m_data->m_simParamCPU);

	cl_int pErrNum;

	cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_clData->m_clContext,m_clData->m_clDevice,particleKernelsString,0,"",INTEROPKERNEL_SRC_PATH);
	m_data->m_updatePositionsKernel = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext, m_clData->m_clDevice,particleKernelsString, "updatePositionsKernel" ,&pErrNum,prog);
	oclCHECKERROR(pErrNum, CL_SUCCESS);
	m_data->m_updatePositionsKernel2 = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext, m_clData->m_clDevice,particleKernelsString, "integrateMotionKernel" ,&pErrNum,prog);
	oclCHECKERROR(pErrNum, CL_SUCCESS);

	m_data->m_updateAabbsKernel= b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext, m_clData->m_clDevice,particleKernelsString, "updateAabbsKernel" ,&pErrNum,prog);
	oclCHECKERROR(pErrNum, CL_SUCCESS);

	m_data->m_collideParticlesKernel = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext, m_clData->m_clDevice,particleKernelsString, "collideParticlesKernel" ,&pErrNum,prog);
	oclCHECKERROR(pErrNum, CL_SUCCESS);

	m_instancingRenderer = ci.m_instancingRenderer;

	int strideInBytes = 9*sizeof(float);
	bool pointSprite = true;
	int shapeId =-1;

	if (pointSprite)
	{
		int numVertices = sizeof(point_sphere_vertices)/strideInBytes;
		int numIndices = sizeof(point_sphere_indices)/sizeof(int);
		shapeId = m_instancingRenderer->registerShape(&point_sphere_vertices[0],numVertices,point_sphere_indices,numIndices,B3_GL_POINTS);
	} else
	{
		int numVertices = sizeof(low_sphere_vertices)/strideInBytes;
		int numIndices = sizeof(low_sphere_indices)/sizeof(int);
		shapeId = m_instancingRenderer->registerShape(&low_sphere_vertices[0],numVertices,low_sphere_indices,numIndices);
	}

	float position[4] = {0,0,0,0};
	float quaternion[4] = {0,0,0,1};
	float color[4]={1,0,0,1};
	float scaling[4] = {0.023,0.023,0.023,1};

	int userIndex = 0;
	for (int x=0;x<NUM_PARTICLES_X;x++)
	{
		for (int y=0;y<NUM_PARTICLES_Y;y++)
		{
			for (int z=0;z<NUM_PARTICLES_Z;z++)
			{
				float rad = m_data->m_simParamCPU[0].m_particleRad;
				position[0] = x*(rad*3);
				position[1] = y*(rad*3);
				position[2] = z*(rad*3);

				color[0] = float(x)/float(NUM_PARTICLES_X);
				color[1] = float(y)/float(NUM_PARTICLES_Y);
				color[2] = float(z)/float(NUM_PARTICLES_Z);

				int id = m_instancingRenderer->registerGraphicsInstance(shapeId,position,quaternion,color,scaling);
				
				void* userPtr = (void*)userIndex;
				int collidableIndex = userIndex;
				b3Vector3 aabbMin,aabbMax;
				b3Vector3 particleRadius(rad,rad,rad);

				aabbMin = b3Vector3(position[0],position[1],position[2])-particleRadius;
				aabbMax = b3Vector3(position[0],position[1],position[2])+particleRadius;
				m_data->m_broadphaseGPU->createProxy(aabbMin,aabbMax,collidableIndex,1,1);
				userIndex++;

			}
		}
	}
	m_data->m_broadphaseGPU->writeAabbsToGpu();

	float camPos[4]={1.5,0.5,2.5,0};
	m_instancingRenderer->setCameraTargetPosition(camPos);
	m_instancingRenderer->setCameraDistance(4);
	m_instancingRenderer->writeTransforms();

}
btScalar btParallelConstraintSolver::solveGroupCacheFriendlySetup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer,btStackAlloc* stackAlloc)
{
	BT_PROFILE("solveGroupCacheFriendlySetup");
#if USE_SEQUENTIAL_SOLVER
	return btSequentialImpulseConstraintSolver::solveGroupCacheFriendlySetup(bodies, numBodies, manifoldPtr, numManifolds, constraints, numConstraints, infoGlobal, debugDrawer, stackAlloc);
#else
	(void)stackAlloc;
	(void)debugDrawer;
// nCall++;
// printf("Call : %d\n", nCall);
// if(nCall == 18)
// {
//	printf("=========  HIT : %d\n", nCall);
// }

	if (!(numConstraints + numManifolds))
	{
		//		printf("empty\n");
		return 0.f;
	}

	int numConstr;
	{
		BT_PROFILE("prepareBatches");
		numConstr = prepareBatches(manifoldPtr, numManifolds, infoGlobal);
	}

	if(!numConstr)
	{
		return 0.f;
	}

	{
		BT_PROFILE("runSetupKernel");

	#if RUN_KERNELS_DIRECTLY
		for(int i = 0; i < m_localGroupSize; i++)
		{
			kSetupContact(this, m_taskParams, (btContactSolverInfo*)&infoGlobal, i);
		}
	#else
		// Set the Argument values
		cl_int ciErrNum;
		btParallelConstraintSolver* pSolver = this;
		btParallelConstraintSolverSetupTaskParams* pTaskParams = m_taskParams;
		btContactSolverInfo* pInfoGlobal = (btContactSolverInfo*)&infoGlobal;
		ciErrNum  = clSetKernelArg(s_setupContactKernel, 0, sizeof(cl_mem), (void*)&pSolver);
		ciErrNum |= clSetKernelArg(s_setupContactKernel, 1, sizeof(cl_mem), (void*)&pTaskParams);
		ciErrNum |= clSetKernelArg(s_setupContactKernel, 2, sizeof(cl_mem), (void*)&pInfoGlobal);
		oclCHECKERROR(ciErrNum, CL_SUCCESS);
		size_t szWorkSize[1];
		szWorkSize[0] = m_localGroupSize;
		ciErrNum = clEnqueueNDRangeKernel(s_cqCommandQue, s_setupContactKernel, 1, NULL, szWorkSize, szWorkSize, 0, NULL, NULL);
		oclCHECKERROR(ciErrNum, CL_SUCCESS);
		clFlush(s_cqCommandQue);
	#endif
	}
#if 0
	{
		FILE* ff = fopen("C:\\ttt\\theadstat.txt", "wt");
		int totalCont = m_tmpSolverContactConstraintPool.size();
		int nRows = (totalCont  + m_localGroupSize - 1)/ m_localGroupSize;
		for(int n = 0; n < nRows; n++)
		{
			for(int i = 0; i < m_localGroupSize; i++)
			{
				int indx = m_taskParams[i].m_startIndex + n;
				if(indx < totalCont)
				{
					int curr = m_tmpSolverContactConstraintPool[indx].m_numConsecutiveRowsPerKernel;
					int delta;
					if(n)
					{
						delta = curr - m_tmpSolverContactConstraintPool[indx-1].m_numConsecutiveRowsPerKernel;
					}
					else delta = 0;
					fprintf(ff, "%10d(%6d)\t", curr, delta);
				}
			}
			fprintf(ff, "\n");
		}
		fclose(ff);
	}
#endif
	{
		BT_PROFILE("setOrder");

		///@todo: use stack allocator for such temporarily memory, same for solver bodies/constraints
		int numConstraintPool = m_tmpSolverContactConstraintPool.size();
		int numFrictionPool = m_tmpSolverContactFrictionConstraintPool.size();
		m_orderTmpConstraintPool.resize(numConstraintPool);
		m_orderFrictionConstraintPool.resize(numFrictionPool);
		{
			int i;
			for (i=0;i<numConstraintPool;i++)
			{
				m_orderTmpConstraintPool[i] = i;
			}
			for (i=0;i<numFrictionPool;i++)
			{
				m_orderFrictionConstraintPool[i] = i;
			}
		}
	}
	return 0.f;
#endif
}
예제 #26
0
void	CLPhysicsDemo::stepSimulation()
{
	int sz = sizeof(ConvexPolyhedronCL2);
	int sz1 = sizeof(ConvexPolyhedronCL);
	btAssert(sz==sz1);

	int b1 = sizeof(Body2);
	int b2 = sizeof(RigidBodyBase::Body);
	btAssert(b1==b2);

	int ciErrNum=CL_SUCCESS;


	
	if (1 && m_numPhysicsInstances)
	{

		gFpIO.m_numObjects = m_numPhysicsInstances;
		
		
		if (useSapGpuBroadphase)
		{
			gFpIO.m_dAABB = m_data->m_BroadphaseSap->getAabbBuffer();
		} else
		{
			gFpIO.m_dAABB = m_data->m_BroadphaseGrid->getAabbBuffer();
		}
		gFpIO.m_dlocalShapeAABB = (cl_mem)m_data->m_localShapeAABBGPU->getBufferCL();
		gFpIO.m_numOverlap = 0;


		{
			BT_PROFILE("setupGpuAabbs");
			setupGpuAabbsFull(gFpIO,m_narrowphaseAndSolver->getBodiesGpu(), m_narrowphaseAndSolver->getCollidablesGpu() );
        //    setupGpuAabbsSimple(gFpIO);
		}

		{

		}

		if (1)
		{
			BT_PROFILE("calculateOverlappingPairs");
			
			if (useSapGpuBroadphase)
			{
				m_data->m_BroadphaseSap->calculateOverlappingPairs();
				gFpIO.m_numOverlap = m_data->m_BroadphaseSap->getNumOverlap();
				gFpIO.m_dAllOverlappingPairs = m_data->m_BroadphaseSap->getOverlappingPairBuffer();
			}
			else
			{
				m_data->m_BroadphaseGrid->calculateOverlappingPairs();
				gFpIO.m_dAllOverlappingPairs = m_data->m_BroadphaseGrid->getOverlappingPairBuffer();
				gFpIO.m_numOverlap = m_data->m_BroadphaseGrid->getNumOverlap();
			}
		}
		
		//printf("gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap );
		if (gFpIO.m_numOverlap>=0 && gFpIO.m_numOverlap<MAX_BROADPHASE_COLLISION_CL)
		{
			//colorPairsOpenCL(gFpIO);

			if (runOpenCLKernels)
			{
				{
					BT_PROFILE("setupBodies");
					if (m_narrowphaseAndSolver)
						setupBodies(gFpIO, m_data->m_linVelBuf->getBufferCL(), m_data->m_angVelBuf->getBufferCL(), m_narrowphaseAndSolver->getBodiesGpu(), m_narrowphaseAndSolver->getBodyInertiasGpu());
				}
				
				{
					BT_PROFILE("computeContacts");
					if (m_narrowphaseAndSolver)
						m_narrowphaseAndSolver->computeContacts(gFpIO.m_dAllOverlappingPairs,gFpIO.m_numOverlap, gFpIO.m_dAABB,gFpIO.m_numObjects);
				}

				bool useOpenCLSolver = true;
				if (useOpenCLSolver)
				{
					BT_PROFILE("solve Contact Constraints (OpenCL)");
					if (m_narrowphaseAndSolver)
						m_narrowphaseAndSolver->solveContacts();
				} else
				{
					BT_PROFILE("solve Contact Constraints CPU/serial");
					if (m_narrowphaseAndSolver && m_data->m_pgsSolver && m_narrowphaseAndSolver->getNumContactsGpu())
					{
						btGpuNarrowphaseAndSolver* np = m_narrowphaseAndSolver;
						
						btAlignedObjectArray<RigidBodyBase::Body> hostBodies;
						btOpenCLArray<RigidBodyBase::Body> gpuBodies(g_cxMainContext,g_cqCommandQue,0,true);
						gpuBodies.setFromOpenCLBuffer(np->getBodiesGpu(),np->getNumBodiesGpu());
						gpuBodies.copyToHost(hostBodies);

						btAlignedObjectArray<RigidBodyBase::Inertia> hostInertias;
						btOpenCLArray<RigidBodyBase::Inertia> gpuInertias(g_cxMainContext,g_cqCommandQue,0,true);
						gpuInertias.setFromOpenCLBuffer(np->getBodyInertiasGpu(),np->getNumBodiesGpu());
						gpuInertias.copyToHost(hostInertias);

						btAlignedObjectArray<Contact4> hostContacts;
						btOpenCLArray<Contact4> gpuContacts(g_cxMainContext,g_cqCommandQue,0,true);
						gpuContacts.setFromOpenCLBuffer(np->getContactsGpu(),np->getNumContactsGpu());
						gpuContacts.copyToHost(hostContacts);

						{
							BT_PROFILE("pgsSolver::solveContacts");
							m_data->m_pgsSolver->solveContacts(np->getNumBodiesGpu(),&hostBodies[0],&hostInertias[0],np->getNumContactsGpu(),&hostContacts[0]);
						}
						
						gpuBodies.copyFromHost(hostBodies);


					}
					
					
				}
				
				{
					BT_PROFILE("copyBodyVelocities");
					if (m_narrowphaseAndSolver)
						copyBodyVelocities(gFpIO, m_data->m_linVelBuf->getBufferCL(), m_data->m_angVelBuf->getBufferCL(), m_narrowphaseAndSolver->getBodiesGpu());

				}
			}

		} else
		{
			printf("error, gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap);
			btAssert(0);
		}


		{
			BT_PROFILE("integrateTransforms");

			if (runOpenCLKernels)
			{
				bool integrateOnGpu = true;
				if (integrateOnGpu)
				{
					int numObjects = m_numPhysicsInstances;
					int offset = m_maxShapeBufferCapacityInBytes/4;

					ciErrNum = clSetKernelArg(g_integrateTransformsKernel2, 0, sizeof(int), &offset);
					ciErrNum = clSetKernelArg(g_integrateTransformsKernel2, 1, sizeof(int), &numObjects);
					cl_mem bodyGpuBuffer = m_narrowphaseAndSolver->getBodiesGpu();
					ciErrNum = clSetKernelArg(g_integrateTransformsKernel2, 2, sizeof(cl_mem), (void*)&bodyGpuBuffer );
	
					cl_mem lv = m_data->m_linVelBuf->getBufferCL();
					cl_mem av = m_data->m_angVelBuf->getBufferCL();
					cl_mem btimes = m_data->m_bodyTimes->getBufferCL();

					ciErrNum = clSetKernelArg(g_integrateTransformsKernel2, 3, sizeof(cl_mem), (void*)&lv);
					ciErrNum = clSetKernelArg(g_integrateTransformsKernel2, 4, sizeof(cl_mem), (void*)&av);
					ciErrNum = clSetKernelArg(g_integrateTransformsKernel2, 5, sizeof(cl_mem), (void*)&btimes);
					
					
					

					size_t workGroupSize = 64;
					size_t	numWorkItems = workGroupSize*((m_numPhysicsInstances + (workGroupSize)) / workGroupSize);
				
					if (workGroupSize>numWorkItems)
						workGroupSize=numWorkItems;

					ciErrNum = clEnqueueNDRangeKernel(g_cqCommandQue, g_integrateTransformsKernel2, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);
					oclCHECKERROR(ciErrNum, CL_SUCCESS);
				} else
				{
#ifdef _WIN32
					//debug velocity
					btAlignedObjectArray<btVector3> linvel;
					m_data->m_linVelBuf->copyToHost(linvel);
					for (int i=0;i<linvel.size();i++)
					{
						btAssert(_finite(linvel[i].x()));
					}
#endif
                    btAssert(0);

				}
			} 
		}

		
			

	}

	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	

	if (runOpenCLKernels)
	{
		BT_PROFILE("clFinish");
		clFinish(g_cqCommandQue);
	}

	
}
예제 #27
0
void	CLPhysicsDemo::stepSimulation()
{
	BT_PROFILE("simulationLoop");
	
	{
		BT_PROFILE("glFinish");
		glFinish();
	}
	cl_int ciErrNum = CL_SUCCESS;


	if(m_data->m_useInterop)
	{
		clBuffer = g_interopBuffer->getCLBUffer();
		BT_PROFILE("clEnqueueAcquireGLObjects");
		ciErrNum = clEnqueueAcquireGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, NULL);
		adl::DeviceUtils::waitForCompletion( g_deviceCL );
	} else
	{

		glBindBuffer(GL_ARRAY_BUFFER, cube_vbo);
		glFlush();

		BT_PROFILE("glMapBuffer and clEnqueueWriteBuffer");

		blocking=  CL_TRUE;
		hostPtr=  (char*)glMapBuffer( GL_ARRAY_BUFFER,GL_READ_WRITE);//GL_WRITE_ONLY
		if (!clBuffer)
		{
			clBuffer = clCreateBuffer(g_cxMainContext, CL_MEM_READ_WRITE, VBOsize, 0, &ciErrNum);
		} 
		adl::DeviceUtils::waitForCompletion( g_deviceCL );
			oclCHECKERROR(ciErrNum, CL_SUCCESS);

		ciErrNum = clEnqueueWriteBuffer (	g_cqCommandQue,
 			clBuffer,
 			blocking,
 			0,
 			VBOsize,
 			hostPtr,0,0,0
		);
		adl::DeviceUtils::waitForCompletion( g_deviceCL );
	}



	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	if (runOpenCLKernels && m_numPhysicsInstances)
	{

		gFpIO.m_numObjects = m_numPhysicsInstances;
		gFpIO.m_positionOffset = SHAPE_VERTEX_BUFFER_SIZE/4;
		gFpIO.m_clObjectsBuffer = clBuffer;
		gFpIO.m_dAABB = m_data->m_Broadphase->m_dAABB;
		gFpIO.m_dlocalShapeAABB = (cl_mem)m_data->m_localShapeAABB->m_ptr;
		gFpIO.m_numOverlap = 0;
		{
			BT_PROFILE("setupGpuAabbs");
			setupGpuAabbsFull(gFpIO,narrowphaseAndSolver->getBodiesGpu() );
		}
		if (1)
		{
			BT_PROFILE("calculateOverlappingPairs");
			m_data->m_Broadphase->calculateOverlappingPairs(0, m_numPhysicsInstances);
			gFpIO.m_dAllOverlappingPairs = m_data->m_Broadphase->m_dAllOverlappingPairs;
			gFpIO.m_numOverlap = m_data->m_Broadphase->m_numPrefixSum;
		}
		
		//printf("gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap );
		if (gFpIO.m_numOverlap>=0 && gFpIO.m_numOverlap<MAX_BROADPHASE_COLLISION_CL)
		{
			colorPairsOpenCL(gFpIO);

			if (1)
			{
				{
					//BT_PROFILE("setupBodies");
					if (narrowphaseAndSolver)
						setupBodies(gFpIO, gLinVelMem, gAngVelMem, narrowphaseAndSolver->getBodiesGpu(), narrowphaseAndSolver->getBodyInertiasGpu());
				}
				if (gFpIO.m_numOverlap)
				{
					BT_PROFILE("computeContactsAndSolver");
					if (narrowphaseAndSolver)
						narrowphaseAndSolver->computeContactsAndSolver(gFpIO.m_dAllOverlappingPairs,gFpIO.m_numOverlap);
				}

				{
					BT_PROFILE("copyBodyVelocities");
					if (narrowphaseAndSolver)
						copyBodyVelocities(gFpIO, gLinVelMem, gAngVelMem, narrowphaseAndSolver->getBodiesGpu(), narrowphaseAndSolver->getBodyInertiasGpu());
				}
			}

		} else
		{
			printf("error, gFpIO.m_numOverlap = %d\n",gFpIO.m_numOverlap);
			btAssert(0);
		}


		{
			BT_PROFILE("integrateTransforms");

			if (runOpenCLKernels)
			{
				int numObjects = m_numPhysicsInstances;
				int offset = SHAPE_VERTEX_BUFFER_SIZE/4;

				ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 0, sizeof(int), &offset);
				ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 1, sizeof(int), &numObjects);
				ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 2, sizeof(cl_mem), (void*)&clBuffer );

				ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 3, sizeof(cl_mem), (void*)&gLinVelMem);
				ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 4, sizeof(cl_mem), (void*)&gAngVelMem);
				ciErrNum = clSetKernelArg(g_integrateTransformsKernel, 5, sizeof(cl_mem), (void*)&gBodyTimes);
					
					
					

				size_t workGroupSize = 64;
				size_t	numWorkItems = workGroupSize*((m_numPhysicsInstances + (workGroupSize)) / workGroupSize);
				
				if (workGroupSize>numWorkItems)
					workGroupSize=numWorkItems;

				ciErrNum = clEnqueueNDRangeKernel(g_cqCommandQue, g_integrateTransformsKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);
				oclCHECKERROR(ciErrNum, CL_SUCCESS);
			}
		}
			

	}

	if(m_data->m_useInterop)
	{
		BT_PROFILE("clEnqueueReleaseGLObjects");
		ciErrNum = clEnqueueReleaseGLObjects(g_cqCommandQue, 1, &clBuffer, 0, 0, 0);
		adl::DeviceUtils::waitForCompletion( g_deviceCL );
	}
	else
	{
		BT_PROFILE("clEnqueueReadBuffer clReleaseMemObject and glUnmapBuffer");
		ciErrNum = clEnqueueReadBuffer (	g_cqCommandQue,
 		clBuffer,
 		blocking,
 		0,
 		VBOsize,
 		hostPtr,0,0,0);

		//clReleaseMemObject(clBuffer);
		adl::DeviceUtils::waitForCompletion( g_deviceCL );
		glUnmapBuffer( GL_ARRAY_BUFFER);
		glFlush();
	}

	oclCHECKERROR(ciErrNum, CL_SUCCESS);


	if (runOpenCLKernels)
	{
		BT_PROFILE("clFinish");
		clFinish(g_cqCommandQue);
	}

	
}
예제 #28
0
void btGpuDemo3dOCLWrap::initCL(int argc, char** argv)
{
    cl_int ciErrNum;

//	m_cxMainContext = clCreateContextFromType(0, CL_DEVICE_TYPE_ALL, NULL, NULL, &ciErrNum);
	m_cxMainContext = btOclCommon::createContextFromType(CL_DEVICE_TYPE_ALL, &ciErrNum);
    oclCHECKERROR(ciErrNum, CL_SUCCESS);
 
	m_cdDevice = btOclGetMaxFlopsDev(m_cxMainContext);

	// create a command-queue
	m_cqCommandQue = clCreateCommandQueue(m_cxMainContext, m_cdDevice, 0, &ciErrNum);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);

	// Program Setup
	size_t program_length;
	char* fileName = "Gpu3dDemoOCL.cl";
	FILE * fp = fopen(fileName, "rb");
	char newFileName[512];
	
	
	if (fp == NULL)
	{
		sprintf(newFileName,"..//%s",fileName);
		fp = fopen(newFileName, "rb");
		if (fp)
			fileName = newFileName;
	}
	
	if (fp == NULL)
	{
		sprintf(newFileName,"Demos//Gpu3dDemo//%s",fileName);
		fp = fopen(newFileName, "rb");
		if (fp)
			fileName = newFileName;
	}

	if (fp == NULL)
	{
		sprintf(newFileName,"..//..//..//..//..//Demos//Gpu3dDemo//%s",fileName);
		fp = fopen(newFileName, "rb");
		if (fp)
			fileName = newFileName;
		else
		{
			printf("cannot find %s\n",newFileName);
			exit(0);
		}
	}

	char *source = btOclLoadProgSource(fileName, "", &program_length);
	if(source == NULL)
	{
		printf("ERROR : OpenCL can't load file %s\n", fileName);
	}
	btAssert(source != NULL);

	// create the program
	printf("OpenCL compiles %s ...", fileName);
	m_cpProgram = clCreateProgramWithSource(m_cxMainContext, 1, (const char**)&source, &program_length, &ciErrNum);
	oclCHECKERROR(ciErrNum, CL_SUCCESS);
	free(source);

	// build the program
	ciErrNum = clBuildProgram(m_cpProgram, 0, NULL, "-I .", NULL, NULL);
	if(ciErrNum != CL_SUCCESS)
	{
		// write out standard error
		char cBuildLog[10240];
		clGetProgramBuildInfo(m_cpProgram, btOclGetFirstDev(m_cxMainContext), CL_PROGRAM_BUILD_LOG, 
							  sizeof(cBuildLog), cBuildLog, NULL );
		printf("\n\n%s\n\n\n", cBuildLog);
		printf("Press ENTER key to terminate the program\n");
		getchar();
		exit(-1); 
	}
	printf("OK\n");

}
예제 #29
0
void btGpuDemo3dOCLWrap::allocateBuffers(int maxObjs, int maxConstr, int maxPointsPerConstr, int maxBatches)
{
    cl_int ciErrNum;

	m_maxObj = maxObjs;
	m_maxConstr = maxConstr;
	m_maxPointsPerConstr = maxPointsPerConstr;

    unsigned int memSize = sizeof(float)* 4 * m_maxObj * 4;
    m_dTrans = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
    oclCHECKERROR(ciErrNum, CL_SUCCESS);

	memSize = sizeof(float)* 4 * m_maxObj;
	m_dVel = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
    oclCHECKERROR(ciErrNum, CL_SUCCESS);

	m_dAngVel = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
    oclCHECKERROR(ciErrNum, CL_SUCCESS);

	memSize = sizeof(int) * m_maxConstr;
	m_dIds = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
    oclCHECKERROR(ciErrNum, CL_SUCCESS);

	memSize = sizeof(int) * 2 * m_maxConstr;
	m_dBatchIds = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
    oclCHECKERROR(ciErrNum, CL_SUCCESS);

	memSize = sizeof(float) * m_maxConstr * m_maxPointsPerConstr;
	m_dLambdaDtBox = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
    oclCHECKERROR(ciErrNum, CL_SUCCESS);
	m_dPositionConstraint = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
    oclCHECKERROR(ciErrNum, CL_SUCCESS);

	memSize = sizeof(float) * 4 * m_maxConstr * m_maxPointsPerConstr;
	m_dNormal = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
    oclCHECKERROR(ciErrNum, CL_SUCCESS);
	m_dContact = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
    oclCHECKERROR(ciErrNum, CL_SUCCESS);

	memSize = sizeof(float) * m_maxObj * 4 * 2;
	m_dForceTorqueDamp = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
    oclCHECKERROR(ciErrNum, CL_SUCCESS);

	memSize = sizeof(float) * m_maxObj * 4 * 3;
	m_dInvInertiaMass = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
    oclCHECKERROR(ciErrNum, CL_SUCCESS);

	memSize = sizeof(float) * 4 * 2;
	m_dParams = clCreateBuffer(m_cxMainContext, CL_MEM_READ_WRITE, memSize, NULL, &ciErrNum);
    oclCHECKERROR(ciErrNum, CL_SUCCESS);

}
예제 #30
0
void btGpuDemo3dOCLWrap::setKernelArg(int kernelId, int argNum, int argSize, void* argPtr)
{
    cl_int ciErrNum;
	ciErrNum  = clSetKernelArg(m_kernels[kernelId].m_kernel, argNum, argSize, argPtr);
    oclCHECKERROR(ciErrNum, CL_SUCCESS);
}