Beispiel #1
0
void b3GpuNarrowPhase::computeContacts(cl_mem broadphasePairs, int numBroadphasePairs, cl_mem aabbsWS, int numObjects)
{
	int nContactOut = 0;

	int maxTriConvexPairCapacity = m_data->m_config.m_maxTriConvexPairCapacity;
	b3OpenCLArray<b3Int4> triangleConvexPairs(m_context,m_queue, maxTriConvexPairCapacity);
	int numTriConvexPairsOut=0;
	
	b3OpenCLArray<b3Int2> broadphasePairsGPU(m_context,m_queue);
	broadphasePairsGPU.setFromOpenCLBuffer(broadphasePairs,numBroadphasePairs);
	b3OpenCLArray<b3YetAnotherAabb> clAabbArray(this->m_context,this->m_queue);
	clAabbArray.setFromOpenCLBuffer(aabbsWS,numObjects);

	m_data->m_gpuSatCollision->computeConvexConvexContactsGPUSAT(
		&broadphasePairsGPU, numBroadphasePairs,
		m_data->m_bodyBufferGPU,
		m_data->m_pBufContactOutGPU,
		nContactOut,
		m_data->m_config.m_maxContactCapacity,
		*m_data->m_convexPolyhedraGPU,
		*m_data->m_convexVerticesGPU,
		*m_data->m_uniqueEdgesGPU,
		*m_data->m_convexFacesGPU,
		*m_data->m_convexIndicesGPU,
		*m_data->m_collidablesGPU,
		*m_data->m_gpuChildShapes,
		clAabbArray,
		*m_data->m_worldVertsB1GPU,
		*m_data->m_clippingFacesOutGPU,
		*m_data->m_worldNormalsAGPU,
		*m_data->m_worldVertsA1GPU,
		*m_data->m_worldVertsB2GPU,
		m_data->m_bvhData,
		m_data->m_treeNodesGPU,
		m_data->m_subTreesGPU,
		m_data->m_bvhInfoGPU,
		numObjects,
		maxTriConvexPairCapacity,
		triangleConvexPairs,
		numTriConvexPairsOut
		);

}
void btGpuNarrowphaseAndSolver::computeContactsAndSolver(cl_mem broadphasePairs, int numBroadphasePairs) 
{


	BT_PROFILE("computeContactsAndSolver");
	bool bGPU = (m_internalData != 0);
	int maxBodyIndex = m_internalData->m_numAcceleratedRigidBodies;

	if (!maxBodyIndex)
		return;
	int numOfConvexRBodies = maxBodyIndex;

	ChNarrowphaseBase::Config cfgNP;
	cfgNP.m_collisionMargin = 0.01f;
	int nContactOut = 0;
	//printf("convexPairsOut.m_size = %d\n",m_internalData->m_convexPairsOutGPU->m_size);


	btOpenCLArray<int2> broadphasePairsGPU(m_context,m_queue);
	broadphasePairsGPU.setFromOpenCLBuffer(broadphasePairs,numBroadphasePairs);

	bool useCulling = true;
	if (useCulling)
	{
		BT_PROFILE("ChNarrowphase::culling");
		clFinish(m_queue);

		numPairsOut = m_internalData->m_narrowPhase->culling(
			&broadphasePairsGPU, 
			numBroadphasePairs,
			m_internalData->m_bodyBufferGPU, m_internalData->m_ShapeBuffer,
			m_internalData->m_convexPairsOutGPU,
			cfgNP);
	}
	{
			if (m_planeBodyIndex>=0)
			{
				BT_PROFILE("ChNarrowphase:: plane versus convex");
				//todo: get rid of this dynamic allocation
				int2* hostPairs = new int2[m_internalData->m_numAcceleratedRigidBodies-1];
				int index=0;
				for (int i=0;i<m_internalData->m_numAcceleratedRigidBodies;i++)
				{
					if (i!=m_planeBodyIndex)
					{
						hostPairs[index].x = m_planeBodyIndex;
						hostPairs[index].y = i;
						index++;
					}
				}
				assert(m_internalData->m_numAcceleratedRigidBodies-1 == index);
				m_internalData->m_planePairs->copyFromHostPointer(hostPairs,index);
				clFinish(m_queue);

				delete[]hostPairs;
				//convex versus plane
				m_internalData->m_narrowPhase->execute(m_internalData->m_planePairs, index, m_internalData->m_bodyBufferGPU, m_internalData->m_ShapeBuffer, 
					0,0,m_internalData->m_pBufContactOutGPU, nContactOut, cfgNP);
			}
	}
	{
		BT_PROFILE("ChNarrowphase::execute");
		if (useCulling)
		{
			//convex versus convex
			//m_internalData->m_narrowPhase->execute(m_internalData->m_convexPairsOutGPU,numPairsOut, m_internalData->m_bodyBufferGPU, m_internalData->m_ShapeBuffer, m_internalData->m_pBufContactOutGPU, nContactOut, cfgNP);
#define USE_CONVEX_CONVEX_HOST 1
#ifdef USE_CONVEX_CONVEX_HOST
			m_internalData->m_convexPairsOutGPU->resize(numPairsOut);
			m_internalData->m_pBufContactOutGPU->resize(nContactOut);
			
			m_internalData->m_gpuSatCollision->computeConvexConvexContactsHost(
				m_internalData->m_convexPairsOutGPU,
				numPairsOut, 
				m_internalData->m_bodyBufferGPU, 
				m_internalData->m_ShapeBuffer, 
				m_internalData->m_pBufContactOutGPU, 
				nContactOut, cfgNP, m_internalData->m_convexPolyhedra,m_internalData->m_convexVertices,m_internalData->m_uniqueEdges,
				m_internalData->m_convexFaces,m_internalData->m_convexIndices);
#else

			m_internalData->m_narrowPhase->execute(
				m_internalData->m_convexPairsOutGPU,
				numPairsOut, 
				m_internalData->m_bodyBufferGPU, 
				m_internalData->m_ShapeBuffer, 
				m_internalData->m_pBufContactOutGPU, 
				nContactOut, cfgNP);
#endif


		} else
		{
			m_internalData->m_narrowPhase->execute(&broadphasePairsGPU, numBroadphasePairs, m_internalData->m_bodyBufferGPU, m_internalData->m_ShapeBuffer, m_internalData->m_pBufContactOutGPU, nContactOut, cfgNP);
		}

		clFinish(m_queue);
	}
	
	if (!nContactOut)
		return;
	

	bool useSolver = true;//true;//false;

	if (useSolver)
	{
		float dt=1./60.;
		SolverBase::ConstraintCfg csCfg( dt );
		csCfg.m_enableParallelSolve = true;
		csCfg.m_averageExtent = 0.2f;//@TODO m_averageObjExtent;
		csCfg.m_staticIdx = m_planeBodyIndex;

		btOpenCLArray<Contact4>* contactsIn = m_internalData->m_pBufContactOutGPU;
		const btOpenCLArray<RigidBodyBase::Body>* bodyBuf = m_internalData->m_bodyBufferGPU;
		void* additionalData = m_internalData->m_frictionCGPU;
		const btOpenCLArray<RigidBodyBase::Inertia>* shapeBuf = m_internalData->m_inertiaBufferGPU;
		SolverData contactCOut = m_internalData->m_contactCGPU;
		int nContacts = nContactOut;

		bool useCPU=false;

		{
			BT_PROFILE("GPU batch");

			{
				//@todo: just reserve it, without copy of original contact (unless we use warmstarting)
				if( m_internalData->m_solverGPU->m_contactBuffer)
				{
					m_internalData->m_solverGPU->m_contactBuffer->resize(nContacts);
				}

				if( m_internalData->m_solverGPU->m_contactBuffer == 0 )
				{
					m_internalData->m_solverGPU->m_contactBuffer = new btOpenCLArray<Contact4>(m_context,m_queue, nContacts );
					m_internalData->m_solverGPU->m_contactBuffer->resize(nContacts);
				}

				btOpenCLArray<Contact4>* contactNative  = contactsIn;
				const btOpenCLArray<RigidBodyBase::Body>* bodyNative = bodyBuf;
				

				{
					
					//btOpenCLArray<RigidBodyBase::Body>* bodyNative = btOpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, bodyBuf );
					//btOpenCLArray<Contact4>* contactNative = btOpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, contactsIn );

					const int sortAlignment = 512; // todo. get this out of sort
					if( csCfg.m_enableParallelSolve )
					{
						

						int sortSize = NEXTMULTIPLEOF( nContacts, sortAlignment );

						btOpenCLArray<u32>* countsNative = m_internalData->m_solverGPU->m_numConstraints;
						btOpenCLArray<u32>* offsetsNative = m_internalData->m_solverGPU->m_offsets;

						{	//	2. set cell idx
							BT_PROFILE("GPU set cell idx");
							struct CB
							{
								int m_nContacts;
								int m_staticIdx;
								float m_scale;
								int m_nSplit;
							};

							ADLASSERT( sortSize%64 == 0 );
							CB cdata;
							cdata.m_nContacts = nContacts;
							cdata.m_staticIdx = csCfg.m_staticIdx;
							cdata.m_scale = 1.f/(BT_SOLVER_N_OBJ_PER_SPLIT*csCfg.m_averageExtent);
							cdata.m_nSplit = BT_SOLVER_N_SPLIT;

							m_internalData->m_solverGPU->m_sortDataBuffer->resize(nContacts);

							
							btBufferInfoCL bInfo[] = { btBufferInfoCL( contactNative->getBufferCL() ), btBufferInfoCL( bodyBuf->getBufferCL()), btBufferInfoCL( m_internalData->m_solverGPU->m_sortDataBuffer->getBufferCL()) };
							btLauncherCL launcher(m_queue, m_internalData->m_solverGPU->m_setSortDataKernel );
							launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
							launcher.setConst( cdata );
							launcher.launch1D( sortSize, 64 );
						}
						bool gpuRadixSort=true;
						if (gpuRadixSort)
						{	//	3. sort by cell idx
							BT_PROFILE("gpuRadixSort");
							int n = BT_SOLVER_N_SPLIT*BT_SOLVER_N_SPLIT;
							int sortBit = 32;
							//if( n <= 0xffff ) sortBit = 16;
							//if( n <= 0xff ) sortBit = 8;
							//adl::RadixSort<adl::TYPE_CL>::execute( data->m_sort, *data->m_sortDataBuffer, sortSize );
							//adl::RadixSort32<adl::TYPE_CL>::execute( data->m_sort32, *data->m_sortDataBuffer, sortSize );
							btOpenCLArray<btSortData>& keyValuesInOut = *(m_internalData->m_solverGPU->m_sortDataBuffer);
							this->m_internalData->m_solverGPU->m_sort32->execute(keyValuesInOut);

							/*btAlignedObjectArray<btSortData> hostValues;
							keyValuesInOut.copyToHost(hostValues);
							printf("hostValues.size=%d\n",hostValues.size());
							*/

						}



						
						{	
							//	4. find entries
							BT_PROFILE("gpuBoundSearch");
							
							m_internalData->m_solverGPU->m_search->execute(*m_internalData->m_solverGPU->m_sortDataBuffer,nContacts,*countsNative,
								BT_SOLVER_N_SPLIT*BT_SOLVER_N_SPLIT,btBoundSearchCL::COUNT);

							
							//adl::BoundSearch<adl::TYPE_CL>::execute( data->m_search, *data->m_sortDataBuffer, nContacts, *countsNative, 
							//	BT_SOLVER_N_SPLIT*BT_SOLVER_N_SPLIT, adl::BoundSearchBase::COUNT );

							//unsigned int sum;
							m_internalData->m_solverGPU->m_scan->execute(*countsNative,*offsetsNative, BT_SOLVER_N_SPLIT*BT_SOLVER_N_SPLIT);//,&sum );
							//printf("sum = %d\n",sum);
						} 


						{	//	5. sort constraints by cellIdx
							{
								BT_PROFILE("gpu m_reorderContactKernel");
							
								btInt4 cdata; 
								cdata.x = nContacts;

								btBufferInfoCL bInfo[] = { btBufferInfoCL( contactNative->getBufferCL() ), btBufferInfoCL( m_internalData->m_solverGPU->m_contactBuffer->getBufferCL())
									, btBufferInfoCL( m_internalData->m_solverGPU->m_sortDataBuffer->getBufferCL()) };
								btLauncherCL launcher(m_queue,m_internalData->m_solverGPU->m_reorderContactKernel);
								launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
								launcher.setConst( cdata );
								launcher.launch1D( nContacts, 64 );
							}
						}

					}

				}

				clFinish(m_queue);

				{
					BT_PROFILE("gpu m_copyConstraintKernel");
					
					btInt4 cdata; cdata.x = nContacts;
					btBufferInfoCL bInfo[] = { btBufferInfoCL(  m_internalData->m_solverGPU->m_contactBuffer->getBufferCL() ), btBufferInfoCL( contactNative->getBufferCL() ) };
					btLauncherCL launcher(m_queue, m_internalData->m_solverGPU->m_copyConstraintKernel );
					launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
					launcher.setConst(  cdata );
					launcher.launch1D( nContacts, 64 );
					clFinish(m_queue);
				}
					
				bool compareGPU = false;
				if (gpuBatchContacts)
				{
					BT_PROFILE("gpu batchContacts");
					m_internalData->m_solverGPU->batchContacts( contactNative, nContacts, m_internalData->m_solverGPU->m_numConstraints, m_internalData->m_solverGPU->m_offsets, csCfg.m_staticIdx );
				}

				if (1)
				{
					BT_PROFILE("gpu convertToConstraints");
					m_internalData->m_solverGPU->convertToConstraints( bodyBuf, shapeBuf, contactNative, contactCOut, additionalData, nContacts, csCfg );
					clFinish(m_queue);
				}

			}
		}


		if (1)
		{
			BT_PROFILE("GPU solveContactConstraint");
			m_internalData->m_solverGPU->m_nIterations = 4;//10
			m_internalData->m_solverGPU->solveContactConstraint(m_internalData->m_bodyBufferGPU, 
				m_internalData->m_inertiaBufferGPU, 
				m_internalData->m_contactCGPU,
				0, 
				nContactOut );

			clFinish(m_queue);
		}


#if 0
		if (0)
		{
			BT_PROFILE("read body velocities back to CPU");
			//read body updated linear/angular velocities back to CPU
			m_internalData->m_bodyBufferGPU->read(
				m_internalData->m_bodyBufferCPU->m_ptr,numOfConvexRBodies);
			adl::DeviceUtils::waitForCompletion( m_internalData->m_deviceCL );
		}
#endif

	}

}