/// Tests whether two child proxies overlap, using the bounds stored on their
/// multi-SAP parent proxies (m_multiSapParentProxy) rather than on the children.
///
/// @param childProxy0  first child proxy; its m_multiSapParentProxy is dereferenced
/// @param childProxy1  second child proxy; its m_multiSapParentProxy is dereferenced
/// @return the result of TestAabbAgainstAabb2 on the two parent AABBs
bool	btMultiSapBroadphase::testAabbOverlap(btBroadphaseProxy* childProxy0,btBroadphaseProxy* childProxy1)
{
	const btMultiSapProxy* parentA = (btMultiSapProxy*)childProxy0->m_multiSapParentProxy;
	const btMultiSapProxy* parentB = (btMultiSapProxy*)childProxy1->m_multiSapParentProxy;

	const bool parentsOverlap = TestAabbAgainstAabb2(parentA->m_aabbMin, parentA->m_aabbMax,
													 parentB->m_aabbMin, parentB->m_aabbMax);
	return parentsOverlap;
}
示例#2
0
	virtual bool	process(const btBroadphaseProxy* proxy)
	{
		btVector3 proxyAabbMin,proxyAabbMax;
		btCollisionObject* colObj0 = (btCollisionObject*)proxy->m_clientObject;
		colObj0->getCollisionShape()->getAabb(colObj0->getWorldTransform(),proxyAabbMin,proxyAabbMax);
		if (TestAabbAgainstAabb2(proxyAabbMin,proxyAabbMax,m_queryAabbMin,m_queryAabbMax))
		{
			m_numOverlap++;
		}
		return true;
	}
	/// Collides a single child of the compound object against the other object.
	///
	/// The compound collision object is temporarily given the child's shape and the
	/// child's world transform (compound world transform * child local transform),
	/// the per-child collision algorithm is run, and afterwards the original shape,
	/// world transform and interpolation world transform are restored.
	/// Nothing is done when the child's AABB does not overlap the other object's AABB.
	///
	/// @param childShape  shape of the child to process
	/// @param index       child index; used for getChildTransform, as the slot in
	///                    m_childCollisionAlgorithms and as the shape identifier
	void	ProcessChildShape(btCollisionShape* childShape,int index)
	{
		
		btCompoundShape* compoundShape = static_cast<btCompoundShape*>(m_compoundColObj->getCollisionShape());


		//backup
		btTransform	orgTrans = m_compoundColObj->getWorldTransform();
		btTransform	orgInterpolationTrans = m_compoundColObj->getInterpolationWorldTransform();
		const btTransform& childTrans = compoundShape->getChildTransform(index);
		btTransform	newChildWorldTrans = orgTrans*childTrans ;

		//perform an AABB check first
		btVector3 aabbMin0,aabbMax0,aabbMin1,aabbMax1;
		childShape->getAabb(newChildWorldTrans,aabbMin0,aabbMax0);
		m_otherObj->getCollisionShape()->getAabb(m_otherObj->getWorldTransform(),aabbMin1,aabbMax1);

		if (TestAabbAgainstAabb2(aabbMin0,aabbMax0,aabbMin1,aabbMax1))
		{

			// Temporarily move the compound object to the child's world placement.
			m_compoundColObj->setWorldTransform( newChildWorldTrans);
			m_compoundColObj->setInterpolationWorldTransform(newChildWorldTrans);

			//the contactpoint is still projected back using the original inverted worldtrans
			btCollisionShape* tmpShape = m_compoundColObj->getCollisionShape();
			m_compoundColObj->internalSetTemporaryCollisionShape( childShape );

			// Lazily create the algorithm for this child the first time it is needed.
			if (!m_childCollisionAlgorithms[index])
				m_childCollisionAlgorithms[index] = m_dispatcher->findAlgorithm(m_compoundColObj,m_otherObj,m_sharedManifold);

			///detect swapping case
			if (m_resultOut->getBody0Internal() == m_compoundColObj)
			{
				m_resultOut->setShapeIdentifiersA(-1,index);
			} else
			{
				m_resultOut->setShapeIdentifiersB(-1,index);
			}

			m_childCollisionAlgorithms[index]->processCollision(m_compoundColObj,m_otherObj,m_dispatchInfo,m_resultOut);
			if (m_dispatchInfo.m_debugDraw && (m_dispatchInfo.m_debugDraw->getDebugMode() & btIDebugDraw::DBG_DrawAabb))
			{
				m_dispatchInfo.m_debugDraw->drawAabb(aabbMin0,aabbMax0,btVector3(1,1,1));
				m_dispatchInfo.m_debugDraw->drawAabb(aabbMin1,aabbMax1,btVector3(1,1,1));
			}
			
			//revert back transform
			m_compoundColObj->internalSetTemporaryCollisionShape( tmpShape);
			m_compoundColObj->setWorldTransform(  orgTrans );
			m_compoundColObj->setInterpolationWorldTransform(orgInterpolationTrans);
		}
	}
示例#4
0
void	btSimpleBroadphase::aabbTest(const btVector3& aabbMin, const btVector3& aabbMax, btBroadphaseAabbCallback& callback)
{
	for (int i=0; i <= m_LastHandleIndex; i++)
	{
		btSimpleBroadphaseProxy* proxy = &m_pHandles[i];
		if(!proxy->m_clientObject)
		{
			continue;
		}
		if (TestAabbAgainstAabb2(aabbMin,aabbMax,proxy->m_aabbMin,proxy->m_aabbMax))
		{
			callback.process(proxy);
		}
	}
}
示例#5
0
void	CollisionWorld::RayTest(const SimdVector3& rayFromWorld, const SimdVector3& rayToWorld, RayResultCallback& resultCallback)
{

	
	SimdTransform	rayFromTrans,rayToTrans;
	rayFromTrans.setIdentity();
	rayFromTrans.setOrigin(rayFromWorld);
	rayToTrans.setIdentity();
	
	rayToTrans.setOrigin(rayToWorld);

	//do culling based on aabb (rayFrom/rayTo)
	SimdVector3 rayAabbMin = rayFromWorld;
	SimdVector3 rayAabbMax = rayFromWorld;
	rayAabbMin.setMin(rayToWorld);
	rayAabbMax.setMax(rayToWorld);


	/// brute force go over all objects. Once there is a broadphase, use that, or
	/// add a raycast against aabb first.
	
	std::vector<CollisionObject*>::iterator iter;
	
	for (iter=m_collisionObjects.begin();
	!(iter==m_collisionObjects.end()); iter++)
	{
		
		CollisionObject*	collisionObject= (*iter);

		//RigidcollisionObject* collisionObject = ctrl->GetRigidcollisionObject();
		SimdVector3 collisionObjectAabbMin,collisionObjectAabbMax;
		collisionObject->m_collisionShape->GetAabb(collisionObject->m_worldTransform,collisionObjectAabbMin,collisionObjectAabbMax);

		//check aabb overlap

		if (TestAabbAgainstAabb2(rayAabbMin,rayAabbMax,collisionObjectAabbMin,collisionObjectAabbMax))
		{
			RayTestSingle(rayFromTrans,rayToTrans,
				collisionObject,
					 collisionObject->m_collisionShape,
					  collisionObject->m_worldTransform,
					  resultCallback);
			
		}
	}

}
示例#6
0
	/// Collides a single child of the compound object against the other object.
	///
	/// A temporary btCollider is built for the child (child shape plus the child's
	/// world transform, i.e. compound world transform * child local transform) and
	/// passed to the per-child collision algorithm. Nothing is done when the
	/// child's AABB does not overlap the other object's AABB.
	///
	/// @param childShape  shape of the child to process
	/// @param index       child index; asserted to be within [0, getNumChildShapes())
	void	ProcessChildShape(const btCollisionShape* childShape,int index)
	{
		btAssert(index>=0);
		const btCompoundShape* compoundShape = static_cast<const btCompoundShape*>(m_compoundColObj->getCollisionShape());
		btAssert(index<compoundShape->getNumChildShapes());


		//backup
		btTransform	orgTrans = m_compoundColObj->getWorldTransform();
		const btTransform& childTrans = compoundShape->getChildTransform(index);
		btTransform	newChildWorldTrans = orgTrans*childTrans ;

		//perform an AABB check first
		btVector3 aabbMin0,aabbMax0,aabbMin1,aabbMax1;
		childShape->getAabb(newChildWorldTrans,aabbMin0,aabbMax0);
		m_otherObj->getCollisionShape()->getAabb(m_otherObj->getWorldTransform(),aabbMin1,aabbMax1);

		if (TestAabbAgainstAabb2(aabbMin0,aabbMax0,aabbMin1,aabbMax1))
		{
			//the contactpoint is still projected back using the original inverted worldtrans
			btCollider childCollider(m_compoundColObj, childShape, m_compoundColObj->getCollisionObject(), newChildWorldTrans);

			// Lazily create the algorithm for this child the first time it is needed.
			if (!m_childCollisionAlgorithms[index])
				m_childCollisionAlgorithms[index] = m_dispatcher->findAlgorithm(&childCollider,m_otherObj,m_sharedManifold);

			///detect swapping case
			if (m_resultOut->getBody0Internal() == m_compoundColObj->getCollisionObject())
			{
				m_resultOut->setShapeIdentifiersA(-1,index);
			} else
			{
				m_resultOut->setShapeIdentifiersB(-1,index);
			}
			btCollisionProcessInfo processInfo(childCollider, *m_otherObj, m_dispatchInfo, m_resultOut, m_dispatcher);
			m_childCollisionAlgorithms[index]->processCollision(processInfo);

			if (m_dispatchInfo.m_debugDraw && (m_dispatchInfo.m_debugDraw->getDebugMode() & btIDebugDraw::DBG_DrawAabb))
			{
				m_dispatchInfo.m_debugDraw->drawAabb(aabbMin0,aabbMax0,btVector3(1,1,1));
				m_dispatchInfo.m_debugDraw->drawAabb(aabbMin1,aabbMax1,btVector3(1,1,1));
			}
		}
	}
/// Updates the AABB of a multi-SAP proxy and re-synchronises its membership in the
/// child broadphases.
///
/// Steps, in order:
///  1. store the new bounds on the btMultiSapProxy;
///  2. if m_optimizedAabbTree exists, query it with the new bounds and create a child
///     proxy in every reported child broadphase the proxy is not yet bridged to;
///  3. destroy the child proxy in every bridged child broadphase whose broadphase AABB
///     no longer overlaps the new bounds, and drop that bridge entry;
///  4. forward the new bounds to every remaining bridged child proxy.
///
/// @param proxy       proxy to update; cast to btMultiSapProxy
/// @param aabbMin     new minimum corner
/// @param aabbMax     new maximum corner
/// @param dispatcher  forwarded to createProxy / destroyProxy / setAabb of the child broadphases
void	btMultiSapBroadphase::setAabb(btBroadphaseProxy* proxy,const btVector3& aabbMin,const btVector3& aabbMax, btDispatcher* dispatcher)
{
	btMultiSapProxy* multiProxy = static_cast<btMultiSapProxy*>(proxy);
	multiProxy->m_aabbMin = aabbMin;
	multiProxy->m_aabbMax = aabbMax;
	
	
//	bool fullyContained = false;
//	bool alreadyInSimple = false;
	


	
	/// Tree-query callback: for each reported child broadphase, adds the multi proxy
	/// to it unless a bridge to that broadphase already exists.
	struct MyNodeOverlapCallback : public btNodeOverlapCallback
	{
		btMultiSapBroadphase*	m_multiSap;
		btMultiSapProxy*		m_multiProxy;
		btDispatcher*			m_dispatcher;

		MyNodeOverlapCallback(btMultiSapBroadphase* multiSap,btMultiSapProxy* multiProxy,btDispatcher* dispatcher)
			:m_multiSap(multiSap),
			m_multiProxy(multiProxy),
			m_dispatcher(dispatcher)
		{

		}

		virtual void processNode(int /*nodeSubPart*/, int broadphaseIndex)
		{
			btBroadphaseInterface* childBroadphase = m_multiSap->getBroadphaseArray()[broadphaseIndex];

			int containingBroadphaseIndex = -1;
			//already found?
			for (int i=0;i<m_multiProxy->m_bridgeProxies.size();i++)
			{

				if (m_multiProxy->m_bridgeProxies[i]->m_childBroadphase == childBroadphase)
				{
					containingBroadphaseIndex = i;
					break;
				}
			}
			if (containingBroadphaseIndex<0)
			{
				//add it
				btBroadphaseProxy* childProxy = childBroadphase->createProxy(m_multiProxy->m_aabbMin,m_multiProxy->m_aabbMax,m_multiProxy->m_shapeType,m_multiProxy->m_clientObject,m_multiProxy->m_collisionFilterGroup,m_multiProxy->m_collisionFilterMask, m_dispatcher,m_multiProxy);
				m_multiSap->addToChildBroadphase(m_multiProxy,childProxy,childBroadphase);

			}
		}
	};

	MyNodeOverlapCallback	myNodeCallback(this,multiProxy,dispatcher);



	
	if (m_optimizedAabbTree)
		m_optimizedAabbTree->reportAabbOverlappingNodex(&myNodeCallback,aabbMin,aabbMax);

	int i;

	// Remove bridges to child broadphases that no longer overlap the proxy.
	// Removal is swap-with-last + pop_back, which moves a not-yet-tested entry into
	// slot i; the index must therefore only advance when nothing was removed,
	// otherwise that entry would be skipped.
	i = 0;
	while (i<multiProxy->m_bridgeProxies.size())
	{
		btBridgeProxy* bridgeProxy = multiProxy->m_bridgeProxies[i];

		btVector3 worldAabbMin,worldAabbMax;
		bridgeProxy->m_childBroadphase->getBroadphaseAabb(worldAabbMin,worldAabbMax);
		bool overlapsBroadphase = TestAabbAgainstAabb2(worldAabbMin,worldAabbMax,multiProxy->m_aabbMin,multiProxy->m_aabbMax);
		if (overlapsBroadphase)
		{
			i++;
			continue;
		}

		//remove it now
		// NOTE(review): only the child proxy is destroyed here; the btBridgeProxy object
		// itself is not released in this function - confirm who owns it.
		btBroadphaseProxy* childProxy = bridgeProxy->m_childProxy;
		bridgeProxy->m_childBroadphase->destroyProxy(childProxy,dispatcher);

		multiProxy->m_bridgeProxies.swap( i,multiProxy->m_bridgeProxies.size()-1);
		multiProxy->m_bridgeProxies.pop_back();
		// i intentionally not incremented: slot i now holds an untested entry.
	}


	/*

	if (1)
	{

		//find broadphase that contain this multiProxy
		int numChildBroadphases = getBroadphaseArray().size();
		for (int i=0;i<numChildBroadphases;i++)
		{
			btBroadphaseInterface* childBroadphase = getBroadphaseArray()[i];
			btVector3 worldAabbMin,worldAabbMax;
			childBroadphase->getBroadphaseAabb(worldAabbMin,worldAabbMax);
			bool overlapsBroadphase = TestAabbAgainstAabb2(worldAabbMin,worldAabbMax,multiProxy->m_aabbMin,multiProxy->m_aabbMax);
			
		//	fullyContained = fullyContained || boxIsContainedWithinBox(worldAabbMin,worldAabbMax,multiProxy->m_aabbMin,multiProxy->m_aabbMax);
			int containingBroadphaseIndex = -1;
			
			//if already contains this
			
			for (int i=0;i<multiProxy->m_bridgeProxies.size();i++)
			{
				if (multiProxy->m_bridgeProxies[i]->m_childBroadphase == childBroadphase)
				{
					containingBroadphaseIndex = i;
				}
				alreadyInSimple = alreadyInSimple || (multiProxy->m_bridgeProxies[i]->m_childBroadphase == m_simpleBroadphase);
			}

			if (overlapsBroadphase)
			{
				if (containingBroadphaseIndex<0)
				{
					btBroadphaseProxy* childProxy = childBroadphase->createProxy(aabbMin,aabbMax,multiProxy->m_shapeType,multiProxy->m_clientObject,multiProxy->m_collisionFilterGroup,multiProxy->m_collisionFilterMask, dispatcher);
					childProxy->m_multiSapParentProxy = multiProxy;
					addToChildBroadphase(multiProxy,childProxy,childBroadphase);
				}
			} else
			{
				if (containingBroadphaseIndex>=0)
				{
					//remove
					btBridgeProxy* bridgeProxy = multiProxy->m_bridgeProxies[containingBroadphaseIndex];

					btBroadphaseProxy* childProxy = bridgeProxy->m_childProxy;
					bridgeProxy->m_childBroadphase->destroyProxy(childProxy,dispatcher);
					
					multiProxy->m_bridgeProxies.swap( containingBroadphaseIndex,multiProxy->m_bridgeProxies.size()-1);
					multiProxy->m_bridgeProxies.pop_back();
				}
			}
		}


		///If we are in no other child broadphase, stick the proxy in the global 'simple' broadphase (brute force)
		///hopefully we don't end up with many entries here (can assert/provide feedback on stats)
		if (0)//!multiProxy->m_bridgeProxies.size())
		{
			///we don't pass the userPtr but our multisap proxy. We need to patch this, before processing an actual collision
			///this is needed to be able to calculate the aabb overlap
			btBroadphaseProxy* childProxy = m_simpleBroadphase->createProxy(aabbMin,aabbMax,multiProxy->m_shapeType,multiProxy->m_clientObject,multiProxy->m_collisionFilterGroup,multiProxy->m_collisionFilterMask, dispatcher);
			childProxy->m_multiSapParentProxy = multiProxy;
			addToChildBroadphase(multiProxy,childProxy,m_simpleBroadphase);
		}
	}

	if (!multiProxy->m_bridgeProxies.size())
	{
		///we don't pass the userPtr but our multisap proxy. We need to patch this, before processing an actual collision
		///this is needed to be able to calculate the aabb overlap
		btBroadphaseProxy* childProxy = m_simpleBroadphase->createProxy(aabbMin,aabbMax,multiProxy->m_shapeType,multiProxy->m_clientObject,multiProxy->m_collisionFilterGroup,multiProxy->m_collisionFilterMask, dispatcher);
		childProxy->m_multiSapParentProxy = multiProxy;
		addToChildBroadphase(multiProxy,childProxy,m_simpleBroadphase);
	}
*/


	//update
	for ( i=0;i<multiProxy->m_bridgeProxies.size();i++)
	{
		btBridgeProxy* bridgeProxyRef = multiProxy->m_bridgeProxies[i];
		bridgeProxyRef->m_childBroadphase->setAabb(bridgeProxyRef->m_childProxy,aabbMin,aabbMax,dispatcher);
	}

}
/// Rebuilds m_overlappingPairs from the current AABBs.
///
/// Two paths exist:
///  - forceHost == true: all AABBs are copied from the GPU, the CPU copies of the small
///    and large AABB lists are refreshed from them, and pairs are found by brute force
///    (small-vs-small and small-vs-large) on the host, then uploaded.
///  - forceHost == false: the small/large GPU AABB lists are refreshed with
///    m_copyAabbsKernel, the small AABBs are keyed (m_flipFloatKernel), radix sorted and
///    scattered into sorted order, and pairs are produced by m_sap2Kernel (large vs small)
///    and m_sapKernel (small vs small). The result is clamped to maxPairs.
///
/// @param forceHost  selects the host brute-force path instead of the OpenCL kernels
void  btGpuSapBroadphase::calculateOverlappingPairs(bool forceHost)
{
	int axis = 0;//todo on GPU for now hardcode

	btAssert(m_allAabbsCPU.size() == m_allAabbsGPU.size());
	

	if (forceHost)
	{

	btAlignedObjectArray<btSapAabb> allHostAabbs;
	m_allAabbsGPU.copyToHost(allHostAabbs);
	
	{
		int numSmallAabbs = m_smallAabbsCPU.size();
		for (int j=0;j<numSmallAabbs;j++)
		{
			//sync aabb
			// m_signedMaxIndices[3] is used as the index into the full AABB array;
			// it is written back after the copy overwrites the whole entry.
			int aabbIndex = m_smallAabbsCPU[j].m_signedMaxIndices[3];
			m_smallAabbsCPU[j] = allHostAabbs[aabbIndex];
			m_smallAabbsCPU[j].m_signedMaxIndices[3] = aabbIndex;
		}
	}

	{
		int numLargeAabbs = m_largeAabbsCPU.size();
		for (int j=0;j<numLargeAabbs;j++)
		{
			//sync aabb
			int aabbIndex = m_largeAabbsCPU[j].m_signedMaxIndices[3];
			m_largeAabbsCPU[j] = allHostAabbs[aabbIndex];
			m_largeAabbsCPU[j].m_signedMaxIndices[3] = aabbIndex;

		}
	}

	btAlignedObjectArray<btInt2> hostPairs;

	// Brute force: every unordered pair of small AABBs.
	{
		int numSmallAabbs = m_smallAabbsCPU.size();
		for (int i=0;i<numSmallAabbs;i++)
		{
			for (int j=i+1;j<numSmallAabbs;j++)
			{
				if (TestAabbAgainstAabb2((btVector3&)m_smallAabbsCPU[i].m_min, (btVector3&)m_smallAabbsCPU[i].m_max,
					(btVector3&)m_smallAabbsCPU[j].m_min,(btVector3&)m_smallAabbsCPU[j].m_max))
				{
					btInt2 pair;
					pair.x = m_smallAabbsCPU[i].m_minIndices[3];//store the original index in the unsorted aabb array
					pair.y = m_smallAabbsCPU[j].m_minIndices[3];
					hostPairs.push_back(pair);
				}
			}
		}
	}

	
	// Brute force: every small AABB against every large AABB.
	{
		int numSmallAabbs = m_smallAabbsCPU.size();
		int numLargeAabbs = m_largeAabbsCPU.size();
		for (int i=0;i<numSmallAabbs;i++)
		{
			for (int j=0;j<numLargeAabbs;j++)
			{
				if (TestAabbAgainstAabb2((btVector3&)m_smallAabbsCPU[i].m_min, (btVector3&)m_smallAabbsCPU[i].m_max,
					(btVector3&)m_largeAabbsCPU[j].m_min,(btVector3&)m_largeAabbsCPU[j].m_max))
				{
					btInt2 pair;
					pair.x = m_largeAabbsCPU[j].m_minIndices[3];
					pair.y = m_smallAabbsCPU[i].m_minIndices[3];//store the original index in the unsorted aabb array
					hostPairs.push_back(pair);
				}
			}
		}
	}


	if (hostPairs.size())
	{
		m_overlappingPairs.copyFromHost(hostPairs);
	} else
	{
		m_overlappingPairs.resize(0);
	}

	return;
	}

	{

	// Hard-wired switch: false selects the kernel-based refresh of the AABB lists.
	bool syncOnHost = false;

	if (syncOnHost)
	{
		BT_PROFILE("Synchronize m_smallAabbsGPU (CPU/slow)");
		btAlignedObjectArray<btSapAabb> allHostAabbs;
		m_allAabbsGPU.copyToHost(allHostAabbs);

		m_smallAabbsGPU.copyToHost(m_smallAabbsCPU);
		{
			int numSmallAabbs = m_smallAabbsCPU.size();
			for (int j=0;j<numSmallAabbs;j++)
			{
				//sync aabb
				int aabbIndex = m_smallAabbsCPU[j].m_signedMaxIndices[3];
				m_smallAabbsCPU[j] = allHostAabbs[aabbIndex];
				m_smallAabbsCPU[j].m_signedMaxIndices[3] = aabbIndex;
			}
		}
		m_smallAabbsGPU.copyFromHost(m_smallAabbsCPU);
	
	} else
	{
		{
			int numSmallAabbs = m_smallAabbsGPU.size();
			BT_PROFILE("copyAabbsKernelSmall");
			btBufferInfoCL bInfo[] = { 
				btBufferInfoCL( m_allAabbsGPU.getBufferCL(), true ), 
				btBufferInfoCL( m_smallAabbsGPU.getBufferCL()),
			};

			btLauncherCL launcher(m_queue, m_copyAabbsKernel );
			launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
			launcher.setConst( numSmallAabbs  );
			int num = numSmallAabbs;
			launcher.launch1D( num);
			clFinish(m_queue);
		}
	}

	if (syncOnHost)
	{
		BT_PROFILE("Synchronize m_largeAabbsGPU (CPU/slow)");
		btAlignedObjectArray<btSapAabb> allHostAabbs;
		m_allAabbsGPU.copyToHost(allHostAabbs);

		m_largeAabbsGPU.copyToHost(m_largeAabbsCPU);
		{
			int numLargeAabbs = m_largeAabbsCPU.size();
			for (int j=0;j<numLargeAabbs;j++)
			{
				//sync aabb
				int aabbIndex = m_largeAabbsCPU[j].m_signedMaxIndices[3];
				m_largeAabbsCPU[j] = allHostAabbs[aabbIndex];
				m_largeAabbsCPU[j].m_signedMaxIndices[3] = aabbIndex;
			}
		}
		m_largeAabbsGPU.copyFromHost(m_largeAabbsCPU);
	
	} else
	{
		int numLargeAabbs = m_largeAabbsGPU.size();
		
		if (numLargeAabbs)
		{
			BT_PROFILE("copyAabbsKernelLarge");
			btBufferInfoCL bInfo[] = { 
				btBufferInfoCL( m_allAabbsGPU.getBufferCL(), true ), 
				btBufferInfoCL( m_largeAabbsGPU.getBufferCL()),
			};

			btLauncherCL launcher(m_queue, m_copyAabbsKernel );
			launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
			launcher.setConst( numLargeAabbs  );
			int num = numLargeAabbs;
			launcher.launch1D( num);
			clFinish(m_queue);
		}
	}




		BT_PROFILE("GPU SAP");
		
		int numSmallAabbs = m_smallAabbsGPU.size();
		m_gpuSmallSortData.resize(numSmallAabbs);

#if 1
		if (m_smallAabbsGPU.size())
		{
			BT_PROFILE("flipFloatKernel");
			btBufferInfoCL bInfo[] = { btBufferInfoCL( m_smallAabbsGPU.getBufferCL(), true ), btBufferInfoCL( m_gpuSmallSortData.getBufferCL())};
			btLauncherCL launcher(m_queue, m_flipFloatKernel );
			launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
			launcher.setConst( numSmallAabbs  );
			launcher.setConst( axis  );
			
			int num = numSmallAabbs;
			launcher.launch1D( num);
			clFinish(m_queue);
		}

		{
			BT_PROFILE("gpu radix sort\n");
			m_sorter->execute(m_gpuSmallSortData);
			clFinish(m_queue);
		}

		m_gpuSmallSortedAabbs.resize(numSmallAabbs);
		if (numSmallAabbs)
		{
			BT_PROFILE("scatterKernel");
			btBufferInfoCL bInfo[] = { btBufferInfoCL( m_smallAabbsGPU.getBufferCL(), true ), btBufferInfoCL( m_gpuSmallSortData.getBufferCL(),true),btBufferInfoCL(m_gpuSmallSortedAabbs.getBufferCL())};
			btLauncherCL launcher(m_queue, m_scatterKernel );
			launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
			launcher.setConst( numSmallAabbs);
			int num = numSmallAabbs;
			launcher.launch1D( num);
			clFinish(m_queue);
			
		}
        

			// Output capacity is a fixed budget per small AABB; pair counts read back
			// from the kernels are clamped to it below.
			int maxPairsPerBody = 64;
			int maxPairs = maxPairsPerBody * numSmallAabbs;//todo
			m_overlappingPairs.resize(maxPairs);

			// Single-element GPU counter shared by both pair kernels.
			btOpenCLArray<int> pairCount(m_context, m_queue);
			pairCount.push_back(0);
            int numPairs=0;

			{
				int numLargeAabbs = m_largeAabbsGPU.size();
				if (numLargeAabbs && numSmallAabbs)
				{
					BT_PROFILE("sap2Kernel");
					btBufferInfoCL bInfo[] = { btBufferInfoCL( m_largeAabbsGPU.getBufferCL() ),btBufferInfoCL( m_gpuSmallSortedAabbs.getBufferCL() ), btBufferInfoCL( m_overlappingPairs.getBufferCL() ), btBufferInfoCL(pairCount.getBufferCL())};
					btLauncherCL launcher(m_queue, m_sap2Kernel);
					launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
					launcher.setConst(   numLargeAabbs  );
					launcher.setConst( numSmallAabbs);
					launcher.setConst( axis  );
					launcher.setConst( maxPairs  );
//@todo: use actual maximum work item sizes of the device instead of hardcoded values
					launcher.launch2D( numLargeAabbs, numSmallAabbs,4,64);
                
					numPairs = pairCount.at(0);
					if (numPairs >maxPairs)
						numPairs =maxPairs;
					
				}
			}
			if (m_gpuSmallSortedAabbs.size())
			{
				BT_PROFILE("sapKernel");
				btBufferInfoCL bInfo[] = { btBufferInfoCL( m_gpuSmallSortedAabbs.getBufferCL() ), btBufferInfoCL( m_overlappingPairs.getBufferCL() ), btBufferInfoCL(pairCount.getBufferCL())};
				btLauncherCL launcher(m_queue, m_sapKernel);
				launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
				launcher.setConst( numSmallAabbs  );
				launcher.setConst( axis  );
				launcher.setConst( maxPairs  );

			
				int num = numSmallAabbs;
#if 0                
                int buffSize = launcher.getSerializationBufferSize();
                unsigned char* buf = new unsigned char[buffSize+sizeof(int)];
                for (int i=0;i<buffSize+1;i++)
                {
                    unsigned char* ptr = (unsigned char*)&buf[i];
                    *ptr = 0xff;
                }
                int actualWrite = launcher.serializeArguments(buf,buffSize);
                
                unsigned char* cptr = (unsigned char*)&buf[buffSize];
    //            printf("buf[buffSize] = %d\n",*cptr);
                
                assert(buf[buffSize]==0xff);//check for buffer overrun
                int* ptr = (int*)&buf[buffSize];
                
                *ptr = num;
                
                FILE* f = fopen("m_sapKernelArgs.bin","wb");
                fwrite(buf,buffSize+sizeof(int),1,f);
                fclose(f);
#endif//

                launcher.launch1D( num);
				clFinish(m_queue);
                
                numPairs = pairCount.at(0);
                if (numPairs>maxPairs)
					numPairs = maxPairs;
			}
			
#else
        int numPairs = 0;
        
        
        btLauncherCL launcher(m_queue, m_sapKernel);

        const char* fileName = "m_sapKernelArgs.bin";
        FILE* f = fopen(fileName,"rb");
        if (f)
        {
            int sizeInBytes=0;
            if (fseek(f, 0, SEEK_END) || (sizeInBytes = ftell(f)) == EOF || fseek(f, 0, SEEK_SET)) 
            {
                printf("error, cannot get file size\n");
                exit(0);
            }
            
            unsigned char* buf = (unsigned char*) malloc(sizeInBytes);
            fread(buf,sizeInBytes,1,f);
            int serializedBytes = launcher.deserializeArgs(buf, sizeInBytes,m_context);
            int num = *(int*)&buf[serializedBytes];
            launcher.launch1D( num);
            
            btOpenCLArray<int> pairCount(m_context, m_queue);
            int numElements = launcher.m_arrays[2]->size()/sizeof(int);
            pairCount.setFromOpenCLBuffer(launcher.m_arrays[2]->getBufferCL(),numElements);
            numPairs = pairCount.at(0);
            //printf("overlapping pairs = %d\n",numPairs);
            btAlignedObjectArray<btInt2>		hostOoverlappingPairs;
            btOpenCLArray<btInt2> tmpGpuPairs(m_context,m_queue);
            tmpGpuPairs.setFromOpenCLBuffer(launcher.m_arrays[1]->getBufferCL(),numPairs );
   
            tmpGpuPairs.copyToHost(hostOoverlappingPairs);
            m_overlappingPairs.copyFromHost(hostOoverlappingPairs);
            //printf("hello %d\n", m_overlappingPairs.size());
            free(buf);
            fclose(f);
            
        } else {
            printf("error: cannot find file %s\n",fileName);
        }
        
        clFinish(m_queue);

        
#endif

			
        // Shrink the pair buffer from its capacity to the clamped pair count.
        m_overlappingPairs.resize(numPairs);
		
	}//BT_PROFILE("GPU_RADIX SORT");

}
	void	ProcessChildShape(const btCollisionShape* childShape,int index)
	{
		btAssert(index>=0);
		const btCompoundShape* compoundShape = static_cast<const btCompoundShape*>(m_compoundColObjWrap->getCollisionShape());
		btAssert(index<compoundShape->getNumChildShapes());


		//backup
		btTransform	orgTrans = m_compoundColObjWrap->getWorldTransform();
		
		const btTransform& childTrans = compoundShape->getChildTransform(index);
		btTransform	newChildWorldTrans = orgTrans*childTrans ;

		//perform an AABB check first
		btVector3 aabbMin0,aabbMax0,aabbMin1,aabbMax1;
		childShape->getAabb(newChildWorldTrans,aabbMin0,aabbMax0);
		m_otherObjWrap->getCollisionShape()->getAabb(m_otherObjWrap->getWorldTransform(),aabbMin1,aabbMax1);

		if (gCompoundChildShapePairCallback)
		{
			if (!gCompoundChildShapePairCallback(m_otherObjWrap->getCollisionShape(), childShape))
				return;
		}

		if (TestAabbAgainstAabb2(aabbMin0,aabbMax0,aabbMin1,aabbMax1))
		{

			btCollisionObjectWrapper compoundWrap(this->m_compoundColObjWrap,childShape,m_compoundColObjWrap->getCollisionObject(),newChildWorldTrans,-1,index);


			//the contactpoint is still projected back using the original inverted worldtrans
			if (!m_childCollisionAlgorithms[index])
				m_childCollisionAlgorithms[index] = m_dispatcher->findAlgorithm(&compoundWrap,m_otherObjWrap,m_sharedManifold);

			
			const btCollisionObjectWrapper* tmpWrap = 0;

			///detect swapping case
			if (m_resultOut->getBody0Internal() == m_compoundColObjWrap->getCollisionObject())
			{
				tmpWrap = m_resultOut->getBody0Wrap();
				m_resultOut->setBody0Wrap(&compoundWrap);
				m_resultOut->setShapeIdentifiersA(-1,index);
			} else
			{
				tmpWrap = m_resultOut->getBody1Wrap();
				m_resultOut->setBody1Wrap(&compoundWrap);
				m_resultOut->setShapeIdentifiersB(-1,index);
			}


			m_childCollisionAlgorithms[index]->processCollision(&compoundWrap,m_otherObjWrap,m_dispatchInfo,m_resultOut);

#if 0
			if (m_dispatchInfo.m_debugDraw && (m_dispatchInfo.m_debugDraw->getDebugMode() & btIDebugDraw::DBG_DrawAabb))
			{
				btVector3 worldAabbMin,worldAabbMax;
				m_dispatchInfo.m_debugDraw->drawAabb(aabbMin0,aabbMax0,btVector3(1,1,1));
				m_dispatchInfo.m_debugDraw->drawAabb(aabbMin1,aabbMax1,btVector3(1,1,1));
			}
#endif

			if (m_resultOut->getBody0Internal() == m_compoundColObjWrap->getCollisionObject())
			{
				m_resultOut->setBody0Wrap(tmpWrap);
			} else
			{
				m_resultOut->setBody1Wrap(tmpWrap);
			}
			
		}
	}
/// Collides two compound shapes child-against-child.
///
/// Steps, in order:
///  1. if either compound shape has no dynamic AABB tree, delegate to
///     btCompoundCollisionAlgorithm::processCollision and return;
///  2. if either shape's update revision differs from the cached one, drop all cached
///     child algorithms and record the new revisions;
///  3. refresh the contact points of every non-empty manifold owned by cached child algorithms;
///  4. traverse both trees (MycollideTT) with a leaf callback that handles overlapping child pairs;
///  5. destroy and remove cached child algorithms whose two child AABBs no longer overlap.
///
/// @param body0Wrap     wrapper of the first object; its shape is asserted to be a compound
/// @param body1Wrap     wrapper of the second object; its shape is asserted to be a compound
/// @param dispatchInfo  forwarded to the leaf callback
/// @param resultOut     manifold result used for refreshing and forwarded to the leaf callback
void btCompoundCompoundCollisionAlgorithm::processCollision (const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
{

	const btCollisionObjectWrapper* col0ObjWrap = body0Wrap;
	const btCollisionObjectWrapper* col1ObjWrap= body1Wrap;

	btAssert (col0ObjWrap->getCollisionShape()->isCompound());
	btAssert (col1ObjWrap->getCollisionShape()->isCompound());
	const btCompoundShape* compoundShape0 = static_cast<const btCompoundShape*>(col0ObjWrap->getCollisionShape());
	const btCompoundShape* compoundShape1 = static_cast<const btCompoundShape*>(col1ObjWrap->getCollisionShape());

	const btDbvt* tree0 = compoundShape0->getDynamicAabbTree();
	const btDbvt* tree1 = compoundShape1->getDynamicAabbTree();
	if (!tree0 || !tree1)
	{
		return btCompoundCollisionAlgorithm::processCollision(body0Wrap,body1Wrap,dispatchInfo,resultOut);
	}
	///btCompoundShape might have changed:
	////make sure the internal child collision algorithm caches are still valid
	if ((compoundShape0->getUpdateRevision() != m_compoundShapeRevision0) || (compoundShape1->getUpdateRevision() != m_compoundShapeRevision1))
	{
		///clear all
		removeChildAlgorithms();
		m_compoundShapeRevision0 = compoundShape0->getUpdateRevision();
		m_compoundShapeRevision1 = compoundShape1->getUpdateRevision();

	}


	///we need to refresh all contact manifolds
	///note that we should actually recursively traverse all children, btCompoundShape can nested more then 1 level deep
	///so we should add a 'refreshManifolds' in the btCollisionAlgorithm
	{
		btManifoldArray manifoldArray;
		btSimplePairArray& pairs = m_childCollisionAlgorithmCache->getOverlappingPairArray();
		for (int i=0;i<pairs.size();i++)
		{
			if (pairs[i].m_userPointer)
			{
				btCollisionAlgorithm* algo = (btCollisionAlgorithm*) pairs[i].m_userPointer;
				algo->getAllContactManifolds(manifoldArray);
				for (int m=0;m<manifoldArray.size();m++)
				{
					if (manifoldArray[m]->getNumContacts())
					{
						// Point resultOut at the manifold only for the refresh, then detach it again.
						resultOut->setPersistentManifold(manifoldArray[m]);
						resultOut->refreshContactPoints();
						resultOut->setPersistentManifold(0);
					}
				}
				// Reuse the same array for the next cached algorithm.
				manifoldArray.resize(0);
			}
		}
	}


	

	btCompoundCompoundLeafCallback callback(col0ObjWrap,col1ObjWrap,this->m_dispatcher,dispatchInfo,resultOut,this->m_childCollisionAlgorithmCache,m_sharedManifold);


	// Transform of body 1 expressed in body 0's frame, handed to the tree-vs-tree traversal.
	const btTransform	xform=col0ObjWrap->getWorldTransform().inverse()*col1ObjWrap->getWorldTransform();
	MycollideTT(tree0->m_root,tree1->m_root,xform,&callback);

	//printf("#compound-compound child/leaf overlap =%d                      \r",callback.m_numOverlapPairs);

	//remove non-overlapping child pairs

	{
		btAssert(m_removePairs.size()==0);

		//iterate over all cached child pairs and re-test their world-space AABBs
		btSimplePairArray& pairs = m_childCollisionAlgorithmCache->getOverlappingPairArray();

		btVector3 aabbMin0,aabbMax0,aabbMin1,aabbMax1;

		for (int i=0;i<pairs.size();i++)
		{
			if (pairs[i].m_userPointer)
			{
				btCollisionAlgorithm* algo = (btCollisionAlgorithm*)pairs[i].m_userPointer;

				{
					// World-space AABB of child A of compound 0.
					const btCollisionShape* childShape0 = compoundShape0->getChildShape(pairs[i].m_indexA);
					const btTransform& childTrans0 = compoundShape0->getChildTransform(pairs[i].m_indexA);
					const btTransform newChildWorldTrans0 = col0ObjWrap->getWorldTransform()*childTrans0 ;
					childShape0->getAabb(newChildWorldTrans0,aabbMin0,aabbMax0);
				}

				{
					// World-space AABB of child B of compound 1.
					const btCollisionShape* childShape1 = compoundShape1->getChildShape(pairs[i].m_indexB);
					const btTransform& childTrans1 = compoundShape1->getChildTransform(pairs[i].m_indexB);
					const btTransform newChildWorldTrans1 = col1ObjWrap->getWorldTransform()*childTrans1 ;
					childShape1->getAabb(newChildWorldTrans1,aabbMin1,aabbMax1);
				}

				if (!TestAabbAgainstAabb2(aabbMin0,aabbMax0,aabbMin1,aabbMax1))
				{
					// Destroy the algorithm now, but only queue the pair: removing it here
					// would modify the array that is being iterated.
					algo->~btCollisionAlgorithm();
					m_dispatcher->freeCollisionAlgorithm(algo);
					m_removePairs.push_back(btSimplePair(pairs[i].m_indexA,pairs[i].m_indexB));
				}
			}
		}
		for (int r=0;r<m_removePairs.size();r++)
		{
			m_childCollisionAlgorithmCache->removeOverlappingPair(m_removePairs[r].m_indexA,m_removePairs[r].m_indexB);
		}
		m_removePairs.clear();
	}

}
	/// Leaf-pair callback of the compound-vs-compound tree traversal.
	///
	/// Each leaf's dataAsInt is used as the child index into its compound shape.
	/// Every call increments m_numOverlapPairs. The optional user filter
	/// gCompoundCompoundChildShapePairCallback is consulted first (a false result skips
	/// the pair), then the two children's world-space AABBs are tested. When both pass,
	/// the cached algorithm for the (childIndex0, childIndex1) pair is looked up, or
	/// created and cached, and run with temporary child wrappers installed on
	/// m_resultOut; the previous wrappers are restored afterwards.
	///
	/// @param leaf0  leaf from the first compound's tree
	/// @param leaf1  leaf from the second compound's tree
	void		Process(const btDbvtNode* leaf0,const btDbvtNode* leaf1)
	{
		m_numOverlapPairs++;


		int childIndex0 = leaf0->dataAsInt;
		int childIndex1 = leaf1->dataAsInt;
		

		btAssert(childIndex0>=0);
		btAssert(childIndex1>=0);


		const btCompoundShape* compoundShape0 = static_cast<const btCompoundShape*>(m_compound0ColObjWrap->getCollisionShape());
		btAssert(childIndex0<compoundShape0->getNumChildShapes());

		const btCompoundShape* compoundShape1 = static_cast<const btCompoundShape*>(m_compound1ColObjWrap->getCollisionShape());
		btAssert(childIndex1<compoundShape1->getNumChildShapes());

		const btCollisionShape* childShape0 = compoundShape0->getChildShape(childIndex0);
		const btCollisionShape* childShape1 = compoundShape1->getChildShape(childIndex1);

		// Ask the user filter before doing any transform/AABB work: a rejected
		// pair needs none of it.
		if (gCompoundCompoundChildShapePairCallback)
		{
			if (!gCompoundCompoundChildShapePairCallback(childShape0,childShape1))
				return;
		}

		// World transform of each child: compound world transform * child local transform.
		btTransform	orgTrans0 = m_compound0ColObjWrap->getWorldTransform();
		const btTransform& childTrans0 = compoundShape0->getChildTransform(childIndex0);
		btTransform	newChildWorldTrans0 = orgTrans0*childTrans0 ;
		
		btTransform	orgTrans1 = m_compound1ColObjWrap->getWorldTransform();
		const btTransform& childTrans1 = compoundShape1->getChildTransform(childIndex1);
		btTransform	newChildWorldTrans1 = orgTrans1*childTrans1 ;
		

		//perform an AABB check first
		btVector3 aabbMin0,aabbMax0,aabbMin1,aabbMax1;
		childShape0->getAabb(newChildWorldTrans0,aabbMin0,aabbMax0);
		childShape1->getAabb(newChildWorldTrans1,aabbMin1,aabbMax1);

		if (TestAabbAgainstAabb2(aabbMin0,aabbMax0,aabbMin1,aabbMax1))
		{
			btCollisionObjectWrapper compoundWrap0(this->m_compound0ColObjWrap,childShape0, m_compound0ColObjWrap->getCollisionObject(),newChildWorldTrans0,-1,childIndex0);
			btCollisionObjectWrapper compoundWrap1(this->m_compound1ColObjWrap,childShape1,m_compound1ColObjWrap->getCollisionObject(),newChildWorldTrans1,-1,childIndex1);
			

			btSimplePair* pair = m_childCollisionAlgorithmCache->findPair(childIndex0,childIndex1);

			btCollisionAlgorithm* colAlgo = 0;

			if (pair)
			{
				// Reuse the algorithm cached for this child pair.
				colAlgo = (btCollisionAlgorithm*)pair->m_userPointer;
				
			} else
			{
				// First overlap of this child pair: create the algorithm and cache it on the pair.
				colAlgo = m_dispatcher->findAlgorithm(&compoundWrap0,&compoundWrap1,m_sharedManifold);
				pair = m_childCollisionAlgorithmCache->addOverlappingPair(childIndex0,childIndex1);
				btAssert(pair);
				pair->m_userPointer = colAlgo;
			}

			btAssert(colAlgo);
						
			// Remember the wrappers currently installed so they can be restored below;
			// the child wrappers are locals and must not outlive this scope.
			const btCollisionObjectWrapper* tmpWrap0 = m_resultOut->getBody0Wrap();
			const btCollisionObjectWrapper* tmpWrap1 = m_resultOut->getBody1Wrap();

			m_resultOut->setBody0Wrap(&compoundWrap0);
			m_resultOut->setBody1Wrap(&compoundWrap1);

			m_resultOut->setShapeIdentifiersA(-1,childIndex0);
			m_resultOut->setShapeIdentifiersB(-1,childIndex1);


			colAlgo->processCollision(&compoundWrap0,&compoundWrap1,m_dispatchInfo,m_resultOut);
			
			m_resultOut->setBody0Wrap(tmpWrap0);
			m_resultOut->setBody1Wrap(tmpWrap1);
			


		}
	}