Exemple #1
0
/**
 * Writes a human-readable dump of this leaf to `os`.
 *
 * Emits a "LEAF(<pair count>)" header line, then one indented
 * "key -> value" line per entry in `pairs`, and finally an "Overflow" line
 * (count and node ID) when the overflow record is non-empty.
 */
void LeafNode::print(std::ostream &os) const
{
    os << "LEAF(" << pairCount() << ")" << std::endl;

    BOOST_FOREACH(const libbruce::kv_pair &entry, pairs)
    {
        os << "  " << entry.first;
        os << " -> " << entry.second << std::endl;
    }

    // Nothing more to report when there is no overflow record.
    if (overflow.empty())
        return;

    os << "  Overflow " << overflow.count << " @ " << overflow.nodeID << std::endl;
}
/**
 * Reports whether the currently loaded cards form a straight.
 *
 * NOTE(review): the rank-scanning logic was never written. The original body
 * consisted of two empty nested loops over `cards` and then flowed off the end
 * of a non-void function, which is undefined behaviour. Until the check is
 * implemented this function returns a well-defined `false` on every path.
 *
 * @return currently always false.
 */
bool PokerHandEvaluator::isStraight() {
    // A hand that contains at least one pair is rejected up front.
    if (pairCount() > 0) return false;

    // TODO: implement the consecutive-rank check (the original skeleton fetched
    // getLowest() and compared every card's getRank() against the others, but
    // never acted on the result).
    return false;
}
/**
 * Classifies `hand` and returns the first matching category.
 *
 * The hand's cards are stored in `cards`, then the checks run in this order:
 * flush, four of a kind, full house (a triple plus a pair count of at least
 * two), three of a kind, two pair, one pair. Anything else is HIGHCARD.
 * isStraight() is not consulted, so no straight category is ever returned.
 *
 * @param hand hand to evaluate (taken by value).
 * @return the HandType of the first check that succeeds.
 */
HandType PokerHandEvaluator::evaluate(Hand hand) {
    cards = hand.getCards();

    if (isFlush())
        return FLUSH;

    if (hasFourOfKind() != NORANK)
        return FOUROFAKIND;

    // A triple is either part of a full house or stands alone.
    const bool tripleFound = (hasTriple() != NORANK);
    if (tripleFound)
        return (pairCount() >= 2) ? HOUSE : THREEOFAKIND;

    switch (pairCount()) {
        case 2:
            return TWOPAIR;
        case 1:
            return ONEPAIR;
        default:
            return HIGHCARD;
    }
}
/**
 * Recomputes m_overlappingPairs from the current AABB arrays.
 *
 * Two code paths exist:
 *  - forceHost == true: all AABBs are copied from the GPU to the host, the
 *    small/large CPU arrays are refreshed from that copy, and every
 *    small-vs-small (i < j) and small-vs-large combination is tested with
 *    TestAabbAgainstAabb2. The resulting pairs are uploaded to
 *    m_overlappingPairs. Large-vs-large combinations are not tested here.
 *  - forceHost == false: the small/large GPU arrays are refreshed with
 *    m_copyAabbsKernel, the small AABBs are sorted (flipFloat kernel, radix
 *    sort, scatter kernel), and m_sap2Kernel / m_sapKernel write pairs into
 *    m_overlappingPairs, which is finally resized to the reported pair count.
 *
 * @param forceHost when true, use the brute-force host path and return early.
 */
void  btGpuSapBroadphase::calculateOverlappingPairs(bool forceHost)
{
	int axis = 0;//todo on GPU for now hardcode

	btAssert(m_allAabbsCPU.size() == m_allAabbsGPU.size());
	

	if (forceHost)
	{

	// Pull the authoritative AABBs from the GPU.
	btAlignedObjectArray<btSapAabb> allHostAabbs;
	m_allAabbsGPU.copyToHost(allHostAabbs);
	
	{
		int numSmallAabbs = m_smallAabbsCPU.size();
		for (int j=0;j<numSmallAabbs;j++)
		{
			//sync aabb
			// m_signedMaxIndices[3] is used as the index into the full AABB array;
			// it is re-written after the copy so the entry keeps that index.
			int aabbIndex = m_smallAabbsCPU[j].m_signedMaxIndices[3];
			m_smallAabbsCPU[j] = allHostAabbs[aabbIndex];
			m_smallAabbsCPU[j].m_signedMaxIndices[3] = aabbIndex;
		}
	}

	{
		int numLargeAabbs = m_largeAabbsCPU.size();
		for (int j=0;j<numLargeAabbs;j++)
		{
			//sync aabb
			// Same refresh as above, for the large AABB list.
			int aabbIndex = m_largeAabbsCPU[j].m_signedMaxIndices[3];
			m_largeAabbsCPU[j] = allHostAabbs[aabbIndex];
			m_largeAabbsCPU[j].m_signedMaxIndices[3] = aabbIndex;

		}
	}

	btAlignedObjectArray<btInt2> hostPairs;

	// Small vs. small: test every unordered combination (j starts at i+1).
	{
		int numSmallAabbs = m_smallAabbsCPU.size();
		for (int i=0;i<numSmallAabbs;i++)
		{
			// NOTE(review): 'reference' is never read in this loop.
			float reference = m_smallAabbsCPU[i].m_max[axis];

			for (int j=i+1;j<numSmallAabbs;j++)
			{
				if (TestAabbAgainstAabb2((btVector3&)m_smallAabbsCPU[i].m_min, (btVector3&)m_smallAabbsCPU[i].m_max,
					(btVector3&)m_smallAabbsCPU[j].m_min,(btVector3&)m_smallAabbsCPU[j].m_max))
				{
					btInt2 pair;
					pair.x = m_smallAabbsCPU[i].m_minIndices[3];//store the original index in the unsorted aabb array
					pair.y = m_smallAabbsCPU[j].m_minIndices[3];
					hostPairs.push_back(pair);
				}
			}
		}
	}

	
	// Small vs. large: every small AABB against every large AABB.
	{
		int numSmallAabbs = m_smallAabbsCPU.size();
		for (int i=0;i<numSmallAabbs;i++)
		{
			// NOTE(review): 'reference' is never read in this loop either.
			float reference = m_smallAabbsCPU[i].m_max[axis];
			int numLargeAabbs = m_largeAabbsCPU.size();

			for (int j=0;j<numLargeAabbs;j++)
			{
				if (TestAabbAgainstAabb2((btVector3&)m_smallAabbsCPU[i].m_min, (btVector3&)m_smallAabbsCPU[i].m_max,
					(btVector3&)m_largeAabbsCPU[j].m_min,(btVector3&)m_largeAabbsCPU[j].m_max))
				{
					// The large AABB's index goes in x, the small one's in y.
					btInt2 pair;
					pair.x = m_largeAabbsCPU[j].m_minIndices[3];
					pair.y = m_smallAabbsCPU[i].m_minIndices[3];//store the original index in the unsorted aabb array
					hostPairs.push_back(pair);
				}
			}
		}
	}


	// Publish the host result; an empty result is handled by resizing to zero
	// instead of calling copyFromHost.
	if (hostPairs.size())
	{
		m_overlappingPairs.copyFromHost(hostPairs);
	} else
	{
		m_overlappingPairs.resize(0);
	}

	return;
	}

	{

	// Hard-coded to false, so the "(CPU/slow)" branches below are never taken.
	bool syncOnHost = false;

	if (syncOnHost)
	{
		BT_PROFILE("Synchronize m_smallAabbsGPU (CPU/slow)");
		btAlignedObjectArray<btSapAabb> allHostAabbs;
		m_allAabbsGPU.copyToHost(allHostAabbs);

		m_smallAabbsGPU.copyToHost(m_smallAabbsCPU);
		{
			int numSmallAabbs = m_smallAabbsCPU.size();
			for (int j=0;j<numSmallAabbs;j++)
			{
				//sync aabb
				int aabbIndex = m_smallAabbsCPU[j].m_signedMaxIndices[3];
				m_smallAabbsCPU[j] = allHostAabbs[aabbIndex];
				m_smallAabbsCPU[j].m_signedMaxIndices[3] = aabbIndex;
			}
		}
		m_smallAabbsGPU.copyFromHost(m_smallAabbsCPU);
	
	} else
	{
		{
			// Refresh m_smallAabbsGPU from m_allAabbsGPU with m_copyAabbsKernel.
			// NOTE(review): unlike the large-AABB launch below, this launch is not
			// guarded against numSmallAabbs == 0.
			int numSmallAabbs = m_smallAabbsGPU.size();
			BT_PROFILE("copyAabbsKernelSmall");
			btBufferInfoCL bInfo[] = { 
				btBufferInfoCL( m_allAabbsGPU.getBufferCL(), true ), 
				btBufferInfoCL( m_smallAabbsGPU.getBufferCL()),
			};

			btLauncherCL launcher(m_queue, m_copyAabbsKernel );
			launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
			launcher.setConst( numSmallAabbs  );
			int num = numSmallAabbs;
			launcher.launch1D( num);
			clFinish(m_queue);
		}
	}

	if (syncOnHost)
	{
		BT_PROFILE("Synchronize m_largeAabbsGPU (CPU/slow)");
		btAlignedObjectArray<btSapAabb> allHostAabbs;
		m_allAabbsGPU.copyToHost(allHostAabbs);

		m_largeAabbsGPU.copyToHost(m_largeAabbsCPU);
		{
			int numLargeAabbs = m_largeAabbsCPU.size();
			for (int j=0;j<numLargeAabbs;j++)
			{
				//sync aabb
				int aabbIndex = m_largeAabbsCPU[j].m_signedMaxIndices[3];
				m_largeAabbsCPU[j] = allHostAabbs[aabbIndex];
				m_largeAabbsCPU[j].m_signedMaxIndices[3] = aabbIndex;
			}
		}
		m_largeAabbsGPU.copyFromHost(m_largeAabbsCPU);
	
	} else
	{
		int numLargeAabbs = m_largeAabbsGPU.size();
		
		// Refresh m_largeAabbsGPU with the same copy kernel; skipped when empty.
		if (numLargeAabbs)
		{
			BT_PROFILE("copyAabbsKernelLarge");
			btBufferInfoCL bInfo[] = { 
				btBufferInfoCL( m_allAabbsGPU.getBufferCL(), true ), 
				btBufferInfoCL( m_largeAabbsGPU.getBufferCL()),
			};

			btLauncherCL launcher(m_queue, m_copyAabbsKernel );
			launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
			launcher.setConst( numLargeAabbs  );
			int num = numLargeAabbs;
			launcher.launch1D( num);
			clFinish(m_queue);
		}
	}




		BT_PROFILE("GPU SAP");
		
		int numSmallAabbs = m_smallAabbsGPU.size();
		m_gpuSmallSortData.resize(numSmallAabbs);
		// NOTE(review): initialised from m_smallAabbsGPU (not m_largeAabbsGPU), which
		// looks like a copy-paste slip. The value is never read: the sap2Kernel
		// scope below declares its own numLargeAabbs that shadows this one.
		int numLargeAabbs = m_smallAabbsGPU.size();

#if 1
		// Step 1: m_flipFloatKernel fills m_gpuSmallSortData from the small AABBs
		// for the chosen axis.
		if (m_smallAabbsGPU.size())
		{
			BT_PROFILE("flipFloatKernel");
			btBufferInfoCL bInfo[] = { btBufferInfoCL( m_smallAabbsGPU.getBufferCL(), true ), btBufferInfoCL( m_gpuSmallSortData.getBufferCL())};
			btLauncherCL launcher(m_queue, m_flipFloatKernel );
			launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
			launcher.setConst( numSmallAabbs  );
			launcher.setConst( axis  );
			
			int num = numSmallAabbs;
			launcher.launch1D( num);
			clFinish(m_queue);
		}

		// Step 2: sort the sort data on the GPU.
		{
			BT_PROFILE("gpu radix sort\n");
			m_sorter->execute(m_gpuSmallSortData);
			clFinish(m_queue);
		}

		// Step 3: m_scatterKernel writes the small AABBs into
		// m_gpuSmallSortedAabbs using the sorted data.
		m_gpuSmallSortedAabbs.resize(numSmallAabbs);
		if (numSmallAabbs)
		{
			BT_PROFILE("scatterKernel");
			btBufferInfoCL bInfo[] = { btBufferInfoCL( m_smallAabbsGPU.getBufferCL(), true ), btBufferInfoCL( m_gpuSmallSortData.getBufferCL(),true),btBufferInfoCL(m_gpuSmallSortedAabbs.getBufferCL())};
			btLauncherCL launcher(m_queue, m_scatterKernel );
			launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
			launcher.setConst( numSmallAabbs);
			int num = numSmallAabbs;
			launcher.launch1D( num);
			clFinish(m_queue);
			
		}
        

			// Output capacity: a fixed budget of 64 pairs per small AABB.
			int maxPairsPerBody = 64;
			int maxPairs = maxPairsPerBody * numSmallAabbs;//todo
			m_overlappingPairs.resize(maxPairs);

			// Single-element GPU counter, initialised to 0 and passed to both pair kernels.
			btOpenCLArray<int> pairCount(m_context, m_queue);
			pairCount.push_back(0);
            int numPairs=0;

			// Step 4: m_sap2Kernel takes the large AABBs and the sorted small AABBs;
			// it only runs when both sets are non-empty.
			{
				int numLargeAabbs = m_largeAabbsGPU.size();
				if (numLargeAabbs && numSmallAabbs)
				{
					BT_PROFILE("sap2Kernel");
					btBufferInfoCL bInfo[] = { btBufferInfoCL( m_largeAabbsGPU.getBufferCL() ),btBufferInfoCL( m_gpuSmallSortedAabbs.getBufferCL() ), btBufferInfoCL( m_overlappingPairs.getBufferCL() ), btBufferInfoCL(pairCount.getBufferCL())};
					btLauncherCL launcher(m_queue, m_sap2Kernel);
					launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
					launcher.setConst(   numLargeAabbs  );
					launcher.setConst( numSmallAabbs);
					launcher.setConst( axis  );
					launcher.setConst( maxPairs  );
//@todo: use actual maximum work item sizes of the device instead of hardcoded values
					launcher.launch2D( numLargeAabbs, numSmallAabbs,4,64);
                
					// NOTE(review): no clFinish precedes this read, unlike the other
					// launches here; presumably at() performs a blocking read - confirm.
					// The count is clamped to the capacity of m_overlappingPairs.
					numPairs = pairCount.at(0);
					if (numPairs >maxPairs)
						numPairs =maxPairs;
					
				}
			}
			// Step 5: m_sapKernel runs on the sorted small AABBs alone.
			if (m_gpuSmallSortedAabbs.size())
			{
				BT_PROFILE("sapKernel");
				btBufferInfoCL bInfo[] = { btBufferInfoCL( m_gpuSmallSortedAabbs.getBufferCL() ), btBufferInfoCL( m_overlappingPairs.getBufferCL() ), btBufferInfoCL(pairCount.getBufferCL())};
				btLauncherCL launcher(m_queue, m_sapKernel);
				launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
				launcher.setConst( numSmallAabbs  );
				launcher.setConst( axis  );
				launcher.setConst( maxPairs  );

			
				int num = numSmallAabbs;
				// Disabled debug aid: serialises the kernel arguments plus 'num' to
				// "m_sapKernelArgs.bin" so the launch can be replayed offline.
#if 0                
                int buffSize = launcher.getSerializationBufferSize();
                unsigned char* buf = new unsigned char[buffSize+sizeof(int)];
                for (int i=0;i<buffSize+1;i++)
                {
                    unsigned char* ptr = (unsigned char*)&buf[i];
                    *ptr = 0xff;
                }
                int actualWrite = launcher.serializeArguments(buf,buffSize);
                
                unsigned char* cptr = (unsigned char*)&buf[buffSize];
    //            printf("buf[buffSize] = %d\n",*cptr);
                
                assert(buf[buffSize]==0xff);//check for buffer overrun
                int* ptr = (int*)&buf[buffSize];
                
                *ptr = num;
                
                FILE* f = fopen("m_sapKernelArgs.bin","wb");
                fwrite(buf,buffSize+sizeof(int),1,f);
                fclose(f);
#endif//

                launcher.launch1D( num);
				clFinish(m_queue);
                
                // The counter buffer is the one also given to sap2Kernel and is not
                // reset in between, so this value presumably includes the pairs
                // counted there - confirm against the kernel source.
                numPairs = pairCount.at(0);
                if (numPairs>maxPairs)
					numPairs = maxPairs;
			}
			
#else
        // Disabled alternative: replays a launch of m_sapKernel from arguments
        // previously serialised to "m_sapKernelArgs.bin".
        int numPairs = 0;
        
        
        btLauncherCL launcher(m_queue, m_sapKernel);

        const char* fileName = "m_sapKernelArgs.bin";
        FILE* f = fopen(fileName,"rb");
        if (f)
        {
            int sizeInBytes=0;
            if (fseek(f, 0, SEEK_END) || (sizeInBytes = ftell(f)) == EOF || fseek(f, 0, SEEK_SET)) 
            {
                printf("error, cannot get file size\n");
                exit(0);
            }
            
            unsigned char* buf = (unsigned char*) malloc(sizeInBytes);
            fread(buf,sizeInBytes,1,f);
            int serializedBytes = launcher.deserializeArgs(buf, sizeInBytes,m_context);
            // The launch size is stored as an int right after the serialised arguments.
            int num = *(int*)&buf[serializedBytes];
            launcher.launch1D( num);
            
            btOpenCLArray<int> pairCount(m_context, m_queue);
            int numElements = launcher.m_arrays[2]->size()/sizeof(int);
            pairCount.setFromOpenCLBuffer(launcher.m_arrays[2]->getBufferCL(),numElements);
            numPairs = pairCount.at(0);
            //printf("overlapping pairs = %d\n",numPairs);
            btAlignedObjectArray<btInt2>		hostOoverlappingPairs;
            btOpenCLArray<btInt2> tmpGpuPairs(m_context,m_queue);
            tmpGpuPairs.setFromOpenCLBuffer(launcher.m_arrays[1]->getBufferCL(),numPairs );
   
            tmpGpuPairs.copyToHost(hostOoverlappingPairs);
            m_overlappingPairs.copyFromHost(hostOoverlappingPairs);
            //printf("hello %d\n", m_overlappingPairs.size());
            free(buf);
            fclose(f);
            
        } else {
            printf("error: cannot find file %s\n",fileName);
        }
        
        clFinish(m_queue);

        
#endif

			
        // Shrink the output from its maxPairs capacity to the (clamped) pair count.
        m_overlappingPairs.resize(numPairs);
		
	}//BT_PROFILE("GPU_RADIX SORT");

}
Exemple #5
0
/**
 * Replays a serialised SAP kernel launch and prints the resulting pair count.
 *
 * Compiles the SAP OpenCL program(s), creates the kernel selected by
 * kernelIndex, loads the serialised kernel arguments from
 * "m_sapKernelArgs.bin", launches the kernel and prints the first element of
 * the pair-count buffer (argument array 2).
 *
 * @param kernelIndex 0 = computePairsKernelOriginal,
 *                    1 = computePairsKernelBarrier,
 *                    2 = computePairsKernelLocalSharedMemory,
 *                    3 = computePairsKernel from the "fast" program
 *                        (not available when __APPLE__ is defined).
 *                    Any other value triggers assert(0).
 * @return 0 when the argument file was loaded and the kernel launched,
 *         -1 when the file could not be opened, read, or was truncated.
 *         (The original fell off the end without returning a value.)
 */
int testSapKernel_computePairsKernelOriginal(int kernelIndex)
{
    const char* sapSrc = sapCL;
    const char* sapFastSrc = sapFastCL;

    cl_int errNum=0;

    cl_program sapProg = btOpenCLUtils::compileCLProgramFromString(g_cxMainContext,g_device,sapSrc,&errNum,"","../../opencl/broadphase_benchmark/sap.cl");
    btAssert(errNum==CL_SUCCESS);
#ifndef __APPLE__
    cl_program sapFastProg = btOpenCLUtils::compileCLProgramFromString(g_cxMainContext,g_device,sapFastSrc,&errNum,"","../../opencl/broadphase_benchmark/sapFast.cl");
    btAssert(errNum==CL_SUCCESS);
#endif

    cl_kernel m_sapKernel = 0;

    switch (kernelIndex)
    {
        case 0:
            m_sapKernel = btOpenCLUtils::compileCLKernelFromString(g_cxMainContext, g_device,sapSrc, "computePairsKernelOriginal",&errNum,sapProg );
            break;
        case 1:
            m_sapKernel = btOpenCLUtils::compileCLKernelFromString(g_cxMainContext, g_device,sapSrc, "computePairsKernelBarrier",&errNum,sapProg );
            break;
        case 2:
            m_sapKernel = btOpenCLUtils::compileCLKernelFromString(g_cxMainContext, g_device,sapSrc, "computePairsKernelLocalSharedMemory",&errNum,sapProg );
            break;
#ifndef __APPLE__
        case 3:
            m_sapKernel = btOpenCLUtils::compileCLKernelFromString(g_cxMainContext, g_device,sapFastSrc, "computePairsKernel",&errNum,sapFastProg );
            break;
#endif

        default:
        {
            assert(0);
        }
    }

    btAssert(errNum==CL_SUCCESS);

    int status = -1;

    btLauncherCL launcher(g_cqCommandQue, m_sapKernel);
    const char* fileName = "m_sapKernelArgs.bin";
    FILE* f = fopen(fileName,"rb");
    if (f)
    {
        int sizeInBytes=0;
        if (fseek(f, 0, SEEK_END) || (sizeInBytes = ftell(f)) == EOF || fseek(f, 0, SEEK_SET))
        {
            printf("error, cannot get file size\n");
            fclose(f);
            exit(0);
        }

        unsigned char* buf = (unsigned char*) malloc(sizeInBytes);
        // Only deserialise when the whole file was actually read; otherwise the
        // launcher would be fed uninitialised memory.
        if (buf && fread(buf,sizeInBytes,1,f) == 1)
        {
            int serializedBytes = launcher.deserializeArgs(buf, sizeInBytes,g_cxMainContext);

            // The launch size is stored as an int right after the serialised
            // arguments; make sure it lies inside the buffer before reading it.
            if (serializedBytes >= 0 && serializedBytes + (int)sizeof(int) <= sizeInBytes)
            {
                int num = *(int*)&buf[serializedBytes];

                launcher.launch1D( num);
                btOpenCLArray<int> pairCount(g_cxMainContext, g_cqCommandQue);
                int numElements = launcher.m_arrays[2]->size()/sizeof(int);
                pairCount.setFromOpenCLBuffer(launcher.m_arrays[2]->getBufferCL(),numElements);
                int count = pairCount.at(0);
                printf("overlapping pairs = %d\n",count);
                status = 0;
            } else {
                printf("error: file %s is truncated\n",fileName);
            }
        } else {
            printf("error: cannot read file %s\n",fileName);
        }

        // Release the host copy and the file handle (previously leaked).
        free(buf);
        fclose(f);

    } else {
        printf("error: cannot find file %s\n",fileName);
    }


    clFinish(g_cqCommandQue);

    clReleaseKernel(m_sapKernel);
    clReleaseProgram(sapProg);
#ifndef __APPLE__
    // The second program was compiled above but never released.
    clReleaseProgram(sapFastProg);
#endif
    clFinish(g_cqCommandQue);

    return status;
}
/**
 * Computes overlapping pairs on the GPU and stores them in m_gpuPairs.
 *
 * Steps:
 *  1. m_gpuPairs is sized to maxPairs and a single-element GPU counter
 *     (pairCount) is initialised to 0.
 *  2. If both large and small AABBs exist, m_sap2Kernel is launched over the
 *     large/small mappings.
 *  3. If small AABBs exist, the grid pipeline runs: kCalcHashAABB, sort of the
 *     hashes, kClearCellStart, kFindCellStart, kFindOverlappingPairs. The
 *     counter is then read back, clamped to maxPairs, and m_gpuPairs is
 *     shrunk to that count.
 *  4. If there are no small AABBs, no kernel has written any pair, so
 *     m_gpuPairs is shrunk to 0. (Previously it was left at maxPairs
 *     unwritten entries in that case.)
 *
 * @param maxPairs capacity of the pair buffer; pair counts above it are
 *                 reported through b3Error and clamped.
 */
void  b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs)
{
	B3_PROFILE("b3GpuGridBroadphase::calculateOverlappingPairs");
	

	// Compile-time switch to the host implementation (disabled).
	if (0)
	{
		calculateOverlappingPairsHost(maxPairs);
	/*
		b3AlignedObjectArray<b3Int4> cpuPairs;
		m_gpuPairs.copyToHost(cpuPairs);
		printf("host m_gpuPairs.size()=%d\n",m_gpuPairs.size());
		for (int i=0;i<m_gpuPairs.size();i++)
		{
			printf("host pair %d = %d,%d\n",i,cpuPairs[i].x,cpuPairs[i].y);
		}
		*/
		return;
	}
	
	int numSmallAabbs = m_smallAabbsMappingGPU.size();

	// Single-element counter passed to both pair-producing kernels.
	b3OpenCLArray<int> pairCount(m_context,m_queue);
	pairCount.push_back(0);
	m_gpuPairs.resize(maxPairs);//numSmallAabbs*maxPairsPerBody);

	{
		int numLargeAabbs = m_largeAabbsMappingGPU.size();
		if (numLargeAabbs && numSmallAabbs)
		{
			B3_PROFILE("sap2Kernel");
			b3BufferInfoCL bInfo[] = { 
				b3BufferInfoCL( m_allAabbsGPU1.getBufferCL() ),
				b3BufferInfoCL( m_largeAabbsMappingGPU.getBufferCL() ),
				b3BufferInfoCL( m_smallAabbsMappingGPU.getBufferCL() ), 
				b3BufferInfoCL( m_gpuPairs.getBufferCL() ), 
				b3BufferInfoCL(pairCount.getBufferCL())};
			b3LauncherCL launcher(m_queue, m_sap2Kernel,"m_sap2Kernel");
			launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
			launcher.setConst(   numLargeAabbs  );
			launcher.setConst( numSmallAabbs);
			launcher.setConst( 0  );//axis is not used
			launcher.setConst( maxPairs  );
	//@todo: use actual maximum work item sizes of the device instead of hardcoded values
			launcher.launch2D( numLargeAabbs, numSmallAabbs,4,64);
                
			// Read back only to report overflow; the final size is taken after
			// the grid kernel below.
			int numPairs = pairCount.at(0);
			
			if (numPairs >maxPairs)
			{
				b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs);
				numPairs =maxPairs;
			}
		}
	}

	if (numSmallAabbs)
	{
		B3_PROFILE("gridKernel");
		m_hashGpu.resize(numSmallAabbs);
		{
			B3_PROFILE("kCalcHashAABB");
			b3LauncherCL launch(m_queue,kCalcHashAABB,"kCalcHashAABB");
			launch.setConst(numSmallAabbs);
			launch.setBuffer(m_allAabbsGPU1.getBufferCL());
			launch.setBuffer(m_smallAabbsMappingGPU.getBufferCL());
			launch.setBuffer(m_hashGpu.getBufferCL());
			launch.setBuffer(this->m_paramsGPU.getBufferCL());
			launch.launch1D(numSmallAabbs);
		}

		m_sorter->execute(m_hashGpu);
		
		// One cell-start entry per grid cell.
		int numCells = this->m_paramsCPU.m_gridSize[0]*this->m_paramsCPU.m_gridSize[1]*this->m_paramsCPU.m_gridSize[2];
		m_cellStartGpu.resize(numCells);
		//b3AlignedObjectArray<int >			cellStartCpu;
				
		
		{
			B3_PROFILE("kClearCellStart");
			b3LauncherCL launch(m_queue,kClearCellStart,"kClearCellStart");
			launch.setConst(numCells);
			launch.setBuffer(m_cellStartGpu.getBufferCL());
			launch.launch1D(numCells);
			//m_cellStartGpu.copyToHost(cellStartCpu);
			//printf("??\n");

		}


		{
			B3_PROFILE("kFindCellStart");
			b3LauncherCL launch(m_queue,kFindCellStart,"kFindCellStart");
			launch.setConst(numSmallAabbs);
			launch.setBuffer(m_hashGpu.getBufferCL());
			launch.setBuffer(m_cellStartGpu.getBufferCL());
			launch.launch1D(numSmallAabbs);
			//m_cellStartGpu.copyToHost(cellStartCpu);
			//printf("??\n");

		}
		
		{
			B3_PROFILE("kFindOverlappingPairs");
			
			
			b3LauncherCL launch(m_queue,kFindOverlappingPairs,"kFindOverlappingPairs");
			launch.setConst(numSmallAabbs);
			launch.setBuffer(m_allAabbsGPU1.getBufferCL());
			launch.setBuffer(m_smallAabbsMappingGPU.getBufferCL());
			launch.setBuffer(m_hashGpu.getBufferCL());
			launch.setBuffer(m_cellStartGpu.getBufferCL());
			
			launch.setBuffer(m_paramsGPU.getBufferCL());
			//launch.setBuffer(0);
			launch.setBuffer(pairCount.getBufferCL());
			launch.setBuffer(m_gpuPairs.getBufferCL());
			
			launch.setConst(maxPairs);
			launch.launch1D(numSmallAabbs);
			

			// Final pair count, clamped to the buffer capacity.
			int numPairs = pairCount.at(0);
			if (numPairs >maxPairs)
			{
				b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs);
				numPairs =maxPairs;
			}
			
			m_gpuPairs.resize(numPairs);
	
			// Disabled debug dump of the resulting pairs.
			if (0)
			{
				b3AlignedObjectArray<b3Int4> pairsCpu;
				m_gpuPairs.copyToHost(pairsCpu);
			
				printf("m_gpuPairs.size()=%d\n",m_gpuPairs.size());
				for (int i=0;i<m_gpuPairs.size();i++)
				{
					printf("pair %d = %d,%d\n",i,pairsCpu[i].x,pairsCpu[i].y);
				}

				printf("?!?\n");
			}
			
		}
	

	} else
	{
		// Neither kernel ran (both require small AABBs), so no pair was written:
		// do not leave m_gpuPairs at maxPairs unwritten entries.
		m_gpuPairs.resize(0);
	}

	//calculateOverlappingPairsHost(maxPairs);
}