/**
 * Dump this leaf node to a stream in human-readable form.
 *
 * Output is a "LEAF(<pair count>)" header, one "key -> value" line per
 * stored pair, and, when the overflow reference is not empty, a trailing
 * line with the overflow count and node ID.
 *
 * @param os  Stream that receives the dump.
 */
void LeafNode::print(std::ostream &os) const
{
    os << "LEAF(" << pairCount() << ")" << std::endl;

    BOOST_FOREACH(const libbruce::kv_pair &entry, pairs)
    {
        os << " " << entry.first << " -> " << entry.second << std::endl;
    }

    // Nothing more to report unless an overflow block is attached.
    if (overflow.empty())
        return;

    os << " Overflow " << overflow.count << " @ " << overflow.nodeID << std::endl;
}
bool PokerHandEvaluator::isStraight() { if (pairCount() > 0) return false; Rank lowest = getLowest(); for (std::list<Card>::iterator it = cards.begin(); it != cards.end(); ++it) { Rank cardRank = it->getRank(); for (std::list<Card>::iterator it2 = cards.begin(); it2 != cards.end(); ++it2) { } } }
/**
 * Classify a hand.
 *
 * Stores the hand's cards in `cards` and tests categories in this fixed
 * order, returning the first match: FLUSH, FOUROFAKIND, HOUSE (a triple
 * together with a pair count of at least 2), THREEOFAKIND, TWOPAIR,
 * ONEPAIR, and finally HIGHCARD.
 *
 * NOTE(review): straights are not detected here - confirm whether that is
 * intentional.
 *
 * @param hand  Hand to evaluate.
 * @return The first matching HandType in the order listed above.
 */
HandType PokerHandEvaluator::evaluate(Hand hand)
{
    cards = hand.getCards();

    if (isFlush())
        return FLUSH;

    if (hasFourOfKind() != NORANK)
        return FOUROFAKIND;

    if (hasTriple() != NORANK)
    {
        // A triple plus enough pairs makes a full house; otherwise it is
        // just three of a kind.
        if (pairCount() >= 2)
            return HOUSE;
        return THREEOFAKIND;
    }

    switch (pairCount())
    {
        case 2:
            return TWOPAIR;
        case 1:
            return ONEPAIR;
        default:
            return HIGHCARD;
    }
}
void btGpuSapBroadphase::calculateOverlappingPairs(bool forceHost) { int axis = 0;//todo on GPU for now hardcode btAssert(m_allAabbsCPU.size() == m_allAabbsGPU.size()); if (forceHost) { btAlignedObjectArray<btSapAabb> allHostAabbs; m_allAabbsGPU.copyToHost(allHostAabbs); { int numSmallAabbs = m_smallAabbsCPU.size(); for (int j=0;j<numSmallAabbs;j++) { //sync aabb int aabbIndex = m_smallAabbsCPU[j].m_signedMaxIndices[3]; m_smallAabbsCPU[j] = allHostAabbs[aabbIndex]; m_smallAabbsCPU[j].m_signedMaxIndices[3] = aabbIndex; } } { int numLargeAabbs = m_largeAabbsCPU.size(); for (int j=0;j<numLargeAabbs;j++) { //sync aabb int aabbIndex = m_largeAabbsCPU[j].m_signedMaxIndices[3]; m_largeAabbsCPU[j] = allHostAabbs[aabbIndex]; m_largeAabbsCPU[j].m_signedMaxIndices[3] = aabbIndex; } } btAlignedObjectArray<btInt2> hostPairs; { int numSmallAabbs = m_smallAabbsCPU.size(); for (int i=0;i<numSmallAabbs;i++) { float reference = m_smallAabbsCPU[i].m_max[axis]; for (int j=i+1;j<numSmallAabbs;j++) { if (TestAabbAgainstAabb2((btVector3&)m_smallAabbsCPU[i].m_min, (btVector3&)m_smallAabbsCPU[i].m_max, (btVector3&)m_smallAabbsCPU[j].m_min,(btVector3&)m_smallAabbsCPU[j].m_max)) { btInt2 pair; pair.x = m_smallAabbsCPU[i].m_minIndices[3];//store the original index in the unsorted aabb array pair.y = m_smallAabbsCPU[j].m_minIndices[3]; hostPairs.push_back(pair); } } } } { int numSmallAabbs = m_smallAabbsCPU.size(); for (int i=0;i<numSmallAabbs;i++) { float reference = m_smallAabbsCPU[i].m_max[axis]; int numLargeAabbs = m_largeAabbsCPU.size(); for (int j=0;j<numLargeAabbs;j++) { if (TestAabbAgainstAabb2((btVector3&)m_smallAabbsCPU[i].m_min, (btVector3&)m_smallAabbsCPU[i].m_max, (btVector3&)m_largeAabbsCPU[j].m_min,(btVector3&)m_largeAabbsCPU[j].m_max)) { btInt2 pair; pair.x = m_largeAabbsCPU[j].m_minIndices[3]; pair.y = m_smallAabbsCPU[i].m_minIndices[3];//store the original index in the unsorted aabb array hostPairs.push_back(pair); } } } } if (hostPairs.size()) { 
m_overlappingPairs.copyFromHost(hostPairs); } else { m_overlappingPairs.resize(0); } return; } { bool syncOnHost = false; if (syncOnHost) { BT_PROFILE("Synchronize m_smallAabbsGPU (CPU/slow)"); btAlignedObjectArray<btSapAabb> allHostAabbs; m_allAabbsGPU.copyToHost(allHostAabbs); m_smallAabbsGPU.copyToHost(m_smallAabbsCPU); { int numSmallAabbs = m_smallAabbsCPU.size(); for (int j=0;j<numSmallAabbs;j++) { //sync aabb int aabbIndex = m_smallAabbsCPU[j].m_signedMaxIndices[3]; m_smallAabbsCPU[j] = allHostAabbs[aabbIndex]; m_smallAabbsCPU[j].m_signedMaxIndices[3] = aabbIndex; } } m_smallAabbsGPU.copyFromHost(m_smallAabbsCPU); } else { { int numSmallAabbs = m_smallAabbsGPU.size(); BT_PROFILE("copyAabbsKernelSmall"); btBufferInfoCL bInfo[] = { btBufferInfoCL( m_allAabbsGPU.getBufferCL(), true ), btBufferInfoCL( m_smallAabbsGPU.getBufferCL()), }; btLauncherCL launcher(m_queue, m_copyAabbsKernel ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) ); launcher.setConst( numSmallAabbs ); int num = numSmallAabbs; launcher.launch1D( num); clFinish(m_queue); } } if (syncOnHost) { BT_PROFILE("Synchronize m_largeAabbsGPU (CPU/slow)"); btAlignedObjectArray<btSapAabb> allHostAabbs; m_allAabbsGPU.copyToHost(allHostAabbs); m_largeAabbsGPU.copyToHost(m_largeAabbsCPU); { int numLargeAabbs = m_largeAabbsCPU.size(); for (int j=0;j<numLargeAabbs;j++) { //sync aabb int aabbIndex = m_largeAabbsCPU[j].m_signedMaxIndices[3]; m_largeAabbsCPU[j] = allHostAabbs[aabbIndex]; m_largeAabbsCPU[j].m_signedMaxIndices[3] = aabbIndex; } } m_largeAabbsGPU.copyFromHost(m_largeAabbsCPU); } else { int numLargeAabbs = m_largeAabbsGPU.size(); if (numLargeAabbs) { BT_PROFILE("copyAabbsKernelLarge"); btBufferInfoCL bInfo[] = { btBufferInfoCL( m_allAabbsGPU.getBufferCL(), true ), btBufferInfoCL( m_largeAabbsGPU.getBufferCL()), }; btLauncherCL launcher(m_queue, m_copyAabbsKernel ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) ); launcher.setConst( numLargeAabbs ); int num = 
numLargeAabbs; launcher.launch1D( num); clFinish(m_queue); } } BT_PROFILE("GPU SAP"); int numSmallAabbs = m_smallAabbsGPU.size(); m_gpuSmallSortData.resize(numSmallAabbs); int numLargeAabbs = m_smallAabbsGPU.size(); #if 1 if (m_smallAabbsGPU.size()) { BT_PROFILE("flipFloatKernel"); btBufferInfoCL bInfo[] = { btBufferInfoCL( m_smallAabbsGPU.getBufferCL(), true ), btBufferInfoCL( m_gpuSmallSortData.getBufferCL())}; btLauncherCL launcher(m_queue, m_flipFloatKernel ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) ); launcher.setConst( numSmallAabbs ); launcher.setConst( axis ); int num = numSmallAabbs; launcher.launch1D( num); clFinish(m_queue); } { BT_PROFILE("gpu radix sort\n"); m_sorter->execute(m_gpuSmallSortData); clFinish(m_queue); } m_gpuSmallSortedAabbs.resize(numSmallAabbs); if (numSmallAabbs) { BT_PROFILE("scatterKernel"); btBufferInfoCL bInfo[] = { btBufferInfoCL( m_smallAabbsGPU.getBufferCL(), true ), btBufferInfoCL( m_gpuSmallSortData.getBufferCL(),true),btBufferInfoCL(m_gpuSmallSortedAabbs.getBufferCL())}; btLauncherCL launcher(m_queue, m_scatterKernel ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) ); launcher.setConst( numSmallAabbs); int num = numSmallAabbs; launcher.launch1D( num); clFinish(m_queue); } int maxPairsPerBody = 64; int maxPairs = maxPairsPerBody * numSmallAabbs;//todo m_overlappingPairs.resize(maxPairs); btOpenCLArray<int> pairCount(m_context, m_queue); pairCount.push_back(0); int numPairs=0; { int numLargeAabbs = m_largeAabbsGPU.size(); if (numLargeAabbs && numSmallAabbs) { BT_PROFILE("sap2Kernel"); btBufferInfoCL bInfo[] = { btBufferInfoCL( m_largeAabbsGPU.getBufferCL() ),btBufferInfoCL( m_gpuSmallSortedAabbs.getBufferCL() ), btBufferInfoCL( m_overlappingPairs.getBufferCL() ), btBufferInfoCL(pairCount.getBufferCL())}; btLauncherCL launcher(m_queue, m_sap2Kernel); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) ); launcher.setConst( numLargeAabbs ); launcher.setConst( 
numSmallAabbs); launcher.setConst( axis ); launcher.setConst( maxPairs ); //@todo: use actual maximum work item sizes of the device instead of hardcoded values launcher.launch2D( numLargeAabbs, numSmallAabbs,4,64); numPairs = pairCount.at(0); if (numPairs >maxPairs) numPairs =maxPairs; } } if (m_gpuSmallSortedAabbs.size()) { BT_PROFILE("sapKernel"); btBufferInfoCL bInfo[] = { btBufferInfoCL( m_gpuSmallSortedAabbs.getBufferCL() ), btBufferInfoCL( m_overlappingPairs.getBufferCL() ), btBufferInfoCL(pairCount.getBufferCL())}; btLauncherCL launcher(m_queue, m_sapKernel); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) ); launcher.setConst( numSmallAabbs ); launcher.setConst( axis ); launcher.setConst( maxPairs ); int num = numSmallAabbs; #if 0 int buffSize = launcher.getSerializationBufferSize(); unsigned char* buf = new unsigned char[buffSize+sizeof(int)]; for (int i=0;i<buffSize+1;i++) { unsigned char* ptr = (unsigned char*)&buf[i]; *ptr = 0xff; } int actualWrite = launcher.serializeArguments(buf,buffSize); unsigned char* cptr = (unsigned char*)&buf[buffSize]; // printf("buf[buffSize] = %d\n",*cptr); assert(buf[buffSize]==0xff);//check for buffer overrun int* ptr = (int*)&buf[buffSize]; *ptr = num; FILE* f = fopen("m_sapKernelArgs.bin","wb"); fwrite(buf,buffSize+sizeof(int),1,f); fclose(f); #endif// launcher.launch1D( num); clFinish(m_queue); numPairs = pairCount.at(0); if (numPairs>maxPairs) numPairs = maxPairs; } #else int numPairs = 0; btLauncherCL launcher(m_queue, m_sapKernel); const char* fileName = "m_sapKernelArgs.bin"; FILE* f = fopen(fileName,"rb"); if (f) { int sizeInBytes=0; if (fseek(f, 0, SEEK_END) || (sizeInBytes = ftell(f)) == EOF || fseek(f, 0, SEEK_SET)) { printf("error, cannot get file size\n"); exit(0); } unsigned char* buf = (unsigned char*) malloc(sizeInBytes); fread(buf,sizeInBytes,1,f); int serializedBytes = launcher.deserializeArgs(buf, sizeInBytes,m_context); int num = *(int*)&buf[serializedBytes]; launcher.launch1D( num); 
btOpenCLArray<int> pairCount(m_context, m_queue); int numElements = launcher.m_arrays[2]->size()/sizeof(int); pairCount.setFromOpenCLBuffer(launcher.m_arrays[2]->getBufferCL(),numElements); numPairs = pairCount.at(0); //printf("overlapping pairs = %d\n",numPairs); btAlignedObjectArray<btInt2> hostOoverlappingPairs; btOpenCLArray<btInt2> tmpGpuPairs(m_context,m_queue); tmpGpuPairs.setFromOpenCLBuffer(launcher.m_arrays[1]->getBufferCL(),numPairs ); tmpGpuPairs.copyToHost(hostOoverlappingPairs); m_overlappingPairs.copyFromHost(hostOoverlappingPairs); //printf("hello %d\n", m_overlappingPairs.size()); free(buf); fclose(f); } else { printf("error: cannot find file %s\n",fileName); } clFinish(m_queue); #endif m_overlappingPairs.resize(numPairs); }//BT_PROFILE("GPU_RADIX SORT"); }
int testSapKernel_computePairsKernelOriginal(int kernelIndex) { const char* sapSrc = sapCL; const char* sapFastSrc = sapFastCL; cl_int errNum=0; cl_program sapProg = btOpenCLUtils::compileCLProgramFromString(g_cxMainContext,g_device,sapSrc,&errNum,"","../../opencl/broadphase_benchmark/sap.cl"); btAssert(errNum==CL_SUCCESS); #ifndef __APPLE__ cl_program sapFastProg = btOpenCLUtils::compileCLProgramFromString(g_cxMainContext,g_device,sapFastSrc,&errNum,"","../../opencl/broadphase_benchmark/sapFast.cl"); btAssert(errNum==CL_SUCCESS); #endif cl_kernel m_sapKernel = 0; switch (kernelIndex) { case 0: m_sapKernel = btOpenCLUtils::compileCLKernelFromString(g_cxMainContext, g_device,sapSrc, "computePairsKernelOriginal",&errNum,sapProg ); break; case 1: m_sapKernel = btOpenCLUtils::compileCLKernelFromString(g_cxMainContext, g_device,sapSrc, "computePairsKernelBarrier",&errNum,sapProg ); break; case 2: m_sapKernel = btOpenCLUtils::compileCLKernelFromString(g_cxMainContext, g_device,sapSrc, "computePairsKernelLocalSharedMemory",&errNum,sapProg ); break; #ifndef __APPLE__ case 3: m_sapKernel = btOpenCLUtils::compileCLKernelFromString(g_cxMainContext, g_device,sapFastSrc, "computePairsKernel",&errNum,sapFastProg ); break; #endif default: { assert(0); } } btAssert(errNum==CL_SUCCESS); btLauncherCL launcher(g_cqCommandQue, m_sapKernel); const char* fileName = "m_sapKernelArgs.bin"; FILE* f = fopen(fileName,"rb"); if (f) { int sizeInBytes=0; if (fseek(f, 0, SEEK_END) || (sizeInBytes = ftell(f)) == EOF || fseek(f, 0, SEEK_SET)) { printf("error, cannot get file size\n"); exit(0); } unsigned char* buf = (unsigned char*) malloc(sizeInBytes); fread(buf,sizeInBytes,1,f); int serializedBytes = launcher.deserializeArgs(buf, sizeInBytes,g_cxMainContext); int num = *(int*)&buf[serializedBytes]; launcher.launch1D( num); btOpenCLArray<int> pairCount(g_cxMainContext, g_cqCommandQue); int numElements = launcher.m_arrays[2]->size()/sizeof(int); 
pairCount.setFromOpenCLBuffer(launcher.m_arrays[2]->getBufferCL(),numElements); int count = pairCount.at(0); printf("overlapping pairs = %d\n",count); } else { printf("error: cannot find file %s\n",fileName); } clFinish(g_cqCommandQue); clReleaseKernel(m_sapKernel); clReleaseProgram(sapProg); clFinish(g_cqCommandQue); }
void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs) { B3_PROFILE("b3GpuGridBroadphase::calculateOverlappingPairs"); if (0) { calculateOverlappingPairsHost(maxPairs); /* b3AlignedObjectArray<b3Int4> cpuPairs; m_gpuPairs.copyToHost(cpuPairs); printf("host m_gpuPairs.size()=%d\n",m_gpuPairs.size()); for (int i=0;i<m_gpuPairs.size();i++) { printf("host pair %d = %d,%d\n",i,cpuPairs[i].x,cpuPairs[i].y); } */ return; } int numSmallAabbs = m_smallAabbsMappingGPU.size(); b3OpenCLArray<int> pairCount(m_context,m_queue); pairCount.push_back(0); m_gpuPairs.resize(maxPairs);//numSmallAabbs*maxPairsPerBody); { int numLargeAabbs = m_largeAabbsMappingGPU.size(); if (numLargeAabbs && numSmallAabbs) { B3_PROFILE("sap2Kernel"); b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_allAabbsGPU1.getBufferCL() ), b3BufferInfoCL( m_largeAabbsMappingGPU.getBufferCL() ), b3BufferInfoCL( m_smallAabbsMappingGPU.getBufferCL() ), b3BufferInfoCL( m_gpuPairs.getBufferCL() ), b3BufferInfoCL(pairCount.getBufferCL())}; b3LauncherCL launcher(m_queue, m_sap2Kernel,"m_sap2Kernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( numLargeAabbs ); launcher.setConst( numSmallAabbs); launcher.setConst( 0 );//axis is not used launcher.setConst( maxPairs ); //@todo: use actual maximum work item sizes of the device instead of hardcoded values launcher.launch2D( numLargeAabbs, numSmallAabbs,4,64); int numPairs = pairCount.at(0); if (numPairs >maxPairs) { b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs); numPairs =maxPairs; } } } if (numSmallAabbs) { B3_PROFILE("gridKernel"); m_hashGpu.resize(numSmallAabbs); { B3_PROFILE("kCalcHashAABB"); b3LauncherCL launch(m_queue,kCalcHashAABB,"kCalcHashAABB"); launch.setConst(numSmallAabbs); launch.setBuffer(m_allAabbsGPU1.getBufferCL()); launch.setBuffer(m_smallAabbsMappingGPU.getBufferCL()); launch.setBuffer(m_hashGpu.getBufferCL()); launch.setBuffer(this->m_paramsGPU.getBufferCL()); 
launch.launch1D(numSmallAabbs); } m_sorter->execute(m_hashGpu); int numCells = this->m_paramsCPU.m_gridSize[0]*this->m_paramsCPU.m_gridSize[1]*this->m_paramsCPU.m_gridSize[2]; m_cellStartGpu.resize(numCells); //b3AlignedObjectArray<int > cellStartCpu; { B3_PROFILE("kClearCellStart"); b3LauncherCL launch(m_queue,kClearCellStart,"kClearCellStart"); launch.setConst(numCells); launch.setBuffer(m_cellStartGpu.getBufferCL()); launch.launch1D(numCells); //m_cellStartGpu.copyToHost(cellStartCpu); //printf("??\n"); } { B3_PROFILE("kFindCellStart"); b3LauncherCL launch(m_queue,kFindCellStart,"kFindCellStart"); launch.setConst(numSmallAabbs); launch.setBuffer(m_hashGpu.getBufferCL()); launch.setBuffer(m_cellStartGpu.getBufferCL()); launch.launch1D(numSmallAabbs); //m_cellStartGpu.copyToHost(cellStartCpu); //printf("??\n"); } { B3_PROFILE("kFindOverlappingPairs"); b3LauncherCL launch(m_queue,kFindOverlappingPairs,"kFindOverlappingPairs"); launch.setConst(numSmallAabbs); launch.setBuffer(m_allAabbsGPU1.getBufferCL()); launch.setBuffer(m_smallAabbsMappingGPU.getBufferCL()); launch.setBuffer(m_hashGpu.getBufferCL()); launch.setBuffer(m_cellStartGpu.getBufferCL()); launch.setBuffer(m_paramsGPU.getBufferCL()); //launch.setBuffer(0); launch.setBuffer(pairCount.getBufferCL()); launch.setBuffer(m_gpuPairs.getBufferCL()); launch.setConst(maxPairs); launch.launch1D(numSmallAabbs); int numPairs = pairCount.at(0); if (numPairs >maxPairs) { b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs); numPairs =maxPairs; } m_gpuPairs.resize(numPairs); if (0) { b3AlignedObjectArray<b3Int4> pairsCpu; m_gpuPairs.copyToHost(pairsCpu); printf("m_gpuPairs.size()=%d\n",m_gpuPairs.size()); for (int i=0;i<m_gpuPairs.size();i++) { printf("pair %d = %d,%d\n",i,pairsCpu[i].x,pairsCpu[i].y); } printf("?!?\n"); } } } //calculateOverlappingPairsHost(maxPairs); }