void testBvhTrimesh(oclContext& iContext) { oclDevice& lDevice = iContext.getDevice(0); oclBuffer bfVertex(iContext, "bfVertex"); bfVertex.create<cl_float4> (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, 1000); if (bfVertex.map(CL_MAP_WRITE)) { // initiaize vertices cl_float4* lPtr = bfVertex.ptr<cl_float4>(); for (unsigned int i=0; i<1000; i++) { lPtr[i].s[0] = (float)rand()/RAND_MAX-0.5; lPtr[i].s[1] = (float)rand()/RAND_MAX-0.5; lPtr[i].s[2] = (float)rand()/RAND_MAX-0.5; lPtr[i].s[3] = 1; } bfVertex.unmap(); } else return; oclBuffer bfIndex(iContext, "bfIndex"); bfIndex.create<cl_uint> (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, 1000); if (bfIndex.map(CL_MAP_WRITE)) { // initiaize indecies cl_uint* lPtr = bfIndex.ptr<cl_uint>(); for (unsigned int i=0; i<1000; i++) { lPtr[i] = i; } bfIndex.unmap(); } else return; oclBvhTrimesh clProgram(iContext); if (clProgram.compile()) { if (clProgram.compute(lDevice, bfVertex, bfIndex)) { cl_uint lRootNode = clProgram.getRootNode(); oclBuffer& lNodes = clProgram.getNodeBuffer(); if (lNodes.map(CL_MAP_READ)) { oclBvhTrimesh::BVHNode* lPtr = lNodes.ptr<oclBvhTrimesh::BVHNode>(); Log(INFO) << "BVH Root (min):" << lPtr[lRootNode].bbMin; Log(INFO) << "BVH Root (max):" << lPtr[lRootNode].bbMax; Log(INFO) << "BVH Root (left):" << lPtr[lRootNode].left; Log(INFO) << "BVH Root (right):" << lPtr[lRootNode].right; lNodes.unmap(); } } } };
void testFluid3D0(oclContext& iContext) { oclDevice& lDevice = iContext.getDevice(0); oclFluid3D clProgram(iContext); if (clProgram.compile()) { oclBuffer* lBuffer = clProgram.getPositionBuffer(); unsigned int lCount = clProgram.getParticleCount(); if (lBuffer->map(CL_MAP_WRITE)) { // initialize particle positions cl_float4* lPtr = lBuffer->ptr<cl_float4>(); for (unsigned int i=0; i<lCount; i++) { lPtr[i].s[0] = (float)rand()/RAND_MAX-0.5; lPtr[i].s[1] = (float)rand()/RAND_MAX-0.5; lPtr[i].s[2] = (float)rand()/RAND_MAX-0.5; lPtr[i].s[3] = 0; } lBuffer->unmap(); } Log(INFO) << "Computing particles for 7 seconds "; DWORD lEnd = GetTickCount() + 7000; while (lEnd > GetTickCount()) { clProgram.compute(lDevice); clFinish(lDevice); cout << "."; } cout << "\n"; if (lBuffer->map(CL_MAP_READ)) { // compute average paricle position cl_float4 lAvg = { 0,0,0,0}; cl_float4* lPtr = lBuffer->ptr<cl_float4>(); for (unsigned int i=0; i<lCount; i++) { lAvg.s[0] += lPtr[i].s[0]; lAvg.s[1] += lPtr[i].s[1]; lAvg.s[2] += lPtr[i].s[2]; } lAvg.s[0] /= lCount; lAvg.s[1] /= lCount; lAvg.s[2] /= lCount; Log(INFO) << "Average particle position = " << *lPtr; lBuffer->unmap(); } } }
void testRadixSort(oclContext& iContext) { oclDevice& lDevice = iContext.getDevice(0); oclBuffer bfKey(iContext, "bfKey"); oclBuffer bfVal(iContext, "bfVal"); bfKey.create<cl_uint> (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, 1024); bfVal.create<cl_uint> (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, 1024); if (!bfKey.map(CL_MAP_READ | CL_MAP_WRITE)) { return; } if (!bfVal.map(CL_MAP_READ | CL_MAP_WRITE)) { return; } cl_uint* ptrKey = bfKey.ptr<cl_uint>(); cl_uint* ptrVal = bfVal.ptr<cl_uint>(); for (int i=0; i<1024; i++) { ptrKey[i] = rand(); ptrVal[i] = i; } bfVal.write(); bfKey.write(); oclRadixSort clProgram(iContext); if (clProgram.compile()) { clProgram.compute(lDevice, bfKey, bfVal, 0, 32); bfVal.read(); bfKey.read(); for (int i=1; i<1024; i++) { if (ptrKey[i] < ptrKey[i-1]) { Log(WARN) << "array not sorted"; break; } } } bfVal.unmap(); bfKey.unmap(); Log(INFO) << "testRadixSort completed"; }
oclError & oclConnection :: init_program_kernels (oclConnection * const con) { print_optional (" ** Initializing oclConnection for types ", ocl_precision_trait <T, S> :: getTypeString (), con -> m_verbose); // get sources int size; std::vector <std::pair <const char *, ::size_t> > sources; const std::vector <std::string> filenames = ocl_precision_trait <T, S> :: getFilenames (); for (std::vector <std::string> ::const_iterator it = filenames.begin (); it != filenames.end (); ++it) { const char * tmp_src = ReadSource <T, S> (*it, &size); sources.push_back (std::pair <const char*, ::size_t> (tmp_src, size)); } clProgram prog; clKernels kernels; try { // create program prog = clProgram (con -> m_cont, sources, & con -> m_error); *(ocl_precision_trait <T, S> :: getProgram (con)) = prog; // build program con -> m_error = prog.build (con -> m_devs); // create kernels con -> m_error = prog.createKernels (&kernels); *(ocl_precision_trait <T, S> :: getKernels (con)) = kernels; } catch (cl::Error cle) { cout << " Type: " << ocl_precision_trait <T, S> :: getTypeString () << std::endl; cout << "Error while building program: " << cle.what () << endl; cout << "Build Status: " << prog.getBuildInfo<CL_PROGRAM_BUILD_STATUS> (con -> m_devs [0]) << endl; cout << "Build Options:\t" << prog.getBuildInfo<CL_PROGRAM_BUILD_OPTIONS> (con -> m_devs [0]) << endl; cout << "Build Log:\t " << prog.getBuildInfo<CL_PROGRAM_BUILD_LOG> (con -> m_devs [0]) << endl; std::cout << " Error flag: " << cle.err () << " (" << con -> errorString (cle.err ()) << ")" << std::endl; throw -1; } }
LSHReservoirSampler::LSHReservoirSampler(LSH *hashFamIn, unsigned int numHashPerFamily, unsigned int numHashFamilies, unsigned int reservoirSize, unsigned int dimension, unsigned int numSecHash, unsigned int maxSamples, unsigned int queryProbes, unsigned int hashingProbes, float tableAllocFraction) { #if !defined SECONDARY_HASHING if (numHashPerFamily != numSecHash) { std::cout << "[LSHReservoirSampler::LSHReservoirSampler] Fatal, secondary hashing disabled. " << std::endl; } #endif initVariables(numHashPerFamily, numHashFamilies, reservoirSize, dimension, numSecHash, maxSamples, queryProbes, hashingProbes, tableAllocFraction); #if defined USE_OPENCL clPlatformDevices(); clContext(); clProgram(); clKernels(); clCommandQueue(); #endif _hashFamily = hashFamIn; #if defined CL_TEST_CPU float cpu_test_size = (float)CL_TEST_CPU*(float)sizeof(int) / (float)1000000000; printf("Testing CPU Device %d Allocation (%3.1f GiB) Bandwidth.\n", CL_CPU_DEVICE, cpu_test_size); clTestAlloc(CL_TEST_CPU, &context_cpu, &command_queue_cpu); #endif #if defined CL_TEST_GPU float gpu_test_size = (float)CL_TEST_GPU*(float)sizeof(int) / (float)1000000000; printf("Testing GPU Device %d Allocation (%3.1f GiB) Bandwidth.\n", CL_DEVICE_ID, gpu_test_size); clTestAlloc(CL_TEST_GPU, &context_gpu, &command_queue_gpu); #endif initHelper(_numTables, _rangePow, _reservoirSize); }
void testFluid3D1(oclContext& iContext) { oclDevice& lDevice = iContext.getDevice(0); oclFluid3D clProgram(iContext); if (clProgram.compile()) { oclBuffer* lBuffer = clProgram.getPositionBuffer(); unsigned int lCount = clProgram.getParticleCount(); if (lBuffer->map(CL_MAP_WRITE)) { // initialize particle positions cl_float4* lPtr = lBuffer->ptr<cl_float4>(); for (unsigned int i=0; i<lCount; i++) { lPtr[i].s[0] = (float)rand()/RAND_MAX-0.5; lPtr[i].s[1] = (float)rand()/RAND_MAX-0.5; lPtr[i].s[2] = (float)rand()/RAND_MAX-0.5; lPtr[i].s[3] = 0; } lBuffer->unmap(); } // implement event handler structure struct srtFluid : public srtEvent { srtFluid(oclFluid3D& iFluid, oclDevice& iDevice) : srtEvent(oclFluid3D::EVT_INTEGRATE) , clGravity(iFluid.getKernel("clGravity")) , clIntegrateVelocity(iFluid.getKernel("clIntegrateVelocity")) , clIntegrateForce(iFluid.getKernel("clIntegrateForce")) , mDevice(iDevice) , mFluid(iFluid) { } bool operator() (oclProgram& iSource) { // call all the fluid kernels except clipBox size_t lPartcleCount = mFluid.getParticleCount(); sStatusCL = clEnqueueNDRangeKernel(mDevice, *clIntegrateForce, 1, NULL, &lPartcleCount, &mFluid.cLocalSize, 0, NULL, clIntegrateForce->getEvent()); if (!oclSuccess("clEnqueueNDRangeKernel", &mFluid)) { return false; } sStatusCL = clEnqueueNDRangeKernel(mDevice, *clGravity, 1, NULL, &lPartcleCount, &mFluid.cLocalSize, 0, NULL, clGravity->getEvent()); if (!oclSuccess("clEnqueueNDRangeKernel", &mFluid)) { return false; } sStatusCL = clEnqueueNDRangeKernel(mDevice, *clIntegrateVelocity, 1, NULL, &lPartcleCount, &mFluid.cLocalSize, 0, NULL, clIntegrateVelocity->getEvent()); if (!oclSuccess("clEnqueueNDRangeKernel", &mFluid)) { return false; } return 1; } oclDevice& mDevice; oclFluid3D& mFluid; oclKernel* clGravity; oclKernel* clIntegrateVelocity; oclKernel* clIntegrateForce; } evtHandler(clProgram, lDevice); clProgram.addEventHandler(evtHandler); Log(INFO) << "Computing particles for 7 seconds "; DWORD lEnd = GetTickCount() + 7000; while (lEnd > GetTickCount()) { clProgram.compute(lDevice); clFinish(lDevice); cout << "."; } cout << "\n"; if (lBuffer->map(CL_MAP_READ)) { // compute average paricle position cl_float4 lAvg = { 0,0,0,0}; cl_float4* lPtr = lBuffer->ptr<cl_float4>(); for (unsigned int i=0; i<lCount; i++) { lAvg.s[0] += lPtr[i].s[0]; lAvg.s[1] += lPtr[i].s[1]; lAvg.s[2] += lPtr[i].s[2]; } lAvg.s[0] /= lCount; lAvg.s[1] /= lCount; lAvg.s[2] /= lCount; Log(INFO) << "Average particle position = " << *lPtr; lBuffer->unmap(); } } }