LSHReservoirSampler::LSHReservoirSampler(LSH *hashFamIn, unsigned int numHashPerFamily, unsigned int numHashFamilies, unsigned int reservoirSize, unsigned int dimension, unsigned int numSecHash, unsigned int maxSamples, unsigned int queryProbes, unsigned int hashingProbes, float tableAllocFraction) { #if !defined SECONDARY_HASHING if (numHashPerFamily != numSecHash) { std::cout << "[LSHReservoirSampler::LSHReservoirSampler] Fatal, secondary hashing disabled. " << std::endl; } #endif initVariables(numHashPerFamily, numHashFamilies, reservoirSize, dimension, numSecHash, maxSamples, queryProbes, hashingProbes, tableAllocFraction); #if defined USE_OPENCL clPlatformDevices(); clContext(); clProgram(); clKernels(); clCommandQueue(); #endif _hashFamily = hashFamIn; #if defined CL_TEST_CPU float cpu_test_size = (float)CL_TEST_CPU*(float)sizeof(int) / (float)1000000000; printf("Testing CPU Device %d Allocation (%3.1f GiB) Bandwidth.\n", CL_CPU_DEVICE, cpu_test_size); clTestAlloc(CL_TEST_CPU, &context_cpu, &command_queue_cpu); #endif #if defined CL_TEST_GPU float gpu_test_size = (float)CL_TEST_GPU*(float)sizeof(int) / (float)1000000000; printf("Testing GPU Device %d Allocation (%3.1f GiB) Bandwidth.\n", CL_DEVICE_ID, gpu_test_size); clTestAlloc(CL_TEST_GPU, &context_gpu, &command_queue_gpu); #endif initHelper(_numTables, _rangePow, _reservoirSize); }
// Initialize buffers, textures, etc. void initObjects() { // Put a bunch of particles into space particle* particleData = (particle*)malloc(sizeof(particle) * NUM_PARTICLES); GLuint* particleElements = (GLuint*)malloc(sizeof(GLuint) * NUM_PARTICLES); srand(666); for(int i = 0; i < NUM_PARTICLES; i++) { particleData[i].x = centeredUnitRand() * 1.0f * 1.99f; particleData[i].y = centeredUnitRand() * 1.0f * 1.99f; particleData[i].z = centeredUnitRand() * 1.0f * 1.99f; particleData[i].w = 0.0f; particleElements[i] = i; } for(int i = 0; i < 2; i++) { particles.vertexBuffer[i] = makeBO( GL_ARRAY_BUFFER, particleData, sizeof(particle) * NUM_PARTICLES, GL_DYNAMIC_DRAW ); } particles.elementBuffer = makeBO( GL_ELEMENT_ARRAY_BUFFER, particleElements, sizeof(GLuint) * NUM_PARTICLES, GL_STATIC_DRAW ); free(particleData); free(particleElements); // Prepare a screen quad to render postprocessed things. Vector quadData[] = { {-1.0f, -1.0f, 0.0f}, { 1.0f, -1.0f, 0.0f}, { 1.0f, 1.0f, 0.0f}, {-1.0f, 1.0f, 0.0f} }; GLuint quadElements[] = {0, 1, 3, 1, 2, 3}; screenQuad.vertexBuffer = makeBO( GL_ARRAY_BUFFER, quadData, sizeof(Vector) * 4, GL_STATIC_DRAW ); screenQuad.elementBuffer = makeBO( GL_ELEMENT_ARRAY_BUFFER, quadElements, sizeof(GLuint) * 6, GL_STATIC_DRAW ); // Load textures. terrain.envTexture = loadTexture("skymap_b.tga"); terrain.lowTexture = loadTexture("sand.tga"); terrain.highTexture = loadTexture("stone.tga"); // Create a VAO and bind it glGenVertexArrays(1, &vertexArray); glBindVertexArray(vertexArray); // Prepare a lot of zero'd data particle* zeroData = (particle*)malloc(sizeof(particle) * NUM_PARTICLES); cl_int* zeroGrid = (cl_int*)malloc(sizeof(cl_int) * GRID_SIZE * GRID_SIZE * GRID_SIZE); for(int i = 0; i < NUM_PARTICLES; i++) { zeroData[i].x = 0.0f; zeroData[i].y = 0.0f; zeroData[i].z = 0.0f; zeroData[i].w = 0.0f; } for(int i = 0; i < GRID_SIZE * GRID_SIZE * GRID_SIZE; i++) { zeroGrid[i] = 0; } // Share some buffers with OpenCL and create some more for(int i = 0; i < 2; i++) { particles.particleBuffer[i] = sharedBuffer(particles.vertexBuffer[i], CL_MEM_READ_WRITE); particles.velocityBuffer[i] = clCreateBuffer( clContext(), CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, NUM_PARTICLES * sizeof(cl_float) * 4, zeroData, NULL ); particles.gridBuffer[i] = clCreateBuffer( clContext(), CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(cl_int) * GRID_SIZE * GRID_SIZE * GRID_SIZE, zeroGrid, NULL ); } particles.dataBuffer = clCreateBuffer( clContext(), CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, NUM_PARTICLES * sizeof(cl_float) * 4, zeroData, NULL ); particles.offsetBuffer = clCreateBuffer( clContext(), CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, NUM_PARTICLES * sizeof(cl_int), zeroData, NULL ); particles.gridSizeBuffer = clCreateBuffer( clContext(), CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(cl_int) * GRID_SIZE * GRID_SIZE * GRID_SIZE, zeroGrid, NULL ); for(int i = 0; i < 27; i++) { zeroGrid[i] = i; } particles.cellSelectBuffer = clCreateBuffer( clContext(), CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(cl_int) * 27, zeroGrid, NULL ); free(zeroData); free(zeroGrid); // Load terrain terrain.heightData = loadPGM("grand_canyon.pgm", 4096, 4096); particles.terrainBuffer = clCreateBuffer( clContext(), CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(cl_float) * 4096 * 4096, terrain.heightData, NULL ); // Make terrain texture terrain.heightTexture = genFloatTexture(terrain.heightData, 4096, 4096); // Make terrain geometry PaddedVector* terrainVertices = (PaddedVector*)malloc(sizeof(PaddedVector) * 512 * 512); for(int x = 0; x < 4096; x += 8) { for(int y = 0; y < 4096; y += 8) { int xx = x / 8; int yy = y / 8; PaddedVector v = PadVector(MakeVector( xx * (1.0f / 512.0f) * (AABB_XZ * 2.0f) - AABB_XZ, terrain.heightData[(x / 8) + 4096 * (y / 8)] * 128.0f - 3.0f - 3.0f, yy * (1.0f / 512.0f) * (AABB_XZ * 2.0f) - AABB_XZ )); v.pad = 1.0f; terrainVertices[xx + 512 * yy] = v; } } terrain.vertexBuffer = makeBO( GL_ARRAY_BUFFER, terrainVertices, sizeof(PaddedVector) * 512 * 512, GL_STATIC_DRAW ); free(terrainVertices); GLuint* terrainElements = (GLuint*)malloc(sizeof(GLuint) * 512 * 512 * 6); int quadIndex = 0; for(int x = 0; x < 511; x++) { for(int y = 0; y < 511; y++) { terrainElements[quadIndex * 6 + 0] = (x + 0) + (y + 0) * 512; terrainElements[quadIndex * 6 + 1] = (x + 1) + (y + 1) * 512; terrainElements[quadIndex * 6 + 2] = (x + 1) + (y + 0) * 512; terrainElements[quadIndex * 6 + 3] = (x + 0) + (y + 0) * 512; terrainElements[quadIndex * 6 + 4] = (x + 0) + (y + 1) * 512; terrainElements[quadIndex * 6 + 5] = (x + 1) + (y + 1) * 512; quadIndex++; } } terrain.elementBuffer = makeBO( GL_ELEMENT_ARRAY_BUFFER, terrainElements, sizeof(GLuint) * 512 * 512 * 6, GL_STATIC_DRAW ); free(terrainElements); }
// constructor oclConnection:: oclConnection ( const char * filename_A_type, const char * filename_AB_type, const char * filename_A_cx_type, const char * filename_AB_cx_type, const char * filename_A_mx_type, const char * filename_AB_mx_type, cl_device_type device_type, VerbosityLevel verbose ) : m_current_ocl_objects (), m_current_buffers (), m_loaded_ocl_objects () { m_verbose = verbose; // platform clPlatforms tmp_platforms; m_error = clPlatform::get (& tmp_platforms); print_optional (" ** # of platforms: %d", tmp_platforms.size(), VERB_LOW); if (tmp_platforms.size() > 0) m_plat = tmp_platforms [0]; // choose first available device else throw oclError ("No platform available", "oclConnection :: CTOR"); // devices m_error = m_plat.getDevices (device_type, &m_devs); print_optional (" ** # of devices on platform: %d", m_devs.size(), VERB_LOW); std::string vendor; print_optional (" ** device type (0): ", (m_devs [0].getInfo (CL_DEVICE_VENDOR, &vendor), vendor.c_str ()), VERB_LOW); if (m_devs.size() == 0) throw oclError ("No devices available on this platform", "oclConnection :: CTOR"); // context /** ViennaCL **/ /* TODO */ m_cont = clContext ( viennacl::ocl::current_context () . handle () . get ()); //clContext (m_devs); // same context for all devices // command queues for (clDevices::iterator it = m_devs.begin(); it < m_devs.end(); ++it) // iterate over all devices and create a command queue for each { m_comqs.push_back (clCommandQueue (m_cont, *it)); } init_program_kernels < float, float> (this); init_program_kernels <double, double> (this); init_program_kernels < cxfl, float> (this); init_program_kernels < cxfl, cxfl> (this); init_program_kernels < cxdb, double> (this); init_program_kernels < cxdb, cxdb> (this); // set current kernel! num_kernel = 0; mp_actKernel = & (m_kernels_ff [num_kernel]); // defined default value !!! print_optional (" ** oclConnection constructed!", m_verbose); /** * setup ViennaCL */ print_optional (" ** setup ViennaCl!", m_verbose); viennacl :: vector <float> tmp (10); viennacl :: vector <double> tmp2 (10); tmp = tmp + tmp; }