// Binds one argument of `kernel`.
//
//   kernel - kernel whose argument is being set
//   i      - zero-based argument index
//   var    - value descriptor; when var.n > 1 the argument is passed with
//            sizeof(cl_mem), otherwise with var.type_size bytes, in both
//            cases reading from var.val
//
// Any OpenCL failure is reported through clCheckError with a message that
// includes the kernel's function name.
extern void opencl_set_kernel_arg(cl_kernel kernel, ushort i, cl_var var)
{
    char kernel_name[128];
    char context_msg[152]; // 24-character prefix + up to 127 name characters + NUL
    size_t arg_size;

    clCheckError(clGetKernelInfo(kernel, CL_KERNEL_FUNCTION_NAME, 128, kernel_name, NULL),
                 "while getting kernel name");
    sprintf(context_msg, "setting args for kernel %s", kernel_name);

    if (var.n > 1)
        arg_size = sizeof(cl_mem);
    else
        arg_size = var.type_size;

    clCheckError(clSetKernelArg(kernel, i, arg_size, var.val), context_msg);
}
extern cl_program opencl_create_program_from_source(const char *kernel_filename, const char *options) { cl_program program; int err; char * kernel_src = malloc(11+strlen(kernel_filename)), * opts = malloc(strlen(options)+5); strcat(strcat(strcpy(kernel_src, "#include<"), kernel_filename), ">"); program = clCreateProgramWithSource(context, 1, (const char **)&kernel_src, NULL, &err); clCheckError(err, "creating 'include' program "); err = clBuildProgram(program, 0, NULL, strcat(strcpy(opts, options), " -I."), NULL, NULL); if (err == CL_BUILD_PROGRAM_FAILURE) { char * build_log; uint i; for (i=0; i<ndevices; i++) { size_t log_size; clGetProgramBuildInfo(program, devices[i], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size); if (log_size>2) { fprintf(stderr, "OpenCL> %s build log [device #%i]:\n", kernel_filename, i); build_log = (char *)malloc(log_size); clGetProgramBuildInfo(program, devices[i], CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL); fputs(build_log, stderr); free(build_log); } } } clCheckError(err, "building program"); return program; }
// Initialises the global OpenCL state for one platform: selects the platform,
// enumerates its devices of `device_type`, creates one context over all of
// them and one in-order command queue per device, then resets the default
// NDRange (ND = 1, GWS[0] = LWS[0] = 64).
//
//   platform_id - zero-based index of the platform to use
//   device_type - device class to enumerate (e.g. CL_DEVICE_TYPE_GPU)
//
// Writes the globals platform, ndevices, devices, cid, context, queues,
// GWS, LWS and ND. Errors are reported through clCheckError.
extern void opencl_init(uint platform_id, cl_device_type device_type)
{
    cl_platform_id platforms[platform_id + 1];
    cl_uint available_platforms = 0;
    clCheckError(clGetPlatformIDs(platform_id + 1, platforms, &available_platforms), "getting platform id");
    // Only the first `available_platforms` entries are filled in; reading past
    // them would hand an uninitialised handle to the rest of the setup.
    if (platform_id >= available_platforms)
    {
        fputs("OpenCL> Error: requested platform is not available!\n", stderr);
        clCheckError(CL_INVALID_PLATFORM, "getting platform id");
    }
    platform = platforms[platform_id];
    cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 };

    clCheckError(clGetDeviceIDs(platform, device_type, 0, NULL, &ndevices), "getting devices number");
    devices = (cl_device_id *)malloc(ndevices * sizeof(cl_device_id));
    if (devices == NULL)
    {
        fputs("OpenCL> Error: out of memory while allocating device list!\n", stderr);
        exit(EXIT_FAILURE);
    }
    int err = clGetDeviceIDs(platform, device_type, ndevices, devices, NULL);
    if (err != CL_SUCCESS)
    {
        if (device_type == CL_DEVICE_TYPE_GPU)
            fputs("OpenCL> Error: no capable GPU device found!\n", stderr);
        else if (device_type == CL_DEVICE_TYPE_CPU)
            fputs("OpenCL> Error: no capable CPU device found!\n", stderr);
        else
            fputs("OpenCL> Error: no capable device found!\n", stderr);
        clCheckError(err, "getting devices ids");
    }

    cid = 0;
    context = clCreateContext(cps, ndevices, devices, NULL, NULL, &err);
    clCheckError(err, "creating context");

    queues = (cl_command_queue *)malloc(ndevices * sizeof(cl_command_queue));
    if (queues == NULL)
    {
        fputs("OpenCL> Error: out of memory while allocating command queues!\n", stderr);
        exit(EXIT_FAILURE);
    }
    uint i;
    for (i = 0; i < ndevices; i++)
    {
        queues[i] = clCreateCommandQueue(context, devices[i], 0, &err);
        // Check every queue: a failure on an earlier device must not be
        // masked by a later success.
        clCheckError(err, "creating command queue");
    }

    GWS[0] = LWS[0] = 64;
    ND = 1;
    GWS[1] = GWS[2] = LWS[1] = LWS[2] = 0;
}
// Enqueues `kernel` on the currently selected queue (queues[cid]) using the
// global launch configuration: ND dimensions, global size GWS, local size LWS,
// no offset, no wait list and no completion event.
// Failures are reported through clCheckError with the kernel name in the message.
extern void opencl_run_kernel(cl_kernel kernel)
{
    // Left over from the original: querying only the name length first.
    //clGetKernelInfo(kernel, CL_KERNEL_FUNCTION_NAME, 0, NULL, &n);
    char kernel_name[128];
    char context_msg[145]; // 17-character prefix + up to 127 name characters + NUL
    int status;

    status = clGetKernelInfo(kernel, CL_KERNEL_FUNCTION_NAME, 128, kernel_name, NULL);
    clCheckError(status, "while getting kernel name");

    sprintf(context_msg, "launching kernel %s", kernel_name);

    status = clEnqueueNDRangeKernel(queues[cid], kernel, ND, NULL, GWS, LWS, 0, NULL, NULL);
    clCheckError(status, context_msg);
}
extern void opencl_write_program_to_file(const cl_program program, const char * output_filename) { size_t binary_size; clCheckError(clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binary_size, NULL), "getting binary size"); unsigned char * binary = (unsigned char *)malloc(binary_size); clCheckError(clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(char *), &binary, NULL), "getting binary"); FILE * f = fopen(output_filename, "w"); if (f==NULL) { fprintf(stderr, "Error opening \"%s\"!\n", output_filename); exit(EXIT_FAILURE); } uint i; for (i=0; i<binary_size; i++) fputc(binary[i], f); fclose(f); }
// Tears down the global OpenCL state: releases the context, then every
// per-device command queue and device handle, and finally frees the `queues`
// and `devices` arrays. Each release result is passed to clCheckError.
extern void opencl_done()
{
    uint dev = 0;

    clCheckError(clReleaseContext(context), "releasing context");

    while (dev < ndevices)
    {
        clCheckError(clReleaseCommandQueue(queues[dev]), "releasing queues");
        clCheckError(clReleaseDevice(devices[dev]), "releasing devices");
        ++dev;
    }

    free(queues);
    free(devices);
}
void LSHReservoirSampler::clCommandQueue() { // Create command queue.Properties(2): CL_QUEUE_PROFILING_ENABLE, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE. #ifdef OPENCL_2XX command_queue_gpu = clCreateCommandQueueWithProperties(context_gpu, devices_gpu[CL_DEVICE_ID], NULL, &_err); clCheckError(_err, "[OpenCL] Couldn't create command queue for GPU."); //command_queue_cpu = clCreateCommandQueueWithProperties(context_cpu, devices_cpu[CL_CPU_DEVICE], NULL, &_err); //clCheckError(_err, "[OpenCL] Couldn't create command queue for CPU."); #else command_queue_gpu = clCreateCommandQueue(context_gpu, devices_gpu[CL_DEVICE_ID], NULL, &_err); clCheckError(_err, "[OpenCL] Couldn't create command queue for GPU."); //command_queue_cpu = clCreateCommandQueue(context_cpu, devices_cpu[CL_CPU_DEVICE], NULL, &_err); //clCheckError(_err, "[OpenCL] Couldn't create command queue for CPU."); #endif }
// Prepares the auto-exposure pass: compiles the luma downsample kernel,
// allocates the host-side luma buffer and creates the device luma image.
//
//   context      - OpenCL context used for the kernel and the image
//   device       - device the kernel is compiled for
//   computeSize  - dimensions of the downsampled luma image
//   updatePeriod - period used by update() as the divisor of its frame
//                  counter; must be non-zero
//
// Returns true on success. On failure a warning is emitted (or the OpenCL
// error is reported by clCheckError), false is returned and _initialized
// stays false. Must not be called on an already initialised object (asserted).
bool AutoExposure::init(cl_context context, cl_device_id device, QSize computeSize, int updatePeriod)
{
    assert(!_initialized);

    // update() evaluates `_updateCounter % _updatePeriod`; a zero period would
    // be a division by zero there.
    if(updatePeriod == 0) {
        debugWarning("Update period must not be zero.");
        return false;
    }

    _updatePeriod= updatePeriod;
    _lumaSize= computeSize;

    // Compile kernel and set arguments
    CLUtils::KernelDefines downDefines;
    downDefines["GAMMA_CORRECT"]= "2.2f";
    _downKernel= CLUtils::loadKernelPath(context, device, ":/kernels/lumaDownsample.cl",
            "lumaDownsample", downDefines, QStringList("../res/kernels/"));
    if(!_downKernel) {
        debugWarning("Could not compile kernel.");
        return false;
    }

    // NOTE(review): _downKernel is not released on the failure paths below;
    // confirm whether another part of the class owns its cleanup.
    _lumaData= (uchar*)malloc(lumaDataBytes());
    if(!_lumaData) {
        debugWarning("Could not allocate data.");
        return false;
    }

    cl_int error;
    // This image could be WRITE_ONLY
    _lumaImage= clCreateImage2D(context, CL_MEM_READ_WRITE, clFormatGL(GL_R),
            _lumaSize.width(), _lumaSize.height(), 0, 0, &error);
    if(clCheckError(error, "clCreateImage2D")) {
        free(_lumaData);
        // Clear the member so later code cannot use or free the stale pointer.
        _lumaData= 0;
        return false;
    }

    _initialized= true;
    return true;
}
// Binds every argument of `kernel` in one call.
//
//   kernel - kernel whose arguments are being set
//   ...    - one cl_var per kernel argument, in declaration order; exactly as
//            many values are consumed as CL_KERNEL_NUM_ARGS reports
//
// For each value the argument size is sizeof(cl_mem) when n > 1 and type_size
// otherwise, read from the value's `val` pointer. Failures are reported
// through clCheckError with the kernel name in the message.
extern void opencl_set_kernel_args(cl_kernel kernel, ...)
{
    char kernel_name[128];
    char context_msg[152]; // 24-character prefix + up to 127 name characters + NUL
    uint arg_count;
    uint arg_index = 0;
    va_list args;

    clCheckError(clGetKernelInfo(kernel, CL_KERNEL_FUNCTION_NAME, 128, kernel_name, NULL),
                 "while getting kernel name");
    sprintf(context_msg, "setting args for kernel %s", kernel_name);

    clCheckError(clGetKernelInfo(kernel, CL_KERNEL_NUM_ARGS, sizeof(uint), &arg_count, NULL), context_msg);

    va_start(args, kernel);
    while (arg_index < arg_count)
    {
        cl_var current = va_arg(args, cl_var);
        if (current.n > 1)
            clCheckError(clSetKernelArg(kernel, arg_index, sizeof(cl_mem), current.val), context_msg);
        else
            clCheckError(clSetKernelArg(kernel, arg_index, current.type_size, current.val), context_msg);
        arg_index++;
    }
    va_end(args);
}
// Creates the kernel named `kernel_name` from `program`.
//
//   program     - built program containing the kernel
//   kernel_name - name of the __kernel function
//
// Returns the new kernel handle. A failure is reported through clCheckError
// with a message containing the kernel name.
extern cl_kernel opencl_create_kernel(cl_program program, const char *kernel_name)
{
    char str[144];
    // Bounded formatting: kernel_name has no length limit, so a long name is
    // truncated in the diagnostic instead of overflowing the stack buffer.
    snprintf(str, sizeof(str), "creating kernel %s", kernel_name);

    int err;
    cl_kernel kernel = clCreateKernel(program, kernel_name, &err);
    clCheckError(err, str);
    return kernel;
}
// Creates the kernel named `kernel_name` from `program`.
//
//   program     - built program containing the kernel
//   kernel_name - name of the __kernel function
//
// Returns the new kernel handle. A failure is reported through clCheckError
// with a message containing the kernel name.
extern cl_kernel opencl_create_kernel(cl_program program, const char * kernel_name)
{
    // "creating kernel " is 16 characters; +1 for the terminating NUL.
    char * str = (char *)malloc(strlen(kernel_name) + 17);
    if (str != NULL)
        sprintf(str, "creating kernel %s", kernel_name);

    int err;
    cl_kernel kernel = clCreateKernel(program, kernel_name, &err);
    // Fall back to a fixed message if the diagnostic buffer could not be allocated.
    clCheckError(err, str != NULL ? str : "creating kernel");

    // The message is only needed for the check above; release it so each call
    // no longer leaks one allocation.
    free(str);
    return kernel;
}
// Copies the current value of `var` into caller memory.
//
//   var - source variable
//   val - destination; receives var.type_size bytes when var.n == 1,
//         otherwise var.type_size * var.n bytes
//
// For var.n == 1 the bytes are copied directly from var.val. Otherwise
// var.val is interpreted as a pointer to a cl_mem and a blocking read is
// issued on queues[cid]; a failure there is reported through clCheckError.
extern void opencl_get_var(const cl_var var, void * val)
{
    if (var.n != 1)
    {
        clCheckError(clEnqueueReadBuffer(queues[cid], *((cl_mem *)var.val), CL_TRUE, 0,
                                         var.type_size*var.n, val, 0, NULL, NULL),
                     "reading from buffer");
        return;
    }

    char * dst = (char *)val;
    uint byte = 0;
    while (byte < var.type_size)
    {
        dst[byte] = var.val[byte];
        byte++;
    }
}
// I may change it if I want to set var by value (not with pointer arg) (use ... in args + check var.type_size). // so basically I may "overload" a function to receive variables with different types by using "..." (if I check for type (or type size) and use correspondent va_arg()). extern void opencl_set_var(cl_var var, const void * val) { if (var.n==1) { if (var.val == NULL) var.val = malloc(var.type_size); uint i; for (i=0; i<var.type_size; i++) var.val[i] = ((char *)val)[i]; } else { if (var.val == NULL) { int err; var.val = malloc(sizeof(cl_mem)); *((cl_mem *)var.val) = clCreateBuffer(context, CL_MEM_READ_WRITE, var.type_size*var.n, NULL, &err); clCheckError(err, "creating buffer"); } clCheckError(clEnqueueWriteBuffer(queues[cid], *((cl_mem *)var.val), CL_TRUE, 0, var.type_size*var.n, val, 0, NULL, NULL), "writing to buffer"); } }
//extern cl_program opencl_create_program(const char *kernel_filename, ...) extern cl_program opencl_create_program(const char *kernel_filename, const char *options) { cl_program program; int err; FILE * kf = fopen(kernel_filename, "r"); if (kf==NULL) clCheckError(CL_SUCCESS+1, "opening kernel file"); size_t kfs = 0; while (fgetc(kf)!=EOF) kfs++; rewind(kf); char * kernel_src = (char *)malloc(kfs); uint i; for (i=0; i<kfs; i++) kernel_src[i] = fgetc(kf); fclose(kf); for (i=0; kernel_filename[i]!='.' && kernel_filename[i]!='\0'; i++); if (kernel_filename[i]!='\0' && kernel_filename[i+1]!='\0' && kernel_filename[i+1]=='c' && (kernel_filename[i+2]=='\0' || kernel_filename[i+2]=='l')) { program = clCreateProgramWithSource(context, 1, (const char **)&kernel_src, (const size_t *)&kfs, &err); clCheckError(err, "creating program from source"); err = clBuildProgram(program, 0, NULL, options, NULL, NULL); if (err == CL_BUILD_PROGRAM_FAILURE) { size_t log_size; clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size); if (log_size>2) { fprintf(stderr, "OpenCL> %s build log:\n", kernel_filename); char * build_log = (char *)malloc(log_size); clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL); fputs(build_log, stderr); free(build_log); build_log = NULL; } } clCheckError(err, "building program"); } else { program = clCreateProgramWithBinary(context, 1, &device_id, (const size_t *)&kfs, (const unsigned char **)&kernel_src, NULL, &err); clCheckError(err, "creating program from binary"); } free(kernel_src); kernel_src = NULL; return program; }
// Advances the auto-exposure state by one frame.
//
//   queue - command queue used for the downsample kernel and the read-back
//   image - source image passed as the first kernel argument
//
// Every call (when auto-exposure is enabled) nudges _exposure by the clamped
// difference between 0.5 and the last metering average. Once every
// _updatePeriod calls it additionally launches the downsample kernel into
// _lumaImage, enqueues a non-blocking read of that image into _lumaData and
// registers exposureCallback to run when the read completes.
// Requires a prior successful init() (asserted).
void AutoExposure::update(cl_command_queue queue, cl_mem image)
{
    assert(_initialized);

    // Nothing to do while auto-exposure is switched off.
    if(!_autoExposure) {
        return;
    }

    // Raise or lower the exposure according to how far the current luma
    // average is from the expected 0.5 frame average, limited to +/- _adjustSpeed.
    _exposure *= 1.0f + qBound(-_adjustSpeed,
                               0.5f - _exposureData.meteringAverage,
                               _adjustSpeed);

    // Only refresh the metering data on every _updatePeriod-th call.
    if(++_updateCounter % _updatePeriod != 0) {
        return;
    }
    _updateCounter= 0;

    clKernelArg(_downKernel, 0, image);
    clKernelArg(_downKernel, 1, _lumaImage);
    if(!clLaunchKernelEvent(_downKernel, queue, _lumaSize, "AE/Downsample")) {
        return;
    }

    // Download the downsampled image without blocking.
    cl_event& downloadEvent= analytics.clEvent("AE/Download");
    const size_t lumaWidth= (size_t)_lumaSize.width();
    const size_t lumaHeight= (size_t)_lumaSize.height();
    size_t origin[3]= { 0, 0, 0 };
    size_t region[3]= { lumaWidth, lumaHeight, 1 };

    cl_int error= clEnqueueReadImage(queue, _lumaImage, CL_FALSE,
                                     origin, region,
                                     _lumaSize.width(), 0,
                                     _lumaData,
                                     0, 0, &downloadEvent);
    if(clCheckError(error, "clEnqueueReadImage")) {
        return;
    }

    // exposureCallback is invoked once the download has completed.
    error= clSetEventCallback(downloadEvent, CL_COMPLETE, exposureCallback, (void*)this);
    clCheckError(error, "clSetEventCallback");
}
int main() { const cl_uint n_max = 16; cl_uint i, j, n_platforms, n_devices; cl_platform_id platforms[n_max]; cl_device_id devices[n_max]; clCheckError(clGetPlatformIDs(0, NULL, &n_platforms), "getting number of available platforms"); // seems strange: here first argument may be not 0, but not in clGetDeviceIDs clCheckError(clGetPlatformIDs(n_platforms, platforms, NULL), "getting available platforms' IDs"); for (i=0; i<n_platforms; i++) { printf("\n============= Platform #%u =============\n", i+1); if (!clSoftCheckError(clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, 0, NULL, &n_devices), "getting number of available devices")) // here the third argument must be 0 { if (!clSoftCheckError(clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, n_devices, devices, NULL), "getting available devices' IDs")) for (j=0; j<n_devices; j++) { printf("\n============== Device #%u ==============\n", j+1); show_device_info(devices[j]); } } } return 0; }
// Releases the storage behind `var`.
//
// When var.n != 1, var.val is interpreted as a pointer to a cl_mem and that
// memory object is released first (failure reported through clCheckError).
// In both cases the host allocation at var.val is then freed.
//
// `var` is received by value, so the caller's copy still holds the old
// pointer afterwards and must not be used again.
extern void opencl_free_var(cl_var var)
{
    if (var.n != 1)
        clCheckError(clReleaseMemObject(*((cl_mem *)var.val)), "releasing buffer");

    free(var.val);
}
void LSHReservoirSampler::clContext() { cl_uint num_context_devices; // TODO, currenly use 1 device for each platform. // GPU context. context_gpu = clCreateContext(NULL, 1, devices_gpu + CL_DEVICE_ID, NULL, NULL, &_err); clCheckError(_err, "[OpenCL] Couldn't create a context."); _err = clGetContextInfo(context_gpu, CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), &num_context_devices, NULL); printf("[OpenCL] Created GPU Context with %d device. \n", num_context_devices); // CPU context. //context_cpu = clCreateContext(NULL, 1, devices_cpu + CL_CPU_DEVICE, NULL, NULL, &_err); //clCheckError(_err, "[OpenCL] Couldn't create a context."); //_err = clGetContextInfo(context_cpu, CL_CONTEXT_NUM_DEVICES, // sizeof(cl_uint), &num_context_devices, NULL); //printf("[OpenCL] Created CPU Context with %d device. \n", num_context_devices); }
// Initialises the global single-device OpenCL state: picks the first
// platform, selects one device, creates the context and an in-order command
// queue, and resets the default NDRange (ND = 1, GWS[0] = LWS[0] = 64).
//
//   device_type_id - CPU or GPU to prefer that device class (falling back to
//                    the other with a warning); any other value prefers GPU,
//                    falls back to CPU and reports which one was chosen
//
// Writes the globals device_id, context, queue, GWS, LWS and ND.
// If neither device class is available an error is printed and
// clCheckError(-1, ...) is invoked.
extern void opencl_init(cl_device_type_id device_type_id)
{
    cl_platform_id platform_id;
    int err;
    cl_device_type preferred_type, fallback_type;
    const char * preferred_note = NULL; // printed when the preferred type is found
    const char * fallback_note;         // printed when the fallback path is taken

    err = clGetPlatformIDs(1, &platform_id, NULL);
    clCheckError(err, "getting platform id");
    cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id, 0 };

    // Describe the three selection policies as data, then run one shared path.
    switch (device_type_id)
    {
    case CPU:
        preferred_type = CL_DEVICE_TYPE_CPU;
        fallback_type = CL_DEVICE_TYPE_GPU;
        fallback_note = "OpenCL> Warning: CPU device not found, using GPU instead!\n";
        break;
    case GPU:
        preferred_type = CL_DEVICE_TYPE_GPU;
        fallback_type = CL_DEVICE_TYPE_CPU;
        fallback_note = "OpenCL> Warning: GPU device not found, using CPU instead!\n";
        break;
    default:
        preferred_type = CL_DEVICE_TYPE_GPU;
        fallback_type = CL_DEVICE_TYPE_CPU;
        preferred_note = "OpenCL> using GPU device\n";
        fallback_note = "OpenCL> using CPU device\n";
    }

    if (clGetDeviceIDs(platform_id, preferred_type, 1, &device_id, NULL) == CL_SUCCESS)
    {
        if (preferred_note != NULL)
            fputs(preferred_note, stderr);
    }
    else
    {
        if (clGetDeviceIDs(platform_id, fallback_type, 1, &device_id, NULL) != CL_SUCCESS)
        {
            fputs("OpenCL> Error: no capable devices found!\n", stderr);
            clCheckError(-1, "getting device id");
        }
        fputs(fallback_note, stderr);
    }

    context = clCreateContext(cps, 1, &device_id, NULL, NULL, &err);
    clCheckError(err, "creating context");
    queue = clCreateCommandQueue(context, device_id, 0, &err);
    clCheckError(err, "creating command queue");

    GWS[0] = LWS[0] = 64;
    ND = 1;
    GWS[1] = GWS[2] = LWS[1] = LWS[2] = 0;
}
// Creates a variable descriptor and its backing storage.
//
//   type_size - size in bytes of one element
//   n         - element count; when the stored count equals 1, val points to
//               type_size bytes of host memory, otherwise to a cl_mem
//               holding a buffer of type_size * n bytes
//   flags     - buffer creation flags; 0 selects CL_MEM_READ_WRITE
//   val       - optional initial contents (NULL to leave uninitialised),
//               stored via opencl_set_var
//
// Returns the descriptor by value. Buffer creation failures are reported
// through clCheckError.
extern cl_var opencl_create_var(size_t type_size, uint n, cl_mem_flags flags, const void * val)
{
    cl_var var;
    var.type_size = type_size;
    var.n = n;

    if (var.n != 1)
    {
        int err;
        cl_mem_flags buffer_flags = flags;
        if (buffer_flags == 0)
            buffer_flags = CL_MEM_READ_WRITE;

        // Taking `flags` from a va_list instead was tried by the original
        // author and dropped because of unidentified problems.
        var.val = malloc(sizeof(cl_mem));
        *((cl_mem *)var.val) = clCreateBuffer(context, buffer_flags, var.type_size*n, NULL, &err);
        clCheckError(err, "creating buffer");
    }
    else
    {
        var.val = malloc(type_size);
    }

    if (val != NULL)
        opencl_set_var(var, val);

    return var;
}
void LSHReservoirSampler::clPlatformDevices() { // Platforms. cl_uint num_platforms; cl_int platform_index = -1; _err = clGetPlatformIDs(1, NULL, &num_platforms); printf("[OpenCL] %d platform found. \n", num_platforms); clCheckError(_err, "[OpenCL] Couldn't find any platforms."); platforms = (cl_platform_id*)malloc(sizeof(cl_platform_id) * num_platforms); clGetPlatformIDs(num_platforms, platforms, NULL); cl_uint num_devices; // GPU Platform. _err = clGetDeviceIDs(platforms[CL_PLATFORM_ID], CL_DEVICE_TYPE_ALL, 1, NULL, &num_devices); printf("[OpenCL] %d GPU device found. \n", num_devices); clCheckError(_err, "[OpenCL] Couldn't find any GPU devices."); devices_gpu = (cl_device_id*)malloc(sizeof(cl_device_id) * num_devices); clGetDeviceIDs(platforms[CL_PLATFORM_ID], CL_DEVICE_TYPE_ALL, num_devices, devices_gpu, NULL); // CPU Platform. //_err = clGetDeviceIDs(platforms[CL_CPU_PLATFORM], CL_DEVICE_TYPE_ALL, 1, NULL, &num_devices); //printf("[OpenCL] %d CPU device found. \n", num_devices); //clCheckError(_err, "[OpenCL] Couldn't find any CPU devices."); //devices_cpu = (cl_device_id*)malloc(sizeof(cl_device_id) * num_devices); //clGetDeviceIDs(platforms[CL_CPU_PLATFORM], CL_DEVICE_TYPE_ALL, num_devices, devices_cpu, NULL); #ifdef PRINT_CLINFO cl_uint q0; size_t q1; cl_ulong q2; char name_data[48], ext_data[4096]; for (int d = 0; d < num_devices; d++) { printf("\n"); clGetDeviceInfo(devices_gpu[d], CL_DEVICE_NAME, sizeof(name_data), name_data, NULL); printf("<<< Platform %d Device Info: %s >>> \n", d, name_data); clGetDeviceInfo(devices_gpu[d], CL_DEVICE_ADDRESS_BITS, sizeof(q0), &q0, NULL); printf("CL_DEVICE_ADDRESS_BITS: %u\n", q0); clGetDeviceInfo(devices_gpu[d], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(q0), &q0, NULL); printf("CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS: %u\n", q0); clGetDeviceInfo(devices_gpu[d], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(q2), &q2, NULL); printf("CL_DEVICE_GLOBAL_MEM_SIZE: %" PRIu64 "\n", q2); clGetDeviceInfo(devices_gpu[d], 
CL_DEVICE_LOCAL_MEM_SIZE, sizeof(q2), &q2, NULL); printf("CL_DEVICE_LOCAL_MEM_SIZE: %" PRIu64 "\n", q2); clGetDeviceInfo(devices_gpu[d], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(q0), &q0, NULL); printf("CL_DEVICE_MAX_COMPUTE_UNITS: %u\n", q0); clGetDeviceInfo(devices_gpu[d], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(q1), &q1, NULL); printf("CL_DEVICE_MAX_WORK_GROUP_SIZE: %zu\n", q1); clGetDeviceInfo(devices_gpu[d], CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(q0), &q0, NULL); printf("CL_DEVICE_MAX_MEM_ALLOC_SIZE: %u\n", q0); clGetDeviceInfo(devices_gpu[d], CL_DEVICE_MAX_PARAMETER_SIZE, sizeof(q0), &q0, NULL); printf("CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT: %u\n", q0); clGetDeviceInfo(devices_gpu[d], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(q0), &q0, NULL); printf("CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT: %u\n", q0); clGetDeviceInfo(devices_gpu[d], CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, sizeof(q0), &q0, NULL); printf("CL_DEVICE_MAX_PARAMETER_SIZE: %u\n", q0); clGetDeviceInfo(devices_gpu[d], CL_DEVICE_EXTENSIONS, sizeof(ext_data), ext_data, NULL); printf("CL_DEVICE_EXTENSIONS: %s\n", ext_data); printf("\n"); } #endif }
// Releases `kernel`; the release status is reported through clCheckError.
extern void opencl_free_kernel(cl_kernel kernel)
{
    int status = clReleaseKernel(kernel);
    clCheckError(status, "releasing kernel");
}
// Releases `program`; the release status is reported through clCheckError.
extern void opencl_free_program(cl_program program)
{
    int status = clReleaseProgram(program);
    clCheckError(status, "releasing program");
}
// Calls clFinish on the currently selected queue (queues[cid]) and reports
// the result through clCheckError.
extern void opencl_sync()
{
    int status = clFinish(queues[cid]);
    clCheckError(status, "finishing queue");
}
void LSHReservoirSampler::clProgram() { // Load and creat program. FILE *program_handle; const char *file_name[] = { PROGRAM_FILE_1, PROGRAM_FILE_2 }; const char options[] = "-cl-finite-math-only -cl-no-signed-zeros -w -cl-mad-enable -cl-fast-relaxed-math -I ./"; size_t program_size[NUM_FILES]; size_t log_size; char *program_buffer[NUM_FILES]; for (int i = 0; i < NUM_FILES; i++) { program_handle = fopen(file_name[i], "r"); if (program_handle == NULL) { perror("[OpenCL] Couldn't find the program file"); pause(); exit(1); } fseek(program_handle, 0, SEEK_END); program_size[i] = ftell(program_handle); rewind(program_handle); program_buffer[i] = (char*)malloc(program_size[i] + 1); program_buffer[i][program_size[i]] = '\0'; fread(program_buffer[i], sizeof(char), program_size[i], program_handle); fclose(program_handle); printf("[OpenCL] Program %d loaded, %d characters. \n", i, (int) program_size[i]); } program_gpu = clCreateProgramWithSource(context_gpu, NUM_FILES, (const char**)program_buffer, program_size, &_err); clCheckError(_err, "[OpenCL] Couldn't create CL program for GPU."); //program_cpu = clCreateProgramWithSource(context_cpu, NUM_FILES, // (const char**)program_buffer, program_size, &_err); //clCheckError(_err, "[OpenCL] Couldn't create CL program for CPU."); // Build GPU program. _err = clBuildProgram(program_gpu, 1, devices_gpu + CL_DEVICE_ID, options, NULL, NULL); if (_err < 0) { clGetProgramBuildInfo(program_gpu, devices_gpu[CL_DEVICE_ID], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size); _program_log = (char*)malloc(log_size + 1); _program_log[log_size] = '\0'; clGetProgramBuildInfo(program_gpu, devices_gpu[CL_DEVICE_ID], CL_PROGRAM_BUILD_LOG, log_size + 1, _program_log, NULL); printf("%s\n", _program_log); free(_program_log); system("pause"); exit(1); } // Build CPU program. 
//_err = clBuildProgram(program_cpu, 1, devices_cpu + CL_CPU_DEVICE, options, NULL, NULL); //if (_err < 0) { // clGetProgramBuildInfo(program_cpu, devices_cpu[CL_CPU_DEVICE], // CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size); // _program_log = (char*)malloc(log_size + 1); // _program_log[log_size] = '\0'; // clGetProgramBuildInfo(program_cpu, devices_cpu[CL_CPU_DEVICE], // CL_PROGRAM_BUILD_LOG, // log_size + 1, _program_log, NULL); // printf("%s\n", _program_log); // free(_program_log); // system("pause"); // exit(1); //} for (int i = 0; i < NUM_FILES; i++) { free(program_buffer[i]); } }
void LSHReservoirSampler::initHelper(int numTablesIn, int numHashPerFamilyIn, int reservoriSizeIn) { /* Reservoir Random Number. */ std::cout << "[LSHReservoirSampler::initHelper] Generating random number for reservoir sampling ..." << std::endl; std::default_random_engine generator1; std::uniform_int_distribution<unsigned int> distribution_a(0, 0x7FFFFFFF); _sechash_a = distribution_a(generator1) * 2 + 1; std::uniform_int_distribution<unsigned int> distribution_b(0, 0xFFFFFFFF >> _numSecHash); _sechash_b = distribution_b(generator1); _global_rand = new unsigned int[_maxReservoirRand]; for (unsigned int i = 0; i < _maxReservoirRand; i++) { std::uniform_int_distribution<unsigned int> distribution(0, i); _global_rand[i] = distribution(generator1); } #if defined OPENCL_HASHTABLE _globalRand_obj = clCreateBuffer(context_gpu, CL_MEM_READ_WRITE, _maxReservoirRand * sizeof(unsigned int), NULL, &_err); _err = clEnqueueWriteBuffer(command_queue_gpu, _globalRand_obj, CL_TRUE, 0, _maxReservoirRand * sizeof(unsigned int), _global_rand, 0, NULL, NULL); #endif std::cout << "Completed. " << std::endl; /* Hash tables. */ _tableMemReservoirMax = (_numTables - 1) * _aggNumReservoirs + _numReservoirsHashed; _tableMemMax = _tableMemReservoirMax * (1 + _reservoirSize); _tablePointerMax = _numTables * _numReservoirsHashed; #if defined OPENCL_HASHTABLE std::cout << "Initializing GPU-OpenCL tables and pointers ... 
" << std::endl; _tableMem_obj = clCreateBuffer(context_gpu, CL_MEM_READ_WRITE, _tableMemMax * sizeof(unsigned int), NULL, &_err); clCheckError(_err, "[initHelper] Failed to alloc GPU _tableMem_obj."); _err = clEnqueueFillBuffer(command_queue_gpu, _tableMem_obj, &_zero, sizeof(const int), 0, _tableMemMax * sizeof(unsigned int), 0, NULL, NULL); clCheckError(_err, "[initHelper] Failed to init GPU _tableMem_obj."); _tableMemAllocator_obj = clCreateBuffer(context_gpu, CL_MEM_READ_WRITE, _numTables * sizeof(unsigned int), NULL, &_err); clCheckError(_err, "[initHelper] Failed to alloc GPU _tableMemAllocator_obj."); _err = clEnqueueFillBuffer(command_queue_gpu, _tableMemAllocator_obj, &_zero, sizeof(const int), 0, _numTables * sizeof(unsigned int), 0, NULL, NULL); clCheckError(_err, "[initHelper] Failed to init GPU _tableMemAllocator_obj."); _tablePointers_obj = clCreateBuffer(context_gpu, CL_MEM_READ_WRITE, _tablePointerMax * sizeof(unsigned int), NULL, &_err); clCheckError(_err, "[initHelper] Failed to alloc GPU _tablePointers_obj."); _err = clEnqueueFillBuffer(command_queue_gpu, _tablePointers_obj, &_tableNull, sizeof(const int), 0, _tablePointerMax * sizeof(unsigned int), 0, NULL, NULL); clCheckError(_err, "[initHelper] Failed to init GPU _tablePointers_obj."); clFinish(command_queue_gpu); std::cout << "Completed. \n"; #elif defined CPU_TB std::cout << "Initializing CPU tables and pointers ... " << std::endl; _tableMem = new unsigned int[_tableMemMax](); _tableMemAllocator = new unsigned int[_numTables](); _tablePointers = new unsigned int[_tablePointerMax]; _tablePointersLock = new omp_lock_t[_tablePointerMax]; std::cout << "Completed. " << std::endl; std::cout << "Initializing CPU tablePointers/Locks ... " << std::endl; for (unsigned long long i = 0; i < _tablePointerMax; i++) { _tablePointers[i] = TABLENULL; omp_init_lock(_tablePointersLock + i); } std::cout << "Completed. " << std::endl; std::cout << "Initializing CPU tableCountersLocks ... 
" << std::endl; _tableCountersLock = new omp_lock_t[_tableMemReservoirMax]; for (unsigned long long i = 0; i < _tableMemReservoirMax; i++) { omp_init_lock(_tableCountersLock + i); } std::cout << "Completed. " << std::endl; #endif /* Hashing counter. */ _sequentialIDCounter_kernel = 0; }
// Returns the number of OpenCL platforms reported by clGetPlatformIDs.
// The query status is passed to clCheckError.
uint opencl_get_platforms_number()
{
    uint platform_count;
    int status = clGetPlatformIDs(0, NULL, &platform_count);
    clCheckError(status, "getting platforms number");
    return platform_count;
}