void tonemap(DeviceTask& task) { /* cast arguments to cl types */ cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer); cl_mem d_rgba = CL_MEM_PTR(task.rgba); cl_mem d_buffer = CL_MEM_PTR(task.buffer); cl_int d_x = task.x; cl_int d_y = task.y; cl_int d_w = task.w; cl_int d_h = task.h; cl_int d_sample = task.sample; cl_int d_resolution = task.resolution; cl_int d_offset = task.offset; cl_int d_stride = task.stride; /* sample arguments */ int narg = 0; ciErr = 0; ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_data), (void*)&d_data); ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_rgba), (void*)&d_rgba); ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_buffer), (void*)&d_buffer); #define KERNEL_TEX(type, ttype, name) \ ciErr |= set_kernel_arg_mem(ckFilmConvertKernel, &narg, #name); #include "kernel_textures.h" ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_sample), (void*)&d_sample); ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_resolution), (void*)&d_resolution); ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_x), (void*)&d_x); ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_y), (void*)&d_y); ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_w), (void*)&d_w); ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_h), (void*)&d_h); ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_offset), (void*)&d_offset); ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_stride), (void*)&d_stride); opencl_assert(ciErr); size_t workgroup_size; clGetKernelWorkGroupInfo(ckFilmConvertKernel, cdDevice, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &workgroup_size, NULL); workgroup_size = max(sqrt((double)workgroup_size), 1.0); size_t local_size[2] = {workgroup_size, workgroup_size}; size_t global_size[2] = {global_size_round_up(local_size[0], d_w), global_size_round_up(local_size[1], d_h)}; /* run kernel */ ciErr = clEnqueueNDRangeKernel(cqCommandQueue, ckFilmConvertKernel, 2, NULL, global_size, local_size, 0, NULL, NULL); opencl_assert(ciErr); opencl_assert(clFinish(cqCommandQueue)); }
void path_trace(RenderTile& rtile, int sample) { /* cast arguments to cl types */ cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer); cl_mem d_buffer = CL_MEM_PTR(rtile.buffer); cl_mem d_rng_state = CL_MEM_PTR(rtile.rng_state); cl_int d_x = rtile.x; cl_int d_y = rtile.y; cl_int d_w = rtile.w; cl_int d_h = rtile.h; cl_int d_sample = sample; cl_int d_offset = rtile.offset; cl_int d_stride = rtile.stride; /* sample arguments */ int narg = 0; ciErr = 0; ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_data), (void*)&d_data); ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_buffer), (void*)&d_buffer); ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_rng_state), (void*)&d_rng_state); #define KERNEL_TEX(type, ttype, name) \ ciErr |= set_kernel_arg_mem(ckPathTraceKernel, &narg, #name); #include "kernel_textures.h" ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_sample), (void*)&d_sample); ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_x), (void*)&d_x); ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_y), (void*)&d_y); ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_w), (void*)&d_w); ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_h), (void*)&d_h); ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_offset), (void*)&d_offset); ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_stride), (void*)&d_stride); opencl_assert(ciErr); size_t workgroup_size; clGetKernelWorkGroupInfo(ckPathTraceKernel, cdDevice, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &workgroup_size, NULL); workgroup_size = max(sqrt((double)workgroup_size), 1.0); size_t local_size[2] = {workgroup_size, workgroup_size}; size_t global_size[2] = {global_size_round_up(local_size[0], d_w), global_size_round_up(local_size[1], d_h)}; /* run kernel */ ciErr = clEnqueueNDRangeKernel(cqCommandQueue, ckPathTraceKernel, 2, NULL, global_size, local_size, 0, NULL, NULL); opencl_assert(ciErr); opencl_assert(clFinish(cqCommandQueue)); }
void mem_copy_to(device_memory& mem) { /* this is blocking */ size_t size = mem.memory_size(); ciErr = clEnqueueWriteBuffer(cqCommandQueue, CL_MEM_PTR(mem.device_pointer), CL_TRUE, 0, size, (void*)mem.data_pointer, 0, NULL, NULL); opencl_assert(ciErr); }
void mem_free(device_memory& mem) { if(mem.device_pointer) { ciErr = clReleaseMemObject(CL_MEM_PTR(mem.device_pointer)); mem.device_pointer = 0; opencl_assert(ciErr); } }
void mem_copy_from(device_memory& mem, int y, int w, int h, int elem) { size_t offset = elem*y*w; size_t size = elem*w*h; ciErr = clEnqueueReadBuffer(cqCommandQueue, CL_MEM_PTR(mem.device_pointer), CL_TRUE, offset, size, (uchar*)mem.data_pointer + offset, 0, NULL, NULL); opencl_assert(ciErr); }
void mem_alloc(device_memory& mem, MemoryType type) { size_t size = mem.memory_size(); if(type == MEM_READ_ONLY) mem.device_pointer = (device_ptr)clCreateBuffer(cxContext, CL_MEM_READ_ONLY, size, NULL, &ciErr); else if(type == MEM_WRITE_ONLY) mem.device_pointer = (device_ptr)clCreateBuffer(cxContext, CL_MEM_WRITE_ONLY, size, NULL, &ciErr); else mem.device_pointer = (device_ptr)clCreateBuffer(cxContext, CL_MEM_READ_WRITE, size, NULL, &ciErr); opencl_assert(ciErr); }
cl_int set_kernel_arg_mem(cl_kernel kernel, int *narg, const char *name) { cl_mem ptr; cl_int err = 0; if(mem_map.find(name) != mem_map.end()) { device_memory *mem = mem_map[name]; ptr = CL_MEM_PTR(mem->device_pointer); } else { /* work around NULL not working, even though the spec says otherwise */ ptr = CL_MEM_PTR(null_mem); } err |= clSetKernelArg(kernel, (*narg)++, sizeof(ptr), (void*)&ptr); opencl_assert(err); return err; }
string device_opencl_capabilities(void) { if(OpenCLInfo::device_type() == 0) { return "All OpenCL devices are forced to be OFF"; } string result = ""; string error_msg = ""; /* Only used by opencl_assert(), but in the future * it could also be nicely reported to the console. */ cl_uint num_platforms = 0; opencl_assert(clGetPlatformIDs(0, NULL, &num_platforms)); if(num_platforms == 0) { return "No OpenCL platforms found\n"; } result += string_printf("Number of platforms: %u\n", num_platforms); vector<cl_platform_id> platform_ids; platform_ids.resize(num_platforms); opencl_assert(clGetPlatformIDs(num_platforms, &platform_ids[0], NULL)); #define APPEND_STRING_INFO(func, id, name, what) \ do { \ char data[1024] = "\0"; \ opencl_assert(func(id, what, sizeof(data), &data, NULL)); \ result += string_printf("%s: %s\n", name, data); \ } while(false) #define APPEND_PLATFORM_STRING_INFO(id, name, what) \ APPEND_STRING_INFO(clGetPlatformInfo, id, "\tPlatform " name, what) #define APPEND_DEVICE_STRING_INFO(id, name, what) \ APPEND_STRING_INFO(clGetDeviceInfo, id, "\t\t\tDevice " name, what) vector<cl_device_id> device_ids; for(cl_uint platform = 0; platform < num_platforms; ++platform) { cl_platform_id platform_id = platform_ids[platform]; result += string_printf("Platform #%u\n", platform); APPEND_PLATFORM_STRING_INFO(platform_id, "Name", CL_PLATFORM_NAME); APPEND_PLATFORM_STRING_INFO(platform_id, "Vendor", CL_PLATFORM_VENDOR); APPEND_PLATFORM_STRING_INFO(platform_id, "Version", CL_PLATFORM_VERSION); APPEND_PLATFORM_STRING_INFO(platform_id, "Profile", CL_PLATFORM_PROFILE); APPEND_PLATFORM_STRING_INFO(platform_id, "Extensions", CL_PLATFORM_EXTENSIONS); cl_uint num_devices = 0; opencl_assert(clGetDeviceIDs(platform_ids[platform], CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices)); result += string_printf("\tNumber of devices: %u\n", num_devices); device_ids.resize(num_devices); opencl_assert(clGetDeviceIDs(platform_ids[platform], CL_DEVICE_TYPE_ALL, num_devices, &device_ids[0], NULL)); for(cl_uint device = 0; device < num_devices; ++device) { cl_device_id device_id = device_ids[device]; result += string_printf("\t\tDevice: #%u\n", device); APPEND_DEVICE_STRING_INFO(device_id, "Name", CL_DEVICE_NAME); APPEND_DEVICE_STRING_INFO(device_id, "Vendor", CL_DEVICE_VENDOR); APPEND_DEVICE_STRING_INFO(device_id, "OpenCL C Version", CL_DEVICE_OPENCL_C_VERSION); APPEND_DEVICE_STRING_INFO(device_id, "Profile", CL_DEVICE_PROFILE); APPEND_DEVICE_STRING_INFO(device_id, "Version", CL_DEVICE_VERSION); APPEND_DEVICE_STRING_INFO(device_id, "Extensions", CL_DEVICE_EXTENSIONS); } } #undef APPEND_STRING_INFO #undef APPEND_PLATFORM_STRING_INFO #undef APPEND_DEVICE_STRING_INFO return result; }