예제 #1
0
	void tonemap(DeviceTask& task)
	{
		/* cast arguments to cl types */
		cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
		cl_mem d_rgba = CL_MEM_PTR(task.rgba);
		cl_mem d_buffer = CL_MEM_PTR(task.buffer);
		cl_int d_x = task.x;
		cl_int d_y = task.y;
		cl_int d_w = task.w;
		cl_int d_h = task.h;
		cl_int d_sample = task.sample;
		cl_int d_resolution = task.resolution;
		cl_int d_offset = task.offset;
		cl_int d_stride = task.stride;

		/* sample arguments */
		int narg = 0;
		ciErr = 0;

		ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_data), (void*)&d_data);
		ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_rgba), (void*)&d_rgba);
		ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_buffer), (void*)&d_buffer);

#define KERNEL_TEX(type, ttype, name) \
	ciErr |= set_kernel_arg_mem(ckFilmConvertKernel, &narg, #name);
#include "kernel_textures.h"

		ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_sample), (void*)&d_sample);
		ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_resolution), (void*)&d_resolution);
		ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_x), (void*)&d_x);
		ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_y), (void*)&d_y);
		ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_w), (void*)&d_w);
		ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_h), (void*)&d_h);
		ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_offset), (void*)&d_offset);
		ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_stride), (void*)&d_stride);

		opencl_assert(ciErr);

		size_t workgroup_size;

		clGetKernelWorkGroupInfo(ckFilmConvertKernel, cdDevice,
			CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &workgroup_size, NULL);
	
		workgroup_size = max(sqrt((double)workgroup_size), 1.0);

		size_t local_size[2] = {workgroup_size, workgroup_size};
		size_t global_size[2] = {global_size_round_up(local_size[0], d_w), global_size_round_up(local_size[1], d_h)};

		/* run kernel */
		ciErr = clEnqueueNDRangeKernel(cqCommandQueue, ckFilmConvertKernel, 2, NULL, global_size, local_size, 0, NULL, NULL);
		opencl_assert(ciErr);
		opencl_assert(clFinish(cqCommandQueue));
	}
예제 #2
0
	void path_trace(RenderTile& rtile, int sample)
	{
		/* cast arguments to cl types */
		cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
		cl_mem d_buffer = CL_MEM_PTR(rtile.buffer);
		cl_mem d_rng_state = CL_MEM_PTR(rtile.rng_state);
		cl_int d_x = rtile.x;
		cl_int d_y = rtile.y;
		cl_int d_w = rtile.w;
		cl_int d_h = rtile.h;
		cl_int d_sample = sample;
		cl_int d_offset = rtile.offset;
		cl_int d_stride = rtile.stride;

		/* sample arguments */
		int narg = 0;
		ciErr = 0;

		ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_data), (void*)&d_data);
		ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_buffer), (void*)&d_buffer);
		ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_rng_state), (void*)&d_rng_state);

#define KERNEL_TEX(type, ttype, name) \
	ciErr |= set_kernel_arg_mem(ckPathTraceKernel, &narg, #name);
#include "kernel_textures.h"

		ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_sample), (void*)&d_sample);
		ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_x), (void*)&d_x);
		ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_y), (void*)&d_y);
		ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_w), (void*)&d_w);
		ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_h), (void*)&d_h);
		ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_offset), (void*)&d_offset);
		ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_stride), (void*)&d_stride);

		opencl_assert(ciErr);

		size_t workgroup_size;

		clGetKernelWorkGroupInfo(ckPathTraceKernel, cdDevice,
			CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &workgroup_size, NULL);
	
		workgroup_size = max(sqrt((double)workgroup_size), 1.0);

		size_t local_size[2] = {workgroup_size, workgroup_size};
		size_t global_size[2] = {global_size_round_up(local_size[0], d_w), global_size_round_up(local_size[1], d_h)};

		/* run kernel */
		ciErr = clEnqueueNDRangeKernel(cqCommandQueue, ckPathTraceKernel, 2, NULL, global_size, local_size, 0, NULL, NULL);
		opencl_assert(ciErr);
		opencl_assert(clFinish(cqCommandQueue));
	}
예제 #3
0
	void mem_copy_to(device_memory& mem)
	{
		/* this is blocking */
		size_t size = mem.memory_size();
		ciErr = clEnqueueWriteBuffer(cqCommandQueue, CL_MEM_PTR(mem.device_pointer), CL_TRUE, 0, size, (void*)mem.data_pointer, 0, NULL, NULL);
		opencl_assert(ciErr);
	}
예제 #4
0
	void mem_free(device_memory& mem)
	{
		if(mem.device_pointer) {
			ciErr = clReleaseMemObject(CL_MEM_PTR(mem.device_pointer));
			mem.device_pointer = 0;
			opencl_assert(ciErr);
		}
	}
예제 #5
0
	void mem_copy_from(device_memory& mem, int y, int w, int h, int elem)
	{
		size_t offset = elem*y*w;
		size_t size = elem*w*h;

		ciErr = clEnqueueReadBuffer(cqCommandQueue, CL_MEM_PTR(mem.device_pointer), CL_TRUE, offset, size, (uchar*)mem.data_pointer + offset, 0, NULL, NULL);
		opencl_assert(ciErr);
	}
예제 #6
0
	void mem_alloc(device_memory& mem, MemoryType type)
	{
		size_t size = mem.memory_size();

		if(type == MEM_READ_ONLY)
			mem.device_pointer = (device_ptr)clCreateBuffer(cxContext, CL_MEM_READ_ONLY, size, NULL, &ciErr);
		else if(type == MEM_WRITE_ONLY)
			mem.device_pointer = (device_ptr)clCreateBuffer(cxContext, CL_MEM_WRITE_ONLY, size, NULL, &ciErr);
		else
			mem.device_pointer = (device_ptr)clCreateBuffer(cxContext, CL_MEM_READ_WRITE, size, NULL, &ciErr);

		opencl_assert(ciErr);
	}
예제 #7
0
	cl_int set_kernel_arg_mem(cl_kernel kernel, int *narg, const char *name)
	{
		cl_mem ptr;
		cl_int err = 0;

		if(mem_map.find(name) != mem_map.end()) {
			device_memory *mem = mem_map[name];
		
			ptr = CL_MEM_PTR(mem->device_pointer);
		}
		else {
			/* work around NULL not working, even though the spec says otherwise */
			ptr = CL_MEM_PTR(null_mem);
		}
		
		err |= clSetKernelArg(kernel, (*narg)++, sizeof(ptr), (void*)&ptr);
		opencl_assert(err);

		return err;
	}
예제 #8
0
string device_opencl_capabilities(void)
{
	if(OpenCLInfo::device_type() == 0) {
		return "All OpenCL devices are forced to be OFF";
	}
	string result = "";
	string error_msg = "";  /* Only used by opencl_assert(), but in the future
	                         * it could also be nicely reported to the console.
	                         */
	cl_uint num_platforms = 0;
	opencl_assert(clGetPlatformIDs(0, NULL, &num_platforms));
	if(num_platforms == 0) {
		return "No OpenCL platforms found\n";
	}
	result += string_printf("Number of platforms: %u\n", num_platforms);

	vector<cl_platform_id> platform_ids;
	platform_ids.resize(num_platforms);
	opencl_assert(clGetPlatformIDs(num_platforms, &platform_ids[0], NULL));

#define APPEND_STRING_INFO(func, id, name, what) \
	do { \
		char data[1024] = "\0"; \
		opencl_assert(func(id, what, sizeof(data), &data, NULL)); \
		result += string_printf("%s: %s\n", name, data); \
	} while(false)
#define APPEND_PLATFORM_STRING_INFO(id, name, what) \
	APPEND_STRING_INFO(clGetPlatformInfo, id, "\tPlatform " name, what)
#define APPEND_DEVICE_STRING_INFO(id, name, what) \
	APPEND_STRING_INFO(clGetDeviceInfo, id, "\t\t\tDevice " name, what)

	vector<cl_device_id> device_ids;
	for(cl_uint platform = 0; platform < num_platforms; ++platform) {
		cl_platform_id platform_id = platform_ids[platform];

		result += string_printf("Platform #%u\n", platform);

		APPEND_PLATFORM_STRING_INFO(platform_id, "Name", CL_PLATFORM_NAME);
		APPEND_PLATFORM_STRING_INFO(platform_id, "Vendor", CL_PLATFORM_VENDOR);
		APPEND_PLATFORM_STRING_INFO(platform_id, "Version", CL_PLATFORM_VERSION);
		APPEND_PLATFORM_STRING_INFO(platform_id, "Profile", CL_PLATFORM_PROFILE);
		APPEND_PLATFORM_STRING_INFO(platform_id, "Extensions", CL_PLATFORM_EXTENSIONS);

		cl_uint num_devices = 0;
		opencl_assert(clGetDeviceIDs(platform_ids[platform],
		                             CL_DEVICE_TYPE_ALL,
		                             0,
		                             NULL,
		                             &num_devices));
		result += string_printf("\tNumber of devices: %u\n", num_devices);

		device_ids.resize(num_devices);
		opencl_assert(clGetDeviceIDs(platform_ids[platform],
		                             CL_DEVICE_TYPE_ALL,
		                             num_devices,
		                             &device_ids[0],
		                             NULL));
		for(cl_uint device = 0; device < num_devices; ++device) {
			cl_device_id device_id = device_ids[device];

			result += string_printf("\t\tDevice: #%u\n", device);

			APPEND_DEVICE_STRING_INFO(device_id, "Name", CL_DEVICE_NAME);
			APPEND_DEVICE_STRING_INFO(device_id, "Vendor", CL_DEVICE_VENDOR);
			APPEND_DEVICE_STRING_INFO(device_id, "OpenCL C Version", CL_DEVICE_OPENCL_C_VERSION);
			APPEND_DEVICE_STRING_INFO(device_id, "Profile", CL_DEVICE_PROFILE);
			APPEND_DEVICE_STRING_INFO(device_id, "Version", CL_DEVICE_VERSION);
			APPEND_DEVICE_STRING_INFO(device_id, "Extensions", CL_DEVICE_EXTENSIONS);
		}
	}

#undef APPEND_STRING_INFO
#undef APPEND_PLATFORM_STRING_INFO
#undef APPEND_DEVICE_STRING_INFO

	return result;
}