virtual bool enqueue(const KernelDimensions& dim, device_memory& kg, device_memory& data) { device->kernel_set_args(program(), 0, kg, data); device->ciErr = clEnqueueNDRangeKernel(device->cqCommandQueue, program(), 2, NULL, dim.global_size, dim.local_size, 0, NULL, NULL); device->opencl_assert_err(device->ciErr, "clEnqueueNDRangeKernel"); if(device->ciErr != CL_SUCCESS) { string message = string_printf("OpenCL error: %s in clEnqueueNDRangeKernel()", clewErrorString(device->ciErr)); device->opencl_error(message); return false; } return true; }
virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads) { device_vector<uint64_t> size_buffer(device, "size_buffer", MEM_READ_WRITE); size_buffer.alloc(1); size_buffer.zero_to_device(); uint threads = num_threads; device->kernel_set_args(device->program_state_buffer_size(), 0, kg, data, threads, size_buffer); size_t global_size = 64; device->ciErr = clEnqueueNDRangeKernel(device->cqCommandQueue, device->program_state_buffer_size(), 1, NULL, &global_size, NULL, 0, NULL, NULL); device->opencl_assert_err(device->ciErr, "clEnqueueNDRangeKernel"); size_buffer.copy_from_device(0, 1, 1); size_t size = size_buffer[0]; size_buffer.free(); if(device->ciErr != CL_SUCCESS) { string message = string_printf("OpenCL error: %s in clEnqueueNDRangeKernel()", clewErrorString(device->ciErr)); device->opencl_error(message); return 0; } return size; }
virtual bool enqueue(const KernelDimensions& dim, device_memory& kg, device_memory& data) { if(cached_id != cached_memory.id) { cl_uint start_arg_index = device->kernel_set_args(program(), 0, kg, data, *cached_memory.split_data, *cached_memory.ray_state); device->set_kernel_arg_buffers(program(), &start_arg_index); start_arg_index += device->kernel_set_args(program(), start_arg_index, *cached_memory.queue_index, *cached_memory.use_queues_flag, *cached_memory.work_pools, *cached_memory.buffer); cached_id = cached_memory.id; } device->ciErr = clEnqueueNDRangeKernel(device->cqCommandQueue, program(), 2, NULL, dim.global_size, dim.local_size, 0, NULL, NULL); device->opencl_assert_err(device->ciErr, "clEnqueueNDRangeKernel"); if(device->ciErr != CL_SUCCESS) { string message = string_printf("OpenCL error: %s in clEnqueueNDRangeKernel()", clewErrorString(device->ciErr)); device->opencl_error(message); return false; } return true; }
virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim, RenderTile& rtile, int num_global_elements, device_memory& kernel_globals, device_memory& kernel_data, device_memory& split_data, device_memory& ray_state, device_memory& queue_index, device_memory& use_queues_flag, device_memory& work_pool_wgs ) { cl_int dQueue_size = dim.global_size[0] * dim.global_size[1]; /* Set the range of samples to be processed for every ray in * path-regeneration logic. */ cl_int start_sample = rtile.start_sample; cl_int end_sample = rtile.start_sample + rtile.num_samples; cl_uint start_arg_index = device->kernel_set_args(device->program_data_init(), 0, kernel_globals, kernel_data, split_data, num_global_elements, ray_state, rtile.rng_state); /* TODO(sergey): Avoid map lookup here. */ #define KERNEL_TEX(type, ttype, name) \ device->set_kernel_arg_mem(device->program_data_init(), &start_arg_index, #name); #include "kernel/kernel_textures.h" #undef KERNEL_TEX start_arg_index += device->kernel_set_args(device->program_data_init(), start_arg_index, start_sample, end_sample, rtile.x, rtile.y, rtile.w, rtile.h, rtile.offset, rtile.stride, queue_index, dQueue_size, use_queues_flag, work_pool_wgs, rtile.num_samples, rtile.buffer); /* Enqueue ckPathTraceKernel_data_init kernel. */ device->ciErr = clEnqueueNDRangeKernel(device->cqCommandQueue, device->program_data_init(), 2, NULL, dim.global_size, dim.local_size, 0, NULL, NULL); device->opencl_assert_err(device->ciErr, "clEnqueueNDRangeKernel"); if(device->ciErr != CL_SUCCESS) { string message = string_printf("OpenCL error: %s in clEnqueueNDRangeKernel()", clewErrorString(device->ciErr)); device->opencl_error(message); return false; } return true; }
virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim, RenderTile& rtile, int num_global_elements, device_memory& kernel_globals, device_memory& kernel_data, device_memory& split_data, device_memory& ray_state, device_memory& queue_index, device_memory& use_queues_flag, device_memory& work_pool_wgs ) { cl_int dQueue_size = dim.global_size[0] * dim.global_size[1]; /* Set the range of samples to be processed for every ray in * path-regeneration logic. */ cl_int start_sample = rtile.start_sample; cl_int end_sample = rtile.start_sample + rtile.num_samples; cl_uint start_arg_index = device->kernel_set_args(device->program_data_init(), 0, kernel_globals, kernel_data, split_data, num_global_elements, ray_state); device->set_kernel_arg_buffers(device->program_data_init(), &start_arg_index); start_arg_index += device->kernel_set_args(device->program_data_init(), start_arg_index, start_sample, end_sample, rtile.x, rtile.y, rtile.w, rtile.h, rtile.offset, rtile.stride, queue_index, dQueue_size, use_queues_flag, work_pool_wgs, rtile.num_samples, rtile.buffer); /* Enqueue ckPathTraceKernel_data_init kernel. */ device->ciErr = clEnqueueNDRangeKernel(device->cqCommandQueue, device->program_data_init(), 2, NULL, dim.global_size, dim.local_size, 0, NULL, NULL); device->opencl_assert_err(device->ciErr, "clEnqueueNDRangeKernel"); if(device->ciErr != CL_SUCCESS) { string message = string_printf("OpenCL error: %s in clEnqueueNDRangeKernel()", clewErrorString(device->ciErr)); device->opencl_error(message); return false; } cached_memory.split_data = &split_data; cached_memory.ray_state = &ray_state; cached_memory.queue_index = &queue_index; cached_memory.use_queues_flag = &use_queues_flag; cached_memory.work_pools = &work_pool_wgs; cached_memory.buffer = &rtile.buffer; cached_memory.id++; return true; }