/* Execute one device task on this device.
 *
 * Texture buffers are flushed first, then the task is dispatched on its type:
 * film conversion, shader evaluation, or tile rendering. A task of any other
 * type results in no further work. */
void thread_run(DeviceTask *task)
{
	flush_texture_buffers();

	if(task->type == DeviceTask::FILM_CONVERT) {
		film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half);
		return;
	}

	if(task->type == DeviceTask::SHADER) {
		shader(*task);
		return;
	}

	if(task->type != DeviceTask::RENDER) {
		return;
	}

	RenderTile tile;
	DenoisingTask denoising(this);

	/* Device-side storage for the kernel globals; a single element. */
	device_only_memory<KernelGlobalsDummy> kgbuffer(this, "kernel_globals");
	kgbuffer.alloc_to_device(1);

	/* Process tiles for as long as the task hands them out. */
	while(task->acquire_tile(this, tile)) {
		if(tile.task == RenderTile::PATH_TRACE) {
			/* Always holds here given the enclosing condition. */
			assert(tile.task == RenderTile::PATH_TRACE);

			/* Timer object is bound to the tile buffers' render_time and
			 * lives until the end of this branch, i.e. past clFinish(). */
			scoped_timer timer(&tile.buffers->render_time);

			split_kernel->path_trace(task,
			                         tile,
			                         kgbuffer,
			                         *const_mem_map["__data"]);

			/* Wait for all queued kernels before the tile is released.
			 *
			 * Rationale for multi-device rendering: release_tile is a
			 * critical section, and a device entering it would otherwise
			 * wait there for its kernels to complete, blocking every other
			 * device from entering. A slow device arriving first would then
			 * keep a fast device from moving on to its next tile. */
			clFinish(cqCommandQueue);
		}
		else if(tile.task == RenderTile::DENOISE) {
			tile.sample = tile.start_sample + tile.num_samples;
			denoise(tile, denoising, *task);
			task->update_progress(&tile, tile.w*tile.h);
		}

		task->release_tile(tile);
	}

	kgbuffer.free();
}
virtual bool load_kernels(const DeviceRequestedFeatures& requested_features, vector<OpenCLDeviceBase::OpenCLProgram*> &programs) { bool single_program = OpenCLInfo::use_single_program(); program_data_init = OpenCLDeviceBase::OpenCLProgram(this, single_program ? "split" : "split_data_init", single_program ? "kernel_split.cl" : "kernel_data_init.cl", get_build_options(this, requested_features)); program_data_init.add_kernel(ustring("path_trace_data_init")); programs.push_back(&program_data_init); program_state_buffer_size = OpenCLDeviceBase::OpenCLProgram(this, single_program ? "split" : "split_state_buffer_size", single_program ? "kernel_split.cl" : "kernel_state_buffer_size.cl", get_build_options(this, requested_features)); program_state_buffer_size.add_kernel(ustring("path_trace_state_buffer_size")); programs.push_back(&program_state_buffer_size); return split_kernel->load_kernels(requested_features); }
void thread_run(DeviceTask *task) { if(task->type == DeviceTask::FILM_CONVERT) { film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half); } else if(task->type == DeviceTask::SHADER) { shader(*task); } else if(task->type == DeviceTask::RENDER) { RenderTile tile; /* Copy dummy KernelGlobals related to OpenCL from kernel_globals.h to * fetch its size. */ typedef struct KernelGlobals { ccl_constant KernelData *data; #define KERNEL_TEX(type, ttype, name) \ ccl_global type *name; #include "kernel/kernel_textures.h" #undef KERNEL_TEX SplitData split_data; SplitParams split_param_data; } KernelGlobals; /* Allocate buffer for kernel globals */ device_memory kgbuffer; kgbuffer.resize(sizeof(KernelGlobals)); mem_alloc("kernel_globals", kgbuffer, MEM_READ_WRITE); /* Keep rendering tiles until done. */ while(task->acquire_tile(this, tile)) { if(tile.task == RenderTile::PATH_TRACE) { assert(tile.task == RenderTile::PATH_TRACE); split_kernel->path_trace(task, tile, kgbuffer, *const_mem_map["__data"]); /* Complete kernel execution before release tile. */ /* This helps in multi-device render; * The device that reaches the critical-section function * release_tile waits (stalling other devices from entering * release_tile) for all kernels to complete. If device1 (a * slow-render device) reaches release_tile first then it would * stall device2 (a fast-render device) from proceeding to render * next tile. */ clFinish(cqCommandQueue); } else if(tile.task == RenderTile::DENOISE) { tile.sample = tile.start_sample + tile.num_samples; denoise(tile, *task); task->update_progress(&tile, tile.w*tile.h); } task->release_tile(tile); } mem_free(kgbuffer); } }