/* Entry point for executing a device task on this OpenCL device.
 *
 * Dispatches on task->type:
 *  - FILM_CONVERT: convert render buffer to displayable byte/half RGBA.
 *  - SHADER: run shader evaluation kernels.
 *  - RENDER: loop acquiring tiles and path-trace or denoise each one.
 *
 * task: owned by the caller; progress/cancel state is reported back
 * through it (update_progress, acquire_tile/release_tile). */
void thread_run(DeviceTask *task)
{
	/* Make sure texture data modified on the host is uploaded before
	 * any kernel runs. */
	flush_texture_buffers();

	if(task->type == DeviceTask::FILM_CONVERT) {
		film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half);
	}
	else if(task->type == DeviceTask::SHADER) {
		shader(*task);
	}
	else if(task->type == DeviceTask::RENDER) {
		RenderTile tile;
		DenoisingTask denoising(this);

		/* Allocate buffer for kernel globals; one dummy element is enough,
		 * the split kernel fills in the real contents on the device. */
		device_only_memory<KernelGlobalsDummy> kgbuffer(this, "kernel_globals");
		kgbuffer.alloc_to_device(1);

		/* Keep rendering tiles until done. */
		while(task->acquire_tile(this, tile)) {
			if(tile.task == RenderTile::PATH_TRACE) {
				/* NOTE: a redundant assert(tile.task == RenderTile::PATH_TRACE)
				 * inside this branch was removed; the condition is already
				 * guaranteed by the enclosing if. */
				scoped_timer timer(&tile.buffers->render_time);

				split_kernel->path_trace(task, tile, kgbuffer, *const_mem_map["__data"]);

				/* Complete kernel execution before release tile. */
				/* This helps in multi-device render;
				 * The device that reaches the critical-section function
				 * release_tile waits (stalling other devices from entering
				 * release_tile) for all kernels to complete. If device1 (a
				 * slow-render device) reaches release_tile first then it would
				 * stall device2 (a fast-render device) from proceeding to render
				 * next tile. */
				clFinish(cqCommandQueue);
			}
			else if(tile.task == RenderTile::DENOISE) {
				tile.sample = tile.start_sample + tile.num_samples;
				denoise(tile, denoising, *task);
				task->update_progress(&tile, tile.w*tile.h);
			}

			task->release_tile(tile);
		}

		kgbuffer.free();
	}
}
/* Run one device task: film conversion, shader evaluation, or tile
 * rendering, selected by task->type. Unrecognized task types are a no-op.
 *
 * For RENDER tasks, tiles are acquired in a loop; PATH_TRACE tiles are
 * rendered one sample at a time (honoring cancellation), DENOISE tiles
 * are denoised in a single pass. */
void thread_run(DeviceTask *task)
{
	switch(task->type) {
		case DeviceTask::FILM_CONVERT:
			film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half);
			break;

		case DeviceTask::SHADER:
			shader(*task);
			break;

		case DeviceTask::RENDER: {
			RenderTile tile;
			DenoisingTask denoising(this, *task);

			/* Keep rendering tiles until done. */
			while(task->acquire_tile(this, tile)) {
				if(tile.task == RenderTile::PATH_TRACE) {
					const int first_sample = tile.start_sample;
					const int last_sample = tile.start_sample + tile.num_samples;

					for(int sample = first_sample; sample < last_sample; sample++) {
						/* Poll cancellation each sample; when the queue must
						 * still be drained (need_finish_queue) we keep going. */
						if(task->get_cancel() && !task->need_finish_queue) {
							break;
						}

						path_trace(tile, sample);

						tile.sample = sample + 1;
						task->update_progress(&tile, tile.w*tile.h);
					}

					/* Complete kernel execution before release tile */
					/* This helps in multi-device render;
					 * The device that reaches the critical-section function
					 * release_tile waits (stalling other devices from entering
					 * release_tile) for all kernels to complete. If device1 (a
					 * slow-render device) reaches release_tile first then it would
					 * stall device2 (a fast-render device) from proceeding to render
					 * next tile. */
					clFinish(cqCommandQueue);
				}
				else if(tile.task == RenderTile::DENOISE) {
					tile.sample = tile.start_sample + tile.num_samples;
					denoise(tile, denoising);
					task->update_progress(&tile, tile.w*tile.h);
				}

				task->release_tile(tile);
			}
			break;
		}

		default:
			break;
	}
}
/* Entry point for executing a device task on this OpenCL split-kernel device.
 * Dispatches on task->type: film conversion, shader evaluation, or tile
 * rendering via the split kernel.
 *
 * task: owned by the caller; tiles are pulled with acquire_tile and
 * returned with release_tile, progress is reported via update_progress. */
void thread_run(DeviceTask *task)
{
	if(task->type == DeviceTask::FILM_CONVERT) {
		film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half);
	}
	else if(task->type == DeviceTask::SHADER) {
		shader(*task);
	}
	else if(task->type == DeviceTask::RENDER) {
		RenderTile tile;

		/* Copy dummy KernelGlobals related to OpenCL from kernel_globals.h to
		 * fetch its size. The struct is only used for sizeof() below; its
		 * texture-pointer members are generated by expanding KERNEL_TEX for
		 * every entry in kernel/kernel_textures.h. */
		typedef struct KernelGlobals {
			ccl_constant KernelData *data;
#define KERNEL_TEX(type, ttype, name) \
	ccl_global type *name;
#include "kernel/kernel_textures.h"
#undef KERNEL_TEX
			SplitData split_data;
			SplitParams split_param_data;
		} KernelGlobals;

		/* Allocate buffer for kernel globals */
		device_memory kgbuffer;
		kgbuffer.resize(sizeof(KernelGlobals));
		mem_alloc("kernel_globals", kgbuffer, MEM_READ_WRITE);

		/* Keep rendering tiles until done. */
		while(task->acquire_tile(this, tile)) {
			if(tile.task == RenderTile::PATH_TRACE) {
				/* assert is tautological here (guarded by the if above). */
				assert(tile.task == RenderTile::PATH_TRACE);

				split_kernel->path_trace(task, tile, kgbuffer, *const_mem_map["__data"]);

				/* Complete kernel execution before release tile. */
				/* This helps in multi-device render;
				 * The device that reaches the critical-section function
				 * release_tile waits (stalling other devices from entering
				 * release_tile) for all kernels to complete. If device1 (a
				 * slow-render device) reaches release_tile first then it would
				 * stall device2 (a fast-render device) from proceeding to render
				 * next tile. */
				clFinish(cqCommandQueue);
			}
			else if(tile.task == RenderTile::DENOISE) {
				tile.sample = tile.start_sample + tile.num_samples;
				denoise(tile, *task);
				task->update_progress(&tile, tile.w*tile.h);
			}

			task->release_tile(tile);
		}

		/* Release the kernel-globals device buffer once all tiles are done. */
		mem_free(kgbuffer);
	}
}
/* Entry point for executing a device task on this OpenCL split-kernel device.
 * Dispatches on task->type: film conversion, shader evaluation, or path
 * tracing. For PATH_TRACE, on the first acquired tile this probes how much
 * device-global memory is available and decides whether the user-requested
 * tile size must be split into smaller tiles to fit the split kernel. */
void thread_run(DeviceTask *task)
{
	if(task->type == DeviceTask::FILM_CONVERT) {
		film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half);
	}
	else if(task->type == DeviceTask::SHADER) {
		shader(*task);
	}
	else if(task->type == DeviceTask::PATH_TRACE) {
		RenderTile tile;
		/* One-time init flag: the feasibility probe below only runs for the
		 * first tile acquired by this task. */
		bool initialize_data_and_check_render_feasibility = false;
		bool need_to_split_tiles_further = false;
		int2 max_render_feasible_tile_size;
		size_t feasible_global_work_size;
		const int2 tile_size = task->requested_tile_size;

		/* Keep rendering tiles until done. */
		while(task->acquire_tile(this, tile)) {
			if(!initialize_data_and_check_render_feasibility) {
				/* Initialize data. */
				/* Calculate per_thread_output_buffer_size. */
				size_t output_buffer_size = 0;
				ciErr = clGetMemObjectInfo((cl_mem)tile.buffer,
				                           CL_MEM_SIZE,
				                           sizeof(output_buffer_size),
				                           &output_buffer_size,
				                           NULL);
				assert(ciErr == CL_SUCCESS &&
				       "Can't get tile.buffer mem object info");
				/* This value is different when running on AMD and NV. */
				if(background) {
					/* In offline render the number of buffer elements
					 * associated with tile.buffer is the current tile size.
					 */
					per_thread_output_buffer_size =
					        output_buffer_size / (tile.w * tile.h);
				}
				else {
					/* interactive rendering, unlike offline render, the number of buffer elements
					 * associated with tile.buffer is the entire viewport size.
					 */
					per_thread_output_buffer_size =
					        output_buffer_size /
					        (tile.buffers->params.width *
					         tile.buffers->params.height);
				}
				/* Check render feasibility.
				 */
				feasible_global_work_size = get_feasible_global_work_size(
				        tile_size,
				        CL_MEM_PTR(const_mem_map["__data"]->device_pointer));
				max_render_feasible_tile_size =
				        get_max_render_feasible_tile_size(
				                feasible_global_work_size);
				need_to_split_tiles_further =
				        need_to_split_tile(tile_size.x,
				                           tile_size.y,
				                           max_render_feasible_tile_size);
				initialize_data_and_check_render_feasibility = true;
			}
			if(need_to_split_tiles_further) {
				/* The requested tile is too large for the device; carve it
				 * into sub-tiles that fit and path-trace each one. */
				int2 split_tile_size =
				        get_split_tile_size(tile,
				                            max_render_feasible_tile_size);
				vector<SplitRenderTile> to_path_trace_render_tiles =
				        split_tiles(tile, split_tile_size);
				/* Print message to console */
				if(background && (to_path_trace_render_tiles.size() > 1)) {
					fprintf(stderr, "Message : Tiles need to be split "
					        "further inside path trace (due to insufficient "
					        "device-global-memory for split kernel to "
					        "function) \n"
					        "The current tile of dimensions %dx%d is split "
					        "into tiles of dimension %dx%d for render \n",
					        tile.w, tile.h,
					        split_tile_size.x, split_tile_size.y);
				}
				/* Process all split tiles. */
				for(int tile_iter = 0;
				    tile_iter < to_path_trace_render_tiles.size();
				    ++tile_iter)
				{
					path_trace(task,
					           to_path_trace_render_tiles[tile_iter],
					           max_render_feasible_tile_size);
				}
			}
			else {
				/* No splitting required; process the entire tile at once. */
				/* Render feasible tile size is user-set-tile-size itself.
				 * Round up to a multiple of the split-kernel local work size. */
				max_render_feasible_tile_size.x =
				        (((tile_size.x - 1) / SPLIT_KERNEL_LOCAL_SIZE_X) + 1) *
				        SPLIT_KERNEL_LOCAL_SIZE_X;
				max_render_feasible_tile_size.y =
				        (((tile_size.y - 1) / SPLIT_KERNEL_LOCAL_SIZE_Y) + 1) *
				        SPLIT_KERNEL_LOCAL_SIZE_Y;
				/* buffer_rng_state_stride is stride itself. */
				SplitRenderTile split_tile(tile);
				split_tile.buffer_rng_state_stride = tile.stride;
				path_trace(task, split_tile, max_render_feasible_tile_size);
			}
			tile.sample = tile.start_sample + tile.num_samples;

			/* Complete kernel execution before release tile.
			 */
			/* This helps in multi-device render;
			 * The device that reaches the critical-section function
			 * release_tile waits (stalling other devices from entering
			 * release_tile) for all kernels to complete. If device1 (a
			 * slow-render device) reaches release_tile first then it would
			 * stall device2 (a fast-render device) from proceeding to render
			 * next tile. */
			clFinish(cqCommandQueue);
			task->release_tile(tile);
		}
	}
}