/* Worker thread entry point: dispatch a device task by type.
 *
 * TONEMAP converts the render buffer for display. PATH_TRACE keeps
 * acquiring tiles from the task scheduler and renders them one sample
 * at a time, reporting progress between samples.
 */
void thread_run(DeviceTask *task)
{
  switch(task->type) {
    case DeviceTask::TONEMAP:
      tonemap(*task, task->buffer, task->rgba);
      break;

    case DeviceTask::PATH_TRACE: {
      RenderTile tile;

      /* Render tiles until the scheduler has none left for us. */
      while(task->acquire_tile(this, tile)) {
        const int first_sample = tile.start_sample;
        const int last_sample = tile.start_sample + tile.num_samples;

        for(int sample = first_sample; sample < last_sample; sample++) {
          /* On cancel, stop early unless the queue must be drained
           * to completion. */
          if(task->get_cancel() && task->need_finish_queue == false)
            break;

          path_trace(tile, sample);

          /* Record how many samples are done so progress is accurate. */
          tile.sample = sample + 1;
          task->update_progress(tile);
        }

        task->release_tile(tile);
      }
      break;
    }

    default:
      break;
  }
}
/* Worker thread entry point: dispatch a device task by type.
 *
 * FILM_CONVERT converts the render buffer to byte/half RGBA output,
 * SHADER evaluates shading requests, and RENDER loops over acquired
 * tiles, path tracing or denoising each one depending on tile.task.
 */
void thread_run(DeviceTask *task) {
  if(task->type == DeviceTask::FILM_CONVERT) {
    film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half);
  }
  else if(task->type == DeviceTask::SHADER) {
    shader(*task);
  }
  else if(task->type == DeviceTask::RENDER) {
    RenderTile tile;
    DenoisingTask denoising(this, *task);

    /* Keep rendering tiles until done. */
    while(task->acquire_tile(this, tile)) {
      if(tile.task == RenderTile::PATH_TRACE) {
        int start_sample = tile.start_sample;
        int end_sample = tile.start_sample + tile.num_samples;

        /* One kernel launch per sample so cancellation can be checked
         * and progress reported between samples. */
        for(int sample = start_sample; sample < end_sample; sample++) {
          if(task->get_cancel()) {
            /* On cancel, stop early unless the queue has to be
             * drained to completion. */
            if(task->need_finish_queue == false)
              break;
          }

          path_trace(tile, sample);

          tile.sample = sample + 1;

          task->update_progress(&tile, tile.w*tile.h);
        }

        /* Complete kernel execution before release tile */
        /* This helps in multi-device render;
         * The device that reaches the critical-section function
         * release_tile waits (stalling other devices from entering
         * release_tile) for all kernels to complete. If device1 (a
         * slow-render device) reaches release_tile first then it would
         * stall device2 (a fast-render device) from proceeding to render
         * next tile.
         */
        clFinish(cqCommandQueue);
      }
      else if(tile.task == RenderTile::DENOISE) {
        /* Denoising consumes the tile whole; mark it as fully sampled
         * before reporting progress. */
        tile.sample = tile.start_sample + tile.num_samples;
        denoise(tile, denoising);
        task->update_progress(&tile, tile.w*tile.h);
      }

      task->release_tile(tile);
    }
  }
}
/* Queue a task for execution on this device.
 *
 * On Apple platforms the task is split into smaller chunks to work
 * around an ATI OpenCL driver issue; otherwise it runs as a single
 * task. Each resulting sub-task is then dispatched by type.
 *
 * Fix: removed the unused `DeviceTask task;` local that was shadowed
 * by the foreach loop variable of the same name.
 */
void task_add(DeviceTask& maintask)
{
  list<DeviceTask> tasks;

  /* arbitrary limit to work around apple ATI opencl issue */
  if(platform_name == "Apple")
    maintask.split_max_size(tasks, 76800);
  else
    tasks.push_back(maintask);

  foreach(DeviceTask& task, tasks) {
    if(task.type == DeviceTask::TONEMAP)
      tonemap(task);
    else if(task.type == DeviceTask::PATH_TRACE)
      path_trace(task);
  }
}
static unsigned long thread_func(void *param) { ThreadParams *thread_params = (ThreadParams*)param; BucketGrid *bucket_grid = (BucketGrid*)thread_params->bucket_grid; RenderParams *render_params = (RenderParams*)thread_params->render_params; u32 bucket_id = __sync_fetch_and_add(bucket_grid->current_bucket, 1); while(bucket_id < bucket_grid->num_buckets){ u32 bucket_index = hilbert_curve_transform_bucket_id( bucket_grid->num_buckets_x,bucket_id); bucket_grid->active_buckets[bucket_index] = 1; path_trace(*render_params,*bucket_grid,bucket_index); bucket_grid->active_buckets[bucket_index] = 0; bucket_grid->done_buckets[bucket_index] = 1; semaphore_post(bucket_grid->bucket_done); bucket_id = __sync_fetch_and_add(bucket_grid->current_bucket, 1); } return 0; }
/* Worker thread entry point for the split-kernel OpenCL device:
 * dispatch a device task by type.
 *
 * For PATH_TRACE, the first acquired tile is used to measure how much
 * device memory one thread's output buffer requires and whether the
 * requested tile size is renderable within device global memory; if it
 * is not, every tile is split into smaller sub-tiles before path
 * tracing. Writes the members `ciErr` and
 * `per_thread_output_buffer_size` as a side effect.
 */
void thread_run(DeviceTask *task) {
  if(task->type == DeviceTask::FILM_CONVERT) {
    film_convert(*task, task->buffer, task->rgba_byte, task->rgba_half);
  }
  else if(task->type == DeviceTask::SHADER) {
    shader(*task);
  }
  else if(task->type == DeviceTask::PATH_TRACE) {
    RenderTile tile;
    bool initialize_data_and_check_render_feasibility = false;
    bool need_to_split_tiles_further = false;
    int2 max_render_feasible_tile_size;
    size_t feasible_global_work_size;
    const int2 tile_size = task->requested_tile_size;

    /* Keep rendering tiles until done. */
    while(task->acquire_tile(this, tile)) {
      if(!initialize_data_and_check_render_feasibility) {
        /* Initialize data. */
        /* Calculate per_thread_output_buffer_size by asking OpenCL for
         * the actual size of the tile's buffer object. */
        size_t output_buffer_size = 0;
        ciErr = clGetMemObjectInfo((cl_mem)tile.buffer,
                                   CL_MEM_SIZE,
                                   sizeof(output_buffer_size),
                                   &output_buffer_size,
                                   NULL);
        assert(ciErr == CL_SUCCESS && "Can't get tile.buffer mem object info");
        /* This value is different when running on AMD and NV. */
        if(background) {
          /* In offline render the number of buffer elements
           * associated with tile.buffer is the current tile size.
           */
          per_thread_output_buffer_size =
              output_buffer_size / (tile.w * tile.h);
        }
        else {
          /* interactive rendering, unlike offline render, the number of buffer elements
           * associated with tile.buffer is the entire viewport size.
           */
          per_thread_output_buffer_size =
              output_buffer_size / (tile.buffers->params.width *
                                    tile.buffers->params.height);
        }

        /* Check render feasibility: how many threads can run at once
         * given device memory, and does the requested tile size fit. */
        feasible_global_work_size = get_feasible_global_work_size(
            tile_size,
            CL_MEM_PTR(const_mem_map["__data"]->device_pointer));
        max_render_feasible_tile_size = get_max_render_feasible_tile_size(
            feasible_global_work_size);
        need_to_split_tiles_further = need_to_split_tile(
            tile_size.x,
            tile_size.y,
            max_render_feasible_tile_size);

        /* Run this analysis only once, on the first acquired tile. */
        initialize_data_and_check_render_feasibility = true;
      }
      if(need_to_split_tiles_further) {
        /* Tile does not fit in device memory: split it and path trace
         * each sub-tile separately. */
        int2 split_tile_size = get_split_tile_size(
            tile,
            max_render_feasible_tile_size);
        vector<SplitRenderTile> to_path_trace_render_tiles = split_tiles(
            tile,
            split_tile_size);
        /* Print message to console */
        if(background && (to_path_trace_render_tiles.size() > 1)) {
          fprintf(stderr, "Message : Tiles need to be split "
                  "further inside path trace (due to insufficient "
                  "device-global-memory for split kernel to "
                  "function) \n"
                  "The current tile of dimensions %dx%d is split "
                  "into tiles of dimension %dx%d for render \n",
                  tile.w, tile.h,
                  split_tile_size.x,
                  split_tile_size.y);
        }
        /* Process all split tiles. */
        for(int tile_iter = 0;
            tile_iter < to_path_trace_render_tiles.size();
            ++tile_iter)
        {
          path_trace(task,
                     to_path_trace_render_tiles[tile_iter],
                     max_render_feasible_tile_size);
        }
      }
      else {
        /* No splitting required; process the entire tile at once. */
        /* Render feasible tile size is user-set-tile-size itself,
         * rounded up to the split-kernel work-group size. */
        max_render_feasible_tile_size.x =
            (((tile_size.x - 1) / SPLIT_KERNEL_LOCAL_SIZE_X) + 1) *
            SPLIT_KERNEL_LOCAL_SIZE_X;
        max_render_feasible_tile_size.y =
            (((tile_size.y - 1) / SPLIT_KERNEL_LOCAL_SIZE_Y) + 1) *
            SPLIT_KERNEL_LOCAL_SIZE_Y;
        /* buffer_rng_state_stride is stride itself. */
        SplitRenderTile split_tile(tile);
        split_tile.buffer_rng_state_stride = tile.stride;
        path_trace(task, split_tile, max_render_feasible_tile_size);
      }
      tile.sample = tile.start_sample + tile.num_samples;

      /* Complete kernel execution before release tile. */
      /* This helps in multi-device render;
       * The device that reaches the critical-section function
       * release_tile waits (stalling other devices from entering
       * release_tile) for all kernels to complete. If device1 (a
       * slow-render device) reaches release_tile first then it would
       * stall device2 (a fast-render device) from proceeding to render
       * next tile.
       */
      clFinish(cqCommandQueue);
      task->release_tile(tile);
    }
  }
}
/* Render entry point for one CPU worker thread.
 *
 * Sets up per-thread kernel globals (placement-new constructed into a
 * device-only buffer), optionally loads the split kernel, then keeps
 * acquiring tiles and path tracing or denoising them until the task is
 * out of tiles or cancelled.
 *
 * Fix: the early return on split-kernel load failure now performs the
 * same teardown as the normal exit path — previously it skipped
 * profiler.remove_state() and the explicit KernelGlobals destructor.
 */
void thread_render(DeviceTask &task)
{
  /* Bail out immediately on cancel, unless the queue must be drained. */
  if (task_pool.canceled()) {
    if (task.need_finish_queue == false)
      return;
  }

  /* Allocate buffer for kernel globals. */
  device_only_memory<KernelGlobals> kgbuffer(this, "kernel_globals");
  kgbuffer.alloc_to_device(1);

  /* Construct kernel globals in-place inside the buffer; every exit
   * path below must run the matching explicit destructor. */
  KernelGlobals *kg = new ((void *)kgbuffer.device_pointer)
      KernelGlobals(thread_kernel_globals_init());

  profiler.add_state(&kg->profiler);

  CPUSplitKernel *split_kernel = NULL;
  if (use_split_kernel) {
    split_kernel = new CPUSplitKernel(this);
    if (!split_kernel->load_kernels(requested_features)) {
      /* Mirror the normal teardown path: unregister the profiler state
       * and destroy the placement-new'd globals before freeing. */
      profiler.remove_state(&kg->profiler);
      thread_kernel_globals_free((KernelGlobals *)kgbuffer.device_pointer);
      kg->~KernelGlobals();
      kgbuffer.free();
      delete split_kernel;
      return;
    }
  }

  RenderTile tile;
  DenoisingTask denoising(this, task);
  denoising.profiler = &kg->profiler;

  /* Keep rendering tiles until done. */
  while (task.acquire_tile(this, tile)) {
    if (tile.task == RenderTile::PATH_TRACE) {
      if (use_split_kernel) {
        device_only_memory<uchar> void_buffer(this, "void_buffer");
        split_kernel->path_trace(&task, tile, kgbuffer, void_buffer);
      }
      else {
        path_trace(task, tile, kg);
      }
    }
    else if (tile.task == RenderTile::DENOISE) {
      denoise(denoising, tile);
      task.update_progress(&tile, tile.w * tile.h);
    }

    task.release_tile(tile);

    /* Stop after the current tile on cancel, unless the queue must be
     * drained to completion. */
    if (task_pool.canceled()) {
      if (task.need_finish_queue == false)
        break;
    }
  }

  profiler.remove_state(&kg->profiler);
  thread_kernel_globals_free((KernelGlobals *)kgbuffer.device_pointer);
  kg->~KernelGlobals();
  kgbuffer.free();
  delete split_kernel;
}