/* Synchronize image slots with the device.
 *
 * Slots without users are freed on the device; slots flagged need_load are
 * queued on a local TaskPool for loading. Returns immediately when
 * need_update is not set, and clears need_update after all queued work has
 * been waited for. */
void ImageManager::device_update(Device *device, Scene *scene, Progress& progress)
{
	if(!need_update) {
		return;
	}

	TaskPool pool;

	for(int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
		for(size_t slot = 0; slot < images[type].size(); slot++) {
			/* Empty slot, nothing to do. */
			if(!images[type][slot]) {
				continue;
			}

			/* Unused image: release it on the device. */
			if(images[type][slot]->users == 0) {
				device_free_image(device, (ImageDataType)type, slot);
				continue;
			}

			if(!images[type][slot]->need_load) {
				continue;
			}

			/* With an OSL texture system set, only images carrying
			 * builtin_data are loaded through this path. */
			if(osl_texture_system && !images[type][slot]->builtin_data) {
				continue;
			}

			pool.push(function_bind(&ImageManager::device_load_image,
			                        this,
			                        device,
			                        scene,
			                        (ImageDataType)type,
			                        slot,
			                        &progress));
		}
	}

	pool.wait_work();

	need_update = false;
}
void thread_shader(DeviceTask& task) { KernelGlobals kg = kernel_globals; #ifdef WITH_OSL OSLShader::thread_init(&kg, &kernel_globals, &osl_globals); #endif #ifdef WITH_OPTIMIZED_KERNEL if(system_cpu_support_optimized()) { for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { kernel_cpu_optimized_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x); if(task_pool.cancelled()) break; } } else #endif { for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { kernel_cpu_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x); if(task_pool.cancelled()) break; } } #ifdef WITH_OSL OSLShader::thread_free(&kg); #endif }
/* Pushes 100 tasks onto a TaskPool and checks, through the summary filled in
 * by wait_work(), that all of them were handled. */
TEST(util_task, basic)
{
	TaskScheduler::init(0);

	TaskPool pool;
	for (int pushed = 0; pushed != 100; ++pushed) {
		pool.push(function_bind(task_run));
	}

	TaskPool::Summary summary;
	pool.wait_work(&summary);

	/* The scheduler is shut down before the result is inspected. */
	TaskScheduler::exit();

	EXPECT_EQ(summary.num_tasks_handled, 100);
}
/* Stops the task pool first, then pushes the CUDA context and detaches
 * from it. */
~CUDADevice()
{
	task_pool.stop();

	cuda_push_context();

	/* NOTE(review): cuda_assert is invoked without a trailing ';' exactly as
	 * before; presumably the macro expands to a complete statement. */
	cuda_assert(cuCtxDetach(cuContext))
}
/**
 * Schedules conversion of a legacy savegame.
 *
 * Registers this object in the Loop's iteration audience and starts a new
 * ConvertSavegameTask in the @c convertSavegameTasks pool.
 *
 * @param sourcePath  Passed through to the ConvertSavegameTask.
 * @param gameId      Passed through to the ConvertSavegameTask.
 */
void beginConvertLegacySavegame(String const &sourcePath, String const &gameId)
{
    LOG_AS("SaveGames");
    LOG_TRACE("Scheduling legacy savegame conversion for %s (gameId:%s)")
            << sourcePath
            << gameId;

    // Start receiving loop iteration notifications before the task is started.
    Loop::get().audienceForIteration() += this;

    convertSavegameTasks.start(new ConvertSavegameTask(sourcePath, gameId));
}
/* Run the shader kernel for task.num_samples samples over the index range
 * [task.shader_x, task.shader_x + task.shader_w), using a local copy of the
 * kernel globals. After each completed sample the loop checks for
 * cancellation and otherwise reports progress. */
void thread_shader(DeviceTask &task)
{
  KernelGlobals kg = kernel_globals;

#ifdef WITH_OSL
  OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
#endif

  for (int sample = 0; sample < task.num_samples; sample++) {
    for (int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
      shader_kernel()(&kg,
                      (uint4 *)task.shader_input,
                      (float4 *)task.shader_output,
                      task.shader_eval_type,
                      task.shader_filter,
                      x,
                      task.offset,
                      sample);
    }

    /* Cancellation is only looked at between samples, never mid-sample. */
    const bool stop_requested = task.get_cancel() || task_pool.canceled();
    if (stop_requested) {
      break;
    }

    task.update_progress(NULL);
  }

#ifdef WITH_OSL
  OSLShader::thread_free(&kg);
#endif
}
/// Create a task in the given pool and wrap it in a TaskHandle.
/// @param pool              pool that creates the task; also stored in the returned handle
/// @param parentTask        optional parent handle; may be null, in which case no parent task is passed on
/// @param threadedFunction  callback handed to TaskPool::createTask
/// @return a TaskHandle built from the created task and the address of @p pool
TaskHandle CreateTask(TaskPool& pool,
                      TaskHandle* parentTask,
                      const DELEGATE_CBK<void, const Task&>& threadedFunction)
{
    // Unwrap the parent handle (if any) to the task it refers to.
    auto parent = parentTask ? parentTask->_task : nullptr;

    Task* freeTask = pool.createTask(parent, threadedFunction);

    return TaskHandle(freeTask, &pool);
}
/* Split the incoming task into TaskScheduler::num_threads() pieces and push
 * each piece to the task pool as a CPUDeviceTask. */
void task_add(DeviceTask& task)
{
	list<DeviceTask> subtasks;
	task.split(subtasks, TaskScheduler::num_threads());

	foreach(DeviceTask& subtask, subtasks) {
		task_pool.push(new CPUDeviceTask(this, subtask));
	}
}
/* Split the incoming task into smaller ones and push each one to the task
 * pool as a CPUDeviceTask.
 *
 * NOTE(review): the previous comment said the task is split into more pieces
 * than there are threads (to balance uneven workloads), but the split count
 * passed here is exactly TaskScheduler::num_threads(). Confirm which of the
 * two is intended. */
void task_add(DeviceTask& task)
{
	list<DeviceTask> subtasks;
	task.split(subtasks, TaskScheduler::num_threads());

	foreach(DeviceTask& subtask, subtasks) {
		task_pool.push(new CPUDeviceTask(this, subtask));
	}
}
void parallel_for(TaskPool& pool, const DELEGATE_CBK<void, const Task&, U32, U32>& cbk, U32 count, U32 partitionSize, TaskPriority priority, bool noWait, bool useCurrentThread) { if (count > 0) { const U32 crtPartitionSize = std::min(partitionSize, count); const U32 partitionCount = count / crtPartitionSize; const U32 remainder = count % crtPartitionSize; U32 adjustedCount = partitionCount; if (useCurrentThread) { adjustedCount -= 1; } std::atomic_uint jobCount = adjustedCount + (remainder > 0 ? 1 : 0); for (U32 i = 0; i < adjustedCount; ++i) { const U32 start = i * crtPartitionSize; const U32 end = start + crtPartitionSize; CreateTask(pool, nullptr, [&cbk, &jobCount, start, end](const Task& parentTask) { cbk(parentTask, start, end); jobCount.fetch_sub(1); }).startTask(priority); } if (remainder > 0) { CreateTask(pool, nullptr, [&cbk, &jobCount, count, remainder](const Task& parentTask) { cbk(parentTask, count - remainder, count); jobCount.fetch_sub(1); }).startTask(priority); } if (useCurrentThread) { TaskHandle threadTask = CreateTask(pool, [](const Task& parentTask) {ACKNOWLEDGE_UNUSED(parentTask); }); const U32 start = adjustedCount * crtPartitionSize; const U32 end = start + crtPartitionSize; cbk(*threadTask._task, start, end); } if (!noWait) { while (jobCount.load() > 0) { pool.threadWaiting(); } } } }
/* Synchronize both image lists (images and float_images) with the device.
 *
 * Slots without users are freed; slots flagged need_load are queued on a
 * local TaskPool for loading, but only when no OSL texture system is set.
 * Float image slots are addressed with an offset of TEX_IMAGE_FLOAT_START.
 * After the queued work has finished, images are packed when pack_images is
 * set, and need_update is cleared. Does nothing when need_update is not set. */
void ImageManager::device_update(Device *device, DeviceScene *dscene, Progress& progress)
{
	if(!need_update) {
		return;
	}

	TaskPool pool;

	for(size_t slot = 0; slot < images.size(); slot++) {
		if(!images[slot]) {
			continue;
		}

		if(images[slot]->users == 0) {
			device_free_image(device, dscene, slot);
			continue;
		}

		if(images[slot]->need_load && !osl_texture_system) {
			pool.push(function_bind(&ImageManager::device_load_image,
			                        this,
			                        device,
			                        dscene,
			                        slot,
			                        &progress));
		}
	}

	for(size_t slot = 0; slot < float_images.size(); slot++) {
		if(!float_images[slot]) {
			continue;
		}

		if(float_images[slot]->users == 0) {
			device_free_image(device, dscene, slot + TEX_IMAGE_FLOAT_START);
			continue;
		}

		if(float_images[slot]->need_load && !osl_texture_system) {
			pool.push(function_bind(&ImageManager::device_load_image,
			                        this,
			                        device,
			                        dscene,
			                        slot + TEX_IMAGE_FLOAT_START,
			                        &progress));
		}
	}

	/* All loads must be complete before packing. */
	pool.wait_work();

	if(pack_images) {
		device_pack_images(device, dscene, progress);
	}

	need_update = false;
}
void thread_shader(DeviceTask& task) { KernelGlobals kg = kernel_globals; #ifdef WITH_OSL OSLShader::thread_init(&kg, &kernel_globals, &osl_globals); #endif void(*shader_kernel)(KernelGlobals*, uint4*, float4*, int, int, int, int); #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 if(system_cpu_support_avx2()) shader_kernel = kernel_cpu_avx2_shader; else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX if(system_cpu_support_avx()) shader_kernel = kernel_cpu_avx_shader; else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 if(system_cpu_support_sse41()) shader_kernel = kernel_cpu_sse41_shader; else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 if(system_cpu_support_sse3()) shader_kernel = kernel_cpu_sse3_shader; else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 if(system_cpu_support_sse2()) shader_kernel = kernel_cpu_sse2_shader; else #endif shader_kernel = kernel_cpu_shader; for(int sample = 0; sample < task.num_samples; sample++) { for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) shader_kernel(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, task.offset, sample); if(task.get_cancel() || task_pool.canceled()) break; task.update_progress(NULL); } #ifdef WITH_OSL OSLShader::thread_free(&kg); #endif }
/* Split the incoming task into smaller ones and push each one to the task
 * pool as a CPUDeviceTask. SHADER tasks pass an extra value of 256 to
 * split(); all other task types use split()'s two-argument form. */
void task_add(DeviceTask& task)
{
	list<DeviceTask> subtasks;

	if(task.type != DeviceTask::SHADER) {
		task.split(subtasks, TaskScheduler::num_threads());
	}
	else {
		task.split(subtasks, TaskScheduler::num_threads(), 256);
	}

	foreach(DeviceTask& subtask, subtasks) {
		task_pool.push(new CPUDeviceTask(this, subtask));
	}
}
void rewrapCache() { if(cache.isEmpty()) return; if(isRewrapping()) { // Cancel an existing rewrap. cancelRewrap++; } // Start a rewrapping task that goes through all the existing entries, // starting from the latest entry. rewrapPool.start(new RewrapTask(this, cache.size() - 1, contentWidth())); }
/* Load texture info, then split the incoming task into smaller ones and push
 * each one to the task pool as a CPUDeviceTask. The split count comes from
 * info.cpu_threads; SHADER tasks additionally pass 256 to split(). */
void task_add(DeviceTask &task)
{
  /* Texture info is loaded before any work is queued. */
  load_texture_info();

  list<DeviceTask> subtasks;

  if (task.type != DeviceTask::SHADER) {
    task.split(subtasks, info.cpu_threads);
  }
  else {
    task.split(subtasks, info.cpu_threads, 256);
  }

  foreach (DeviceTask &subtask, subtasks) {
    task_pool.push(new CPUDeviceTask(this, subtask));
  }
}
void loopIteration() { /// @todo Refactor: TaskPool has a signal (or audience) when all tasks are complete. /// No need to check on every loop iteration. if (convertSavegameTasks.isDone()) { LOG_AS("SaveGames"); Loop::get().audienceForIteration() -= this; try { // The newly converted savegame(s) should now be somewhere in /home/savegames FileSystem::get().root().locate<Folder>("/home/savegames").populate(); } catch (Folder::NotFoundError const &) {} // Ignore. } }
/* Path trace one tile for samples
 * [tile.start_sample, tile.start_sample + tile.num_samples).
 *
 * For each sample every pixel of the tile is passed to the path trace
 * kernel, then tile.sample is advanced and progress is reported. When the
 * CRYPT_ACCURATE bit is set in the film's cryptomatte_passes, the Coverage
 * helper is initialised up front, notified per pixel, and finalized at the
 * end. A scoped_timer is constructed on tile.buffers->render_time for the
 * duration of the call. */
void path_trace(DeviceTask &task, RenderTile &tile, KernelGlobals *kg)
{
  const bool use_coverage = kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE;

  scoped_timer timer(&tile.buffers->render_time);

  Coverage coverage(kg, tile);
  if (use_coverage) {
    coverage.init_path_trace();
  }

  float *render_buffer = (float *)tile.buffer;
  const int first_sample = tile.start_sample;
  const int last_sample = tile.start_sample + tile.num_samples;

  /* Needed for Embree. */
  SIMD_SET_FLUSH_TO_ZERO;

  for (int sample = first_sample; sample < last_sample; sample++) {
    /* Stop on cancellation, unless the task asks for its queue to be finished. */
    if ((task.get_cancel() || task_pool.canceled()) && task.need_finish_queue == false) {
      break;
    }

    const int y_end = tile.y + tile.h;
    const int x_end = tile.x + tile.w;

    for (int y = tile.y; y < y_end; y++) {
      for (int x = tile.x; x < x_end; x++) {
        if (use_coverage) {
          coverage.init_pixel(x, y);
        }

        path_trace_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride);
      }
    }

    tile.sample = sample + 1;

    task.update_progress(&tile, tile.w * tile.h);
  }

  if (use_coverage) {
    coverage.finalize();
  }
}
/* Stops the task pool, then releases what the device holds: the null memory
 * object, every vector stored in const_mem_map, both kernels, the program,
 * the command queue and the context. Each OpenCL handle is only released
 * when it is non-null. */
~OpenCLDevice()
{
	task_pool.stop();

	if(null_mem) {
		clReleaseMemObject(CL_MEM_PTR(null_mem));
	}

	/* Free the device memory behind each constant-memory vector, then the
	 * vector object itself. */
	for(map<string, device_vector<uchar>*>::iterator entry = const_mem_map.begin();
	    entry != const_mem_map.end();
	    entry++)
	{
		mem_free(*(entry->second));
		delete entry->second;
	}

	if(ckPathTraceKernel) {
		clReleaseKernel(ckPathTraceKernel);
	}

	if(ckFilmConvertKernel) {
		clReleaseKernel(ckFilmConvertKernel);
	}

	if(cpProgram) {
		clReleaseProgram(cpProgram);
	}

	if(cqCommandQueue) {
		clReleaseCommandQueue(cqCommandQueue);
	}

	if(cxContext) {
		clReleaseContext(cxContext);
	}
}
/* Stops the device's task pool on destruction. */
~CPUDevice()
{
	task_pool.stop();
}
/* Forwards a cancel request to the device's task pool. */
void task_cancel()
{
	task_pool.cancel();
}
/* Waits on the device's task pool via wait_work(). */
void task_wait()
{
	task_pool.wait_work();
}
/* Stops the device's task pool, then frees texture_info. */
~CPUDevice()
{
	task_pool.stop();

	texture_info.free();
}
void thread_shader(DeviceTask& task) { KernelGlobals kg = kernel_globals; #ifdef WITH_OSL OSLShader::thread_init(&kg, &kernel_globals, &osl_globals); #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX if(system_cpu_support_avx()) { for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { kernel_cpu_avx_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x); if(task.get_cancel() || task_pool.canceled()) break; } } else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 if(system_cpu_support_sse41()) { for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { kernel_cpu_sse41_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x); if(task.get_cancel() || task_pool.canceled()) break; } } else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 if(system_cpu_support_sse3()) { for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { kernel_cpu_sse3_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x); if(task.get_cancel() || task_pool.canceled()) break; } } else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 if(system_cpu_support_sse2()) { for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { kernel_cpu_sse2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x); if(task.get_cancel() || task_pool.canceled()) break; } } else #endif { for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) { kernel_cpu_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x); if(task.get_cancel() || task_pool.canceled()) break; } } #ifdef WITH_OSL OSLShader::thread_free(&kg); #endif }
void thread_path_trace(DeviceTask& task) { if(task_pool.canceled()) { if(task.need_finish_queue == false) return; } KernelGlobals kg = kernel_globals; #ifdef WITH_OSL OSLShader::thread_init(&kg, &kernel_globals, &osl_globals); #endif RenderTile tile; while(task.acquire_tile(this, tile)) { float *render_buffer = (float*)tile.buffer; uint *rng_state = (uint*)tile.rng_state; int start_sample = tile.start_sample; int end_sample = tile.start_sample + tile.num_samples; #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX if(system_cpu_support_avx()) { for(int sample = start_sample; sample < end_sample; sample++) { if (task.get_cancel() || task_pool.canceled()) { if(task.need_finish_queue == false) break; } for(int y = tile.y; y < tile.y + tile.h; y++) { for(int x = tile.x; x < tile.x + tile.w; x++) { kernel_cpu_avx_path_trace(&kg, render_buffer, rng_state, sample, x, y, tile.offset, tile.stride); } } tile.sample = sample + 1; task.update_progress(tile); } } else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 if(system_cpu_support_sse41()) { for(int sample = start_sample; sample < end_sample; sample++) { if (task.get_cancel() || task_pool.canceled()) { if(task.need_finish_queue == false) break; } for(int y = tile.y; y < tile.y + tile.h; y++) { for(int x = tile.x; x < tile.x + tile.w; x++) { kernel_cpu_sse41_path_trace(&kg, render_buffer, rng_state, sample, x, y, tile.offset, tile.stride); } } tile.sample = sample + 1; task.update_progress(tile); } } else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 if(system_cpu_support_sse3()) { for(int sample = start_sample; sample < end_sample; sample++) { if (task.get_cancel() || task_pool.canceled()) { if(task.need_finish_queue == false) break; } for(int y = tile.y; y < tile.y + tile.h; y++) { for(int x = tile.x; x < tile.x + tile.w; x++) { kernel_cpu_sse3_path_trace(&kg, render_buffer, rng_state, sample, x, y, tile.offset, tile.stride); } } tile.sample = sample + 1; task.update_progress(tile); } } else #endif #ifdef 
WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 if(system_cpu_support_sse2()) { for(int sample = start_sample; sample < end_sample; sample++) { if (task.get_cancel() || task_pool.canceled()) { if(task.need_finish_queue == false) break; } for(int y = tile.y; y < tile.y + tile.h; y++) { for(int x = tile.x; x < tile.x + tile.w; x++) { kernel_cpu_sse2_path_trace(&kg, render_buffer, rng_state, sample, x, y, tile.offset, tile.stride); } } tile.sample = sample + 1; task.update_progress(tile); } } else #endif { for(int sample = start_sample; sample < end_sample; sample++) { if (task.get_cancel() || task_pool.canceled()) { if(task.need_finish_queue == false) break; } for(int y = tile.y; y < tile.y + tile.h; y++) { for(int x = tile.x; x < tile.x + tile.w; x++) { kernel_cpu_path_trace(&kg, render_buffer, rng_state, sample, x, y, tile.offset, tile.stride); } } tile.sample = sample + 1; task.update_progress(tile); } } task.release_tile(tile); if(task_pool.canceled()) { if(task.need_finish_queue == false) break; } } #ifdef WITH_OSL OSLShader::thread_free(&kg); #endif }
/* Worker entry point for a render task.
 *
 * Constructs a KernelGlobals in a device-only buffer, registers its
 * profiler state, optionally creates and loads the split kernel, then
 * acquires tiles from the task and path traces or denoises each one. The
 * tile loop ends when no tile can be acquired, or when the task pool is
 * canceled and task.need_finish_queue is not set.
 *
 * Both exit paths after the kernel globals were created (split kernel load
 * failure and normal completion) run the same teardown sequence: remove the
 * profiler state, free the kernel globals' contents, run the KernelGlobals
 * destructor, free the buffer and delete the split kernel. */
void thread_render(DeviceTask &task)
{
  if (task_pool.canceled()) {
    if (task.need_finish_queue == false)
      return;
  }

  /* allocate buffer for kernel globals */
  device_only_memory<KernelGlobals> kgbuffer(this, "kernel_globals");
  kgbuffer.alloc_to_device(1);

  /* Constructed in place inside kgbuffer, so it has to be destroyed with an
   * explicit destructor call before the buffer is freed. */
  KernelGlobals *kg = new ((void *)kgbuffer.device_pointer)
      KernelGlobals(thread_kernel_globals_init());

  profiler.add_state(&kg->profiler);

  CPUSplitKernel *split_kernel = NULL;
  if (use_split_kernel) {
    split_kernel = new CPUSplitKernel(this);
    if (!split_kernel->load_kernels(requested_features)) {
      /* Bail out, but undo everything set up above first. Without removing
       * the profiler state here, the profiler would keep a pointer into the
       * buffer that is freed just below. */
      profiler.remove_state(&kg->profiler);
      thread_kernel_globals_free((KernelGlobals *)kgbuffer.device_pointer);
      kg->~KernelGlobals();
      kgbuffer.free();
      delete split_kernel;
      return;
    }
  }

  RenderTile tile;
  DenoisingTask denoising(this, task);
  denoising.profiler = &kg->profiler;

  while (task.acquire_tile(this, tile)) {
    if (tile.task == RenderTile::PATH_TRACE) {
      if (use_split_kernel) {
        device_only_memory<uchar> void_buffer(this, "void_buffer");
        split_kernel->path_trace(&task, tile, kgbuffer, void_buffer);
      }
      else {
        path_trace(task, tile, kg);
      }
    }
    else if (tile.task == RenderTile::DENOISE) {
      denoise(denoising, tile);

      task.update_progress(&tile, tile.w * tile.h);
    }

    task.release_tile(tile);

    if (task_pool.canceled()) {
      if (task.need_finish_queue == false)
        break;
    }
  }

  profiler.remove_state(&kg->profiler);

  thread_kernel_globals_free((KernelGlobals *)kgbuffer.device_pointer);
  kg->~KernelGlobals();
  kgbuffer.free();
  delete split_kernel;
}
/// Free-function wrapper that forwards to TaskPool::waitForAllTasks.
/// @param pool            pool to wait on
/// @param yield           forwarded unchanged to the pool
/// @param flushCallbacks  forwarded unchanged to the pool
/// @param foceClear       forwarded unchanged to the pool
void WaitForAllTasks(TaskPool& pool,
                     bool yield,
                     bool flushCallbacks,
                     bool foceClear)
{
    pool.waitForAllTasks(yield, flushCallbacks, foceClear);
}
/* Wrap the task in a new OpenCLDeviceTask and push it to the task pool.
 * The task is pushed as a whole, without being split. */
void task_add(DeviceTask& task)
{
	task_pool.push(new OpenCLDeviceTask(this, task));
}
/**
 * Cancels all rewrapping: sets the cancel marker to CancelAllRewraps, blocks
 * until the rewrap pool is done, and then resets the marker to zero.
 */
void cancelRewraps()
{
    // The marker must be set before waiting and cleared only afterwards.
    cancelRewrap = CancelAllRewraps;

    rewrapPool.waitForDone();

    cancelRewrap = 0;
}
/// Blocks until the savegame conversion task pool is done before the
/// instance goes away.
~Impl()
{
    convertSavegameTasks.waitForDone();
}
/// @return @c true while the rewrap pool is not yet done.
bool isRewrapping() const
{
    if(rewrapPool.isDone())
    {
        return false;
    }
    return true;
}