/* Evaluate the shader kernel over the task's horizontal pixel range,
 * once per sample, checking for cancellation between samples.
 *
 * Makes a thread-local copy of the kernel globals, and (when OSL is
 * enabled) initializes/releases per-thread OSL state around the loop. */
void thread_shader(DeviceTask &task)
{
  KernelGlobals kg = kernel_globals;

#ifdef WITH_OSL
  OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
#endif

  for (int s = 0; s < task.num_samples; s++) {
    for (int px = task.shader_x; px < task.shader_x + task.shader_w; px++) {
      shader_kernel()(&kg,
                      (uint4 *)task.shader_input,
                      (float4 *)task.shader_output,
                      task.shader_eval_type,
                      task.shader_filter,
                      px,
                      task.offset,
                      s);
    }

    /* Stop early if the task or the whole pool was cancelled. */
    if (task.get_cancel() || task_pool.canceled())
      break;

    task.update_progress(NULL);
  }

#ifdef WITH_OSL
  OSLShader::thread_free(&kg);
#endif
}
/* Number of subtasks a device task should be split into.
 * Shader evaluation tasks are additionally capped at 256 elements per
 * subtask; other task types split purely by thread count. */
int get_split_task_count(DeviceTask &task)
{
  const int thread_count = TaskScheduler::num_threads();

  return (task.type == DeviceTask::SHADER) ? task.get_subtask_count(thread_count, 256) :
                                             task.get_subtask_count(thread_count);
}
/* Number of subtasks a device task should be split into, based on the
 * CPU thread count reported in the device info. Shader evaluation is
 * additionally capped at 256 elements per subtask. */
int get_split_task_count(DeviceTask &task)
{
  const int thread_count = info.cpu_threads;

  return (task.type == DeviceTask::SHADER) ? task.get_subtask_count(thread_count, 256) :
                                             task.get_subtask_count(thread_count);
}
void thread_shader(DeviceTask& task) { KernelGlobals kg = kernel_globals; #ifdef WITH_OSL OSLShader::thread_init(&kg, &kernel_globals, &osl_globals); #endif void(*shader_kernel)(KernelGlobals*, uint4*, float4*, int, int, int, int); #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 if(system_cpu_support_avx2()) shader_kernel = kernel_cpu_avx2_shader; else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX if(system_cpu_support_avx()) shader_kernel = kernel_cpu_avx_shader; else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 if(system_cpu_support_sse41()) shader_kernel = kernel_cpu_sse41_shader; else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 if(system_cpu_support_sse3()) shader_kernel = kernel_cpu_sse3_shader; else #endif #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 if(system_cpu_support_sse2()) shader_kernel = kernel_cpu_sse2_shader; else #endif shader_kernel = kernel_cpu_shader; for(int sample = 0; sample < task.num_samples; sample++) { for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) shader_kernel(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x, task.offset, sample); if(task.get_cancel() || task_pool.canceled()) break; task.update_progress(NULL); } #ifdef WITH_OSL OSLShader::thread_free(&kg); #endif }
/* Queue a device task, splitting it into one subtask per worker thread
 * (shader evaluation subtasks are additionally capped at 256 elements). */
void task_add(DeviceTask &task)
{
  list<DeviceTask> tasks;
  const int num_threads = TaskScheduler::num_threads();

  if (task.type == DeviceTask::SHADER)
    task.split(tasks, num_threads, 256);
  else
    task.split(tasks, num_threads);

  foreach (DeviceTask &subtask, tasks)
    task_pool.push(new CPUDeviceTask(this, subtask));
}
/* Block until the remote task is done, servicing tile acquire/release
 * requests from the peer over the RPC socket in the meantime.
 *
 * Tiles handed out via acquire_tile are remembered in `the_tiles` so
 * that the local `buffers` pointer (which is not sent over the wire)
 * can be restored when the peer releases the tile. */
void task_wait()
{
  RPCSend snd(socket, "task_wait");
  snd.write();

  list<RenderTile> the_tiles;

  /* todo: run this threaded for connecting to multiple clients */
  for (;;) {
    RPCReceive rcv(socket);
    RenderTile tile;

    if (rcv.name == "acquire_tile") {
      /* todo: watch out for recursive calls! */
      if (the_task.acquire_tile(this, tile)) { /* write return as bool */
        the_tiles.push_back(tile);

        RPCSend snd(socket, "acquire_tile");
        snd.add(tile);
        snd.write();
      }
      else {
        RPCSend snd(socket, "acquire_tile_none");
        snd.write();
      }
    }
    else if (rcv.name == "release_tile") {
      rcv.read(tile);

      /* Match the released tile against the acquired list to recover
       * the local buffers pointer, then forget it. */
      for (list<RenderTile>::iterator it = the_tiles.begin(); it != the_tiles.end(); it++) {
        if (tile.x == it->x && tile.y == it->y && tile.start_sample == it->start_sample) {
          tile.buffers = it->buffers;
          the_tiles.erase(it);
          break;
        }
      }

      assert(tile.buffers != NULL);

      the_task.release_tile(tile);

      RPCSend snd(socket, "release_tile");
      snd.write();
    }
    else if (rcv.name == "task_wait_done")
      break;
  }
}
/* Queue a device task on the CPU: upload texture info first, then split
 * the task into per-thread subtasks (shader evaluation additionally
 * capped at 256 elements per subtask) and push them to the pool. */
void task_add(DeviceTask &task)
{
  /* Load texture info. */
  load_texture_info();

  /* split task into smaller ones */
  list<DeviceTask> tasks;
  const int num_threads = info.cpu_threads;

  if (task.type == DeviceTask::SHADER)
    task.split(tasks, num_threads, 256);
  else
    task.split(tasks, num_threads);

  foreach (DeviceTask &subtask, tasks)
    task_pool.push(new CPUDeviceTask(this, subtask));
}
/* Queue a device task, split into one subtask per worker thread. */
void task_add(DeviceTask &task)
{
  list<DeviceTask> tasks;
  task.split(tasks, TaskScheduler::num_threads());

  foreach (DeviceTask &subtask, tasks)
    task_pool.push(new CPUDeviceTask(this, subtask));
}
/* Queue a device task split into smaller pieces. Splitting beyond the
 * thread count helps balance uneven workloads, where some parts of the
 * image render slower than others. */
void task_add(DeviceTask &task)
{
  list<DeviceTask> tasks;
  task.split(tasks, TaskScheduler::num_threads());

  foreach (DeviceTask &subtask, tasks)
    task_pool.push(new CPUDeviceTask(this, subtask));
}
/* Render one tile: run the path-trace kernel for every pixel, sample by
 * sample, with optional cryptomatte coverage accumulation and per-sample
 * progress/cancellation handling. */
void path_trace(DeviceTask &task, RenderTile &tile, KernelGlobals *kg)
{
  const bool use_coverage = kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE;

  /* Accumulate render time into the tile's buffers. */
  scoped_timer timer(&tile.buffers->render_time);

  Coverage coverage(kg, tile);
  if (use_coverage) {
    coverage.init_path_trace();
  }

  float *buffer = (float *)tile.buffer;
  const int sample_start = tile.start_sample;
  const int sample_end = tile.start_sample + tile.num_samples;

  /* Needed for Embree. */
  SIMD_SET_FLUSH_TO_ZERO;

  for (int sample = sample_start; sample < sample_end; sample++) {
    if (task.get_cancel() || task_pool.canceled()) {
      /* Keep going when a finish queue is required; otherwise stop. */
      if (task.need_finish_queue == false)
        break;
    }

    for (int y = tile.y; y < tile.y + tile.h; y++) {
      for (int x = tile.x; x < tile.x + tile.w; x++) {
        if (use_coverage) {
          coverage.init_pixel(x, y);
        }
        path_trace_kernel()(kg, buffer, sample, x, y, tile.offset, tile.stride);
      }
    }

    tile.sample = sample + 1;

    task.update_progress(&tile, tile.w * tile.h);
  }

  if (use_coverage) {
    coverage.finalize();
  }
}
/* Queue an OpenCL device task: optionally split it to work around a
 * platform issue, then run each piece synchronously by type.
 *
 * Fix: removed a dead `DeviceTask task;` local declared just before the
 * foreach loop — it was never used and was immediately shadowed by the
 * loop variable of the same name. */
void task_add(DeviceTask &maintask)
{
  list<DeviceTask> tasks;

  /* arbitrary limit to work around apple ATI opencl issue */
  if (platform_name == "Apple")
    maintask.split_max_size(tasks, 76800);
  else
    tasks.push_back(maintask);

  foreach (DeviceTask &task, tasks) {
    if (task.type == DeviceTask::TONEMAP)
      tonemap(task);
    else if (task.type == DeviceTask::PATH_TRACE)
      path_trace(task);
  }
}
/* Legacy shader evaluation entry point: each optimized kernel variant
 * has its own duplicated pixel loop, selected at runtime by CPU
 * capability checks inside a chain of #ifdef'd if/else branches; the
 * final branch is the generic fallback kernel.
 *
 * NOTE(review): unlike later versions, cancellation here is checked per
 * pixel rather than per sample, and no offset/sample arguments are
 * passed to the kernel. */
void thread_shader(DeviceTask& task)
{
  KernelGlobals kg = kernel_globals;

#ifdef WITH_OSL
  OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
#endif

#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
  if(system_cpu_support_avx()) {
    for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
      kernel_cpu_avx_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);

      if(task.get_cancel() || task_pool.canceled())
        break;
    }
  }
  else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
  if(system_cpu_support_sse41()) {
    for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
      kernel_cpu_sse41_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);

      if(task.get_cancel() || task_pool.canceled())
        break;
    }
  }
  else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
  if(system_cpu_support_sse3()) {
    for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
      kernel_cpu_sse3_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);

      if(task.get_cancel() || task_pool.canceled())
        break;
    }
  }
  else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
  if(system_cpu_support_sse2()) {
    for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
      kernel_cpu_sse2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);

      if(task.get_cancel() || task_pool.canceled())
        break;
    }
  }
  else
#endif
  {
    /* Generic fallback kernel when no optimized variant applies. */
    for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
      kernel_cpu_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);

      if(task.get_cancel() || task_pool.canceled())
        break;
    }
  }

#ifdef WITH_OSL
  OSLShader::thread_free(&kg);
#endif
}
/* Path tracing worker: repeatedly acquires tiles from the task and
 * renders them sample by sample, using the best optimized CPU kernel
 * variant selected at runtime (duplicated loop per instruction set).
 *
 * Cancellation is checked per sample; when the task requires a finish
 * queue (need_finish_queue), rendering continues despite cancellation
 * so queued tiles are still completed. */
void thread_path_trace(DeviceTask& task)
{
  if(task_pool.canceled()) {
    if(task.need_finish_queue == false)
      return;
  }

  KernelGlobals kg = kernel_globals;

#ifdef WITH_OSL
  OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
#endif

  RenderTile tile;

  while(task.acquire_tile(this, tile)) {
    float *render_buffer = (float*)tile.buffer;
    uint *rng_state = (uint*)tile.rng_state;
    int start_sample = tile.start_sample;
    int end_sample = tile.start_sample + tile.num_samples;

#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
    if(system_cpu_support_avx()) {
      for(int sample = start_sample; sample < end_sample; sample++) {
        if (task.get_cancel() || task_pool.canceled()) {
          if(task.need_finish_queue == false)
            break;
        }

        for(int y = tile.y; y < tile.y + tile.h; y++) {
          for(int x = tile.x; x < tile.x + tile.w; x++) {
            kernel_cpu_avx_path_trace(&kg, render_buffer, rng_state, sample, x, y, tile.offset, tile.stride);
          }
        }

        tile.sample = sample + 1;

        task.update_progress(tile);
      }
    }
    else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
    if(system_cpu_support_sse41()) {
      for(int sample = start_sample; sample < end_sample; sample++) {
        if (task.get_cancel() || task_pool.canceled()) {
          if(task.need_finish_queue == false)
            break;
        }

        for(int y = tile.y; y < tile.y + tile.h; y++) {
          for(int x = tile.x; x < tile.x + tile.w; x++) {
            kernel_cpu_sse41_path_trace(&kg, render_buffer, rng_state, sample, x, y, tile.offset, tile.stride);
          }
        }

        tile.sample = sample + 1;

        task.update_progress(tile);
      }
    }
    else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
    if(system_cpu_support_sse3()) {
      for(int sample = start_sample; sample < end_sample; sample++) {
        if (task.get_cancel() || task_pool.canceled()) {
          if(task.need_finish_queue == false)
            break;
        }

        for(int y = tile.y; y < tile.y + tile.h; y++) {
          for(int x = tile.x; x < tile.x + tile.w; x++) {
            kernel_cpu_sse3_path_trace(&kg, render_buffer, rng_state, sample, x, y, tile.offset, tile.stride);
          }
        }

        tile.sample = sample + 1;

        task.update_progress(tile);
      }
    }
    else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
    if(system_cpu_support_sse2()) {
      for(int sample = start_sample; sample < end_sample; sample++) {
        if (task.get_cancel() || task_pool.canceled()) {
          if(task.need_finish_queue == false)
            break;
        }

        for(int y = tile.y; y < tile.y + tile.h; y++) {
          for(int x = tile.x; x < tile.x + tile.w; x++) {
            kernel_cpu_sse2_path_trace(&kg, render_buffer, rng_state, sample, x, y, tile.offset, tile.stride);
          }
        }

        tile.sample = sample + 1;

        task.update_progress(tile);
      }
    }
    else
#endif
    {
      /* Generic fallback kernel when no optimized variant applies. */
      for(int sample = start_sample; sample < end_sample; sample++) {
        if (task.get_cancel() || task_pool.canceled()) {
          if(task.need_finish_queue == false)
            break;
        }

        for(int y = tile.y; y < tile.y + tile.h; y++) {
          for(int x = tile.x; x < tile.x + tile.w; x++) {
            kernel_cpu_path_trace(&kg, render_buffer, rng_state, sample, x, y, tile.offset, tile.stride);
          }
        }

        tile.sample = sample + 1;

        task.update_progress(tile);
      }
    }

    task.release_tile(tile);

    if(task_pool.canceled()) {
      if(task.need_finish_queue == false)
        break;
    }
  }

#ifdef WITH_OSL
  OSLShader::thread_free(&kg);
#endif
}
/* Block until the remote task is done, servicing tile acquire/release
 * requests over the RPC socket in the meantime.
 *
 * `rpc_lock` guards all socket reads/writes; it is deliberately dropped
 * before calling into acquire_tile/release_tile, which may themselves
 * perform blocking work, and re-acquired for each send. Tiles handed out
 * are remembered in `the_tiles` so the local `buffers` pointer (not
 * transmitted over the wire) can be restored on release. */
void task_wait()
{
  thread_scoped_lock lock(rpc_lock);

  RPCSend snd(socket, &error_func, "task_wait");
  snd.write();

  lock.unlock();

  TileList the_tiles;

  /* todo: run this threaded for connecting to multiple clients */
  for(;;) {
    if(error_func.have_error())
      break;

    RenderTile tile;

    lock.lock();
    RPCReceive rcv(socket, &error_func);

    if(rcv.name == "acquire_tile") {
      lock.unlock();

      /* todo: watch out for recursive calls! */
      if(the_task.acquire_tile(this, tile)) { /* write return as bool */
        the_tiles.push_back(tile);

        lock.lock();
        RPCSend snd(socket, &error_func, "acquire_tile");
        snd.add(tile);
        snd.write();
        lock.unlock();
      }
      else {
        lock.lock();
        RPCSend snd(socket, &error_func, "acquire_tile_none");
        snd.write();
        lock.unlock();
      }
    }
    else if(rcv.name == "release_tile") {
      rcv.read(tile);
      lock.unlock();

      /* Recover the local buffers pointer for the released tile. */
      TileList::iterator it = tile_list_find(the_tiles, tile);
      if (it != the_tiles.end()) {
        tile.buffers = it->buffers;
        the_tiles.erase(it);
      }

      assert(tile.buffers != NULL);

      the_task.release_tile(tile);

      lock.lock();
      RPCSend snd(socket, &error_func, "release_tile");
      snd.write();
      lock.unlock();
    }
    else if(rcv.name == "task_wait_done") {
      lock.unlock();
      break;
    }
    else
      lock.unlock();
  }
}
/* Render worker: acquires tiles and dispatches them to path tracing
 * (split-kernel or megakernel) or denoising until the task runs out.
 *
 * Kernel globals are placement-new'd into device-only memory so the
 * split kernel can treat them as a device buffer; note the matching
 * explicit destructor call and thread_kernel_globals_free at the end. */
void thread_render(DeviceTask &task)
{
  if (task_pool.canceled()) {
    if (task.need_finish_queue == false)
      return;
  }

  /* allocate buffer for kernel globals */
  device_only_memory<KernelGlobals> kgbuffer(this, "kernel_globals");
  kgbuffer.alloc_to_device(1);

  /* Construct per-thread kernel globals in-place inside the buffer. */
  KernelGlobals *kg = new ((void *)kgbuffer.device_pointer)
      KernelGlobals(thread_kernel_globals_init());

  profiler.add_state(&kg->profiler);

  CPUSplitKernel *split_kernel = NULL;
  if (use_split_kernel) {
    split_kernel = new CPUSplitKernel(this);
    if (!split_kernel->load_kernels(requested_features)) {
      /* Kernel load failed: tear down everything allocated so far. */
      thread_kernel_globals_free((KernelGlobals *)kgbuffer.device_pointer);
      kgbuffer.free();
      delete split_kernel;
      return;
    }
  }

  RenderTile tile;
  DenoisingTask denoising(this, task);
  denoising.profiler = &kg->profiler;

  while (task.acquire_tile(this, tile)) {
    if (tile.task == RenderTile::PATH_TRACE) {
      if (use_split_kernel) {
        device_only_memory<uchar> void_buffer(this, "void_buffer");
        split_kernel->path_trace(&task, tile, kgbuffer, void_buffer);
      }
      else {
        path_trace(task, tile, kg);
      }
    }
    else if (tile.task == RenderTile::DENOISE) {
      denoise(denoising, tile);
      task.update_progress(&tile, tile.w * tile.h);
    }

    task.release_tile(tile);

    if (task_pool.canceled()) {
      if (task.need_finish_queue == false)
        break;
    }
  }

  profiler.remove_state(&kg->profiler);

  /* Release globals' internal resources, then destroy the in-place
   * object and free the backing device buffer. */
  thread_kernel_globals_free((KernelGlobals *)kgbuffer.device_pointer);
  kg->~KernelGlobals();
  kgbuffer.free();

  delete split_kernel;
}