bool compile_kernel(const string& kernel_path, const string& kernel_md5) { /* we compile kernels consisting of many files. unfortunately opencl kernel caches do not seem to recognize changes in included files. so we force recompile on changes by adding the md5 hash of all files */ string source = "#include \"kernel.cl\" // " + kernel_md5 + "\n"; source = path_source_replace_includes(source, kernel_path); size_t source_len = source.size(); const char *source_str = source.c_str(); cpProgram = clCreateProgramWithSource(cxContext, 1, &source_str, &source_len, &ciErr); if(opencl_error(ciErr)) return false; double starttime = time_dt(); printf("Compiling OpenCL kernel ...\n"); if(!build_kernel(kernel_path)) return false; printf("Kernel compilation finished in %.2lfs.\n", time_dt() - starttime); return true; }
void BVHBuild::progress_update() { if(time_dt() - progress_start_time < 0.25) return; double progress_start = (double)progress_count/(double)progress_total; double duplicates = (double)(progress_total - progress_original_total)/(double)progress_total; string msg = string_printf("Building BVH %.0f%%, duplicates %.0f%%", progress_start * 100.0, duplicates * 100.0); progress.set_substatus(msg); progress_start_time = time_dt(); }
/* Compose the one-line status text (status, timing, sample count, interactive
 * flag) from the given progress object and hand it to the viewer; also shows
 * the help overlay when options.show_help is set. */
static void display_info(Progress& progress)
{
	/* kept across calls so latency is the time between two consecutive calls */
	static double latency = 0.0;
	static double last = 0;

	const double now = time_dt();
	latency = now - last;
	last = now;

	int tile;
	double total_time, sample_time;
	const int sample = progress.get_sample();
	progress.get_tile(tile, total_time, sample_time);

	string status, substatus;
	progress.get_status(status, substatus);

	if(substatus != "") {
		status += ": " + substatus;
	}

	const string interactive = options.interactive ? "On" : "Off";

	const string info = string_printf("%s Time: %.2f Latency: %.4f Sample: %d Average: %.4f Interactive: %s",
	                                  status.c_str(), total_time, latency, sample, sample_time, interactive.c_str());
	view_display_info(info.c_str());

	if(options.show_help) {
		view_display_help();
	}
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Tell Blender engine that we want to redraw ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// void BlenderSession::tag_redraw() { if(!interactive) { // Update stats and progress, only for background here because in 3d view we do it in draw for thread safety reasons update_status_progress(); // Offline render, redraw if timeout passed if(time_dt() - last_redraw_time > 1.0) { b_engine.tag_redraw(); last_redraw_time = time_dt(); } } else { // Tell blender that we want to redraw b_engine.tag_redraw(); } } //tag_redraw()
/* Compose the one-line status text (latency, sample count, timings, status)
 * from the given progress object and hand it to the viewer. */
static void display_info(Progress& progress)
{
	/* kept across calls so latency is the time between two consecutive calls */
	static double latency = 0.0;
	static double last = 0;

	const double now = time_dt();
	latency = now - last;
	last = now;

	int tile;
	double total_time, sample_time;
	const int sample = progress.get_sample();
	progress.get_tile(tile, total_time, sample_time);

	string status, substatus;
	progress.get_status(status, substatus);

	if(substatus != "") {
		status += ": " + substatus;
	}

	const string info = string_printf("latency: %.4f sample: %d total: %.4f average: %.4f %s",
	                                  latency, sample, total_time, sample_time, status.c_str());
	view_display_info(info.c_str());
}
void time_init() { #ifdef _WIN32 last_idle_time = GetTickCount(); #else gettimeofday (&last_idle_time, NULL); #endif time_dt(); }
/* Ask the Blender engine for a redraw. Viewport sessions request it right
 * away; background sessions refresh status/progress first and limit redraw
 * requests to one per 1.0 of time_dt(). */
void BlenderSession::tag_redraw()
{
	if(!background) {
		/* viewport: tell blender that we want to redraw */
		b_engine.tag_redraw();
		return;
	}

	/* stats and progress are updated here only for background renders,
	 * because in the 3d view this happens in draw for thread safety reasons */
	update_status_progress();

	/* offline render: redraw only once the timeout has passed */
	const bool timeout_passed = (time_dt() - last_redraw_time > 1.0);
	if(timeout_passed) {
		b_engine.tag_redraw();
		last_redraw_time = time_dt();
	}
}
/* Build the BVH from the collected references.
 *
 * Returns the root node of the tree, or NULL when the build was canceled
 * (or when no root node was produced). */
BVHNode* BVHBuild::run()
{
	BVHRange root;

	/* gather references; bail out early on cancel */
	add_references(root);

	if(progress.get_cancel())
		return NULL;

	/* set up spatial splits */
	if(params.top_level) {
		/* todo: get rid of this */
		params.use_spatial_split = false;
	}

	spatial_min_overlap = root.bounds().safe_area() * params.spatial_split_alpha;
	spatial_right_bounds.clear();
	spatial_right_bounds.resize(max(root.size(), (int)BVHParams::NUM_SPATIAL_BINS) - 1);

	/* set up progress reporting */
	progress_start_time = time_dt();
	progress_count = 0;
	progress_total = references.size();
	progress_original_total = progress_total;

	/* one slot per reference in the primitive arrays */
	prim_segment.resize(references.size());
	prim_index.resize(references.size());
	prim_object.resize(references.size());

	/* recursive build */
	BVHNode *rootnode;

	if(!params.use_spatial_split) {
		/* multithreaded binning build; wait for all queued work to finish */
		BVHObjectBinning rootbin(root, (references.size() != 0) ? &references[0] : NULL);
		rootnode = build_node(rootbin, 0);
		task_pool.wait_work();
	}
	else {
		/* singlethreaded spatial split build */
		rootnode = build_node(root, 0);
	}

	if(!rootnode)
		return rootnode;

	/* throw the tree away if we were canceled in the meantime */
	if(progress.get_cancel()) {
		rootnode->deleteSubtree();
		return NULL;
	}

	if(!params.use_spatial_split) {
		/*rotate(rootnode, 4, 5);*/
		rootnode->update_visibility();
	}

	return rootnode;
}
/* Wait until this pool's task counter `num` reaches zero. While waiting, the
 * calling thread pulls entries belonging to this pool out of the scheduler
 * queue and runs them itself; when no such entry is queued it sleeps on
 * num_cond until the counter changes.
 *
 * stats: optional output. When non-NULL it receives the time elapsed since
 *        start_time and the value of num_tasks_handled. */
void TaskPool::wait_work(Summary *stats)
{
	thread_scoped_lock num_lock(num_mutex);

	while(num != 0) {
		/* release the counter lock while searching the queue and running a task */
		num_lock.unlock();

		thread_scoped_lock queue_lock(TaskScheduler::queue_mutex);

		/* find task from this pool. if we get a task from another pool,
		 * we can get into deadlock */
		TaskScheduler::Entry work_entry;
		bool found_entry = false;
		list<TaskScheduler::Entry>::iterator it;

		for(it = TaskScheduler::queue.begin(); it != TaskScheduler::queue.end(); it++) {
			TaskScheduler::Entry& entry = *it;

			if(entry.pool == this) {
				/* copy the entry first: `entry` refers to the element being erased */
				work_entry = entry;
				found_entry = true;
				TaskScheduler::queue.erase(it);
				break;
			}
		}

		/* the task itself runs without holding the queue lock */
		queue_lock.unlock();

		/* if found task, do it, otherwise wait until other tasks are done */
		if(found_entry) {
			/* run task */
			work_entry.task->run(0);

			/* delete task */
			delete work_entry.task;

			/* notify pool task was done */
			num_decrease(1);
		}

		/* re-take the counter lock before re-checking `num` / waiting on it */
		num_lock.lock();
		if(num == 0)
			break;

		if(!found_entry) {
			THREADING_DEBUG("num==%d, Waiting for condition in TaskPool::wait_work !found_entry\n", num);
			num_cond.wait(num_lock);
			THREADING_DEBUG("num==%d, condition wait done in TaskPool::wait_work !found_entry\n", num);
		}
	}

	/* report timing and task count to the caller if requested */
	if(stats != NULL) {
		stats->time_total = time_dt() - start_time;
		stats->num_tasks_handled = num_tasks_handled;
	}
}
/* Register one more task with the pool: bumps both the pending counter `num`
 * and the running total `num_tasks_handled` under num_mutex, then wakes all
 * waiters on num_cond. The first registration also records start_time. */
void TaskPool::num_increase()
{
	thread_scoped_lock num_lock(num_mutex);

	/* first task seen by this pool: remember when work started */
	const bool is_first_task = (num_tasks_handled == 0);
	if(is_first_task)
		start_time = time_dt();

	++num;
	++num_tasks_handled;

	THREADING_DEBUG("num==%d, notifying all in TaskPool::num_increase\n", num);
	num_cond.notify_all();
}
// Reset the display buffer (when its parameters differ from buffer_params) and
// restart the session timers.
void Session::reset_parameters(BufferParams& buffer_params)
{
	// Only touch the display when one exists and its parameters actually changed
	if(display && buffer_params.modified(display->params))
		display->reset(buffer_params);

	// Restart timing from now, with no accumulated pause
	start_time = time_dt();
	paused_time = 0.0;

	//params.image_stat.uiCurSamples = 0;
	if(params.interactive) {
		progress.set_start_time(start_time + paused_time);
	}
} //reset_parameters()
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Update render project on the render-server ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// void Session::update(BufferParams& buffer_params) { // Block for buffer acces and reset immediately. we can't do this // in the thread, because we need to allocate an OpenGL buffer, and // that only works in the main thread thread_scoped_lock display_lock(display_mutex); thread_scoped_lock render_buffer_lock(render_buffer_mutex); display_outdated = true; reset_time = time_dt(); reset_parameters(buffer_params); //server->update(); pause_cond.notify_all(); } //update()
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Reset all session data buffers ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// void Session::reset(BufferParams& buffer_params, float mb_frame_time_sampling, float fps) { // Block for buffer acces and reset immediately. we can't do this // in the thread, because we need to allocate an OpenGL buffer, and // that only works in the main thread thread_scoped_lock display_lock(display_mutex); thread_scoped_lock render_buffer_lock(render_buffer_mutex); display_outdated = true; reset_time = time_dt(); reset_parameters(buffer_params); server->reset(params.export_type, scene->kernel->uiGPUs, mb_frame_time_sampling, fps, params.deep_image); pause_cond.notify_all(); } //reset()
/* Report rendering progress through the task's callbacks.
 *
 * rtile:         tile being worked on; may be NULL, in which case a sample
 *                index of 0 is reported and the tile callback is skipped.
 * pixel_samples: number of pixel samples to report; -1 means "use shader_w".
 *
 * Does nothing for task types other than RENDER and SHADER. The tile callback
 * is invoked at most once per 1.0 of time_dt(). */
void DeviceTask::update_progress(RenderTile *rtile, int pixel_samples)
{
	if((type != RENDER) && (type != SHADER))
		return;

	if(update_progress_sample) {
		if(pixel_samples == -1) {
			pixel_samples = shader_w;
		}
		update_progress_sample(pixel_samples, rtile ? rtile->sample : 0);
	}

	/* rtile is treated as optional above, so it must not be dereferenced
	 * unconditionally here: without a tile there is nothing to hand to the
	 * tile callback. */
	if(update_tile_sample && rtile != NULL) {
		double current_time = time_dt();

		if(current_time - last_update_time >= 1.0) {
			update_tile_sample(*rtile);

			last_update_time = current_time;
		}
	}
}
CCL_NAMESPACE_BEGIN

/* Device Task */

/* Construct a task of the given type. All rectangle, buffer and shader fields
 * start out zeroed, num_samples starts at 1, and last_update_time is set to
 * the current time_dt() value (it is the reference point for the once-per-1.0
 * throttle in update_progress). */
DeviceTask::DeviceTask(Type type_)
: type(type_), x(0), y(0), w(0), h(0), rgba_byte(0), rgba_half(0), buffer(0), sample(0), num_samples(1),
  shader_input(0), shader_output(0), shader_eval_type(0), shader_filter(0), shader_x(0), shader_w(0)
{
	last_update_time = time_dt();
}
string compile_kernel() { /* compute cubin name */ int major, minor; cuDeviceComputeCapability(&major, &minor, cuDevId); /* attempt to use kernel provided with blender */ string cubin = path_get(string_printf("lib/kernel_sm_%d%d.cubin", major, minor)); if(path_exists(cubin)) return cubin; /* not found, try to use locally compiled kernel */ string kernel_path = path_get("kernel"); string md5 = path_files_md5_hash(kernel_path); cubin = string_printf("cycles_kernel_sm%d%d_%s.cubin", major, minor, md5.c_str()); cubin = path_user_get(path_join("cache", cubin)); /* if exists already, use it */ if(path_exists(cubin)) return cubin; #ifdef _WIN32 if(cuHavePrecompiledKernels()) { if(major < 2) cuda_error_message(string_printf("CUDA device requires compute capability 2.0 or up, found %d.%d. Your GPU is not supported.", major, minor)); else cuda_error_message(string_printf("CUDA binary kernel for this graphics card compute capability (%d.%d) not found.", major, minor)); return ""; } #endif /* if not, find CUDA compiler */ string nvcc = cuCompilerPath(); if(nvcc == "") { cuda_error_message("CUDA nvcc compiler not found. 
Install CUDA toolkit in default location."); return ""; } /* compile */ string kernel = path_join(kernel_path, "kernel.cu"); string include = kernel_path; const int machine = system_cpu_bits(); const int maxreg = 24; double starttime = time_dt(); printf("Compiling CUDA kernel ...\n"); path_create_directories(cubin); string command = string_printf("\"%s\" -arch=sm_%d%d -m%d --cubin \"%s\" " "-o \"%s\" --ptxas-options=\"-v\" --maxrregcount=%d --opencc-options -OPT:Olimit=0 -I\"%s\" -DNVCC", nvcc.c_str(), major, minor, machine, kernel.c_str(), cubin.c_str(), maxreg, include.c_str()); if(system(command.c_str()) == -1) { cuda_error_message("Failed to execute compilation command, see console for details."); return ""; } /* verify if compilation succeeded */ if(!path_exists(cubin)) { cuda_error_message("CUDA kernel compilation failed, see console for details."); return ""; } printf("Kernel compilation finished in %.2lfs.\n", time_dt() - starttime); return cubin; }
/* Draw the session's buffer into a viewport of size w x h.
 *
 * Before drawing, viewport size and camera changes are detected and, if
 * needed, the session is reset with fresh buffer parameters.
 *
 * Returns the negation of session->draw()'s result. */
bool BlenderSession::draw(int w, int h)
{
	/* pause in redraw in case update is not being called due to final render */
	session->set_pause(BlenderSync::get_session_pause(b_scene, background));

	/* before drawing, we verify camera and viewport size changes, because
	 * we do not get update callbacks for those, we must detect them here */
	if(session->ready_to_reset()) {
		bool reset = false;

		/* if dimensions changed, reset */
		if(width != w || height != h) {
			if(start_resize_time == 0.0) {
				/* don't react immediately to resizes to avoid flickery resizing
				 * of the viewport, and some window managers changing the window
				 * size temporarily on unminimize */
				start_resize_time = time_dt();
				tag_redraw();
			}
			else if(time_dt() - start_resize_time < 0.2) {
				/* still inside the 0.2 debounce window: just ask to be drawn again */
				tag_redraw();
			}
			else {
				/* size has been different for long enough: accept it */
				width = w;
				height = h;
				reset = true;
			}
		}

		/* try to acquire mutex. if we can't, come back later */
		if(!session->scene->mutex.try_lock()) {
			tag_update();
		}
		else {
			/* update camera from 3d view */
			sync->sync_view(b_v3d, b_rv3d, width, height);

			if(scene->camera->need_update)
				reset = true;

			session->scene->mutex.unlock();
		}

		/* reset if requested */
		if(reset) {
			SessionParams session_params = BlenderSync::get_session_params(b_engine, b_userpref, b_scene, background);
			BufferParams buffer_params = BlenderSync::get_buffer_params(b_render, b_v3d, b_rv3d, scene->camera, width, height);
			bool session_pause = BlenderSync::get_session_pause(b_scene, background);

			/* while paused the reset is skipped and start_resize_time is left
			 * untouched */
			if(session_pause == false) {
				session->reset(buffer_params, session_params.samples);
				start_resize_time = 0.0;
			}
		}
	}
	else {
		/* session not ready to reset yet: request another update instead */
		tag_update();
	}

	/* update status and progress for 3d view draw */
	update_status_progress();

	/* draw */
	BufferParams buffer_params = BlenderSync::get_buffer_params(b_render, b_v3d, b_rv3d, scene->camera, width, height);
	DeviceDrawParams draw_params;

	/* for a linear display buffer, pass the engine's display space shader
	 * bind/unbind functions along as draw callbacks */
	if(session->params.display_buffer_linear) {
		draw_params.bind_display_space_shader_cb = function_bind(&BL::RenderEngine::bind_display_space_shader, &b_engine, b_scene);
		draw_params.unbind_display_space_shader_cb = function_bind(&BL::RenderEngine::unbind_display_space_shader, &b_engine);
	}

	return !session->draw(buffer_params, draw_params);
}
* * So we currently allocate single storage for now, which is only used by * the only thread working on the spatial BVH build. */ spatial_storage.resize(TaskScheduler::num_threads() + 1); size_t num_bins = max(root.size(), (int)BVHParams::NUM_SPATIAL_BINS) - 1; foreach(BVHSpatialStorage &storage, spatial_storage) { storage.right_bounds.clear(); } spatial_storage[0].right_bounds.resize(num_bins); } spatial_free_index = 0; /* init progress updates */ double build_start_time; build_start_time = progress_start_time = time_dt(); progress_count = 0; progress_total = references.size(); progress_original_total = progress_total; prim_type.resize(references.size()); prim_index.resize(references.size()); prim_object.resize(references.size()); /* build recursively */ BVHNode *rootnode; if(params.use_spatial_split) { /* Perform multithreaded spatial split build. */ rootnode = build_node(root, &references, 0, 0); task_pool.wait_work();
/* Render tile.num_samples samples of `tile` with the split kernel.
 *
 * Samples are rendered in batches ("subtiles" in the sample dimension). The
 * batch size is derived from a rolling average of the time per sample so that
 * progress updates happen at a growing interval (time_multiplier doubles per
 * batch, capped at 10).
 *
 * Returns false when the device reports an error, a kernel fails to enqueue,
 * or an invalid ray state is found; returns true otherwise, including when
 * the task was canceled. */
bool DeviceSplitKernel::path_trace(DeviceTask *task,
                                   RenderTile& tile,
                                   device_memory& kgbuffer,
                                   device_memory& kernel_data)
{
	if(device->have_error()) {
		return false;
	}

	/* Get local size */
	size_t local_size[2];
	{
		int2 lsize = split_kernel_local_size();
		local_size[0] = lsize[0];
		local_size[1] = lsize[1];
	}

	/* Number of elements in the global state buffer */
	/* NOTE(review): on the first tile this reads global_size before it is
	 * assigned below; the value is recomputed inside the first_tile branch. */
	int num_global_elements = global_size[0] * global_size[1];

	/* Allocate all required global memory once. */
	if(first_tile) {
		first_tile = false;

		/* Set global size */
		{
			int2 gsize = split_kernel_global_size(kgbuffer, kernel_data, task);

			/* Make sure that set work size is a multiple of local
			 * work size dimensions. */
			global_size[0] = round_up(gsize[0], local_size[0]);
			global_size[1] = round_up(gsize[1], local_size[1]);
		}

		num_global_elements = global_size[0] * global_size[1];
		assert(num_global_elements % WORK_POOL_SIZE == 0);

		/* Calculate max groups */

		/* Denotes the maximum work groups possible w.r.t. current requested tile size. */
		unsigned int work_pool_size = (device->info.type == DEVICE_CPU) ? WORK_POOL_SIZE_CPU : WORK_POOL_SIZE_GPU;
		unsigned int max_work_groups = num_global_elements / work_pool_size + 1;

		/* Allocate work_pool_wgs memory. */
		work_pool_wgs.alloc_to_device(max_work_groups);
		queue_index.alloc_to_device(NUM_QUEUES);
		use_queues_flag.alloc_to_device(1);
		split_data.alloc_to_device(state_buffer_size(kgbuffer, kernel_data, num_global_elements));
		ray_state.alloc(num_global_elements);
	}

/* Enqueue the kernel called `name`; makes path_trace() return false when the
 * device already has an error or the enqueue itself fails. */
#define ENQUEUE_SPLIT_KERNEL(name, global_size, local_size) \
		if(device->have_error()) { \
			return false; \
		} \
		if(!kernel_##name->enqueue(KernelDimensions(global_size, local_size), kgbuffer, kernel_data)) { \
			return false; \
		}

	tile.sample = tile.start_sample;

	/* for exponential increase between tile updates */
	int time_multiplier = 1;

	while(tile.sample < tile.start_sample + tile.num_samples) {
		/* to keep track of how long it takes to run a number of samples */
		double start_time = time_dt();

		/* initial guess to start rolling average */
		const int initial_num_samples = 1;
		/* approx number of samples per second */
		int samples_per_second = (avg_time_per_sample > 0.0) ?
		                         int(double(time_multiplier) / avg_time_per_sample) + 1 : initial_num_samples;

		/* batch of samples to render in this iteration, clamped to what is left */
		RenderTile subtile = tile;
		subtile.start_sample = tile.sample;
		subtile.num_samples = min(samples_per_second, tile.start_sample + tile.num_samples - tile.sample);

		if(device->have_error()) {
			return false;
		}

		/* reset state memory here as global size for data_init
		 * kernel might not be large enough to do in kernel */
		work_pool_wgs.zero_to_device();
		split_data.zero_to_device();
		ray_state.zero_to_device();

		if(!enqueue_split_kernel_data_init(KernelDimensions(global_size, local_size),
		                                   subtile,
		                                   num_global_elements,
		                                   kgbuffer,
		                                   kernel_data,
		                                   split_data,
		                                   ray_state,
		                                   queue_index,
		                                   use_queues_flag,
		                                   work_pool_wgs))
		{
			return false;
		}

		ENQUEUE_SPLIT_KERNEL(path_init, global_size, local_size);

		bool activeRaysAvailable = true;
		/* DBL_MAX means "no cancel deadline set yet" */
		double cancel_time = DBL_MAX;

		while(activeRaysAvailable) {
			/* Do path-iteration in host [Enqueue Path-iteration kernels. */
			/* 16 rounds of the kernel sequence are enqueued between two
			 * host-side ray state checks */
			for(int PathIter = 0; PathIter < 16; PathIter++) {
				ENQUEUE_SPLIT_KERNEL(scene_intersect, global_size, local_size);
				ENQUEUE_SPLIT_KERNEL(lamp_emission, global_size, local_size);
				ENQUEUE_SPLIT_KERNEL(do_volume, global_size, local_size);
				ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
				ENQUEUE_SPLIT_KERNEL(indirect_background, global_size, local_size);
				ENQUEUE_SPLIT_KERNEL(shader_setup, global_size, local_size);
				ENQUEUE_SPLIT_KERNEL(shader_sort, global_size, local_size);
				ENQUEUE_SPLIT_KERNEL(shader_eval, global_size, local_size);
				ENQUEUE_SPLIT_KERNEL(holdout_emission_blurring_pathtermination_ao, global_size, local_size);
				ENQUEUE_SPLIT_KERNEL(subsurface_scatter, global_size, local_size);
				ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
				ENQUEUE_SPLIT_KERNEL(direct_lighting, global_size, local_size);
				ENQUEUE_SPLIT_KERNEL(shadow_blocked_ao, global_size, local_size);
				ENQUEUE_SPLIT_KERNEL(shadow_blocked_dl, global_size, local_size);
				ENQUEUE_SPLIT_KERNEL(enqueue_inactive, global_size, local_size);
				ENQUEUE_SPLIT_KERNEL(next_iteration_setup, global_size, local_size);
				ENQUEUE_SPLIT_KERNEL(indirect_subsurface, global_size, local_size);
				ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
				ENQUEUE_SPLIT_KERNEL(buffer_update, global_size, local_size);

				if(task->get_cancel() && cancel_time == DBL_MAX) {
					/* Wait up to twice as many seconds for current samples to finish
					 * to avoid artifacts in render result from ending too soon. */
					cancel_time = time_dt() + 2.0 * time_multiplier;
				}

				if(time_dt() > cancel_time) {
					return true;
				}
			}

			/* Decide if we should exit path-iteration in host. */
			ray_state.copy_from_device(0, global_size[0] * global_size[1], 1);

			activeRaysAvailable = false;

			for(int rayStateIter = 0; rayStateIter < global_size[0] * global_size[1]; ++rayStateIter) {
				if(!IS_STATE(ray_state.data(), rayStateIter, RAY_INACTIVE)) {
					if(IS_STATE(ray_state.data(), rayStateIter, RAY_INVALID)) {
						/* Something went wrong, abort to avoid looping endlessly. */
						device->set_error("Split kernel error: invalid ray state");
						return false;
					}

					/* Not all rays are RAY_INACTIVE. */
					activeRaysAvailable = true;
					break;
				}
			}

			if(time_dt() > cancel_time) {
				return true;
			}
		}

		/* fold the measured time per sample of this batch into the rolling
		 * average; `alpha` (declared outside this function) weights the new
		 * measurement */
		double time_per_sample = ((time_dt()-start_time) / subtile.num_samples);

		if(avg_time_per_sample == 0.0) {
			/* start rolling average */
			avg_time_per_sample = time_per_sample;
		}
		else {
			avg_time_per_sample = alpha*time_per_sample + (1.0-alpha)*avg_time_per_sample;
		}

/* the preprocessor handles this textually, so its position inside the loop
 * body only matters in that no ENQUEUE_SPLIT_KERNEL use follows it */
#undef ENQUEUE_SPLIT_KERNEL

		tile.sample += subtile.num_samples;
		task->update_progress(&tile, tile.w*tile.h*subtile.num_samples);

		/* double the target interval between updates, capped at 10 */
		time_multiplier = min(time_multiplier << 1, 10);

		if(task->get_cancel()) {
			return true;
		}
	}

	return true;
}
// prints something identifyable and the time in miliseconds since the last call of this function void doTiming(const char *description) { if (enableTimingPrintouts) { printf("[ %s: %5.1fms ] ", description, time_dt()*1000.0); } }
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Render loop
//
// Runs until the progress object reports a cancel. Non-interactive sessions
// leave the loop once the requested number of samples has been reached;
// interactive sessions instead block on pause_cond while paused.
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
void Session::run_render() {
    reset_time = start_time = time_dt();
    paused_time = 0.0;
    // Set once startRender() has been issued to the server
    bool bStarted = false;

    params.image_stat.uiCurSamples = 0;

    if(params.interactive) progress.set_start_time(start_time);

    bool is_done = false;
    while(!progress.get_cancel()) {
        if(!params.interactive) {
            // If no work left and in background mode, we can stop immediately
            if(is_done) {
                update_status_time();
                progress.set_status(string(pass_name) + " finished");
                break;
            }
        } //if(!params.interactive)
        else {
            // If in interactive mode, and we are either paused or done for now,
            // wait for pause condition notify to wake up again
            thread_scoped_lock pause_lock(pause_mutex);
            if(pause || is_done) {
                update_status_time(pause, is_done);
                while(true) {
                    if(pause) server->pauseRender(true);

                    // Time spent waiting is accumulated in paused_time and the
                    // progress start time is shifted by it
                    double pause_start = time_dt();
                    pause_cond.wait(pause_lock);
                    paused_time += time_dt() - pause_start;

                    progress.set_start_time(start_time + paused_time);

                    update_status_time(pause, is_done);
                    progress.set_update();
                    if(!pause) {
                        server->pauseRender(false);
                        break;
                    }
                }
            } //if(pause || is_ready)
            if(progress.get_cancel()) break;
        } //if(!params.interactive), else

        if(!is_done) {
            time_sleep(0.01);

            // Update scene on the render-server - send all changed objects
            if(!bStarted || params.interactive) update_scene_to_server(frame_idx, total_frames);

            if(!bStarted) {
                // Image type: 8 bit for interactive sessions, otherwise float
                // (tonemapped or not depending on params.hdr_tonemapped)
                server->startRender(params.interactive, params.width, params.height, params.interactive ? ::OctaneEngine::OctaneClient::IMAGE_8BIT : (params.hdr_tonemapped ? ::OctaneEngine::OctaneClient::IMAGE_FLOAT_TONEMAPPED : ::OctaneEngine::OctaneClient::IMAGE_FLOAT), params.out_of_core_enabled, params.out_of_core_mem_limit, params.out_of_core_gpu_headroom); //FIXME: Perhaps the wrong place for it...
                bStarted = true;
            }

            // A pending server error message cancels the render
            if(!server->getServerErrorMessage().empty()) {
                progress.set_cancel("ERROR! Check console for detailed error messages.");
                server->clearServerErrorMessage();
            }
            if(progress.get_cancel()) break;

            // Buffers mutex is locked entirely while rendering each
            // sample, and released/reacquired on each iteration to allow
            // reset and draw in between
            thread_scoped_lock buffers_lock(render_buffer_mutex);

            // Update status and timing
            //update_status_time();

            update_render_buffer();

            // Check again: the server error message is re-read after the buffer update
            if(!server->getServerErrorMessage().empty()) {
                progress.set_cancel("ERROR! Check console for detailed error messages.");
                server->clearServerErrorMessage();
            }

            // Update status and timing
            update_status_time();
            progress.set_update();
        } //if(!is_done)
        else {
            thread_scoped_lock buffers_lock(render_buffer_mutex);
            update_render_buffer();

            // Update status and timing
            update_status_time();
        }
        // Only non-interactive sessions ever become "done"
        is_done = !params.interactive && (params.image_stat.uiCurSamples >= params.samples);
    } //while(!progress.get_cancel())
} //run_render()