예제 #1
0
  void thread_shader(DeviceTask &task)
  {
    KernelGlobals kg = kernel_globals;

#ifdef WITH_OSL
    OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
#endif
    for (int sample = 0; sample < task.num_samples; sample++) {
      for (int x = task.shader_x; x < task.shader_x + task.shader_w; x++)
        shader_kernel()(&kg,
                        (uint4 *)task.shader_input,
                        (float4 *)task.shader_output,
                        task.shader_eval_type,
                        task.shader_filter,
                        x,
                        task.offset,
                        sample);

      if (task.get_cancel() || task_pool.canceled())
        break;

      task.update_progress(NULL);
    }

#ifdef WITH_OSL
    OSLShader::thread_free(&kg);
#endif
  }
예제 #2
0
	int get_split_task_count(DeviceTask& task)
	{
		if(task.type == DeviceTask::SHADER)
			return task.get_subtask_count(TaskScheduler::num_threads(), 256);
		else
			return task.get_subtask_count(TaskScheduler::num_threads());
	}
예제 #3
0
 int get_split_task_count(DeviceTask &task)
 {
   if (task.type == DeviceTask::SHADER)
     return task.get_subtask_count(info.cpu_threads, 256);
   else
     return task.get_subtask_count(info.cpu_threads);
 }
예제 #4
0
	void thread_shader(DeviceTask& task)
	{
		KernelGlobals kg = kernel_globals;

#ifdef WITH_OSL
		OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
#endif
		void(*shader_kernel)(KernelGlobals*, uint4*, float4*, int, int, int, int);

#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
		if(system_cpu_support_avx2())
			shader_kernel = kernel_cpu_avx2_shader;
		else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
		if(system_cpu_support_avx())
			shader_kernel = kernel_cpu_avx_shader;
		else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41			
		if(system_cpu_support_sse41())
			shader_kernel = kernel_cpu_sse41_shader;
		else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
		if(system_cpu_support_sse3())
			shader_kernel = kernel_cpu_sse3_shader;
		else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
		if(system_cpu_support_sse2())
			shader_kernel = kernel_cpu_sse2_shader;
		else
#endif
			shader_kernel = kernel_cpu_shader;

		for(int sample = 0; sample < task.num_samples; sample++) {
			for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++)
				shader_kernel(&kg, (uint4*)task.shader_input, (float4*)task.shader_output,
					task.shader_eval_type, x, task.offset, sample);

			if(task.get_cancel() || task_pool.canceled())
				break;

			task.update_progress(NULL);

		}

#ifdef WITH_OSL
		OSLShader::thread_free(&kg);
#endif
	}
예제 #5
0
	void task_add(DeviceTask& task)
	{
		/* split task into smaller ones */
		list<DeviceTask> tasks;

		if(task.type == DeviceTask::SHADER)
			task.split(tasks, TaskScheduler::num_threads(), 256);
		else
			task.split(tasks, TaskScheduler::num_threads());

		foreach(DeviceTask& task, tasks)
			task_pool.push(new CPUDeviceTask(this, task));
	}
예제 #6
0
	void task_wait()
	{
		RPCSend snd(socket, "task_wait");
		snd.write();

		list<RenderTile> the_tiles;

		/* todo: run this threaded for connecting to multiple clients */
		for(;;) {
			RPCReceive rcv(socket);
			RenderTile tile;

			if(rcv.name == "acquire_tile") {
				/* todo: watch out for recursive calls! */
				if(the_task.acquire_tile(this, tile)) { /* write return as bool */
					the_tiles.push_back(tile);

					RPCSend snd(socket, "acquire_tile");
					snd.add(tile);
					snd.write();
				}
				else {
					RPCSend snd(socket, "acquire_tile_none");
					snd.write();
				}
			}
			else if(rcv.name == "release_tile") {
				rcv.read(tile);

				for(list<RenderTile>::iterator it = the_tiles.begin(); it != the_tiles.end(); it++) {
					if(tile.x == it->x && tile.y == it->y && tile.start_sample == it->start_sample) {
						tile.buffers = it->buffers;
						the_tiles.erase(it);
						break;
					}
				}

				assert(tile.buffers != NULL);

				the_task.release_tile(tile);

				RPCSend snd(socket, "release_tile");
				snd.write();
			}
			else if(rcv.name == "task_wait_done")
				break;
		}
	}
예제 #7
0
  void task_add(DeviceTask &task)
  {
    /* Load texture info. */
    load_texture_info();

    /* split task into smaller ones */
    list<DeviceTask> tasks;

    if (task.type == DeviceTask::SHADER)
      task.split(tasks, info.cpu_threads, 256);
    else
      task.split(tasks, info.cpu_threads);

    foreach (DeviceTask &task, tasks)
      task_pool.push(new CPUDeviceTask(this, task));
  }
예제 #8
0
	void task_add(DeviceTask& task)
	{
		/* split task into smaller ones */
		list<DeviceTask> tasks;
		task.split(tasks, TaskScheduler::num_threads());

		foreach(DeviceTask& task, tasks)
			task_pool.push(new CPUDeviceTask(this, task));
	}
예제 #9
0
	void task_add(DeviceTask& task)
	{
		/* split task into smaller ones, more than number of threads for uneven
		 * workloads where some parts of the image render slower than others */
		list<DeviceTask> tasks;
		task.split(tasks, TaskScheduler::num_threads());

		foreach(DeviceTask& task, tasks)
			task_pool.push(new CPUDeviceTask(this, task));
	}
예제 #10
0
  void path_trace(DeviceTask &task, RenderTile &tile, KernelGlobals *kg)
  {
    const bool use_coverage = kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE;

    scoped_timer timer(&tile.buffers->render_time);

    Coverage coverage(kg, tile);
    if (use_coverage) {
      coverage.init_path_trace();
    }

    float *render_buffer = (float *)tile.buffer;
    int start_sample = tile.start_sample;
    int end_sample = tile.start_sample + tile.num_samples;

    /* Needed for Embree. */
    SIMD_SET_FLUSH_TO_ZERO;

    for (int sample = start_sample; sample < end_sample; sample++) {
      if (task.get_cancel() || task_pool.canceled()) {
        if (task.need_finish_queue == false)
          break;
      }

      for (int y = tile.y; y < tile.y + tile.h; y++) {
        for (int x = tile.x; x < tile.x + tile.w; x++) {
          if (use_coverage) {
            coverage.init_pixel(x, y);
          }
          path_trace_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride);
        }
      }

      tile.sample = sample + 1;

      task.update_progress(&tile, tile.w * tile.h);
    }
    if (use_coverage) {
      coverage.finalize();
    }
  }
예제 #11
0
	void task_add(DeviceTask& maintask)
	{
		list<DeviceTask> tasks;

		/* arbitrary limit to work around apple ATI opencl issue */
		if(platform_name == "Apple")
			maintask.split_max_size(tasks, 76800);
		else
			tasks.push_back(maintask);

		DeviceTask task;

		foreach(DeviceTask& task, tasks) {
			if(task.type == DeviceTask::TONEMAP)
				tonemap(task);
			else if(task.type == DeviceTask::PATH_TRACE)
				path_trace(task);
		}
	}
예제 #12
0
	void thread_shader(DeviceTask& task)
	{
		KernelGlobals kg = kernel_globals;

#ifdef WITH_OSL
		OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
#endif

#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
		if(system_cpu_support_avx()) {
			for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
				kernel_cpu_avx_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);

				if(task.get_cancel() || task_pool.canceled())
					break;
			}
		}
		else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41			
		if(system_cpu_support_sse41()) {
			for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
				kernel_cpu_sse41_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);

				if(task.get_cancel() || task_pool.canceled())
					break;
			}
		}
		else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
		if(system_cpu_support_sse3()) {
			for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
				kernel_cpu_sse3_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);

				if(task.get_cancel() || task_pool.canceled())
					break;
			}
		}
		else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
		if(system_cpu_support_sse2()) {
			for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
				kernel_cpu_sse2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);

				if(task.get_cancel() || task_pool.canceled())
					break;
			}
		}
		else
#endif
		{
			for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
				kernel_cpu_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);

				if(task.get_cancel() || task_pool.canceled())
					break;
			}
		}

#ifdef WITH_OSL
		OSLShader::thread_free(&kg);
#endif
	}
예제 #13
0
	void thread_path_trace(DeviceTask& task)
	{
		if(task_pool.canceled()) {
			if(task.need_finish_queue == false)
				return;
		}

		KernelGlobals kg = kernel_globals;

#ifdef WITH_OSL
		OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
#endif

		RenderTile tile;
		
		while(task.acquire_tile(this, tile)) {
			float *render_buffer = (float*)tile.buffer;
			uint *rng_state = (uint*)tile.rng_state;
			int start_sample = tile.start_sample;
			int end_sample = tile.start_sample + tile.num_samples;

#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
			if(system_cpu_support_avx()) {
				for(int sample = start_sample; sample < end_sample; sample++) {
					if (task.get_cancel() || task_pool.canceled()) {
						if(task.need_finish_queue == false)
							break;
					}

					for(int y = tile.y; y < tile.y + tile.h; y++) {
						for(int x = tile.x; x < tile.x + tile.w; x++) {
							kernel_cpu_avx_path_trace(&kg, render_buffer, rng_state,
								sample, x, y, tile.offset, tile.stride);
						}
					}

					tile.sample = sample + 1;

					task.update_progress(tile);
				}
			}
			else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41			
			if(system_cpu_support_sse41()) {
				for(int sample = start_sample; sample < end_sample; sample++) {
					if (task.get_cancel() || task_pool.canceled()) {
						if(task.need_finish_queue == false)
							break;
					}

					for(int y = tile.y; y < tile.y + tile.h; y++) {
						for(int x = tile.x; x < tile.x + tile.w; x++) {
							kernel_cpu_sse41_path_trace(&kg, render_buffer, rng_state,
								sample, x, y, tile.offset, tile.stride);
						}
					}

					tile.sample = sample + 1;

					task.update_progress(tile);
				}
			}
			else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
			if(system_cpu_support_sse3()) {
				for(int sample = start_sample; sample < end_sample; sample++) {
					if (task.get_cancel() || task_pool.canceled()) {
						if(task.need_finish_queue == false)
							break;
					}

					for(int y = tile.y; y < tile.y + tile.h; y++) {
						for(int x = tile.x; x < tile.x + tile.w; x++) {
							kernel_cpu_sse3_path_trace(&kg, render_buffer, rng_state,
								sample, x, y, tile.offset, tile.stride);
						}
					}

					tile.sample = sample + 1;

					task.update_progress(tile);
				}
			}
			else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
			if(system_cpu_support_sse2()) {
				for(int sample = start_sample; sample < end_sample; sample++) {
					if (task.get_cancel() || task_pool.canceled()) {
						if(task.need_finish_queue == false)
							break;
					}

					for(int y = tile.y; y < tile.y + tile.h; y++) {
						for(int x = tile.x; x < tile.x + tile.w; x++) {
							kernel_cpu_sse2_path_trace(&kg, render_buffer, rng_state,
								sample, x, y, tile.offset, tile.stride);
						}
					}

					tile.sample = sample + 1;

					task.update_progress(tile);
				}
			}
			else
#endif
			{
				for(int sample = start_sample; sample < end_sample; sample++) {
					if (task.get_cancel() || task_pool.canceled()) {
						if(task.need_finish_queue == false)
							break;
					}

					for(int y = tile.y; y < tile.y + tile.h; y++) {
						for(int x = tile.x; x < tile.x + tile.w; x++) {
							kernel_cpu_path_trace(&kg, render_buffer, rng_state,
								sample, x, y, tile.offset, tile.stride);
						}
					}

					tile.sample = sample + 1;

					task.update_progress(tile);
				}
			}

			task.release_tile(tile);

			if(task_pool.canceled()) {
				if(task.need_finish_queue == false)
					break;
			}
		}

#ifdef WITH_OSL
		OSLShader::thread_free(&kg);
#endif
	}
예제 #14
0
	void task_wait()
	{
		thread_scoped_lock lock(rpc_lock);

		RPCSend snd(socket, &error_func, "task_wait");
		snd.write();

		lock.unlock();

		TileList the_tiles;

		/* todo: run this threaded for connecting to multiple clients */
		for(;;) {
			if(error_func.have_error())
				break;

			RenderTile tile;

			lock.lock();
			RPCReceive rcv(socket, &error_func);

			if(rcv.name == "acquire_tile") {
				lock.unlock();

				/* todo: watch out for recursive calls! */
				if(the_task.acquire_tile(this, tile)) { /* write return as bool */
					the_tiles.push_back(tile);

					lock.lock();
					RPCSend snd(socket, &error_func, "acquire_tile");
					snd.add(tile);
					snd.write();
					lock.unlock();
				}
				else {
					lock.lock();
					RPCSend snd(socket, &error_func, "acquire_tile_none");
					snd.write();
					lock.unlock();
				}
			}
			else if(rcv.name == "release_tile") {
				rcv.read(tile);
				lock.unlock();

				TileList::iterator it = tile_list_find(the_tiles, tile);
				if (it != the_tiles.end()) {
					tile.buffers = it->buffers;
					the_tiles.erase(it);
				}

				assert(tile.buffers != NULL);

				the_task.release_tile(tile);

				lock.lock();
				RPCSend snd(socket, &error_func, "release_tile");
				snd.write();
				lock.unlock();
			}
			else if(rcv.name == "task_wait_done") {
				lock.unlock();
				break;
			}
			else
				lock.unlock();
		}
	}
예제 #15
0
  void thread_render(DeviceTask &task)
  {
    if (task_pool.canceled()) {
      if (task.need_finish_queue == false)
        return;
    }

    /* allocate buffer for kernel globals */
    device_only_memory<KernelGlobals> kgbuffer(this, "kernel_globals");
    kgbuffer.alloc_to_device(1);

    KernelGlobals *kg = new ((void *)kgbuffer.device_pointer)
        KernelGlobals(thread_kernel_globals_init());

    profiler.add_state(&kg->profiler);

    CPUSplitKernel *split_kernel = NULL;
    if (use_split_kernel) {
      split_kernel = new CPUSplitKernel(this);
      if (!split_kernel->load_kernels(requested_features)) {
        thread_kernel_globals_free((KernelGlobals *)kgbuffer.device_pointer);
        kgbuffer.free();
        delete split_kernel;
        return;
      }
    }

    RenderTile tile;
    DenoisingTask denoising(this, task);
    denoising.profiler = &kg->profiler;

    while (task.acquire_tile(this, tile)) {
      if (tile.task == RenderTile::PATH_TRACE) {
        if (use_split_kernel) {
          device_only_memory<uchar> void_buffer(this, "void_buffer");
          split_kernel->path_trace(&task, tile, kgbuffer, void_buffer);
        }
        else {
          path_trace(task, tile, kg);
        }
      }
      else if (tile.task == RenderTile::DENOISE) {
        denoise(denoising, tile);
        task.update_progress(&tile, tile.w * tile.h);
      }

      task.release_tile(tile);

      if (task_pool.canceled()) {
        if (task.need_finish_queue == false)
          break;
      }
    }

    profiler.remove_state(&kg->profiler);

    thread_kernel_globals_free((KernelGlobals *)kgbuffer.device_pointer);
    kg->~KernelGlobals();
    kgbuffer.free();
    delete split_kernel;
  }