Пример #1
0
  /* Run the shader evaluation kernel over the task's shader range
   * [shader_x, shader_x + shader_w), once for every sample of the task.
   *
   * Cancellation is polled after each full pass over the range; progress is
   * reported after every pass that was not canceled. */
  void thread_shader(DeviceTask &task)
  {
    /* Work on a local copy of the device's kernel globals. */
    KernelGlobals thread_kg = kernel_globals;

#ifdef WITH_OSL
    OSLShader::thread_init(&thread_kg, &kernel_globals, &osl_globals);
#endif

    for (int sample = 0; sample < task.num_samples; sample++) {
      for (int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
        shader_kernel()(&thread_kg,
                        (uint4 *)task.shader_input,
                        (float4 *)task.shader_output,
                        task.shader_eval_type,
                        task.shader_filter,
                        x,
                        task.offset,
                        sample);
      }

      /* Stop between samples when either the task or the pool was canceled. */
      const bool stop_requested = task.get_cancel() || task_pool.canceled();
      if (stop_requested) {
        break;
      }

      task.update_progress(NULL);
    }

#ifdef WITH_OSL
    /* Pairs with the thread_init() call above. */
    OSLShader::thread_free(&thread_kg);
#endif
  }
Пример #2
0
	/* Run shader evaluation over the task's shader range
	 * [shader_x, shader_x + shader_w), once for every sample of the task,
	 * using a kernel variant chosen at runtime. */
	void thread_shader(DeviceTask& task)
	{
		/* Work on a local copy of the device's kernel globals. */
		KernelGlobals local_kg = kernel_globals;

#ifdef WITH_OSL
		OSLShader::thread_init(&local_kg, &kernel_globals, &osl_globals);
#endif

		typedef void (*ShaderKernelFn)(KernelGlobals*, uint4*, float4*, int, int, int, int);
		ShaderKernelFn eval_shader;

		/* Kernel variants are tried in the order AVX2, AVX, SSE4.1, SSE3, SSE2.
		 * The first one that is compiled in and whose system_cpu_support_*()
		 * check passes is used; otherwise kernel_cpu_shader is the fallback.
		 * Each dangling `else` binds to whatever statement the preprocessor
		 * leaves next, so the order of these sections matters. */
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
		if(system_cpu_support_avx2()) {
			eval_shader = kernel_cpu_avx2_shader;
		}
		else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
		if(system_cpu_support_avx()) {
			eval_shader = kernel_cpu_avx_shader;
		}
		else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
		if(system_cpu_support_sse41()) {
			eval_shader = kernel_cpu_sse41_shader;
		}
		else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
		if(system_cpu_support_sse3()) {
			eval_shader = kernel_cpu_sse3_shader;
		}
		else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
		if(system_cpu_support_sse2()) {
			eval_shader = kernel_cpu_sse2_shader;
		}
		else
#endif
		{
			eval_shader = kernel_cpu_shader;
		}

		for(int sample = 0; sample < task.num_samples; sample++) {
			for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
				eval_shader(&local_kg,
					(uint4*)task.shader_input,
					(float4*)task.shader_output,
					task.shader_eval_type,
					x,
					task.offset,
					sample);
			}

			/* Stop between samples when either the task or the pool was canceled. */
			const bool stop_requested = task.get_cancel() || task_pool.canceled();
			if(stop_requested)
				break;

			task.update_progress(NULL);
		}

#ifdef WITH_OSL
		/* Pairs with the thread_init() call above. */
		OSLShader::thread_free(&local_kg);
#endif
	}
Пример #3
0
  /* Path trace every pixel of `tile` for the samples
   * [tile.start_sample, tile.start_sample + tile.num_samples).
   *
   * After each completed sample, tile.sample is advanced and progress is
   * reported for tile.w * tile.h pixels. When the CRYPT_ACCURATE bit is set in
   * the film's cryptomatte passes, a Coverage helper is initialized up front,
   * notified before each pixel, and finalized at the end. */
  void path_trace(DeviceTask &task, RenderTile &tile, KernelGlobals *kg)
  {
    const bool accurate_coverage = kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE;

    /* Timer is handed the tile buffers' render_time field. */
    scoped_timer render_timer(&tile.buffers->render_time);

    Coverage pixel_coverage(kg, tile);
    if (accurate_coverage) {
      pixel_coverage.init_path_trace();
    }

    float *buffer = (float *)tile.buffer;
    const int first_sample = tile.start_sample;
    const int sample_end = tile.start_sample + tile.num_samples;

    /* Needed for Embree. */
    SIMD_SET_FLUSH_TO_ZERO;

    for (int sample = first_sample; sample < sample_end; sample++) {
      /* A cancel request is honored only if the task does not ask for its
       * queue to be finished. */
      const bool cancel_requested = task.get_cancel() || task_pool.canceled();
      if (cancel_requested && !task.need_finish_queue) {
        break;
      }

      for (int y = tile.y; y < tile.y + tile.h; y++) {
        for (int x = tile.x; x < tile.x + tile.w; x++) {
          if (accurate_coverage) {
            pixel_coverage.init_pixel(x, y);
          }
          path_trace_kernel()(kg, buffer, sample, x, y, tile.offset, tile.stride);
        }
      }

      tile.sample = sample + 1;

      task.update_progress(&tile, tile.w * tile.h);
    }

    if (accurate_coverage) {
      pixel_coverage.finalize();
    }
  }
Пример #4
0
	/* Path trace tiles acquired from `task` until no more tiles are handed out
	 * or rendering is canceled.
	 *
	 * For every acquired tile, each sample in
	 * [tile.start_sample, tile.start_sample + tile.num_samples) is traced for
	 * all pixels of the tile; after each sample tile.sample is advanced and
	 * progress is reported. A cancel request is ignored while
	 * task.need_finish_queue is set.
	 *
	 * The kernel variant is selected once through a function pointer instead
	 * of repeating the whole sample loop for every instruction set. */
	void thread_path_trace(DeviceTask& task)
	{
		if(task_pool.canceled()) {
			if(task.need_finish_queue == false)
				return;
		}

		/* Work on a local copy of the device's kernel globals. */
		KernelGlobals kg = kernel_globals;

#ifdef WITH_OSL
		OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
#endif

		void(*path_trace_kernel)(KernelGlobals*, float*, uint*, int, int, int, int, int);

		/* Kernel variants are tried in the order AVX, SSE4.1, SSE3, SSE2. The
		 * first one that is compiled in and whose system_cpu_support_*() check
		 * passes is used; otherwise kernel_cpu_path_trace is the fallback.
		 * Each dangling `else` binds to whatever statement the preprocessor
		 * leaves next, so the order of these sections matters. */
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
		if(system_cpu_support_avx())
			path_trace_kernel = kernel_cpu_avx_path_trace;
		else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
		if(system_cpu_support_sse41())
			path_trace_kernel = kernel_cpu_sse41_path_trace;
		else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
		if(system_cpu_support_sse3())
			path_trace_kernel = kernel_cpu_sse3_path_trace;
		else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
		if(system_cpu_support_sse2())
			path_trace_kernel = kernel_cpu_sse2_path_trace;
		else
#endif
			path_trace_kernel = kernel_cpu_path_trace;

		RenderTile tile;

		while(task.acquire_tile(this, tile)) {
			float *render_buffer = (float*)tile.buffer;
			uint *rng_state = (uint*)tile.rng_state;
			int start_sample = tile.start_sample;
			int end_sample = tile.start_sample + tile.num_samples;

			for(int sample = start_sample; sample < end_sample; sample++) {
				/* Stop between samples on cancel, unless the task asks for
				 * its queue to be finished. */
				if(task.get_cancel() || task_pool.canceled()) {
					if(task.need_finish_queue == false)
						break;
				}

				for(int y = tile.y; y < tile.y + tile.h; y++) {
					for(int x = tile.x; x < tile.x + tile.w; x++) {
						path_trace_kernel(&kg, render_buffer, rng_state,
							sample, x, y, tile.offset, tile.stride);
					}
				}

				tile.sample = sample + 1;

				task.update_progress(tile);
			}

			/* The tile is released even when its sample loop was cut short. */
			task.release_tile(tile);

			if(task_pool.canceled()) {
				if(task.need_finish_queue == false)
					break;
			}
		}

#ifdef WITH_OSL
		/* Pairs with the thread_init() call above. */
		OSLShader::thread_free(&kg);
#endif
	}
Пример #5
0
  /* Render worker: processes tiles acquired from `task` until no more tiles
   * are handed out or rendering is canceled.
   *
   * PATH_TRACE tiles go through the split kernel when use_split_kernel is
   * set, otherwise through path_trace(); DENOISE tiles go through denoise().
   * A cancel request is ignored while task.need_finish_queue is set.
   *
   * The kernel globals are placement-constructed inside `kgbuffer` and their
   * profiler state is registered with `profiler`; every exit path past that
   * point must undo both before the buffer is freed. */
  void thread_render(DeviceTask &task)
  {
    if (task_pool.canceled()) {
      if (task.need_finish_queue == false)
        return;
    }

    /* allocate buffer for kernel globals */
    device_only_memory<KernelGlobals> kgbuffer(this, "kernel_globals");
    kgbuffer.alloc_to_device(1);

    /* Construct the globals in place inside the buffer allocated above. */
    KernelGlobals *kg = new ((void *)kgbuffer.device_pointer)
        KernelGlobals(thread_kernel_globals_init());

    profiler.add_state(&kg->profiler);

    CPUSplitKernel *split_kernel = NULL;
    if (use_split_kernel) {
      split_kernel = new CPUSplitKernel(this);
      if (!split_kernel->load_kernels(requested_features)) {
        /* Tear down in the same order as the regular exit at the end of this
         * function. The profiler state must be unregistered and the
         * placement-constructed globals destroyed before the buffer is freed,
         * otherwise the profiler keeps a pointer into released memory. */
        profiler.remove_state(&kg->profiler);
        thread_kernel_globals_free((KernelGlobals *)kgbuffer.device_pointer);
        kg->~KernelGlobals();
        kgbuffer.free();
        delete split_kernel;
        return;
      }
    }

    RenderTile tile;
    DenoisingTask denoising(this, task);
    /* Denoising uses the same profiler state as the kernel globals. */
    denoising.profiler = &kg->profiler;

    while (task.acquire_tile(this, tile)) {
      if (tile.task == RenderTile::PATH_TRACE) {
        if (use_split_kernel) {
          device_only_memory<uchar> void_buffer(this, "void_buffer");
          split_kernel->path_trace(&task, tile, kgbuffer, void_buffer);
        }
        else {
          path_trace(task, tile, kg);
        }
      }
      else if (tile.task == RenderTile::DENOISE) {
        denoise(denoising, tile);
        task.update_progress(&tile, tile.w * tile.h);
      }

      /* The tile is released regardless of which branch handled it. */
      task.release_tile(tile);

      if (task_pool.canceled()) {
        if (task.need_finish_queue == false)
          break;
      }
    }

    profiler.remove_state(&kg->profiler);

    thread_kernel_globals_free((KernelGlobals *)kgbuffer.device_pointer);
    /* Placement new above requires an explicit destructor call. */
    kg->~KernelGlobals();
    kgbuffer.free();
    delete split_kernel;
  }