Пример #1
0
	/* Pick a 2D global work size for the split kernel, sized so the per-element
	 * state buffers fit inside the device's maximum single allocation. */
	virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask * /*task*/)
	{
		/* CPU OpenCL devices run much faster with a small global size. */
		const cl_device_type type = OpenCLInfo::get_device_type(device->cdDevice);
		if(type == CL_DEVICE_TYPE_CPU) {
			VLOG(1) << "Global size: (64, 64).";
			return make_int2(64, 64);
		}

		/* Query the largest single buffer the device can allocate. */
		cl_ulong max_buffer_size;
		clGetDeviceInfo(device->cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_buffer_size, NULL);

		/* Honor a user-imposed debug memory limit, minus what's already in use. */
		if(DebugFlags().opencl.mem_limit) {
			const cl_ulong debug_limit = cl_ulong(DebugFlags().opencl.mem_limit - device->stats.mem_used);
			max_buffer_size = min(max_buffer_size, debug_limit);
		}

		VLOG(1) << "Maximum device allocation size: "
		        << string_human_readable_number(max_buffer_size) << " bytes. ("
		        << string_human_readable_size(max_buffer_size) << ").";

		/* Use at most half the reported maximum, clamped to 2gb; we shouldn't
		 * need more than that and some devices may support much more. */
		const cl_ulong hard_cap = (cl_ulong)2l*1024*1024*1024;
		max_buffer_size = min(max_buffer_size / 2, hard_cap);

		/* Derive a roughly square grid; width is rounded down to a multiple
		 * of 64 (but kept at least 64) for work-group alignment. */
		const size_t num_elements = max_elements_for_max_buffer_size(kg, data, max_buffer_size);
		const int side = (int)sqrt(num_elements);
		int2 global_size = make_int2(max(round_down(side, 64), 64), side);
		VLOG(1) << "Global size: " << global_size << ".";
		return global_size;
	}
Пример #2
0
  /* Select, at construction time, which per-ISA kernel entry point to use.
   * Each optimized variant is considered only if it was compiled in
   * (WITH_CYCLES_OPTIMIZED_KERNEL_*), enabled via debug flags, and supported
   * by the CPU at runtime; otherwise we fall back to the next-best variant,
   * ending with the default (non-vectorized) kernel.
   *
   * NOTE: the bare `else` placed just before each closing #endif chains the
   * #ifdef sections into one if/else-if ladder, so exactly one assignment to
   * `kernel` runs regardless of which variants are compiled in. Do not
   * reorder these sections. */
  KernelFunctions(
      F kernel_default, F kernel_sse2, F kernel_sse3, F kernel_sse41, F kernel_avx, F kernel_avx2)
  {
    const char *architecture_name = "default";
    kernel = kernel_default;

    /* Silence potential warnings about unused variables
     * when compiling without some architectures. */
    (void)kernel_sse2;
    (void)kernel_sse3;
    (void)kernel_sse41;
    (void)kernel_avx;
    (void)kernel_avx2;
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
    if (DebugFlags().cpu.has_avx2() && system_cpu_support_avx2()) {
      architecture_name = "AVX2";
      kernel = kernel_avx2;
    }
    else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
        if (DebugFlags().cpu.has_avx() && system_cpu_support_avx()) {
      architecture_name = "AVX";
      kernel = kernel_avx;
    }
    else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
        if (DebugFlags().cpu.has_sse41() && system_cpu_support_sse41()) {
      architecture_name = "SSE4.1";
      kernel = kernel_sse41;
    }
    else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
        if (DebugFlags().cpu.has_sse3() && system_cpu_support_sse3()) {
      architecture_name = "SSE3";
      kernel = kernel_sse3;
    }
    else
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
        if (DebugFlags().cpu.has_sse2() && system_cpu_support_sse2()) {
      architecture_name = "SSE2";
      kernel = kernel_sse2;
    }
#endif

    /* Log the chosen architecture only when it differs from the last one
     * logged; logged_architecture appears to be shared/static state used to
     * avoid repeating the message for every constructed instance — confirm
     * against its declaration. */
    if (strcmp(architecture_name, logged_architecture) != 0) {
      VLOG(1) << "Will be using " << architecture_name << " kernels.";
      logged_architecture = architecture_name;
    }
  }
Пример #3
0
  /* Report which BVH layouts this CPU device can traverse: BVH2 always,
   * wider layouts only when the matching SIMD level is enabled and present,
   * and Embree when compiled in. */
  virtual BVHLayoutMask get_bvh_layout_mask() const
  {
    const bool supports_bvh4 = DebugFlags().cpu.has_sse2() && system_cpu_support_sse2();
    const bool supports_bvh8 = DebugFlags().cpu.has_avx2() && system_cpu_support_avx2();

    BVHLayoutMask layouts = BVH_LAYOUT_BVH2;
    if (supports_bvh4) {
      layouts |= BVH_LAYOUT_BVH4;
    }
    if (supports_bvh8) {
      layouts |= BVH_LAYOUT_BVH8;
    }
#ifdef WITH_EMBREE
    layouts |= BVH_LAYOUT_EMBREE;
#endif /* WITH_EMBREE */
    return layouts;
  }
Пример #4
0
  /* Construct the CPU device: bind every megakernel entry point via the
   * KERNEL_FUNCTIONS helper (defined elsewhere in this file, #undef'd at the
   * end of this constructor), resolve the thread count, and optionally
   * register the split-kernel entry points.
   *
   * NOTE: the REGISTER_KERNEL initializers must stay in member declaration
   * order, as with any C++ member initializer list. */
  CPUDevice(DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool background_)
      : Device(info_, stats_, profiler_, background_),
        texture_info(this, "__texture_info", MEM_TEXTURE),
#define REGISTER_KERNEL(name) name##_kernel(KERNEL_FUNCTIONS(name))
        REGISTER_KERNEL(path_trace),
        REGISTER_KERNEL(convert_to_half_float),
        REGISTER_KERNEL(convert_to_byte),
        REGISTER_KERNEL(shader),
        REGISTER_KERNEL(filter_divide_shadow),
        REGISTER_KERNEL(filter_get_feature),
        REGISTER_KERNEL(filter_write_feature),
        REGISTER_KERNEL(filter_detect_outliers),
        REGISTER_KERNEL(filter_combine_halves),
        REGISTER_KERNEL(filter_nlm_calc_difference),
        REGISTER_KERNEL(filter_nlm_blur),
        REGISTER_KERNEL(filter_nlm_calc_weight),
        REGISTER_KERNEL(filter_nlm_update_output),
        REGISTER_KERNEL(filter_nlm_normalize),
        REGISTER_KERNEL(filter_construct_transform),
        REGISTER_KERNEL(filter_nlm_construct_gramian),
        REGISTER_KERNEL(filter_finalize),
        REGISTER_KERNEL(data_init)
#undef REGISTER_KERNEL
  {
    /* A thread count of 0 means "auto": use the scheduler's default. */
    if (info.cpu_threads == 0) {
      info.cpu_threads = TaskScheduler::num_threads();
    }

#ifdef WITH_OSL
    kernel_globals.osl = &osl_globals;
#endif
    /* Split kernel on CPU is a debug feature, opt-in via debug flags. */
    use_split_kernel = DebugFlags().cpu.split_kernel;
    if (use_split_kernel) {
      VLOG(1) << "Will be using split kernel.";
    }
    need_texture_info = false;

    /* Populate the split-kernel map: each entry binds the per-ISA variants
     * of one split-kernel stage, keyed by the stage name. */
#define REGISTER_SPLIT_KERNEL(name) \
  split_kernels[#name] = KernelFunctions<void (*)(KernelGlobals *, KernelData *)>( \
      KERNEL_FUNCTIONS(name))
    REGISTER_SPLIT_KERNEL(path_init);
    REGISTER_SPLIT_KERNEL(scene_intersect);
    REGISTER_SPLIT_KERNEL(lamp_emission);
    REGISTER_SPLIT_KERNEL(do_volume);
    REGISTER_SPLIT_KERNEL(queue_enqueue);
    REGISTER_SPLIT_KERNEL(indirect_background);
    REGISTER_SPLIT_KERNEL(shader_setup);
    REGISTER_SPLIT_KERNEL(shader_sort);
    REGISTER_SPLIT_KERNEL(shader_eval);
    REGISTER_SPLIT_KERNEL(holdout_emission_blurring_pathtermination_ao);
    REGISTER_SPLIT_KERNEL(subsurface_scatter);
    REGISTER_SPLIT_KERNEL(direct_lighting);
    REGISTER_SPLIT_KERNEL(shadow_blocked_ao);
    REGISTER_SPLIT_KERNEL(shadow_blocked_dl);
    REGISTER_SPLIT_KERNEL(enqueue_inactive);
    REGISTER_SPLIT_KERNEL(next_iteration_setup);
    REGISTER_SPLIT_KERNEL(indirect_subsurface);
    REGISTER_SPLIT_KERNEL(buffer_update);
#undef REGISTER_SPLIT_KERNEL
#undef KERNEL_FUNCTIONS
  }