Пример #1
0
	virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask * /*task*/)
	{
		cl_device_type type = OpenCLInfo::get_device_type(device->cdDevice);
		/* Use small global size on CPU devices as it seems to be much faster. */
		if(type == CL_DEVICE_TYPE_CPU) {
			VLOG(1) << "Global size: (64, 64).";
			return make_int2(64, 64);
		}

		cl_ulong max_buffer_size;
		clGetDeviceInfo(device->cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_buffer_size, NULL);

		if(DebugFlags().opencl.mem_limit) {
			max_buffer_size = min(max_buffer_size,
			                      cl_ulong(DebugFlags().opencl.mem_limit - device->stats.mem_used));
		}

		VLOG(1) << "Maximum device allocation size: "
		        << string_human_readable_number(max_buffer_size) << " bytes. ("
		        << string_human_readable_size(max_buffer_size) << ").";

		/* Limit to 2gb, as we shouldn't need more than that and some devices may support much more. */
		max_buffer_size = min(max_buffer_size / 2, (cl_ulong)2l*1024*1024*1024);

		size_t num_elements = max_elements_for_max_buffer_size(kg, data, max_buffer_size);
		int2 global_size = make_int2(max(round_down((int)sqrt(num_elements), 64), 64), (int)sqrt(num_elements));
		VLOG(1) << "Global size: " << global_size << ".";
		return global_size;
	}
Пример #2
0
	virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask * /*task*/)
	{
		cl_device_type type = OpenCLInfo::get_device_type(device->cdDevice);
		/* Use small global size on CPU devices as it seems to be much faster. */
		if(type == CL_DEVICE_TYPE_CPU) {
			VLOG(1) << "Global size: (64, 64).";
			return make_int2(64, 64);
		}

		cl_ulong max_buffer_size;
		clGetDeviceInfo(device->cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_buffer_size, NULL);
		VLOG(1) << "Maximum device allocation size: "
		        << string_human_readable_number(max_buffer_size) << " bytes. ("
		        << string_human_readable_size(max_buffer_size) << ").";

		size_t num_elements = max_elements_for_max_buffer_size(kg, data, max_buffer_size / 2);
		int2 global_size = make_int2(round_down((int)sqrt(num_elements), 64), (int)sqrt(num_elements));
		VLOG(1) << "Global size: " << global_size << ".";
		return global_size;
	}