Beispiel #1
0
/*
 * init_opencl_context_and_shmem
 *
 * We can have performance gain using asynchronous DMA transfer when data
 * chunk it moved to OpenCL device from host machine, however, it requires
 * preparations to ensure the memory region to be copied to/from is pinned
 * on RAM; not swapped out. OpenCL provides an interface to map a certain
 * host address area as pinned buffer object, even though its size is
 * restricted to CL_DEVICE_MAX_MEM_ALLOC_SIZE parameter. Usually, it is
 * much less than size of shared memory to be assigned to PG-Strom, around
 * 500MB - 2GB in typical GPU/MIC device. So, we need to split a flat
 * continuous memory into several 'zones' to pin it using OpenCL interface.
 * Because it is a job of OpenCL intermediation server to collect properties
 * of devices, and this server shall be launched post initialization stage,
 * we also have to acquire and pin the shared memory region in the context
 * of OpenCL intermediation server, not postmaster itself.
 */
static void
init_opencl_context_and_shmem(void)
{
	Size	zone_length = LONG_MAX;
	cl_int	i, rc;

	/*
	 * Create an OpenCL context
	 */
	opencl_context = clCreateContext(NULL,
									 opencl_num_devices,
									 opencl_devices,
									 NULL,
									 NULL,
									 &rc);
	if (rc != CL_SUCCESS)
		elog(ERROR, "clCreateContext failed: %s", opencl_strerror(rc));

	/*
	 * Create an OpenCL command queue for each device
	 */
	for (i=0; i < opencl_num_devices; i++)
	{
		const pgstrom_device_info *dev_info = pgstrom_get_device_info(i);

		opencl_cmdq[i] =
			clCreateCommandQueue(opencl_context,
								 opencl_devices[i],
								 CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE |
								 CL_QUEUE_PROFILING_ENABLE,
								 &rc);
		if (rc != CL_SUCCESS)
			elog(ERROR, "clCreateCommandQueue failed: %s",
				 opencl_strerror(rc));

		if (zone_length > dev_info->dev_max_mem_alloc_size)
			zone_length = (dev_info->dev_max_mem_alloc_size &
						   ~((1UL << 20) - 1));
	}
	/* Lock shared memory of PG-Strom's private area */
	pgstrom_setup_shmem(zone_length, on_shmem_zone_callback);

	/* Lock shared memory of shared buffer area */
	if (!on_shmem_zone_callback(BufferBlocks,
								NBuffers * (Size) BLCKSZ,
								"buffer", false))
	{
		Size	total_size = NBuffers * (Size) BLCKSZ;
		Size	offset;

		Assert((zone_length & (BLCKSZ - 1)) == 0);

		for (offset = 0; offset < total_size; offset += zone_length)
		{
			on_shmem_zone_callback(BufferBlocks + offset,
								   Min(zone_length, total_size - offset),
								   "buffer", true);
		}
	}
}
Beispiel #2
0
/*
 * append_flag_label
 *
 * Appends 'label' to the NUL-terminated string being built in 'buf'
 * (capacity 'bufsz' bytes) at offset 'ofs', preceded by ", " when
 * something was already written (ofs > 0). Returns the new offset.
 * Output is truncated, never overrun, if the buffer is too small.
 */
static int
append_flag_label(char *buf, size_t bufsz, int ofs, const char *label)
{
	size_t		room;
	int			len;

	if (ofs < 0 || (size_t) ofs >= bufsz)
		return ofs;

	room = bufsz - (size_t) ofs;
	len = snprintf(buf + ofs, room, "%s%s", ofs > 0 ? ", " : "", label);
	if (len < 0)
		return ofs;
	if ((size_t) len >= room)
		return (int) (bufsz - 1);	/* truncated; buffer is full */
	return ofs + len;
}

/*
 * pgstrom_opencl_device_info
 *
 * Set-returning SQL function that reports the properties of the OpenCL
 * devices. Each returned row has four columns:
 *   dnum     (int4) - device index
 *   pnum     (int4) - property index within the device
 *   property (text) - name of the property
 *   value    (text) - value of the property, rendered as text
 *
 * The per-call counter is split into (device index, property index), so
 * every device yields NUM_DEVICE_PROPERTIES consecutive rows; the scan
 * ends once the device index reaches pgstrom_get_device_nums().
 */
Datum
pgstrom_opencl_device_info(PG_FUNCTION_ARGS)
{
	/* number of 'case' labels in the property switch below */
	enum { NUM_DEVICE_PROPERTIES = 55 };

	FuncCallContext	*fncxt;
	Datum		values[4];
	bool		isnull[4];
	HeapTuple	tuple;
	uint32		dindex;
	uint32		pindex;
	const pgstrom_device_info *dinfo;
	const char *key = NULL;
	const char *value = NULL;
	char		buf[256];
	int			ofs = 0;

	if (SRF_IS_FIRSTCALL())
	{
		TupleDesc		tupdesc;
		MemoryContext	oldcxt;

		fncxt = SRF_FIRSTCALL_INIT();
		oldcxt = MemoryContextSwitchTo(fncxt->multi_call_memory_ctx);

		tupdesc = CreateTemplateTupleDesc(4, false);
		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "dnum",
						   INT4OID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "pnum",
						   INT4OID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 3, "property",
						   TEXTOID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 4, "value",
						   TEXTOID, -1, 0);
		fncxt->tuple_desc = BlessTupleDesc(tupdesc);

		/* no cross-call state is needed; call_cntr drives the iteration */
		fncxt->user_fctx = NULL;

		MemoryContextSwitchTo(oldcxt);
	}
	fncxt = SRF_PERCALL_SETUP();

	dindex = fncxt->call_cntr / NUM_DEVICE_PROPERTIES;
	pindex = fncxt->call_cntr % NUM_DEVICE_PROPERTIES;

	if (dindex >= pgstrom_get_device_nums())
		SRF_RETURN_DONE(fncxt);

	dinfo = pgstrom_get_device_info(dindex);
	Assert(dinfo != NULL);

	/*
	 * 'buf' collects comma separated flag names for bit-mask properties.
	 * Start from an empty string, so a mask with no known bit set yields
	 * "" instead of uninitialized stack contents.
	 */
	buf[0] = '\0';

	switch (pindex)
	{
		case 0:
			key = "platform index";
			value = psprintf("%u", dinfo->pl_info->pl_index);
			break;
		case 1:
			key = "platform profile";
			value = dinfo->pl_info->pl_profile;
			break;
		case 2:
			key = "platform version";
			value = dinfo->pl_info->pl_version;
			break;
		case 3:
			key = "platform name";
			value = dinfo->pl_info->pl_name;
			break;
		case 4:
			key = "platform vendor";
			value = dinfo->pl_info->pl_vendor;
			break;
		case 5:
			key = "platform extensions";
			value = dinfo->pl_info->pl_extensions;
			break;
		case 6:
			key = "address bits";
			value = psprintf("%u", dinfo->dev_address_bits);
			break;
		case 7:
			key = "device available";
			value = dinfo->dev_available ? "yes" : "no";
			break;
		case 8:
			key = "compiler available";
			value = dinfo->dev_compiler_available ? "yes" : "no";
			break;
		case 9:
			key = "double fp config";
			value = fp_config_to_cstring(dinfo->dev_double_fp_config);
			break;
		case 10:
			key = "little endian";
			value = dinfo->dev_endian_little ? "yes" : "no";
			break;
		case 11:
			key = "error correction support";
			value = dinfo->dev_error_correction_support ? "yes" : "no";
			break;
		case 12:
			key = "execution capabilities";
			if (dinfo->dev_execution_capabilities & CL_EXEC_KERNEL)
				ofs = append_flag_label(buf, sizeof(buf), ofs, "OpenCL");
			if (dinfo->dev_execution_capabilities & CL_EXEC_NATIVE_KERNEL)
				ofs = append_flag_label(buf, sizeof(buf), ofs, "Native");
			value = buf;
			break;
		case 13:
			key = "device extensions";
			value = dinfo->dev_device_extensions;
			break;
		case 14:
			key = "global mem cache size";
			value = psprintf("%lu", dinfo->dev_global_mem_cache_size);
			break;
		case 15:
			key = "global mem cache type";
			switch (dinfo->dev_global_mem_cache_type)
			{
				case CL_NONE:
					value = "none";
					break;
				case CL_READ_ONLY_CACHE:
					value = "read only";
					break;
				case CL_READ_WRITE_CACHE:
					value = "read write";
					break;
				default:
					value = "???";
					break;
			}
			break;
		case 16:
			key = "global mem cacheline size";
			value = psprintf("%u", dinfo->dev_global_mem_cacheline_size);
			break;
		case 17:
			key = "global mem size";
			value = psprintf("%lu", dinfo->dev_global_mem_size);
			break;
		case 18:
			key = "host unified memory";
			value = dinfo->dev_host_unified_memory ? "yes" : "no";
			break;
		case 19:
			key = "local mem size";
			value = psprintf("%lu", dinfo->dev_local_mem_size);
			break;
		case 20:
			key = "local mem type";
			switch (dinfo->dev_local_mem_type)
			{
				case CL_LOCAL:
					value = "local";
					break;
				case CL_GLOBAL:
					value = "global";
					break;
				case CL_NONE:
					value = "none";
					break;
				default:
					value = "???";
					break;
			}
			break;
		case 21:
			key = "max clock frequency";
			value = psprintf("%u", dinfo->dev_max_clock_frequency);
			break;
		case 22:
			key = "max compute units";
			value = psprintf("%u", dinfo->dev_max_compute_units);
			break;
		case 23:
			key = "max constant args";
			value = psprintf("%u", dinfo->dev_max_constant_args);
			break;
		case 24:
			key = "max constant buffer size";
			value = psprintf("%lu", dinfo->dev_max_constant_buffer_size);
			break;
		case 25:
			key = "max mem alloc size";
			value = psprintf("%lu", dinfo->dev_max_mem_alloc_size);
			break;
		case 26:
			key = "max parameter size";
			value = psprintf("%lu", dinfo->dev_max_parameter_size);
			break;
		case 27:
			key = "max samplers";
			value = psprintf("%u", dinfo->dev_max_samplers);
			break;
		case 28:
			key = "max work group size";
			value = psprintf("%zu", dinfo->dev_max_work_group_size);
			break;
		case 29:
			/*
			 * NOTE(review): the label says "work group" although the field
			 * is dev_max_work_item_dimensions; the label is kept as-is
			 * because callers may match on the property name.
			 */
			key = "max work group dimensions";
			value = psprintf("%u", dinfo->dev_max_work_item_dimensions);
			break;
		case 30:
			key = "max work item sizes";
			value = psprintf("{%zu, %zu, %zu}",
							 dinfo->dev_max_work_item_sizes[0],
							 dinfo->dev_max_work_item_sizes[1],
							 dinfo->dev_max_work_item_sizes[2]);
			break;
		case 31:
			key = "mem base address align";
			value = psprintf("%u", dinfo->dev_mem_base_addr_align);
			break;
		case 32:
			key = "device name";
			value = dinfo->dev_name;
			break;
		case 33:
			key = "native vector width (char)";
			value = psprintf("%u", dinfo->dev_native_vector_width_char);
			break;
		case 34:
			key = "native vector width (short)";
			value = psprintf("%u", dinfo->dev_native_vector_width_short);
			break;
		case 35:
			key = "native vector width (int)";
			value = psprintf("%u", dinfo->dev_native_vector_width_int);
			break;
		case 36:
			key = "native vector width (long)";
			value = psprintf("%u", dinfo->dev_native_vector_width_long);
			break;
		case 37:
			key = "native vector width (float)";
			value = psprintf("%u", dinfo->dev_native_vector_width_float);
			break;
		case 38:
			key = "native vector width (double)";
			value = psprintf("%u", dinfo->dev_native_vector_width_double);
			break;
		case 39:
			key = "opencl c version";
			value = dinfo->dev_opencl_c_version;
			break;
		case 40:
			key = "preferred vector width (char)";
			value = psprintf("%u", dinfo->dev_preferred_vector_width_char);
			break;
		case 41:
			key = "preferred vector width (short)";
			value = psprintf("%u", dinfo->dev_preferred_vector_width_short);
			break;
		case 42:
			key = "preferred vector width (int)";
			value = psprintf("%u", dinfo->dev_preferred_vector_width_int);
			break;
		case 43:
			key = "preferred vector width (long)";
			value = psprintf("%u", dinfo->dev_preferred_vector_width_long);
			break;
		case 44:
			key = "preferred vector width (float)";
			value = psprintf("%u", dinfo->dev_preferred_vector_width_float);
			break;
		case 45:
			key = "preferred vector width (double)";
			value = psprintf("%u", dinfo->dev_preferred_vector_width_double);
			break;
		case 46:
			key = "device profile";
			value = dinfo->dev_profile;
			break;
		case 47:
			key = "profiling timer resolution";
			value = psprintf("%zu", dinfo->dev_profiling_timer_resolution);
			break;
		case 48:
			key = "command queue properties";
			if (dinfo->dev_queue_properties &
				CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)
				ofs = append_flag_label(buf, sizeof(buf), ofs, "out of order");
			if (dinfo->dev_queue_properties & CL_QUEUE_PROFILING_ENABLE)
				ofs = append_flag_label(buf, sizeof(buf), ofs, "profiling");
			value = buf;
			break;
		case 49:
			key = "single fp config";
			value = fp_config_to_cstring(dinfo->dev_single_fp_config);
			break;
		case 50:
			key = "device type";
			if (dinfo->dev_type & CL_DEVICE_TYPE_CPU)
				ofs = append_flag_label(buf, sizeof(buf), ofs, "cpu");
			if (dinfo->dev_type & CL_DEVICE_TYPE_GPU)
				ofs = append_flag_label(buf, sizeof(buf), ofs, "gpu");
			if (dinfo->dev_type & CL_DEVICE_TYPE_ACCELERATOR)
				ofs = append_flag_label(buf, sizeof(buf), ofs, "accelerator");
			if (dinfo->dev_type & CL_DEVICE_TYPE_DEFAULT)
				ofs = append_flag_label(buf, sizeof(buf), ofs, "default");
			if (dinfo->dev_type & CL_DEVICE_TYPE_CUSTOM)
				ofs = append_flag_label(buf, sizeof(buf), ofs, "custom");
			value = buf;
			break;
		case 51:
			key = "device vendor";
			value = dinfo->dev_vendor;
			break;
		case 52:
			key = "device vendor id";
			value = psprintf("%u", dinfo->dev_vendor_id);
			break;
		case 53:
			key = "device version";
			value = dinfo->dev_version;
			break;
		case 54:
			key = "driver version";
			value = dinfo->driver_version;
			break;
		default:
			elog(ERROR, "unexpected property index");
			break;
	}

	/* none of the four columns is ever NULL */
	memset(isnull, 0, sizeof(isnull));
	values[0] = Int32GetDatum(dindex);
	values[1] = Int32GetDatum(pindex);
	values[2] = CStringGetTextDatum(key);
	/* the text datum is built from 'value' here, while 'buf' is still live */
	values[3] = CStringGetTextDatum(value);

	tuple = heap_form_tuple(fncxt->tuple_desc, values, isnull);

	SRF_RETURN_NEXT(fncxt, HeapTupleGetDatum(tuple));
}