Exemplo n.º 1
0
/*
 * init_opencl_context_and_shmem
 *
 * We can have performance gain using asynchronous DMA transfer when data
 * chunk it moved to OpenCL device from host machine, however, it requires
 * preparations to ensure the memory region to be copied to/from is pinned
 * on RAM; not swapped out. OpenCL provides an interface to map a certain
 * host address area as pinned buffer object, even though its size is
 * restricted to CL_DEVICE_MAX_MEM_ALLOC_SIZE parameter. Usually, it is
 * much less than size of shared memory to be assigned to PG-Strom, around
 * 500MB - 2GB in typical GPU/MIC device. So, we need to split a flat
 * continuous memory into several 'zones' to pin it using OpenCL interface.
 * Because it is a job of OpenCL intermediation server to collect properties
 * of devices, and this server shall be launched post initialization stage,
 * we also have to acquire and pin the shared memory region in the context
 * of OpenCL intermediation server, not postmaster itself.
 */
static void
init_opencl_context_and_shmem(void)
{
	Size	zone_length = LONG_MAX;
	cl_int	i, rc;

	/*
	 * Create an OpenCL context
	 */
	opencl_context = clCreateContext(NULL,
									 opencl_num_devices,
									 opencl_devices,
									 NULL,
									 NULL,
									 &rc);
	if (rc != CL_SUCCESS)
		elog(ERROR, "clCreateContext failed: %s", opencl_strerror(rc));

	/*
	 * Create an OpenCL command queue for each device
	 */
	for (i=0; i < opencl_num_devices; i++)
	{
		const pgstrom_device_info *dev_info = pgstrom_get_device_info(i);

		opencl_cmdq[i] =
			clCreateCommandQueue(opencl_context,
								 opencl_devices[i],
								 CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE |
								 CL_QUEUE_PROFILING_ENABLE,
								 &rc);
		if (rc != CL_SUCCESS)
			elog(ERROR, "clCreateCommandQueue failed: %s",
				 opencl_strerror(rc));

		if (zone_length > dev_info->dev_max_mem_alloc_size)
			zone_length = (dev_info->dev_max_mem_alloc_size &
						   ~((1UL << 20) - 1));
	}
	/* Lock shared memory of PG-Strom's private area */
	pgstrom_setup_shmem(zone_length, on_shmem_zone_callback);

	/* Lock shared memory of shared buffer area */
	if (!on_shmem_zone_callback(BufferBlocks,
								NBuffers * (Size) BLCKSZ,
								"buffer", false))
	{
		Size	total_size = NBuffers * (Size) BLCKSZ;
		Size	offset;

		Assert((zone_length & (BLCKSZ - 1)) == 0);

		for (offset = 0; offset < total_size; offset += zone_length)
		{
			on_shmem_zone_callback(BufferBlocks + offset,
								   Min(zone_length, total_size - offset),
								   "buffer", true);
		}
	}
}
Exemplo n.º 2
0
/*
 * pgstrom_strerror
 *
 * Maps an error code to a human readable message. Negative codes are
 * delegated to opencl_strerror(); non-negative codes are looked up among
 * the StromError_* labels. A code without a label is formatted into a
 * function-local static buffer, so that result is overwritten by the next
 * call that hits the same path.
 */
const char *
pgstrom_strerror(cl_int errcode)
{
	static char		unknown_buf[256];
	const char	   *label = NULL;

	if (errcode < 0)
		return opencl_strerror(errcode);

	switch (errcode)
	{
		case StromError_Success:
			label = "success";
			break;
		case StromError_RowFiltered:
			label = "row is filtered";
			break;
		case StromError_RowReCheck:
			label = "row should be rechecked";
			break;
		case StromError_ServerNotReady:
			label = "OpenCL server is not ready";
			break;
		case StromError_BadRequestMessage:
			label = "request message is bad";
			break;
		case StromError_OpenCLInternal:
			label = "OpenCL internal error";
			break;
		case StromError_OutOfSharedMemory:
			label = "out of shared memory";
			break;
		case StromError_DivisionByZero:
			label = "division by zero";
			break;
		default:
			break;
	}
	if (label != NULL)
		return label;

	/* no label is known for this code */
	snprintf(unknown_buf, sizeof(unknown_buf),
			 "undefined strom error (code: %d)", errcode);
	return unknown_buf;
}
Exemplo n.º 3
0
int main(int argc, char *argv[])
{
	cl_platform_id	platform_ids[32];
	cl_uint			platform_num;
	cl_int			i, c, rc;

	while ((c = getopt(argc, argv, "lp:d:")) != -1)
	{
		switch (c)
		{
			case 'l':
				only_list = 1;
				break;
			case 'p':
				only_platform = atoi(optarg);
				break;
			case 'd':
				only_device = atoi(optarg);
				break;
			default:
				fprintf(stderr,
						"usage: %s [-l] [-p <platform>] [-d <device>]\n",
						basename(argv[0]));
				return 1;
		}
	}

	rc = clGetPlatformIDs(lengthof(platform_ids),
						  platform_ids,
						  &platform_num);
	if (rc != CL_SUCCESS)
	{
		fprintf(stderr, "failed on clGetPlatformIDs (%s)",
				opencl_strerror(rc));
		return 1;
	}

	for (i=0; i < platform_num; i++)
	{
		if (only_platform < 0 || i + 1 == only_platform)
			dump_platform(i, platform_ids[i]);
	}
	return 0;
}
Exemplo n.º 4
0
/*
 * on_shmem_zone_callback
 *
 * It is a callback function for each zone on shared memory segment
 * initialization. It assigns a buffer object of OpenCL for each zone
 * for asynchronous memory transfer later.
 *
 * address: start of the host memory zone handed to clCreateBuffer()
 * length:  size of the zone in bytes
 *
 * Returns the cl_mem object created with CL_MEM_USE_HOST_PTR over the
 * zone. A clCreateBuffer() failure is raised with elog(ERROR).
 */
static void *
on_shmem_zone_callback(void *address, Size length)
{
    cl_mem		host_mem;
    cl_int		rc;

    host_mem = clCreateBuffer(opencl_context,
                              CL_MEM_READ_WRITE |
                              CL_MEM_USE_HOST_PTR,
                              length,
                              address,
                              &rc);
    if (rc != CL_SUCCESS)
        elog(ERROR, "clCreateBuffer failed on host memory (%p-%p): %s",
             address, (char *)address + length - 1, opencl_strerror(rc));

    /*
     * Size is not guaranteed to have the width of unsigned long, so the
     * value is converted explicitly to match the %lu conversion.
     */
    elog(LOG, "PG-Strom: zone %p-%p was mapped (len: %luMB)",
         address, (char *)address + length - 1,
         (unsigned long)(length >> 20));
    return host_mem;
}
Exemplo n.º 5
0
Arquivo: main.c Projeto: ablimit/devel
/*
 * pgstrom_strerror
 *
 * Maps an error code to a human readable message. Negative codes are
 * delegated to opencl_strerror(); non-negative codes are looked up among
 * the StromError_* labels. A code without a label is formatted into a
 * function-local static buffer, so that result is overwritten by the next
 * call that hits the same path.
 */
const char *
pgstrom_strerror(cl_int errcode)
{
	static char		unknown_buf[256];
	const char	   *label = NULL;

	if (errcode < 0)
		return opencl_strerror(errcode);

	switch (errcode)
	{
		case StromError_Success:
			label = "Success";
			break;
		case StromError_RowFiltered:
			label = "Row is filtered";
			break;
		case StromError_CpuReCheck:
			label = "To be re-checked by CPU";
			break;
		case StromError_ServerNotReady:
			label = "OpenCL server is not ready";
			break;
		case StromError_BadRequestMessage:
			label = "Request message is bad";
			break;
		case StromError_OpenCLInternal:
			label = "OpenCL internal error";
			break;
		case StromError_OutOfSharedMemory:
			label = "out of shared memory";
			break;
		case StromError_OutOfMemory:
			label = "out of host memory";
			break;
		case StromError_DataStoreCorruption:
			label = "data store is corrupted";
			break;
		case StromError_DataStoreNoSpace:
			label = "data store has no space";
			break;
		case StromError_DataStoreOutOfRange:
			label = "out of range in data store";
			break;
		case StromError_SanityCheckViolation:
			label = "sanity check violation";
			break;
		default:
			break;
	}
	if (label != NULL)
		return label;

	/* no label is known for this code */
	snprintf(unknown_buf, sizeof(unknown_buf),
			 "undefined strom error (code: %d)", errcode);
	return unknown_buf;
}
Exemplo n.º 6
0
/*
 * on_shmem_zone_callback
 *
 * It is a callback function for each zone on shared memory segment
 * initialization. It assigns a buffer object of OpenCL for each zone
 * for asynchronous memory transfer later.
 *
 * address:        start of the host memory zone
 * length:         size of the zone in bytes
 * label:          name of the area, used only in the log message
 * abort_on_error: if true, a clCreateBuffer() failure is raised with
 *                 elog(ERROR); if false, the failure is reported to the
 *                 caller through the return value
 *
 * Returns true when the buffer object was created, false otherwise.
 * The created cl_mem handle is intentionally not kept by this function.
 */
static bool
on_shmem_zone_callback(void *address, Size length,
					   const char *label, bool abort_on_error)
{
	cl_int		rc;

	(void)clCreateBuffer(opencl_context,
						 CL_MEM_READ_WRITE |
						 CL_MEM_USE_HOST_PTR,
						 length,
						 address,
						 &rc);
	if (rc != CL_SUCCESS)
	{
		if (abort_on_error)
			elog(ERROR, "clCreateBuffer failed on host memory (%p-%p): %s",
				 address, (char *)address + length - 1, opencl_strerror(rc));
		return false;
	}

	/*
	 * Size is not guaranteed to have the width of unsigned long, so the
	 * value is converted explicitly to match the %lu conversion.
	 */
	elog(LOG, "PG-Strom: %s %p-%p was mapped (len: %luMB)",
		 label, address, (char *)address + length - 1,
		 (unsigned long)(length >> 20));
	return true;
}
Exemplo n.º 7
0
/*
 * run_test
 *
 * Measures host <-> device transfer throughput and prints a report.
 *
 * namebuf: device name, used only in the report
 * context: OpenCL context the buffer objects are created in
 * cmdq:    command queue the transfers are enqueued on
 *
 * For each of num_trial rounds, the host buffer is written to the device
 * buffer in num_chunks pieces of chunk_size bytes, then the whole buffer
 * is read back. The writes of a round wait for the read of the previous
 * round, and the read waits for all writes of its own round, so rounds
 * are serialized through events. When is_blocking is false, the host
 * buffer is additionally wrapped by a CL_MEM_USE_HOST_PTR buffer object
 * before the transfers start.
 *
 * Any failure terminates through error_exit().
 */
static void
run_test(const char *namebuf, cl_context context, cl_command_queue cmdq)
{
	cl_event	   *ev;
	char		   *hmem;
	cl_mem			dmem;
	cl_mem			pinned = NULL;
	cl_int			num_chunks;
	cl_int			rc, i, j, k;
	struct timeval	tv1, tv2;
	double			elapsed_usec;

	num_chunks = buffer_size / chunk_size;

	/*
	 * One event per chunk write, plus one for the read, per trial.
	 * No status code exists yet at this point, so the message names the
	 * failed allocation instead of decoding an unset variable.
	 */
	ev = malloc(sizeof(cl_event) * (num_chunks + 1) * num_trial);
	if (!ev)
		error_exit("out of memory (%s)", "event array");

	hmem = malloc(buffer_size);
	if (!hmem)
		error_exit("out of memory (%s)", "host buffer");

	dmem = clCreateBuffer(context,
						  CL_MEM_READ_WRITE,
						  buffer_size,
						  NULL,
						  &rc);
	if (rc != CL_SUCCESS)
		error_exit("failed on clCreateBuffer(size=%lu) (%s)",
				   buffer_size, opencl_strerror(rc));

	gettimeofday(&tv1, NULL);

	if (!is_blocking)
	{
		/* wrap the host buffer by a buffer object for async transfer */
		pinned = clCreateBuffer(context,
								CL_MEM_READ_WRITE |
								CL_MEM_USE_HOST_PTR,
								buffer_size,
								hmem,
								&rc);
		if (rc != CL_SUCCESS)
			error_exit("failed on clCreateBuffer(size=%lu) (%s)",
					   buffer_size, opencl_strerror(rc));
	}

	/* k is the index of the first event slot of the current trial */
	for (i=0, k=0; i < num_trial; i++)
	{
		for (j=0; j < num_chunks; j++)
		{
			/* ev[k-1] is the read event of the previous trial */
			rc = clEnqueueWriteBuffer(cmdq,
									  dmem,
									  is_blocking,
									  j * chunk_size,
									  chunk_size,
									  hmem + j * chunk_size,
									  i > 0 ? 1 : 0,
									  i > 0 ? &ev[k-1] : NULL,
									  &ev[k+j]);
			if (rc != CL_SUCCESS)
				error_exit("failed on clEnqueueWriteBuffer (%s)",
						   opencl_strerror(rc));
		}

		/* read back once all the chunk writes of this trial are done */
		rc = clEnqueueReadBuffer(cmdq,
								 dmem,
								 is_blocking,
								 0,
								 buffer_size,
								 hmem,
								 num_chunks,
								 &ev[k],
								 &ev[k+num_chunks]);
		if (rc != CL_SUCCESS)
			error_exit("failed on clEnqueueReadBuffer (%s)",
					   opencl_strerror(rc));
		k += num_chunks + 1;
	}
	rc = clFinish(cmdq);
	if (rc != CL_SUCCESS)
		error_exit("failed on clFinish (%s)", opencl_strerror(rc));

	gettimeofday(&tv2, NULL);

	elapsed_usec = (double)((tv2.tv_sec * 1000000 + tv2.tv_usec) -
							(tv1.tv_sec * 1000000 + tv1.tv_usec));

	printf("DMA send/recv test result\n"
		   "device:         %s\n"
		   "size:           %luMB\n"
		   "chunks:         %lu%s x %d\n"
		   "ntrials:        %d\n"
		   "total_size:     %luMB\n"
		   "time:           %.2fs\n"
		   "speed:          %.2fMB/s\n"
		   "mode:           %s\n",
		   namebuf,
		   buffer_size >> 20,
		   chunk_size > (1UL<<20) ? chunk_size >> 20 : chunk_size >> 10,
		   chunk_size > (1UL<<20) ? "MB" : "KB",
		   num_chunks,
		   num_trial,
		   (buffer_size >> 20) * num_trial,
		   elapsed_usec / 1000000.0,
		   (double)(((buffer_size >> 20) * num_trial) * 1000000) /
		   elapsed_usec,
		   is_blocking ? "sync" : "async");

	/*
	 * release resources
	 *
	 * NOTE(review): the cl_event handles stored in ev[] are not released
	 * individually here; only the array itself is freed.
	 */
	if (pinned)
		clReleaseMemObject(pinned);
	clReleaseMemObject(dmem);
	free(hmem);
	free(ev);
}
Exemplo n.º 8
0
int main(int argc, char *argv[])
{
	cl_platform_id	platform_ids[32];
	cl_int			platform_num;
	cl_device_id	device_ids[256];
	cl_int			device_num;
	cl_context		context;
	cl_command_queue cmdq;
	cl_int			c, rc;
	char			namebuf[1024];

	while ((c = getopt(argc, argv, "p:d:m:n:s:c:")) >= 0)
	{
		switch (c)
		{
			case 'p':
				platform_idx = atoi(optarg);
				break;
			case 'd':
				device_idx = atoi(optarg);
				break;
			case 'm':
				if (strcmp(optarg, "sync") == 0)
					is_blocking = CL_TRUE;
				else if (strcmp(optarg, "async") == 0)
					is_blocking = CL_FALSE;
				else
					usage(basename(argv[0]));
				break;
			case 'n':
				num_trial = atoi(optarg);
				break;
			case 's':
				buffer_size = atoi(optarg) << 20;
				break;
			case 'c':
				chunk_size = atoi(optarg) << 10;
				break;
			default:
				usage(basename(argv[0]));
				break;
		}
	}
	if (optind != argc)
		usage(basename(argv[0]));

	if (chunk_size == 0)
		chunk_size = buffer_size;
	else if (buffer_size % chunk_size != 0 || buffer_size < chunk_size)
	{
		fprintf(stderr, "chunk_size (-c) must be aligned to buffer_size\n");
		return 1;
	}

	/*
	 * Initialize OpenCL platform/device
	 */
	opencl_entry_init();

	/* Get platform IDs */
	rc = clGetPlatformIDs(lengthof(platform_ids),
						  platform_ids,
						  &platform_num);
	if (rc != CL_SUCCESS)
		error_exit("failed on clGetPlatformIDs (%s)", opencl_strerror(rc));
	if (platform_idx < 1 || platform_idx > platform_num)
		error_exit("opencl platform index %d did not exist", platform_idx);

	/* Get device IDs */
	rc = clGetDeviceIDs(platform_ids[platform_idx - 1],
						CL_DEVICE_TYPE_ALL,
						lengthof(device_ids),
						device_ids,
						&device_num);
	if (rc != CL_SUCCESS)
		error_exit("failed on clGetDeviceIDs (%s)\n", opencl_strerror(rc));
	if (device_idx < 1 || device_idx > device_num)
		error_exit("opencl device index %d did not exist", device_idx);

	/* Get name of opencl device */
	rc = clGetDeviceInfo(device_ids[device_idx - 1],
						 CL_DEVICE_NAME,
						 sizeof(namebuf), namebuf, NULL);
	if (rc != CL_SUCCESS)
		error_exit("failed on clGetDeviceInfo (%s)", opencl_strerror(rc));

	/* Construct an OpenCL context */
	context = clCreateContext(NULL,
                              1,
                              &device_ids[device_idx - 1],
                              NULL,
                              NULL,
                              &rc);
	if (rc != CL_SUCCESS)
		error_exit("failed to create an opencl context (%s)",
				   opencl_strerror(rc));

	/* Construct an OpenCL command queue */
	cmdq = clCreateCommandQueue(context,
								device_ids[device_idx - 1],
								CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
								&rc);
	if (rc != CL_SUCCESS)
		error_exit("failed to create an opencl command queue (%s)",
				   opencl_strerror(rc));

	/* do the job */
	run_test(namebuf, context, cmdq);

	/* cleanup resources */
	clReleaseCommandQueue(cmdq);
	clReleaseContext(context);

	return 0;
}
Exemplo n.º 9
0
/*
 * pgstrom_collect_device_info
 *
 * It collects properties of all the OpenCL devices. It shall be called once
 * by the OpenCL management worker process, prior to any other backends.
 */
static List *
construct_opencl_device_info(int platform_index)
{
    cl_platform_id	platforms[32];
    cl_device_id	devices[MAX_NUM_DEVICES];
    cl_uint			n_platform;
    cl_uint			n_devices;
    cl_int			i, j, rc;
    long			score_max = -1;
    List		   *result = NIL;

    rc = clGetPlatformIDs(lengthof(platforms),
                          platforms,
                          &n_platform);
    if (rc != CL_SUCCESS)
        elog(ERROR, "clGetPlatformIDs failed (%s)", opencl_strerror(rc));

    for (i=0; i < n_platform; i++)
    {
        pgstrom_platform_info  *pl_info;
        pgstrom_device_info	   *dev_info;
        long		score = 0;
        List	   *temp = NIL;

        pl_info = collect_opencl_platform_info(platforms[i]);
        pl_info->pl_index = i;

        rc = clGetDeviceIDs(platforms[i],
                            CL_DEVICE_TYPE_CPU |
                            CL_DEVICE_TYPE_GPU |
                            CL_DEVICE_TYPE_ACCELERATOR,
                            lengthof(devices),
                            devices,
                            &n_devices);
        if (rc != CL_SUCCESS)
            elog(ERROR, "clGetDeviceIDs failed (%s)", opencl_strerror(rc));

        elog(LOG, "PG-Strom: [%d] OpenCL Platform: %s", i, pl_info->pl_name);

        for (j=0; j < n_devices; j++)
        {
            dev_info = collect_opencl_device_info(devices[j]);
            dev_info->pl_info = pl_info;
            dev_info->dev_index = j;

            elog(LOG, "PG-Strom:  + device %s (%uMHz x %uunits, %luMB)",
                 dev_info->dev_name,
                 dev_info->dev_max_clock_frequency,
                 dev_info->dev_max_compute_units,
                 dev_info->dev_global_mem_size >> 20);

            /* rough estimation about computing power */
            if ((dev_info->dev_type & CL_DEVICE_TYPE_GPU) != 0)
                score += 32 * (dev_info->dev_max_compute_units *
                               dev_info->dev_max_clock_frequency);
            else
                score += (dev_info->dev_max_compute_units *
                          dev_info->dev_max_clock_frequency);

            temp = lappend(temp, dev_info);
        }

        if (platform_index == i || (platform_index < 0 && score > score_max))
        {
            opencl_platform_id = platforms[i];
            opencl_num_devices = n_devices;
            for (j=0; j < n_devices; j++)
                opencl_devices[j] = devices[j];

            score_max = score;
            result = temp;
        }
    }

    /* show platform name if auto-selection */
    if (platform_index < 0 && result != NIL)
    {
        pgstrom_platform_info *pl_info
            = ((pgstrom_device_info *) linitial(result))->pl_info;
        elog(LOG, "PG-Strom: auto platform selection: %s", pl_info->pl_name);
    }

    if (result != NIL)
    {
        /*
         * Create an OpenCL context
         */
        opencl_context = clCreateContext(NULL,
                                         opencl_num_devices,
                                         opencl_devices,
                                         NULL,
                                         NULL,
                                         &rc);
        if (rc != CL_SUCCESS)
            elog(ERROR, "clCreateContext failed: %s", opencl_strerror(rc));

        /*
         * Create an OpenCL command queue for each device
         */
        for (j=0; j < opencl_num_devices; j++)
        {
            opencl_cmdq[j] =
                clCreateCommandQueue(opencl_context,
                                     opencl_devices[j],
                                     CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE |
                                     CL_QUEUE_PROFILING_ENABLE,
                                     &rc);
            if (rc != CL_SUCCESS)
                elog(ERROR, "clCreateCommandQueue failed: %s",
                     opencl_strerror(rc));
        }
    }
    return result;
}
Exemplo n.º 10
0
/*
 * collect_opencl_device_info
 *
 * Queries every property listed in the catalog below from the supplied
 * device with clGetDeviceInfo() and stores it into a newly palloc'ed
 * pgstrom_device_info.
 *
 * Fixed-size properties are written straight into the structure field.
 * String properties are packed one after another into the trailing
 * buffer[] area (each start MAXALIGN'ed), and the corresponding field is
 * set to point at the stored string. dev_info->buflen receives the amount
 * of buffer[] consumed.
 *
 * Returns the device info, or NULL (after freeing it and emitting a LOG
 * message) when the device fails one of the capability checks at the
 * bottom. A clGetDeviceInfo() failure, or running out of string buffer,
 * raises elog(ERROR).
 *
 * NOTE(review): CLDEV_PARAM is defined outside this block; it presumably
 * expands to { param, sizeof(field), offsetof(field), is_cstring }.
 */
pgstrom_device_info *
collect_opencl_device_info(cl_device_id device_id)
{
	pgstrom_device_info *dev_info;
	Size		offset = 0;
	Size		buflen = 10240;
	cl_int		i, rc;
	int			major, minor;
	static struct {
		cl_uint		param;
		size_t		size;
		size_t		offset;
		bool		is_cstring;
	} catalog[] = {
		CLDEV_PARAM(CL_DEVICE_ADDRESS_BITS,
					dev_address_bits, false),
		CLDEV_PARAM(CL_DEVICE_AVAILABLE,
					dev_available, false),
		CLDEV_PARAM(CL_DEVICE_COMPILER_AVAILABLE,
					dev_compiler_available, false),
		CLDEV_PARAM(CL_DEVICE_DOUBLE_FP_CONFIG,
					dev_double_fp_config, false),
		CLDEV_PARAM(CL_DEVICE_ENDIAN_LITTLE,
					dev_endian_little, false),
		CLDEV_PARAM(CL_DEVICE_ERROR_CORRECTION_SUPPORT,
					dev_error_correction_support, false),
		CLDEV_PARAM(CL_DEVICE_EXECUTION_CAPABILITIES,
					dev_execution_capabilities, false),
		CLDEV_PARAM(CL_DEVICE_EXTENSIONS,
					dev_device_extensions, true),
		CLDEV_PARAM(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE,
					dev_global_mem_cache_size, false),
		CLDEV_PARAM(CL_DEVICE_GLOBAL_MEM_CACHE_TYPE,
					dev_global_mem_cache_type, false),
		CLDEV_PARAM(CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE,
					dev_global_mem_cacheline_size, false),
		CLDEV_PARAM(CL_DEVICE_GLOBAL_MEM_SIZE,
					dev_global_mem_size, false),
		CLDEV_PARAM(CL_DEVICE_HOST_UNIFIED_MEMORY,
					dev_host_unified_memory, false),
		CLDEV_PARAM(CL_DEVICE_LOCAL_MEM_SIZE,
					dev_local_mem_size, false),
		CLDEV_PARAM(CL_DEVICE_LOCAL_MEM_TYPE,
					dev_local_mem_type, false),
		CLDEV_PARAM(CL_DEVICE_MAX_CLOCK_FREQUENCY,
					dev_max_clock_frequency, false),
		CLDEV_PARAM(CL_DEVICE_MAX_COMPUTE_UNITS,
					dev_max_compute_units, false),
		CLDEV_PARAM(CL_DEVICE_MAX_CONSTANT_ARGS,
					dev_max_constant_args, false),
		CLDEV_PARAM(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE,
					dev_max_constant_buffer_size, false),
		CLDEV_PARAM(CL_DEVICE_MAX_MEM_ALLOC_SIZE,
					dev_max_mem_alloc_size, false),
		CLDEV_PARAM(CL_DEVICE_MAX_PARAMETER_SIZE,
					dev_max_parameter_size, false),
		CLDEV_PARAM(CL_DEVICE_MAX_SAMPLERS,
					dev_max_samplers, false),
		CLDEV_PARAM(CL_DEVICE_MAX_WORK_GROUP_SIZE,
					dev_max_work_group_size, false),
		CLDEV_PARAM(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
					dev_max_work_item_dimensions, false),
		CLDEV_PARAM(CL_DEVICE_MAX_WORK_ITEM_SIZES,
					dev_max_work_item_sizes, false),
		CLDEV_PARAM(CL_DEVICE_MEM_BASE_ADDR_ALIGN,
					dev_mem_base_addr_align, false),
		CLDEV_PARAM(CL_DEVICE_NAME,
					dev_name, true),
		CLDEV_PARAM(CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR,
					dev_native_vector_width_char, false),
		CLDEV_PARAM(CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT,
					dev_native_vector_width_short, false),
		CLDEV_PARAM(CL_DEVICE_NATIVE_VECTOR_WIDTH_INT,
					dev_native_vector_width_int, false),
		CLDEV_PARAM(CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG,
					dev_native_vector_width_long, false),
		CLDEV_PARAM(CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT,
					dev_native_vector_width_float, false),
		CLDEV_PARAM(CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE,
					dev_native_vector_width_double, false),
		CLDEV_PARAM(CL_DEVICE_OPENCL_C_VERSION,
					dev_opencl_c_version, true),
		CLDEV_PARAM(CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR,
					dev_preferred_vector_width_char, false),
		CLDEV_PARAM(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT,
					dev_preferred_vector_width_short, false),
		CLDEV_PARAM(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT,
					dev_preferred_vector_width_int, false),
		CLDEV_PARAM(CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG,
					dev_preferred_vector_width_long, false),
		CLDEV_PARAM(CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT,
					dev_preferred_vector_width_float, false),
		CLDEV_PARAM(CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE,
					dev_preferred_vector_width_double, false),
		CLDEV_PARAM(CL_DEVICE_PROFILE,
					dev_profile, true),
		CLDEV_PARAM(CL_DEVICE_PROFILING_TIMER_RESOLUTION,
					dev_profiling_timer_resolution, false),
		CLDEV_PARAM(CL_DEVICE_QUEUE_PROPERTIES,
					dev_queue_properties, false),
		CLDEV_PARAM(CL_DEVICE_SINGLE_FP_CONFIG,
					dev_single_fp_config, false),
		CLDEV_PARAM(CL_DEVICE_TYPE,
					dev_type, false),
		CLDEV_PARAM(CL_DEVICE_VENDOR,
					dev_vendor, true),
		CLDEV_PARAM(CL_DEVICE_VENDOR_ID,
					dev_vendor_id, false),
		CLDEV_PARAM(CL_DEVICE_VERSION,
					dev_version, true),
		CLDEV_PARAM(CL_DRIVER_VERSION,
					driver_version, true)
	};

	/* header plus buflen bytes of string storage; only the header is zeroed */
	dev_info = palloc(offsetof(pgstrom_device_info, buffer[buflen]));
	memset(dev_info, 0, sizeof(pgstrom_device_info));

	for (i=0; i < lengthof(catalog); i++)
	{
		size_t	param_size;
		size_t	param_retsz;
		char   *param_addr;

		if (!catalog[i].is_cstring)
		{
			param_size = catalog[i].size;
			param_addr = (char *)dev_info + catalog[i].offset;
		}
		else
		{
			Assert(catalog[i].size == sizeof(char *));
			if (offset >= buflen)
				elog(ERROR, "out of buffer on clGetDeviceInfo (param=%d)",
					 catalog[i].param);
			/*
			 * One byte is held back for the terminator appended below,
			 * so a value that fills the whole remaining space cannot
			 * push that write beyond the end of buffer[].
			 */
			param_size = buflen - offset - 1;
			param_addr = &dev_info->buffer[offset];
		}

		rc = clGetDeviceInfo(device_id,
							 catalog[i].param,
							 param_size,
							 param_addr,
							 &param_retsz);
		if (rc != CL_SUCCESS)
			elog(ERROR, "failed on clGetDeviceInfo (param=%d, %s)",
				 catalog[i].param, opencl_strerror(rc));
		Assert(param_size == param_retsz || catalog[i].is_cstring);

		if (catalog[i].is_cstring)
		{
			param_addr[param_retsz] = '\0';
			/* let the structure field point at the packed string */
			*((char **)((char *)dev_info + catalog[i].offset)) = param_addr;
			/* account for the appended terminator as well */
			offset += MAXALIGN(param_retsz + 1);
		}
	}
	dev_info->buflen = offset;

	/*
	 * Check device capability is enough to run PG-Strom
	 */
	if (strcmp(dev_info->dev_profile, "FULL_PROFILE") != 0)
	{
		elog(LOG, "Profile of OpenCL device \"%s\" is \"%s\", skipped",
			 dev_info->dev_name, dev_info->dev_profile);
		goto out_clean;
	}
	if ((dev_info->dev_type & (CL_DEVICE_TYPE_CPU |
							   CL_DEVICE_TYPE_GPU |
							   CL_DEVICE_TYPE_ACCELERATOR)) == 0)
	{
		elog(LOG, "Only CPU, GPU or Accelerator are supported, skipped");
		goto out_clean;
	}
	if (!dev_info->dev_available)
	{
		elog(LOG, "OpenCL device \"%s\" is not available, skipped",
			 dev_info->dev_name);
		goto out_clean;
	}
	if (!dev_info->dev_compiler_available)
	{
		elog(LOG, "OpenCL compiler of device \"%s\" is not available, skipped",
			 dev_info->dev_name);
		goto out_clean;
	}
	if (!dev_info->dev_endian_little)
	{
		elog(LOG, "OpenCL device \"%s\" has big endian, not supported",
			 dev_info->dev_name);
		goto out_clean;
	}
	/* OpenCL C 1.1 or later is required */
	if (sscanf(dev_info->dev_opencl_c_version, "OpenCL C %d.%d ",
			   &major, &minor) != 2 ||
		major < 1 || (major == 1 && minor < 1))
	{
		elog(LOG, "OpenCL C version of \"%s\"is too old \"%s\", skipped",
			 dev_info->dev_name, dev_info->dev_opencl_c_version);
		goto out_clean;
	}

	/* any dimension count other than 3 is rejected, smaller or larger */
	if (dev_info->dev_max_work_item_dimensions != 3)
	{
		elog(LOG, "OpenCL device \"%s\" does not have exactly 3 work item dimensions, skipped",
			dev_info->dev_name);
		goto out_clean;
	}
	return dev_info;

out_clean:
	pfree(dev_info);
	return NULL;
}
Exemplo n.º 11
0
/*
 * collect_opencl_platform_info
 *
 * Queries every property listed in the catalog below from the supplied
 * platform with clGetPlatformInfo() and stores it into a newly palloc'ed
 * pgstrom_platform_info.
 *
 * String properties are packed one after another into the trailing
 * buffer[] area (each start MAXALIGN'ed), and the corresponding field is
 * set to point at the stored string. pl_info->buflen receives the amount
 * of buffer[] consumed.
 *
 * Returns the platform info, or NULL (after freeing it and emitting a LOG
 * message) when the platform is not FULL_PROFILE or its version is older
 * than OpenCL 1.1. A clGetPlatformInfo() failure, or running out of string
 * buffer, raises elog(ERROR).
 *
 * NOTE(review): CLPF_PARAM is defined outside this block; it presumably
 * expands to { param, sizeof(field), offsetof(field), is_cstring }.
 */
pgstrom_platform_info *
collect_opencl_platform_info(cl_platform_id platform_id)
{
	pgstrom_platform_info *pl_info;
	Size		offset = 0;
	Size		buflen = 10240;
	cl_int		i, rc;
	int			major, minor;
	static struct {
		cl_uint		param;
		size_t		size;
		size_t		offset;
		bool		is_cstring;
	} catalog[] = {
		CLPF_PARAM(CL_PLATFORM_PROFILE, pl_profile, true),
        CLPF_PARAM(CL_PLATFORM_VERSION, pl_version, true),
        CLPF_PARAM(CL_PLATFORM_NAME, pl_name, true),
        CLPF_PARAM(CL_PLATFORM_VENDOR, pl_vendor, true),
        CLPF_PARAM(CL_PLATFORM_EXTENSIONS, pl_extensions, true),
	};

	/* header plus buflen bytes of string storage; only the header is zeroed */
	pl_info = palloc(offsetof(pgstrom_platform_info, buffer[buflen]));
	memset(pl_info, 0, sizeof(pgstrom_platform_info));

	/* collect platform properties */
	for (i=0; i < lengthof(catalog); i++)
	{
		size_t	param_size;
		size_t	param_retsz;
		char   *param_addr;

		if (!catalog[i].is_cstring)
		{
			param_size = catalog[i].size;
			param_addr = (char *)pl_info + catalog[i].offset;
		}
		else
		{
			Assert(catalog[i].size == sizeof(char *));
			if (offset >= buflen)
				elog(ERROR, "out of buffer on clGetPlatformInfo (param=%d)",
					 catalog[i].param);
			/*
			 * One byte is held back for the terminator appended below,
			 * so a value that fills the whole remaining space cannot
			 * push that write beyond the end of buffer[].
			 */
			param_size = buflen - offset - 1;
			param_addr = &pl_info->buffer[offset];
		}

		rc = clGetPlatformInfo(platform_id,
							   catalog[i].param,
							   param_size,
							   param_addr,
							   &param_retsz);
		if (rc != CL_SUCCESS)
			elog(ERROR, "failed on clGetPlatformInfo (param=%d, %s)",
				 catalog[i].param, opencl_strerror(rc));
		Assert(param_size == param_retsz || catalog[i].is_cstring);

		if (catalog[i].is_cstring)
		{
			param_addr[param_retsz] = '\0';
			/* let the structure field point at the packed string */
			*((char **)((char *)pl_info + catalog[i].offset)) = param_addr;
			/* account for the appended terminator as well */
			offset += MAXALIGN(param_retsz + 1);
		}
	}
	pl_info->buflen = offset;

	if (strcmp(pl_info->pl_profile, "FULL_PROFILE") != 0)
	{
		elog(LOG, "Profile of OpenCL driver \"%s\" is \"%s\", skipped",
			 pl_info->pl_name, pl_info->pl_profile);
		goto out_clean;
	}

	/* OpenCL 1.1 or later is required */
	if (sscanf(pl_info->pl_version, "OpenCL %d.%d ", &major, &minor) != 2 ||
		major < 1 || (major == 1 && minor < 1))
	{
		elog(LOG, "OpenCL version of \"%s\" is too old \"%s\", skipped",
			 pl_info->pl_name, pl_info->pl_version);
		goto out_clean;
	}
	return pl_info;

out_clean:
	pfree(pl_info);
	return NULL;
}
Exemplo n.º 12
0
/*
 * dump_platform
 *
 * Prints the attributes of one OpenCL platform, then calls dump_device()
 * for each of its devices.
 *
 * index:       0-origin position of the platform; shown as index + 1
 * platform_id: the platform to be queried
 *
 * The attributes listed in the catalog are fetched with
 * clGetPlatformInfo() into the addresses recorded there (the printing
 * code reads them back from platform_info). With only_list set, a single
 * summary line is printed instead of the full attribute list. When
 * only_device is not negative, only the device with that 1-origin index
 * is dumped. Any OpenCL failure terminates the process with exit(1).
 *
 * NOTE(review): PLATFORM_ATTR is defined outside this block; it
 * presumably yields { info, size, address } of a platform_info member.
 */
static void dump_platform(int index, cl_platform_id platform_id)
{
	static struct {
		cl_platform_info info;
		size_t		size;
		void	   *addr;
	} catalog[] = {
		PLATFORM_ATTR(CL_PLATFORM_PROFILE, profile),
		PLATFORM_ATTR(CL_PLATFORM_VERSION, version),
        PLATFORM_ATTR(CL_PLATFORM_NAME, name),
        PLATFORM_ATTR(CL_PLATFORM_VENDOR, vendor),
        PLATFORM_ATTR(CL_PLATFORM_EXTENSIONS, extensions),
	};
	cl_device_id	device_ids[256];
	cl_uint			device_num;
	cl_int			i, rc;

	for (i=0; i < lengthof(catalog); i++)
	{
		rc = clGetPlatformInfo(platform_id,
							   catalog[i].info,
							   catalog[i].size,
							   catalog[i].addr,
							   NULL);
		if (rc != CL_SUCCESS)
		{
			fprintf(stderr, "failed on clGetPlatformInfo (%s)\n",
					opencl_strerror(rc));
			exit(1);
		}
	}

	rc = clGetDeviceIDs(platform_id,
						CL_DEVICE_TYPE_ALL,
						lengthof(device_ids),
						device_ids,
						&device_num);
	if (rc != CL_SUCCESS)
	{
		fprintf(stderr, "failed on clGetDeviceIDs (%s)\n",
				opencl_strerror(rc));
		exit(1);
	}

	/*
	 * clGetDeviceIDs reports the number of matching devices, which may be
	 * larger than the number of entries it was allowed to store. Never
	 * walk past the end of device_ids[].
	 */
	if (device_num > lengthof(device_ids))
		device_num = lengthof(device_ids);

	if (only_list)
		printf("Platform-%02d: %s / %s - %s\n", index + 1,
			   platform_info.vendor,
			   platform_info.name,
			   platform_info.version);
	else
	{
		printf("platform-index:      %d\n", index + 1);
		printf("platform-vendor:     %s\n", platform_info.vendor);
		printf("platform-name:       %s\n", platform_info.name);
		printf("platform-version:    %s\n", platform_info.version);
		printf("platform-profile:    %s\n", platform_info.profile);
		printf("platform-extensions: %s\n", platform_info.extensions);
	}

	for (i=0; i < (cl_int) device_num; i++)
	{
		if (only_device < 0 || i + 1 == only_device)
			dump_device(i, device_ids[i]);
	}
	putchar('\n');
}
Exemplo n.º 13
0
/*
 * dump_device
 *
 * Fetches every attribute listed in the local catalog for the given OpenCL
 * device and prints them to stdout.
 *
 * index     - zero-based device position; printed as index + 1.
 * device_id - OpenCL device handle to be queried.
 *
 * When the file-level 'only_list' flag is set, a single summary line is
 * printed; otherwise one line per attribute is printed. Attributes that
 * relate to double / half precision are printed only if the extension
 * string contains "cl_khr_fp64" / "cl_khr_fp16" respectively. Any
 * unexpected OpenCL failure is reported on stderr and terminates the
 * process with exit status 1.
 */
static void dump_device(int index, cl_device_id device_id)
{
	/*
	 * One entry per device attribute: query id, destination size and
	 * destination address. NOTE(review): DEVICE_ATTR is defined outside
	 * this block; it presumably points each entry at the matching field of
	 * the file-level 'dinfo' that is printed below - confirm.
	 */
	static struct {
		cl_device_info info;
		size_t		size;
		void	   *addr;
	} catalog[] = {
		DEVICE_ATTR(CL_DEVICE_ADDRESS_BITS, address_bits),
		DEVICE_ATTR(CL_DEVICE_AVAILABLE, available),
		DEVICE_ATTR(CL_DEVICE_COMPILER_AVAILABLE, compiler_available),
		DEVICE_ATTR(CL_DEVICE_DOUBLE_FP_CONFIG, double_fp_config),
		DEVICE_ATTR(CL_DEVICE_ENDIAN_LITTLE, endian_little),
		DEVICE_ATTR(CL_DEVICE_ERROR_CORRECTION_SUPPORT,
					error_correction_support),
		DEVICE_ATTR(CL_DEVICE_EXECUTION_CAPABILITIES,
					execution_capabilities),
		DEVICE_ATTR(CL_DEVICE_EXTENSIONS, extensions),
		DEVICE_ATTR(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, global_mem_cache_size),
		DEVICE_ATTR(CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, global_mem_cache_type),
		DEVICE_ATTR(CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE,
					global_mem_cacheline_size),
		DEVICE_ATTR(CL_DEVICE_GLOBAL_MEM_SIZE, global_mem_size),
		DEVICE_ATTR(CL_DEVICE_HALF_FP_CONFIG, half_fp_config),
		DEVICE_ATTR(CL_DEVICE_HOST_UNIFIED_MEMORY, host_unified_memory),
		DEVICE_ATTR(CL_DEVICE_IMAGE_SUPPORT, image_support),
		DEVICE_ATTR(CL_DEVICE_IMAGE2D_MAX_HEIGHT, image2d_max_height),
		DEVICE_ATTR(CL_DEVICE_IMAGE2D_MAX_WIDTH, image2d_max_width),
		DEVICE_ATTR(CL_DEVICE_IMAGE3D_MAX_DEPTH, image3d_max_depth),
		DEVICE_ATTR(CL_DEVICE_IMAGE3D_MAX_HEIGHT, image3d_max_height),
		DEVICE_ATTR(CL_DEVICE_IMAGE3D_MAX_WIDTH, image3d_max_width),
		DEVICE_ATTR(CL_DEVICE_LOCAL_MEM_SIZE, local_mem_size),
		DEVICE_ATTR(CL_DEVICE_LOCAL_MEM_TYPE, local_mem_type),
		DEVICE_ATTR(CL_DEVICE_MAX_CLOCK_FREQUENCY, max_clock_frequency),
		DEVICE_ATTR(CL_DEVICE_MAX_COMPUTE_UNITS, max_compute_units),
		DEVICE_ATTR(CL_DEVICE_MAX_CONSTANT_ARGS, max_constant_args),
		DEVICE_ATTR(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE,
					max_constant_buffer_size),
		DEVICE_ATTR(CL_DEVICE_MAX_MEM_ALLOC_SIZE, max_mem_alloc_size),
		DEVICE_ATTR(CL_DEVICE_MAX_PARAMETER_SIZE, max_parameter_size),
		DEVICE_ATTR(CL_DEVICE_MAX_READ_IMAGE_ARGS, max_read_image_args),
		DEVICE_ATTR(CL_DEVICE_MAX_SAMPLERS, max_samplers),
		DEVICE_ATTR(CL_DEVICE_MAX_WORK_GROUP_SIZE, max_work_group_size),
		DEVICE_ATTR(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
					max_work_item_dimensions),
		DEVICE_ATTR(CL_DEVICE_MAX_WORK_ITEM_SIZES, max_work_item_sizes),
		DEVICE_ATTR(CL_DEVICE_MAX_WRITE_IMAGE_ARGS, max_write_image_args),
		DEVICE_ATTR(CL_DEVICE_MEM_BASE_ADDR_ALIGN, mem_base_addr_align),
		DEVICE_ATTR(CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE,
					min_data_type_align_size),
		DEVICE_ATTR(CL_DEVICE_NAME, name),
		DEVICE_ATTR(CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR,
					native_vector_width_char),
		DEVICE_ATTR(CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT,
					native_vector_width_short),
		DEVICE_ATTR(CL_DEVICE_NATIVE_VECTOR_WIDTH_INT,
					native_vector_width_int),
		DEVICE_ATTR(CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG,
					native_vector_width_long),
		DEVICE_ATTR(CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT,
					native_vector_width_float),
		DEVICE_ATTR(CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE,
					native_vector_width_double),
		DEVICE_ATTR(CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF,
					native_vector_width_half),
		DEVICE_ATTR(CL_DEVICE_OPENCL_C_VERSION, opencl_c_version),
		DEVICE_ATTR(CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR,
					preferred_vector_width_char),
		DEVICE_ATTR(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT,
					preferred_vector_width_short),
		DEVICE_ATTR(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT,
					preferred_vector_width_int),
		DEVICE_ATTR(CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG,
					preferred_vector_width_long),
		DEVICE_ATTR(CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT,
					preferred_vector_width_float),
		DEVICE_ATTR(CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE,
					preferred_vector_width_double),
		DEVICE_ATTR(CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF,
					preferred_vector_width_half),
		DEVICE_ATTR(CL_DEVICE_PROFILE, profile),
		DEVICE_ATTR(CL_DEVICE_PROFILING_TIMER_RESOLUTION,
					profiling_timer_resolution),
		DEVICE_ATTR(CL_DEVICE_QUEUE_PROPERTIES, queue_properties),
		DEVICE_ATTR(CL_DEVICE_SINGLE_FP_CONFIG, single_fp_config),
		DEVICE_ATTR(CL_DEVICE_TYPE, type),
		DEVICE_ATTR(CL_DEVICE_VENDOR, vendor),
		DEVICE_ATTR(CL_DEVICE_VENDOR_ID, vendor_id),
		DEVICE_ATTR(CL_DEVICE_VERSION, version),
		DEVICE_ATTR(CL_DRIVER_VERSION, driver_version),
	};
	cl_int		i, rc;

	for (i = 0; i < (cl_int) lengthof(catalog); i++)
	{
		rc = clGetDeviceInfo(device_id,
							 catalog[i].info,
							 catalog[i].size,
							 catalog[i].addr,
							 NULL);
		/*
		 * CL_INVALID_VALUE is tolerated for the double / half FP config
		 * queries only; every other failure is fatal. (Presumably devices
		 * without these optional features reject the query - the values
		 * are printed below only when the matching extension is listed.)
		 */
		if (rc != CL_SUCCESS &&
			!(rc == CL_INVALID_VALUE &&
			  (catalog[i].info == CL_DEVICE_DOUBLE_FP_CONFIG ||
			   catalog[i].info == CL_DEVICE_HALF_FP_CONFIG)))
		{
			fprintf(stderr, "failed on clGetDeviceInfo (%s)\n",
					opencl_strerror(rc));
			exit(1);
		}
	}

	if (only_list)
		printf("  Device-%02d: %s / %s - %s\n",
			   index + 1,
			   dinfo.vendor,
			   dinfo.name,
			   dinfo.version);
	else
	{
		/* optional floating-point features, detected once from extensions */
		int		has_fp64 = (strstr(dinfo.extensions, "cl_khr_fp64") != NULL);
		int		has_fp16 = (strstr(dinfo.extensions, "cl_khr_fp16") != NULL);

		/*
		 * Wide integer attributes are cast to unsigned long long and
		 * printed with %llu, so that the conversion specifier matches the
		 * argument regardless of how wide long / size_t are on the host.
		 */
		printf("  Device-%02d\n", index + 1);
		printf("  Device type:                     %s\n",
			   dev_type_str(dinfo.type));
		printf("  Vendor:                          %s (id: %08x)\n",
			   dinfo.vendor, dinfo.vendor_id);
		printf("  Name:                            %s\n",
			   dinfo.name);
		printf("  Version:                         %s\n",
			   dinfo.version);
		printf("  Driver version:                  %s\n",
			   dinfo.driver_version);
		printf("  OpenCL C version:                %s\n",
			   dinfo.opencl_c_version);
		printf("  Profile:                         %s\n",
			   dinfo.profile);
		printf("  Device available:                %s\n",
			   dinfo.available ? "yes" : "no");
		printf("  Address bits:                    %u\n",
			   dinfo.address_bits);
		printf("  Compiler available:              %s\n",
			   dinfo.compiler_available ? "yes" : "no");
		if (has_fp64)
			printf("  Double FP config:                %s\n",
				   dev_fp_config_str(dinfo.double_fp_config));
		printf("  Endian:                          %s\n",
			   dinfo.endian_little ? "little" : "big");
		printf("  Error correction support:        %s\n",
			   dinfo.error_correction_support ? "yes" : "no");
		printf("  Execution capability:            %s\n",
			   dev_execution_capabilities_str(dinfo.execution_capabilities));
		printf("  Extensions:                      %s\n",
			   dinfo.extensions);
		printf("  Global memory cache size:        %llu KB\n",
			   (unsigned long long) (dinfo.global_mem_cache_size / 1024));
		printf("  Global memory cache type:        %s\n",
			   dev_mem_cache_type_str(dinfo.global_mem_cache_type));
		printf("  Global memory cacheline size:    %u\n",
			   dinfo.global_mem_cacheline_size);
		printf("  Global memory size:              %llu MB\n",
			   (unsigned long long) (dinfo.global_mem_size / (1024 * 1024)));
		if (has_fp16)
			printf("  Half FP config:                  %s\n",
				   dev_fp_config_str(dinfo.half_fp_config));
		printf("  Host unified memory:             %s\n",
			   dinfo.host_unified_memory ? "yes" : "no");
		printf("  Image support:                   %s\n",
			   dinfo.image_support ? "yes" : "no");
		printf("  Image 2D max size:               %llu x %llu\n",
			   (unsigned long long) dinfo.image2d_max_width,
			   (unsigned long long) dinfo.image2d_max_height);
		printf("  Image 3D max size:               %llu x %llu x %llu\n",
			   (unsigned long long) dinfo.image3d_max_width,
			   (unsigned long long) dinfo.image3d_max_height,
			   (unsigned long long) dinfo.image3d_max_depth);
		printf("  Local memory size:               %llu\n",
			   (unsigned long long) dinfo.local_mem_size);
		printf("  Local memory type:               %s\n",
			   dev_local_mem_type_str(dinfo.local_mem_type));
		printf("  Max clock frequency:             %u\n",
			   dinfo.max_clock_frequency);
		printf("  Max compute units:               %u\n",
			   dinfo.max_compute_units);
		printf("  Max constant args:               %u\n",
			   dinfo.max_constant_args);
		printf("  Max constant buffer size:        %llu\n",
			   (unsigned long long) dinfo.max_constant_buffer_size);
		printf("  Max memory allocation size:      %llu MB\n",
			   (unsigned long long) (dinfo.max_mem_alloc_size / (1024 * 1024)));
		printf("  Max parameter size:              %llu\n",
			   (unsigned long long) dinfo.max_parameter_size);
		printf("  Max read image args:             %u\n",
			   dinfo.max_read_image_args);
		printf("  Max samplers:                    %u\n",
			   dinfo.max_samplers);
		printf("  Max work-group size:             %llu\n",
			   (unsigned long long) dinfo.max_work_group_size);
		printf("  Max work-item sizes:             {%u,%u,%u}\n",
			   (cl_uint) dinfo.max_work_item_sizes[0],
			   (cl_uint) dinfo.max_work_item_sizes[1],
			   (cl_uint) dinfo.max_work_item_sizes[2]);
		printf("  Max write image args:            %u\n",
			   dinfo.max_write_image_args);
		printf("  Memory base address align:       %u\n",
			   dinfo.mem_base_addr_align);
		printf("  Min data type align size:        %u\n",
			   dinfo.min_data_type_align_size);
		printf("  Native vector width - char:      %u\n",
			   dinfo.native_vector_width_char);
		printf("  Native vector width - short:     %u\n",
			   dinfo.native_vector_width_short);
		printf("  Native vector width - int:       %u\n",
			   dinfo.native_vector_width_int);
		printf("  Native vector width - long:      %u\n",
			   dinfo.native_vector_width_long);
		printf("  Native vector width - float:     %u\n",
			   dinfo.native_vector_width_float);
		if (has_fp64)
			printf("  Native vector width - double:    %u\n",
				   dinfo.native_vector_width_double);
		if (has_fp16)
			printf("  Native vector width - half:      %u\n",
				   dinfo.native_vector_width_half);
		printf("  Preferred vector width - char:   %u\n",
			   dinfo.preferred_vector_width_char);
		printf("  Preferred vector width - short:  %u\n",
			   dinfo.preferred_vector_width_short);
		printf("  Preferred vector width - int:    %u\n",
			   dinfo.preferred_vector_width_int);
		printf("  Preferred vector width - long:   %u\n",
			   dinfo.preferred_vector_width_long);
		printf("  Preferred vector width - float:  %u\n",
			   dinfo.preferred_vector_width_float);
		if (has_fp64)
			printf("  Preferred vector width - double: %u\n",
				   dinfo.preferred_vector_width_double);
		if (has_fp16)
			printf("  Preferred vector width - half:   %u\n",
				   dinfo.preferred_vector_width_half);
		printf("  Profiling timer resolution:      %llu\n",
			   (unsigned long long) dinfo.profiling_timer_resolution);
		printf("  Queue properties:                %s\n",
			   dev_queue_properties_str(dinfo.queue_properties));
		printf("  Single FP config:                %s\n",
			   dev_fp_config_str(dinfo.single_fp_config));
	}
}