Пример #1
0
/**
 * Tests creation, getting info from and destruction of
 * kernel wrapper objects.
 * */
static void create_info_destroy_test() {

	/* Test variables. */
	CCLContext* ctx = NULL;
	cl_context context = NULL;
	CCLProgram* prg = NULL;
	cl_program program = NULL;
	CCLKernel* krnl = NULL;
	cl_kernel kernel = NULL;
	CCLDevice* d = NULL;
	CCLQueue* cq = NULL;
	size_t gws;
	size_t lws;
	cl_uint host_buf[CCL_TEST_KERNEL_BUF_SIZE];
	cl_uint host_buf_aux[CCL_TEST_KERNEL_BUF_SIZE];
	CCLBuffer* buf;
	GError* err = NULL;
	CCLEvent* evt = NULL;
	CCLEventWaitList ewl = NULL;
	const char* krnl_name;
	void* args[] = { NULL, NULL };
	cl_bool release_krnl;
	cl_int ocl_status;

	/* Create a context with devices from first available platform. */
	ctx = ccl_test_context_new(&err);
	g_assert_no_error(err);

	/* Create a new program from source and build it. */
	prg = ccl_program_new_from_source(
		ctx, CCL_TEST_KERNEL_CONTENT, &err);
	g_assert_no_error(err);

	ccl_program_build(prg, NULL, &err);
	g_assert_no_error(err);

	/* Create a command queue. */
	cq = ccl_queue_new(ctx, d, CL_QUEUE_PROFILING_ENABLE, &err);
	g_assert_no_error(err);

	/* Test three ways to create a kernel wrapper. */
	for (cl_uint i = 0; i < 3; ++i) {

		/* Create kernel wrapper. */
		switch (i) {
			case 0:
				/* Instantiate kernel directly. */
				krnl = ccl_kernel_new(prg, CCL_TEST_KERNEL_NAME, &err);
				g_assert_no_error(err);
				release_krnl = CL_TRUE;
				break;
			case 1:
				/* Using the program utility function. No need to free
				 * kernel in this case, because it will be freed when
				 * program is destroyed. */
				krnl = ccl_program_get_kernel(
					prg, CCL_TEST_KERNEL_NAME, &err);
				g_assert_no_error(err);
				release_krnl = CL_FALSE;
				break;
			case 2:
				/* Using the "wrap" constructor. */
				kernel = clCreateKernel(ccl_program_unwrap(prg),
					CCL_TEST_KERNEL_NAME, &ocl_status);
				g_assert_cmpint(ocl_status, ==, CL_SUCCESS);
				krnl = ccl_kernel_new_wrap(kernel);
				g_assert_cmphex(GPOINTER_TO_UINT(kernel), ==,
					GPOINTER_TO_UINT(ccl_kernel_unwrap(krnl)));
				release_krnl = CL_TRUE;
				break;
		}

		/* Get some kernel info, compare it with expected info. */

		/* Get kernel function name from kernel info, compare it with the
		 * expected value. */
		krnl_name = ccl_kernel_get_info_array(
			krnl, CL_KERNEL_FUNCTION_NAME, char*, &err);
		g_assert_no_error(err);
		g_assert_cmpstr(krnl_name, ==, CCL_TEST_KERNEL_NAME);

		/* Check if the kernel context is the same as the initial context
		 * and the program context. */
		context = ccl_kernel_get_info_scalar(
			krnl, CL_KERNEL_CONTEXT, cl_context, &err);
		g_assert_no_error(err);
		g_assert(context == ccl_context_unwrap(ctx));

		program = ccl_kernel_get_info_scalar(
			krnl, CL_KERNEL_PROGRAM, cl_program, &err);
		g_assert_no_error(err);
		g_assert(program == ccl_program_unwrap(prg));

#ifndef OPENCL_STUB

		cl_uint ocl_ver;

		/* Get OpenCL version of kernel's underlying platform. */
		ocl_ver = ccl_kernel_get_opencl_version(krnl, &err);
		g_assert_no_error(err);

		(void)ocl_ver;

#ifdef CL_VERSION_1_1

		size_t kwgz;
		size_t* kcwgs;
		CCLDevice* dev = NULL;

		/* If platform supports kernel work group queries, get kernel
		 * work group information and compare it with expected info. */
		if (ocl_ver >= 110) {

			dev = ccl_context_get_device(ctx, 0, &err);
			g_assert_no_error(err);

			kwgz = ccl_kernel_get_workgroup_info_scalar(
				krnl, dev, CL_KERNEL_WORK_GROUP_SIZE, size_t, &err);
			g_assert_no_error(err);
			(void)kwgz;

			kcwgs = ccl_kernel_get_workgroup_info_array(krnl, dev,
				CL_KERNEL_COMPILE_WORK_GROUP_SIZE, size_t*, &err);
			g_assert_no_error(err);
			(void)kcwgs;

		}

#endif /* ifdef CL_VERSION_1_1 */

#ifdef CL_VERSION_1_2

		cl_kernel_arg_address_qualifier kaaq;
		char* kernel_arg_type_name;
		char* kernel_arg_name;

		/* If platform supports kernel argument queries, get kernel argument
		 * information and compare it with expected info. */
		if (ocl_ver >= 120) {

			kaaq = ccl_kernel_get_arg_info_scalar(krnl, 0,
					CL_KERNEL_ARG_ADDRESS_QUALIFIER,
					cl_kernel_arg_address_qualifier, &err);
			g_assert((err == NULL) || (err->code == CCL_ERROR_INFO_UNAVAILABLE_OCL));
			if (err == NULL) {
				g_assert_cmphex(kaaq, ==, CL_KERNEL_ARG_ADDRESS_GLOBAL);
			} else {
Пример #2
0
/**
 * Suggest appropriate local (and optionally global) work sizes for the
 * given real work size, based on device and kernel characteristics.
 *
 * If the `gws` parameter is not `NULL`, it will be populated with a
 * global worksize which may be larger than the real work size
 * in order to better fit the kernel preferred multiple work size. As
 * such, kernels enqueued with global work sizes suggested by this
 * function should check if their global ID is within `real_worksize`.
 *
 * Every component of the suggested local worksize is at least 1, even
 * if the respective component of `real_worksize` is zero (in which case
 * the respective `gws` component, if requested, is also zero).
 *
 * @public @memberof ccl_kernel
 *
 * @param[in] krnl Kernel wrapper object. If `NULL`, use only device
 * information for determining global and local worksizes.
 * @param[in] dev Device wrapper object.
 * @param[in] dims The number of dimensions used to specify the global
 * work-items and work-items in the work-group.
 * @param[in] real_worksize The real worksize.
 * @param[out] gws Location where to place a "nice" global worksize for
 * the given kernel and device, which must be equal or larger than the
 * `real_worksize` and a multiple of `lws`. This memory location should
 * be pre-allocated with space for `dims` values of size `size_t`. If
 * `NULL` it is assumed that the global worksize must be equal to
 * `real_worksize`.
 * @param[in,out] lws This memory location, of size
 * `dims * sizeof(size_t)`, serves a dual purpose: 1) as an input,
 * containing the maximum allowed local work size for each dimension, or
 * zeros if these maximums are to be fetched from the given device
 * `CL_DEVICE_MAX_WORK_ITEM_SIZES` information (if the specified values
 * are larger than the device limits, the device limits are used
 * instead); 2) as an output, where to place a "nice" local worksize,
 * which is based and respects the limits of the given kernel and device
 * (and of the non-zero values given as input).
 * @param[out] err Return location for a ::CCLErr object, or `NULL` if error
 * reporting is to be ignored.
 * @return `CL_TRUE` if function returns successfully, `CL_FALSE`
 * otherwise.
 * */
CCL_EXPORT
cl_bool ccl_kernel_suggest_worksizes(CCLKernel* krnl, CCLDevice* dev,
	cl_uint dims, const size_t* real_worksize, size_t* gws, size_t* lws,
	CCLErr** err) {

	/* Make sure dev is not NULL. */
	g_return_val_if_fail(dev != NULL, CL_FALSE);
	/* Make sure dims not zero. */
	g_return_val_if_fail(dims > 0, CL_FALSE);
	/* Make sure real_worksize is not NULL. */
	g_return_val_if_fail(real_worksize != NULL, CL_FALSE);
	/* Make sure lws is not NULL. */
	g_return_val_if_fail(lws != NULL, CL_FALSE);
	/* Make sure err is NULL or it is not set. */
	g_return_val_if_fail(err == NULL || *err == NULL, CL_FALSE);

	/* Preferred workgroup size multiple (zero while unknown). */
	size_t wg_size_mult = 0;
	/* Maximum total workgroup size (zero while unknown). */
	size_t wg_size_max = 0;
	/* Total workgroup size, kept equal to the exact product of the lws
	 * components, and its value before each reduction pass. */
	size_t wg_size = 1, wg_size_aux;
	/* Per-dimension maximum work-item sizes. */
	size_t* max_wi_sizes;
	/* Number of dimensions supported by the device. */
	cl_uint dev_dims;
	/* Function return status. */
	cl_bool ret_status;

	/* Error handling object. */
	CCLErr* err_internal = NULL;

	/* Check if device supports the requested dims. */
	dev_dims = ccl_device_get_info_scalar(
		dev, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, cl_uint, &err_internal);
	ccl_if_err_propagate_goto(err, err_internal, error_handler);
	ccl_if_err_create_goto(*err, CCL_ERROR, dims > dev_dims,
		CCL_ERROR_UNSUPPORTED_OCL, error_handler,
		"%s: device only supports a maximum of %u dimension(s), "
		"but %u were requested.",
		CCL_STRD, (unsigned int) dev_dims, (unsigned int) dims);

	/* Get max. work item sizes for device. */
	max_wi_sizes = ccl_device_get_info_array(
		dev, CL_DEVICE_MAX_WORK_ITEM_SIZES, size_t*, &err_internal);
	ccl_if_err_propagate_goto(err, err_internal, error_handler);

	/* For each dimension, if the user specified a maximum local work
	 * size, the effective maximum local work size will be the minimum
	 * between the user value and the device value.
	 *
	 * NOTE(review): this writes through the pointer returned by
	 * ccl_device_get_info_array(). If that pointer refers to data owned
	 * (e.g. cached) by the device wrapper, later queries of
	 * CL_DEVICE_MAX_WORK_ITEM_SIZES would observe the clamped values --
	 * confirm ownership and, if so, work on a private copy instead. */
	for (cl_uint i = 0; i < dims; ++i) {
		if (lws[i] != 0)
			max_wi_sizes[i] = MIN(max_wi_sizes[i], lws[i]);
	}

	/* If kernel is not NULL, query it about workgroup size preferences
	 * and capabilities. */
	if (krnl != NULL) {

		/* Determine maximum workgroup size. */
		wg_size_max = ccl_kernel_get_workgroup_info_scalar(krnl, dev,
			CL_KERNEL_WORK_GROUP_SIZE, size_t, &err_internal);
		ccl_if_err_not_info_unavailable_propagate_goto(
			err, err_internal, error_handler);

#ifdef CL_VERSION_1_1

		/* Determine preferred workgroup size multiple (OpenCL >= 1.1). */

		/* Get OpenCL version of the underlying platform. */
		cl_uint ocl_ver = ccl_kernel_get_opencl_version(krnl, &err_internal);
		ccl_if_err_propagate_goto(err, err_internal, error_handler);

		/* If OpenCL version of the underlying platform is >= 1.1 ... */
		if (ocl_ver >= 110) {

			/* ...use CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE... */
			wg_size_mult = ccl_kernel_get_workgroup_info_scalar(
				krnl, dev, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
				size_t, &err_internal);
			ccl_if_err_not_info_unavailable_propagate_goto(
				err, err_internal, error_handler);

		} else {

			/* ...otherwise just use CL_KERNEL_WORK_GROUP_SIZE. */
			wg_size_mult = wg_size_max;

		}

#else

		wg_size_mult = wg_size_max;

#endif

	}

	/* If it was not possible to obtain the maximum workgroup size, either
	 * because kernel is NULL or the information was unavailable, use the
	 * value obtained from device. */
	if (wg_size_max == 0) {
		wg_size_max = ccl_device_get_info_scalar(
			dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, size_t, &err_internal);
		ccl_if_err_propagate_goto(err, err_internal, error_handler);
	}

	/* Likewise, if the preferred multiple is still unknown, fall back to
	 * the maximum workgroup size. The two values are handled separately
	 * because only one of them may be unavailable, and a zero multiple
	 * would yield zero-sized local worksizes. */
	if (wg_size_mult == 0) {
		wg_size_mult = wg_size_max;
	}

	/* Try to find an appropriate local worksize. */
	for (cl_uint i = 0; i < dims; ++i) {

		/* Each lws component is at most the preferred workgroup
		 * multiple or the maximum size of that component in device. */
		lws[i] = MIN(wg_size_mult, max_wi_sizes[i]);

		/* Local work size can't be smaller than 1 (a zero value would
		 * also cause divisions by zero further ahead). */
		if (lws[i] == 0) lws[i] = 1;

		/* Don't let each component of the local worksize to be
		 * higher than the respective component of the real
		 * worksize, but never let it drop below 1. */
		while ((lws[i] > 1) && (lws[i] > real_worksize[i])) {
			lws[i] /= 2;
		}

		/* Update total workgroup size. */
		wg_size *= lws[i];

	}

	/* The total workgroup size can't be higher than the maximum
	 * supported by the device. */
	while (wg_size > wg_size_max) {
		wg_size_aux = wg_size;
		/* Halve components from the last dimension to the first. */
		for (cl_uint i = dims; i-- > 0; ) {
			if (lws[i] > 1) {
				/* Local work size can't be smaller than 1. Replace
				 * this component's factor in the total so that
				 * wg_size remains the exact product of all
				 * components, even when lws[i] is odd. */
				wg_size /= lws[i];
				lws[i] /= 2;
				wg_size *= lws[i];
			}
			if (wg_size <= wg_size_max) break;
		}
		/* Avoid infinite loops and throw error if wg_size didn't
		 * change. */
		ccl_if_err_create_goto(*err, CCL_ERROR, wg_size == wg_size_aux,
			CCL_ERROR_OTHER, error_handler,
			"%s: Unable to determine a work size within the device limit (%lu).",
			CCL_STRD, (unsigned long) wg_size_max);
	}

	/* If output variable gws is not NULL... */
	if (gws != NULL) {
		/* ...find a global worksize which is a multiple of the local
		 * worksize and is big enough to handle the real worksize. */
		for (cl_uint i = 0; i < dims; ++i) {
			gws[i] = ((real_worksize[i] / lws[i])
				+ (((real_worksize[i] % lws[i]) > 0) ? 1 : 0))
				* lws[i];
		}
	} else {
		/* ...otherwise check if found local worksizes are divisors of
		 * the respective real_worksize. If so keep them, otherwise find
		 * local worksizes which respect the maximum sizes allowed by
		 * the kernel and the device, and is a dimension-wise divisor of
		 * the real_worksize. */
		cl_bool lws_are_divisors = CL_TRUE;
		for (cl_uint i = 0; i < dims; ++i) {
			/* Check if lws[i] is divisor of real_worksize[i]. */
			if (real_worksize[i] % lws[i] != 0) {
				/* lws[i] is not divisor of real_worksize[i], so
				 * we'll have to try and find new lws ahead. */
				lws_are_divisors = CL_FALSE;
				break;
			}
		}
		/* Is lws divisor of real_worksize, dimension-wise? */
		if (!lws_are_divisors) {
			/* No, so we'll have to find new lws. */
			wg_size = 1;
			for (cl_uint i = 0; i < dims; ++i) {

				/* For each dimension, try to use the previously
				 * found lws[i]. */
				if ((real_worksize[i] % lws[i] != 0)
					|| (lws[i] * wg_size > wg_size_max))
				{
					/* Previously found lws[i] not usable, find
					 * new one. Must be a divisor of real_worksize[i]
					 * and respect the kernel and device maximum lws.
					 * Candidates use size_t so that they can be
					 * compared with the size_t limits without
					 * truncation. */
					size_t best_lws_i = 1;
					for (size_t j = 2; j <= real_worksize[i] / 2; ++j) {
						/* If current value is higher than the kernel
						 * and device limits, stop searching and use
						 * the best one so far. */
						if ((wg_size * j > wg_size_max)
							|| (j > max_wi_sizes[i])) break;
						/* Otherwise check if current value is divisor
						 * of real_worksize[i]. If so, keep it as the
						 * best so far. */
						if (real_worksize[i] % j == 0)
							best_lws_i = j;
					}
					/* Keep the best divisor for current dimension. */
					lws[i] = best_lws_i;
				}
				/* Update absolute workgroup size (all dimensions). */
				wg_size *= lws[i];
			}
		}
	}

	/* If we got here, everything is OK. */
	g_assert(err == NULL || *err == NULL);
	ret_status = CL_TRUE;
	goto finish;

error_handler:

	/* If we got here there was an error, verify that it is so. */
	g_assert(err == NULL || *err != NULL);
	ret_status = CL_FALSE;

finish:

	/* Return status. */
	return ret_status;

}
Пример #3
0
/**
 * Kernel info main function.
 *
 * @param[in] argc Number of command line arguments.
 * @param[in] argv Command line arguments.
 * @return ::CCL_SUCCESS if program returns with no error, or another
 * ::CCLErrorCode value otherwise.
 * */
int main(int argc, char *argv[]) {

	/* ***************** */
	/* Program variables */
	/* ***************** */

	/* Function and program return status. */
	int status;
	/* Error management. */
	GError *err = NULL;
	/* Context wrapper. */
	CCLContext* ctx = NULL;
	/* Program wrapper. */
	CCLProgram* prg = NULL;
	/* Kernel wrapper. */
	CCLKernel* krnl = NULL;
	/* Device wrapper. */
	CCLDevice* dev = NULL;
	/* Device filters. */
	CCLDevSelFilters filters = NULL;
	/* Default device index. */
	cl_int dev_idx = -1;
	/* OpenCL version. */
	double ocl_ver;
	/* Kernel workgroup info variables. */
	size_t k_wg_size;
	size_t k_pref_wg_size_mult;
	size_t* k_compile_wg_size;
	cl_ulong k_loc_mem_size;
	cl_ulong k_priv_mem_size;

	/* ************************** */
	/* Parse command line options */
	/* ************************** */

	/* If version was requested, output version and exit. */
	if ((argc == 2) && (g_strcmp0("--version", argv[1]) == 0)) {
		ccl_common_version_print("ccl_kerninfo");
		exit(0);
	}

	ccl_if_err_create_goto(err, CCL_ERROR, (argc < 3) || (argc > 4),
		CCL_ERROR_ARGS, error_handler,
		"Usage: %s <program_file> <kernel_name> [device_index]\n",
		argv[0]);
	if (argc == 4) dev_idx = atoi(argv[3]);

	/* ********************************************* */
	/* Initialize OpenCL variables and build program */
	/* ********************************************* */

	/* Select a context/device. */
	ccl_devsel_add_dep_filter(
		&filters, ccl_devsel_dep_menu,
		(dev_idx == -1) ? NULL : (void*) &dev_idx);
	ctx = ccl_context_new_from_filters(&filters, &err);
	ccl_if_err_goto(err, error_handler);

	/* Get program which contains kernel. */
	prg = ccl_program_new_from_source_file(ctx, argv[1], &err);
	ccl_if_err_goto(err, error_handler);

	/* Build program. */
	ccl_program_build(prg, NULL, &err);
	ccl_if_err_goto(err, error_handler);

	/* Get kernel */
	krnl = ccl_program_get_kernel(prg, argv[2], &err);
	ccl_if_err_goto(err, error_handler);

	/* Get the device. */
	dev = ccl_context_get_device(ctx, 0, &err);
	ccl_if_err_goto(err, error_handler);

	/* Check platform  OpenCL version. */
	ocl_ver = ccl_kernel_get_opencl_version(krnl, &err);
	ccl_if_err_goto(err, error_handler);

	/* *************************** */
	/*  Get and print kernel info  */
	/* *************************** */

	g_printf("\n   ======================== Static Kernel Information =======================\n\n");

	k_wg_size = ccl_kernel_get_workgroup_info_scalar(
		krnl, dev, CL_KERNEL_WORK_GROUP_SIZE, size_t, &err);
	ccl_if_err_goto(err, error_handler);
	g_printf("     Maximum workgroup size                  : %lu\n",
		(unsigned long) k_wg_size);

	/* Only show info about CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE
	 * if OpenCL version of the underlying platform is >= 1.1. */
	if (ocl_ver >= 110) {
		k_pref_wg_size_mult = ccl_kernel_get_workgroup_info_scalar(krnl,
			dev, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, size_t, &err);
		ccl_if_err_goto(err, error_handler);
		g_printf("     Preferred multiple of workgroup size    : %lu\n",
			(unsigned long) k_pref_wg_size_mult);
	}

	k_compile_wg_size = ccl_kernel_get_workgroup_info_array(krnl, dev,
		CL_KERNEL_COMPILE_WORK_GROUP_SIZE, size_t*, &err);
	ccl_if_err_goto(err, error_handler);
	g_printf("     WG size in __attribute__ qualifier      : (%lu, %lu, %lu)\n",
		(unsigned long) k_compile_wg_size[0],
		(unsigned long) k_compile_wg_size[1],
		(unsigned long) k_compile_wg_size[2]);

	k_loc_mem_size = ccl_kernel_get_workgroup_info_scalar(krnl, dev,
		CL_KERNEL_LOCAL_MEM_SIZE, cl_ulong, &err);
	ccl_if_err_goto(err, error_handler);
	g_printf("     Local memory used by kernel             : %lu bytes\n",
		(unsigned long) k_loc_mem_size);

	k_priv_mem_size = ccl_kernel_get_workgroup_info_scalar(krnl, dev,
		CL_KERNEL_PRIVATE_MEM_SIZE, cl_ulong, &err);
	ccl_if_err_goto(err, error_handler);
	g_printf("     Min. private mem. used by each workitem : %lu bytes\n",
		(unsigned long) k_priv_mem_size);

	g_printf("\n");

	/* ************** */
	/* Error handling */
	/* ************** */

	/* If we get here, no need for error checking, jump to cleanup. */
	g_assert(err == NULL);
	status = CCL_SUCCESS;
	goto cleanup;

error_handler:
	/* If we got here there was an error, verify that it is so. */
	g_assert(err != NULL);
	g_fprintf(stderr, "%s\n", err->message);
	status = (err->domain == CCL_ERROR) ? err->code : CCL_ERROR_OTHER;
	g_error_free(err);

cleanup:

	/* *********** */
	/* Free stuff! */
	/* *********** */

	if (prg != NULL) ccl_program_destroy(prg);
	if (ctx != NULL) ccl_context_destroy(ctx);

	/* Confirm that memory allocated by wrappers has been properly
	 * freed. */
	g_return_val_if_fail(ccl_wrapper_memcheck(), CCL_ERROR_OTHER);

	/* Return status. */
	return status;

}