/**
 * Get the OpenCL version of the platform associated with this kernel.
 * The version is returned as an integer, in the following format:
 *
 * * 100 for OpenCL 1.0
 * * 110 for OpenCL 1.1
 * * 120 for OpenCL 1.2
 * * 200 for OpenCL 2.0
 * * etc.
 *
 * @public @memberof ccl_kernel
 *
 * @param[in] krnl A kernel wrapper object.
 * @param[out] err Return location for a ::CCLErr object, or `NULL` if error
 * reporting is to be ignored.
 * @return The OpenCL version of the platform associated with this
 * kernel as an integer. If an error occurs, 0 is returned.
 * */
CCL_EXPORT
cl_uint ccl_kernel_get_opencl_version(CCLKernel* krnl, CCLErr** err) {

	/* Make sure krnl is not NULL. */
	g_return_val_if_fail(krnl != NULL, 0);
	/* Make sure err is NULL or it is not set. */
	g_return_val_if_fail(err == NULL || *err == NULL, 0);

	cl_context context;
	CCLContext* ctx;
	CCLErr* err_internal = NULL;
	cl_uint ocl_ver;

	/* Get cl_context object for this kernel. */
	context = ccl_kernel_get_info_scalar(
		krnl, CL_KERNEL_CONTEXT, cl_context, &err_internal);
	ccl_if_err_propagate_goto(err, err_internal, error_handler);

	/* Get context wrapper. */
	ctx = ccl_context_new_wrap(context);

	/* Get OpenCL version. */
	ocl_ver = ccl_context_get_opencl_version(ctx, &err_internal);

	/* Unref. the context wrapper *before* checking for errors, so that
	 * the wrapper is released on the failure path as well as on the
	 * success path. */
	ccl_context_unref(ctx);

	/* Now check whether the version query failed. */
	ccl_if_err_propagate_goto(err, err_internal, error_handler);

	/* If we got here, everything is OK. */
	g_assert(err == NULL || *err == NULL);
	goto finish;

error_handler:

	/* If we got here there was an error, verify that it is so. */
	g_assert(err == NULL || *err != NULL);

	/* Zero signals that the version could not be determined. */
	ocl_ver = 0;

finish:

	/* Return OpenCL version. */
	return ocl_ver;

}
/**
 * Get ::CCLDevice wrapper at given index.
 *
 * The device list of the container is initialized on demand, the first
 * time it is found to be `NULL`.
 *
 * @protected @memberof ccl_dev_container
 *
 * @param[in] devcon The device container object.
 * @param[in] get_devices Function to get cl_device_id's from wrapped
 * object.
 * @param[in] index Index of device in device container.
 * @param[out] err Return location for a GError, or `NULL` if error
 * reporting is to be ignored.
 * @return The ::CCLDevice wrapper at given index or `NULL` if an error
 * occurs.
 * */
CCL_EXPORT
CCLDevice* ccl_dev_container_get_device(
	CCLDevContainer* devcon,
	ccl_dev_container_get_cldevices get_devices, cl_uint index,
	GError** err) {

	/* Make sure err is NULL or it is not set. */
	g_return_val_if_fail(err == NULL || *err == NULL, NULL);

	/* Make sure devcon is not NULL. */
	g_return_val_if_fail(devcon != NULL, NULL);

	/* The return value. */
	CCLDevice* device_ret;

	/* Internal error object. */
	GError* err_internal = NULL;

	/* Check if device list is already initialized. */
	if (devcon->devices == NULL) {

		/* Not initialized, initialize it. */
		ccl_dev_container_init_devices(
			devcon, get_devices, &err_internal);

		/* Check for errors. */
		ccl_if_err_propagate_goto(err, err_internal, error_handler);

	}

	/* Make sure device index is less than the number of devices. Both
	 * values are unsigned, so they are formatted with %u (and cast to
	 * unsigned int to match the conversion exactly). */
	ccl_if_err_create_goto(*err, CCL_ERROR,
		index >= devcon->num_devices,
		CCL_ERROR_DEVICE_NOT_FOUND, error_handler,
		"%s: device index (%u) out of bounds (%u devices in list).",
		G_STRLOC, (unsigned int) index,
		(unsigned int) devcon->num_devices);

	/* If we got here, everything is OK. */
	g_assert(err == NULL || *err == NULL);
	device_ret = devcon->devices[index];
	goto finish;

error_handler:

	/* If we got here there was an error, verify that it is so. */
	g_assert(err == NULL || *err != NULL);
	device_ret = NULL;

finish:

	/* Return the device wrapper (or NULL on error). */
	return device_ret;

}
/**
 * @internal
 * Populate the device list of a device container.
 *
 * The device IDs are obtained through the `get_devices` callback, and
 * each one is wrapped with ccl_device_new_wrap() and stored in the
 * container's `devices` array. The container's `num_devices` field is
 * set accordingly.
 *
 * @private @memberof ccl_dev_container
 *
 * @param[in] devcon Device container wrapper. Its device list must not
 * have been initialized yet.
 * @param[in] get_devices Concrete wrapper implementation of
 * ccl_dev_container_get_cldevices(), which returns a list of
 * cl_device_id's for the wrapper.
 * @param[out] err Return location for a GError, or `NULL` if error
 * reporting is to be ignored.
 * */
static void ccl_dev_container_init_devices(CCLDevContainer* devcon,
	ccl_dev_container_get_cldevices get_devices, GError **err) {

	/* Preconditions: err is unset (or ignored), the container exists
	 * and its device list was not built before. */
	g_return_if_fail(err == NULL || *err == NULL);
	g_return_if_fail(devcon != NULL);
	g_return_if_fail(devcon->devices == NULL);

	/* Error reported by the device ID query. */
	GError* query_err = NULL;

	/* Information object holding the raw cl_device_id's. */
	CCLWrapperInfo* ids_info;

	/* Ask the concrete wrapper for its device IDs. */
	ids_info = get_devices(devcon, &query_err);
	ccl_if_err_propagate_goto(err, query_err, error_handler);

	/* The number of devices follows from the size of the ID array. */
	devcon->num_devices =
		(cl_uint) (ids_info->size / sizeof(cl_device_id));

	/* Reserve one wrapper slot per device. */
	devcon->devices = g_slice_alloc(
		devcon->num_devices * sizeof(CCLDevice*));

	/* View the information value as an array of device IDs and wrap
	 * them one by one. */
	cl_device_id* ids = (cl_device_id*) ids_info->value;
	guint pos = 0;
	while (pos < devcon->num_devices) {
		devcon->devices[pos] = ccl_device_new_wrap(ids[pos]);
		++pos;
	}

	/* Success: no error may be set at this point. */
	g_assert(err == NULL || *err == NULL);
	goto finish;

error_handler:

	/* Failure: an error must have been set (unless it is ignored). */
	g_assert(err == NULL || *err != NULL);

finish:

	/* Nothing to return. */
	return;

}
/**
 * Set the arguments of a kernel and enqueue it for execution on a
 * device.
 *
 * The arguments are set with ::ccl_kernel_set_args_v(), after which the
 * kernel is enqueued with ::ccl_kernel_enqueue_ndrange().
 *
 * The ::ccl_kernel_set_args_and_enqueue_ndrange() function performs the
 * same operation but accepts a `NULL`-terminated variable list of
 * arguments instead.
 *
 * If the ::ccl_arg_skip constant is passed in place of a specific
 * argument, that argument will not be set by this function call. Any
 * previously set argument continues to be valid.
 *
 * @public @memberof ccl_kernel
 *
 * @warning This function is not thread-safe. For multi-threaded
 * access to the same kernel function, create multiple instances of
 * a kernel wrapper for the given kernel function with ccl_kernel_new(),
 * one for each thread.
 *
 * @param[in] krnl A kernel wrapper object.
 * @param[in] cq A command queue wrapper object.
 * @param[in] work_dim The number of dimensions used to specify the
 * global work-items and work-items in the work-group.
 * @param[in] global_work_offset Can be used to specify an array of
 * `work_dim` unsigned values that describe the offset used to calculate
 * the global ID of a work-item.
 * @param[in] global_work_size An array of `work_dim` unsigned values
 * that describe the number of global work-items in `work_dim`
 * dimensions that will execute the kernel function.
 * @param[in] local_work_size An array of `work_dim` unsigned values
 * that describe the number of work-items that make up a work-group that
 * will execute the specified kernel.
 * @param[in,out] evt_wait_lst List of events that need to complete
 * before this command can be executed. The list will be cleared and
 * can be reused by client code.
 * @param[in] args A `NULL`-terminated list of arguments to set.
 * Arguments must be of type ::CCLArg*, ::CCLBuffer*, ::CCLImage* or
 * ::CCLSampler*.
 * @param[out] err Return location for a ::CCLErr object, or `NULL` if error
 * reporting is to be ignored.
 * @return Event wrapper object that identifies this command.
 * */
CCL_EXPORT
CCLEvent* ccl_kernel_set_args_and_enqueue_ndrange_v(CCLKernel* krnl,
	CCLQueue* cq, cl_uint work_dim, const size_t* global_work_offset,
	const size_t* global_work_size, const size_t* local_work_size,
	CCLEventWaitList* evt_wait_lst, void** args, CCLErr** err) {

	/* Preconditions: kernel and queue wrappers are required, and err
	 * must either be ignored or not yet set. */
	g_return_val_if_fail(krnl != NULL, NULL);
	g_return_val_if_fail(cq != NULL, NULL);
	g_return_val_if_fail(err == NULL || *err == NULL, NULL);

	/* Event produced by the enqueue operation. */
	CCLEvent* ndrange_evt = NULL;

	/* Error reported by the enqueue operation. */
	CCLErr* enqueue_err = NULL;

	/* First set the kernel arguments... */
	ccl_kernel_set_args_v(krnl, args);

	/* ...then enqueue the kernel. */
	ndrange_evt = ccl_kernel_enqueue_ndrange(krnl, cq, work_dim,
		global_work_offset, global_work_size, local_work_size,
		evt_wait_lst, &enqueue_err);
	ccl_if_err_propagate_goto(err, enqueue_err, error_handler);

	/* Success: no error may be set at this point. */
	g_assert(err == NULL || *err == NULL);
	goto finish;

error_handler:

	/* Failure: an error must have been set (unless it is ignored). */
	g_assert(err == NULL || *err != NULL);

finish:

	/* Hand the event wrapper back to the caller. */
	return ndrange_evt;

}
/**
 * Suggest appropriate local (and optionally global) work sizes for the
 * given real work size, based on device and kernel characteristics.
 *
 * If the `gws` parameter is not `NULL`, it will be populated with a
 * global worksize which may be larger than the real work size
 * in order to better fit the kernel preferred multiple work size. As
 * such, kernels enqueued with global work sizes suggested by this
 * function should check if their global ID is within `real_worksize`.
 *
 * If `gws` is `NULL`, the local worksize is instead adjusted, when
 * required, so that each component is a divisor of the respective
 * component of `real_worksize`.
 *
 * @public @memberof ccl_kernel
 *
 * @param[in] krnl Kernel wrapper object. If `NULL`, use only device
 * information for determining global and local worksizes.
 * @param[in] dev Device wrapper object.
 * @param[in] dims The number of dimensions used to specify the global
 * work-items and work-items in the work-group.
 * @param[in] real_worksize The real worksize.
 * @param[out] gws Location where to place a "nice" global worksize for
 * the given kernel and device, which must be equal or larger than the
 * `real_worksize` and a multiple of `lws`. This memory location should
 * be pre-allocated with space for `dims` values of size `size_t`. If
 * `NULL` it is assumed that the global worksize must be equal to
 * `real_worksize`.
 * @param[in,out] lws This memory location, of size
 * `dims * sizeof(size_t)`, serves a dual purpose: 1) as an input,
 * containing the maximum allowed local work size for each dimension, or
 * zeros if these maximums are to be fetched from the given device
 * `CL_DEVICE_MAX_WORK_ITEM_SIZES` information (if the specified values
 * are larger than the device limits, the device limits are used
 * instead); 2) as an output, where to place a "nice" local worksize,
 * which is based and respects the limits of the given kernel and device
 * (and of the non-zero values given as input).
 * @param[out] err Return location for a ::CCLErr object, or `NULL` if error
 * reporting is to be ignored.
 * @return `CL_TRUE` if function returns successfully, `CL_FALSE`
 * otherwise.
 * */
CCL_EXPORT
cl_bool ccl_kernel_suggest_worksizes(CCLKernel* krnl, CCLDevice* dev,
	cl_uint dims, const size_t* real_worksize, size_t* gws, size_t* lws,
	CCLErr** err) {

	/* Make sure dev is not NULL. */
	g_return_val_if_fail(dev != NULL, CL_FALSE);
	/* Make sure dims not zero. */
	g_return_val_if_fail(dims > 0, CL_FALSE);
	/* Make sure real_worksize is not NULL. */
	g_return_val_if_fail(real_worksize != NULL, CL_FALSE);
	/* Make sure lws is not NULL. */
	g_return_val_if_fail(lws != NULL, CL_FALSE);
	/* Make sure err is NULL or it is not set. */
	g_return_val_if_fail(err == NULL || *err == NULL, CL_FALSE);

	/* The preferred workgroup size multiple. */
	size_t wg_size_mult = 0;
	/* The maximum total workgroup size. */
	size_t wg_size_max = 0;
	/* Running total workgroup size (product of the lws components),
	 * and a snapshot of it used to detect lack of progress. */
	size_t wg_size = 1, wg_size_aux;
	/* Maximum work item size per dimension. */
	size_t* max_wi_sizes;
	/* Maximum number of dimensions supported by the device. */
	cl_uint dev_dims;
	/* Function return status. */
	cl_bool ret_status;
	/* Product of the real worksize components. */
	size_t real_ws = 1;

	/* Error handling object. */
	CCLErr* err_internal = NULL;

	/* Check if device supports the requested dims. */
	dev_dims = ccl_device_get_info_scalar(
		dev, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, cl_uint, &err_internal);
	ccl_if_err_propagate_goto(err, err_internal, error_handler);
	ccl_if_err_create_goto(*err, CCL_ERROR, dims > dev_dims,
		CCL_ERROR_UNSUPPORTED_OCL, error_handler,
		"%s: device only supports a maximum of %d dimension(s), "
		"but %d were requested.",
		CCL_STRD, dev_dims, dims);

	/* Get max. work item sizes for device. */
	max_wi_sizes = ccl_device_get_info_array(
		dev, CL_DEVICE_MAX_WORK_ITEM_SIZES, size_t*, &err_internal);
	ccl_if_err_propagate_goto(err, err_internal, error_handler);

	/* For each dimension, if the user specified a maximum local work
	 * size, the effective maximum local work size will be the minimum
	 * between the user value and the device value. */
	/* NOTE(review): this writes into the array returned by
	 * ccl_device_get_info_array(). If that array is owned/cached by the
	 * device wrapper, the clamped values would persist and affect later
	 * queries on the same device -- confirm. */
	for (cl_uint i = 0; i < dims; ++i) {
		if (lws[i] != 0)
			max_wi_sizes[i] = MIN(max_wi_sizes[i], lws[i]);
	}

	/* If kernel is not NULL, query it about workgroup size preferences
	 * and capabilities. */
	if (krnl != NULL) {

		/* Determine maximum workgroup size. */
		/* Presumably the macro below tolerates an "info unavailable"
		 * error and only propagates other errors (see the fallback to
		 * device information further down) -- verify against the macro
		 * definition. */
		wg_size_max = ccl_kernel_get_workgroup_info_scalar(krnl, dev,
			CL_KERNEL_WORK_GROUP_SIZE, size_t, &err_internal);
		ccl_if_err_not_info_unavailable_propagate_goto(
			err, err_internal, error_handler);

#ifdef CL_VERSION_1_1

		/* Determine preferred workgroup size multiple (OpenCL >= 1.1). */

		/* Get OpenCL version of the underlying platform. */
		cl_uint ocl_ver = ccl_kernel_get_opencl_version(krnl, &err_internal);
		ccl_if_err_propagate_goto(err, err_internal, error_handler);

		/* If OpenCL version of the underlying platform is >= 1.1 ... */
		if (ocl_ver >= 110) {

			/* ...use CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE... */
			wg_size_mult = ccl_kernel_get_workgroup_info_scalar(
				krnl, dev, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
				size_t, &err_internal);
			ccl_if_err_not_info_unavailable_propagate_goto(
				err, err_internal, error_handler);

		} else {

			/* ...otherwise just use CL_KERNEL_WORK_GROUP_SIZE. */
			wg_size_mult = wg_size_max;

		}

#else

		/* Built without OpenCL 1.1 support: the preferred multiple is
		 * not queried, use the maximum workgroup size instead. */
		wg_size_mult = wg_size_max;

#endif

	}

	/* If it was not possible to obtain wg_size_mult and wg_size_max, either
	 * because kernel is NULL or the information was unavailable, use values
	 * obtained from device. */
	if ((wg_size_max == 0) && (wg_size_mult == 0)) {
		wg_size_max = ccl_device_get_info_scalar(
			dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, size_t, &err_internal);
		ccl_if_err_propagate_goto(err, err_internal, error_handler);
		wg_size_mult = wg_size_max;
	}

	/* Try to find an appropriate local worksize. */
	for (cl_uint i = 0; i < dims; ++i) {

		/* Each lws component is at most the preferred workgroup
		 * multiple or the maximum size of that component in device. */
		lws[i] = MIN(wg_size_mult, max_wi_sizes[i]);

		/* Update total workgroup size. */
		wg_size *= lws[i];

		/* Update total real worksize. */
		/* NOTE(review): real_ws is accumulated here but is not read
		 * anywhere else in this function. */
		real_ws *= real_worksize[i];

	}

	/* Don't let each component of the local worksize to be
	 * higher than the respective component of the real
	 * worksize. */
	/* NOTE(review): if real_worksize[i] is 0, this loop halves lws[i]
	 * down to 0, and the later `/` and `%` by lws[i] would then divide
	 * by zero -- confirm callers never pass a zero real worksize. */
	for (cl_uint i = 0; i < dims; ++i) {
		while (lws[i] > real_worksize[i]) {
			lws[i] /= 2;
			wg_size /= 2;
		}
	}

	/* The total workgroup size can't be higher than the maximum
	 * supported by the device. Components are halved starting from the
	 * last dimension until the total fits. */
	while (wg_size > wg_size_max) {
		/* Remember the size before this pass to detect lack of
		 * progress. */
		wg_size_aux = wg_size;
		for (int i = dims - 1; i >= 0; --i) {
			if (lws[i] > 1) {
				/* Local work size can't be smaller than 1. */
				lws[i] /= 2;
				wg_size /= 2;
			}
			if (wg_size <= wg_size_max) break;
		}
		/* Avoid infinite loops and throw error if wg_size didn't
		 * change. */
		ccl_if_err_create_goto(*err, CCL_ERROR, wg_size == wg_size_aux,
			CCL_ERROR_OTHER, error_handler,
			"%s: Unable to determine a work size within the device limit (%d).",
			CCL_STRD, (int) wg_size_max);
	}

	/* If output variable gws is not NULL... */
	if (gws != NULL) {

		/* ...find a global worksize which is a multiple of the local
		 * worksize and is big enough to handle the real worksize
		 * (i.e. real_worksize[i] rounded up to a multiple of lws[i]). */
		for (cl_uint i = 0; i < dims; ++i) {
			gws[i] = ((real_worksize[i] / lws[i])
				+ (((real_worksize[i] % lws[i]) > 0) ? 1 : 0))
				* lws[i];
		}

	} else {

		/* ...otherwise check if found local worksizes are divisors of
		 * the respective real_worksize. If so keep them, otherwise find
		 * local worksizes which respect the maximum sizes allowed by
		 * the kernel and the device, and is a dimension-wise divisor of
		 * the real_worksize. */

		cl_bool lws_are_divisors = CL_TRUE;
		for (cl_uint i = 0; i < dims; ++i) {
			/* Check if lws[i] is divisor of real_worksize[i]. */
			if (real_worksize[i] % lws[i] != 0) {
				/* Oops... lws[i] is not divisor of real_worksize[i], so
				 * we'll have to try and find new lws ahead. */
				lws_are_divisors = CL_FALSE;
				break;
			}
		}

		/* Is lws divisor of real_worksize, dimension-wise? */
		if (!lws_are_divisors) {

			/* No, so we'll have to find new lws. The total workgroup
			 * size is rebuilt from scratch, one dimension at a time. */
			wg_size = 1;
			for (cl_uint i = 0; i < dims; ++i) {

				/* For each dimension, try to use the previously
				 * found lws[i]. */
				if ((real_worksize[i] % lws[i] != 0)
					|| (lws[i] * wg_size > wg_size_max))
				{
					/* Previously found lws[i] not usable, find
					 * new one. Must be a divisor of real_worksize[i]
					 * and respect the kernel and device maximum lws.
					 * Candidates are searched in the range
					 * [2, real_worksize[i] / 2]; 1 is the fallback. */
					cl_uint best_lws_i = 1;
					for (cl_uint j = 2; j <= real_worksize[i] / 2; ++j) {
						/* If current value is higher than the kernel
						 * and device limits, stop searching and use
						 * the best one so far. */
						if ((wg_size * j > wg_size_max)
							|| (j > max_wi_sizes[i])) break;
						/* Otherwise check if current value is divisor
						 * of real_worksize[i]. If so, keep it as the
						 * best so far. */
						if (real_worksize[i] % j == 0)
							best_lws_i = j;
					}
					/* Keep the best divisor for current dimension. */
					lws[i] = best_lws_i;
				}
				/* Update absolute workgroup size (all dimensions). */
				wg_size *= lws[i];
			}
		}
	}

	/* If we got here, everything is OK. */
	g_assert(err == NULL || *err == NULL);
	ret_status = CL_TRUE;
	goto finish;

error_handler:

	/* If we got here there was an error, verify that it is so. */
	g_assert(err == NULL || *err != NULL);
	ret_status = CL_FALSE;

finish:

	/* Return status. */
	return ret_status;

}
/**
 * Get a ::CCLWrapperInfo kernel argument information object.
 *
 * When cf4ocl is built without OpenCL 1.2 support, or when the platform
 * associated with the kernel reports an OpenCL version lower than 1.2,
 * a ::CCL_ERROR_UNSUPPORTED_OCL error is raised.
 *
 * @public @memberof ccl_kernel
 * @see ccl_wrapper_get_info()
 * @note Requires OpenCL >= 1.2
 *
 * @param[in] krnl The kernel wrapper object.
 * @param[in] idx Argument index.
 * @param[in] param_name Name of information/parameter to get.
 * @param[out] err Return location for a ::CCLErr object, or `NULL` if error
 * reporting is to be ignored.
 * @return The requested kernel argument information object. This
 * object will be automatically freed when the kernel wrapper object is
 * destroyed. If an error occurs, NULL is returned.
 * */
CCL_EXPORT
CCLWrapperInfo* ccl_kernel_get_arg_info(CCLKernel* krnl, cl_uint idx,
	cl_kernel_arg_info param_name, CCLErr** err) {

	/* Make sure krnl is not NULL. */
	g_return_val_if_fail(krnl != NULL, NULL);
	/* Make sure err is NULL or it is not set (same contract as the
	 * other functions that report errors through a CCLErr**). */
	g_return_val_if_fail(err == NULL || *err == NULL, NULL);

	/* Helper wrapper. */
	CCLWrapper fake_wrapper;

	/* Kernel information to return. */
	CCLWrapperInfo* info = NULL;

	/* Error handling object. */
	CCLErr* err_internal = NULL;

	/* OpenCL version of the underlying platform, as an integer
	 * (e.g. 120 for OpenCL 1.2). */
	cl_uint ocl_ver;

#ifndef CL_VERSION_1_2

	CCL_UNUSED(idx);
	CCL_UNUSED(param_name);
	CCL_UNUSED(fake_wrapper);
	CCL_UNUSED(err_internal);
	CCL_UNUSED(ocl_ver);

	/* If cf4ocl was not compiled with support for OpenCL >= 1.2, always throw
	 * error. */
	ccl_if_err_create_goto(*err, CCL_ERROR, TRUE,
		CCL_ERROR_UNSUPPORTED_OCL, error_handler,
		"%s: Obtaining kernel argument information requires cf4ocl to be "
		"deployed with support for OpenCL version 1.2 or newer.",
		CCL_STRD);

#else

	/* Check that context platform is >= OpenCL 1.2 */
	ocl_ver = ccl_kernel_get_opencl_version(krnl, &err_internal);
	ccl_if_err_propagate_goto(err, err_internal, error_handler);

	/* If OpenCL version is not >= 1.2, throw error. */
	ccl_if_err_create_goto(*err, CCL_ERROR, ocl_ver < 120,
		CCL_ERROR_UNSUPPORTED_OCL, error_handler,
		"%s: information about kernel arguments requires OpenCL" \
		" version 1.2 or newer.", CCL_STRD);

	/* Wrap argument index in a fake cl_object, so that it can be
	 * passed to ccl_wrapper_get_info() as the second wrapper. */
	fake_wrapper.cl_object = GUINT_TO_POINTER(idx);

	/* Get kernel argument info. */
	info = ccl_wrapper_get_info(
		(CCLWrapper*) krnl, &fake_wrapper, param_name, 0,
		CCL_INFO_KERNEL_ARG, CL_FALSE, &err_internal);
	ccl_if_err_propagate_goto(err, err_internal, error_handler);

#endif

	/* If we got here, everything is OK. */
	g_assert(err == NULL || *err == NULL);
	goto finish;

error_handler:

	/* If we got here there was an error, verify that it is so. */
	g_assert(err == NULL || *err != NULL);

	/* An error occurred, return NULL to signal it. */
	info = NULL;

finish:

	/* Return argument info. */
	return info;

}
/**
 * Create a new sampler wrapper object using a list of properties.
 *
 * If a supported property is not specified, a default value is used.
 * Some valid properties are `CL_SAMPLER_NORMALIZED_COORDS` (default
 * value is `CL_TRUE`), `CL_SAMPLER_ADDRESSING_MODE` (default value is
 * `CL_ADDRESS_CLAMP`) and `CL_SAMPLER_FILTER_MODE` (default value is
 * `CL_FILTER_NEAREST`).
 *
 * This function mimics the style of the OpenCL 2.0 sampler
 * constructor, clCreateSamplerWithProperties(), but can be used with
 * any version of OpenCL. Thus, the underlying OpenCL sampler object is
 * created using:
 *
 * * clCreateSampler() - for platforms with OpenCL version <= 1.2
 * * clCreateSamplerWithProperties() - for platforms with OpenCL version
 * >= 2.0.
 *
 * @public @memberof ccl_sampler
 *
 * @param[in] ctx A context wrapper object.
 * @param[in] sampler_properties A list of sampler property names and
 * their corresponding values. Each sampler property name is immediately
 * followed by the corresponding desired value. The list is terminated
 * with 0. If a supported property is not specified, its default value
 * will be used. If `NULL`, default values for supported sampler
 * properties will be used.
 * @param[out] err Return location for a ::CCLErr object, or `NULL` if error
 * reporting is to be ignored.
 * @return A new sampler wrapper object or `NULL` if an error occurs.
 * */
CCL_EXPORT
CCLSampler* ccl_sampler_new_full(CCLContext* ctx,
	const cl_sampler_properties *sampler_properties, CCLErr** err) {

	/* Make sure err is NULL or it is not set. */
	g_return_val_if_fail((err) == NULL || *(err) == NULL, NULL);
	/* Make sure ctx is not NULL. */
	g_return_val_if_fail(ctx != NULL, NULL);

	/* New sampler wrapper object to create. */
	CCLSampler* smplr = NULL;
	/* OpenCL sampler object to create and wrap. */
	cl_sampler sampler;
	/* OpenCL function status. */
	cl_int ocl_status;

#ifdef CL_VERSION_2_0

	/* OpenCL platform version, as an integer (e.g. 200 for
	 * OpenCL 2.0). */
	cl_uint ocl_ver;
	/* Internal error handling object. */
	CCLErr* err_internal = NULL;

	/* Get context platform version. */
	ocl_ver = ccl_context_get_opencl_version(ctx, &err_internal);
	ccl_if_err_propagate_goto(err, err_internal, error_handler);

	/* Create the OpenCL sampler object. */
	if (ocl_ver >= 200) {

		/* Platform is OpenCL >= 2.0, use "new" API. */
		sampler = clCreateSamplerWithProperties(
			ccl_context_unwrap(ctx), sampler_properties, &ocl_status);

	} else {

		/* Platform is OpenCL <= 1.2, use "old" API, which takes the
		 * three basic properties as individual arguments. */
		struct ccl_sampler_basic_properties sbp =
			ccl_sampler_get_basic_properties(sampler_properties);

		CCL_BEGIN_IGNORE_DEPRECATIONS
		sampler = clCreateSampler(ccl_context_unwrap(ctx),
			sbp.normalized_coords, sbp.addressing_mode,
			sbp.filter_mode, &ocl_status);
		CCL_END_IGNORE_DEPRECATIONS

	}

#else

	/* Built without OpenCL 2.0 support: only the "old" API is
	 * available. */
	struct ccl_sampler_basic_properties sbp =
		ccl_sampler_get_basic_properties(sampler_properties);

	sampler = clCreateSampler(ccl_context_unwrap(ctx),
		sbp.normalized_coords, sbp.addressing_mode,
		sbp.filter_mode, &ocl_status);

#endif

	/* Check for errors. */
	ccl_if_err_create_goto(*err, CCL_OCL_ERROR,
		CL_SUCCESS != ocl_status, ocl_status, error_handler,
		"%s: unable to create sampler (OpenCL error %d: %s).",
		CCL_STRD, ocl_status, ccl_err(ocl_status));

	/* Create sampler wrapper. */
	smplr = ccl_sampler_new_wrap(sampler);

	/* If we got here, everything is OK. */
	g_assert(err == NULL || *err == NULL);
	goto finish;

error_handler:

	/* If we got here there was an error, verify that it is so. */
	g_assert(err == NULL || *err != NULL);

finish:

	/* Return sampler wrapper (NULL if an error occurred). */
	return smplr;

}