Exemplo n.º 1
0
/**
 * Get the platform wrapper for the given device wrapper.
 *
 * @public @memberof ccl_platform
 *
 * @param[in] dev The device wrapper from where to get a platform wrapper.
 * @param[out] err Return location for a ::CCLErr object, or `NULL` if error
 * reporting is to be ignored.
 * @return The platform wrapper for the given device wrapper or `NULL` in
 * case an error occurs.
 * */
CCL_EXPORT
CCLPlatform * ccl_platform_new_from_device(CCLDevice * dev, CCLErr ** err) {

    /* Make sure dev is not NULL. */
    g_return_val_if_fail(dev != NULL, NULL);
    /* Make sure err is NULL or it is not set. */
    g_return_val_if_fail(err == NULL || *err == NULL, NULL);

    /* The OpenCL platform_id object. */
    cl_platform_id platform_id;
    /* The platform wrapper to return. */
    CCLPlatform * platf = NULL;
    /* Internal error object. */
    CCLErr * err_internal = NULL;

    /* Get OpenCL platform_id object from device. */
    platform_id = ccl_device_get_info_scalar(
        dev, CL_DEVICE_PLATFORM, cl_platform_id, &err_internal);
    /* Propagate any error to the caller. Fixed: the original called
     * g_if_err_propagate_goto, which is neither a GLib nor a cf4ocl
     * macro; the library macro used everywhere else in this code base
     * is ccl_if_err_propagate_goto. */
    ccl_if_err_propagate_goto(err, err_internal, error_handler);

    /* Create/get the platform wrapper. */
    platf = ccl_platform_new_wrap(platform_id);

    /* If we got here, everything is OK. */
    g_assert(err == NULL || *err == NULL);
    goto finish;

error_handler:
    /* If we got here there was an error, verify that it is so. */
    g_assert(err == NULL || *err != NULL);

finish:

    /* Return the device platform wrapper. */
    return platf;
}
Exemplo n.º 2
0
/**
 * Suggest appropriate local (and optionally global) work sizes for the
 * given real work size, based on device and kernel characteristics.
 *
 * If the `gws` parameter is not `NULL`, it will be populated with a
 * global worksize which may be larger than the real work size
 * in order to better fit the kernel preferred multiple work size. As
 * such, kernels enqueued with global work sizes suggested by this
 * function should check if their global ID is within `real_worksize`.
 *
 * @public @memberof ccl_kernel
 *
 * @param[in] krnl Kernel wrapper object. If `NULL`, use only device
 * information for determining global and local worksizes.
 * @param[in] dev Device wrapper object.
 * @param[in] dims The number of dimensions used to specify the global
 * work-items and work-items in the work-group.
 * @param[in] real_worksize The real worksize.
 * @param[out] gws Location where to place a "nice" global worksize for
 * the given kernel and device, which must be equal or larger than the `
 * real_worksize` and a multiple of `lws`. This memory location should
 * be pre-allocated with space for `dims` values of size `size_t`. If
 * `NULL` it is assumed that the global worksize must be equal to
 * `real_worksize`.
 * @param[in,out] lws This memory location, of size
 * `dims * sizeof(size_t)`, serves a dual purpose: 1) as an input,
 * containing the maximum allowed local work size for each dimension, or
 * zeros if these maximums are to be fetched from the given device
 * `CL_DEVICE_MAX_WORK_ITEM_SIZES` information (if the specified values
 * are larger than the device limits, the device limits are used
 * instead); 2) as an output, where to place a "nice" local worksize,
 * which is based and respects the limits of the given kernel and device
 * (and of the non-zero values given as input).
 * @param[out] err Return location for a ::CCLErr object, or `NULL` if error
 * reporting is to be ignored.
 * @return `CL_TRUE` if function returns successfully, `CL_FALSE`
 * otherwise.
 * */
CCL_EXPORT
cl_bool ccl_kernel_suggest_worksizes(CCLKernel* krnl, CCLDevice* dev,
	cl_uint dims, const size_t* real_worksize, size_t* gws, size_t* lws,
	CCLErr** err) {

	/* Make sure dev is not NULL. */
	g_return_val_if_fail(dev != NULL, CL_FALSE);
	/* Make sure dims not zero. */
	g_return_val_if_fail(dims > 0, CL_FALSE);
	/* Make sure real_worksize is not NULL. */
	g_return_val_if_fail(real_worksize != NULL, CL_FALSE);
	/* Make sure lws is not NULL. */
	g_return_val_if_fail(lws != NULL, CL_FALSE);
	/* Make sure err is NULL or it is not set. */
	g_return_val_if_fail(err == NULL || *err == NULL, CL_FALSE);

	/* The preferred workgroup size multiple and the maximum workgroup
	 * size, as reported by the kernel (if given) or, failing that, by
	 * the device. Zero means "not yet determined". */
	size_t wg_size_mult = 0;
	size_t wg_size_max = 0;
	/* Running product of the local worksize over all dimensions, plus
	 * an auxiliary copy used to detect when the reduction loop below
	 * makes no progress. */
	size_t wg_size = 1, wg_size_aux;
	/* Per-dimension maximum work-item sizes (device limits, possibly
	 * clamped by non-zero caller-supplied values in lws). */
	size_t* max_wi_sizes;
	/* Number of dimensions supported by the device. */
	cl_uint dev_dims;
	/* Function return status. */
	cl_bool ret_status;
	/* Total real worksize (product over all dimensions). */
	size_t real_ws = 1;

	/* Error handling object. */
	CCLErr* err_internal = NULL;

	/* Check if device supports the requested dims. */
	dev_dims = ccl_device_get_info_scalar(
		dev, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, cl_uint, &err_internal);
	ccl_if_err_propagate_goto(err, err_internal, error_handler);
	/* NOTE(review): passing *err to this macro follows the cf4ocl
	 * convention (the macro takes the error location by name); confirm
	 * the macro does not actually dereference err when err is NULL. */
	ccl_if_err_create_goto(*err, CCL_ERROR, dims > dev_dims,
		CCL_ERROR_UNSUPPORTED_OCL, error_handler,
		"%s: device only supports a maximum of %d dimension(s), "
		"but %d were requested.",
		CCL_STRD, dev_dims, dims);

	/* Get max. work item sizes for device. */
	max_wi_sizes = ccl_device_get_info_array(
		dev, CL_DEVICE_MAX_WORK_ITEM_SIZES, size_t*, &err_internal);
	ccl_if_err_propagate_goto(err, err_internal, error_handler);

	/* For each dimension, if the user specified a maximum local work
	 * size, the effective maximum local work size will be the minimum
	 * between the user value and the device value. */
	for (cl_uint i = 0; i < dims; ++i) {
		if (lws[i] != 0)
			max_wi_sizes[i] = MIN(max_wi_sizes[i], lws[i]);
	}

	/* If kernel is not NULL, query it about workgroup size preferences
	 * and capabilities. */
	if (krnl != NULL) {

		/* Determine maximum workgroup size. "Info unavailable" errors
		 * are tolerated here: wg_size_max simply stays 0 and is later
		 * filled in from the device information. */
		wg_size_max = ccl_kernel_get_workgroup_info_scalar(krnl, dev,
			CL_KERNEL_WORK_GROUP_SIZE, size_t, &err_internal);
		ccl_if_err_not_info_unavailable_propagate_goto(
			err, err_internal, error_handler);

#ifdef CL_VERSION_1_1

		/* Determine preferred workgroup size multiple (OpenCL >= 1.1). */

		/* Get OpenCL version of the underlying platform. */
		cl_uint ocl_ver = ccl_kernel_get_opencl_version(krnl, &err_internal);
		ccl_if_err_propagate_goto(err, err_internal, error_handler);

		/* If OpenCL version of the underlying platform is >= 1.1 ... */
		if (ocl_ver >= 110) {

			/* ...use CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE... */
			wg_size_mult = ccl_kernel_get_workgroup_info_scalar(
				krnl, dev, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
				size_t, &err_internal);
			ccl_if_err_not_info_unavailable_propagate_goto(
				err, err_internal, error_handler);

		} else {

			/* ...otherwise just use CL_KERNEL_WORK_GROUP_SIZE. */
			wg_size_mult = wg_size_max;

		}

#else

		/* Headers predate OpenCL 1.1: fall back to the maximum
		 * workgroup size. */
		wg_size_mult = wg_size_max;

#endif

	}

	/* If it was not possible to obtain wg_size_mult and wg_size_max, either
	 * because kernel is NULL or the information was unavailable, use values
	 * obtained from device. */
	if ((wg_size_max == 0) && (wg_size_mult == 0)) {
		wg_size_max = ccl_device_get_info_scalar(
			dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, size_t, &err_internal);
		ccl_if_err_propagate_goto(err, err_internal, error_handler);
		wg_size_mult = wg_size_max;
	}

	/* Try to find an appropriate local worksize. */
	for (cl_uint i = 0; i < dims; ++i) {

		/* Each lws component is at most the preferred workgroup
		 * multiple or the maximum size of that component in device. */
		lws[i] = MIN(wg_size_mult, max_wi_sizes[i]);

		/* Update total workgroup size. */
		wg_size *= lws[i];

		/* Update total real worksize. */
		real_ws *= real_worksize[i];

	}

	/* Don't let each component of the local worksize to be
	 * higher than the respective component of the real
	 * worksize. Halving keeps wg_size consistent with the
	 * product of the lws components. */
	for (cl_uint i = 0; i < dims; ++i) {
		while (lws[i] > real_worksize[i]) {
			lws[i] /= 2;
			wg_size /= 2;
		}
	}

	/* The total workgroup size can't be higher than the maximum
	 * supported by the device. Components are halved starting from
	 * the last dimension, one halving per dimension per pass. */
	while (wg_size > wg_size_max) {
		wg_size_aux = wg_size;
		for (int i = dims - 1; i >= 0; --i) {
			if (lws[i] > 1) {
				/* Local work size can't be smaller than 1. */
				lws[i] /= 2;
				wg_size /= 2;
			}
			if (wg_size <= wg_size_max) break;
		}
		/* Avoid infinite loops and throw error if wg_size didn't
		 * change (i.e. every component is already 1 yet the total
		 * still exceeds the device limit). */
		ccl_if_err_create_goto(*err, CCL_ERROR, wg_size == wg_size_aux,
			CCL_ERROR_OTHER, error_handler,
			"%s: Unable to determine a work size within the device limit (%d).",
			CCL_STRD, (int) wg_size_max);
	}

	/* If output variable gws is not NULL... */
	if (gws != NULL) {
		/* ...find a global worksize which is a multiple of the local
		 * worksize and is big enough to handle the real worksize
		 * (i.e. round real_worksize[i] up to the next multiple of
		 * lws[i]). */
		for (cl_uint i = 0; i < dims; ++i) {
			gws[i] = ((real_worksize[i] / lws[i])
				+ (((real_worksize[i] % lws[i]) > 0) ? 1 : 0))
				* lws[i];
		}
	} else {
		/* ...otherwise check if found local worksizes are divisors of
		 * the respective real_worksize. If so keep them, otherwise find
		 * local worksizes which respect the maximum sizes allowed by
		 * the kernel and the device, and is a dimension-wise divisor of
		 * the real_worksize. */
		cl_bool lws_are_divisors = CL_TRUE;
		for (cl_uint i = 0; i < dims; ++i) {
			/* Check if lws[i] is divisor of real_worksize[i]. */
			if (real_worksize[i] % lws[i] != 0) {
				/* Ops... lws[i] is not divisor of real_worksize[i], so
				 * we'll have to try and find new lws ahead. */
				lws_are_divisors = CL_FALSE;
				break;
			}
		}
		/* Is lws divisor of real_worksize, dimension-wise? */
		if (!lws_are_divisors) {
			/* No, so we'll have to find new lws. */
			wg_size = 1;
			for (cl_uint i = 0; i < dims; ++i) {

				/* For each dimension, try to use the previously
				 * found lws[i]. */
				if ((real_worksize[i] % lws[i] != 0)
					|| (lws[i] * wg_size > wg_size_max))
				{
					/* Previoulsy found lws[i] not usable, find
					 * new one. Must be a divisor of real_worksize[i]
					 * and respect the kernel and device maximum lws.
					 * Linear scan over candidate divisors, keeping
					 * the largest valid one. */
					cl_uint best_lws_i = 1;
					for (cl_uint j = 2; j <= real_worksize[i] / 2; ++j) {
						/* If current value is higher than the kernel
						 * and device limits, stop searching and use
						 * the best one so far. */
						if ((wg_size * j > wg_size_max)
							|| (j > max_wi_sizes[i])) break;
						/* Otherwise check if current value is divisor
						 * of lws[i]. If so, keep it as the best so
						 * far. */
						if (real_worksize[i] % j == 0)
							best_lws_i = j;
					}
					/* Keep the best divisor for current dimension. */
					lws[i] = best_lws_i;
				}
				/* Update absolute workgroup size (all dimensions). */
				wg_size *= lws[i];
			}
		}
	}

	/* If we got here, everything is OK. */
	g_assert(err == NULL || *err == NULL);
	ret_status = CL_TRUE;
	goto finish;

error_handler:

	/* If we got here there was an error, verify that it is so. */
	g_assert(err == NULL || *err != NULL);
	ret_status = CL_FALSE;

finish:

	/* Return status. */
	return ret_status;

}
Exemplo n.º 3
0
/**
 * Cellular automata sample main function.
 *
 * Runs CA_ITERS iterations of a cellular automaton on an OpenCL device,
 * using two images for double buffering and two helper threads (exec
 * and comm) which synchronize with the host via GLib async queues.
 *
 * Usage: ca [device_index [rng_seed]]
 *
 * NOTE(review): several identifiers used below (queue_exec, queue_comm,
 * comm_thread_queue, exec_thread_queue, host_thread_queue, go_msg,
 * stop_msg, origin, region, real_ws, CA_WIDTH, CA_HEIGHT, CA_ITERS,
 * CA_KERNEL, IMAGE_FILE_PREFIX, IMAGE_FILE_NUM_DIGITS, exec_func,
 * comm_func, struct thread_data) are assumed to be declared at file
 * scope elsewhere in this file — confirm against the full source.
 * */
int main(int argc, char* argv[]) {

	/* Wrappers for OpenCL objects. */
	CCLContext* ctx;
	CCLDevice* dev;
	CCLImage* img1;
	CCLImage* img2;
	CCLProgram* prg;
	CCLKernel* krnl;
	CCLEvent* evt1;
	CCLEvent* evt2;
	/* Other variables. */
	CCLEventWaitList ewl = NULL;
	/* Profiler object. */
	CCLProf* prof;
	/* Output images filename. */
	char* filename;
	/* Selected device, may be given in command line. -1 means "ask". */
	int dev_idx = -1;
	/* Error handling object (must be NULL). */
	GError* err = NULL;
	/* Does selected device support images? */
	cl_bool image_ok;
	/* Initial sim state. */
	cl_uchar4* input_image;
	/* Simulation states. */
	cl_uchar4** output_images;
	/* RNG seed, may be given in command line. */
	unsigned int seed;
	/* Image file write status. */
	int file_write_status;
	/* Image format. */
	cl_image_format image_format = { CL_RGBA, CL_UNSIGNED_INT8 };
	/* Thread data. */
	struct thread_data td;

	/* Global and local worksizes. */
	size_t gws[2];
	size_t lws[2];
	/* Threads. */
	GThread* comm_thread;
	GThread* exec_thread;

	/* Check arguments. */
	if (argc >= 2) {
		/* Check if a device was specified in the command line.
		 * NOTE(review): atoi silently yields 0 on non-numeric input. */
		dev_idx = atoi(argv[1]);
	}
	if (argc >= 3) {
		/* Check if a RNG seed was specified. */
		seed = atoi(argv[2]);
	} else {
		/* Otherwise seed from the current time. */
		seed = (unsigned int) time(NULL);
	}

	/* Initialize RNG. */
	srand(seed);

	/* Create random initial state: each cell is either fully on
	 * (0xFF) or off (0x00), with alpha always opaque. */
	input_image = (cl_uchar4*)
		malloc(CA_WIDTH * CA_HEIGHT * sizeof(cl_uchar4));
	for (cl_uint i = 0; i < CA_WIDTH * CA_HEIGHT; ++i) {
		cl_uchar state = (rand() & 0x3) ? 0xFF : 0x00;
		input_image[i] = (cl_uchar4) {{ state, state, state, 0xFF }};
	}

	/* Allocate space for simulation results: one host buffer per
	 * iteration plus one for the final state. */
	output_images = (cl_uchar4**)
		malloc((CA_ITERS + 1) * sizeof(cl_uchar4*));
	for (cl_uint i = 0; i < CA_ITERS + 1; ++i)
		output_images[i] = (cl_uchar4*)
			malloc(CA_WIDTH * CA_HEIGHT * sizeof(cl_uchar4));

	/* Create context using device selected from menu. */
	ctx = ccl_context_new_from_menu_full(&dev_idx, &err);
	HANDLE_ERROR(err);

	/* Get first device in context. */
	dev = ccl_context_get_device(ctx, 0, &err);
	HANDLE_ERROR(err);

	/* Ask device if it supports images. */
	image_ok = ccl_device_get_info_scalar(
		dev, CL_DEVICE_IMAGE_SUPPORT, cl_bool, &err);
	HANDLE_ERROR(err);
	if (!image_ok)
		ERROR_MSG_AND_EXIT("Selected device doesn't support images.");

	/* Create command queues (file-scope globals shared with the
	 * helper threads), with profiling enabled. */
	queue_exec = ccl_queue_new(ctx, dev, CL_QUEUE_PROFILING_ENABLE, &err);
	HANDLE_ERROR(err);
	queue_comm = ccl_queue_new(ctx, dev, CL_QUEUE_PROFILING_ENABLE, &err);
	HANDLE_ERROR(err);

	/* Create 2D image for initial state. */
	img1 = ccl_image_new(ctx, CL_MEM_READ_WRITE,
		&image_format, NULL, &err,
		"image_type", (cl_mem_object_type) CL_MEM_OBJECT_IMAGE2D,
		"image_width", (size_t) CA_WIDTH,
		"image_height", (size_t) CA_HEIGHT,
		NULL);
	HANDLE_ERROR(err);

	/* Create another 2D image for double buffering. */
	img2 = ccl_image_new(ctx, CL_MEM_READ_WRITE,
		&image_format, NULL, &err,
		"image_type", (cl_mem_object_type) CL_MEM_OBJECT_IMAGE2D,
		"image_width", (size_t) CA_WIDTH,
		"image_height", (size_t) CA_HEIGHT,
		NULL);
	HANDLE_ERROR(err);

	/* Create program from kernel source and compile it. */
	prg = ccl_program_new_from_source(ctx, CA_KERNEL, &err);
	HANDLE_ERROR(err);

	ccl_program_build(prg, NULL, &err);
	HANDLE_ERROR(err);

	/* Get kernel wrapper. */
	krnl = ccl_program_get_kernel(prg, "ca", &err);
	HANDLE_ERROR(err);

	/* Determine nice local and global worksizes.
	 * NOTE(review): real_ws is not declared in this function —
	 * presumably a file-scope array holding {CA_WIDTH, CA_HEIGHT};
	 * confirm against the full source. */
	ccl_kernel_suggest_worksizes(krnl, dev, 2, real_ws, gws, lws, &err);
	HANDLE_ERROR(err);

	printf("\n * Global work-size: (%d, %d)\n", (int) gws[0], (int) gws[1]);
	printf(" * Local work-size: (%d, %d)\n", (int) lws[0], (int) lws[1]);

	/* Create thread communication queues. */
	comm_thread_queue = g_async_queue_new();
	exec_thread_queue = g_async_queue_new();
	host_thread_queue = g_async_queue_new();

	/* Setup thread data shared by both helper threads. */
	td.krnl = krnl;
	td.img1 = img1;
	td.img2 = img2;
	td.gws = gws;
	td.lws = lws;
	td.output_images = output_images;

	/* Create threads. */
	exec_thread = g_thread_new("exec_thread", exec_func, &td);
	comm_thread = g_thread_new("comm_thread", comm_func, &td);

	/* Start profiling. */
	prof = ccl_prof_new();
	ccl_prof_start(prof);

	/* Write initial state (blocking write; origin/region are assumed
	 * to be file-scope globals covering the whole image). */
	ccl_image_enqueue_write(img1, queue_comm, CL_TRUE,
		origin, region, 0, 0, input_image, NULL, &err);
	HANDLE_ERROR(err);

	/* Run CA_ITERS iterations of the CA. */
	for (cl_uint i = 0; i < CA_ITERS; ++i) {

		/* Send message to comms thread. */
		g_async_queue_push(comm_thread_queue, &go_msg);

		/* Send message to exec thread. */
		g_async_queue_push(exec_thread_queue, &go_msg);

		/* Get event wrappers from both threads. */
		evt1 = (CCLEvent*) g_async_queue_pop(host_thread_queue);
		evt2 = (CCLEvent*) g_async_queue_pop(host_thread_queue);

		/* Can't continue until this iteration is over. */
		ccl_event_wait_list_add(&ewl, evt1, evt2, NULL);

		/* Wait for events. */
		ccl_event_wait(&ewl, &err);
		HANDLE_ERROR(err);

	}

	/* Send message to comms thread to read last result. */
	g_async_queue_push(comm_thread_queue, &go_msg);

	/* Send stop messages to both threads. */
	g_async_queue_push(comm_thread_queue, &stop_msg);
	g_async_queue_push(exec_thread_queue, &stop_msg);

	/* Get event wrapper from comms thread. */
	evt1 = (CCLEvent*) g_async_queue_pop(host_thread_queue);

	/* Can't continue until final read is over. */
	ccl_event_wait_list_add(&ewl, evt1, NULL);
	ccl_event_wait(&ewl, &err);
	HANDLE_ERROR(err);

	/* Make sure both queues are finished. */
	ccl_queue_finish(queue_comm, &err);
	HANDLE_ERROR(err);
	ccl_queue_finish(queue_exec, &err);
	HANDLE_ERROR(err);

	/* Stop profiling timer and add queues for analysis. */
	ccl_prof_stop(prof);
	ccl_prof_add_queue(prof, "Comms", queue_comm);
	ccl_prof_add_queue(prof, "Exec", queue_exec);

	/* Allocate space for base filename (prefix + digits + ".png"
	 * + NUL terminator). */
	filename = (char*) malloc(
		(strlen(IMAGE_FILE_PREFIX ".png") + IMAGE_FILE_NUM_DIGITS + 1) * sizeof(char));

	/* Write results to image files.
	 * NOTE(review): only the first CA_ITERS of the CA_ITERS + 1
	 * allocated state buffers are written out — the final state
	 * appears to be read but never saved; confirm this is intended. */
	for (cl_uint i = 0; i < CA_ITERS; ++i) {

		/* Determine next filename. */
		sprintf(filename, "%s%0" G_STRINGIFY(IMAGE_FILE_NUM_DIGITS) "d.png", IMAGE_FILE_PREFIX, i);

		/* Save next image. */
		file_write_status = stbi_write_png(filename, CA_WIDTH, CA_HEIGHT, 4,
			output_images[i], CA_WIDTH * sizeof(cl_uchar4));

		/* Give feedback if unable to save image. */
		if (!file_write_status) {
			ERROR_MSG_AND_EXIT("Unable to save image in file.");
		}
	}

	/* Process profiling info. */
	ccl_prof_calc(prof, &err);
	HANDLE_ERROR(err);

	/* Print profiling info. */
	ccl_prof_print_summary(prof);

	/* Save profiling info. */
	ccl_prof_export_info_file(prof, "prof.tsv", &err);
	HANDLE_ERROR(err);

	/* Destroy threads. */
	g_thread_join(exec_thread);
	g_thread_join(comm_thread);

	/* Destroy thread communication queues. */
	g_async_queue_unref(comm_thread_queue);
	g_async_queue_unref(exec_thread_queue);
	g_async_queue_unref(host_thread_queue);

	/* Release host buffers. */
	free(filename);
	free(input_image);
	for (cl_uint i = 0; i < CA_ITERS + 1; ++i)
		free(output_images[i]);
	free(output_images);

	/* Release wrappers. */
	ccl_image_destroy(img1);
	ccl_image_destroy(img2);
	ccl_program_destroy(prg);
	ccl_queue_destroy(queue_comm);
	ccl_queue_destroy(queue_exec);
	ccl_context_destroy(ctx);

	/* Destroy profiler. */
	ccl_prof_destroy(prof);

	/* Check all wrappers have been destroyed. */
	g_assert(ccl_wrapper_memcheck());

	/* Terminate. */
	return 0;

}
Exemplo n.º 4
0
/**
 * @internal
 *
 * @brief Tests the ccl_buffer_new_from_region() function.
 * */
static void create_from_region_test() {

    /* Wrappers and test data. */
    CCLContext * context = NULL;
    CCLDevice * device = NULL;
    CCLQueue * queue = NULL;
    CCLBuffer * buffer = NULL;
    CCLBuffer * sub_buffer = NULL;
    CCLEvent * event = NULL;
    CCLEventWaitList wait_list = NULL;
    CCLErr * err = NULL;
    cl_ulong * host_buf;
    cl_ulong * host_subbuf;
    cl_uint mem_align;
    size_t buf_size;
    size_t subbuf_size;

    /* Obtain the test context with the pre-defined device. */
    context = ccl_test_context_new(&err);
    g_assert_no_error(err);

    /* Fetch the context's first device. */
    device = ccl_context_get_device(context, 0, &err);
    g_assert_no_error(err);

    /* Query the minimum sub-buffer alignment, in bits. */
    mem_align = ccl_device_get_info_scalar(
        device, CL_DEVICE_MEM_BASE_ADDR_ALIGN, cl_uint, &err);
    g_assert_no_error(err);

    /* Size the sub-buffer so its byte offset respects the device
     * alignment (dividing by 64, the bit-width of cl_ulong, converts
     * the alignment from bits to cl_ulong elements); the parent
     * buffer holds four such regions. */
    subbuf_size = sizeof(cl_ulong) * mem_align / 64;
    buf_size = 4 * subbuf_size;

    /* Allocate the host-side buffers. */
    host_buf = g_slice_alloc(buf_size);
    host_subbuf = g_slice_alloc(subbuf_size);

    /* Fill the host buffer with random values. */
    for (cl_uint i = 0; i < buf_size / sizeof(cl_ulong); ++i)
        host_buf[i] = g_test_rand_int();

    /* Create a command queue. */
    queue = ccl_queue_new(context, device, 0, &err);
    g_assert_no_error(err);

    /* Create the parent device buffer, initialized from host data. */
    buffer = ccl_buffer_new(
        context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, buf_size, host_buf, &err);
    g_assert_no_error(err);

    /* Create a sub-buffer covering the parent buffer's second region
     * (byte offset and size both equal to subbuf_size). */
    sub_buffer = ccl_buffer_new_from_region(
        buffer, 0, subbuf_size, subbuf_size, &err);
    g_assert_no_error(err);

    /* Asynchronously read the sub-buffer into the host sub-buffer. */
    event = ccl_buffer_enqueue_read(
        sub_buffer, queue, CL_FALSE, 0, subbuf_size, host_subbuf, NULL, &err);
    g_assert_no_error(err);

    /* Block until the read completes. */
    ccl_event_wait(ccl_ewl(&wait_list, event, NULL), &err);
    g_assert_no_error(err);

    /* The sub-buffer contents must match the second region of the
     * original host data. */
    for (cl_uint i = 0; i < subbuf_size / sizeof(cl_ulong); ++i)
        g_assert_cmpuint(
            host_subbuf[i], ==, host_buf[i + subbuf_size / sizeof(cl_ulong)]);

    /* Release device and host resources. */
    ccl_buffer_destroy(buffer);
    ccl_buffer_destroy(sub_buffer);
    ccl_queue_destroy(queue);
    ccl_context_destroy(context);
    g_slice_free1(buf_size, host_buf);
    g_slice_free1(subbuf_size, host_subbuf);

    /* Confirm that memory allocated by wrappers has been properly
     * freed. */
    g_assert(ccl_wrapper_memcheck());
}
Exemplo n.º 5
0
/**
 * Image filter main function.
 *
 * Loads an image, applies the "do_filter" OpenCL kernel to it on a
 * user-selected device, and writes the result to IMAGE_FILE.
 *
 * Usage: image_filter <image_file> [device_index]
 * */
int main(int argc, char * argv[]) {

    /* Wrappers for OpenCL objects. */
    CCLContext * ctx;
    CCLDevice * dev;
    CCLImage * img_in;
    CCLImage * img_out;
    CCLQueue * queue;
    CCLProgram * prg;
    CCLKernel * krnl;
    CCLSampler * smplr;

    /* Device selected specified in the command line. */
    int dev_idx = -1;

    /* Error handling object (must be initialized to NULL). */
    CCLErr * err = NULL;

    /* Does selected device support images? */
    cl_bool image_ok;

    /* Image data in host. */
    unsigned char * input_image;
    unsigned char * output_image;

    /* Image properties. */
    int width, height, n_channels;

    /* Image file write status. */
    int file_write_status;

    /* Image parameters. */
    cl_image_format image_format = { CL_RGBA, CL_UNSIGNED_INT8 };

    /* Origin and region of complete image. */
    size_t origin[3] = { 0, 0, 0 };
    size_t region[3];

    /* Real worksize. */
    size_t real_ws[2];

    /* Global and local worksizes. */
    size_t gws[2];
    size_t lws[2];

    /* Check arguments. */
    if (argc < 2) {
        ERROR_MSG_AND_EXIT("Usage: image_filter <image_file> [device_index]");
    } else if (argc >= 3) {
        /* Check if a device was specified in the command line. */
        dev_idx = atoi(argv[2]);
    }

    /* Load image, forcing 4 channels (RGBA). */
    input_image = stbi_load(argv[1], &width, &height, &n_channels, 4);
    if (!input_image) ERROR_MSG_AND_EXIT(stbi_failure_reason());

    /* Real work size. */
    real_ws[0] = width; real_ws[1] = height;

    /* Set image region. */
    region[0] = width; region[1] = height; region[2] = 1;

    /* Create context using device selected from menu. */
    ctx = ccl_context_new_from_menu_full(&dev_idx, &err);
    HANDLE_ERROR(err);

    /* Get first device in context. */
    dev = ccl_context_get_device(ctx, 0, &err);
    HANDLE_ERROR(err);

    /* Ask device if it supports images. */
    image_ok = ccl_device_get_info_scalar(
        dev, CL_DEVICE_IMAGE_SUPPORT, cl_bool, &err);
    HANDLE_ERROR(err);
    if (!image_ok)
        ERROR_MSG_AND_EXIT("Selected device doesn't support images.");

    /* Create a command queue. */
    queue = ccl_queue_new(ctx, dev, 0, &err);
    HANDLE_ERROR(err);

    /* Create 2D input image using loaded image data. */
    img_in = ccl_image_new(ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
        &image_format, input_image, &err,
        "image_type", (cl_mem_object_type) CL_MEM_OBJECT_IMAGE2D,
        "image_width", (size_t) width,
        "image_height", (size_t) height,
        NULL);
    HANDLE_ERROR(err);

    /* Create 2D output image. */
    img_out = ccl_image_new(ctx, CL_MEM_WRITE_ONLY,
        &image_format, NULL, &err,
        "image_type", (cl_mem_object_type) CL_MEM_OBJECT_IMAGE2D,
        "image_width", (size_t) width,
        "image_height", (size_t) height,
        NULL);
    HANDLE_ERROR(err);

    /* Create program from kernel source and compile it. */
    prg = ccl_program_new_from_source(ctx, FILTER_KERNEL, &err);
    HANDLE_ERROR(err);

    ccl_program_build(prg, NULL, &err);
    HANDLE_ERROR(err);

    /* Get kernel wrapper. */
    krnl = ccl_program_get_kernel(prg, "do_filter", &err);
    HANDLE_ERROR(err);

    /* Determine nice local and global worksizes. */
    ccl_kernel_suggest_worksizes(krnl, dev, 2, real_ws, gws, lws, &err);
    HANDLE_ERROR(err);

    /* Show information to user. */
    printf("\n * Image size: %d x %d, %d channels\n",
        width, height, n_channels);
    printf(" * Global work-size: (%d, %d)\n", (int) gws[0], (int) gws[1]);
    printf(" * Local work-size: (%d, %d)\n", (int) lws[0], (int) lws[1]);

    /* Create sampler (this could also be created in-kernel). */
    smplr = ccl_sampler_new(ctx, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE,
        CL_FILTER_NEAREST, &err);
    HANDLE_ERROR(err);

    /* Apply filter. */
    ccl_kernel_set_args_and_enqueue_ndrange(
        krnl, queue, 2, NULL, gws, lws, NULL, &err,
        img_in, img_out, smplr, NULL);
    HANDLE_ERROR(err);

    /* Allocate space for output image (RGBA, 4 bytes per pixel).
     * Fixed: the original did not check the malloc result, so an
     * allocation failure would lead to a NULL-pointer write in the
     * read below; also dropped the unnecessary cast (C idiom). */
    output_image = malloc(width * height * 4 * sizeof(unsigned char));
    if (!output_image)
        ERROR_MSG_AND_EXIT("Unable to allocate memory for output image.");

    /* Read image data back to host (blocking read). */
    ccl_image_enqueue_read(img_out, queue, CL_TRUE, origin, region,
        0, 0, output_image, NULL, &err);
    HANDLE_ERROR(err);

    /* Write image to file. */
    file_write_status = stbi_write_png(IMAGE_FILE, width, height, 4,
        output_image, width * 4);

    /* Give feedback. */
    if (file_write_status) {
        fprintf(stdout, "\nImage saved in file '" IMAGE_FILE "'\n");
    } else {
        ERROR_MSG_AND_EXIT("Unable to save image in file.");
    }

    /* Release host images. */
    free(output_image);
    stbi_image_free(input_image);

    /* Release wrappers. */
    ccl_image_destroy(img_in);
    ccl_image_destroy(img_out);
    ccl_sampler_destroy(smplr);
    ccl_program_destroy(prg);
    ccl_queue_destroy(queue);
    ccl_context_destroy(ctx);

    /* Check all wrappers have been destroyed. */
    assert(ccl_wrapper_memcheck());

    /* Terminate. */
    return EXIT_SUCCESS;
}