/// Enqueues a function to execute on the host. event enqueue_native_kernel(void (BOOST_COMPUTE_CL_CALLBACK *user_func)(void *), void *args, size_t cb_args, uint_ num_mem_objects, const cl_mem *mem_list, const void **args_mem_loc, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); event event_; cl_int ret = clEnqueueNativeKernel( m_queue, user_func, args, cb_args, num_mem_objects, mem_list, args_mem_loc, events.size(), events.get_event_ptr(), &event_.get() ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } return event_; }
/*
 * Entry point for clEnqueueNativeKernel. Forwarding to the host
 * implementation is compiled out: the function logs a FIXME and returns
 * CL_INVALID_OPERATION without touching any of its arguments.
 *
 * FIXME: There appears to be no obvious method for translating the ABI
 * for user_func. There is no opaque user_data structure passed that could
 * encapsulate the return address, and the OpenCL specification seems to
 * indicate that args has an implementation-specific structure that cannot
 * be used to stash away a return address for the WINAPI user_func.
 */
cl_int WINAPI wine_clEnqueueNativeKernel(cl_command_queue command_queue,
                                         void WINAPI (*user_func)(void *args),
                                         void * args,
                                         size_t cb_args,
                                         cl_uint num_mem_objects,
                                         const cl_mem * mem_list,
                                         const void ** args_mem_loc,
                                         cl_uint num_events_in_wait_list,
                                         const cl_event * event_wait_list,
                                         cl_event * event)
{
#if 0
    /* Direct pass-through; disabled because of the calling-convention
     * mismatch described above. */
    return clEnqueueNativeKernel(command_queue, user_func, args, cb_args,
                                 num_mem_objects, mem_list, args_mem_loc,
                                 num_events_in_wait_list, event_wait_list,
                                 event);
#else
    FIXME("not supported due to user_func ABI mismatch\n");
    return CL_INVALID_OPERATION;
#endif
}
/**
 * Enqueue a native C/C++ function (i.e. one not compiled with the OpenCL
 * compiler) for execution through a command queue. This is a wrapper
 * around the clEnqueueNativeKernel() OpenCL function; refer to its
 * documentation for further details.
 *
 * @public @memberof ccl_kernel
 *
 * @param[in] cq Command queue wrapper object. Must not be `NULL`.
 * @param[in] user_func Host-callable user function. Must not be `NULL`.
 * @param[in] args Pointer to the argument list `user_func` is to be
 * called with.
 * @param[in] cb_args Size, in bytes, of the argument list pointed to by
 * `args`.
 * @param[in] num_mos Number of ::CCLMemObj* objects given in `mo_list`.
 * @param[in] mo_list List of ::CCLMemObj* objects (individual entries may
 * be `NULL`). Must be `NULL` when `num_mos` is zero and non-`NULL` when
 * `num_mos` is greater than zero.
 * @param[in] args_mem_loc Pointer to the locations within `args` where
 * the `cl_mem` values (unwrapped from the respective ::CCLMemObj*
 * objects) are stored. Before the user function runs, these `cl_mem`
 * values are replaced by pointers to global memory.
 * @param[in,out] evt_wait_lst Events which must complete before this
 * command can execute. On success the list is cleared and may be reused
 * by client code.
 * @param[out] err Return location for a ::CCLErr object, or `NULL` if
 * error reporting is to be ignored.
 * @return Event wrapper object identifying this command, or `NULL` if a
 * precondition fails or the command could not be enqueued.
 * */
CCL_EXPORT
CCLEvent* ccl_kernel_enqueue_native(CCLQueue* cq,
	void (CL_CALLBACK * user_func)(void*), void* args, size_t cb_args,
	cl_uint num_mos, CCLMemObj* const* mo_list,
	const void** args_mem_loc, CCLEventWaitList* evt_wait_lst,
	CCLErr** err) {

	/* A command queue is mandatory. */
	g_return_val_if_fail(cq != NULL, NULL);
	/* So is the function to be executed. */
	g_return_val_if_fail(user_func != NULL, NULL);
	/* Either no memory objects are given (num_mos == 0 AND
	 * mo_list == NULL), or some are (num_mos > 0 AND mo_list != NULL). */
	g_return_val_if_fail(((num_mos == 0) && (mo_list == NULL))
		|| ((num_mos > 0) && (mo_list != NULL)), NULL);
	/* The error location, if given, must not already hold an error. */
	g_return_val_if_fail(err == NULL || *err == NULL, NULL);

	/* Status returned by the OpenCL call. */
	cl_int ocl_status;
	/* Raw OpenCL event produced by the enqueue call. */
	cl_event raw_event = NULL;
	/* Wrapper around the raw event; this is what gets returned. */
	CCLEvent* evt_wrapper = NULL;
	/* Temporary array holding the unwrapped cl_mem objects. */
	cl_mem* unwrapped = NULL;
	/* Size in bytes of the temporary array (also needed to free it). */
	const size_t unwrapped_size = sizeof(cl_mem) * num_mos;

	/* Build the array of raw cl_mem objects, keeping NULL entries as
	 * they are. */
	if (num_mos > 0) {
		unwrapped = g_slice_alloc(unwrapped_size);
		for (cl_uint idx = 0; idx < num_mos; ++idx) {
			if (mo_list[idx] != NULL) {
				unwrapped[idx] = ccl_memobj_unwrap(mo_list[idx]);
			} else {
				unwrapped[idx] = NULL;
			}
		}
	}

	/* Hand the native function over to OpenCL. */
	ocl_status = clEnqueueNativeKernel(
		ccl_queue_unwrap(cq),
		user_func,
		args,
		cb_args,
		num_mos,
		(const cl_mem*) unwrapped,
		args_mem_loc,
		ccl_event_wait_list_get_num_events(evt_wait_lst),
		ccl_event_wait_list_get_clevents(evt_wait_lst),
		&raw_event);
	ccl_if_err_create_goto(*err, CCL_OCL_ERROR,
		CL_SUCCESS != ocl_status, ocl_status, error_handler,
		"%s: unable to enqueue native kernel (OpenCL error %d: %s).",
		CCL_STRD, ocl_status, ccl_err(ocl_status));

	/* Wrap the event and tie it to the command queue, which releases it
	 * automatically when the queue itself is released. */
	evt_wrapper = ccl_queue_produce_event(cq, raw_event);

	/* The wait list has been consumed; reset it for reuse. */
	ccl_event_wait_list_clear(evt_wait_lst);

	/* Success path: no error may have been set. */
	g_assert(err == NULL || *err == NULL);
	goto finish;

error_handler:

	/* Failure path: an error must have been set (if reporting is on). */
	g_assert(err == NULL || *err != NULL);

finish:

	/* Dispose of the temporary cl_mem array, if one was created. */
	if (num_mos > 0) {
		g_slice_free1(unwrapped_size, unwrapped);
	}

	/* Hand the event wrapper (NULL on failure) back to the caller. */
	return evt_wrapper;
}
int main(int argc, char **argv) { unsigned int n = 100; double *h_a; double *h_b; double *h_c; cl_mem mem_list[3]; const void *args_mem_loc[3]; struct native_kernel_args args; cl_mem d_a; cl_mem d_b; cl_mem d_c; cl_context ctx; cl_device_id did; cl_command_queue queue; size_t bytes = n * sizeof(double); h_a = (double *) malloc(bytes); h_b = (double *) malloc(bytes); h_c = (double *) malloc(bytes); size_t i; for( i = 0; i < n; i++ ) { h_a[i] = (double)i; h_b[i] = (double)i; } cl_int err; CHECK_CL_ERROR(poclu_get_any_device(&ctx, &did, &queue)); TEST_ASSERT( ctx ); TEST_ASSERT( did ); TEST_ASSERT( queue ); d_a = clCreateBuffer(ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, bytes, h_a, &err); CHECK_OPENCL_ERROR_IN("clCreateBuffer"); TEST_ASSERT(d_a); d_b = clCreateBuffer(ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, bytes, h_b, &err); CHECK_OPENCL_ERROR_IN("clCreateBuffer"); TEST_ASSERT(d_b); d_c = clCreateBuffer(ctx, CL_MEM_WRITE_ONLY, bytes, NULL, &err); CHECK_OPENCL_ERROR_IN("clCreateBuffer"); TEST_ASSERT(d_c); args.size = n; args.a = 0; args.b = 0; args.c = 0; mem_list[0] = d_a; mem_list[1] = d_b; mem_list[2] = d_c; args_mem_loc[0] = &args.a; args_mem_loc[1] = &args.b; args_mem_loc[2] = &args.c; err = clEnqueueNativeKernel ( queue, native_vec_add, &args, sizeof(struct native_kernel_args), 3, mem_list, args_mem_loc, 0, NULL, NULL); CHECK_OPENCL_ERROR_IN("clEnqueueNativeKernel"); err = clEnqueueReadBuffer(queue, d_c, CL_TRUE, 0, bytes, h_c, 0, NULL, NULL ); CHECK_OPENCL_ERROR_IN("clEnqueueReadBuffer"); err = clFinish(queue); CHECK_OPENCL_ERROR_IN("clFinish"); for(i = 0; i < n; i++) if(h_c[i] != 2 * i) { printf("Fail to validate vector\n"); goto error; } CHECK_CL_ERROR (clReleaseMemObject (d_a)); CHECK_CL_ERROR (clReleaseMemObject (d_b)); CHECK_CL_ERROR (clReleaseMemObject (d_c)); CHECK_CL_ERROR (clReleaseCommandQueue (queue)); CHECK_CL_ERROR (clReleaseContext (ctx)); free(h_a); free(h_b); free(h_c); return EXIT_SUCCESS; error: return EXIT_FAILURE; }
int main(int argc, char **argv) { unsigned int n = 100; double *h_a; double *h_b; double *h_c; cl_mem mem_list[3]; const void *args_mem_loc[3]; struct native_kernel_args args; cl_mem d_a; cl_mem d_b; cl_mem d_c; cl_platform_id platforms[1]; cl_uint nplatforms; cl_device_id devices[1]; // + 1 for duplicate test cl_uint num_devices; cl_context context; cl_command_queue queue; size_t bytes = n * sizeof(double); h_a = (double *) malloc(bytes); h_b = (double *) malloc(bytes); h_c = (double *) malloc(bytes); int i; for( i = 0; i < n; i++ ) { h_a[i] = i; h_b[i] = i; } cl_int err; err = clGetPlatformIDs(1, platforms, &nplatforms); if (err != CL_SUCCESS && !nplatforms) return EXIT_FAILURE; err = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, 1, devices, &num_devices); if (err != CL_SUCCESS) return EXIT_FAILURE; context = clCreateContext(NULL, num_devices, devices, NULL, NULL, &err); if (err != CL_SUCCESS) return EXIT_FAILURE; err = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(cl_device_id), devices, NULL); if (err != CL_SUCCESS) { puts("clGetContextInfo call failed\n"); goto error; } queue = clCreateCommandQueue(context, devices[0], 0, NULL); if (!queue) { puts("clCreateCommandQueue call failed\n"); goto error; } d_a = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, bytes, h_a, &err); if (d_a == NULL) { printf("clCreateBuffer call failed err = %d\n", err); goto error; } d_b = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, bytes, h_b, &err); if (d_b == NULL) { printf("clCreateBuffer call failed err = %d\n", err); goto error; } d_c = clCreateBuffer(context, CL_MEM_WRITE_ONLY, bytes, NULL, &err); if (d_c == NULL) { printf("clCreateBuffer call failed err = %d\n", err); goto error; } args.size = n; args.a = 0; args.b = 0; args.c = 0; mem_list[0] = d_a; mem_list[1] = d_b; mem_list[2] = d_c; args_mem_loc[0] = &args.a; args_mem_loc[1] = &args.b; args_mem_loc[2] = &args.c; err = clEnqueueNativeKernel ( queue, native_vec_add, &args, 
sizeof(struct native_kernel_args), 3, mem_list, args_mem_loc, 0, NULL, NULL); if (err != CL_SUCCESS) { puts("clGetContextInfo call failed\n"); goto error; } err = clEnqueueReadBuffer(queue, d_c, CL_TRUE, 0, bytes, h_c, 0, NULL, NULL ); if (err != CL_SUCCESS) { puts("clGetContextInfo call failed\n"); goto error; } clFinish(queue); for(i = 0; i < n; i++) if(h_c[i] != 2 * i) { printf("Fail to validate vector\n"); goto error; } clReleaseMemObject(d_a); clReleaseMemObject(d_b); clReleaseMemObject(d_c); clReleaseCommandQueue(queue); clReleaseContext(context); free(h_a); free(h_b); free(h_c); return EXIT_SUCCESS; error: return EXIT_FAILURE; }