示例#1
0
    /// Enqueues a native (host-callable) function on this command queue.
    ///
    /// All arguments are forwarded unchanged to clEnqueueNativeKernel();
    /// \p events supplies the wait list for the command. If the call does
    /// not report CL_SUCCESS, an opencl_error carrying the returned code
    /// is thrown.
    ///
    /// \return the event object filled in by clEnqueueNativeKernel()
    event enqueue_native_kernel(void (BOOST_COMPUTE_CL_CALLBACK *user_func)(void *),
                                void *args,
                                size_t cb_args,
                                uint_ num_mem_objects,
                                const cl_mem *mem_list,
                                const void **args_mem_loc,
                                const wait_list &events = wait_list())
    {
        BOOST_ASSERT(m_queue != 0);

        // Receives the event created for the enqueued command.
        event native_event;

        const cl_int status = clEnqueueNativeKernel(
            m_queue,
            user_func, args, cb_args,
            num_mem_objects, mem_list, args_mem_loc,
            events.size(), events.get_event_ptr(),
            &native_event.get()
        );

        if(CL_SUCCESS != status){
            BOOST_THROW_EXCEPTION(opencl_error(status));
        }

        return native_event;
    }
示例#2
0
cl_int WINAPI wine_clEnqueueNativeKernel(cl_command_queue command_queue,
                                         void WINAPI (*user_func)(void *args),
                                         void * args, size_t cb_args,
                                         cl_uint num_mem_objects, const cl_mem * mem_list, const void ** args_mem_loc,
                                         cl_uint num_events_in_wait_list, const cl_event * event_wait_list, cl_event * event)
{
    /* FIXME: user_func uses the WINAPI calling convention, and there is no
     * obvious way to translate that ABI for the host OpenCL library:
     *  - no opaque user_data pointer is passed that could wrap the callback;
     *  - per the OpenCL specification, args appears to have an
     *    implementation specific structure, so it cannot be used to stash
     *    the address of the WINAPI user_func either.
     * Until that is solved the request is rejected with CL_INVALID_OPERATION;
     * the disabled branch shows the call that would otherwise be forwarded.
     */
#if 0
    return clEnqueueNativeKernel(command_queue, user_func, args, cb_args, num_mem_objects, mem_list, args_mem_loc,
                                 num_events_in_wait_list, event_wait_list, event);
#else
    FIXME("not supported due to user_func ABI mismatch\n");
    return CL_INVALID_OPERATION;
#endif
}
示例#3
0
/**
 * Enqueues a command that runs a native C/C++ function, i.e. one that
 * was not compiled with the OpenCL compiler. This is a wrapper around
 * the clEnqueueNativeKernel() OpenCL function; see its documentation
 * for further details.
 *
 * @public @memberof ccl_kernel
 *
 * @param[in] cq A command queue wrapper object (must not be `NULL`).
 * @param[in] user_func A pointer to a host-callable user function (must
 * not be `NULL`).
 * @param[in] args A pointer to the args list that `user_func` should be
 * called with.
 * @param[in] cb_args The size in bytes of the args list that `args`
 * points to.
 * @param[in] num_mos The number of ::CCLMemObj* objects passed in
 * `mo_list`.
 * @param[in] mo_list A list of ::CCLMemObj* objects (individual entries
 * may be `NULL`) if `num_mos` > 0; must be `NULL` if `num_mos` is 0.
 * @param[in] args_mem_loc A pointer to the locations inside `args`
 * where the `cl_mem` values (unwrapped from the respective
 * ::CCLMemObj* objects) are stored. Before the user function is
 * executed, the `cl_mem` values are replaced by pointers to global
 * memory.
 * @param[in,out] evt_wait_lst List of events that need to complete
 * before this command can be executed. On success the list is cleared
 * and can be reused by client code.
 * @param[out] err Return location for a ::CCLErr object, or `NULL` if error
 * reporting is to be ignored.
 * @return Event wrapper object that identifies this command, or `NULL`
 * if a precondition check fails or the command cannot be enqueued.
 * */
CCL_EXPORT
CCLEvent* ccl_kernel_enqueue_native(CCLQueue* cq,
	void (CL_CALLBACK * user_func)(void*), void* args, size_t cb_args,
	cl_uint num_mos, CCLMemObj* const* mo_list,
	const void** args_mem_loc, CCLEventWaitList* evt_wait_lst,
	CCLErr** err) {

	/* Preconditions: a queue and a user function are mandatory. */
	g_return_val_if_fail(cq != NULL, NULL);
	g_return_val_if_fail(user_func != NULL, NULL);
	/* Either there are no memory objects and no list (num_mos == 0 AND
	 * mo_list == NULL), or there is at least one object and a list
	 * (num_mos > 0 AND mo_list != NULL). */
	g_return_val_if_fail(((num_mos == 0) && (mo_list == NULL))
		|| ((num_mos > 0) && (mo_list != NULL)), NULL);
	/* The error return location, if given, must not already be set. */
	g_return_val_if_fail(err == NULL || *err == NULL, NULL);

	/* Status returned by the OpenCL call. */
	cl_int ocl_status;
	/* Raw OpenCL event produced by the enqueue call. */
	cl_event raw_event = NULL;
	/* Wrapper returned to the caller; stays NULL on failure. */
	CCLEvent* evt_wrapper = NULL;
	/* Temporary array with the unwrapped cl_mem handles. */
	cl_mem* raw_mems = NULL;
	/* Size in bytes of the temporary array (needed again to free it). */
	const gsize raw_mems_size = sizeof(cl_mem) * num_mos;

	/* Build the array of raw cl_mem handles, keeping NULL entries. */
	if (num_mos > 0) {
		cl_uint idx = 0;
		raw_mems = g_slice_alloc(raw_mems_size);
		while (idx < num_mos) {
			if (mo_list[idx] != NULL) {
				raw_mems[idx] = ccl_memobj_unwrap(mo_list[idx]);
			} else {
				raw_mems[idx] = NULL;
			}
			++idx;
		}
	}

	/* Hand the native kernel to OpenCL. */
	ocl_status = clEnqueueNativeKernel(
		ccl_queue_unwrap(cq),
		user_func, args, cb_args,
		num_mos, (const cl_mem*) raw_mems, args_mem_loc,
		ccl_event_wait_list_get_num_events(evt_wait_lst),
		ccl_event_wait_list_get_clevents(evt_wait_lst),
		&raw_event);
	ccl_if_err_create_goto(*err, CCL_OCL_ERROR,
		CL_SUCCESS != ocl_status, ocl_status, error_handler,
		"%s: unable to enqueue native kernel (OpenCL error %d: %s).",
		CCL_STRD, ocl_status, ccl_err(ocl_status));

	/* Wrap the raw event and tie it to the command queue; the queue
	 * takes care of releasing it when the queue itself is released. */
	evt_wrapper = ccl_queue_produce_event(cq, raw_event);

	/* The wait list has been consumed, so empty it for reuse. */
	ccl_event_wait_list_clear(evt_wait_lst);

	/* Success path: no error may have been set. */
	g_assert(err == NULL || *err == NULL);
	goto finish;

error_handler:

	/* Failure path: an error must have been set (if one was requested). */
	g_assert(err == NULL || *err != NULL);

finish:

	/* Dispose of the temporary cl_mem array, if one was allocated. */
	if (num_mos > 0)
		g_slice_free1(raw_mems_size, raw_mems);

	/* Hand back the event wrapper (NULL if the enqueue failed). */
	return evt_wrapper;

}
示例#4
0
/* Test driver for clEnqueueNativeKernel().
 *
 * Fills two host vectors with h_a[i] = h_b[i] = i, copies them into device
 * buffers, enqueues native_vec_add as a native kernel over the three
 * buffers, reads the output buffer back and checks that every element
 * equals 2 * i.
 *
 * The command line arguments are not used.
 *
 * Returns EXIT_SUCCESS when the result validates and EXIT_FAILURE when host
 * allocation or validation fails. The CHECK_* / TEST_ASSERT macros are
 * defined elsewhere; presumably they abort the test on failure.
 */
int main(int argc, char **argv) {
  unsigned int n = 100;

  double *h_a;
  double *h_b;
  double *h_c;
  cl_mem mem_list[3];
  const void *args_mem_loc[3];

  struct native_kernel_args args;

  cl_mem d_a;
  cl_mem d_b;
  cl_mem d_c;

  cl_context ctx;
  cl_device_id did;
  cl_command_queue queue;

  size_t bytes = n * sizeof(double);

  (void) argc;
  (void) argv;

  h_a = (double *) malloc(bytes);
  h_b = (double *) malloc(bytes);
  h_c = (double *) malloc(bytes);
  /* Bail out before touching the buffers if any allocation failed;
   * free(NULL) is a no-op, so all three can be released unconditionally. */
  if (h_a == NULL || h_b == NULL || h_c == NULL)
    {
      printf("Fail to allocate host buffers\n");
      free(h_a);
      free(h_b);
      free(h_c);
      return EXIT_FAILURE;
    }

  size_t i;
  for( i = 0; i < n; i++ )
  {
    h_a[i] = (double)i;
    h_b[i] = (double)i;
  }

  cl_int err;

  CHECK_CL_ERROR(poclu_get_any_device(&ctx, &did, &queue));
  TEST_ASSERT( ctx );
  TEST_ASSERT( did );
  TEST_ASSERT( queue );

  /* Inputs are initialised from the host vectors at creation time. */
  d_a = clCreateBuffer(ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, bytes, h_a, &err);
  CHECK_OPENCL_ERROR_IN("clCreateBuffer");
  TEST_ASSERT(d_a);

  d_b = clCreateBuffer(ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, bytes, h_b, &err);
  CHECK_OPENCL_ERROR_IN("clCreateBuffer");
  TEST_ASSERT(d_b);

  d_c = clCreateBuffer(ctx, CL_MEM_WRITE_ONLY, bytes, NULL, &err);
  CHECK_OPENCL_ERROR_IN("clCreateBuffer");
  TEST_ASSERT(d_c);

  /* The pointer members start out as 0; args_mem_loc tells OpenCL where
   * inside args the memory object pointers have to be stored before the
   * native function runs. */
  args.size = n;
  args.a = 0;
  args.b = 0;
  args.c = 0;

  mem_list[0] = d_a;
  mem_list[1] = d_b;
  mem_list[2] = d_c;

  args_mem_loc[0] = &args.a;
  args_mem_loc[1] = &args.b;
  args_mem_loc[2] = &args.c;

  err = clEnqueueNativeKernel ( queue, native_vec_add, &args, sizeof(struct native_kernel_args),
          3, mem_list, args_mem_loc, 0, NULL, NULL);
  CHECK_OPENCL_ERROR_IN("clEnqueueNativeKernel");

  /* Blocking read of the result vector. */
  err = clEnqueueReadBuffer(queue, d_c, CL_TRUE, 0, bytes, h_c, 0, NULL, NULL );
  CHECK_OPENCL_ERROR_IN("clEnqueueReadBuffer");

  err = clFinish(queue);
  CHECK_OPENCL_ERROR_IN("clFinish");

  for(i = 0; i < n; i++)
    if(h_c[i] != 2 * i)
      {
        printf("Fail to validate vector\n");
        goto error;
      }

  CHECK_CL_ERROR (clReleaseMemObject (d_a));
  CHECK_CL_ERROR (clReleaseMemObject (d_b));
  CHECK_CL_ERROR (clReleaseMemObject (d_c));
  CHECK_CL_ERROR (clReleaseCommandQueue (queue));
  CHECK_CL_ERROR (clReleaseContext (ctx));

  free(h_a);
  free(h_b);
  free(h_c);

  return EXIT_SUCCESS;

error:
  /* Validation failed: the test result is already decided, so release
   * everything on a best-effort basis and ignore the return codes. */
  clReleaseMemObject (d_a);
  clReleaseMemObject (d_b);
  clReleaseMemObject (d_c);
  clReleaseCommandQueue (queue);
  clReleaseContext (ctx);

  free(h_a);
  free(h_b);
  free(h_c);

  return EXIT_FAILURE;
}
示例#5
0
/* Test driver for clEnqueueNativeKernel() using the plain OpenCL API.
 *
 * Picks the first platform and its first device, creates a context and a
 * command queue, fills two host vectors with h_a[i] = h_b[i] = i, enqueues
 * native_vec_add as a native kernel over the three device buffers, reads
 * the output buffer back and checks that every element equals 2 * i.
 *
 * The command line arguments are not used.
 *
 * Returns EXIT_SUCCESS when the result validates, EXIT_FAILURE on any
 * failure. All resources acquired so far are released on every exit path.
 */
int main(int argc, char **argv) {
  unsigned int n = 100;
  int status = EXIT_FAILURE;

  double *h_a = NULL;
  double *h_b = NULL;
  double *h_c = NULL;
  cl_mem mem_list[3];
  const void *args_mem_loc[3];

  struct native_kernel_args args;

  /* Handles start out as NULL so the cleanup code knows what to release. */
  cl_mem d_a = NULL;
  cl_mem d_b = NULL;
  cl_mem d_c = NULL;

  cl_platform_id platforms[1];
  cl_uint nplatforms = 0;
  cl_device_id devices[1];
  cl_uint num_devices = 0;

  cl_context context = NULL;
  cl_command_queue queue = NULL;

  size_t bytes = n * sizeof(double);

  unsigned int i;
  cl_int err;

  (void) argc;
  (void) argv;

  h_a = (double *) malloc(bytes);
  h_b = (double *) malloc(bytes);
  h_c = (double *) malloc(bytes);
  if (h_a == NULL || h_b == NULL || h_c == NULL)
    {
      puts("malloc call failed\n");
      goto cleanup;
    }

  for( i = 0; i < n; i++ )
  {
    h_a[i] = i;
    h_b[i] = i;
  }

  /* Fail if the query itself fails OR if it reports no platform at all;
   * platforms[0] is only valid when at least one platform was returned. */
  err = clGetPlatformIDs(1, platforms, &nplatforms);
  if (err != CL_SUCCESS || nplatforms == 0)
    goto cleanup;

  err = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, 1,
                       devices, &num_devices);
  if (err != CL_SUCCESS || num_devices == 0)
    goto cleanup;

  /* num_devices reports how many matching devices the platform has, which
   * can exceed the single entry that was requested; never tell
   * clCreateContext to read more entries than the array holds. */
  if (num_devices > sizeof(devices) / sizeof(devices[0]))
    num_devices = sizeof(devices) / sizeof(devices[0]);

  context = clCreateContext(NULL, num_devices, devices, NULL,
                                       NULL, &err);
  if (err != CL_SUCCESS)
    {
      context = NULL;
      goto cleanup;
    }

  err = clGetContextInfo(context, CL_CONTEXT_DEVICES,
                         sizeof(cl_device_id), devices, NULL);
  if (err != CL_SUCCESS)
    {
      puts("clGetContextInfo call failed\n");
      goto cleanup;
    }

  queue = clCreateCommandQueue(context, devices[0], 0, NULL);
  if (!queue)
    {
      puts("clCreateCommandQueue call failed\n");
      goto cleanup;
    }

  /* Inputs are initialised from the host vectors at creation time. */
  d_a = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, bytes, h_a, &err);
  if (d_a == NULL)
    {
      printf("clCreateBuffer call failed err = %d\n", err);
      goto cleanup;
    }
  d_b = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, bytes, h_b, &err);
  if (d_b == NULL)
    {
      printf("clCreateBuffer call failed err = %d\n", err);
      goto cleanup;
    }
  d_c = clCreateBuffer(context, CL_MEM_WRITE_ONLY, bytes, NULL, &err);
  if (d_c == NULL)
    {
      printf("clCreateBuffer call failed err = %d\n", err);
      goto cleanup;
    }

  /* The pointer members start out as 0; args_mem_loc tells OpenCL where
   * inside args the memory object pointers have to be stored before the
   * native function runs. */
  args.size = n;
  args.a = 0;
  args.b = 0;
  args.c = 0;

  mem_list[0] = d_a;
  mem_list[1] = d_b;
  mem_list[2] = d_c;

  args_mem_loc[0] = &args.a;
  args_mem_loc[1] = &args.b;
  args_mem_loc[2] = &args.c;

  err = clEnqueueNativeKernel ( queue, native_vec_add, &args, sizeof(struct native_kernel_args),
          3, mem_list, args_mem_loc, 0, NULL, NULL);
  if (err != CL_SUCCESS)
    {
      puts("clEnqueueNativeKernel call failed\n");
      goto cleanup;
    }

  /* Blocking read of the result vector. */
  err = clEnqueueReadBuffer(queue, d_c, CL_TRUE, 0, bytes, h_c, 0, NULL, NULL );
  if (err != CL_SUCCESS)
    {
      puts("clEnqueueReadBuffer call failed\n");
      goto cleanup;
    }

  err = clFinish(queue);
  if (err != CL_SUCCESS)
    {
      puts("clFinish call failed\n");
      goto cleanup;
    }

  for(i = 0; i < n; i++)
    if(h_c[i] != 2 * i)
      {
        printf("Fail to validate vector\n");
        goto cleanup;
      }

  status = EXIT_SUCCESS;

cleanup:
  /* Single exit point: release whatever was acquired, in reverse order of
   * dependency. Handles that were never created are still NULL. */
  if (d_a != NULL)
    clReleaseMemObject(d_a);
  if (d_b != NULL)
    clReleaseMemObject(d_b);
  if (d_c != NULL)
    clReleaseMemObject(d_c);
  if (queue != NULL)
    clReleaseCommandQueue(queue);
  if (context != NULL)
    clReleaseContext(context);

  free(h_a);
  free(h_b);
  free(h_c);

  return status;
}