Beispiel #1
0
static int initialize(int use_gpu)
{
	cl_int result;
	size_t size;

#ifndef POCL_HSA
	// create OpenCL context
	cl_platform_id platform_id;
	if (clGetPlatformIDs(1, &platform_id, NULL) != CL_SUCCESS) { printf("ERROR: clGetPlatformIDs(1,*,0) failed\n"); return -1; }
	cl_context_properties ctxprop[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id, 0};
	device_type = use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU;
	context = clCreateContextFromType( ctxprop, device_type, NULL, NULL, NULL );
#else
	context = poclu_create_any_context();
#endif
	if( !context ) { printf("ERROR: clCreateContextFromType(%s) failed\n", use_gpu ? "GPU" : "CPU"); return -1; }

	// get the list of GPUs
	result = clGetContextInfo( context, CL_CONTEXT_DEVICES, 0, NULL, &size );
	num_devices = (int) (size / sizeof(cl_device_id));
	
	if( result != CL_SUCCESS || num_devices < 1 ) { printf("ERROR: clGetContextInfo() failed\n"); return -1; }
	device_list = new cl_device_id[num_devices];
	if( !device_list ) { printf("ERROR: new cl_device_id[] failed\n"); return -1; }
	result = clGetContextInfo( context, CL_CONTEXT_DEVICES, size, device_list, NULL );
	if( result != CL_SUCCESS ) { printf("ERROR: clGetContextInfo() failed\n"); return -1; }

	// create command queue for the first device
	cmd_queue = clCreateCommandQueue( context, device_list[0], 0, NULL );
	if( !cmd_queue ) { printf("ERROR: clCreateCommandQueue() failed\n"); return -1; }

	return 0;
}
Beispiel #2
0
///
// functions for preparing create opencl program, contains CreateContext, CreateProgram, CreateCommandQueue, CreateMemBuffer, and Cleanup
// Create an OpenCL context on the first available GPU platform. 
cl_context CreateContext()
{
    cl_context context = NULL;
    cl_uint platformIdCount = 0;
    cl_int errNum;

 #ifndef POCL_HSA
    // get number of platforms
    clGetPlatformIDs (0, NULL, &platformIdCount);

    std::vector<cl_platform_id> platformIds(platformIdCount);
    clGetPlatformIDs (platformIdCount, platformIds.data(), NULL);
	
	// In this example, first platform is a CPU, the second one is a GPU. we just choose the first available device.  
    cl_context_properties contextProperties[] =
    {
        CL_CONTEXT_PLATFORM,
        (cl_context_properties)platformIds[1],
        0
    };
    context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU,
                                      NULL, NULL, &errNum);
    if (errNum != CL_SUCCESS)
    {
        std::cout << "Could not create GPU context, trying CPU..." << std::endl;
        context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_CPU,
                                          NULL, NULL, &errNum);
        if (errNum != CL_SUCCESS)
        {
            std::cerr << "Failed to create an OpenCL GPU or CPU context." << std::endl;
            return NULL;
        }
    }
   #else
	context = poclu_create_any_context();
   #endif
    
    return context;

}
Beispiel #3
0
int main(int argc, char **argv)
{
  /* test name */
  char name[] = "test_image_query_funcs";
  size_t global_work_size[1] = { 1 }, local_work_size[1]= { 1 };
  size_t srcdir_length, name_length, filename_size;
  char *filename = NULL;
  char *source = NULL;
  cl_device_id devices[1];
  cl_context context = NULL;
  cl_command_queue queue = NULL;
  cl_program program = NULL;
  cl_kernel kernel = NULL;
  cl_int err;

  /* image parameters */
  cl_uchar4 *imageData;
  cl_image_format image_format;
  cl_image_desc image2_desc, image3_desc;

  printf("Running test %s...\n", name);

  memset(&image2_desc, 0, sizeof(cl_image_desc));
  image2_desc.image_type = CL_MEM_OBJECT_IMAGE2D;
  image2_desc.image_width = 2;
  image2_desc.image_height = 4;

  memset(&image3_desc, 0, sizeof(cl_image_desc));
  image3_desc.image_type = CL_MEM_OBJECT_IMAGE3D;
  image3_desc.image_width = 2;
  image3_desc.image_height = 4;
  image3_desc.image_depth = 8;

  image_format.image_channel_order = CL_RGBA;
  image_format.image_channel_data_type = CL_UNSIGNED_INT8;
  imageData = (cl_uchar4*)malloc (4 * 4 * sizeof(cl_uchar4));

  TEST_ASSERT (imageData != NULL && "out of host memory\n");
  memset (imageData, 1, 4*4*sizeof(cl_uchar4));

  /* determine file name of kernel source to load */
  srcdir_length = strlen(SRCDIR);
  name_length = strlen(name);
  filename_size = srcdir_length + name_length + 16;
  filename = (char *)malloc(filename_size + 1);
  TEST_ASSERT (filename != NULL && "out of host memory\n");

  snprintf(filename, filename_size, "%s/%s.cl", SRCDIR, name);

  /* read source code */
  source = poclu_read_file (filename);
  TEST_ASSERT (source != NULL && "Kernel .cl not found.");

  /* setup an OpenCL context and command queue using default device */
  context = poclu_create_any_context();
  TEST_ASSERT (context != NULL && "clCreateContextFromType call failed\n");

  cl_sampler external_sampler = clCreateSampler (
      context, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &err);
  CHECK_OPENCL_ERROR_IN ("clCreateSampler");

  CHECK_CL_ERROR (clGetContextInfo (context, CL_CONTEXT_DEVICES,
                                    sizeof (cl_device_id), devices, NULL));

  queue = clCreateCommandQueue (context, devices[0], 0, &err);
  CHECK_OPENCL_ERROR_IN ("clCreateCommandQueue");

  /* Create image */
  cl_mem image2
      = clCreateImage (context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
                       &image_format, &image2_desc, imageData, &err);
  CHECK_OPENCL_ERROR_IN ("clCreateImage image2");

  cl_mem image3
      = clCreateImage (context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
                       &image_format, &image3_desc, imageData, &err);
  CHECK_OPENCL_ERROR_IN ("clCreateImage image3");

  unsigned color[4] = { 2, 9, 11, 7 };
  size_t orig[3] = { 0, 0, 0 };
  size_t reg[3] = { 2, 4, 1 };
  err = clEnqueueFillImage (queue, image2, color, orig, reg, 0, NULL, NULL);
  CHECK_OPENCL_ERROR_IN ("clCreateImage image3");

  /* create and build program */
  program = clCreateProgramWithSource (context, 1, (const char **)&source,
                                       NULL, &err);
  CHECK_OPENCL_ERROR_IN ("clCreateProgramWithSource");

  err = clBuildProgram (program, 0, NULL, NULL, NULL, NULL);
  CHECK_OPENCL_ERROR_IN ("clBuildProgram");

  /* execute the kernel with give name */
  kernel = clCreateKernel (program, name, NULL);
  CHECK_OPENCL_ERROR_IN ("clCreateKernel");

  err = clSetKernelArg (kernel, 0, sizeof (cl_mem), &image2);
  CHECK_OPENCL_ERROR_IN ("clSetKernelArg 0");

  err = clSetKernelArg (kernel, 1, sizeof (cl_mem), &image3);
  CHECK_OPENCL_ERROR_IN ("clSetKernelArg 1");

  err = clSetKernelArg (kernel, 2, sizeof (cl_sampler), &external_sampler);
  CHECK_OPENCL_ERROR_IN ("clSetKernelArg 2");

  err = clEnqueueNDRangeKernel (queue, kernel, 1, NULL, global_work_size,
                                local_work_size, 0, NULL, NULL);
  CHECK_OPENCL_ERROR_IN ("clEnqueueNDRangeKernel");

  err = clFinish (queue);
  CHECK_OPENCL_ERROR_IN ("clFinish");

  clReleaseMemObject (image2);
  clReleaseMemObject (image3);
  clReleaseKernel (kernel);
  clReleaseProgram (program);
  clReleaseCommandQueue (queue);
  clReleaseSampler (external_sampler);
  clUnloadCompiler ();
  clReleaseContext (context);
  free (source);
  free (filename);
  free (imageData);

  printf("OK\n");
  return 0;
}
Beispiel #4
0
int main(int argc, char **argv)
{
  /* test name */
  char name[] = "test_image_query_funcs";
  size_t global_work_size[1] = { 1 }, local_work_size[1]= { 1 };
  size_t srcdir_length, name_length, filename_size;
  size_t source_size, source_read;
  char const *sources[1];
  char *filename = NULL;
  char *source = NULL;
  FILE *source_file = NULL;
  cl_device_id devices[1];
  cl_context context = NULL;
  cl_command_queue queue = NULL;
  cl_program program = NULL;
  cl_kernel kernel = NULL;
  cl_int result;
  int retval = -1;

  /* image parameters */
  cl_uchar4 *imageData;
  cl_image_format image_format;
  cl_image_desc image2_desc, image3_desc;

  

  printf("Running test %s...\n", name);

  memset(&image2_desc, 0, sizeof(cl_image_desc));
  image2_desc.image_type = CL_MEM_OBJECT_IMAGE2D;
  image2_desc.image_width = 2;
  image2_desc.image_height = 4;

  memset(&image3_desc, 0, sizeof(cl_image_desc));
  image3_desc.image_type = CL_MEM_OBJECT_IMAGE3D;
  image3_desc.image_width = 2;
  image3_desc.image_height = 4;
  image3_desc.image_depth = 8;

  image_format.image_channel_order = CL_RGBA;
  image_format.image_channel_data_type = CL_UNSIGNED_INT8;
  imageData = (cl_uchar4*)malloc (4 * 4 * sizeof(cl_uchar4));
  
  if (imageData == NULL)
    {
      puts("out of host memory\n");
      goto error;
    }
  memset (imageData, 1, 4*4*sizeof(cl_uchar4));

  /* determine file name of kernel source to load */
  srcdir_length = strlen(SRCDIR);
  name_length = strlen(name);
  filename_size = srcdir_length + name_length + 16;
  filename = (char *)malloc(filename_size + 1);
  if (!filename) 
    {
      puts("out of memory");
      goto error;
    }
  
  snprintf(filename, filename_size, "%s/%s.cl", SRCDIR, name);
  
  /* read source code */
  source_file = fopen(filename, "r");
  if (!source_file) 
    {
      puts("source file not found\n");
      goto error;
    }
  
  fseek(source_file, 0, SEEK_END);
  source_size = ftell(source_file);
  fseek(source_file, 0, SEEK_SET);
  
  source = (char *)malloc(source_size + 1);
  if (!source) 
    {
      puts("out of memory\n");
      goto error;
    }
  
  source_read = fread(source, 1, source_size, source_file);
  if (source_read != source_size) 
    {
      puts("error reading from file\n");
      goto error;
    }
  
  source[source_size] = '\0';
  fclose(source_file);
  source_file = NULL;
  
  /* setup an OpenCL context and command queue using default device */
  context = poclu_create_any_context();
  if (!context) 
    {
      puts("clCreateContextFromType call failed\n");
      goto error;
    }

  result = clGetContextInfo(context, CL_CONTEXT_DEVICES,
                            sizeof(cl_device_id), devices, NULL);
  if (result != CL_SUCCESS) 
    {
      puts("clGetContextInfo call failed\n");
      goto error;
    }

  queue = clCreateCommandQueue(context, devices[0], 0, NULL); 
  if (!queue) 
    {
      puts("clCreateCommandQueue call failed\n");
      goto error;
    }

  /* Create image */

  cl_mem image2 = clCreateImage (context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
                                &image_format, &image2_desc, imageData, &result);
  if (result != CL_SUCCESS)
    {
      puts("image2 creation failed\n");
      goto error;
    }

  cl_mem image3 = clCreateImage (context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
                                &image_format, &image3_desc, imageData, &result);
  if (result != CL_SUCCESS)
    {
      puts("image3 creation failed\n");
      goto error;
    }


  /* create and build program */
  sources[0] = source;
  program = clCreateProgramWithSource(context, 1, sources, NULL, NULL); 
  if (!program) 
    {
      puts("clCreateProgramWithSource call failed\n");
      goto error;
    }

  result = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); 
  if (result != CL_SUCCESS) 
    {
      puts("clBuildProgram call failed\n");
      goto error;
    }

  /* execute the kernel with give name */
  kernel = clCreateKernel(program, name, NULL); 
  if (!kernel) 
    {
      puts("clCreateKernel call failed\n");
      goto error;
    }

   result = clSetKernelArg( kernel, 0, sizeof(cl_mem), &image2);
   if (result)
     {
       puts("clSetKernelArg 0 failed\n");
       goto error;
     }

   result = clSetKernelArg( kernel, 1, sizeof(cl_mem), &image3);
   if (result)
     {
       puts("clSetKernelArg 1 failed\n");
       goto error;
     }

  result = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size, 
                                  local_work_size, 0, NULL, NULL); 
  if (result != CL_SUCCESS) 
    {
      puts("clEnqueueNDRangeKernel call failed\n");
      goto error;
    }

  result = clFinish(queue);
  if (result == CL_SUCCESS)
    retval = 0;

error:

  if (kernel) 
    {
      clReleaseKernel(kernel);
    }
  if (program) 
    {
      clReleaseProgram(program);
    }
  if (queue) 
    {
      clReleaseCommandQueue(queue);
    }
  if (context) 
    {
      clReleaseContext(context);
    }
  if (source_file) 
    {
      fclose(source_file);
    }
  if (source) 
    {
      free(source);
    }
  if (filename)
    {
      free(filename);
    }
  if (imageData)
    {
      free(imageData);
    }

  if (retval) 
    {
      printf("FAIL\n");
      return 1;
    }
 
  printf("OK\n");
  return 0;
}
Beispiel #5
0
int 
exec_scalarwave_kernel(char      const *const program_source, 
                       cl_double       *const phi,
                       cl_double const *const phi_p,
                       cl_double const *const phi_p_p,
                       grid_t    const *const grid)
{ 
  static int initialised = 0;
  static cl_context context;
  static cl_command_queue cmd_queue;
  static cl_program program;
  static cl_kernel kernel;
  
  if (!initialised) {
    initialised = 1;
    
    context = poclu_create_any_context();
    if (!context) return -1;
    
    size_t ndevices;
    clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &ndevices);
    ndevices /= sizeof(cl_device_id);
    cl_device_id *devices = (cl_device_id*)malloc(ndevices * sizeof(cl_device_id));
    clGetContextInfo(context, CL_CONTEXT_DEVICES,
                     ndevices*sizeof(cl_device_id), devices, NULL);
    
    cmd_queue =
      clCreateCommandQueue(context, devices[0], 0, NULL);
    if (!cmd_queue) return -1;
    
    program =
      clCreateProgramWithSource(context, 1, (const char**)&program_source,
                                NULL, NULL);
    if (!program) return -1;
    
    int ierr;
    ierr = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (ierr) return -1;
    
    kernel = clCreateKernel(program, "scalarwave", NULL);
    if (!kernel) return -1;

    free (devices);
  }
  
  size_t const npoints = grid->ai * grid->aj * grid->ak;
  cl_mem const mem_phi =
    clCreateBuffer(context, 0,
                    npoints*sizeof(*phi), NULL, NULL);
  if (!mem_phi) return -1;
  cl_mem const mem_phi_p =
    clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
                   npoints*sizeof(*phi_p), (cl_double*)phi_p, NULL);
  if (!mem_phi_p) return -1;
  cl_mem const mem_phi_p_p =
    clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
                   npoints*sizeof(*phi_p_p), (cl_double*)phi_p_p, NULL);
  if (!mem_phi_p_p) return -1;
  cl_mem const mem_grid =
    clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                   sizeof(*grid), (grid_t*)grid, NULL);
  if (!mem_grid) return -1;
  
  int ierr;
  ierr = clSetKernelArg(kernel, 0, sizeof(cl_mem), &mem_phi);
  if (ierr) return -1;
  ierr = clSetKernelArg(kernel, 1, sizeof(cl_mem), &mem_phi_p);
  if (ierr) return -1;
  ierr = clSetKernelArg(kernel, 2, sizeof(cl_mem), &mem_phi_p_p);
  if (ierr) return -1;
  ierr = clSetKernelArg(kernel, 3, sizeof(cl_mem), &mem_grid);
  if (ierr) return -1;
  
  size_t const global_work_size[3] =
    {grid->ai, grid->aj, grid->ak};
  size_t const local_work_size[3] =
    {GRID_GRANULARITY, GRID_GRANULARITY, GRID_GRANULARITY};
  
  ierr = clEnqueueNDRangeKernel(cmd_queue, kernel, 3, NULL, 
                                global_work_size, local_work_size,  
                                0, NULL, NULL);
  if (ierr) return -1;
  
  ierr = clFinish(cmd_queue);
  if (ierr) return -1;
  
  ierr = clEnqueueReadBuffer(cmd_queue, mem_phi, CL_TRUE, 
                             0, npoints*sizeof(*phi), phi,
                             0, NULL, NULL);
  if (ierr) return -1;
  
  clReleaseMemObject(mem_phi);
  clReleaseMemObject(mem_phi_p);
  clReleaseMemObject(mem_phi_p_p);
  clReleaseMemObject(mem_grid);
  /* clReleaseKernel(kernel); */
  /* clReleaseProgram(program); */
  /* clReleaseCommandQueue(cmd_queue); */
  /* clReleaseContext(context); */
 
  return 0;
}
Beispiel #6
0
int call_test(const char *name)
{
  size_t global_work_size[1] = { 1 }, local_work_size[1]= { 1 };
  size_t srcdir_length, name_length, filename_size;
  size_t source_size, source_read;
  char const *sources[1];
  char *filename = NULL;
  char *source = NULL;
  FILE *source_file = NULL;
  cl_device_id devices[1];
  cl_context context = NULL;
  cl_command_queue queue = NULL;
  cl_program program = NULL;
  cl_kernel kernel = NULL;
  cl_int result;
  int retval = -1;
  
  assert(name);

  /* determine file name of kernel source to load */
  srcdir_length = strlen(SRCDIR);
  name_length = strlen(name);
  filename_size = srcdir_length + name_length + 16;
  filename = (char *)malloc(filename_size + 1);
  if (!filename) {
    puts("out of memory");
    goto error;
  }

  snprintf(filename, filename_size, "%s/%s.cl", SRCDIR, name);

  /* read source code */
  source_file = fopen(filename, "r");
  if (!source_file) {
     puts("source file not found\n");
     goto error;
  }

  fseek(source_file, 0, SEEK_END);
  source_size = ftell(source_file);
  fseek(source_file, 0, SEEK_SET);
  
  source = (char *)malloc(source_size + 1);
  if (!source) {
    puts("out of memory\n");
    goto error;
  }

  source_read = fread(source, 1, source_size, source_file);
  if (source_read != source_size) {
    puts("error reading from file\n");
    goto error;
  }

  source[source_size] = '\0';
  fclose(source_file);
  source_file = NULL;

  /* setup an OpenCL context and command queue using default device */
  context = poclu_create_any_context();
  if (!context) {
    puts("clCreateContextFromType call failed\n");
    goto error;
  }

  result = clGetContextInfo(context, CL_CONTEXT_DEVICES,
      sizeof(cl_device_id), devices, NULL);
  if (result != CL_SUCCESS) {
    puts("clGetContextInfo call failed\n");
    goto error;
  }

  queue = clCreateCommandQueue(context, devices[0], 0, NULL); 
  if (!queue) {
    puts("clCreateCommandQueue call failed\n");
    goto error;
  }

  /* create and build program */
  sources[0] = source;
  program = clCreateProgramWithSource(context, 1, sources, NULL, NULL); 
  if (!program) {
    puts("clCreateProgramWithSource call failed\n");
    goto error;
  }

  result = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); 
  if (result != CL_SUCCESS) {
    puts("clBuildProgram call failed\n");
    goto error;
  }

  /* execute the kernel with give name */
  kernel = clCreateKernel(program, name, NULL); 
  if (!kernel) {
    puts("clCreateKernel call failed\n");
    goto error;
  }

  result = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, 
      global_work_size, local_work_size, 0, NULL, NULL); 
  if (result != CL_SUCCESS) {
    puts("clEnqueueNDRangeKernel call failed\n");
    goto error;
  }

  result = clFinish(queue);
  if (result == CL_SUCCESS)
    retval = 0;

error:

  if (kernel) {
    clReleaseKernel(kernel);
  }
  if (program) {
    clReleaseProgram(program);
  }
  if (queue) {
    clReleaseCommandQueue(queue);
  }
  if (context) {
    clReleaseContext(context);
  }
  if (source_file) {
    fclose(source_file);
  }
  if (source) {
    free(source);
  }
  if (filename) {
    free(filename);
  }

  return retval;
}
Beispiel #7
0
int call_test(const char *name)
{
  size_t global_work_size[1] = { 1 }, local_work_size[1]= { 1 };
  size_t srcdir_length, name_length, filename_size;
  char *filename = NULL;
  char *source = NULL;
  cl_device_id devices[1];
  cl_context context = NULL;
  cl_command_queue queue = NULL;
  cl_program program = NULL;
  cl_kernel kernel = NULL;
  cl_int result;
  int retval = -1;

  TEST_ASSERT (name != NULL);

  /* determine file name of kernel source to load */
  srcdir_length = strlen(SRCDIR);
  name_length = strlen(name);
  filename_size = srcdir_length + name_length + 16;
  filename = (char *)malloc(filename_size + 1);
  if (!filename) {
    puts("out of memory");
    goto error;
  }

  snprintf(filename, filename_size, "%s/%s.cl", SRCDIR, name);

  /* read source code */
  source = poclu_read_file (filename);
  TEST_ASSERT (source != NULL && "Kernel .cl not found.");

  /* setup an OpenCL context and command queue using default device */
  context = poclu_create_any_context();
  if (!context) {
    puts("clCreateContextFromType call failed\n");
    goto error;
  }

  result = clGetContextInfo(context, CL_CONTEXT_DEVICES,
      sizeof(cl_device_id), devices, NULL);
  if (result != CL_SUCCESS) {
    puts("clGetContextInfo call failed\n");
    goto error;
  }

  queue = clCreateCommandQueue(context, devices[0], 0, NULL); 
  if (!queue) {
    puts("clCreateCommandQueue call failed\n");
    goto error;
  }

  /* create and build program */
  program = clCreateProgramWithSource (context, 1, (const char **)&source,
                                       NULL, NULL);
  if (!program) {
    puts("clCreateProgramWithSource call failed\n");
    goto error;
  }

  result = clBuildProgram(program, 0, NULL, "-I" SRCDIR, NULL, NULL);
  if (result != CL_SUCCESS) {
    puts("clBuildProgram call failed\n");
    goto error;
  }

  /* execute the kernel with give name */
  kernel = clCreateKernel(program, name, NULL); 
  if (!kernel) {
    puts("clCreateKernel call failed\n");
    goto error;
  }

  result = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, 
      global_work_size, local_work_size, 0, NULL, NULL); 
  if (result != CL_SUCCESS) {
    puts("clEnqueueNDRangeKernel call failed\n");
    goto error;
  }

  result = clFinish(queue);
  if (result == CL_SUCCESS)
    retval = 0;

error:

  if (kernel) {
    clReleaseKernel(kernel);
  }
  if (program) {
    clReleaseProgram(program);
  }
  if (queue) {
    clReleaseCommandQueue(queue);
  }
  if (context) {
    clUnloadCompiler ();
    clReleaseContext (context);
  }
  if (source) {
    free(source);
  }
  if (filename) {
    free(filename);
  }

  return retval;
}
Beispiel #8
0
int 
exec_dot_product_kernel(const char *program_source, size_t source_size,
                        int n, cl_float4 *srcA, cl_float4 *srcB, cl_float *dst) 
{ 
  cl_context  context; 
  cl_command_queue cmd_queue; 
  cl_device_id  *devices; 
  cl_program  program; 
  cl_kernel  kernel; 
  cl_mem       memobjs[3]; 
  size_t       global_work_size[1]; 
  size_t       local_work_size[1]; 
  size_t       cb; 
  cl_int       err; 
  int          i;
  context = poclu_create_any_context();
  if (context == (cl_context)0) 
    return -1; 
 
  // get the list of GPU devices associated with context 
  clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &cb); 
  devices = (cl_device_id *) malloc(cb); 
  clGetContextInfo(context, CL_CONTEXT_DEVICES, cb, devices, NULL); 
 
  // create a command-queue 
  cmd_queue = clCreateCommandQueue(context, devices[0], 0, NULL); 
  if (cmd_queue == (cl_command_queue)0) 
    { 
      clReleaseContext(context); 
      free(devices); 
      return -1; 
    } 

  for (i = 0; i < n; ++i)
    {
       poclu_bswap_cl_float_array(devices[0], (cl_float*)&srcA[i], 4);
       poclu_bswap_cl_float_array(devices[0], (cl_float*)&srcB[i], 4);
    }

 
  // allocate the buffer memory objects 
  memobjs[0] = clCreateBuffer(context, 
                              CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, 
                              sizeof(cl_float4) * n, srcA, NULL); 
  if (memobjs[0] == (cl_mem)0) 
    { 
      clReleaseCommandQueue(cmd_queue); 
      clReleaseContext(context); 
      return -1; 
    } 
 
  memobjs[1] = clCreateBuffer(context, 
                              CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, 
                              sizeof(cl_float4) * n, srcB, NULL); 
  if (memobjs[1] == (cl_mem)0) 
    { 
      delete_memobjs(memobjs, 1); 
      clReleaseCommandQueue(cmd_queue); 
      clReleaseContext(context); 
      return -1;
    } 
 
  memobjs[2] = clCreateBuffer(context, 
			      CL_MEM_READ_WRITE, 
			      sizeof(cl_float) * n, NULL, NULL); 
  if (memobjs[2] == (cl_mem)0) 
    { 
      delete_memobjs(memobjs, 2); 
      clReleaseCommandQueue(cmd_queue); 
      clReleaseContext(context); 
      return -1; 
    } 
 
  // create the program 
  program = 
    clCreateProgramWithBinary
    (context, 1, devices, &source_size, 
     (const unsigned char**)&program_source, NULL, NULL); 
  if (program == (cl_program)0) 
    { 
      delete_memobjs(memobjs, 3); 
      clReleaseCommandQueue(cmd_queue); 
      clReleaseContext(context); 
      return -1; 
    } 
 
  // build the program 
  err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); 
  if (err != CL_SUCCESS) 
    { 
      delete_memobjs(memobjs, 3); 
      clReleaseProgram(program); 
      clReleaseCommandQueue(cmd_queue); 
      clReleaseContext(context); 
      return -1; 
    } 
 
  // create the kernel 
  kernel = clCreateKernel(program, "dot_product", NULL); 
  if (kernel == (cl_kernel)0) 
    { 
      delete_memobjs(memobjs, 3); 
      clReleaseProgram(program); 
      clReleaseCommandQueue(cmd_queue); 
      clReleaseContext(context); 
      return -1; 
    } 
 
  // set the args values 
  err = clSetKernelArg(kernel,  0,  
		       sizeof(cl_mem), (void *) &memobjs[0]); 
  err |= clSetKernelArg(kernel, 1,  
			sizeof(cl_mem), (void *) &memobjs[1]); 
  err |= clSetKernelArg(kernel, 2,
			sizeof(cl_mem), (void *) &memobjs[2]); 
 
  if (err != CL_SUCCESS) 
    { 
      delete_memobjs(memobjs, 3); 
      clReleaseKernel(kernel); 
      clReleaseProgram(program); 
      clReleaseCommandQueue(cmd_queue); 
      clReleaseContext(context); 
      return -1; 
    } 
 
  // set work-item dimensions 
  global_work_size[0] = n; 
  local_work_size[0]= 128; 
 
  // execute kernel 
  err = clEnqueueNDRangeKernel(cmd_queue, kernel, 1, NULL, 
			       global_work_size, local_work_size,  
			       0, NULL, NULL); 
  if (err != CL_SUCCESS) 
    { 
      delete_memobjs(memobjs, 3); 
      clReleaseKernel(kernel); 
      clReleaseProgram(program); 
      clReleaseCommandQueue(cmd_queue); 
      clReleaseContext(context); 
      return -1; 
    } 
 
  // read output image 
  err = clEnqueueReadBuffer(cmd_queue, memobjs[2], CL_TRUE, 
			    0, n * sizeof(cl_float), dst, 
			    0, NULL, NULL); 
  if (err != CL_SUCCESS) 
    { 
      delete_memobjs(memobjs, 3); 
      clReleaseKernel(kernel); 
      clReleaseProgram(program); 
      clReleaseCommandQueue(cmd_queue); 
      clReleaseContext(context); 
      return -1; 
    } 
  for (i = 0; i < n; ++i)
    {
      poclu_bswap_cl_float_array(devices[0], (cl_float*)&dst[i], 1);
      poclu_bswap_cl_float_array(devices[0], (cl_float*)&srcA[i], 4);
      poclu_bswap_cl_float_array(devices[0], (cl_float*)&srcB[i], 4);
    }
  free(devices); 


  // release kernel, program, and memory objects 
  delete_memobjs(memobjs, 3); 
  clReleaseKernel(kernel); 
  clReleaseProgram(program); 
  clReleaseCommandQueue(cmd_queue); 
  clReleaseContext(context); 
  return 0; // success... 
}
Beispiel #9
0
int 
exec_trig_kernel(const char *program_source, 
                 int n, void *srcA, void *dst) 
{ 
  cl_context  context; 
  cl_command_queue cmd_queue; 
  cl_device_id  *devices; 
  cl_program  program; 
  cl_kernel  kernel; 
  cl_mem       memobjs[2]; 
  size_t       global_work_size[1]; 
  size_t       local_work_size[1]; 
  size_t       cb; 
  cl_int       err; 

  float c = 7.3f; // a scalar number to test non-pointer args
 
  // create the OpenCL context on a GPU device 
  context = poclu_create_any_context();
  if (context == (cl_context)0) 
    return -1; 
 
  // get the list of GPU devices associated with context 
  clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &cb); 
  devices = malloc(cb);
  clGetContextInfo(context, CL_CONTEXT_DEVICES, cb, devices, NULL); 
 
  // create a command-queue 
  cmd_queue = clCreateCommandQueue(context, devices[0], 0, NULL); 
  if (cmd_queue == (cl_command_queue)0) 
    { 
      clReleaseContext(context); 
      free(devices); 
      return -1; 
    } 
  free(devices); 
 
  // allocate the buffer memory objects 
  memobjs[0] = clCreateBuffer(context, 
                              CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, 
                              sizeof(cl_float4) * n, srcA, NULL); 
  if (memobjs[0] == (cl_mem)0) 
    { 
      clReleaseCommandQueue(cmd_queue); 
      clReleaseContext(context); 
      return -1; 
    } 
 
  memobjs[1] = clCreateBuffer(context, 
			      CL_MEM_READ_WRITE, 
			      sizeof(cl_float4) * n, NULL, NULL); 
  if (memobjs[1] == (cl_mem)0) 
    { 
      delete_memobjs(memobjs, 1); 
      clReleaseCommandQueue(cmd_queue); 
      clReleaseContext(context); 
      return -1; 
    } 
 
  // create the program 
  program = clCreateProgramWithSource(context, 
				      1, (const char**)&program_source, NULL, NULL); 
  if (program == (cl_program)0) 
    { 
      delete_memobjs(memobjs, 2); 
      clReleaseCommandQueue(cmd_queue); 
      clReleaseContext(context); 
      return -1; 
    } 
 
  // build the program 
  err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); 
  if (err != CL_SUCCESS) 
    { 
      delete_memobjs(memobjs, 2); 
      clReleaseProgram(program); 
      clReleaseCommandQueue(cmd_queue); 
      clReleaseContext(context); 
      return -1; 
    } 
 
  // create the kernel 
  kernel = clCreateKernel(program, "trig", NULL); 
  if (kernel == (cl_kernel)0) 
    { 
      delete_memobjs(memobjs, 2); 
      clReleaseProgram(program); 
      clReleaseCommandQueue(cmd_queue); 
      clReleaseContext(context); 
      return -1; 
    } 
 
  // set the args values 
  err = clSetKernelArg(kernel,  0,  
		       sizeof(cl_mem), (void *) &memobjs[0]); 
  err |= clSetKernelArg(kernel, 1,
			sizeof(cl_mem), (void *) &memobjs[1]); 
  err |= clSetKernelArg(kernel, 2,
			sizeof(float), (void *) &c); 
 
  if (err != CL_SUCCESS) 
    { 
      delete_memobjs(memobjs, 2); 
      clReleaseKernel(kernel); 
      clReleaseProgram(program); 
      clReleaseCommandQueue(cmd_queue); 
      clReleaseContext(context); 
      return -1; 
    } 
 
  // set work-item dimensions 
  global_work_size[0] = n; 
  local_work_size[0]= 2; 
 
  // execute kernel 
  err = clEnqueueNDRangeKernel(cmd_queue, kernel, 1, NULL, 
			       global_work_size, local_work_size,  
			       0, NULL, NULL); 
  if (err != CL_SUCCESS) 
    { 
      delete_memobjs(memobjs, 2); 
      clReleaseKernel(kernel); 
      clReleaseProgram(program); 
      clReleaseCommandQueue(cmd_queue); 
      clReleaseContext(context); 
      return -1; 
    } 
 
  // read output image 
  err = clEnqueueReadBuffer(cmd_queue, memobjs[1], CL_TRUE, 
			    0, n * sizeof(cl_float4), dst, 
			    0, NULL, NULL); 
  if (err != CL_SUCCESS) 
    { 
      delete_memobjs(memobjs, 2); 
      clReleaseKernel(kernel); 
      clReleaseProgram(program); 
      clReleaseCommandQueue(cmd_queue); 
      clReleaseContext(context); 
      return -1; 
    } 
 
  // release kernel, program, and memory objects 
  delete_memobjs(memobjs, 2); 
  clReleaseKernel(kernel); 
  clReleaseProgram(program); 
  clReleaseCommandQueue(cmd_queue); 
  clReleaseContext(context); 
  return 0; // success... 
}
Beispiel #10
0
int main(int argc, char** argv) {

	printf("WG size of kernel = %d X %d\n", BLOCK_SIZE, BLOCK_SIZE);

	cl_int error;
	cl_uint num_platforms;

	// Get the number of platforms
	error = clGetPlatformIDs(0, NULL, &num_platforms);
	if (error != CL_SUCCESS) fatal_CL(error, __LINE__);

	// Get the list of platforms
	cl_platform_id* platforms = (cl_platform_id *) malloc(sizeof(cl_platform_id) * num_platforms);
	error = clGetPlatformIDs(num_platforms, platforms, NULL);
	if (error != CL_SUCCESS) fatal_CL(error, __LINE__);

	char pbuf[100];
#ifndef POCL_HSA    
	// Print the chosen platform (if there are multiple platforms, choose the first one)
	cl_platform_id platform = platforms[0];
	error = clGetPlatformInfo(platform, CL_PLATFORM_VENDOR, sizeof(pbuf), pbuf, NULL);
	if (error != CL_SUCCESS) fatal_CL(error, __LINE__);
	printf("Platform: %s\n", pbuf);

	// Create a GPU context
	cl_context_properties context_properties[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties) platform, 0};
	context = clCreateContextFromType(context_properties, CL_DEVICE_TYPE_GPU, NULL, NULL, &error);
	if (error != CL_SUCCESS) fatal_CL(error, __LINE__);
#else

	context = poclu_create_any_context();

#endif

	// Get and print the chosen device (if there are multiple devices, choose the first one)
	size_t devices_size;
	error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &devices_size);
	if (error != CL_SUCCESS) fatal_CL(error, __LINE__);
	cl_device_id *devices = (cl_device_id *) malloc(devices_size);
	error = clGetContextInfo(context, CL_CONTEXT_DEVICES, devices_size, devices, NULL);
	if (error != CL_SUCCESS) fatal_CL(error, __LINE__);
	device = devices[0];
	error = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(pbuf), pbuf, NULL);
	if (error != CL_SUCCESS) fatal_CL(error, __LINE__);
	printf("Device: %s\n", pbuf);

	size_t wgs;
	error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(wgs), &wgs, NULL);
	if (error != CL_SUCCESS) fatal_CL(error, __LINE__);
	printf("CL_DEVICE_MAX_WORK_GROUP_SIZE: %lu\n", wgs);

	// Create a command queue
	command_queue = DIVIDEND_CL_WRAP(clCreateCommandQueue)(context, device, 0, &error);
	if (error != CL_SUCCESS) fatal_CL(error, __LINE__);



	int size;
	int grid_rows,grid_cols = 0;
	float *FilesavingTemp,*FilesavingPower; //,*MatrixOut; 
	char *tfile, *pfile, *ofile;

	int total_iterations = 60;
	int pyramid_height = 1; // number of iterations

	if (argc < 7)
		usage(argc, argv);
	if((grid_rows = atoi(argv[1]))<=0||
			(grid_cols = atoi(argv[1]))<=0||
			(pyramid_height = atoi(argv[2]))<=0||
			(total_iterations = atoi(argv[3]))<=0)
		usage(argc, argv);

	tfile=argv[4];
	pfile=argv[5];
	ofile=argv[6];

	size=grid_rows*grid_cols;

	// --------------- pyramid parameters --------------- 
	int borderCols = (pyramid_height)*EXPAND_RATE/2;
	int borderRows = (pyramid_height)*EXPAND_RATE/2;
	int smallBlockCol = BLOCK_SIZE-(pyramid_height)*EXPAND_RATE;
	int smallBlockRow = BLOCK_SIZE-(pyramid_height)*EXPAND_RATE;
	int blockCols = grid_cols/smallBlockCol+((grid_cols%smallBlockCol==0)?0:1);
	int blockRows = grid_rows/smallBlockRow+((grid_rows%smallBlockRow==0)?0:1);

	FilesavingTemp = (float *) malloc(size*sizeof(float));
	FilesavingPower = (float *) malloc(size*sizeof(float));
	// MatrixOut = (float *) calloc (size, sizeof(float));

	if( !FilesavingPower || !FilesavingTemp) // || !MatrixOut)
		fatal("unable to allocate memory");

	// Read input data from disk
	readinput(FilesavingTemp, grid_rows, grid_cols, tfile);
	readinput(FilesavingPower, grid_rows, grid_cols, pfile);

	// Load kernel source from file
	const char *source = load_kernel_source("hotspot_kernel.cl");
	size_t sourceSize = strlen(source);

	// Compile the kernel
	cl_program program = clCreateProgramWithSource(context, 1, &source, &sourceSize, &error);
	if (error != CL_SUCCESS) fatal_CL(error, __LINE__);

	char clOptions[110];
	//  sprintf(clOptions,"-I../../src"); 
	sprintf(clOptions," ");
#ifdef BLOCK_SIZE
	sprintf(clOptions + strlen(clOptions), " -DBLOCK_SIZE=%d", BLOCK_SIZE);
#endif

	// Create an executable from the kernel
	error = DIVIDEND_CL_WRAP(clBuildProgram)(program, 1, &device, clOptions, NULL, NULL);
	// Show compiler warnings/errors
	static char log[65536]; memset(log, 0, sizeof(log));
	clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, sizeof(log)-1, log, NULL);
	if (strstr(log,"warning:") || strstr(log, "error:")) printf("<<<<\n%s\n>>>>\n", log);
	if (error != CL_SUCCESS) fatal_CL(error, __LINE__);
	kernel = clCreateKernel(program, "hotspot", &error);
	if (error != CL_SUCCESS) fatal_CL(error, __LINE__);


	long long start_time = get_time();

	// Create two temperature matrices and copy the temperature input data
	cl_mem MatrixTemp[2];
	// Create input memory buffers on device
	MatrixTemp[0] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, sizeof(float) * size, FilesavingTemp, &error);
	if (error != CL_SUCCESS) fatal_CL(error, __LINE__);

	// Lingjie Zhang modifited at Nov 1, 2015
	//MatrixTemp[1] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, sizeof(float) * size, NULL, &error);
	MatrixTemp[1] = clCreateBuffer(context, CL_MEM_READ_WRITE , sizeof(float) * size, NULL, &error);
	// end Lingjie Zhang modification

	if (error != CL_SUCCESS) fatal_CL(error, __LINE__);

	// Copy the power input data
	cl_mem MatrixPower = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(float) * size, FilesavingPower, &error);
	if (error != CL_SUCCESS) fatal_CL(error, __LINE__);

	// Perform the computation
	int ret = compute_tran_temp(MatrixPower, MatrixTemp, grid_cols, grid_rows, total_iterations, pyramid_height,
			blockCols, blockRows, borderCols, borderRows, FilesavingTemp, FilesavingPower);

	// Copy final temperature data back
	cl_float *MatrixOut = (cl_float *) clEnqueueMapBuffer(command_queue, MatrixTemp[ret], CL_TRUE, CL_MAP_READ, 0, sizeof(float) * size, 0, NULL, NULL, &error);
	if (error != CL_SUCCESS) fatal_CL(error, __LINE__);

	long long end_time = get_time();	
	printf("Total time: %.3f seconds\n", ((float) (end_time - start_time)) / (1000*1000));

	// Write final output to output file
	writeoutput(MatrixOut, grid_rows, grid_cols, ofile);

	error = clEnqueueUnmapMemObject(command_queue, MatrixTemp[ret], (void *) MatrixOut, 0, NULL, NULL);
	if (error != CL_SUCCESS) fatal_CL(error, __LINE__);

	clReleaseMemObject(MatrixTemp[0]);
	clReleaseMemObject(MatrixTemp[1]);
	clReleaseMemObject(MatrixPower);

	clReleaseContext(context);

	return 0;
}
Beispiel #11
0
/**
 * The test kernels are assumed to:
 *
 * 1) called 'test_kernel'
 * 2) no inputs or outputs, only work item id printfs to verify the correct 
 *    workgroup transformations
 * 3) executable with any local and global dimensions and sizes
 *
 * Usage:
 *
 * ./run_kernel somekernel.cl 2 2 3 4
 *
 * Where the first integer is the number of work groups to execute and the
 * rest are the local dimensions.
 */
int
main (int argc, char **argv)
{
  FILE *source_file;
  char *source;
  int source_size;
  cl_context context;
  size_t cb;
  cl_device_id *devices;
  cl_command_queue cmd_queue;
  cl_program program;
  cl_int err;
  cl_kernel kernel;
  size_t global_work_size[3];
  size_t local_work_size[3];
  char kernel_path[2048];

  snprintf (kernel_path, 2048,  "%s/%s", SRCDIR, argv[1]);
  source_file = fopen(kernel_path, "r");
  assert(source_file != NULL && "Kernel .cl not found.");

  fseek (source_file, 0, SEEK_END);
  source_size = ftell (source_file);
  fseek (source_file, 0, SEEK_SET);

  source = malloc (source_size + 1);
  assert (source != NULL);

  fread (source, source_size, 1, source_file);
  source[source_size] = '\0';

  fclose(source_file);

  local_work_size[0] = atoi(argv[3]);
  local_work_size[1] = atoi(argv[4]);
  local_work_size[2] = atoi(argv[5]);

  global_work_size[0] = local_work_size[0] * atoi(argv[2]);
  global_work_size[1] = local_work_size[1];
  global_work_size[2] = local_work_size[2];
  
  context = poclu_create_any_context();
  if (context == (cl_context)0) 
    return -1; 

  clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &cb); 
  devices = malloc(cb);
  clGetContextInfo(context, CL_CONTEXT_DEVICES, cb, devices, NULL); 
 
  cmd_queue = clCreateCommandQueue(context, devices[0], 0, NULL); 
  if (cmd_queue == (cl_command_queue)0) 
    { 
      clReleaseContext(context); 
      free(devices); 
      return -1; 
    } 
  free(devices); 

  program = clCreateProgramWithSource(context, 
				      1, (const char**)&source, NULL, NULL); 
  if (program == (cl_program)0) 
    { 
      clReleaseCommandQueue(cmd_queue); 
      clReleaseContext(context); 
      return -1; 
    } 
 
  err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); 
  if (err != CL_SUCCESS) 
    { 
      clReleaseProgram(program); 
      clReleaseCommandQueue(cmd_queue); 
      clReleaseContext(context); 
      return -1; 
    } 
 
  kernel = clCreateKernel(program, "test_kernel", NULL); 
  if (kernel == (cl_kernel)0) 
    { 
      clReleaseProgram(program); 
      clReleaseCommandQueue(cmd_queue); 
      clReleaseContext(context); 
      return -1; 
    } 


  err = clEnqueueNDRangeKernel(cmd_queue, kernel, 3, NULL, 
			       global_work_size, local_work_size,  
			       0, NULL, NULL); 
  if(err != CL_SUCCESS)
    {
       clReleaseKernel(kernel);
       clReleaseProgram(program);
       clReleaseCommandQueue(cmd_queue);
       clReleaseContext(context);
       return -1;
    }
  clFinish(cmd_queue);
  clReleaseKernel(kernel); 
  clReleaseProgram(program); 
  clReleaseCommandQueue(cmd_queue); 
  clReleaseContext(context); 

  return 0;
}
Beispiel #12
0
int main(int argc, char **argv)
{
  /* test name */
  char name[] = "test_sampler_address_clamp";
  size_t global_work_size[1] = { 1 }, local_work_size[1]= { 1 };
  size_t srcdir_length, name_length, filename_size;
  char *filename = NULL;
  char *source = NULL;
  cl_device_id devices[1];
  cl_context context = NULL;
  cl_command_queue queue = NULL;
  cl_program program = NULL;
  cl_kernel kernel = NULL;
  cl_int result;
  int retval = -1;

  /* image parameters */
  cl_uchar4 *imageData;
  cl_image_format image_format;
  cl_image_desc image_desc;

  printf("Running test %s...\n", name);
  memset(&image_desc, 0, sizeof(cl_image_desc));
  image_desc.image_type = CL_MEM_OBJECT_IMAGE2D;
  image_desc.image_width = 4;
  image_desc.image_height = 4;
  image_format.image_channel_order = CL_RGBA;
  image_format.image_channel_data_type = CL_UNSIGNED_INT8;
  imageData = (cl_uchar4*)malloc (4 * 4 * sizeof(cl_uchar4));
  
  if (imageData == NULL)
    {
      puts("out of host memory\n");
      goto error;
    }
  memset (imageData, 1, 4*4*sizeof(cl_uchar4));

  /* determine file name of kernel source to load */
  srcdir_length = strlen(SRCDIR);
  name_length = strlen(name);
  filename_size = srcdir_length + name_length + 16;
  filename = (char *)malloc(filename_size + 1);
  if (!filename) 
    {
      puts("out of memory");
      goto error;
    }
  
  snprintf(filename, filename_size, "%s/%s.cl", SRCDIR, name);
  
  /* read source code */
  source = poclu_read_file (filename);
  TEST_ASSERT (source != NULL && "Kernel .cl not found.");

  /* setup an OpenCL context and command queue using default device */
  context = poclu_create_any_context();
  if (!context) 
    {
      puts("clCreateContextFromType call failed\n");
      goto error;
    }

  result = clGetContextInfo(context, CL_CONTEXT_DEVICES,
                            sizeof(cl_device_id), devices, NULL);
  if (result != CL_SUCCESS) 
    {
      puts("clGetContextInfo call failed\n");
      goto error;
    }

  queue = clCreateCommandQueue(context, devices[0], 0, NULL); 
  if (!queue) 
    {
      puts("clCreateCommandQueue call failed\n");
      goto error;
    }

  /* Create image */

  cl_mem image = clCreateImage (context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
                                &image_format, &image_desc, imageData, &result);
  if (result != CL_SUCCESS)
    {
      puts("image creation failed\n");
      goto error;
    }


  /* create and build program */
  program = clCreateProgramWithSource (context, 1, (const char **)&source,
                                       NULL, NULL);
  if (!program) 
    {
      puts("clCreateProgramWithSource call failed\n");
      goto error;
    }

  result = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); 
  if (result != CL_SUCCESS) 
    {
      puts("clBuildProgram call failed\n");
      goto error;
    }

  /* execute the kernel with give name */
  kernel = clCreateKernel(program, name, NULL); 
  if (!kernel) 
    {
      puts("clCreateKernel call failed\n");
      goto error;
    }

   result = clSetKernelArg( kernel, 0, sizeof(cl_mem), &image);
   if (result)
     {
       puts("clSetKernelArg failed\n");
       goto error;
     }

  result = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size, 
                                  local_work_size, 0, NULL, NULL); 
  if (result != CL_SUCCESS) 
    {
      puts("clEnqueueNDRangeKernel call failed\n");
      goto error;
    }

  result = clFinish(queue);
  if (result == CL_SUCCESS)
    retval = 0;

error:

  if (image)
    {
      clReleaseMemObject (image);
    }

  if (kernel) 
    {
      clReleaseKernel(kernel);
    }
  if (program) 
    {
      clReleaseProgram(program);
    }
  if (queue) 
    {
      clReleaseCommandQueue(queue);
    }
  if (context) 
    {
      clUnloadCompiler ();
      clReleaseContext (context);
    }
  if (source) 
    {
      free(source);
    }
  if (filename)
    {
      free(filename);
    }
  if (imageData)
    {
      free(imageData);
    }


  if (retval) 
    {
      printf("FAIL\n");
      return 1;
    }
 
  printf("OK\n");
  return 0;
}