Exemplo n.º 1
0
int
main (void)
{
  FILE *source_file;
  char *source;
  int source_size;
  cl_float *input, *output;
  int i;
  int j;
  cl_context  context; 
  size_t cb;
  cl_device_id *devices;
  cl_command_queue cmd_queue;
  cl_program program;
  cl_int err;
  cl_kernel kernel;
  cl_mem memobjs[2];
  size_t global_work_size[2];
  size_t local_work_size[2];

  source_file = fopen("example2a.cl", "r");
  if (source_file == NULL) 
    source_file = fopen (SRCDIR "/example2a.cl", "r");

  assert(source_file != NULL && "example2a.cl not found!");

  fseek (source_file, 0, SEEK_END);
  source_size = ftell (source_file);
  fseek (source_file, 0, SEEK_SET);

  source = (char *) malloc (source_size + 1);
  assert (source != NULL);

  fread (source, source_size, 1, source_file);
  source[source_size] = '\0';

  fclose (source_file);

  input = (cl_float *) malloc (WIDTH * HEIGHT * sizeof (cl_float));
  output = (cl_float *) malloc (WIDTH * (HEIGHT + PADDING) * sizeof (cl_float));

  srand48(0);
  for (i = 0; i < HEIGHT; ++i)
    {
      for (j = 0; j < WIDTH; ++j)
	input[i * WIDTH + j] = drand48();
    }
  
  context = clCreateContextFromType(NULL, CL_DEVICE_TYPE_CPU, 
				    NULL, NULL, NULL); 
  if (context == (cl_context)0) 
    return -1; 

  clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &cb); 
  devices = (cl_device_id *) malloc(cb); 
  clGetContextInfo(context, CL_CONTEXT_DEVICES, cb, devices, NULL); 
 
  cmd_queue = clCreateCommandQueue(context, devices[0], 0, NULL); 
  if (cmd_queue == (cl_command_queue)0) 
    { 
      clReleaseContext(context); 
      free(devices); 
      return -1; 
    } 
  free(devices); 

  memobjs[0] = clCreateBuffer(context,
			      CL_MEM_READ_WRITE,
			      sizeof(cl_float) * WIDTH * (HEIGHT + PADDING), NULL, NULL);
  if (memobjs[0] == (cl_mem)0) 
    { 
      clReleaseCommandQueue(cmd_queue); 
      clReleaseContext(context); 
      return -1; 
    } 

  memobjs[1] = clCreateBuffer(context,
			      CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
			      sizeof(cl_float) * WIDTH * HEIGHT, input, NULL);
  if (memobjs[1] == (cl_mem)0) 
    { 
      delete_memobjs(memobjs, 1);
      clReleaseCommandQueue(cmd_queue); 
      clReleaseContext(context); 
      return -1; 
    } 

  program = clCreateProgramWithSource(context, 
				      1, (const char**)&source, NULL, NULL); 
  if (program == (cl_program)0) 
    { 
      clReleaseCommandQueue(cmd_queue); 
      clReleaseContext(context); 
      return -1; 
    } 
 
  err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); 
  if (err != CL_SUCCESS) 
    { 
      clReleaseProgram(program); 
      clReleaseCommandQueue(cmd_queue); 
      clReleaseContext(context); 
      return -1; 
    } 
 
  kernel = clCreateKernel(program, "matrix_transpose", NULL); 
  if (kernel == (cl_kernel)0) 
    { 
      clReleaseProgram(program); 
      clReleaseCommandQueue(cmd_queue); 
      clReleaseContext(context); 
      return -1; 
    } 

  err = clSetKernelArg(kernel,  0,  
		       sizeof(cl_mem), (void *) &memobjs[0]); 
  err |= clSetKernelArg(kernel, 1,  
			sizeof(cl_mem), (void *) &memobjs[1]); 
  /* err |= clSetKernelArg(kernel, 2, */
  /*       		(32 + 1) * 32 * sizeof(float), NULL); */
 
  if (err != CL_SUCCESS) 
    { 
      delete_memobjs(memobjs, 2); 
      clReleaseKernel(kernel); 
      clReleaseProgram(program); 
      clReleaseCommandQueue(cmd_queue); 
      clReleaseContext(context); 
      return -1; 
    } 

  global_work_size[0] = 2 * WIDTH; 
  global_work_size[1] = HEIGHT / 32; 
  local_work_size[0]= 64; 
  local_work_size[1]= 1; 

  err = clEnqueueNDRangeKernel(cmd_queue, kernel, 2, NULL, 
			       global_work_size, local_work_size,  
			       0, NULL, NULL); 

  if (err != CL_SUCCESS) 
    { 
      delete_memobjs(memobjs, 2); 
      clReleaseKernel(kernel); 
      clReleaseProgram(program); 
      clReleaseCommandQueue(cmd_queue); 
      clReleaseContext(context); 
      return -1; 
    } 
 
  err = clEnqueueReadBuffer(cmd_queue, memobjs[0], CL_TRUE, 
			    0, WIDTH * (HEIGHT + PADDING) * sizeof(cl_float), output, 
			    0, NULL, NULL); 
  if (err != CL_SUCCESS) 
    { 
      delete_memobjs(memobjs, 2); 
      clReleaseKernel(kernel); 
      clReleaseProgram(program); 
      clReleaseCommandQueue(cmd_queue); 
      clReleaseContext(context); 
      return -1; 
    } 
 
  delete_memobjs(memobjs, 2); 
  clReleaseKernel(kernel); 
  clReleaseProgram(program); 
  clReleaseCommandQueue(cmd_queue); 
  clReleaseContext(context); 

  for (i = 0; i < HEIGHT; ++i)
    {
      for (j = 0; j < WIDTH; ++j) {
	if (input[i * WIDTH + j] != output[j * (HEIGHT + PADDING) + i]) {
	  printf ("FAIL\n");
	  return -1;
	}
      }
    }
  
  printf ("OK\n");
  return 0;
}
Exemplo n.º 2
0
/*
 * Build program_source, run its "dot_product" kernel over n work-items and
 * read n cl_float results back into dst.
 *
 * program_source  OpenCL C source containing a kernel named "dot_product".
 * n               number of cl_float4 elements in srcA and srcB, and number
 *                 of cl_float results read into dst; the local work size is
 *                 fixed at 2.
 * srcA, srcB      input vectors.  They are byte-swapped in place for the
 *                 device via poclu_bswap_cl_float_array while the kernel
 *                 runs and swapped back before returning, on success and on
 *                 failure alike.
 * dst             receives n * sizeof(cl_float) bytes of results.
 *
 * Returns 0 on success, -1 if n is not positive or any OpenCL call or
 * allocation fails.  All OpenCL objects and the device list are released
 * on every path.
 */
int 
exec_dot_product_kernel(const char *program_source, 
                        int n, cl_float4 *srcA, cl_float4 *srcB, cl_float4 *dst) 
{ 
  cl_context  context = NULL; 
  cl_command_queue cmd_queue = NULL; 
  cl_device_id  *devices = NULL; 
  cl_program  program = NULL; 
  cl_kernel  kernel = NULL; 
  cl_mem       memobjs[3]; 
  int          memobj_count = 0;   // number of valid leading entries in memobjs
  size_t       global_work_size[1]; 
  size_t       local_work_size[1]; 
  size_t       cb = 0; 
  cl_int       err; 
  int          i;
  int          inputs_swapped = 0; // srcA/srcB currently hold device byte order
  int          rc = -1;

  // a zero-sized buffer is rejected by clCreateBuffer anyway; fail early
  if (n <= 0)
    return -1;
 
  // create the OpenCL context on any available OCL device 
  context = clCreateContextFromType(
      NULL, 
      CL_DEVICE_TYPE_ALL,
      NULL, NULL, NULL); 
  if (context == NULL) 
    goto cleanup; 
 
  // get the list of devices associated with the context: size first,
  // then the list itself
  if (clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &cb) != CL_SUCCESS
      || cb < sizeof(cl_device_id))
    goto cleanup;
  devices = (cl_device_id *) malloc(cb); 
  if (devices == NULL)
    goto cleanup;
  if (clGetContextInfo(context, CL_CONTEXT_DEVICES, cb, devices, NULL)
      != CL_SUCCESS)
    goto cleanup;
 
  // create a command-queue on the first device
  cmd_queue = clCreateCommandQueue(context, devices[0], 0, NULL); 
  if (cmd_queue == NULL) 
    goto cleanup;

  // convert the inputs in place for the device; undone in cleanup
  for (i = 0; i < n; ++i)
    {
      poclu_bswap_cl_float_array(devices[0], (cl_float*)&srcA[i], 4);
      poclu_bswap_cl_float_array(devices[0], (cl_float*)&srcB[i], 4);
    }
  inputs_swapped = 1;
 
  // allocate the buffer memory objects: two inputs copied from the host,
  // one output of n floats
  memobjs[0] = clCreateBuffer(context, 
                              CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, 
                              sizeof(cl_float4) * n, srcA, NULL); 
  if (memobjs[0] == NULL) 
    goto cleanup;
  memobj_count = 1;
 
  memobjs[1] = clCreateBuffer(context, 
                              CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, 
                              sizeof(cl_float4) * n, srcB, NULL); 
  if (memobjs[1] == NULL) 
    goto cleanup;
  memobj_count = 2;
 
  memobjs[2] = clCreateBuffer(context, 
                              CL_MEM_READ_WRITE, 
                              sizeof(cl_float) * n, NULL, NULL); 
  if (memobjs[2] == NULL) 
    goto cleanup;
  memobj_count = 3;
 
  // create the program 
  program = clCreateProgramWithSource(context, 
                                      1, (const char**)&program_source,
                                      NULL, NULL); 
  if (program == NULL) 
    goto cleanup;
 
  // build the program 
  err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); 
  if (err != CL_SUCCESS) 
    goto cleanup;
 
  // create the kernel 
  kernel = clCreateKernel(program, "dot_product", NULL); 
  if (kernel == NULL) 
    goto cleanup;
 
  // set the args values 
  err = clSetKernelArg(kernel,  0,  
                       sizeof(cl_mem), (void *) &memobjs[0]); 
  err |= clSetKernelArg(kernel, 1,  
                        sizeof(cl_mem), (void *) &memobjs[1]); 
  err |= clSetKernelArg(kernel, 2,
                        sizeof(cl_mem), (void *) &memobjs[2]); 
  if (err != CL_SUCCESS) 
    goto cleanup;
 
  // set work-item dimensions 
  global_work_size[0] = n; 
  local_work_size[0]= 2; 
 
  // execute kernel 
  err = clEnqueueNDRangeKernel(cmd_queue, kernel, 1, NULL, 
                               global_work_size, local_work_size,  
                               0, NULL, NULL); 
  if (err != CL_SUCCESS) 
    goto cleanup;
 
  // read the n float results back (blocking read)
  err = clEnqueueReadBuffer(cmd_queue, memobjs[2], CL_TRUE, 
                            0, n * sizeof(cl_float), dst, 
                            0, NULL, NULL); 
  if (err != CL_SUCCESS) 
    goto cleanup;

  // convert exactly the n floats that were read; the rest of dst was
  // never written by the device and must not be touched
  poclu_bswap_cl_float_array(devices[0], (cl_float*)dst, (size_t) n);
  rc = 0; // success... 

cleanup:
  // give the caller's inputs back in host byte order on every path
  if (inputs_swapped)
    {
      for (i = 0; i < n; ++i)
        {
          poclu_bswap_cl_float_array(devices[0], (cl_float*)&srcA[i], 4);
          poclu_bswap_cl_float_array(devices[0], (cl_float*)&srcB[i], 4);
        }
    }
  free(devices); 

  // release memory objects, kernel, program, queue and context
  if (memobj_count > 0)
    delete_memobjs(memobjs, memobj_count); 
  if (kernel != NULL)
    clReleaseKernel(kernel); 
  if (program != NULL)
    clReleaseProgram(program); 
  if (cmd_queue != NULL)
    clReleaseCommandQueue(cmd_queue); 
  if (context != NULL)
    clReleaseContext(context); 
  return rc;
}
Exemplo n.º 3
0
int 
exec_trig_kernel(const char *program_source, 
                 int n, void *srcA, void *dst) 
{ 
  cl_context  context; 
  cl_command_queue cmd_queue; 
  cl_device_id  *devices; 
  cl_program  program; 
  cl_kernel  kernel; 
  cl_mem       memobjs[2]; 
  size_t       global_work_size[1]; 
  size_t       local_work_size[1]; 
  size_t       cb; 
  cl_int       err; 

  float c = 7.3f; // a scalar number to test non-pointer args
 
  // create the OpenCL context on a GPU device 
  context = poclu_create_any_context();
  if (context == (cl_context)0) 
    return -1; 
 
  // get the list of GPU devices associated with context 
  clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &cb); 
  devices = malloc(cb);
  clGetContextInfo(context, CL_CONTEXT_DEVICES, cb, devices, NULL); 
 
  // create a command-queue 
  cmd_queue = clCreateCommandQueue(context, devices[0], 0, NULL); 
  if (cmd_queue == (cl_command_queue)0) 
    { 
      clReleaseContext(context); 
      free(devices); 
      return -1; 
    } 
  free(devices); 
 
  // allocate the buffer memory objects 
  memobjs[0] = clCreateBuffer(context, 
                              CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, 
                              sizeof(cl_float4) * n, srcA, NULL); 
  if (memobjs[0] == (cl_mem)0) 
    { 
      clReleaseCommandQueue(cmd_queue); 
      clReleaseContext(context); 
      return -1; 
    } 
 
  memobjs[1] = clCreateBuffer(context, 
			      CL_MEM_READ_WRITE, 
			      sizeof(cl_float4) * n, NULL, NULL); 
  if (memobjs[1] == (cl_mem)0) 
    { 
      delete_memobjs(memobjs, 1); 
      clReleaseCommandQueue(cmd_queue); 
      clReleaseContext(context); 
      return -1; 
    } 
 
  // create the program 
  program = clCreateProgramWithSource(context, 
				      1, (const char**)&program_source, NULL, NULL); 
  if (program == (cl_program)0) 
    { 
      delete_memobjs(memobjs, 2); 
      clReleaseCommandQueue(cmd_queue); 
      clReleaseContext(context); 
      return -1; 
    } 
 
  // build the program 
  err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); 
  if (err != CL_SUCCESS) 
    { 
      delete_memobjs(memobjs, 2); 
      clReleaseProgram(program); 
      clReleaseCommandQueue(cmd_queue); 
      clReleaseContext(context); 
      return -1; 
    } 
 
  // create the kernel 
  kernel = clCreateKernel(program, "trig", NULL); 
  if (kernel == (cl_kernel)0) 
    { 
      delete_memobjs(memobjs, 2); 
      clReleaseProgram(program); 
      clReleaseCommandQueue(cmd_queue); 
      clReleaseContext(context); 
      return -1; 
    } 
 
  // set the args values 
  err = clSetKernelArg(kernel,  0,  
		       sizeof(cl_mem), (void *) &memobjs[0]); 
  err |= clSetKernelArg(kernel, 1,
			sizeof(cl_mem), (void *) &memobjs[1]); 
  err |= clSetKernelArg(kernel, 2,
			sizeof(float), (void *) &c); 
 
  if (err != CL_SUCCESS) 
    { 
      delete_memobjs(memobjs, 2); 
      clReleaseKernel(kernel); 
      clReleaseProgram(program); 
      clReleaseCommandQueue(cmd_queue); 
      clReleaseContext(context); 
      return -1; 
    } 
 
  // set work-item dimensions 
  global_work_size[0] = n; 
  local_work_size[0]= 2; 
 
  // execute kernel 
  err = clEnqueueNDRangeKernel(cmd_queue, kernel, 1, NULL, 
			       global_work_size, local_work_size,  
			       0, NULL, NULL); 
  if (err != CL_SUCCESS) 
    { 
      delete_memobjs(memobjs, 2); 
      clReleaseKernel(kernel); 
      clReleaseProgram(program); 
      clReleaseCommandQueue(cmd_queue); 
      clReleaseContext(context); 
      return -1; 
    } 
 
  // read output image 
  err = clEnqueueReadBuffer(cmd_queue, memobjs[1], CL_TRUE, 
			    0, n * sizeof(cl_float4), dst, 
			    0, NULL, NULL); 
  if (err != CL_SUCCESS) 
    { 
      delete_memobjs(memobjs, 2); 
      clReleaseKernel(kernel); 
      clReleaseProgram(program); 
      clReleaseCommandQueue(cmd_queue); 
      clReleaseContext(context); 
      return -1; 
    } 
 
  // release kernel, program, and memory objects 
  delete_memobjs(memobjs, 2); 
  clReleaseKernel(kernel); 
  clReleaseProgram(program); 
  clReleaseCommandQueue(cmd_queue); 
  clReleaseContext(context); 
  return 0; // success... 
}