Ejemplo n.º 1
0
int readAndBuildKernel(char *filename)
{
  cl_int ciErrNum = CL_SUCCESS;
  size_t kernelLength;
  char *source;
  
  source = readFile(filename);
  kernelLength = strlen(source);
  
  // create the program
  theProgram = clCreateProgramWithSource(cxGPUContext, 1, (const char **)&source, 
                                                    &kernelLength, &ciErrNum);
  printCLError(ciErrNum,5);
    
  // build the program
  ciErrNum = clBuildProgram(theProgram, 0, NULL, NULL, NULL, NULL);
  if (ciErrNum != CL_SUCCESS)
  {
    // write out the build log, then exit
    char cBuildLog[10240];
    clGetProgramBuildInfo(theProgram, device, CL_PROGRAM_BUILD_LOG, 
                          sizeof(cBuildLog), cBuildLog, NULL );
    printf("\nBuild Log:\n%s\n\n", (char *)&cBuildLog);
    return -1;
  }
  
  theKernel = clCreateKernel(theProgram, "filter", &ciErrNum);
  printCLError(ciErrNum,6);
  
  //Discard temp storage
  free(source);
  
  return 0;
}
Ejemplo n.º 2
0
int init_OpenCL()
{
  cl_int ciErrNum = CL_SUCCESS;
  cl_platform_id platform;
  unsigned int no_plat;

  // We assume that we only have one platform available. (This may not be true.)
  ciErrNum =  clGetPlatformIDs(1,&platform,&no_plat);
  printCLError(ciErrNum,0);

  // Get the GPU device
  ciErrNum = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
  printCLError(ciErrNum,1);
  
  // create the OpenCL context on the device
  cxGPUContext = clCreateContext(0, 1, &device, NULL, NULL, &ciErrNum);
  printCLError(ciErrNum,2);

  ciErrNum = clGetDeviceInfo(device,CL_DEVICE_MAX_WORK_GROUP_SIZE,sizeof(size_t),&noWG,NULL);
  printCLError(ciErrNum,3);
  printf("maximum number of workgroups: %d\n", (int)noWG);
  
  // create command queue
  commandQueue = clCreateCommandQueue(cxGPUContext, device, 0, &ciErrNum);
  printCLError(ciErrNum,4);
}
Ejemplo n.º 3
0
Archivo: sort.c Proyecto: nip3o/tddd56
int init_OpenCL()
{
  cl_int ciErrNum = CL_SUCCESS;
  size_t kernelLength;
  char *source;
  cl_device_id device;
  cl_platform_id platform;
  unsigned int no_plat;

  ciErrNum =  clGetPlatformIDs(1,&platform,&no_plat);
  printCLError(ciErrNum,0);

  //get the device
  ciErrNum = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
  printCLError(ciErrNum,1);
  
  // create the OpenCL context on the device
  cxGPUContext = clCreateContext(0, 1, &device, NULL, NULL, &ciErrNum);
  printCLError(ciErrNum,2);

  ciErrNum = clGetDeviceInfo(device,CL_DEVICE_MAX_WORK_GROUP_SIZE,sizeof(size_t),&noWG,NULL);
  printCLError(ciErrNum,3);
  printf("maximum number of workgroups: %d\n", (int)noWG);
  
  // create command queue
  commandQueue = clCreateCommandQueue(cxGPUContext, device, 0, &ciErrNum);
  printCLError(ciErrNum,4);
  
  source = readFile("sort.cl");
  kernelLength = strlen(source);
  
  // create the program
  cpSort = clCreateProgramWithSource(cxGPUContext, 1, (const char **)&source, 
                                                    &kernelLength, &ciErrNum);
  printCLError(ciErrNum,5);
    
  // build the program
  ciErrNum = clBuildProgram(cpSort, 0, NULL, NULL, NULL, NULL);
  if (ciErrNum != CL_SUCCESS)
  {
    // write out the build log, then exit
    char cBuildLog[10240];
    clGetProgramBuildInfo(cpSort, device, CL_PROGRAM_BUILD_LOG, 
                          sizeof(cBuildLog), cBuildLog, NULL );
    printf("\nBuild Log:\n%s\n\n", (char *)&cBuildLog);
    return -1;
  }
  
  gpgpuSort = clCreateKernel(cpSort, "sort", &ciErrNum);
  printCLError(ciErrNum,6);
  
  //Discard temp storage
  free(source);
  
  return 0;
}
Ejemplo n.º 4
0
////////////////////////////////////////////////////////////////////////////////
// main computation function
////////////////////////////////////////////////////////////////////////////////
void computeImages()
{
	//read in full size of memory
	image = readppm("maskros512.ppm", &n, &m);
	out = (unsigned char*) malloc(n*m*3);
	cl_mem in_data, out_data;
	cl_int ciErrNum = CL_SUCCESS;
	
	
	// Create space for data and copy image to device (note that we could also use clEnqueueWriteBuffer to upload)
	
	in_data = clCreateBuffer(cxGPUContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
		3*n*m * sizeof(unsigned char), image, &ciErrNum);
	printCLError(ciErrNum,6);
	out_data = clCreateBuffer(cxGPUContext, CL_MEM_WRITE_ONLY,
		3*n*m * sizeof(unsigned char), NULL, &ciErrNum);
	printCLError(ciErrNum,7);

	// set the args values
	ciErrNum  = clSetKernelArg(theKernel, 0, sizeof(cl_mem),  (void *) &in_data);
	ciErrNum |= clSetKernelArg(theKernel, 1, sizeof(cl_mem),  (void *) &out_data);
	ciErrNum |= clSetKernelArg(theKernel, 2, sizeof(cl_uint), (void *) &n);
	ciErrNum |= clSetKernelArg(theKernel, 3, sizeof(cl_uint), (void *) &m);
	printCLError(ciErrNum,8);

	// Computing arrangement
	//size_t localWorkSize, globalWorkSize;
	size_t globalWorkSize[3] = {512, 512, 1};
	size_t localWorkSize[3] = {16, 16, 1}; //256 threads in each block
	// 32*32 (1024) blocks in total to have 512*512 threads in total

	printf("Startup time %lf\n", GetSeconds());

	// Compute!
	cl_event event;
	ResetMilli();
	ciErrNum = clEnqueueNDRangeKernel(commandQueue, theKernel, 2, NULL, &globalWorkSize, &localWorkSize, 0, NULL, &event);
	printCLError(ciErrNum,9);

 	ciErrNum = clWaitForEvents(1, &event); // Synch
	printCLError(ciErrNum,10);
	printf("time %lf\n", GetSeconds());

	ciErrNum = clEnqueueReadBuffer(commandQueue, out_data, CL_TRUE, 0, 3*n*m * sizeof(unsigned char), out, 0, NULL, &event);
	printCLError(ciErrNum,11);
	clWaitForEvents(1, &event); // Synch
	printCLError(ciErrNum,10);
    
	clReleaseMemObject(in_data);
	clReleaseMemObject(out_data);
	
	return;
}
Ejemplo n.º 5
0
Archivo: sort.c Proyecto: nip3o/tddd56
int gpu_Sort(unsigned int *data, unsigned int length)
{
  cl_int ciErrNum = CL_SUCCESS;
  size_t localWorkSize, globalWorkSize;
  cl_mem in_data;
  cl_mem out_data;
  printf("GPU sorting.\n");
  
  in_data = clCreateBuffer(cxGPUContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, length * sizeof(unsigned int), data, &ciErrNum);
  out_data = clCreateBuffer(cxGPUContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, length * sizeof(unsigned int), data, &ciErrNum);
    printCLError(ciErrNum,7);

    if (length<512)
      localWorkSize  = length;
    else
      localWorkSize  = 512;

    globalWorkSize = length;

    // set the args values
    ciErrNum  = clSetKernelArg(gpgpuSort, 0, sizeof(cl_mem),  (void *) &in_data);
    ciErrNum |= clSetKernelArg(gpgpuSort, 1, sizeof(cl_uint), (void *) &length);
    ciErrNum |= clSetKernelArg(gpgpuSort, 2, sizeof(cl_mem),  (void *) &out_data);
    printCLError(ciErrNum,8);

    gettimeofday(&t_s_gpu, NULL);
    
    cl_event event;
    ciErrNum = clEnqueueNDRangeKernel(commandQueue, gpgpuSort, 1, NULL, &globalWorkSize, &localWorkSize, 0, NULL, &event);
    printCLError(ciErrNum,9);
    
    clWaitForEvents(1, &event); // Synch
    gettimeofday(&t_e_gpu, NULL);
    printCLError(ciErrNum,10);

  ciErrNum = clEnqueueReadBuffer(commandQueue, out_data, CL_TRUE, 0, length * sizeof(unsigned int), data, 0, NULL, &event);
    printCLError(ciErrNum,11);
    clWaitForEvents(1, &event); // Synch
  printCLError(ciErrNum,10);
    
  clReleaseMemObject(in_data);
  clReleaseMemObject(out_data);
  return ciErrNum;
}