Пример #1
0
cl_mem
piglit_cl_create_image(piglit_cl_context context, cl_mem_flags flags,
                       const cl_image_format *format,
                       const piglit_image_desc *desc)
{
	cl_int errNo;
	cl_mem image = NULL;

#ifdef CL_VERSION_1_2
	if (piglit_cl_get_platform_version(context->platform_id) >= 12) {
		image = clCreateImage(context->cl_ctx, flags, format, desc, NULL, &errNo);
	} else
#endif
	if (desc->image_type == CL_MEM_OBJECT_IMAGE2D) {
		image = clCreateImage2D(context->cl_ctx, flags, format,
		                        desc->image_width, desc->image_height, 0,
		                        NULL, &errNo);
	} else if (desc->image_type == CL_MEM_OBJECT_IMAGE3D) {
		image = clCreateImage3D(context->cl_ctx, flags, format,
		                        desc->image_width, desc->image_height,
		                        desc->image_depth, 0, 0,
		                        NULL, &errNo);
	} else {
		fprintf(stderr,
		        "Invalid image mem object type: %s\n",
		        piglit_cl_get_enum_name(desc->image_type));
	}
	if(!piglit_cl_check_error(errNo, CL_SUCCESS)) {
		fprintf(stderr,
		        "Could not create image: %s\n",
		        piglit_cl_get_error_name(errNo));
	}

	return image;
}
Пример #2
0
/**
 * \brief ocl::Image::create Creates cl_mem for this Image.
 *
 * Note that no Memory is allocated. Allocation takes place when data is transfered.
 * It is assumed that an active Queue exists.
 *
 * \param width Width of the image.
 * \param height Height of the image.
 * \param depth Depth of the image.
 * \param type Channeltype of the image.
 * \param order Channelorder of the image.
 */
void ocl::Image::create(size_t width, size_t height, size_t depth, ChannelType type, ChannelOrder order, Access access)
{
    TRUE_ASSERT(this->_context != 0, "Context not valid - cannot create Image");
    cl_mem_flags flags = access;

    cl_image_format format;
    format.image_channel_order = order;
    format.image_channel_data_type = type;

    cl_int status;

#if defined(OPENCL_V1_0) || defined(OPENCL_V1_1)
    this->_id = clCreateImage3D(this->_context->id(), flags, &format, width, height, depth, 0, 0, NULL, &status);
#else
    _cl_image_desc desc;
    desc.image_type = CL_MEM_OBJECT_IMAGE3D;
    desc.image_height = height;
    desc.image_width = width;
    desc.image_depth = depth;
    desc.image_array_size = 1;
    desc.image_row_pitch = 0;
    desc.image_slice_pitch = 0;
    desc.num_mip_levels = 0;
    desc.num_samples = 0;
    desc.buffer = NULL;
    this->_id = clCreateImage(this->_context->id(), flags, &format, &desc, NULL, &status);
#endif
    OPENCL_SAFE_CALL(status);
    TRUE_ASSERT(this->_id != 0, "Could not create 3D image.");
}
Пример #3
0
OpenCLHandle FilterEffect::createOpenCLImageResult(uint8_t* source)
{
    FilterContextOpenCL* context = FilterContextOpenCL::context();
    ASSERT(context);

    if (context->inError())
        return 0;

    ASSERT(!hasResult());
    cl_image_format clImageFormat;
    clImageFormat.image_channel_order = CL_RGBA;
    clImageFormat.image_channel_data_type = CL_UNORM_INT8;

    int errorCode = 0;
#ifdef CL_API_SUFFIX__VERSION_1_2
    cl_image_desc imageDescriptor = { CL_MEM_OBJECT_IMAGE2D, m_absolutePaintRect.width(), m_absolutePaintRect.height(), 0, 0, 0, 0, 0, 0, 0};
    m_openCLImageResult = clCreateImage(context->deviceContext(), CL_MEM_READ_WRITE | (source ? CL_MEM_COPY_HOST_PTR : 0),
        &clImageFormat, &imageDescriptor, source, &errorCode);
#else
    m_openCLImageResult = clCreateImage2D(context->deviceContext(), CL_MEM_READ_WRITE | (source ? CL_MEM_COPY_HOST_PTR : 0),
        &clImageFormat, m_absolutePaintRect.width(), m_absolutePaintRect.height(), 0, source, &errorCode);
#endif
    if (context->isFailed(errorCode))
        return 0;

    return m_openCLImageResult;
}
Пример #4
0
JNIEXPORT jlong JNICALL Java_org_lwjgl_opencl_CL12_nclCreateImage(JNIEnv *env, jclass clazz, jlong context, jlong flags, jlong image_format, jlong image_desc, jlong host_ptr, jlong errcode_ret, jlong function_pointer) {
	const cl_image_format *image_format_address = (const cl_image_format *)(intptr_t)image_format;
	const cl_image_desc *image_desc_address = (const cl_image_desc *)(intptr_t)image_desc;
	cl_void *host_ptr_address = (cl_void *)(intptr_t)host_ptr;
	cl_int *errcode_ret_address = (cl_int *)(intptr_t)errcode_ret;
	clCreateImagePROC clCreateImage = (clCreateImagePROC)((intptr_t)function_pointer);
	cl_mem __result = clCreateImage((cl_context)(intptr_t)context, flags, image_format_address, image_desc_address, host_ptr_address, errcode_ret_address);
	return (intptr_t)__result;
}
Пример #5
0
    ImageBuffer(CLcontext context, unsigned short width, unsigned short height, void* data) {
        cl_int err;

        cl_image_format format;
        format.image_channel_order = CL_RGBA;
        format.image_channel_data_type = CL_FLOAT;

        cl_image_desc desc;
        desc.image_width = width;
        desc.image_height = height;
        desc.image_type = CL_MEM_OBJECT_IMAGE2D;

        image = clCreateImage(context.context, T, &format, &desc, data, &err);
        if(err != CL_SUCCESS) {
            THROW_EXCEPTION("Failed to create memory object");
        }
    }
Пример #6
0
cl_mem
CLContext::create_image (
    cl_mem_flags flags, const cl_image_format& format,
    const cl_image_desc &image_info, void *host_ptr)
{
    cl_mem mem_id = NULL;
    cl_int errcode = CL_SUCCESS;

    mem_id = clCreateImage (
                 _context_id, flags,
                 &format, &image_info,
                 host_ptr, &errcode);

    XCAM_FAIL_RETURN (
        WARNING,
        errcode == CL_SUCCESS,
        NULL,
        "create cl image failed");
    return mem_id;
}
Пример #7
0
       cl_mem bindTexture(const oclMat &mat)
        {
            cl_mem texture;
            cl_image_format format;
            int err;
            int depth    = mat.depth();
            int channels = mat.channels();

            switch(depth)
            {
            case CV_8U:
                format.image_channel_data_type = CL_UNSIGNED_INT8;
                break;
            case CV_32S:
                format.image_channel_data_type = CL_UNSIGNED_INT32;
                break;
            case CV_32F:
                format.image_channel_data_type = CL_FLOAT;
                break;
            default:
                throw std::exception();
                break;
            }
            switch(channels)
            {
            case 1:
                format.image_channel_order     = CL_R;
                break;
            case 3:
                format.image_channel_order     = CL_RGB;
                break;
            case 4:
                format.image_channel_order     = CL_RGBA;
                break;
            default:
                throw std::exception();
                break;
            }
#if CL_VERSION_1_2
            cl_image_desc desc;
            desc.image_type       = CL_MEM_OBJECT_IMAGE2D;
            desc.image_width      = mat.cols;
            desc.image_height     = mat.rows;
            desc.image_depth      = 0;
            desc.image_array_size = 1;
            desc.image_row_pitch  = 0;
            desc.image_slice_pitch = 0;
            desc.buffer           = NULL;
            desc.num_mip_levels   = 0;
            desc.num_samples      = 0;
            texture = clCreateImage(mat.clCxt->impl->clContext, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
#else
            texture = clCreateImage2D(
                mat.clCxt->impl->clContext,
                CL_MEM_READ_WRITE,
                &format,
                mat.cols,
                mat.rows,
                0,
                NULL,
                &err);
#endif
            size_t origin[] = { 0, 0, 0 };
            size_t region[] = { mat.cols, mat.rows, 1 };

            cl_mem devData;
            if (mat.cols * mat.elemSize() != mat.step)
            {
                devData = clCreateBuffer(mat.clCxt->impl->clContext, CL_MEM_READ_ONLY, mat.cols * mat.rows
                    * mat.elemSize(), NULL, NULL);
                const size_t regin[3] = {mat.cols * mat.elemSize(), mat.rows, 1};
                clEnqueueCopyBufferRect(mat.clCxt->impl->clCmdQueue, (cl_mem)mat.data, devData, origin, origin,
                    regin, mat.step, 0, mat.cols * mat.elemSize(), 0, 0, NULL, NULL);
            }
            else
            {
                devData = (cl_mem)mat.data;
            }

            clEnqueueCopyBufferToImage(mat.clCxt->impl->clCmdQueue, devData, texture, 0, origin, region, 0, NULL, 0);
            if ((mat.cols * mat.elemSize() != mat.step))
            {
                clFinish(mat.clCxt->impl->clCmdQueue);
                clReleaseMemObject(devData);
            }

            openCLSafeCall(err);
            return texture;
        }
Пример #8
0
int main(int argc, char **argv)
{
  /* test name */
  char name[] = "test_image_query_funcs";
  size_t global_work_size[1] = { 1 }, local_work_size[1]= { 1 };
  size_t srcdir_length, name_length, filename_size;
  size_t source_size, source_read;
  char const *sources[1];
  char *filename = NULL;
  char *source = NULL;
  FILE *source_file = NULL;
  cl_device_id devices[1];
  cl_context context = NULL;
  cl_command_queue queue = NULL;
  cl_program program = NULL;
  cl_kernel kernel = NULL;
  cl_int result;
  int retval = -1;

  /* image parameters */
  cl_uchar4 *imageData;
  cl_image_format image_format;
  cl_image_desc image2_desc, image3_desc;

  

  printf("Running test %s...\n", name);

  memset(&image2_desc, 0, sizeof(cl_image_desc));
  image2_desc.image_type = CL_MEM_OBJECT_IMAGE2D;
  image2_desc.image_width = 2;
  image2_desc.image_height = 4;

  memset(&image3_desc, 0, sizeof(cl_image_desc));
  image3_desc.image_type = CL_MEM_OBJECT_IMAGE3D;
  image3_desc.image_width = 2;
  image3_desc.image_height = 4;
  image3_desc.image_depth = 8;

  image_format.image_channel_order = CL_RGBA;
  image_format.image_channel_data_type = CL_UNSIGNED_INT8;
  imageData = (cl_uchar4*)malloc (4 * 4 * sizeof(cl_uchar4));
  
  if (imageData == NULL)
    {
      puts("out of host memory\n");
      goto error;
    }
  memset (imageData, 1, 4*4*sizeof(cl_uchar4));

  /* determine file name of kernel source to load */
  srcdir_length = strlen(SRCDIR);
  name_length = strlen(name);
  filename_size = srcdir_length + name_length + 16;
  filename = (char *)malloc(filename_size + 1);
  if (!filename) 
    {
      puts("out of memory");
      goto error;
    }
  
  snprintf(filename, filename_size, "%s/%s.cl", SRCDIR, name);
  
  /* read source code */
  source_file = fopen(filename, "r");
  if (!source_file) 
    {
      puts("source file not found\n");
      goto error;
    }
  
  fseek(source_file, 0, SEEK_END);
  source_size = ftell(source_file);
  fseek(source_file, 0, SEEK_SET);
  
  source = (char *)malloc(source_size + 1);
  if (!source) 
    {
      puts("out of memory\n");
      goto error;
    }
  
  source_read = fread(source, 1, source_size, source_file);
  if (source_read != source_size) 
    {
      puts("error reading from file\n");
      goto error;
    }
  
  source[source_size] = '\0';
  fclose(source_file);
  source_file = NULL;
  
  /* setup an OpenCL context and command queue using default device */
  context = poclu_create_any_context();
  if (!context) 
    {
      puts("clCreateContextFromType call failed\n");
      goto error;
    }

  result = clGetContextInfo(context, CL_CONTEXT_DEVICES,
                            sizeof(cl_device_id), devices, NULL);
  if (result != CL_SUCCESS) 
    {
      puts("clGetContextInfo call failed\n");
      goto error;
    }

  queue = clCreateCommandQueue(context, devices[0], 0, NULL); 
  if (!queue) 
    {
      puts("clCreateCommandQueue call failed\n");
      goto error;
    }

  /* Create image */

  cl_mem image2 = clCreateImage (context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
                                &image_format, &image2_desc, imageData, &result);
  if (result != CL_SUCCESS)
    {
      puts("image2 creation failed\n");
      goto error;
    }

  cl_mem image3 = clCreateImage (context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
                                &image_format, &image3_desc, imageData, &result);
  if (result != CL_SUCCESS)
    {
      puts("image3 creation failed\n");
      goto error;
    }


  /* create and build program */
  sources[0] = source;
  program = clCreateProgramWithSource(context, 1, sources, NULL, NULL); 
  if (!program) 
    {
      puts("clCreateProgramWithSource call failed\n");
      goto error;
    }

  result = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); 
  if (result != CL_SUCCESS) 
    {
      puts("clBuildProgram call failed\n");
      goto error;
    }

  /* execute the kernel with give name */
  kernel = clCreateKernel(program, name, NULL); 
  if (!kernel) 
    {
      puts("clCreateKernel call failed\n");
      goto error;
    }

   result = clSetKernelArg( kernel, 0, sizeof(cl_mem), &image2);
   if (result)
     {
       puts("clSetKernelArg 0 failed\n");
       goto error;
     }

   result = clSetKernelArg( kernel, 1, sizeof(cl_mem), &image3);
   if (result)
     {
       puts("clSetKernelArg 1 failed\n");
       goto error;
     }

  result = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size, 
                                  local_work_size, 0, NULL, NULL); 
  if (result != CL_SUCCESS) 
    {
      puts("clEnqueueNDRangeKernel call failed\n");
      goto error;
    }

  result = clFinish(queue);
  if (result == CL_SUCCESS)
    retval = 0;

error:

  if (kernel) 
    {
      clReleaseKernel(kernel);
    }
  if (program) 
    {
      clReleaseProgram(program);
    }
  if (queue) 
    {
      clReleaseCommandQueue(queue);
    }
  if (context) 
    {
      clReleaseContext(context);
    }
  if (source_file) 
    {
      fclose(source_file);
    }
  if (source) 
    {
      free(source);
    }
  if (filename)
    {
      free(filename);
    }
  if (imageData)
    {
      free(imageData);
    }

  if (retval) 
    {
      printf("FAIL\n");
      return 1;
    }
 
  printf("OK\n");
  return 0;
}
Пример #9
0
int main(int argc, char **argv) 
{
   /* Host data */
   float *hInputImage = NULL; 
   float *hOutputImage = NULL;

   /* Angle for rotation (degrees) */
   const float theta = 45.0f;

   /* Allocate space for the input image and read the
    * data from disk */
   int imageRows;
   int imageCols;
   hInputImage = readBmpFloat("../../Images/cat-face.bmp", &imageRows, &imageCols);
   const int imageElements = imageRows*imageCols;
   const size_t imageSize = imageElements*sizeof(float);

   /* Allocate space for the output image */
   hOutputImage = (float*)malloc(imageSize);
   if (!hOutputImage) { exit(-1); }

   /* Use this to check the output of each API call */
   cl_int status;

   /* Get the first platform */
   cl_platform_id platform;
   status = clGetPlatformIDs(1, &platform, NULL);
   check(status);

   /* Get the first device */
   cl_device_id device;
   status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
   check(status);

   /* Create a context and associate it with the device */
   cl_context context;
   context = clCreateContext(NULL, 1, &device, NULL, NULL, &status);
   check(status);

   /* Create a command queue and associate it with the device */
   cl_command_queue cmdQueue;
   cmdQueue = clCreateCommandQueue(context, device, 0, &status);
   check(status);

   /* The image descriptor describes how the data will be stored 
    * in memory. This descriptor initializes a 2D image with no pitch */
   cl_image_desc desc;
   desc.image_type = CL_MEM_OBJECT_IMAGE2D;
   desc.image_width = imageCols;
   desc.image_height = imageRows;
   desc.image_depth = 0;
   desc.image_array_size = 0;
   desc.image_row_pitch = 0;
   desc.image_slice_pitch = 0;
   desc.num_mip_levels = 0;
   desc.num_samples = 0;
   desc.buffer = NULL;

   /* The image format describes the properties of each pixel */
   cl_image_format format;
   format.image_channel_order = CL_R; // single channel
   format.image_channel_data_type = CL_FLOAT;

   /* Create the input image and initialize it using a 
    * pointer to the image data on the host. */
   cl_mem inputImage = clCreateImage(context, CL_MEM_READ_ONLY,
      &format, &desc, NULL, NULL);

   /* Create the output image */
   cl_mem outputImage = clCreateImage(context, CL_MEM_WRITE_ONLY,
      &format, &desc, NULL, NULL);

   /* Copy the host image data to the device */
   size_t origin[3] = {0, 0, 0}; // Offset within the image to copy from
   size_t region[3] = {imageCols, imageRows, 1}; // Elements to per dimension
   clEnqueueWriteImage(cmdQueue, inputImage, CL_TRUE, 
      origin, region, 0 /* row-pitch */, 0 /* slice-pitch */, 
      hInputImage, 0, NULL, NULL);

   /* Create a program with source code */
   char *programSource = readFile("image-rotation.cl");
   size_t programSourceLen = strlen(programSource);
   cl_program program = clCreateProgramWithSource(context, 1,
      (const char**)&programSource, &programSourceLen, &status);
   check(status);

   /* Build (compile) the program for the device */
   status = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
   if (status != CL_SUCCESS) {
      printCompilerError(program, device);
      exit(-1);
   }

   /* Create the kernel */
   cl_kernel kernel;
   kernel = clCreateKernel(program, "rotation", &status);
   check(status);

   /* Set the kernel arguments */
   status  = clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImage);
   status |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputImage);
   status |= clSetKernelArg(kernel, 2, sizeof(int), &imageCols);
   status |= clSetKernelArg(kernel, 3, sizeof(int), &imageRows);
   status |= clSetKernelArg(kernel, 4, sizeof(float), &theta);
   check(status);

   /* Define the index space and work-group size */
   size_t globalWorkSize[2];
   globalWorkSize[0] = imageCols;
   globalWorkSize[1] = imageRows;

   size_t localWorkSize[2];
   localWorkSize[0] = 8;
   localWorkSize[1] = 8;

   /* Enqueue the kernel for execution */
   status = clEnqueueNDRangeKernel(cmdQueue, kernel, 2, NULL,
      globalWorkSize, localWorkSize, 0, NULL, NULL);
   check(status);

   /* Read the output image buffer to the host */
   status = clEnqueueReadImage(cmdQueue, outputImage, CL_TRUE, 
      origin, region, 0 /* row-pitch */, 0 /* slice-pitch */, 
      hOutputImage, 0, NULL, NULL);
   check(status);

   /* Write the output image to file */
   writeBmpFloat(hOutputImage, "rotated-cat.bmp", imageRows, imageCols, 
      "../../Images/cat-face.bmp"); 

   /* Free OpenCL resources */
   clReleaseKernel(kernel);
   clReleaseProgram(program);
   clReleaseCommandQueue(cmdQueue);
   clReleaseMemObject(inputImage);
   clReleaseMemObject(outputImage);
   clReleaseContext(context);

   /* Free host resources */
   free(hInputImage);
   free(hOutputImage);
   free(programSource);

   return 0;
}
Пример #10
0
void clInvert3D(CL* cl, VglImage* img){
	cl_int err;
    cl_image_desc desc    = getDesc(img->shape[0], img->shape[1], 3, img->shape[2]);
    cl_image_desc descOut = getDesc(img->shape[0], img->shape[1], 3, img->shape[2]);
    cl_image_format src;
    cl_image_format out;
    switch(img->nChannels){
		case 1:
			src.image_channel_order = CL_A;
			out.image_channel_order = CL_A;
			break;
		case 3:
			rgb2rgba(NULL, img);
			src.image_channel_order = CL_RGBA;
			out.image_channel_order = CL_RGBA;
			break;
		case 4:
			src.image_channel_order = CL_RGBA;
			out.image_channel_order = CL_RGBA;
			break;
		default:
			printf("Numero de canais não suportado\n");
			exit;
	}
    src.image_channel_data_type = CL_UNORM_INT8;
    out.image_channel_data_type = CL_UNORM_INT8;
       
    cl_mem src_mem = 
    clCreateImage(cl->context, CL_MEM_READ_ONLY, &src, &desc, NULL, &err);
    printf("IMAGE STATUS SOURCE\t"); cl_error(err);
    
    cl_mem out_mem = 
	clCreateImage(cl->context, CL_MEM_WRITE_ONLY, &out, &descOut, NULL, &err);
	printf("IMAGE STATUS OUT\t"); cl_error(err);
    
    clGetMemObjectInfo(src_mem, CL_MEM_TYPE, sizeof(cl_int), &err, NULL);
    if(err == CL_MEM_OBJECT_IMAGE3D)
		printf("IMAGE TYPE:\t\tCL_MEM_OBJECT_IMAGE3D\n");
	
	size_t *src_origin=(size_t*)malloc(sizeof(size_t)*3);
	src_origin[0] = 0;
	src_origin[1] = 0;
	src_origin[2] = 0;
	
	size_t *src_region=(size_t*)malloc(sizeof(size_t)*3);
	src_region[0] = img->shape[0];
	src_region[1] = img->shape[1];
	src_region[2] = img->shape[2];
		
	err = clEnqueueWriteImage(cl->queue, src_mem, CL_TRUE,
	src_origin, src_region, 0, 0, (void*)img->ndarray, 0, 0, NULL);
	printf("ENQUEUE IMAGE STATUS "); cl_error(err);
	
	cl_program program;
	cl_kernel kernel;
	
	const char* k  = "./CLdemos/CL/Invert3D_RGBA.cl";
	const char* k2 = "./CLdemos/CL/Invert3D_A.cl";
	char** fonte;
	if(img->nChannels==1)
		fonte = (char**)getKernelPtr(k2);
	if(img->nChannels==4)
		fonte = (char**)getKernelPtr(k);
	
	program = clCreateProgramWithSource(cl->context, 1, (const char**)fonte, NULL, &err);
	printf("CREATE PROGRAM STATUS: "); cl_error(err);
	clBuildProgram(program, 0, NULL, NULL, NULL, &err);
	printf("BUILD PROGRAM STATUS: "); cl_error(err);
	kernel = clCreateKernel(program, "invert", &err);
	printf("KERNEL STATUS "); cl_error(err);
	
	
	err = clSetKernelArg( kernel, 0, sizeof( cl_mem ), (void *) &src_mem);
	printf("SET 1 KERNEL ARG "); cl_error(err);
	err = clSetKernelArg( kernel, 1, sizeof( cl_mem ), (void *) &out_mem);
	printf("SET 2 KERNEL ARG "); cl_error(err);

	size_t worksize[] = { img->shape[0], img->shape[1], img->shape[2]};
	err = clEnqueueNDRangeKernel(cl->queue, kernel, 2, NULL, worksize,
	0, 0, 0, 0);
	printf("ENQUEUE ND KERNEL STATUS "); cl_error(err);
	
	clFinish(cl->queue);
	
	char* auxout = (char*)malloc(img->shape[0]*img->shape[1]*img->shape[2]*img->nChannels);
	err = clEnqueueReadImage(cl->queue, out_mem, CL_TRUE, 
	src_origin, src_region, 0, 0, auxout, 0, NULL, NULL);
	printf("READ NEW IMAGE STATUS "); cl_error(err);
	
	for(int i=0; i<(img->shape[0]*img->nChannels*img->shape[1]*img->shape[2]); i++)
        img->ndarray[i] = auxout[i];
        
    free(auxout);    
    clReleaseKernel(kernel);
    clReleaseProgram(program);
}
int
MatrixMulImage::setupCL(void)
{
    cl_int status = 0;
    cl_device_type dType;
    
    if(deviceType.compare("cpu") == 0)
    {
        dType = CL_DEVICE_TYPE_CPU;
    }
    else //deviceType = "gpu" 
    {
        dType = CL_DEVICE_TYPE_GPU;
        if(isThereGPU() == false)
        {
            std::cout << "GPU not found. Falling back to CPU device" << std::endl;
            dType = CL_DEVICE_TYPE_CPU;
        }
    }

    /*
     * Have a look at the available platforms and pick either
     * the AMD one if available or a reasonable default.
     */

    cl_platform_id platform = NULL;
    int retValue = sampleCommon->getPlatform(platform, platformId, isPlatformEnabled());
    CHECK_ERROR(retValue, SDK_SUCCESS, "sampleCommon::getPlatform() failed");

    // Display available devices.
    retValue = sampleCommon->displayDevices(platform, dType);
    CHECK_ERROR(retValue, SDK_SUCCESS, "sampleCommon::displayDevices() failed");

    /*
     * If we could find our platform, use it. Otherwise use just available platform.
     */

    cl_context_properties cps[3] = 
    {
        CL_CONTEXT_PLATFORM, 
        (cl_context_properties)platform, 
        0
    };

    context = clCreateContextFromType(
                  cps,
                  dType,
                  NULL,
                  NULL,
                  &status);
    CHECK_OPENCL_ERROR(status, "clCreateContextFromType failed.");
    
    // getting device on which to run the sample
    status = sampleCommon->getDevices(context, &devices, deviceId, isDeviceIdEnabled());
    CHECK_ERROR(status, 0, "sampleCommon::getDevices() failed");

    //Set device info of given cl_device_id
    retValue = deviceInfo.setDeviceInfo(devices[deviceId]);
    CHECK_ERROR(retValue, SDK_SUCCESS, "deviceInfo.setDeviceInfo. failed");

    {
        // The block is to move the declaration of prop closer to its use
        cl_command_queue_properties prop = 0;
        prop |= CL_QUEUE_PROFILING_ENABLE;

        commandQueue = clCreateCommandQueue(
                           context, 
                           devices[deviceId], 
                           prop, 
                           &status);
        CHECK_ERROR(retValue, SDK_SUCCESS, "clCreateCommandQueue. failed");
    }

    cl_image_format imageFormat;
    imageFormat.image_channel_data_type = CL_FLOAT;
    imageFormat.image_channel_order = CL_RGBA;

    if(!deviceInfo.imageSupport)
    {
        std::cout << "Expected Error: Image is not supported on the Device" << std::endl;
        return SDK_EXPECTED_FAILURE;
    }

    cl_image_desc imageDesc;
    memset(&imageDesc, '\0', sizeof(cl_image_desc));
    imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D;

    // Create image for matrix A
    imageDesc.image_width = width0 / 4;
    imageDesc.image_height = height0;
    inputBuffer0 = clCreateImage(context,
                                 CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
                                 &imageFormat,
                                 &imageDesc,
                                 input0,
                                 &status);
    CHECK_OPENCL_ERROR(status, "clCreateImage failed. (inputBuffer0)");
   
    // Create image for matrix B
    imageDesc.image_width = width1 / 4;
    imageDesc.image_height = height1;
    inputBuffer1 = clCreateImage(context,
                                 CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
                                 &imageFormat,
                                 &imageDesc,
                                 input1,
                                 &status);
    CHECK_OPENCL_ERROR(status, "clCreateImage failed. (inputBuffer1)");
    
    // Create image for matrix C
    imageDesc.image_width = width1 / 4;
    imageDesc.image_height = height0;
    outputBuffer = clCreateImage(context,
                                 CL_MEM_WRITE_ONLY,
                                 &imageFormat,
                                 &imageDesc,
                                 0,
                                 &status);
    CHECK_OPENCL_ERROR(status, "clCreateImage failed. (outputBuffer)");

    // create a CL program using the kernel source 
    streamsdk::buildProgramData buildData;
    buildData.kernelName = std::string("MatrixMulImage_Kernels.cl");
    buildData.devices = devices;
    buildData.deviceId = deviceId;
    buildData.flagsStr = std::string("");
    if(isLoadBinaryEnabled())
        buildData.binaryName = std::string(loadBinary.c_str());

    if(isComplierFlagsSpecified())
        buildData.flagsFileName = std::string(flags.c_str());

    retValue = sampleCommon->buildOpenCLProgram(program, context, buildData);
    CHECK_ERROR(retValue, SDK_SUCCESS, "sampleCommon::buildOpenCLProgram() failed");
    
    kernel = clCreateKernel(program, "mmmKernel3", &status);
    CHECK_OPENCL_ERROR(status, "clCreateKernel failed.(kernel)");
   
    return SDK_SUCCESS;
}
Пример #12
0
bool CL_Image3D::Create(cl_uint uWidth, cl_uint uHeight, cl_uint uDepth, cl_uint uRowPitch, void* pImgInput, CL_ImageOrder OrderType, CL_ImageChannel ChannelType, CL_MemAccess AccessType, CL_MemStorage StorageType)
{
	CL_CPP_CONDITIONAL_RETURN_FALSE(m_Image);
	CL_CPP_CONDITIONAL_RETURN_FALSE(!m_pContextRef);
	CL_CPP_CONDITIONAL_RETURN_FALSE(!m_pContextRef->IsValid());
//	CL_CPP_CONDITIONAL_RETURN_FALSE((StorageType == CL_MemStorage_UseHostInput || StorageType == CL_MemStorage_CopyInputToDevice) && !pImgInput);

	cl_mem_flags uMemFlags = 0;
	cl_image_format ImgFormat;

	//	Determine the access flags.
	switch(AccessType)
	{
		case CL_MemAccess_ReadOnly: uMemFlags = CL_MEM_READ_ONLY;	break;
		case CL_MemAccess_WriteOnly: uMemFlags = CL_MEM_WRITE_ONLY;	break;
		case CL_MemAccess_ReadWrite: uMemFlags = CL_MEM_READ_WRITE;	break;

		default:
			return false;
	}

	//	Determine the storage flags.
	switch(StorageType)
	{
		case CL_MemStorage_AllocateOnDevice: /* default setting, do nothing */ break;
		case CL_MemStorage_AllocateOnHost: uMemFlags |= CL_MEM_ALLOC_HOST_PTR; break;
		case CL_MemStorage_UseHostInput: uMemFlags |= CL_MEM_USE_HOST_PTR; break;
		case CL_MemStorage_CopyInputToDevice: uMemFlags |= CL_MEM_COPY_HOST_PTR; break;

		default:
			return false;
	}

	//	Determine the image channel order.
	switch(OrderType)
	{
		case CL_ImageOrder_R: ImgFormat.image_channel_order = CL_R; break;
		case CL_ImageOrder_A: ImgFormat.image_channel_order = CL_A; break;
		case CL_ImageOrder_RG: ImgFormat.image_channel_order = CL_RG; break;
		case CL_ImageOrder_RA: ImgFormat.image_channel_order = CL_RA; break;
		case CL_ImageOrder_RGB: ImgFormat.image_channel_order = CL_RGB; break;
		case CL_ImageOrder_RGBA: ImgFormat.image_channel_order = CL_RGBA; break;
		case CL_ImageOrder_BGRA: ImgFormat.image_channel_order = CL_BGRA; break;
		case CL_ImageOrder_ARGB: ImgFormat.image_channel_order = CL_ARGB; break;
		case CL_ImageOrder_Intensity: ImgFormat.image_channel_order = CL_INTENSITY; break;
		case CL_ImageOrder_Luminance: ImgFormat.image_channel_order = CL_LUMINANCE; break;

#ifdef CL_VERSION_1_1
		case CL_ImageOrder_Rx: ImgFormat.image_channel_order = CL_Rx; break;
		case CL_ImageOrder_RGx: ImgFormat.image_channel_order = CL_RGx; break;
		case CL_ImageOrder_RGBx: ImgFormat.image_channel_order = CL_RGBx; break;
#endif
	}

	//	Determine the image channel data type.
	switch(ChannelType)
	{
		case CL_ImageChannel_Norm_Int8: ImgFormat.image_channel_data_type = CL_SNORM_INT8; break;
		case CL_ImageChannel_Norm_Int16: ImgFormat.image_channel_data_type = CL_SNORM_INT16; break;
		case CL_ImageChannel_Norm_UInt8: ImgFormat.image_channel_data_type = CL_UNORM_INT8; break;
		case CL_ImageChannel_Norm_UInt16: ImgFormat.image_channel_data_type = CL_UNORM_INT16; break;
		case CL_ImageChannel_Norm_UShort_555: ImgFormat.image_channel_data_type = CL_UNORM_SHORT_555; break;
		case CL_ImageChannel_Norm_UShort_565: ImgFormat.image_channel_data_type = CL_UNORM_SHORT_565; break;
		case CL_ImageChannel_Norm_UInt_101010: ImgFormat.image_channel_data_type = CL_UNORM_INT_101010; break;
		case CL_ImageChannel_Int8: ImgFormat.image_channel_data_type = CL_SIGNED_INT8; break;
		case CL_ImageChannel_Int16: ImgFormat.image_channel_data_type = CL_SIGNED_INT16; break;
		case CL_ImageChannel_Int32: ImgFormat.image_channel_data_type = CL_SIGNED_INT32; break;
		case CL_ImageChannel_UInt8: ImgFormat.image_channel_data_type = CL_UNSIGNED_INT8; break;
		case CL_ImageChannel_UInt16: ImgFormat.image_channel_data_type = CL_UNSIGNED_INT16; break;
		case CL_ImageChannel_UInt32: ImgFormat.image_channel_data_type = CL_UNSIGNED_INT32; break;
		case CL_ImageChannel_Float16: ImgFormat.image_channel_data_type = CL_HALF_FLOAT; break;
		case CL_ImageChannel_Float32: ImgFormat.image_channel_data_type = CL_FLOAT; break;
	}

	cl_context Context = m_pContextRef->GetContext();
	cl_int iErrorCode = CL_SUCCESS;
	cl_uint uSlicePitch = uRowPitch * uHeight;

	//	Create the image object.
#if defined(CL_VERSION_1_2)
	cl_image_desc ImgDesc;

	ImgDesc.image_width = uWidth;
	ImgDesc.image_height = uHeight;
	ImgDesc.image_depth = uDepth;
	ImgDesc.image_array_size = 1;
	ImgDesc.image_row_pitch = (pImgInput) ? uRowPitch : 0;
	ImgDesc.image_slice_pitch = (pImgInput) ? uSlicePitch : 0;
	ImgDesc.num_mip_levels = 0;
	ImgDesc.num_samples = 0;
	ImgDesc.buffer = NULL;

	m_Image = clCreateImage(Context, uMemFlags, &ImgFormat, &ImgDesc, pImgInput, &iErrorCode);
#else
	m_Image = clCreateImage3D(Context, uMemFlags, &ImgFormat, uWidth, uHeight, uDepth, (pImgInput) ? uRowPitch : 0, (pImgInput) ? uSlicePitch : 0, pImgInput, &iErrorCode);
#endif

	CL_CPP_CATCH_ERROR(iErrorCode);
	CL_CPP_ON_ERROR_RETURN_FALSE(iErrorCode);

	m_uWidth = uWidth;
	m_uHeight = uHeight;
	m_uDepth = uDepth;
	m_uRowPitch = uRowPitch;
	m_uSlicePitch = uSlicePitch;
	m_uTotalSize = m_uSlicePitch * m_uHeight;

	//	Ask OpenCL for the sizeo of each individual element in this image.
	clGetImageInfo(m_Image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &m_uElementSize, NULL);

	m_ImageOrder = OrderType;
	m_ImageChannel = ChannelType;
	m_MemAccess = AccessType;
	m_MemStorage = StorageType;

	return true;
}
Пример #13
0
/*!
 * @function clut_blurImage_local_unlimited
 * Blurs the image at [filename] with a filter of size [filter_size], and saves the result
 * to the file "output_unlimited.png". This function should be optimized to run on
 * local memory.
 * @param filename
 * The name of the file.
 * @param filter_size
 * The size of the blur filter.
 * @return
 * 0 on success, non-0 on failure.
 */
int clut_blurImage_local_unlimited(const cl_device_id device, const char * const filename, const unsigned int filter_size)
{
	const char * const fname = "clut_blurImage_local";
	int return_value = 1;
	cl_int ret;

	if (NULL == filename) {
		Debug_out(DEBUG_HOMEWORK, "%s: NULL pointer argument.\n", fname);
		goto error1;
	}

	/* compute work group size */
	size_t local_width, local_height;
	if (0 != clut_getMaxWGSize(device, &local_width, &local_height)) {
		Debug_out(DEBUG_HOMEWORK, "%s: Unable to get work group sizes.\n", fname);
		goto error1;
	}
	Debug_out(DEBUG_HOMEWORK, "%s: Max work group size is [%zu]x[%zu].\n", fname, local_width, local_height);

	/* openCL wants to know the size of __local statically allocated arrays at compile time,
	 * so the local size must be set with a #define */
	char *flags = calloc(128, sizeof(char));
	if (NULL == flags) {
		Debug_out(DEBUG_HOMEWORK, "%s: A calloc failed.\n", fname);
		goto error1;
	}
	sprintf(flags, "-D LOCAL_WIDTH=%zu -D LOCAL_HEIGHT=%zu -D FILTER_SIZE=%d", local_width, local_height, filter_size);
	Debug_out(DEBUG_HOMEWORK, "%s: Local flags are: '%s'.\n", fname, flags);

	/* Create context */
	cl_context context = clCreateContext(NULL, 1, &device, clut_contextCallback, "clut_blurImage_local_unlimited", &ret);
	CLUT_CHECK_ERROR(ret, "Unable to create context", error2);
	Debug_out(DEBUG_HOMEWORK, "%s: Created context successfully.\n", fname);

	/* Create program */
	cl_program program = clut_createProgramFromFile(context, "homework_unlimited.cl", flags);
	if (NULL == program) {
		Debug_out(DEBUG_HOMEWORK, "%s: Unable to create program.\n", fname);
		goto error3;
	}
	Debug_out(DEBUG_HOMEWORK, "%s: Program created.\n", fname);

	/* Create kernel */
	cl_kernel kernel = clCreateKernel(program, "blurImage_local_unlimited", &ret);
	CLUT_CHECK_ERROR(ret, "Unable to create kernel", error4);
	Debug_out(DEBUG_HOMEWORK, "%s: Kernel created.\n", fname);

	/* Create command_queue */
	cl_command_queue command_queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &ret);
	CLUT_CHECK_ERROR(ret, "Unable to create command queue", error5);
	Debug_out(DEBUG_HOMEWORK, "%s: Command queue created.\n", fname);

	/* open source image */
	int width, height;
	cl_mem source_image = clut_loadImageFromFile(context, filename, &width, &height);
	if (NULL == source_image) {
		Debug_out(DEBUG_HOMEWORK, "%s: Unable to read source image.\n", fname);
		goto error6;
	}

	if ((filter_size > (unsigned int) width) || (filter_size > (unsigned int) height)) {
		Debug_out(DEBUG_HOMEWORK, "%s: Filter does not fit in image.\n", fname);
		goto error7;
	}

	/* crate destination image */
	cl_image_format image_format = {0, 0};

	cl_image_desc image_desc = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	image_desc.image_type = CL_MEM_OBJECT_IMAGE2D;
//	image_desc.image_width		= 0;
//	image_desc.image_height		= 0;
//	image_desc.image_depth		= 0; /* only for 3D images */
//	image_desc.image_array_size	= 0; /* only for image arrays */
//	image_desc.image_row_pitch	= 0;
//	image_desc.image_slice_pitch	= 0; /* only for 3D images */
//	image_desc.num_mip_levels	= 0; /* mandatory */
//	image_desc.num_samples		= 0; /* mandatory */
//	image_desc.buffer		= NULL; /* only for 1D image buffers */

	ret = clGetImageInfo(source_image, CL_IMAGE_FORMAT, sizeof(image_format), &image_format, NULL);
	CLUT_CHECK_ERROR(ret, "Unable to get source image format information", error7);

	int components = clut_getImageFormatComponents(image_format);
	if (0 > components) {
		Debug_out(DEBUG_HOMEWORK, "%s: Unknown components for source image.\n", fname);
		goto error7;
	}
	Debug_out(DEBUG_HOMEWORK, "%s: Source image has %d components.\n", fname, components);

	image_desc.image_width = width - filter_size + 1;
	image_desc.image_height = height - filter_size + 1;
	image_desc.image_row_pitch = image_desc.image_width * components;

	cl_mem result_image = clCreateImage(context, CL_MEM_WRITE_ONLY, &image_format, &image_desc, NULL, &ret);
	CLUT_CHECK_ERROR(ret, "Unable to create second image", error7);

	/* fill result image with black */
	const unsigned int fill_color[4] = { 0, 0, 0, 255 };
	const size_t fill_origin[3] = { 0, 0, 0 };
	const size_t fill_region[3] = { width - filter_size + 1, height - filter_size + 1, 1 };
	ret = clEnqueueFillImage(command_queue, result_image, fill_color, fill_origin, fill_region, 0, NULL, NULL);
	CLUT_CHECK_ERROR(ret, "Unable to fill result image", error8);

	Debug_out(DEBUG_HOMEWORK, "%s: Images created.\n", fname);

	/* create filter matrix */
	unsigned char *filter_matrix = createFilterMatrix(filter_size);
	if (NULL == filter_matrix) {
		Debug_out(DEBUG_HOMEWORK, "%s: Unable to create filter matrix.\n", fname);
		goto error8;
	}
	Debug_out(DEBUG_HOMEWORK, "%s: Filter matrix created.\n", fname);
//	printFilterMatrix(filter_matrix, filter_size);

	/* copy filter matrix to device */
	cl_mem filter_matrix_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, filter_size * filter_size, filter_matrix, &ret);
	CLUT_CHECK_ERROR(ret, "Unable to create filter matrix buffer on device", error9);

	/* set kernel arguments */
	ret = clSetKernelArg(kernel, 0, sizeof(source_image), (void *) &source_image);
	CLUT_CHECK_ERROR(ret, "Unable to set source image argument", error10);
	Debug_out(DEBUG_HOMEWORK, "%s: Source image argument set.\n", fname);
	ret = clSetKernelArg(kernel, 1, sizeof(result_image), (void *) &result_image);
	CLUT_CHECK_ERROR(ret, "Unable to set result image argument", error10);
	Debug_out(DEBUG_HOMEWORK, "%s: Result image argument set.\n", fname);
	ret = clSetKernelArg(kernel, 2, sizeof(filter_matrix_buffer), (void *) &filter_matrix_buffer);
	CLUT_CHECK_ERROR(ret, "Unable to set filter matrix argument", error10);
	Debug_out(DEBUG_HOMEWORK, "%s: Filter matrix argument set.\n", fname);

	Debug_out(DEBUG_HOMEWORK, "%s: All kernel arguments set.\n", fname);

	const size_t work_size[2] = {
		COMPUTE_GLOBAL_SIZE(height - filter_size + 1, local_height),
		COMPUTE_GLOBAL_SIZE(width - filter_size + 1, local_width) };
	const size_t wg_size[2] = { local_height, local_width };
	Debug_out(DEBUG_HOMEWORK, "%s: work size is [%zu]x[%zu].\n", fname, work_size[0], work_size[1]);

	/* run kernel */
	cl_event kernel_event;
	ret = clEnqueueNDRangeKernel(command_queue, kernel, 2, NULL, work_size, wg_size, 0, NULL, &kernel_event);
	CLUT_CHECK_ERROR(ret, "Unable to enqueue kernel", error10);

	ret = clFinish(command_queue);
	CLUT_CHECK_ERROR(ret, "Unable to finish commands in queue", error10);
	Debug_out(DEBUG_HOMEWORK, "%s: Kernel executed.\n", fname);
	ret = clWaitForEvents(1, &kernel_event);
	CLUT_CHECK_ERROR(ret, "Unable to wait for kernel event", error10);

	/* check that kernel executed correctly */
	cl_int kernel_ret;
	ret = clGetEventInfo(kernel_event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(kernel_ret), &kernel_ret, NULL);
	CLUT_CHECK_ERROR(ret, "Unable to get kernel status", error10);
	Debug_out(DEBUG_HOMEWORK, "%s: Kernel status is %d.\n", fname, kernel_ret);
	if (CL_COMPLETE != kernel_ret) {
		Debug_out(DEBUG_HOMEWORK, "%s: kernel execution failed: %s.\n", fname, clut_getErrorDescription(kernel_ret));
		goto error10;
	}

	cl_ulong end_time;
	ret = clGetEventProfilingInfo(kernel_event, CL_PROFILING_COMMAND_END, sizeof(end_time), &end_time, NULL);
	CLUT_CHECK_ERROR(ret, "Unable to get kernel event end time", error10);
	if (0 == end_time) {
		Debug_out(DEBUG_HOMEWORK, "%s: kernel execution took 0 seconds.\n", fname);
		goto error10;
	}

	cl_double time_double = clut_getEventDuration(kernel_event);
	cl_ulong time_ulong = clut_getEventDuration_ns(kernel_event);
	Debug_out(DEBUG_HOMEWORK, "%s: Blurring took %f seconds (%lld nanoseconds).\n", fname, time_double, time_ulong);

	/* save image back to file */
	clut_saveImageToFile("output_unlimited.png", command_queue, result_image);

	/* output filter size, local width, local height, and duration in nanoseconds for profiling */
	printf("%d,%zu,%zu,%lld\n", filter_size, local_width, local_height, clut_getEventDuration_ns(kernel_event));

	return_value = 0;

error10:
	clReleaseMemObject(filter_matrix_buffer);
error9:
	free(filter_matrix);
error8:
	clReleaseMemObject(result_image);
error7:
	clReleaseMemObject(source_image);
error6:
	clReleaseCommandQueue(command_queue);
error5:
	clReleaseKernel(kernel);
error4:
	clReleaseProgram(program);
error3:
	clReleaseContext(context);
error2:
	free(flags);
error1:
	return return_value;
}
Пример #14
0
/*!
 * @function clut_blurImage
 * Blurs the image at [filename] with a filter of size [filter_size], and saves the result
 * to the file "output.png".
 * @param filename
 * The name of the file.
 * @param filter_size
 * The size of the blur filter.
 * @return
 * 0 on success, non-0 on failure.
 */
int clut_blurImage(const cl_device_id device, const char * const filename, const unsigned int filter_size)
{
	const char * const fname = "clut_blurImage";
	int return_value = 1;
	cl_int ret;

	if (NULL == filename) {
		Debug_out(DEBUG_HOMEWORK, "%s: NULL pointer argument.\n", fname);
		goto error1;
	}

	/* Create context */
	cl_context context = clCreateContext(NULL, 1, &device, clut_contextCallback, "clut_blurImage", &ret);
	CLUT_CHECK_ERROR(ret, "Unable to create context", error1);
	Debug_out(DEBUG_HOMEWORK, "%s: Created context successfully.\n", fname);

	/* Create program */
	cl_program program = clut_createProgramFromFile(context, "homework_global.cl", NULL);
	if (NULL == program) {
		Debug_out(DEBUG_HOMEWORK, "%s: Unable to create program.\n", fname);
		goto error3;
	}
	Debug_out(DEBUG_HOMEWORK, "%s: Program created.\n", fname);

	/* Create kernel */
	cl_kernel kernel = clCreateKernel(program, "blurImage", &ret);
	CLUT_CHECK_ERROR(ret, "Unable to create kernel", error3);
	Debug_out(DEBUG_HOMEWORK, "%s: Kernel created.\n", fname);

	/* Create command_queue */
	cl_command_queue command_queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &ret);
	CLUT_CHECK_ERROR(ret, "Unable to create command queue", error4);
	Debug_out(DEBUG_HOMEWORK, "%s: Command queue created.\n", fname);

	/* load source image */
	int width, height;
	cl_mem source_image = clut_loadImageFromFile(context, filename, &width, &height);
	if (NULL == source_image) {
		Debug_out(DEBUG_HOMEWORK, "%s: Unable to read source image.\n", fname);
		goto error5;
	}

	if ((filter_size > (unsigned int) width) || (filter_size > (unsigned int) height)) {
		Debug_out(DEBUG_HOMEWORK, "%s: Filter does not fit in image.\n", fname);
		goto error6;
	}

	/* create destination image */
	cl_image_format image_format = {0, 0};

	cl_image_desc image_desc = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	image_desc.image_type = CL_MEM_OBJECT_IMAGE2D;
//	image_desc.image_width		= 0;
//	image_desc.image_height		= 0;
//	image_desc.image_depth		= 0; /* only for 3D images */
//	image_desc.image_array_size	= 0; /* only for image arrays */
//	image_desc.image_row_pitch	= 0;
//	image_desc.image_slice_pitch	= 0; /* only for 3D images */
//	image_desc.num_mip_levels	= 0; /* mandatory */
//	image_desc.num_samples		= 0; /* mandatory */
//	image_desc.buffer		= NULL; /* only for 1D image buffers */

	image_desc.image_width = width - filter_size + 1;
	image_desc.image_height = height - filter_size + 1;

	ret = clGetImageInfo(source_image, CL_IMAGE_FORMAT, sizeof(image_format), &image_format, NULL);
	CLUT_CHECK_ERROR(ret, "Unable to get source image format information", error6);

	cl_mem result_image = clCreateImage(context, CL_MEM_WRITE_ONLY, &image_format, &image_desc, NULL, &ret);
	CLUT_CHECK_ERROR(ret, "Unable to create second image", error6);

	Debug_out(DEBUG_HOMEWORK, "%s: Images created.\n", fname);

	/* create filter matrix */
	unsigned char *filter_matrix = createFilterMatrix(filter_size);
	if (NULL == filter_matrix) {
		Debug_out(DEBUG_HOMEWORK, "%s: Unable to create filter matrix.\n", fname);
		goto error7;
	}
	Debug_out(DEBUG_HOMEWORK, "%s: Filter matrix created.\n", fname);
//	printFilterMatrix(filter_matrix, filter_size);

	/* copy filter matrix to device */
	cl_mem filter_matrix_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, filter_size * filter_size, filter_matrix, &ret);
	CLUT_CHECK_ERROR(ret, "Unable to create filter matrix buffer on device", error8);

	/* set kernel arguments */
	ret = clSetKernelArg(kernel, 0, sizeof(source_image), (void *) &source_image);
	CLUT_CHECK_ERROR(ret, "Unable to set source image argument", error9);
	Debug_out(DEBUG_HOMEWORK, "%s: Source image argument set.\n", fname);
	ret = clSetKernelArg(kernel, 1, sizeof(result_image), (void *) &result_image);
	CLUT_CHECK_ERROR(ret, "Unable to set result image argument", error9);
	Debug_out(DEBUG_HOMEWORK, "%s: Result image argument set.\n", fname);
	ret = clSetKernelArg(kernel, 2, sizeof(filter_size), (void *) &filter_size);
	CLUT_CHECK_ERROR(ret, "Unable to set filter size argument", error9);
	Debug_out(DEBUG_HOMEWORK, "%s: Filter size argument set.\n", fname);
	ret = clSetKernelArg(kernel, 3, sizeof(filter_matrix_buffer), (void *) &filter_matrix_buffer);
	CLUT_CHECK_ERROR(ret, "Unable to set filter matrix argument", error9);
	Debug_out(DEBUG_HOMEWORK, "%s: Filter matrix argument set.\n", fname);

	Debug_out(DEBUG_HOMEWORK, "%s: All kernel arguments set.\n", fname);

	/* run kernel */
	cl_event kernel_event;
	const size_t work_size[2] = { height - filter_size + 1, width - filter_size + 1};
	ret = clEnqueueNDRangeKernel(command_queue, kernel, 2, NULL, work_size, NULL, 0, NULL, &kernel_event);
	CLUT_CHECK_ERROR(ret, "Unable to enqueue kernel", error9);

	ret = clFinish(command_queue);
	CLUT_CHECK_ERROR(ret, "Unable to finish commands in queue", error9);
	Debug_out(DEBUG_HOMEWORK, "%s: Kernel executed.\n", fname);
	ret = clWaitForEvents(1, &kernel_event);
	CLUT_CHECK_ERROR(ret, "Unable to wait for kernel event", error9);

	/* check that kernel executed correctly */
	cl_int kernel_ret;
	ret = clGetEventInfo(kernel_event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(kernel_ret), &kernel_ret, NULL);
	CLUT_CHECK_ERROR(ret, "Unable to get kernel status", error9);
	Debug_out(DEBUG_HOMEWORK, "%s: Kernel status is %d.\n", fname, kernel_ret);
	if (CL_COMPLETE != kernel_ret) {
		Debug_out(DEBUG_HOMEWORK, "%s: kernel execution failed: %s.\n", fname, clut_getErrorDescription(kernel_ret));
		goto error9;
	}

	cl_ulong end_time;
	ret = clGetEventProfilingInfo(kernel_event, CL_PROFILING_COMMAND_END, sizeof(end_time), &end_time, NULL);
	CLUT_CHECK_ERROR(ret, "Unable to get kernel event end time", error9);
	if (0 == end_time) {
		Debug_out(DEBUG_HOMEWORK, "%s: kernel execution took 0 seconds.\n", fname);
		goto error9;
	}

	cl_double time_double = clut_getEventDuration(kernel_event);
	cl_ulong time_ulong = clut_getEventDuration_ns(kernel_event);
	Debug_out(DEBUG_HOMEWORK, "%s: Blurring took %f seconds (%lld nanoseconds).\n", fname, time_double, time_ulong);

	/* save image */
	clut_saveImageToFile("output.png", command_queue, result_image);

	/* print filter size and duration in nanoseconds for profiling */
	printf("%d,%llu\n", filter_size, clut_getEventDuration_ns(kernel_event));

	return_value = 0;

error9:
	clReleaseMemObject(filter_matrix_buffer);
error8:
	free(filter_matrix);
error7:
	clReleaseMemObject(result_image);
error6:
	clReleaseMemObject(source_image);
error5:
	clReleaseCommandQueue(command_queue);
error4:
	clReleaseKernel(kernel);
error3:
	clReleaseProgram(program);
error2:
	clReleaseContext(context);
error1:
	return return_value;

}
Пример #15
0
int main(int argc, char *argv[])
{
    cl_int ret;
    
    /* get platform ID */
    cl_platform_id platform_id;
    ret = clGetPlatformIDs(1, &platform_id, NULL);
    assert(CL_SUCCESS == ret);

    /* get device IDs */
    cl_device_id device_id;
    ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL, 1, &device_id, NULL);
	assert(CL_SUCCESS == ret);
    
    /* create context */
    cl_context context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
    assert(CL_SUCCESS == ret);

    /* create command queue */
    cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
    assert(CL_SUCCESS == ret);

    /* create image object */
    cl_image_format format;
    format.image_channel_order = CL_R;
    format.image_channel_data_type = CL_UNSIGNED_INT8;
    
    cl_image_desc desc;
	memset(&desc, 0, sizeof(desc));
    desc.image_type = CL_MEM_OBJECT_IMAGE2D;
    desc.image_width  = IMAGE_W;
    desc.image_height = IMAGE_H;
    
    cl_mem image = clCreateImage(context, 0, &format, &desc, NULL, &ret);
    assert(CL_SUCCESS == ret);

	/* filling background image */
    {
        const size_t origin[] = {0, 0, 0};
        const size_t region[] = {IMAGE_W, IMAGE_H, 1};
 		cl_uchar4 fill_color;
		fill_color.s[0] = 0;
		fill_color.s[1] = 0;
		fill_color.s[2] = 0;
		fill_color.s[3] = 0;
        ret = clEnqueueFillImage(command_queue, image, &fill_color, origin, region, 0, NULL, NULL);
        assert(CL_SUCCESS == ret);
    }

    /* filling front image */
    {
        const size_t origin[] = {(IMAGE_W*1)/4, (IMAGE_H*1)/4, 0};
        const size_t region[] = {(IMAGE_W*2)/4, (IMAGE_H*2)/4, 1};
        cl_uchar4 fill_color;
		fill_color.s[0] = 255;
		fill_color.s[1] = 0;
		fill_color.s[2] = 0;
		fill_color.s[3] = 0;
        ret = clEnqueueFillImage(command_queue, image, &fill_color, origin, region, 0, NULL, NULL);
        assert(CL_SUCCESS == ret);
    }

    /* reading image */
    cl_uchar *data = NULL;
    {
        size_t num_channels = 1;
        data = static_cast<cl_uchar*>(ALIGNED_MALLOC(IMAGE_W*IMAGE_H*sizeof(cl_uchar), num_channels*sizeof(cl_uchar)));
		assert(NULL != data);
		std::fill(&data[0], &data[IMAGE_W*IMAGE_H], 128);
        
        const size_t origin[] = {0, 0, 0};
        const size_t region[] = {IMAGE_W, IMAGE_H, 1};
        ret = clEnqueueReadImage(command_queue, image, CL_TRUE, origin, region, IMAGE_W*sizeof(cl_uchar), 0, data, 0, NULL, NULL);
        assert(CL_SUCCESS == ret);
    }

    /* print image */
    for (unsigned int h=0; h<IMAGE_H; ++h)
    {
        for (unsigned int w=0; w<IMAGE_W; ++w)
        {
            std::cout << std::setw(5) << std::right << static_cast<int>(data[h*IMAGE_W+w]);
        }
        std::cout << std::endl;
    }

    /* finalizing */
    ALIGNED_FREE(data);

    clReleaseMemObject(image);
    
    clReleaseCommandQueue(command_queue);
    clReleaseContext(context);

    return 0;
}
void createCLImages()
{
   // CL images
   cl_image_desc pixelDesc;
   memset(&pixelDesc, '\0', sizeof(cl_image_desc));
   pixelDesc.image_type = CL_MEM_OBJECT_IMAGE2D;
   pixelDesc.image_width = imageWidth;
   pixelDesc.image_height = imageHeight;

   cl_int ret;
   void *hostPtr = NULL;

   if( inFlags & CL_MEM_USE_HOST_PTR ||
       inFlags & CL_MEM_COPY_HOST_PTR )
       hostPtr = memIn;

   inputImage = clCreateImage ( context,
                                inFlags,
                                &pixelFormat,
                                &pixelDesc,
                                hostPtr,
                                &ret );

   ASSERT_CL_RETURN( ret );

   hostPtr = NULL;

   if( outFlags & CL_MEM_USE_HOST_PTR ||
       outFlags & CL_MEM_COPY_HOST_PTR )
       hostPtr = memOut;

   outputImage = clCreateImage ( context,
                                 outFlags,
                                 &pixelFormat,
                                 &pixelDesc,
                                 hostPtr,
                                 &ret );
   ASSERT_CL_RETURN( ret );

   hostPtr = NULL;

   if( copyFlags & CL_MEM_USE_HOST_PTR ||
       copyFlags & CL_MEM_COPY_HOST_PTR )
       hostPtr = memIn;

   if( whichTest == 2 )
   {
      inputCopyImage = clCreateImage ( context,
                                       copyFlags,
                                       &pixelFormat,
                                       &pixelDesc,
                                       hostPtr,
                                       &ret );

      ASSERT_CL_RETURN( ret );
   }

   hostPtr = NULL;

   if( copyFlags & CL_MEM_USE_HOST_PTR ||
       copyFlags & CL_MEM_COPY_HOST_PTR )
       hostPtr = memOut;

   if( whichTest == 2 )
   {
      outputCopyImage = clCreateImage ( context,
                                        copyFlags,
                                        &pixelFormat,
                                        &pixelDesc,
                                        hostPtr,
                                        &ret );

      ASSERT_CL_RETURN( ret );
   }
}
/**
 * \related cl_Mem_Object_t
 *
 * This function allocates memory for Memory Object with OpenCL image & sets
 * function pointers.
 *
 * @param[in] parent_thread parent Steel Thread, which gives OpenCL context, etc
 * @param[in] mem_flags OpenCL memory flags, which will be used for OpenCL
 * memory objects creation
 * @param[in] image_format OpenCL image format, that describe characteristics
 * @param[in] width image width
 * @param[in] height image height
 * @param[in] host_ptr pointer to Host-side memory region (if any). This argument
 * is optional. If not needed - provide null pointer instead.
 *
 * @return pointer to allocated structure in case of success,
 * \ref VOID_MEM_OBJ_PTR otherwise
 *
 * @warning always use 'Destroy' function pointer to free memory, allocated
 * by this function.
 */
scow_Mem_Object* Make_Image(
    scow_Steel_Thread       *parent_thread,
    const cl_mem_flags      mem_flags, 
    const cl_image_format   *image_format,
    const size_t            width, 
    const size_t            height, 
    const size_t            row_pitch,
    void                    *host_ptr)
{
    cl_int ret;
    scow_Mem_Object* self;

    OCL_CHECK_EXISTENCE(parent_thread, VOID_MEM_OBJ_PTR);
    OCL_CHECK_EXISTENCE(image_format, VOID_MEM_OBJ_PTR);

    self = (scow_Mem_Object*) calloc(1, sizeof(*self));
    OCL_CHECK_EXISTENCE(self, VOID_MEM_OBJ_PTR);

    self->obj_mem_type = IMAGE;
    self->parent_thread = parent_thread;
    self->host_ptr = host_ptr;
    self->mem_flags = mem_flags;
    self->width = width;
    self->height = height;
    self->row_pitch = 0;

    self->error = Make_Error();
    self->timer = Make_Timer(VOID_KERNEL_PTR);

    self->Get_Mem_Obj = Mem_Object_Get_Mem_Obj;
    self->Destroy = Mem_Object_Destroy;
    self->Swap = Mem_Object_Swap;
    self->Unmap = Mem_Object_Unmap;

    self->Map = Image_Map;
    self->Write = Image_Send_To_Device;
    self->Read = Image_Get_From_Device;
    self->Copy = Image_Copy;
    self->Erase = NULL;
    self->Sync = Mem_Object_Sync;

    self->Get_Height = Image_Get_Height;
    self->Get_Width = Image_Get_Width;
    self->Get_Row_Pitch = Image_Get_Row_Pitch;
    self->Make_Child = NULL;

#ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
	self->cl_mem_object = clCreateImage2D(self->parent_thread->context,
            mem_flags, image_format, self->width, self->height, self->row_pitch,
            host_ptr, &ret);
#else
    cl_image_desc image_desc = {
        .image_type         = CL_MEM_OBJECT_IMAGE2D,
        .image_width        = self->width,
        .image_height       = self->height,
        .image_array_size   = 1,
        .image_row_pitch    = self->row_pitch,
        .image_slice_pitch  = host_ptr ? (self->row_pitch * self->height) : 0,
        .num_mip_levels     = 0,
        .num_samples        = 0,
        .buffer             = NULL
    };
    self->cl_mem_object = clCreateImage(self->parent_thread->context,
        mem_flags, image_format, &image_desc, host_ptr, &ret);
#endif

    OCL_DIE_ON_ERROR(ret, CL_SUCCESS, self->Destroy(self), VOID_MEM_OBJ_PTR);

    return self;
}
Пример #18
0
cl_int Simulator_3D<T>::build_kernel(const char * kernel_file)

{
    cl_int err;
    cl_program program;

    // Build program with source (filename) on device+context
    CHECK_RETURN_N(program, CreateProgram(Simulator<T>::context, Simulator<T>::device, kernel_file, err), err);
    CHECK_RETURN_N(_kernel_brac_3d, clCreateKernel(program, "brac_3d", &err), err);
    CHECK_RETURN_N(_kernel_step_3d, clCreateKernel(program, "step_3d", &err), err);

    //Create Images;
   
    cl_image_desc r_desc;
    r_desc.image_type=CL_MEM_OBJECT_IMAGE3D;
    r_desc.image_width=Simulator<T>::_dim.x;
    r_desc.image_height=Simulator<T>::_dim.y;
    r_desc.image_depth=Simulator<T>::_dim.z;
    r_desc.image_row_pitch=0;
    r_desc.image_slice_pitch=0;
    r_desc.num_mip_levels=0;
    r_desc.num_samples=0;
    r_desc.buffer=NULL;
    
    cl_image_format fmt;
    fmt.image_channel_data_type=CL_FLOAT; /* DOES NOT WORK WITH DOUBLE */
    fmt.image_channel_order=CL_R;
    
    T *v=(T *)calloc(Simulator<T>::_size,sizeof(T));
    for(int i=0;i<Simulator<T>::_size;i++)
    {
        v[i]=rand()%101/(T)100;
    }
    CHECK_RETURN_N(_img_Phi,clCreateImage(Simulator<T>::context, CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, &fmt, &r_desc, v, &err),err)
    CHECK_RETURN_N(_img_Bracket,clCreateImage(Simulator<T>::context, CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, &fmt, &r_desc, v, &err),err)
    CHECK_RETURN_N(_img_PhiNext,clCreateImage(Simulator<T>::context, CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, &fmt, &r_desc, v, &err),err)
    
    CHECK_ERROR(clSetKernelArg(_kernel_brac_3d, 0, sizeof(cl_mem), &_img_Phi))
    CHECK_ERROR(clSetKernelArg(_kernel_brac_3d, 1, sizeof(cl_mem), &_img_Bracket))
    CHECK_ERROR(clSetKernelArg(_kernel_brac_3d, 2, sizeof(T), &_a_2))
    CHECK_ERROR(clSetKernelArg(_kernel_brac_3d, 3, sizeof(T), &_a_4))
    CHECK_ERROR(clSetKernelArg(_kernel_brac_3d, 4, sizeof(T), &_K))
    CHECK_ERROR(clSetKernelArg(_kernel_brac_3d, 5, sizeof(T), &_dx))
    
    CHECK_ERROR(clSetKernelArg(_kernel_step_3d, 0, sizeof(cl_mem), &_img_Phi))
    CHECK_ERROR(clSetKernelArg(_kernel_step_3d, 1, sizeof(cl_mem), &_img_Bracket))
    CHECK_ERROR(clSetKernelArg(_kernel_step_3d, 2, sizeof(cl_mem), &_img_PhiNext))
    CHECK_ERROR(clSetKernelArg(_kernel_step_3d, 3, sizeof(T), &_M))
    CHECK_ERROR(clSetKernelArg(_kernel_step_3d, 4, sizeof(T), &_dx))
    CHECK_ERROR(clSetKernelArg(_kernel_step_3d, 5, sizeof(T), &_dt))
    
    free(v);
    
    _local_size[0]=std::min((unsigned int)8,Simulator<T>::_dim.x);
    _local_size[1]=std::min((unsigned int)8,Simulator<T>::_dim.y);
    _local_size[2]=std::min((unsigned int)4,Simulator<T>::_dim.z);
    
    _global_size[0]=Simulator<T>::_dim.x;
    _global_size[1]=Simulator<T>::_dim.y;
    _global_size[2]=Simulator<T>::_dim.z;
    
    Simulator<T>::_cl_initialized = true;
    
    return CL_SUCCESS;
}
Пример #19
0
int
main(void)
{
  cl_int err;
  cl_platform_id platforms[MAX_PLATFORMS];
  cl_uint nplatforms;
  cl_device_id devices[MAX_DEVICES];
  cl_uint ndevices;
  cl_uint i, j;
  size_t el, row, col;

  CHECK_CL_ERROR(clGetPlatformIDs(MAX_PLATFORMS, platforms, &nplatforms));

  for (i = 0; i < nplatforms; i++)
  {
    CHECK_CL_ERROR(clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, MAX_DEVICES,
      devices, &ndevices));

    /* Only test the devices we actually have room for */
    if (ndevices > MAX_DEVICES)
      ndevices = MAX_DEVICES;

    for (j = 0; j < ndevices; j++)
    {
      /* skip devices that do not support images */
      cl_bool has_img;
      CHECK_CL_ERROR(clGetDeviceInfo(devices[j], CL_DEVICE_IMAGE_SUPPORT, sizeof(has_img), &has_img, NULL));
      if (!has_img)
        continue;

      cl_context context = clCreateContext(NULL, 1, &devices[j], NULL, NULL, &err);
      CHECK_OPENCL_ERROR_IN("clCreateContext");
      cl_command_queue queue = clCreateCommandQueue(context, devices[j], 0, &err);
      CHECK_OPENCL_ERROR_IN("clCreateCommandQueue");

      cl_ulong alloc;
      size_t max_height;
      size_t max_width;
#define MAXALLOC (1024U*1024U)

      CHECK_CL_ERROR(clGetDeviceInfo(devices[j], CL_DEVICE_MAX_MEM_ALLOC_SIZE,
          sizeof(alloc), &alloc, NULL));
      CHECK_CL_ERROR(clGetDeviceInfo(devices[j], CL_DEVICE_IMAGE2D_MAX_WIDTH,
          sizeof(max_width), &max_width, NULL));
      CHECK_CL_ERROR(clGetDeviceInfo(devices[j], CL_DEVICE_IMAGE2D_MAX_HEIGHT,
          sizeof(max_height), &max_height, NULL));


      while (alloc > MAXALLOC)
        alloc /= 2;

      // fit at least one max_width inside the alloc (shrink max_width for this)
      while (max_width*pixel_size > alloc)
        max_width /= 2;

      // round number of elements to next multiple of max_width elements
      const size_t nels = (alloc/pixel_size/max_width)*max_width;
      const size_t buf_size = nels*pixel_size;

      cl_image_desc img_desc;
      memset(&img_desc, 0, sizeof(img_desc));
      img_desc.image_type = CL_MEM_OBJECT_IMAGE2D;
      img_desc.image_width = max_width;
      img_desc.image_height = nels/max_width;
      img_desc.image_depth = 1;

      cl_ushort null_pixel[4] = {0, 0, 0, 0};
      cl_ushort *host_buf = malloc(buf_size);
      TEST_ASSERT(host_buf);

      for (el = 0; el < nels; el+=4) {
        host_buf[el] = el & CHANNEL_MAX;
        host_buf[el+1] = (CHANNEL_MAX - el) & CHANNEL_MAX;
        host_buf[el+2] = (CHANNEL_MAX/((el & 1) + 1) - el) & CHANNEL_MAX;
        host_buf[el+3] = (CHANNEL_MAX - el/((el & 1) + 1)) & CHANNEL_MAX;
      }

      cl_mem buf = clCreateBuffer(context, CL_MEM_READ_WRITE, buf_size, NULL, &err);
      CHECK_OPENCL_ERROR_IN("clCreateBuffer");
      cl_mem img = clCreateImage(context, CL_MEM_READ_WRITE, &img_format, &img_desc, NULL, &err);
      CHECK_OPENCL_ERROR_IN("clCreateImage");

      CHECK_CL_ERROR(clEnqueueWriteBuffer(queue, buf, CL_TRUE, 0, buf_size, host_buf, 0, NULL, NULL));

      const size_t offset = 0;
      const size_t origin[] = {0, 0, 0};
      const size_t region[] = {img_desc.image_width, img_desc.image_height, 1};

      CHECK_CL_ERROR(clEnqueueCopyBufferToImage(queue, buf, img, offset, origin, region, 0, NULL, NULL));

      size_t row_pitch, slice_pitch;
      cl_ushort *img_map = clEnqueueMapImage(queue, img, CL_TRUE, CL_MAP_READ, origin, region,
        &row_pitch, &slice_pitch, 0, NULL, NULL, &err);
      CHECK_OPENCL_ERROR_IN("clEnqueueMapImage");

      CHECK_CL_ERROR(clFinish(queue));

      for (row = 0; row < img_desc.image_height; ++row) {
        for (col = 0; col < img_desc.image_width; ++col) {
          cl_ushort *img_pixel = (cl_ushort*)((char*)img_map + row*row_pitch) + col*4;
          cl_ushort *buf_pixel = host_buf + (row*img_desc.image_width + col)*4;

          if (memcmp(img_pixel, buf_pixel, pixel_size) != 0)
            printf("%zu %zu %zu : %x %x %x %x | %x %x %x %x\n",
              row, col, (size_t)(buf_pixel - host_buf),
              buf_pixel[0],
              buf_pixel[1],
              buf_pixel[2],
              buf_pixel[3],
              img_pixel[0],
              img_pixel[1],
              img_pixel[2],
              img_pixel[3]);

          TEST_ASSERT(memcmp(img_pixel, buf_pixel, pixel_size) == 0);

        }
      }

      CHECK_CL_ERROR(clEnqueueUnmapMemObject(queue, img, img_map, 0, NULL, NULL));

      /* Clear the buffer, and ensure it has been cleared */
      CHECK_CL_ERROR(clEnqueueFillBuffer(queue, buf, null_pixel, sizeof(null_pixel), 0, buf_size, 0, NULL, NULL));
      cl_ushort *buf_map = clEnqueueMapBuffer(queue, buf, CL_TRUE, CL_MAP_READ, 0, buf_size, 0, NULL, NULL, &err);
      CHECK_OPENCL_ERROR_IN("clEnqueueMapBuffer");

      CHECK_CL_ERROR(clFinish(queue));

      for (el = 0; el < nels; ++el) {
#if 0 // debug
        if (buf_map[el] != 0) {
          printf("%zu/%zu => %u\n", el, nels, buf_map[el]);
        }
#endif
        TEST_ASSERT(buf_map[el] == 0);
      }

      CHECK_CL_ERROR(clEnqueueUnmapMemObject(queue, buf, buf_map, 0, NULL, NULL));

      /* Copy data from image to buffer, and check that it's again equal to the original buffer */
      CHECK_CL_ERROR(clEnqueueCopyImageToBuffer(queue, img, buf, origin, region, offset, 0, NULL, NULL));
      buf_map = clEnqueueMapBuffer(queue, buf, CL_TRUE, CL_MAP_READ, 0, buf_size, 0, NULL, NULL, &err);
      CHECK_CL_ERROR(clFinish(queue));

      TEST_ASSERT(memcmp(buf_map, host_buf, buf_size) == 0);

      CHECK_CL_ERROR (
          clEnqueueUnmapMemObject (queue, buf, buf_map, 0, NULL, NULL));
      CHECK_CL_ERROR (clFinish (queue));

      free(host_buf);
      CHECK_CL_ERROR (clReleaseMemObject (img));
      CHECK_CL_ERROR (clReleaseMemObject (buf));
      CHECK_CL_ERROR (clReleaseCommandQueue (queue));
      CHECK_CL_ERROR (clReleaseContext (context));
    }
  }
  return EXIT_SUCCESS;
}
int 
ImageOverlap::setupCL()
{
    cl_int status = CL_SUCCESS;
    cl_device_type dType;

    if(deviceType.compare("cpu") == 0)
    {
        dType = CL_DEVICE_TYPE_CPU;
    }
    else //deviceType = "gpu" 
	{
		dType = CL_DEVICE_TYPE_GPU;
		if(isThereGPU() == false)
		{
			std::cout << "GPU not found. Falling back to CPU device" << std::endl;
			dType = CL_DEVICE_TYPE_CPU;
		}
	}

    /*
     * Have a look at the available platforms and pick either
     * the AMD one if available or a reasonable default.
     */
    cl_platform_id platform = NULL;
    int retValue = sampleCommon->getPlatform(platform, platformId, isPlatformEnabled());
    CHECK_ERROR(retValue, SDK_SUCCESS, "sampleCommon::getPlatform() failed");

    // Display available devices.
    retValue = sampleCommon->displayDevices(platform, dType);
    CHECK_ERROR(retValue, SDK_SUCCESS, "sampleCommon::displayDevices() failed");

    // If we could find our platform, use it. Otherwise use just available platform.
    cl_context_properties cps[3] = 
    {
        CL_CONTEXT_PLATFORM, 
        (cl_context_properties)platform, 
        0
    };

    context = clCreateContextFromType(
        cps,
        dType,
        NULL,
        NULL,
        &status);
    CHECK_OPENCL_ERROR(status, "clCreateContextFromType failed.");

    // getting device on which to run the sample
    status = sampleCommon->getDevices(context, &devices, deviceId, isDeviceIdEnabled());
    CHECK_ERROR(status, SDK_SUCCESS, "sampleCommon::getDevices() failed");

    status = deviceInfo.setDeviceInfo(devices[deviceId]);
    CHECK_OPENCL_ERROR(status, "deviceInfo.setDeviceInfo failed");

    if(!deviceInfo.imageSupport)
    {
        OPENCL_EXPECTED_ERROR(" Expected Error: Device does not support Images");
    }
	 
	blockSizeX = deviceInfo.maxWorkGroupSize<GROUP_SIZE?deviceInfo.maxWorkGroupSize:GROUP_SIZE;

    // Create command queue
	cl_command_queue_properties prop = 0;
	for(int i=0;i<3;i++)
	{
		commandQueue[i] = clCreateCommandQueue(
			context,
			devices[deviceId],
			prop,
			&status);
		 CHECK_OPENCL_ERROR(status,"clCreateCommandQueuefailed.");
	}

    // Create and initialize image objects

	// Create map image
	mapImage = clCreateImage(context,
		CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
		&imageFormat,
		&image_desc,
		mapImageData,
		&status);
	CHECK_OPENCL_ERROR(status,"clCreateBuffer failed. (mapImage)");
	int color[4] = {0,0,80,255};
	size_t origin[3] = {300,300,0};
	size_t region[3] = {100,100,1};
	status = clEnqueueFillImage(commandQueue[0], mapImage, color, origin, region, NULL, NULL, &eventlist[0]);

    // Create fill image
	fillImage = clCreateImage(context,
		CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
		&imageFormat,
		&image_desc,
		fillImageData,
		&status);
	CHECK_OPENCL_ERROR(status,"clCreateBuffer failed. (fillImage)");

	color[0] = 80;
	color[1] = 0;
	color[2] = 0;
	color[3] = 0;
	origin[0] = 50;
	origin[1] = 50;
	status = clEnqueueFillImage(commandQueue[1], fillImage, color, origin, region, NULL, NULL, &eventlist[1]);
	
	//Create output image
	outputImage = clCreateImage(context,
		CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
		&imageFormat,
		&image_desc,
		NULL,
		&status);
	CHECK_OPENCL_ERROR(status,"clCreateBuffer failed. (outputImage)");

    // create a CL program using the kernel source 
    streamsdk::buildProgramData buildData;
    buildData.kernelName = std::string("ImageOverlap_Kernels.cl");
    buildData.devices = devices;
    buildData.deviceId = deviceId;
    buildData.flagsStr = std::string("");
    if(isLoadBinaryEnabled())
        buildData.binaryName = std::string(loadBinary.c_str());

    if(isComplierFlagsSpecified())
        buildData.flagsFileName = std::string(flags.c_str());

    retValue = sampleCommon->buildOpenCLProgram(program, context, buildData);
    CHECK_ERROR(retValue, SDK_SUCCESS, "sampleCommon::buildOpenCLProgram() failed");

    // get a kernel object handle for a kernel with the given name 
	kernelOverLap = clCreateKernel(program, "OverLap", &status);
	CHECK_OPENCL_ERROR(status,"clCreateKernel failed.(OverLap)");

    return SDK_SUCCESS;
}
Пример #21
0
int main()
{
    cl_platform_id platform = NULL;
    cl_device_id device = NULL;
    cl_context context = NULL;
    cl_command_queue command_queue = NULL;
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    cl_int status = 0;
    cl_event task_event, map_event;
    cl_device_type dType = CL_DEVICE_TYPE_GPU;
    cl_int image_width, image_height;
    cl_float4 *result;
    int i, j;
    cl_mem clImage, out;
    cl_bool support;
    int pixels_read = 8;

    //Setup the OpenCL Platform,
    //Get the first available platform. Use it as the default platform
    status = clGetPlatformIDs(1, &platform, NULL);
    LOG_OCL_ERROR(status, "clGetPlatformIDs Failed" );

    //Get the first available device
    status = clGetDeviceIDs (platform, dType, 1, &device, NULL);
    LOG_OCL_ERROR(status, "clGetDeviceIDs Failed" );
    
    /*Check if the device support images */
    clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(support), &support, NULL);
     if (support != CL_TRUE) {
         std::cout <<"IMAGES not supported\n";
         return 1;
     }
    //Create an execution context for the selected platform and device.
    cl_context_properties contextProperty[3] =
    {
        CL_CONTEXT_PLATFORM,
        (cl_context_properties)platform,
        0
    };
    context = clCreateContextFromType(
        contextProperty,
        dType,
        NULL,
        NULL,
        &status);
    LOG_OCL_ERROR(status, "clCreateContextFromType Failed" );

    /*Create command queue*/
    command_queue = clCreateCommandQueue(context,
                                        device,
                                        0,
                                        &status);
    LOG_OCL_ERROR(status, "clCreateCommandQueue Failed" );

    /* Create Image Object */
    //Create OpenCL device input image with the format and descriptor as below

    cl_image_format image_format;
    image_format.image_channel_data_type = CL_FLOAT;
    image_format.image_channel_order = CL_R;

    //We create a 5 X 5 2D image 
    image_width  = 5; 
    image_height = 5;
    cl_image_desc image_desc;
    image_desc.image_type   = CL_MEM_OBJECT_IMAGE2D;
    image_desc.image_width  = image_width;
    image_desc.image_height = image_height;
    image_desc.image_depth  = 1;
    image_desc.image_array_size  = 1;
    image_desc.image_row_pitch   = image_width*sizeof(float);
    image_desc.image_slice_pitch = 25*sizeof(float);
    image_desc.num_mip_levels = 0;
    image_desc.num_samples    = 0;
    image_desc.buffer         = NULL;
    
    /* Create output buffer */
    out = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float4)*pixels_read, NULL, &status);
    LOG_OCL_ERROR(status, "clCreateBuffer Failed" );

    size_t origin[] = {0,0,0};  /* Transfer target coordinate*/
    size_t region[] = {image_width,image_height,1};  /* Size of object to be transferred */
    float *data = (float *)malloc(image_width*image_height*sizeof(float));
    float pixels[] = {            /* Transfer Data */
        10, 20, 10, 40, 50,
        10, 20, 20, 40, 50,
        10, 20, 30, 40, 50,
        10, 20, 40, 40, 50,
        10, 20, 50, 40, 50
    };
    memcpy(data, pixels, image_width*image_height*sizeof(float));
    clImage = clCreateImage(context, CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR, &image_format, &image_desc, pixels, &status);
    LOG_OCL_ERROR(status, "clCreateImage Failed" );

    /* If the image was not created using CL_MEM_USE_HOST_PTR, then you can write the image data to the device using the 
       clEnqueueWriteImage function. */
    //status = clEnqueueWriteImage(command_queue, clImage, CL_TRUE, origin, region, 5*sizeof(float), 25*sizeof(float), data, 0, NULL, NULL);
    //LOG_OCL_ERROR(status, "clCreateBuffer Failed" );

    /* Build program */
    program = clCreateProgramWithSource(context, 1, (const char **)&sample_image_kernel,
                                        NULL, &status);
    LOG_OCL_ERROR(status, "clCreateProgramWithSource Failed" );

    // Build the program
    status = clBuildProgram(program, 1, &device, "", NULL, NULL);
    LOG_OCL_ERROR(status, "clBuildProgram Failed" );
    if(status != CL_SUCCESS)
    {
        if(status == CL_BUILD_PROGRAM_FAILURE)
            LOG_OCL_COMPILER_ERROR(program, device);
        LOG_OCL_ERROR(status, "clBuildProgram Failed" );
    }
    printf("Printing the image pixels\n");
    for (i=0; i<image_height; i++) {
        for (j=0; j<image_width; j++) {
            printf("%f ",data[i*image_width +j]);
        }
        printf("\n");
    }

    //Create kernel and set the kernel arguments
    kernel = clCreateKernel(program, "image_test", &status);
    clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&clImage);
    clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&out);

    /*********Image sampler with image repeated at every 1.0 normalized coordinate***********/
    /*If host side sampler is not required the sampler objects can also be created on the kernel code. 
      Don't pass the thirsd argument to the kernel and create  a sample object as shown below in the kernel code*/
    //const sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; 

    cl_sampler sampler = clCreateSampler (context,
                            CL_TRUE,
                            CL_ADDRESS_REPEAT,
                            CL_FILTER_NEAREST,
                            &status);
    clSetKernelArg(kernel, 2, sizeof(cl_sampler), (void*)&sampler);
    //Enqueue the kernel 
    status = clEnqueueTask(command_queue, kernel, 0, NULL, &task_event);
    LOG_OCL_ERROR(status, "clEnqueueTask Failed" );
    /* Map the result back to host address */
    result = (cl_float4*)clEnqueueMapBuffer(command_queue, out, CL_TRUE, CL_MAP_READ, 0, sizeof(cl_float4)*pixels_read, 1, &task_event, &map_event, &status);
    printf(" SAMPLER mode set to CL_ADDRESS_REPEAT | CL_FILTER_NEAREST\n");
    printf("\nPixel values retreived based on the filter and Addressing mode selected\n");
    printf("(float2)(0.5f,0.5f) = %f,%f,%f,%f\n",result[0].s[0],result[0].s[1],result[0].s[2],result[0].s[3]);
    printf("(float2)(0.8f,0.5f) = %f,%f,%f,%f\n",result[1].s[0],result[1].s[1],result[1].s[2],result[1].s[3]);
    printf("(float2)(1.3f,0.5f) = %f,%f,%f,%f\n",result[2].s[0],result[2].s[1],result[2].s[2],result[2].s[3]);
    printf("(float2)(0.5f,0.5f) = %f,%f,%f,%f\n",result[3].s[0],result[3].s[1],result[3].s[2],result[3].s[3]);
    printf("(float2)(0.5f,0.8f) = %f,%f,%f,%f\n",result[4].s[0],result[4].s[1],result[4].s[2],result[4].s[3]);
    printf("(float2)(0.5f,1.3f) = %f,%f,%f,%f\n",result[5].s[0],result[5].s[1],result[5].s[2],result[5].s[3]);
    printf("(float2)(4.5f,0.5f) = %f,%f,%f,%f\n",result[5].s[0],result[5].s[1],result[5].s[2],result[5].s[3]);
    printf("(float2)(5.0f,0.5f) = %f,%f,%f,%f\n",result[7].s[0],result[7].s[1],result[7].s[2],result[7].s[3]);
    clEnqueueUnmapMemObject(command_queue, out, result, 0, NULL, NULL);
    clReleaseSampler(sampler);

    /*********Image sampler with image mirrored at every 1.0 normalized coordinate***********/
    sampler = clCreateSampler (context,
                            CL_TRUE,
                            CL_ADDRESS_MIRRORED_REPEAT,
                            CL_FILTER_LINEAR,
                            &status);
    clSetKernelArg(kernel, 2, sizeof(cl_sampler), (void*)&sampler);
    //Enqueue the kernel 
    status = clEnqueueTask(command_queue, kernel, 0, NULL, &task_event);
    LOG_OCL_ERROR(status, "clEnqueueTask Failed" );
    /* Map the result back to host address */
    result = (cl_float4*)clEnqueueMapBuffer(command_queue, out, CL_TRUE, CL_MAP_READ, 0, sizeof(cl_float4)*pixels_read, 1, &task_event, &map_event, &status);
    printf(" SAMPLER mode set to CL_ADDRESS_MIRRORED_REPEAT | CL_FILTER_LINEAR\n");
    printf("\nPixel values retreived based on the filter and Addressing mode selected\n");
    printf("(float2)(0.5f,0.5f) = %f,%f,%f,%f\n",result[0].s[0],result[0].s[1],result[0].s[2],result[0].s[3]);
    printf("(float2)(0.8f,0.5f) = %f,%f,%f,%f\n",result[1].s[0],result[1].s[1],result[1].s[2],result[1].s[3]);
    printf("(float2)(1.3f,0.5f) = %f,%f,%f,%f\n",result[2].s[0],result[2].s[1],result[2].s[2],result[2].s[3]);
    printf("(float2)(0.5f,0.5f) = %f,%f,%f,%f\n",result[3].s[0],result[3].s[1],result[3].s[2],result[3].s[3]);
    printf("(float2)(0.5f,0.8f) = %f,%f,%f,%f\n",result[4].s[0],result[4].s[1],result[4].s[2],result[4].s[3]);
    printf("(float2)(0.5f,1.3f) = %f,%f,%f,%f\n",result[5].s[0],result[5].s[1],result[5].s[2],result[5].s[3]);
    printf("(float2)(4.5f,0.5f) = %f,%f,%f,%f\n",result[5].s[0],result[5].s[1],result[5].s[2],result[5].s[3]);
    printf("(float2)(5.0f,0.5f) = %f,%f,%f,%f\n",result[7].s[0],result[7].s[1],result[7].s[2],result[7].s[3]);
    clEnqueueUnmapMemObject(command_queue, out, result, 0, NULL, NULL);
    clReleaseSampler(sampler);
    /********************/

    //Free All OpenCL objects.
    clReleaseMemObject(out);
    clReleaseMemObject(clImage);
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    clReleaseCommandQueue(command_queue);
    clReleaseContext(context);
    return 0;

}
Пример #22
0
enum piglit_result
piglit_cl_test(const int argc,
               const char **argv,
               const struct piglit_cl_api_test_config* config,
               const struct piglit_cl_api_test_env* env)
{
#if defined(CL_VERSION_1_2)
	enum piglit_result result = PIGLIT_PASS;
	cl_int err;

#define IMG_WIDTH 4
#define IMG_HEIGHT 4
#define IMG_DATA_SIZE 4
#define IMG_BUFFER_SIZE IMG_WIDTH * IMG_HEIGHT * IMG_DATA_SIZE

	unsigned char img_buf[IMG_BUFFER_SIZE] = {0};
	unsigned char dst_buf[IMG_BUFFER_SIZE] = {0};
	unsigned char exp_buf[IMG_BUFFER_SIZE] = {0};
	int pattern[4] = {129, 33, 77, 255};
	size_t origin[3] = {0, 0, 0};
	size_t region[3] = {2, 2, 1};
	size_t tmp;
	cl_event event;
	cl_mem image;
	cl_image_format img_format;
	cl_image_desc img_desc = {0};
	cl_command_queue queue = env->context->command_queues[0];
	int i;

	cl_bool *image_support =
		piglit_cl_get_device_info(env->context->device_ids[0],
		                          CL_DEVICE_IMAGE_SUPPORT);

	if (!*image_support) {
		fprintf(stderr, "No image support\n");
		free(image_support);
		return PIGLIT_SKIP;
	}

	img_format.image_channel_order = CL_RGBA;
	img_format.image_channel_data_type = CL_UNSIGNED_INT8;
	img_desc.image_type = CL_MEM_OBJECT_IMAGE2D;
	img_desc.image_width = IMG_WIDTH;
	img_desc.image_height = IMG_HEIGHT;
	img_desc.buffer = NULL;

/*** Normal usage ***/
	image = clCreateImage(env->context->cl_ctx,
	                      CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
	                      &img_format, &img_desc, &img_buf, &err);

	if(!piglit_cl_check_error(err, CL_SUCCESS)) {
		fprintf(stderr, "Failed (error code: %s): Creating an image\n",
		        piglit_cl_get_error_name(err));
		return PIGLIT_FAIL;
	}

	if (!test(queue, image, pattern, origin, region,
	          0, NULL, NULL,
	          CL_SUCCESS, &result, "Enqueuing the image to be filled")) {
		return PIGLIT_FAIL;
	}

	region[0] = IMG_WIDTH;
	region[1] = IMG_HEIGHT;
	err = clEnqueueReadImage(queue, image, 1, origin, region, 0, 0,
	                         dst_buf, 0, NULL, NULL);
	if(!piglit_cl_check_error(err, CL_SUCCESS)) {
		fprintf(stderr, "Failed (error code: %s): Reading image\n",
		        piglit_cl_get_error_name(err));
		return PIGLIT_FAIL;
	}

	/*
	 * fill the host buffer with the pattern
	 * for exemple : pattern == 1234
	 *
	 * 12341234abcdabcd
	 * 12341234abcdabcd
	 * abcdabcdabcdabcd
	 * abcdabcdabcdabcd
	 */
	exp_buf[0] = pattern[0];
	exp_buf[1] = pattern[1];
	exp_buf[2] = pattern[2];
	exp_buf[3] = pattern[3];
	memcpy(exp_buf + (IMG_DATA_SIZE * 1), exp_buf, IMG_DATA_SIZE);
	memcpy(exp_buf + (IMG_DATA_SIZE * 4), exp_buf, IMG_DATA_SIZE);
	memcpy(exp_buf + (IMG_DATA_SIZE * 5), exp_buf, IMG_DATA_SIZE);

	for (i = 0; i < sizeof(dst_buf) / sizeof(dst_buf[0]); ++i) {
		if (!piglit_cl_probe_integer(dst_buf[i], exp_buf[i], 0)) {
			fprintf(stderr, "Error at %d: got %d, expected %d\n",
			        i, dst_buf[i], exp_buf[i]);
			return PIGLIT_FAIL;
		}
	}

/*** Errors ***/

	/*
	 * CL_INVALID_COMMAND_QUEUE if command_queue is not a valid command-queue.
	 */
	test(NULL, image, pattern, origin, region,
		  0, NULL, NULL,
		  CL_INVALID_COMMAND_QUEUE, &result,
		  "CL_INVALID_COMMAND_QUEUE if command_queue is not a valid command-queue");

	/*
	 * CL_INVALID_CONTEXT if the context associated with command_queue and
	 * image are not the same or if the context associated with command_queue
	 * and events in event_wait_list are not the same.
	 */
	{
		piglit_cl_context context;
		cl_int err;
		context = piglit_cl_create_context(env->platform_id,
		                                   env->context->device_ids, 1);
		if (context) {
			event = clCreateUserEvent(context->cl_ctx, &err);
			if (err == CL_SUCCESS) {
				err = clSetUserEventStatus(event, CL_COMPLETE);
				if (err == CL_SUCCESS) {
					test(context->command_queues[0], image, pattern, origin, region,
					     0, NULL, NULL,
					     CL_INVALID_CONTEXT, &result,
					     "CL_INVALID_CONTEXT if the context associated with command_queue and image are not the same");

					test(queue, image, pattern, origin, region,
					     1, &event, NULL,
					     CL_INVALID_CONTEXT, &result,
					     "CL_INVALID_CONTEXT if the context associated with command_queue and events in event_wait_list are not the same");
				} else {
					fprintf(stderr, "Could not set event status.\n");
					piglit_merge_result(&result, PIGLIT_WARN);
				}
				clReleaseEvent(event);
			} else {
				fprintf(stderr, "Could not create user event.\n");
				piglit_merge_result(&result, PIGLIT_WARN);
			}

			piglit_cl_release_context(context);
		} else {
			fprintf(stderr, "Could not test triggering CL_INVALID_CONTEXT.\n");
			piglit_merge_result(&result, PIGLIT_WARN);
		}
	}

	/*
	 * CL_INVALID_MEM_OBJECT if image is not a valid buffer object.
	 */
	test(queue, NULL, pattern, origin, region,
	     0, NULL, NULL,
	     CL_INVALID_MEM_OBJECT, &result,
	     "CL_INVALID_MEM_OBJECT if image is not a valid buffer object");

	/*
	 * CL_INVALID_VALUE if fill_color is NULL.
	 */
	test(queue, image, NULL, origin, region,
	     0, NULL, NULL,
	     CL_INVALID_VALUE, &result,
	     "CL_INVALID_VALUE if fill_color is NULL");

	/*
	 * CL_INVALID_VALUE if the region being written specified by origin and
	 * region is out of bounds or if ptr is a NULL value.
	 */
	tmp = origin[0];
	origin[0] = IMG_WIDTH + 1;
	test(queue, image, pattern, origin, region,
	     0, NULL, NULL,
	     CL_INVALID_VALUE, &result,
	     "CL_INVALID_VALUE if the region being written specified by origin and region is out of bounds (origin)");
	origin[0] = tmp;

	tmp = region[0];
	region[0] = IMG_WIDTH + 1;
	test(queue, image, pattern, origin, region,
	     0, NULL, NULL,
	     CL_INVALID_VALUE, &result,
	     "CL_INVALID_VALUE if the region being written specified by origin and region is out of bounds (region)");
	region[0] = tmp;

	test(queue, image, pattern, NULL, region,
	     0, NULL, NULL,
	     CL_INVALID_VALUE, &result,
	     "CL_INVALID_VALUE if ptr is a NULL value (origin)");

	test(queue, image, pattern, origin, NULL,
	     0, NULL, NULL,
	     CL_INVALID_VALUE, &result,
	     "CL_INVALID_VALUE if ptr is a NULL value (region)");

	/*
	 * CL_INVALID_VALUE if values in origin and region do not follow rules
	 * described in the argument description for origin and region.
	 */
	tmp = origin[2];
	origin[2] = 1;
	test(queue, image, pattern, origin, region,
	     0, NULL, NULL,
	     CL_INVALID_VALUE, &result,
	     "CL_INVALID_VALUE if values in origin do not follow rules described in the argument description for origin");
	origin[2] = tmp;

	tmp = region[2];
	region[2] = 0;
	test(queue, image, pattern, origin, region,
		  0, NULL, NULL,
		CL_INVALID_VALUE, &result,
		"CL_INVALID_VALUE if values in region do not follow rules described in the argument description for region");
	region[2] = tmp;

	/*
	 * CL_INVALID_EVENT_WAIT_LIST if event_wait_list is NULL and
	 * num_events_in_wait_list > 0, or event_wait_list is not NULL and
	 * num_events_in_wait_list is 0, or if event objects in event_wait_list
	 * are not valid events.
	 */
	event = NULL;
	test(queue, image, pattern, origin, region,
	     1, NULL, NULL,
	     CL_INVALID_EVENT_WAIT_LIST, &result,
	     "CL_INVALID_EVENT_WAIT_LIST if event_wait_list is NULL and num_events_in_wait_list > 0");

	test(queue, image, pattern, origin, region,
	     0, &event, NULL,
	     CL_INVALID_EVENT_WAIT_LIST, &result,
	     "CL_INVALID_EVENT_WAIT_LIST if event_wait_list is not NULL and num_events_in_wait_list is 0");

	test(queue, image, pattern, origin, region,
	     1, &event, NULL,
	     CL_INVALID_EVENT_WAIT_LIST, &result,
	     "CL_INVALID_EVENT_WAIT_LIST if event objects in event_wait_list are not valid events");

	/*
	 * CL_INVALID_IMAGE_SIZE if image dimensions (image width, height, specified
	 * or compute row and/or slice pitch) for image are not supported by device
	 * associated with queue.
	 */
	/* This is a per device test, clCreateImage would have failed before */

	/*
	 * CL_INVALID_IMAGE_FORMAT if image format (image channel order and data type)
	 * for image are not supported by device associated with queue.
	 */
	/* This is a per device test, clCreateImage would have failed before */

	free(image_support);
	clReleaseMemObject(image);
	return result;
#else
	return PIGLIT_SKIP;
#endif
}
Пример #23
0
int main(int argc, char **argv)
{
  /* test name */
  char name[] = "test_image_query_funcs";
  size_t global_work_size[1] = { 1 }, local_work_size[1]= { 1 };
  size_t srcdir_length, name_length, filename_size;
  char *filename = NULL;
  char *source = NULL;
  cl_device_id devices[1];
  cl_context context = NULL;
  cl_command_queue queue = NULL;
  cl_program program = NULL;
  cl_kernel kernel = NULL;
  cl_int err;

  /* image parameters */
  cl_uchar4 *imageData;
  cl_image_format image_format;
  cl_image_desc image2_desc, image3_desc;

  printf("Running test %s...\n", name);

  memset(&image2_desc, 0, sizeof(cl_image_desc));
  image2_desc.image_type = CL_MEM_OBJECT_IMAGE2D;
  image2_desc.image_width = 2;
  image2_desc.image_height = 4;

  memset(&image3_desc, 0, sizeof(cl_image_desc));
  image3_desc.image_type = CL_MEM_OBJECT_IMAGE3D;
  image3_desc.image_width = 2;
  image3_desc.image_height = 4;
  image3_desc.image_depth = 8;

  image_format.image_channel_order = CL_RGBA;
  image_format.image_channel_data_type = CL_UNSIGNED_INT8;
  imageData = (cl_uchar4*)malloc (4 * 4 * sizeof(cl_uchar4));

  TEST_ASSERT (imageData != NULL && "out of host memory\n");
  memset (imageData, 1, 4*4*sizeof(cl_uchar4));

  /* determine file name of kernel source to load */
  srcdir_length = strlen(SRCDIR);
  name_length = strlen(name);
  filename_size = srcdir_length + name_length + 16;
  filename = (char *)malloc(filename_size + 1);
  TEST_ASSERT (filename != NULL && "out of host memory\n");

  snprintf(filename, filename_size, "%s/%s.cl", SRCDIR, name);

  /* read source code */
  source = poclu_read_file (filename);
  TEST_ASSERT (source != NULL && "Kernel .cl not found.");

  /* setup an OpenCL context and command queue using default device */
  context = poclu_create_any_context();
  TEST_ASSERT (context != NULL && "clCreateContextFromType call failed\n");

  cl_sampler external_sampler = clCreateSampler (
      context, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &err);
  CHECK_OPENCL_ERROR_IN ("clCreateSampler");

  CHECK_CL_ERROR (clGetContextInfo (context, CL_CONTEXT_DEVICES,
                                    sizeof (cl_device_id), devices, NULL));

  queue = clCreateCommandQueue (context, devices[0], 0, &err);
  CHECK_OPENCL_ERROR_IN ("clCreateCommandQueue");

  /* Create image */
  cl_mem image2
      = clCreateImage (context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
                       &image_format, &image2_desc, imageData, &err);
  CHECK_OPENCL_ERROR_IN ("clCreateImage image2");

  cl_mem image3
      = clCreateImage (context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
                       &image_format, &image3_desc, imageData, &err);
  CHECK_OPENCL_ERROR_IN ("clCreateImage image3");

  unsigned color[4] = { 2, 9, 11, 7 };
  size_t orig[3] = { 0, 0, 0 };
  size_t reg[3] = { 2, 4, 1 };
  err = clEnqueueFillImage (queue, image2, color, orig, reg, 0, NULL, NULL);
  CHECK_OPENCL_ERROR_IN ("clCreateImage image3");

  /* create and build program */
  program = clCreateProgramWithSource (context, 1, (const char **)&source,
                                       NULL, &err);
  CHECK_OPENCL_ERROR_IN ("clCreateProgramWithSource");

  err = clBuildProgram (program, 0, NULL, NULL, NULL, NULL);
  CHECK_OPENCL_ERROR_IN ("clBuildProgram");

  /* execute the kernel with give name */
  kernel = clCreateKernel (program, name, NULL);
  CHECK_OPENCL_ERROR_IN ("clCreateKernel");

  err = clSetKernelArg (kernel, 0, sizeof (cl_mem), &image2);
  CHECK_OPENCL_ERROR_IN ("clSetKernelArg 0");

  err = clSetKernelArg (kernel, 1, sizeof (cl_mem), &image3);
  CHECK_OPENCL_ERROR_IN ("clSetKernelArg 1");

  err = clSetKernelArg (kernel, 2, sizeof (cl_sampler), &external_sampler);
  CHECK_OPENCL_ERROR_IN ("clSetKernelArg 2");

  err = clEnqueueNDRangeKernel (queue, kernel, 1, NULL, global_work_size,
                                local_work_size, 0, NULL, NULL);
  CHECK_OPENCL_ERROR_IN ("clEnqueueNDRangeKernel");

  err = clFinish (queue);
  CHECK_OPENCL_ERROR_IN ("clFinish");

  clReleaseMemObject (image2);
  clReleaseMemObject (image3);
  clReleaseKernel (kernel);
  clReleaseProgram (program);
  clReleaseCommandQueue (queue);
  clReleaseSampler (external_sampler);
  clUnloadCompiler ();
  clReleaseContext (context);
  free (source);
  free (filename);
  free (imageData);

  printf("OK\n");
  return 0;
}
Пример #24
0
        cl_mem bindTexture(const oclMat &mat)
        {
            cl_mem texture;
            cl_image_format format;
            int err;
            int depth    = mat.depth();
            int channels = mat.oclchannels();

            switch(depth)
            {
            case CV_8U:
                format.image_channel_data_type = CL_UNSIGNED_INT8;
                break;
            case CV_32S:
                format.image_channel_data_type = CL_UNSIGNED_INT32;
                break;
            case CV_32F:
                format.image_channel_data_type = CL_FLOAT;
                break;
            default:
                CV_Error(-1, "Image forma is not supported");
                break;
            }
            switch(channels)
            {
            case 1:
                format.image_channel_order     = CL_R;
                break;
            case 3:
                format.image_channel_order     = CL_RGB;
                break;
            case 4:
                format.image_channel_order     = CL_RGBA;
                break;
            default:
                CV_Error(-1, "Image format is not supported");
                break;
            }
#ifdef CL_VERSION_1_2
            //this enables backwards portability to
            //run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
            if(Context::getContext()->supportsFeature(FEATURE_CL_VER_1_2))
            {
                cl_image_desc desc;
                desc.image_type       = CL_MEM_OBJECT_IMAGE2D;
                desc.image_width      = mat.cols;
                desc.image_height     = mat.rows;
                desc.image_depth      = 0;
                desc.image_array_size = 1;
                desc.image_row_pitch  = 0;
                desc.image_slice_pitch = 0;
                desc.buffer           = NULL;
                desc.num_mip_levels   = 0;
                desc.num_samples      = 0;
                texture = clCreateImage(*(cl_context*)mat.clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
            }
            else
#endif
            {
                texture = clCreateImage2D(
                    *(cl_context*)mat.clCxt->getOpenCLContextPtr(),
                    CL_MEM_READ_WRITE,
                    &format,
                    mat.cols,
                    mat.rows,
                    0,
                    NULL,
                    &err);
            }
            size_t origin[] = { 0, 0, 0 };
            size_t region[] = { mat.cols, mat.rows, 1 };

            cl_mem devData;
            if (mat.cols * mat.elemSize() != mat.step)
            {
                devData = clCreateBuffer(*(cl_context*)mat.clCxt->getOpenCLContextPtr(), CL_MEM_READ_ONLY, mat.cols * mat.rows
                    * mat.elemSize(), NULL, NULL);
                const size_t regin[3] = {mat.cols * mat.elemSize(), mat.rows, 1};
                clEnqueueCopyBufferRect(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr(), (cl_mem)mat.data, devData, origin, origin,
                    regin, mat.step, 0, mat.cols * mat.elemSize(), 0, 0, NULL, NULL);
                clFlush(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr());
            }
            else
            {
                devData = (cl_mem)mat.data;
            }

            clEnqueueCopyBufferToImage(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr(), devData, texture, 0, origin, region, 0, NULL, 0);
            if ((mat.cols * mat.elemSize() != mat.step))
            {
                clFlush(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr());
                clReleaseMemObject(devData);
            }

            openCLSafeCall(err);
            return texture;
        }
Пример #25
0
int main(int argc, char** argv)
{
	GLFWwindow* glfwwindow;
	{//OpenGL/GLFW Init
		if (!glfwInit()) {
			std::cerr << "Error: GLFW init failed" << std::endl;
			exit(EXIT_FAILURE);
		}
		glfwwindow = glfwCreateWindow(200, 200, "Nvidia interop bug demo", nullptr, nullptr);
		glfwMakeContextCurrent(glfwwindow);
		glewInit();
		std::cout << "OpenGL Info: " << (char*)glGetString(GL_VENDOR) << " " << (char*)glGetString(GL_RENDERER) << std::endl;
	}
	cl_context clcontext;
	cl_command_queue clqueue;
	{//OpenCL init
		cl_platform_id platform = nullptr;
		{//Platform init
			cl_uint numPlatforms;
			clGetPlatformIDs(0, nullptr, &numPlatforms);
			if (numPlatforms == 0)
			{
				std::cerr << "Error: No OpenCL platforms available" << std::endl;
				return EXIT_FAILURE;
			}
			cl_platform_id* all_platforms = new cl_platform_id[numPlatforms];
			clGetPlatformIDs(numPlatforms, all_platforms, nullptr);
			for (size_t i = 0; i < numPlatforms; i++) //Select Nvidia out of the platforms
			{
				char name[300];
				clGetPlatformInfo(all_platforms[i], CL_PLATFORM_NAME, sizeof(name), &name, nullptr);
				std::string namestring(name);
				if (namestring.find("NVIDIA") != std::string::npos || namestring.find("Nvidia") != std::string::npos)
					platform = all_platforms[i];	
			}
			if (platform == nullptr) {
				std::cerr << "No Nvidia OpenCL platform found, will default to platform 0 ";
				
			}

			delete[] all_platforms;
		}
		{ //Create shared context
			cl_context_properties properties[7];
			properties[0] = CL_CONTEXT_PLATFORM; //This is different for other operating systems than Windows
			properties[1] = (cl_context_properties)platform;
			properties[2] = CL_GL_CONTEXT_KHR;
			properties[3] = (cl_context_properties)wglGetCurrentContext();
			properties[4] = CL_WGL_HDC_KHR;
			properties[5] = (cl_context_properties)wglGetCurrentDC();
			properties[6] = 0;

			clcontext = clCreateContextFromType(properties, CL_DEVICE_TYPE_GPU, nullptr, nullptr, nullptr);
		}
		cl_device_id cldevice;
		{ //Create cldevice
			cl_device_id* devices;
			cl_command_queue commandQueue = nullptr;
			size_t numDevices = 0;

			// First get the size of the devices buffer
			clGetContextInfo(clcontext, CL_CONTEXT_DEVICES, 0, nullptr, &numDevices);

			if (numDevices == 0)
			{
				std::cerr << "Error: No OpenCL devices available" << std::endl;
				return EXIT_FAILURE;
			}
			devices = new cl_device_id[numDevices];
			clGetContextInfo(clcontext, CL_CONTEXT_DEVICES, numDevices, devices, nullptr);
			cldevice = devices[0];
			delete[] devices;
		}
		{ //Create CL command queue
			clqueue = clCreateCommandQueue(clcontext, cldevice, 0, nullptr);
		}
		char platformname[300];
		char devicename[300];
		clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(platformname), &platformname, nullptr);
		clGetDeviceInfo(cldevice, CL_DEVICE_NAME, sizeof(devicename), &devicename, nullptr);
		std::cout << "OpenCL platform " << platformname << " device " << devicename << std::endl;
	}
	size_t size = 200 * 200 * 4; //w=200, h=200, 4 bytes per channel
	char* databuffer = new char[size];
	GLuint glbuffer, gltexture;
	cl_mem unsharedbuffer, sharedbuffer, unsharedtexture, sharedtexture;
	{ //Init test data
		glGenBuffers(1, &glbuffer);
		glBindBuffer(GL_ARRAY_BUFFER, glbuffer);
		glBufferData(GL_ARRAY_BUFFER, size, databuffer, GL_STREAM_DRAW);
		glBindBuffer(GL_ARRAY_BUFFER, GL_NONE);

		glGenTextures(1, &gltexture);
		glBindTexture(GL_TEXTURE_2D, gltexture);
		glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, 200, 200, 0, GL_RGBA, GL_UNSIGNED_BYTE, databuffer);
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); //Intel needs this for shared textures
		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); //Intel needs this for shared textures
		glBindTexture(GL_TEXTURE_2D, GL_NONE);

		sharedtexture = clCreateFromGLTexture(clcontext, CL_MEM_READ_WRITE, GL_TEXTURE_2D, 0, gltexture, nullptr);
		sharedbuffer = clCreateFromGLBuffer(clcontext, CL_MEM_READ_WRITE, glbuffer, nullptr);

		unsharedbuffer = clCreateBuffer(clcontext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, size, databuffer, nullptr);
		cl_image_format imgformat;
		cl_image_desc desc;
		imgformat.image_channel_data_type = CL_UNSIGNED_INT8;
		imgformat.image_channel_order = CL_RGBA;
		desc.image_type = CL_MEM_OBJECT_IMAGE2D;
		desc.image_width = 200;
		desc.image_height = 200;
		desc.image_depth = 1;
		desc.image_array_size = 1;
		desc.image_row_pitch = 0;
		desc.image_slice_pitch = 0;
		desc.num_mip_levels = 0;
		desc.num_samples = 0;
		desc.buffer = nullptr;
		unsharedtexture = clCreateImage(clcontext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &imgformat, &desc, databuffer, nullptr);
	}
	{
		const size_t origin[3] = { 0, 0, 0 };
		const size_t region[3] = { 200, 200, 1 };
		size_t pitch;
		//
		//MAIN PART BEGINS HERE
		//
		{ //OpenGL buffer
			std::cout << "Mapping buffer with OpenGL: ";
			glBindBuffer(GL_ARRAY_BUFFER, glbuffer);
			void* glmapptr = glMapBuffer(GL_ARRAY_BUFFER, GL_MAP_READ_BIT | GL_MAP_WRITE_BIT);
			glUnmapBuffer(GL_ARRAY_BUFFER);
			glBindBuffer(GL_ARRAY_BUFFER, GL_NONE);
			std::cout << "OK" << std::endl;
			glFinish();
		}
		{ //OpenCL unshared texture
			std::cout << "Mapping unshared texture with OpenCL: ";
			void* unsimgptr = clEnqueueMapImage(clqueue, unsharedtexture, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, origin, region, &pitch, nullptr, 0, nullptr, nullptr, nullptr); //This API call works fine for unshared objects
			clEnqueueUnmapMemObject(clqueue, unsharedtexture, unsimgptr, 0, nullptr, nullptr);
			std::cout << "OK" << std::endl;
		}
		{ //OpenCL shared texture
			std::cout << "Mapping shared texture with OpenCL: ";
			clEnqueueAcquireGLObjects(clqueue, 1, &sharedtexture, 0, nullptr, nullptr);
			void* shdimgptr = clEnqueueMapImage(clqueue, unsharedtexture, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, origin, region, &pitch, nullptr, 0, nullptr, nullptr, nullptr); //This API call works fine shared objects
			clEnqueueUnmapMemObject(clqueue, unsharedtexture, shdimgptr, 0, nullptr, nullptr);
			clEnqueueReleaseGLObjects(clqueue, 1, &sharedtexture, 0, nullptr, nullptr);
			std::cout << "OK" << std::endl;
		}
		{ //OpenCL unshared buffer
			std::cout << "Mapping unshared buffer with OpenCL: ";
			void* unsbufptr = clEnqueueMapBuffer(clqueue, unsharedbuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, nullptr, nullptr, nullptr); //This API call works fine for unshared buffers
			clEnqueueUnmapMemObject(clqueue, unsharedbuffer, unsbufptr, 0, nullptr, nullptr);
			std::cout << "OK" << std::endl;
		}
		{ //OpenCL shared buffer
			std::cout << "Mapping shared buffer with OpenCL (EXPECTING CRASH ON NVIDIA SYSTEMS): " << std::endl;
			clEnqueueAcquireGLObjects(clqueue, 1, &sharedbuffer, 0, nullptr, nullptr);
			//
			//CRITICAL PART BEGINS HERE
			//

			void* shdbufptr = clEnqueueMapBuffer(clqueue, sharedbuffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, nullptr, nullptr, nullptr);
			//On Nvidia systems when using shared objects, error 0xC0000005 occurs in ntdll.dll: write access violation at position 0xSOMETHING
			//This leaves my application in an unusable state
			//But it works fine everywhere else (tested on ARM, AMD, Intel systems)

			//
			//CRITICAL PART ENDS HERE
			//
			std::cout << "did not fail" << std::endl;
			clEnqueueUnmapMemObject(clqueue, sharedbuffer, shdbufptr, 0, nullptr, nullptr);
			clEnqueueReleaseGLObjects(clqueue, 1, &sharedbuffer, 0, nullptr, nullptr);
			std::cout << "OK" << std::endl;
		}
		//
		//MAIN PART ENDS HERE
		//
	}
	clFinish(clqueue);

	delete[] databuffer;
	clReleaseMemObject(sharedbuffer);
	clReleaseMemObject(unsharedbuffer);
	clReleaseMemObject(sharedtexture);
	clReleaseMemObject(unsharedtexture);

	clReleaseCommandQueue(clqueue);
	clReleaseContext(clcontext);

	glDeleteTextures(1, &gltexture);
	glDeleteBuffers(1, &glbuffer);

	glfwDestroyWindow(glfwwindow);
	glfwTerminate();
	return EXIT_SUCCESS;
}
Пример #26
0
int main(int argc, char **argv)
{
  /* test name */
  char name[] = "test_sampler_address_clamp";
  size_t global_work_size[1] = { 1 }, local_work_size[1]= { 1 };
  size_t srcdir_length, name_length, filename_size;
  char *filename = NULL;
  char *source = NULL;
  cl_device_id devices[1];
  cl_context context = NULL;
  cl_command_queue queue = NULL;
  cl_program program = NULL;
  cl_kernel kernel = NULL;
  cl_int result;
  int retval = -1;

  /* image parameters */
  cl_uchar4 *imageData;
  cl_image_format image_format;
  cl_image_desc image_desc;

  printf("Running test %s...\n", name);
  memset(&image_desc, 0, sizeof(cl_image_desc));
  image_desc.image_type = CL_MEM_OBJECT_IMAGE2D;
  image_desc.image_width = 4;
  image_desc.image_height = 4;
  image_format.image_channel_order = CL_RGBA;
  image_format.image_channel_data_type = CL_UNSIGNED_INT8;
  imageData = (cl_uchar4*)malloc (4 * 4 * sizeof(cl_uchar4));
  
  if (imageData == NULL)
    {
      puts("out of host memory\n");
      goto error;
    }
  memset (imageData, 1, 4*4*sizeof(cl_uchar4));

  /* determine file name of kernel source to load */
  srcdir_length = strlen(SRCDIR);
  name_length = strlen(name);
  filename_size = srcdir_length + name_length + 16;
  filename = (char *)malloc(filename_size + 1);
  if (!filename) 
    {
      puts("out of memory");
      goto error;
    }
  
  snprintf(filename, filename_size, "%s/%s.cl", SRCDIR, name);
  
  /* read source code */
  source = poclu_read_file (filename);
  TEST_ASSERT (source != NULL && "Kernel .cl not found.");

  /* setup an OpenCL context and command queue using default device */
  context = poclu_create_any_context();
  if (!context) 
    {
      puts("clCreateContextFromType call failed\n");
      goto error;
    }

  result = clGetContextInfo(context, CL_CONTEXT_DEVICES,
                            sizeof(cl_device_id), devices, NULL);
  if (result != CL_SUCCESS) 
    {
      puts("clGetContextInfo call failed\n");
      goto error;
    }

  queue = clCreateCommandQueue(context, devices[0], 0, NULL); 
  if (!queue) 
    {
      puts("clCreateCommandQueue call failed\n");
      goto error;
    }

  /* Create image */

  cl_mem image = clCreateImage (context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
                                &image_format, &image_desc, imageData, &result);
  if (result != CL_SUCCESS)
    {
      puts("image creation failed\n");
      goto error;
    }


  /* create and build program */
  program = clCreateProgramWithSource (context, 1, (const char **)&source,
                                       NULL, NULL);
  if (!program) 
    {
      puts("clCreateProgramWithSource call failed\n");
      goto error;
    }

  result = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); 
  if (result != CL_SUCCESS) 
    {
      puts("clBuildProgram call failed\n");
      goto error;
    }

  /* execute the kernel with give name */
  kernel = clCreateKernel(program, name, NULL); 
  if (!kernel) 
    {
      puts("clCreateKernel call failed\n");
      goto error;
    }

   result = clSetKernelArg( kernel, 0, sizeof(cl_mem), &image);
   if (result)
     {
       puts("clSetKernelArg failed\n");
       goto error;
     }

  result = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size, 
                                  local_work_size, 0, NULL, NULL); 
  if (result != CL_SUCCESS) 
    {
      puts("clEnqueueNDRangeKernel call failed\n");
      goto error;
    }

  result = clFinish(queue);
  if (result == CL_SUCCESS)
    retval = 0;

error:

  if (image)
    {
      clReleaseMemObject (image);
    }

  if (kernel) 
    {
      clReleaseKernel(kernel);
    }
  if (program) 
    {
      clReleaseProgram(program);
    }
  if (queue) 
    {
      clReleaseCommandQueue(queue);
    }
  if (context) 
    {
      clUnloadCompiler ();
      clReleaseContext (context);
    }
  if (source) 
    {
      free(source);
    }
  if (filename)
    {
      free(filename);
    }
  if (imageData)
    {
      free(imageData);
    }


  if (retval) 
    {
      printf("FAIL\n");
      return 1;
    }
 
  printf("OK\n");
  return 0;
}