Example #1
0
int main(int argc, char *argv[])
{
    cl_int status = 0;
    cl_int binSize = 256;
    cl_int groupSize = 16;
    cl_int subHistgCnt;
    cl_device_type dType = CL_DEVICE_TYPE_GPU;
    cl_platform_id platform = NULL;
    cl_device_id   device;
    cl_context     context;
    cl_command_queue commandQueue;
    cl_mem         imageBuffer;
    cl_mem     intermediateHistR, intermediateHistG, intermediateHistB; /*Intermediate Image Histogram buffer*/
    cl_uint *  midDeviceBinR, *midDeviceBinG, *midDeviceBinB;
    cl_uint  *deviceBinR,*deviceBinG,*deviceBinB;
    //Read a BMP Image
    Image *image;
    std::string filename = "sample_color.bmp";
    ReadBMPImage(filename, &image);
    if(image == NULL)
    {
        printf("File %s not present...\n", filename.c_str());
        return 0;
    }
    subHistgCnt  = (image->width * image->height)/(binSize*groupSize);
    midDeviceBinR = (cl_uint*)malloc(binSize * subHistgCnt * sizeof(cl_uint));
    midDeviceBinG = (cl_uint*)malloc(binSize * subHistgCnt * sizeof(cl_uint));
    midDeviceBinB = (cl_uint*)malloc(binSize * subHistgCnt * sizeof(cl_uint));
    deviceBinR    = (cl_uint*)malloc(binSize * sizeof(cl_uint));
    deviceBinG    = (cl_uint*)malloc(binSize * sizeof(cl_uint));
    deviceBinB    = (cl_uint*)malloc(binSize * sizeof(cl_uint));
    
    //Setup the OpenCL Platform, 
    //Get the first available platform. Use it as the default platform
    status = clGetPlatformIDs(1, &platform, NULL);
    LOG_OCL_ERROR(status, "clGetPlatformIDs Failed." );

    //Get the first available device 
    status = clGetDeviceIDs (platform, dType, 1, &device, NULL);
    LOG_OCL_ERROR(status, "clGetDeviceIDs Failed." );
    
    //Create an execution context for the selected platform and device. 
    cl_context_properties cps[3] = 
    {
        CL_CONTEXT_PLATFORM,
        (cl_context_properties)platform,
        0
    };
    context = clCreateContextFromType(
        cps,
        dType,
        NULL,
        NULL,
        &status);
    LOG_OCL_ERROR(status, "clCreateContextFromType Failed." );

    // Create command queue
    commandQueue = clCreateCommandQueue(context,
                                        device,
                                        0,
                                        &status);
    LOG_OCL_ERROR(status, "clCreateCommandQueue Failed." );
#if !defined(USE_HOST_MEMORY)
    //Create OpenCL device input buffer
    imageBuffer = clCreateBuffer(
        context,
        CL_MEM_READ_ONLY,
        sizeof(cl_uint) * image->width * image->height,
        NULL,
        &status); 
    LOG_OCL_ERROR(status, "clCreateBuffer Failed while creating the image buffer." );

    //Set input data 
    cl_event writeEvt;
    status = clEnqueueWriteBuffer(commandQueue,
                                  imageBuffer,
                                  CL_FALSE,
                                  0,
                                  image->width * image->height * sizeof(cl_uint),
                                  image->pixels,
                                  0,
                                  NULL,
                                  &writeEvt);
    LOG_OCL_ERROR(status, "clEnqueueWriteBuffer Failed while writing the image data." );
#else
    //Create OpenCL device input buffer
    imageBuffer = clCreateBuffer(
        context,
        CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR,
        sizeof(cl_uint) * image->width * image->height,
        image->pixels,
        &status); 
    LOG_OCL_ERROR(status, "clCreateBuffer Failed while creating the image buffer." );
#endif
    status = clFinish(commandQueue);
    LOG_OCL_ERROR(status, "clFinish Failed while writing the image data." );
    
    //Create OpenCL device output buffer
    intermediateHistR = clCreateBuffer(
        context, 
        CL_MEM_WRITE_ONLY,
        sizeof(cl_uint) * binSize * subHistgCnt, 
        NULL, 
        &status);
    LOG_OCL_ERROR(status, "clCreateBuffer Failed." );

    intermediateHistG = clCreateBuffer(
        context,
        CL_MEM_WRITE_ONLY,
        sizeof(cl_uint) * binSize * subHistgCnt,
        NULL,
        &status);
    LOG_OCL_ERROR(status, "clCreateBuffer Failed." );

    intermediateHistB = clCreateBuffer(
        context,
        CL_MEM_WRITE_ONLY,
        sizeof(cl_uint) * binSize * subHistgCnt,
        NULL,
        &status);
    LOG_OCL_ERROR(status, "clCreateBuffer Failed." );

    // Create a program from the kernel source
    cl_program program = clCreateProgramWithSource(context, 1,
            (const char **)&histogram_kernel, NULL, &status);
    LOG_OCL_ERROR(status, "clCreateProgramWithSource Failed." );

    // Build the program
    status = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
    if(status != CL_SUCCESS)
        LOG_OCL_COMPILER_ERROR(program, device);

    // Create the OpenCL kernel
    cl_kernel kernel = clCreateKernel(program, "histogram_kernel", &status);
    LOG_OCL_ERROR(status, "clCreateKernel Failed." );
    // Set the arguments of the kernel
    status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&imageBuffer); 
    status |= clSetKernelArg(kernel, 1, 3 * groupSize * binSize * sizeof(cl_uchar), NULL); 
    status |= clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&intermediateHistR);
    status |= clSetKernelArg(kernel, 3, sizeof(cl_mem), (void*)&intermediateHistG);
    status |= clSetKernelArg(kernel, 4, sizeof(cl_mem), (void*)&intermediateHistB);
    LOG_OCL_ERROR(status, "clSetKernelArg Failed." );
    // Execute the OpenCL kernel on the list
    cl_event ndrEvt;
    size_t globalThreads = (image->width * image->height) / (binSize*groupSize) * groupSize;
    size_t localThreads = groupSize;
    status = clEnqueueNDRangeKernel(
        commandQueue,
        kernel,
        1,
        NULL,
        &globalThreads,
        &localThreads,
        0,
        NULL,
        &ndrEvt);
    LOG_OCL_ERROR(status, "clEnqueueNDRangeKernel Failed." );

    status = clFinish(commandQueue);
    LOG_OCL_ERROR(status, "clFinish Failed." );

    //Read the histogram back into the host memory.
    memset(deviceBinR, 0, binSize * sizeof(cl_uint));
    memset(deviceBinG, 0, binSize * sizeof(cl_uint));
    memset(deviceBinB, 0, binSize * sizeof(cl_uint));
    cl_event readEvt[3];
    status = clEnqueueReadBuffer(
        commandQueue,
        intermediateHistR,
        CL_FALSE,
        0,
        subHistgCnt * binSize * sizeof(cl_uint),
        midDeviceBinR,
        0,
        NULL,
        &readEvt[0]);
    LOG_OCL_ERROR(status, "clEnqueueReadBuffer of intermediateHistR Failed." );
    
    status = clEnqueueReadBuffer(
        commandQueue,
        intermediateHistG,
        CL_FALSE,
        0,
        subHistgCnt * binSize * sizeof(cl_uint),
        midDeviceBinG,
        0,
        NULL,
        &readEvt[1]);
    LOG_OCL_ERROR(status, "clEnqueueReadBuffer of intermediateHistG Failed." );
    
    status = clEnqueueReadBuffer(
        commandQueue,
        intermediateHistB,
        CL_FALSE,
        0,
        subHistgCnt * binSize * sizeof(cl_uint),
        midDeviceBinB,
        0,
        NULL,
        &readEvt[2]);
    LOG_OCL_ERROR(status, "clEnqueueReadBuffer of intermediateHistB Failed." );
    
    status = clWaitForEvents(3, readEvt);
    //status = clFinish(commandQueue);
    LOG_OCL_ERROR(status, "clWaitForEvents for readEvt." );

    // Calculate final histogram bin 
    for(int i = 0; i < subHistgCnt; ++i)
    {
        for(int j = 0; j < binSize; ++j)
        {
            deviceBinR[j] += midDeviceBinR[i * binSize + j];
            deviceBinG[j] += midDeviceBinG[i * binSize + j];
            deviceBinB[j] += midDeviceBinB[i * binSize + j];
        }
    }

    // Validate the histogram operation. 
    // The idea behind this is that once a histogram is computed the sum of all the bins should be equal to the number of pixels.
    int totalPixelsR = 0;
    int totalPixelsG = 0;
    int totalPixelsB = 0;
    for(int j = 0; j < binSize; ++j)
    {
        totalPixelsR += deviceBinR[j];
        totalPixelsG += deviceBinG[j];
        totalPixelsB += deviceBinB[j];
    }
    printf ("Total Number of Red Pixels = %d\n",totalPixelsR);
    printf ("Total Number of Green Pixels = %d\n",totalPixelsG);
    printf ("Total Number of Blue Pixels = %d\n",totalPixelsB);
    ReleaseBMPImage(&image);
    //free all allocated memory
    free(midDeviceBinR);
    free(midDeviceBinG);
    free(midDeviceBinB);
    free(deviceBinR);
    free(deviceBinG);
    free(deviceBinB);

    return 0;
}
Example #2
0
int main()
{
    cl_platform_id platform = NULL;
    cl_device_id device = NULL;
    cl_context context = NULL;
    cl_command_queue command_queue = NULL;
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    cl_int status = 0;
    cl_event task_event, map_event;
    cl_device_type dType = CL_DEVICE_TYPE_GPU;
    cl_int image_width, image_height;
    cl_float4 *result;
    int i, j;
    cl_mem clImage, out;
    cl_bool support;
    int pixels_read = 8;

    //Setup the OpenCL Platform,
    //Get the first available platform. Use it as the default platform
    status = clGetPlatformIDs(1, &platform, NULL);
    LOG_OCL_ERROR(status, "clGetPlatformIDs Failed" );

    //Get the first available device
    status = clGetDeviceIDs (platform, dType, 1, &device, NULL);
    LOG_OCL_ERROR(status, "clGetDeviceIDs Failed" );
    
    /*Check if the device support images */
    clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(support), &support, NULL);
     if (support != CL_TRUE) {
         std::cout <<"IMAGES not supported\n";
         return 1;
     }
    //Create an execution context for the selected platform and device.
    cl_context_properties contextProperty[3] =
    {
        CL_CONTEXT_PLATFORM,
        (cl_context_properties)platform,
        0
    };
    context = clCreateContextFromType(
        contextProperty,
        dType,
        NULL,
        NULL,
        &status);
    LOG_OCL_ERROR(status, "clCreateContextFromType Failed" );

    /*Create command queue*/
    command_queue = clCreateCommandQueue(context,
                                        device,
                                        0,
                                        &status);
    LOG_OCL_ERROR(status, "clCreateCommandQueue Failed" );

    /* Create Image Object */
    //Create OpenCL device input image with the format and descriptor as below

    cl_image_format image_format;
    image_format.image_channel_data_type = CL_FLOAT;
    image_format.image_channel_order = CL_R;

    //We create a 5 X 5 2D image 
    image_width  = 5; 
    image_height = 5;
    cl_image_desc image_desc;
    image_desc.image_type   = CL_MEM_OBJECT_IMAGE2D;
    image_desc.image_width  = image_width;
    image_desc.image_height = image_height;
    image_desc.image_depth  = 1;
    image_desc.image_array_size  = 1;
    image_desc.image_row_pitch   = image_width*sizeof(float);
    image_desc.image_slice_pitch = 25*sizeof(float);
    image_desc.num_mip_levels = 0;
    image_desc.num_samples    = 0;
    image_desc.buffer         = NULL;
    
    /* Create output buffer */
    out = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float4)*pixels_read, NULL, &status);
    LOG_OCL_ERROR(status, "clCreateBuffer Failed" );

    size_t origin[] = {0,0,0};  /* Transfer target coordinate*/
    size_t region[] = {image_width,image_height,1};  /* Size of object to be transferred */
    float *data = (float *)malloc(image_width*image_height*sizeof(float));
    float pixels[] = {            /* Transfer Data */
        10, 20, 10, 40, 50,
        10, 20, 20, 40, 50,
        10, 20, 30, 40, 50,
        10, 20, 40, 40, 50,
        10, 20, 50, 40, 50
    };
    memcpy(data, pixels, image_width*image_height*sizeof(float));
    clImage = clCreateImage(context, CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR, &image_format, &image_desc, pixels, &status);
    LOG_OCL_ERROR(status, "clCreateImage Failed" );

    /* If the image was not created using CL_MEM_USE_HOST_PTR, then you can write the image data to the device using the 
       clEnqueueWriteImage function. */
    //status = clEnqueueWriteImage(command_queue, clImage, CL_TRUE, origin, region, 5*sizeof(float), 25*sizeof(float), data, 0, NULL, NULL);
    //LOG_OCL_ERROR(status, "clCreateBuffer Failed" );

    /* Build program */
    program = clCreateProgramWithSource(context, 1, (const char **)&sample_image_kernel,
                                        NULL, &status);
    LOG_OCL_ERROR(status, "clCreateProgramWithSource Failed" );

    // Build the program
    status = clBuildProgram(program, 1, &device, "", NULL, NULL);
    LOG_OCL_ERROR(status, "clBuildProgram Failed" );
    if(status != CL_SUCCESS)
    {
        if(status == CL_BUILD_PROGRAM_FAILURE)
            LOG_OCL_COMPILER_ERROR(program, device);
        LOG_OCL_ERROR(status, "clBuildProgram Failed" );
    }
    printf("Printing the image pixels\n");
    for (i=0; i<image_height; i++) {
        for (j=0; j<image_width; j++) {
            printf("%f ",data[i*image_width +j]);
        }
        printf("\n");
    }

    //Create kernel and set the kernel arguments
    kernel = clCreateKernel(program, "image_test", &status);
    clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&clImage);
    clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&out);

    /*********Image sampler with image repeated at every 1.0 normalized coordinate***********/
    /*If host side sampler is not required the sampler objects can also be created on the kernel code. 
      Don't pass the thirsd argument to the kernel and create  a sample object as shown below in the kernel code*/
    //const sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; 

    cl_sampler sampler = clCreateSampler (context,
                            CL_TRUE,
                            CL_ADDRESS_REPEAT,
                            CL_FILTER_NEAREST,
                            &status);
    clSetKernelArg(kernel, 2, sizeof(cl_sampler), (void*)&sampler);
    //Enqueue the kernel 
    status = clEnqueueTask(command_queue, kernel, 0, NULL, &task_event);
    LOG_OCL_ERROR(status, "clEnqueueTask Failed" );
    /* Map the result back to host address */
    result = (cl_float4*)clEnqueueMapBuffer(command_queue, out, CL_TRUE, CL_MAP_READ, 0, sizeof(cl_float4)*pixels_read, 1, &task_event, &map_event, &status);
    printf(" SAMPLER mode set to CL_ADDRESS_REPEAT | CL_FILTER_NEAREST\n");
    printf("\nPixel values retreived based on the filter and Addressing mode selected\n");
    printf("(float2)(0.5f,0.5f) = %f,%f,%f,%f\n",result[0].s[0],result[0].s[1],result[0].s[2],result[0].s[3]);
    printf("(float2)(0.8f,0.5f) = %f,%f,%f,%f\n",result[1].s[0],result[1].s[1],result[1].s[2],result[1].s[3]);
    printf("(float2)(1.3f,0.5f) = %f,%f,%f,%f\n",result[2].s[0],result[2].s[1],result[2].s[2],result[2].s[3]);
    printf("(float2)(0.5f,0.5f) = %f,%f,%f,%f\n",result[3].s[0],result[3].s[1],result[3].s[2],result[3].s[3]);
    printf("(float2)(0.5f,0.8f) = %f,%f,%f,%f\n",result[4].s[0],result[4].s[1],result[4].s[2],result[4].s[3]);
    printf("(float2)(0.5f,1.3f) = %f,%f,%f,%f\n",result[5].s[0],result[5].s[1],result[5].s[2],result[5].s[3]);
    printf("(float2)(4.5f,0.5f) = %f,%f,%f,%f\n",result[5].s[0],result[5].s[1],result[5].s[2],result[5].s[3]);
    printf("(float2)(5.0f,0.5f) = %f,%f,%f,%f\n",result[7].s[0],result[7].s[1],result[7].s[2],result[7].s[3]);
    clEnqueueUnmapMemObject(command_queue, out, result, 0, NULL, NULL);
    clReleaseSampler(sampler);

    /*********Image sampler with image mirrored at every 1.0 normalized coordinate***********/
    sampler = clCreateSampler (context,
                            CL_TRUE,
                            CL_ADDRESS_MIRRORED_REPEAT,
                            CL_FILTER_LINEAR,
                            &status);
    clSetKernelArg(kernel, 2, sizeof(cl_sampler), (void*)&sampler);
    //Enqueue the kernel 
    status = clEnqueueTask(command_queue, kernel, 0, NULL, &task_event);
    LOG_OCL_ERROR(status, "clEnqueueTask Failed" );
    /* Map the result back to host address */
    result = (cl_float4*)clEnqueueMapBuffer(command_queue, out, CL_TRUE, CL_MAP_READ, 0, sizeof(cl_float4)*pixels_read, 1, &task_event, &map_event, &status);
    printf(" SAMPLER mode set to CL_ADDRESS_MIRRORED_REPEAT | CL_FILTER_LINEAR\n");
    printf("\nPixel values retreived based on the filter and Addressing mode selected\n");
    printf("(float2)(0.5f,0.5f) = %f,%f,%f,%f\n",result[0].s[0],result[0].s[1],result[0].s[2],result[0].s[3]);
    printf("(float2)(0.8f,0.5f) = %f,%f,%f,%f\n",result[1].s[0],result[1].s[1],result[1].s[2],result[1].s[3]);
    printf("(float2)(1.3f,0.5f) = %f,%f,%f,%f\n",result[2].s[0],result[2].s[1],result[2].s[2],result[2].s[3]);
    printf("(float2)(0.5f,0.5f) = %f,%f,%f,%f\n",result[3].s[0],result[3].s[1],result[3].s[2],result[3].s[3]);
    printf("(float2)(0.5f,0.8f) = %f,%f,%f,%f\n",result[4].s[0],result[4].s[1],result[4].s[2],result[4].s[3]);
    printf("(float2)(0.5f,1.3f) = %f,%f,%f,%f\n",result[5].s[0],result[5].s[1],result[5].s[2],result[5].s[3]);
    printf("(float2)(4.5f,0.5f) = %f,%f,%f,%f\n",result[5].s[0],result[5].s[1],result[5].s[2],result[5].s[3]);
    printf("(float2)(5.0f,0.5f) = %f,%f,%f,%f\n",result[7].s[0],result[7].s[1],result[7].s[2],result[7].s[3]);
    clEnqueueUnmapMemObject(command_queue, out, result, 0, NULL, NULL);
    clReleaseSampler(sampler);
    /********************/

    //Free All OpenCL objects.
    clReleaseMemObject(out);
    clReleaseMemObject(clImage);
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    clReleaseCommandQueue(command_queue);
    clReleaseContext(context);
    return 0;

}
Example #3
0
int main(void) {
	CPUBitmap bitmap(DIM, DIM);
	unsigned char *ptr = bitmap.get_ptr();	

	cl_int clStatus; //Keeps track of the error values returned. 

	// Get platform and device information
	cl_platform_id * platforms = NULL;

	// Set up the Platform. Take a look at the MACROs used in this file. 
	// These are defined in common/ocl_macros.h
	OCL_CREATE_PLATFORMS(platforms);

	// Get the devices list and choose the type of device you want to run on
	cl_device_id *device_list = NULL;
	OCL_CREATE_DEVICE(platforms[1], DEVICE_TYPE, device_list);

	// Create OpenCL context for devices in device_list
	cl_context context;
	cl_context_properties props[3] =
	{
		CL_CONTEXT_PLATFORM,
		(cl_context_properties)platforms[0],
		0
	};
	// An OpenCL context can be associated to multiple devices, either CPU or GPU
	// based on the value of DEVICE_TYPE defined above.
	context = clCreateContext(NULL, num_devices, device_list, NULL, NULL, &clStatus);
	LOG_OCL_ERROR(clStatus, "clCreateContext Failed...");

	// Create a command queue for the first device in device_list
	cl_command_queue command_queue = clCreateCommandQueue(context, device_list[0], 0, &clStatus);
	LOG_OCL_ERROR(clStatus, "clCreateCommandQueue Failed...");

	// Create memory buffer
	cl_mem julia_clmem = clCreateBuffer(context, CL_MEM_READ_WRITE, bitmap.image_size() * sizeof(unsigned char), NULL, &clStatus);

	
	for (int u = 0; u < DIM * DIM; u++) {
		ptr[u * 4] = 0;
		ptr[u * 4 + 1] = 0;
		ptr[u * 4 + 2] = 0;
		ptr[u * 4 + 3] = 0;
	}	

	// Enqueue a write buffer
	clStatus = clEnqueueWriteBuffer(command_queue, julia_clmem, CL_TRUE, 0, bitmap.image_size() * sizeof(unsigned char), ptr, 0, NULL, NULL);
	LOG_OCL_ERROR(clStatus, "clEnqueueWriteBuffer Failed...");

	// Create a program from the kernel source
	cl_program program = clCreateProgramWithSource(context, 1, (const char **)&julia_kernel, NULL, &clStatus);
	LOG_OCL_ERROR(clStatus, "clCreateProgramWithSource Failed...");

	// Build the program
	clStatus = clBuildProgram(program, 1, device_list, NULL, NULL, NULL);
	if (clStatus != CL_SUCCESS)
		LOG_OCL_COMPILER_ERROR(program, device_list[0]);

	// Create the OpenCL kernel
	cl_kernel kernel = clCreateKernel(program, "julia_kernel", &clStatus);

	clStatus = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&julia_clmem);
	LOG_OCL_ERROR(clStatus, "clSetKernelArg Failed...");
	
	// Execute the OpenCL kernel on the list
	size_t global_size[2] = { DIM, DIM };
	size_t local_size[2] = { 1, 1 };
	cl_event julia_event;
	clStatus = clEnqueueNDRangeKernel(command_queue, kernel, 2, NULL, global_size, local_size, 0, NULL, &julia_event);
	LOG_OCL_ERROR(clStatus, "clEnqueueNDRangeKernel Failed...");

	// Read the memory buffer C_clmem on the device to the host allocated buffer
	unsigned char *A = (unsigned char*)malloc(bitmap.image_size() * sizeof(unsigned char));
	clStatus = clEnqueueReadBuffer(command_queue, julia_clmem, CL_TRUE, 0, bitmap.image_size() * sizeof(unsigned char), ptr, 1, &julia_event, NULL);
	LOG_OCL_ERROR(clStatus, "clEnqueueReadBuffer Failed...");
	
	// Clean up and wait for all the comands to complete.
	clStatus = clFinish(command_queue);

	// Finally release all OpenCL objects and release the host buffers.
	clStatus = clReleaseKernel(kernel);
	clStatus = clReleaseProgram(program);
	clStatus = clReleaseMemObject(julia_clmem);
	clStatus = clReleaseCommandQueue(command_queue);
	clStatus = clReleaseContext(context);	
	free(platforms);
	free(device_list);

	bitmap.display_and_exit();

	return 0;
}