void test01 ( void ) /******************************************************************************/ /* Purpose: TEST01 tests PPMA_EXAMPLE and PPMA_WRITE. Licensing: This code is distributed under the GNU LGPL license. Modified: 27 May 2011 Author: John Burkardt */ { int *b; int error; char *file_out_name = "test01.ascii.ppm"; int *g; int *r; int xsize = 300; int ysize = 300; printf ( "\n" ); printf ( "TEST01:\n" ); printf ( " PPMA_EXAMPLE sets up PPM data.\n" ); printf ( " PPMA_WRITE writes an ASCII PPM file.\n" ); printf ( "\n" ); printf ( " Writing the file \"%s\".\n", file_out_name ); r = ( int * ) malloc ( xsize * ysize * sizeof ( int ) ); g = ( int * ) malloc ( xsize * ysize * sizeof ( int ) ); b = ( int * ) malloc ( xsize * ysize * sizeof ( int ) ); ppma_example ( xsize, ysize, r, g, b ); printf ( "\n" ); printf ( " PPMA_EXAMPLE has set up the data.\n" ); error = ppma_write ( file_out_name, xsize, ysize, r, g, b ); if ( error == 1 ) { printf ( "\n" ); printf ( "TEST01 - Fatal error!\n" ); printf ( " PPMA_WRITE failed!\n" ); return; } printf ( "\n" ); printf ( " PPMA_WRITE was successful.\n" ); free ( r ); free ( g ); free ( b ); /* Now have PPMA_READ_TEST look at the file we think we created. */ ppma_read_test ( file_out_name ); if ( error == 1 ) { printf ( "\n" ); printf ( "TEST01 - Fatal error!\n" ); printf ( " PPMA_READ_TEST failed to read the file we wrote!\n" ); return; } printf ( "\n" ); printf ( " PPMA_READ_TEST was able to read the file we wrote.\n" ); return; }
int main(int argc, char *argv[]) { int error, xsize, ysize, rgb_max; int *r, *b, *g; float *gray, *congray, *congray_cl; // identity kernel // float filter[] = { // 0,0,0,0,0,0,0, // 0,0,0,0,0,0,0, // 0,0,0,0,0,0,0, // 0,0,0,1,0,0,0, // 0,0,0,0,0,0,0, // 0,0,0,0,0,0,0, // 0,0,0,0,0,0,0, // }; // 45 degree motion blur float filter[] = {0, 0, 0, 0, 0, 0.0145, 0, 0, 0, 0, 0, 0.0376, 0.1283, 0.0145, 0, 0, 0, 0.0376, 0.1283, 0.0376, 0, 0, 0, 0.0376, 0.1283, 0.0376, 0, 0, 0, 0.0376, 0.1283, 0.0376, 0, 0, 0, 0.0145, 0.1283, 0.0376, 0, 0, 0, 0, 0, 0.0145, 0, 0, 0, 0, 0}; // mexican hat kernel // float filter[] = { // 0, 0,-1,-1,-1, 0, 0, // 0,-1,-3,-3,-3,-1, 0, // -1,-3, 0, 7, 0,-3,-1, // -1,-3, 7,24, 7,-3,-1, // -1,-3, 0, 7, 0,-3,-1, // 0,-1,-3,-3,-3,-1, 0, // 0, 0,-1,-1,-1, 0, 0 // }; if(argc != 3) { fprintf(stderr, "Usage: %s image.ppm num_loops\n", argv[0]); abort(); } const char* filename = argv[1]; const int num_loops = atoi(argv[2]); // -------------------------------------------------------------------------- // load image // -------------------------------------------------------------------------- printf("Reading ``%s''\n", filename); ppma_read(filename, &xsize, &ysize, &rgb_max, &r, &g, &b); printf("Done reading ``%s'' of size %dx%d\n", filename, xsize, ysize); // -------------------------------------------------------------------------- // allocate CPU buffers // -------------------------------------------------------------------------- posix_memalign((void**)&gray, 32, xsize*ysize*sizeof(float)); if(!gray) { fprintf(stderr, "alloc gray"); abort(); } posix_memalign((void**)&congray, 32, xsize*ysize*sizeof(float)); if(!congray) { fprintf(stderr, "alloc gray"); abort(); } posix_memalign((void**)&congray_cl, 32, xsize*ysize*sizeof(float)); if(!congray_cl) { fprintf(stderr, "alloc gray"); abort(); } // -------------------------------------------------------------------------- // convert image to grayscale // -------------------------------------------------------------------------- for(int n = 0; n < xsize*ysize; ++n) gray[n] = (0.21f*r[n])/rgb_max + (0.72f*g[n])/rgb_max + (0.07f*b[n])/rgb_max; // -------------------------------------------------------------------------- // execute filter on cpu // -------------------------------------------------------------------------- for(int i = HALF_FILTER_WIDTH; i < ysize - HALF_FILTER_WIDTH; ++i) { for(int j = HALF_FILTER_WIDTH; j < xsize - HALF_FILTER_WIDTH; ++j) { float sum = 0; for(int k = -HALF_FILTER_WIDTH; k <= HALF_FILTER_WIDTH; ++k) { for(int l = -HALF_FILTER_WIDTH; l <= HALF_FILTER_WIDTH; ++l) { sum += gray[(i+k)*xsize + (j+l)] * filter[(k+HALF_FILTER_WIDTH)*FILTER_WIDTH + (l+HALF_FILTER_WIDTH)]; } } congray[i*xsize + j] = sum; } } // -------------------------------------------------------------------------- // output cpu filtered image // -------------------------------------------------------------------------- printf("Writing cpu filtered image\n"); for(int n = 0; n < xsize*ysize; ++n) r[n] = g[n] = b[n] = (int)(congray[n] * rgb_max); error = ppma_write("output_cpu.ppm", xsize, ysize, r, g, b); if(error) { fprintf(stderr, "error writing image"); abort(); } // -------------------------------------------------------------------------- // get an OpenCL context and queue // -------------------------------------------------------------------------- cl_context ctx; cl_command_queue queue; create_context_on(CHOOSE_INTERACTIVELY, CHOOSE_INTERACTIVELY, 0, &ctx, &queue, 0); print_device_info_from_queue(queue); // -------------------------------------------------------------------------- // load kernels // -------------------------------------------------------------------------- char *knl_text = read_file("convolution.cl"); cl_kernel knl = kernel_from_string(ctx, knl_text, "convolution", NULL); free(knl_text); #ifdef NON_OPTIMIZED int deviceWidth = xsize; #else int deviceWidth = ((xsize + WGX - 1)/WGX)* WGX; #endif int deviceHeight = ysize; size_t deviceDataSize = deviceHeight*deviceWidth*sizeof(float); // -------------------------------------------------------------------------- // allocate device memory // -------------------------------------------------------------------------- cl_int status; cl_mem buf_gray = clCreateBuffer(ctx, CL_MEM_READ_ONLY, deviceDataSize, 0, &status); CHECK_CL_ERROR(status, "clCreateBuffer"); cl_mem buf_congray = clCreateBuffer(ctx, CL_MEM_WRITE_ONLY, deviceDataSize, 0, &status); CHECK_CL_ERROR(status, "clCreateBuffer"); cl_mem buf_filter = clCreateBuffer(ctx, CL_MEM_READ_ONLY, FILTER_WIDTH*FILTER_WIDTH*sizeof(float), 0, &status); CHECK_CL_ERROR(status, "clCreateBuffer"); // -------------------------------------------------------------------------- // transfer to device // -------------------------------------------------------------------------- #ifdef NON_OPTIMIZED CALL_CL_SAFE(clEnqueueWriteBuffer( queue, buf_gray, /*blocking*/ CL_TRUE, /*offset*/ 0, deviceDataSize, gray, 0, NULL, NULL)); #else size_t buffer_origin[3] = {0,0,0}; size_t host_origin[3] = {0,0,0}; size_t region[3] = {deviceWidth*sizeof(float), ysize, 1}; clEnqueueWriteBufferRect(queue, buf_gray, CL_TRUE, buffer_origin, host_origin, region, deviceWidth*sizeof(float), 0, xsize*sizeof(float), 0, gray, 0, NULL, NULL); #endif CALL_CL_SAFE(clEnqueueWriteBuffer( queue, buf_filter, /*blocking*/ CL_TRUE, /*offset*/ 0, FILTER_WIDTH*FILTER_WIDTH*sizeof(float), filter, 0, NULL, NULL)); // -------------------------------------------------------------------------- // run code on device // -------------------------------------------------------------------------- cl_int rows = ysize; cl_int cols = xsize; cl_int filterWidth = FILTER_WIDTH; cl_int paddingPixels = 2*HALF_FILTER_WIDTH; size_t local_size[] = { WGX, WGY }; size_t global_size[] = { ((xsize-paddingPixels + local_size[0] - 1)/local_size[0])* local_size[0], ((ysize-paddingPixels + local_size[1] - 1)/local_size[1])* local_size[1], }; cl_int localWidth = local_size[0] + paddingPixels; cl_int localHeight = local_size[1] + paddingPixels; size_t localMemSize = localWidth * localHeight * sizeof(float); CALL_CL_SAFE(clSetKernelArg(knl, 0, sizeof(buf_gray), &buf_gray)); CALL_CL_SAFE(clSetKernelArg(knl, 1, sizeof(buf_congray), &buf_congray)); CALL_CL_SAFE(clSetKernelArg(knl, 2, sizeof(buf_filter), &buf_filter)); CALL_CL_SAFE(clSetKernelArg(knl, 3, sizeof(rows), &rows)); CALL_CL_SAFE(clSetKernelArg(knl, 4, sizeof(cols), &cols)); CALL_CL_SAFE(clSetKernelArg(knl, 5, sizeof(filterWidth), &filterWidth)); CALL_CL_SAFE(clSetKernelArg(knl, 6, localMemSize, NULL)); CALL_CL_SAFE(clSetKernelArg(knl, 7, sizeof(localHeight), &localHeight)); CALL_CL_SAFE(clSetKernelArg(knl, 8, sizeof(localWidth), &localWidth)); // -------------------------------------------------------------------------- // print kernel info // -------------------------------------------------------------------------- print_kernel_info(queue, knl); CALL_CL_SAFE(clFinish(queue)); timestamp_type tic, toc; get_timestamp(&tic); for(int loop = 0; loop < num_loops; ++loop) { CALL_CL_SAFE(clEnqueueNDRangeKernel(queue, knl, 2, NULL, global_size, local_size, 0, NULL, NULL)); // Edit: Copy the blurred image to input buffer #ifdef NON_OPTIMIZED CALL_CL_SAFE(clEnqueueCopyBuffer(queue, buf_congray, buf_gray, 0, 0, deviceDataSize, 0, NULL, NULL)); #else clEnqueueCopyBufferRect(queue, buf_congray, buf_gray, buffer_origin, host_origin, region, deviceWidth*sizeof(float), 0, xsize*sizeof(float), 0, 0, NULL, NULL); #endif } CALL_CL_SAFE(clFinish(queue)); get_timestamp(&toc); double elapsed = timestamp_diff_in_seconds(tic,toc)/num_loops; printf("%f s\n", elapsed); printf("%f MPixels/s\n", xsize*ysize/1e6/elapsed); printf("%f GBit/s\n", 2*xsize*ysize*sizeof(float)/1e9/elapsed); printf("%f GFlop/s\n", (xsize-HALF_FILTER_WIDTH)*(ysize-HALF_FILTER_WIDTH) *FILTER_WIDTH*FILTER_WIDTH/1e9/elapsed); // -------------------------------------------------------------------------- // transfer back & check // -------------------------------------------------------------------------- #ifdef NON_OPTIMIZED CALL_CL_SAFE(clEnqueueReadBuffer( queue, buf_congray, /*blocking*/ CL_TRUE, /*offset*/ 0, xsize * ysize * sizeof(float), congray_cl, 0, NULL, NULL)); #else buffer_origin[0] = 3*sizeof(float); buffer_origin[1] = 3; buffer_origin[2] = 0; host_origin[0] = 3*sizeof(float); host_origin[1] = 3; host_origin[2] = 0; region[0] = (xsize-paddingPixels)*sizeof(float); region[1] = (ysize-paddingPixels); region[2] = 1; clEnqueueReadBufferRect(queue, buf_congray, CL_TRUE, buffer_origin, host_origin, region, deviceWidth*sizeof(float), 0, xsize*sizeof(float), 0, congray_cl, 0, NULL, NULL); #endif // -------------------------------------------------------------------------- // output OpenCL filtered image // -------------------------------------------------------------------------- printf("Writing OpenCL filtered image\n"); // Edit: Keep pixel value in the interval [0, 255] to reduce boundary effect for(int n = 0; n < xsize*ysize; ++n) { int color = (int)(congray_cl[n] * rgb_max); if (color < 0) { color = 0; } else if (color > 255) { color = 255; } r[n] = g[n] = b[n] = color; } error = ppma_write("output_cl.ppm", xsize, ysize, r, g, b); if(error) { fprintf(stderr, "error writing image"); abort(); } // -------------------------------------------------------------------------- // clean up // -------------------------------------------------------------------------- CALL_CL_SAFE(clReleaseMemObject(buf_congray)); CALL_CL_SAFE(clReleaseMemObject(buf_gray)); CALL_CL_SAFE(clReleaseMemObject(buf_filter)); CALL_CL_SAFE(clReleaseKernel(knl)); CALL_CL_SAFE(clReleaseCommandQueue(queue)); CALL_CL_SAFE(clReleaseContext(ctx)); free(gray); free(congray); free(congray_cl); free(r); free(b); free(g); }