static int initialize(int use_gpu) { cl_int result; size_t size; #ifndef POCL_HSA // create OpenCL context cl_platform_id platform_id; if (clGetPlatformIDs(1, &platform_id, NULL) != CL_SUCCESS) { printf("ERROR: clGetPlatformIDs(1,*,0) failed\n"); return -1; } cl_context_properties ctxprop[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id, 0}; device_type = use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU; context = clCreateContextFromType( ctxprop, device_type, NULL, NULL, NULL ); #else context = poclu_create_any_context(); #endif if( !context ) { printf("ERROR: clCreateContextFromType(%s) failed\n", use_gpu ? "GPU" : "CPU"); return -1; } // get the list of GPUs result = clGetContextInfo( context, CL_CONTEXT_DEVICES, 0, NULL, &size ); num_devices = (int) (size / sizeof(cl_device_id)); if( result != CL_SUCCESS || num_devices < 1 ) { printf("ERROR: clGetContextInfo() failed\n"); return -1; } device_list = new cl_device_id[num_devices]; if( !device_list ) { printf("ERROR: new cl_device_id[] failed\n"); return -1; } result = clGetContextInfo( context, CL_CONTEXT_DEVICES, size, device_list, NULL ); if( result != CL_SUCCESS ) { printf("ERROR: clGetContextInfo() failed\n"); return -1; } // create command queue for the first device cmd_queue = clCreateCommandQueue( context, device_list[0], 0, NULL ); if( !cmd_queue ) { printf("ERROR: clCreateCommandQueue() failed\n"); return -1; } return 0; }
/// // functions for preparing create opencl program, contains CreateContext, CreateProgram, CreateCommandQueue, CreateMemBuffer, and Cleanup // Create an OpenCL context on the first available GPU platform. cl_context CreateContext() { cl_context context = NULL; cl_uint platformIdCount = 0; cl_int errNum; #ifndef POCL_HSA // get number of platforms clGetPlatformIDs (0, NULL, &platformIdCount); std::vector<cl_platform_id> platformIds(platformIdCount); clGetPlatformIDs (platformIdCount, platformIds.data(), NULL); // In this example, first platform is a CPU, the second one is a GPU. we just choose the first available device. cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platformIds[1], 0 }; context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU, NULL, NULL, &errNum); if (errNum != CL_SUCCESS) { std::cout << "Could not create GPU context, trying CPU..." << std::endl; context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_CPU, NULL, NULL, &errNum); if (errNum != CL_SUCCESS) { std::cerr << "Failed to create an OpenCL GPU or CPU context." << std::endl; return NULL; } } #else context = poclu_create_any_context(); #endif return context; }
int main(int argc, char **argv) { /* test name */ char name[] = "test_image_query_funcs"; size_t global_work_size[1] = { 1 }, local_work_size[1]= { 1 }; size_t srcdir_length, name_length, filename_size; char *filename = NULL; char *source = NULL; cl_device_id devices[1]; cl_context context = NULL; cl_command_queue queue = NULL; cl_program program = NULL; cl_kernel kernel = NULL; cl_int err; /* image parameters */ cl_uchar4 *imageData; cl_image_format image_format; cl_image_desc image2_desc, image3_desc; printf("Running test %s...\n", name); memset(&image2_desc, 0, sizeof(cl_image_desc)); image2_desc.image_type = CL_MEM_OBJECT_IMAGE2D; image2_desc.image_width = 2; image2_desc.image_height = 4; memset(&image3_desc, 0, sizeof(cl_image_desc)); image3_desc.image_type = CL_MEM_OBJECT_IMAGE3D; image3_desc.image_width = 2; image3_desc.image_height = 4; image3_desc.image_depth = 8; image_format.image_channel_order = CL_RGBA; image_format.image_channel_data_type = CL_UNSIGNED_INT8; imageData = (cl_uchar4*)malloc (4 * 4 * sizeof(cl_uchar4)); TEST_ASSERT (imageData != NULL && "out of host memory\n"); memset (imageData, 1, 4*4*sizeof(cl_uchar4)); /* determine file name of kernel source to load */ srcdir_length = strlen(SRCDIR); name_length = strlen(name); filename_size = srcdir_length + name_length + 16; filename = (char *)malloc(filename_size + 1); TEST_ASSERT (filename != NULL && "out of host memory\n"); snprintf(filename, filename_size, "%s/%s.cl", SRCDIR, name); /* read source code */ source = poclu_read_file (filename); TEST_ASSERT (source != NULL && "Kernel .cl not found."); /* setup an OpenCL context and command queue using default device */ context = poclu_create_any_context(); TEST_ASSERT (context != NULL && "clCreateContextFromType call failed\n"); cl_sampler external_sampler = clCreateSampler ( context, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &err); CHECK_OPENCL_ERROR_IN ("clCreateSampler"); CHECK_CL_ERROR (clGetContextInfo (context, CL_CONTEXT_DEVICES, sizeof (cl_device_id), devices, NULL)); queue = clCreateCommandQueue (context, devices[0], 0, &err); CHECK_OPENCL_ERROR_IN ("clCreateCommandQueue"); /* Create image */ cl_mem image2 = clCreateImage (context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &image_format, &image2_desc, imageData, &err); CHECK_OPENCL_ERROR_IN ("clCreateImage image2"); cl_mem image3 = clCreateImage (context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &image_format, &image3_desc, imageData, &err); CHECK_OPENCL_ERROR_IN ("clCreateImage image3"); unsigned color[4] = { 2, 9, 11, 7 }; size_t orig[3] = { 0, 0, 0 }; size_t reg[3] = { 2, 4, 1 }; err = clEnqueueFillImage (queue, image2, color, orig, reg, 0, NULL, NULL); CHECK_OPENCL_ERROR_IN ("clCreateImage image3"); /* create and build program */ program = clCreateProgramWithSource (context, 1, (const char **)&source, NULL, &err); CHECK_OPENCL_ERROR_IN ("clCreateProgramWithSource"); err = clBuildProgram (program, 0, NULL, NULL, NULL, NULL); CHECK_OPENCL_ERROR_IN ("clBuildProgram"); /* execute the kernel with give name */ kernel = clCreateKernel (program, name, NULL); CHECK_OPENCL_ERROR_IN ("clCreateKernel"); err = clSetKernelArg (kernel, 0, sizeof (cl_mem), &image2); CHECK_OPENCL_ERROR_IN ("clSetKernelArg 0"); err = clSetKernelArg (kernel, 1, sizeof (cl_mem), &image3); CHECK_OPENCL_ERROR_IN ("clSetKernelArg 1"); err = clSetKernelArg (kernel, 2, sizeof (cl_sampler), &external_sampler); CHECK_OPENCL_ERROR_IN ("clSetKernelArg 2"); err = clEnqueueNDRangeKernel (queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL); CHECK_OPENCL_ERROR_IN ("clEnqueueNDRangeKernel"); err = clFinish (queue); CHECK_OPENCL_ERROR_IN ("clFinish"); clReleaseMemObject (image2); clReleaseMemObject (image3); clReleaseKernel (kernel); clReleaseProgram (program); clReleaseCommandQueue (queue); clReleaseSampler (external_sampler); clUnloadCompiler (); clReleaseContext (context); free (source); free (filename); free (imageData); printf("OK\n"); return 0; }
int main(int argc, char **argv) { /* test name */ char name[] = "test_image_query_funcs"; size_t global_work_size[1] = { 1 }, local_work_size[1]= { 1 }; size_t srcdir_length, name_length, filename_size; size_t source_size, source_read; char const *sources[1]; char *filename = NULL; char *source = NULL; FILE *source_file = NULL; cl_device_id devices[1]; cl_context context = NULL; cl_command_queue queue = NULL; cl_program program = NULL; cl_kernel kernel = NULL; cl_int result; int retval = -1; /* image parameters */ cl_uchar4 *imageData; cl_image_format image_format; cl_image_desc image2_desc, image3_desc; printf("Running test %s...\n", name); memset(&image2_desc, 0, sizeof(cl_image_desc)); image2_desc.image_type = CL_MEM_OBJECT_IMAGE2D; image2_desc.image_width = 2; image2_desc.image_height = 4; memset(&image3_desc, 0, sizeof(cl_image_desc)); image3_desc.image_type = CL_MEM_OBJECT_IMAGE3D; image3_desc.image_width = 2; image3_desc.image_height = 4; image3_desc.image_depth = 8; image_format.image_channel_order = CL_RGBA; image_format.image_channel_data_type = CL_UNSIGNED_INT8; imageData = (cl_uchar4*)malloc (4 * 4 * sizeof(cl_uchar4)); if (imageData == NULL) { puts("out of host memory\n"); goto error; } memset (imageData, 1, 4*4*sizeof(cl_uchar4)); /* determine file name of kernel source to load */ srcdir_length = strlen(SRCDIR); name_length = strlen(name); filename_size = srcdir_length + name_length + 16; filename = (char *)malloc(filename_size + 1); if (!filename) { puts("out of memory"); goto error; } snprintf(filename, filename_size, "%s/%s.cl", SRCDIR, name); /* read source code */ source_file = fopen(filename, "r"); if (!source_file) { puts("source file not found\n"); goto error; } fseek(source_file, 0, SEEK_END); source_size = ftell(source_file); fseek(source_file, 0, SEEK_SET); source = (char *)malloc(source_size + 1); if (!source) { puts("out of memory\n"); goto error; } source_read = fread(source, 1, source_size, source_file); if (source_read != source_size) { puts("error reading from file\n"); goto error; } source[source_size] = '\0'; fclose(source_file); source_file = NULL; /* setup an OpenCL context and command queue using default device */ context = poclu_create_any_context(); if (!context) { puts("clCreateContextFromType call failed\n"); goto error; } result = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(cl_device_id), devices, NULL); if (result != CL_SUCCESS) { puts("clGetContextInfo call failed\n"); goto error; } queue = clCreateCommandQueue(context, devices[0], 0, NULL); if (!queue) { puts("clCreateCommandQueue call failed\n"); goto error; } /* Create image */ cl_mem image2 = clCreateImage (context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &image_format, &image2_desc, imageData, &result); if (result != CL_SUCCESS) { puts("image2 creation failed\n"); goto error; } cl_mem image3 = clCreateImage (context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &image_format, &image3_desc, imageData, &result); if (result != CL_SUCCESS) { puts("image3 creation failed\n"); goto error; } /* create and build program */ sources[0] = source; program = clCreateProgramWithSource(context, 1, sources, NULL, NULL); if (!program) { puts("clCreateProgramWithSource call failed\n"); goto error; } result = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); if (result != CL_SUCCESS) { puts("clBuildProgram call failed\n"); goto error; } /* execute the kernel with give name */ kernel = clCreateKernel(program, name, NULL); if (!kernel) { puts("clCreateKernel call failed\n"); goto error; } result = clSetKernelArg( kernel, 0, sizeof(cl_mem), &image2); if (result) { puts("clSetKernelArg 0 failed\n"); goto error; } result = clSetKernelArg( kernel, 1, sizeof(cl_mem), &image3); if (result) { puts("clSetKernelArg 1 failed\n"); goto error; } result = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL); if (result != CL_SUCCESS) { puts("clEnqueueNDRangeKernel call failed\n"); goto error; } result = clFinish(queue); if (result == CL_SUCCESS) retval = 0; error: if (kernel) { clReleaseKernel(kernel); } if (program) { clReleaseProgram(program); } if (queue) { clReleaseCommandQueue(queue); } if (context) { clReleaseContext(context); } if (source_file) { fclose(source_file); } if (source) { free(source); } if (filename) { free(filename); } if (imageData) { free(imageData); } if (retval) { printf("FAIL\n"); return 1; } printf("OK\n"); return 0; }
int exec_scalarwave_kernel(char const *const program_source, cl_double *const phi, cl_double const *const phi_p, cl_double const *const phi_p_p, grid_t const *const grid) { static int initialised = 0; static cl_context context; static cl_command_queue cmd_queue; static cl_program program; static cl_kernel kernel; if (!initialised) { initialised = 1; context = poclu_create_any_context(); if (!context) return -1; size_t ndevices; clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &ndevices); ndevices /= sizeof(cl_device_id); cl_device_id *devices = (cl_device_id*)malloc(ndevices * sizeof(cl_device_id)); clGetContextInfo(context, CL_CONTEXT_DEVICES, ndevices*sizeof(cl_device_id), devices, NULL); cmd_queue = clCreateCommandQueue(context, devices[0], 0, NULL); if (!cmd_queue) return -1; program = clCreateProgramWithSource(context, 1, (const char**)&program_source, NULL, NULL); if (!program) return -1; int ierr; ierr = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); if (ierr) return -1; kernel = clCreateKernel(program, "scalarwave", NULL); if (!kernel) return -1; free (devices); } size_t const npoints = grid->ai * grid->aj * grid->ak; cl_mem const mem_phi = clCreateBuffer(context, 0, npoints*sizeof(*phi), NULL, NULL); if (!mem_phi) return -1; cl_mem const mem_phi_p = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, npoints*sizeof(*phi_p), (cl_double*)phi_p, NULL); if (!mem_phi_p) return -1; cl_mem const mem_phi_p_p = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, npoints*sizeof(*phi_p_p), (cl_double*)phi_p_p, NULL); if (!mem_phi_p_p) return -1; cl_mem const mem_grid = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(*grid), (grid_t*)grid, NULL); if (!mem_grid) return -1; int ierr; ierr = clSetKernelArg(kernel, 0, sizeof(cl_mem), &mem_phi); if (ierr) return -1; ierr = clSetKernelArg(kernel, 1, sizeof(cl_mem), &mem_phi_p); if (ierr) return -1; ierr = clSetKernelArg(kernel, 2, sizeof(cl_mem), &mem_phi_p_p); if (ierr) return -1; ierr = clSetKernelArg(kernel, 3, sizeof(cl_mem), &mem_grid); if (ierr) return -1; size_t const global_work_size[3] = {grid->ai, grid->aj, grid->ak}; size_t const local_work_size[3] = {GRID_GRANULARITY, GRID_GRANULARITY, GRID_GRANULARITY}; ierr = clEnqueueNDRangeKernel(cmd_queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL); if (ierr) return -1; ierr = clFinish(cmd_queue); if (ierr) return -1; ierr = clEnqueueReadBuffer(cmd_queue, mem_phi, CL_TRUE, 0, npoints*sizeof(*phi), phi, 0, NULL, NULL); if (ierr) return -1; clReleaseMemObject(mem_phi); clReleaseMemObject(mem_phi_p); clReleaseMemObject(mem_phi_p_p); clReleaseMemObject(mem_grid); /* clReleaseKernel(kernel); */ /* clReleaseProgram(program); */ /* clReleaseCommandQueue(cmd_queue); */ /* clReleaseContext(context); */ return 0; }
int call_test(const char *name) { size_t global_work_size[1] = { 1 }, local_work_size[1]= { 1 }; size_t srcdir_length, name_length, filename_size; size_t source_size, source_read; char const *sources[1]; char *filename = NULL; char *source = NULL; FILE *source_file = NULL; cl_device_id devices[1]; cl_context context = NULL; cl_command_queue queue = NULL; cl_program program = NULL; cl_kernel kernel = NULL; cl_int result; int retval = -1; assert(name); /* determine file name of kernel source to load */ srcdir_length = strlen(SRCDIR); name_length = strlen(name); filename_size = srcdir_length + name_length + 16; filename = (char *)malloc(filename_size + 1); if (!filename) { puts("out of memory"); goto error; } snprintf(filename, filename_size, "%s/%s.cl", SRCDIR, name); /* read source code */ source_file = fopen(filename, "r"); if (!source_file) { puts("source file not found\n"); goto error; } fseek(source_file, 0, SEEK_END); source_size = ftell(source_file); fseek(source_file, 0, SEEK_SET); source = (char *)malloc(source_size + 1); if (!source) { puts("out of memory\n"); goto error; } source_read = fread(source, 1, source_size, source_file); if (source_read != source_size) { puts("error reading from file\n"); goto error; } source[source_size] = '\0'; fclose(source_file); source_file = NULL; /* setup an OpenCL context and command queue using default device */ context = poclu_create_any_context(); if (!context) { puts("clCreateContextFromType call failed\n"); goto error; } result = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(cl_device_id), devices, NULL); if (result != CL_SUCCESS) { puts("clGetContextInfo call failed\n"); goto error; } queue = clCreateCommandQueue(context, devices[0], 0, NULL); if (!queue) { puts("clCreateCommandQueue call failed\n"); goto error; } /* create and build program */ sources[0] = source; program = clCreateProgramWithSource(context, 1, sources, NULL, NULL); if (!program) { puts("clCreateProgramWithSource call failed\n"); goto error; } result = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); if (result != CL_SUCCESS) { puts("clBuildProgram call failed\n"); goto error; } /* execute the kernel with give name */ kernel = clCreateKernel(program, name, NULL); if (!kernel) { puts("clCreateKernel call failed\n"); goto error; } result = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL); if (result != CL_SUCCESS) { puts("clEnqueueNDRangeKernel call failed\n"); goto error; } result = clFinish(queue); if (result == CL_SUCCESS) retval = 0; error: if (kernel) { clReleaseKernel(kernel); } if (program) { clReleaseProgram(program); } if (queue) { clReleaseCommandQueue(queue); } if (context) { clReleaseContext(context); } if (source_file) { fclose(source_file); } if (source) { free(source); } if (filename) { free(filename); } return retval; }
int call_test(const char *name) { size_t global_work_size[1] = { 1 }, local_work_size[1]= { 1 }; size_t srcdir_length, name_length, filename_size; char *filename = NULL; char *source = NULL; cl_device_id devices[1]; cl_context context = NULL; cl_command_queue queue = NULL; cl_program program = NULL; cl_kernel kernel = NULL; cl_int result; int retval = -1; TEST_ASSERT (name != NULL); /* determine file name of kernel source to load */ srcdir_length = strlen(SRCDIR); name_length = strlen(name); filename_size = srcdir_length + name_length + 16; filename = (char *)malloc(filename_size + 1); if (!filename) { puts("out of memory"); goto error; } snprintf(filename, filename_size, "%s/%s.cl", SRCDIR, name); /* read source code */ source = poclu_read_file (filename); TEST_ASSERT (source != NULL && "Kernel .cl not found."); /* setup an OpenCL context and command queue using default device */ context = poclu_create_any_context(); if (!context) { puts("clCreateContextFromType call failed\n"); goto error; } result = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(cl_device_id), devices, NULL); if (result != CL_SUCCESS) { puts("clGetContextInfo call failed\n"); goto error; } queue = clCreateCommandQueue(context, devices[0], 0, NULL); if (!queue) { puts("clCreateCommandQueue call failed\n"); goto error; } /* create and build program */ program = clCreateProgramWithSource (context, 1, (const char **)&source, NULL, NULL); if (!program) { puts("clCreateProgramWithSource call failed\n"); goto error; } result = clBuildProgram(program, 0, NULL, "-I" SRCDIR, NULL, NULL); if (result != CL_SUCCESS) { puts("clBuildProgram call failed\n"); goto error; } /* execute the kernel with give name */ kernel = clCreateKernel(program, name, NULL); if (!kernel) { puts("clCreateKernel call failed\n"); goto error; } result = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL); if (result != CL_SUCCESS) { puts("clEnqueueNDRangeKernel call failed\n"); goto error; } result = clFinish(queue); if (result == CL_SUCCESS) retval = 0; error: if (kernel) { clReleaseKernel(kernel); } if (program) { clReleaseProgram(program); } if (queue) { clReleaseCommandQueue(queue); } if (context) { clUnloadCompiler (); clReleaseContext (context); } if (source) { free(source); } if (filename) { free(filename); } return retval; }
int exec_dot_product_kernel(const char *program_source, size_t source_size, int n, cl_float4 *srcA, cl_float4 *srcB, cl_float *dst) { cl_context context; cl_command_queue cmd_queue; cl_device_id *devices; cl_program program; cl_kernel kernel; cl_mem memobjs[3]; size_t global_work_size[1]; size_t local_work_size[1]; size_t cb; cl_int err; int i; context = poclu_create_any_context(); if (context == (cl_context)0) return -1; // get the list of GPU devices associated with context clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &cb); devices = (cl_device_id *) malloc(cb); clGetContextInfo(context, CL_CONTEXT_DEVICES, cb, devices, NULL); // create a command-queue cmd_queue = clCreateCommandQueue(context, devices[0], 0, NULL); if (cmd_queue == (cl_command_queue)0) { clReleaseContext(context); free(devices); return -1; } for (i = 0; i < n; ++i) { poclu_bswap_cl_float_array(devices[0], (cl_float*)&srcA[i], 4); poclu_bswap_cl_float_array(devices[0], (cl_float*)&srcB[i], 4); } // allocate the buffer memory objects memobjs[0] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(cl_float4) * n, srcA, NULL); if (memobjs[0] == (cl_mem)0) { clReleaseCommandQueue(cmd_queue); clReleaseContext(context); return -1; } memobjs[1] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(cl_float4) * n, srcB, NULL); if (memobjs[1] == (cl_mem)0) { delete_memobjs(memobjs, 1); clReleaseCommandQueue(cmd_queue); clReleaseContext(context); return -1; } memobjs[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float) * n, NULL, NULL); if (memobjs[2] == (cl_mem)0) { delete_memobjs(memobjs, 2); clReleaseCommandQueue(cmd_queue); clReleaseContext(context); return -1; } // create the program program = clCreateProgramWithBinary (context, 1, devices, &source_size, (const unsigned char**)&program_source, NULL, NULL); if (program == (cl_program)0) { delete_memobjs(memobjs, 3); clReleaseCommandQueue(cmd_queue); clReleaseContext(context); return -1; } // build the program err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); if (err != CL_SUCCESS) { delete_memobjs(memobjs, 3); clReleaseProgram(program); clReleaseCommandQueue(cmd_queue); clReleaseContext(context); return -1; } // create the kernel kernel = clCreateKernel(program, "dot_product", NULL); if (kernel == (cl_kernel)0) { delete_memobjs(memobjs, 3); clReleaseProgram(program); clReleaseCommandQueue(cmd_queue); clReleaseContext(context); return -1; } // set the args values err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &memobjs[0]); err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *) &memobjs[1]); err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *) &memobjs[2]); if (err != CL_SUCCESS) { delete_memobjs(memobjs, 3); clReleaseKernel(kernel); clReleaseProgram(program); clReleaseCommandQueue(cmd_queue); clReleaseContext(context); return -1; } // set work-item dimensions global_work_size[0] = n; local_work_size[0]= 128; // execute kernel err = clEnqueueNDRangeKernel(cmd_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL); if (err != CL_SUCCESS) { delete_memobjs(memobjs, 3); clReleaseKernel(kernel); clReleaseProgram(program); clReleaseCommandQueue(cmd_queue); clReleaseContext(context); return -1; } // read output image err = clEnqueueReadBuffer(cmd_queue, memobjs[2], CL_TRUE, 0, n * sizeof(cl_float), dst, 0, NULL, NULL); if (err != CL_SUCCESS) { delete_memobjs(memobjs, 3); clReleaseKernel(kernel); clReleaseProgram(program); clReleaseCommandQueue(cmd_queue); clReleaseContext(context); return -1; } for (i = 0; i < n; ++i) { poclu_bswap_cl_float_array(devices[0], (cl_float*)&dst[i], 1); poclu_bswap_cl_float_array(devices[0], (cl_float*)&srcA[i], 4); poclu_bswap_cl_float_array(devices[0], (cl_float*)&srcB[i], 4); } free(devices); // release kernel, program, and memory objects delete_memobjs(memobjs, 3); clReleaseKernel(kernel); clReleaseProgram(program); clReleaseCommandQueue(cmd_queue); clReleaseContext(context); return 0; // success... }
int exec_trig_kernel(const char *program_source, int n, void *srcA, void *dst) { cl_context context; cl_command_queue cmd_queue; cl_device_id *devices; cl_program program; cl_kernel kernel; cl_mem memobjs[2]; size_t global_work_size[1]; size_t local_work_size[1]; size_t cb; cl_int err; float c = 7.3f; // a scalar number to test non-pointer args // create the OpenCL context on a GPU device context = poclu_create_any_context(); if (context == (cl_context)0) return -1; // get the list of GPU devices associated with context clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &cb); devices = malloc(cb); clGetContextInfo(context, CL_CONTEXT_DEVICES, cb, devices, NULL); // create a command-queue cmd_queue = clCreateCommandQueue(context, devices[0], 0, NULL); if (cmd_queue == (cl_command_queue)0) { clReleaseContext(context); free(devices); return -1; } free(devices); // allocate the buffer memory objects memobjs[0] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(cl_float4) * n, srcA, NULL); if (memobjs[0] == (cl_mem)0) { clReleaseCommandQueue(cmd_queue); clReleaseContext(context); return -1; } memobjs[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float4) * n, NULL, NULL); if (memobjs[1] == (cl_mem)0) { delete_memobjs(memobjs, 1); clReleaseCommandQueue(cmd_queue); clReleaseContext(context); return -1; } // create the program program = clCreateProgramWithSource(context, 1, (const char**)&program_source, NULL, NULL); if (program == (cl_program)0) { delete_memobjs(memobjs, 2); clReleaseCommandQueue(cmd_queue); clReleaseContext(context); return -1; } // build the program err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); if (err != CL_SUCCESS) { delete_memobjs(memobjs, 2); clReleaseProgram(program); clReleaseCommandQueue(cmd_queue); clReleaseContext(context); return -1; } // create the kernel kernel = clCreateKernel(program, "trig", NULL); if (kernel == (cl_kernel)0) { delete_memobjs(memobjs, 2); clReleaseProgram(program); clReleaseCommandQueue(cmd_queue); clReleaseContext(context); return -1; } // set the args values err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &memobjs[0]); err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *) &memobjs[1]); err |= clSetKernelArg(kernel, 2, sizeof(float), (void *) &c); if (err != CL_SUCCESS) { delete_memobjs(memobjs, 2); clReleaseKernel(kernel); clReleaseProgram(program); clReleaseCommandQueue(cmd_queue); clReleaseContext(context); return -1; } // set work-item dimensions global_work_size[0] = n; local_work_size[0]= 2; // execute kernel err = clEnqueueNDRangeKernel(cmd_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL); if (err != CL_SUCCESS) { delete_memobjs(memobjs, 2); clReleaseKernel(kernel); clReleaseProgram(program); clReleaseCommandQueue(cmd_queue); clReleaseContext(context); return -1; } // read output image err = clEnqueueReadBuffer(cmd_queue, memobjs[1], CL_TRUE, 0, n * sizeof(cl_float4), dst, 0, NULL, NULL); if (err != CL_SUCCESS) { delete_memobjs(memobjs, 2); clReleaseKernel(kernel); clReleaseProgram(program); clReleaseCommandQueue(cmd_queue); clReleaseContext(context); return -1; } // release kernel, program, and memory objects delete_memobjs(memobjs, 2); clReleaseKernel(kernel); clReleaseProgram(program); clReleaseCommandQueue(cmd_queue); clReleaseContext(context); return 0; // success... }
int main(int argc, char** argv) { printf("WG size of kernel = %d X %d\n", BLOCK_SIZE, BLOCK_SIZE); cl_int error; cl_uint num_platforms; // Get the number of platforms error = clGetPlatformIDs(0, NULL, &num_platforms); if (error != CL_SUCCESS) fatal_CL(error, __LINE__); // Get the list of platforms cl_platform_id* platforms = (cl_platform_id *) malloc(sizeof(cl_platform_id) * num_platforms); error = clGetPlatformIDs(num_platforms, platforms, NULL); if (error != CL_SUCCESS) fatal_CL(error, __LINE__); char pbuf[100]; #ifndef POCL_HSA // Print the chosen platform (if there are multiple platforms, choose the first one) cl_platform_id platform = platforms[0]; error = clGetPlatformInfo(platform, CL_PLATFORM_VENDOR, sizeof(pbuf), pbuf, NULL); if (error != CL_SUCCESS) fatal_CL(error, __LINE__); printf("Platform: %s\n", pbuf); // Create a GPU context cl_context_properties context_properties[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties) platform, 0}; context = clCreateContextFromType(context_properties, CL_DEVICE_TYPE_GPU, NULL, NULL, &error); if (error != CL_SUCCESS) fatal_CL(error, __LINE__); #else context = poclu_create_any_context(); #endif // Get and print the chosen device (if there are multiple devices, choose the first one) size_t devices_size; error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &devices_size); if (error != CL_SUCCESS) fatal_CL(error, __LINE__); cl_device_id *devices = (cl_device_id *) malloc(devices_size); error = clGetContextInfo(context, CL_CONTEXT_DEVICES, devices_size, devices, NULL); if (error != CL_SUCCESS) fatal_CL(error, __LINE__); device = devices[0]; error = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(pbuf), pbuf, NULL); if (error != CL_SUCCESS) fatal_CL(error, __LINE__); printf("Device: %s\n", pbuf); size_t wgs; error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(wgs), &wgs, NULL); if (error != CL_SUCCESS) fatal_CL(error, __LINE__); printf("CL_DEVICE_MAX_WORK_GROUP_SIZE: %lu\n", wgs); // Create a command queue command_queue = DIVIDEND_CL_WRAP(clCreateCommandQueue)(context, device, 0, &error); if (error != CL_SUCCESS) fatal_CL(error, __LINE__); int size; int grid_rows,grid_cols = 0; float *FilesavingTemp,*FilesavingPower; //,*MatrixOut; char *tfile, *pfile, *ofile; int total_iterations = 60; int pyramid_height = 1; // number of iterations if (argc < 7) usage(argc, argv); if((grid_rows = atoi(argv[1]))<=0|| (grid_cols = atoi(argv[1]))<=0|| (pyramid_height = atoi(argv[2]))<=0|| (total_iterations = atoi(argv[3]))<=0) usage(argc, argv); tfile=argv[4]; pfile=argv[5]; ofile=argv[6]; size=grid_rows*grid_cols; // --------------- pyramid parameters --------------- int borderCols = (pyramid_height)*EXPAND_RATE/2; int borderRows = (pyramid_height)*EXPAND_RATE/2; int smallBlockCol = BLOCK_SIZE-(pyramid_height)*EXPAND_RATE; int smallBlockRow = BLOCK_SIZE-(pyramid_height)*EXPAND_RATE; int blockCols = grid_cols/smallBlockCol+((grid_cols%smallBlockCol==0)?0:1); int blockRows = grid_rows/smallBlockRow+((grid_rows%smallBlockRow==0)?0:1); FilesavingTemp = (float *) malloc(size*sizeof(float)); FilesavingPower = (float *) malloc(size*sizeof(float)); // MatrixOut = (float *) calloc (size, sizeof(float)); if( !FilesavingPower || !FilesavingTemp) // || !MatrixOut) fatal("unable to allocate memory"); // Read input data from disk readinput(FilesavingTemp, grid_rows, grid_cols, tfile); readinput(FilesavingPower, grid_rows, grid_cols, pfile); // Load kernel source from file const char *source = load_kernel_source("hotspot_kernel.cl"); size_t sourceSize = strlen(source); // Compile the kernel cl_program program = clCreateProgramWithSource(context, 1, &source, &sourceSize, &error); if (error != CL_SUCCESS) fatal_CL(error, __LINE__); char clOptions[110]; // sprintf(clOptions,"-I../../src"); sprintf(clOptions," "); #ifdef BLOCK_SIZE sprintf(clOptions + strlen(clOptions), " -DBLOCK_SIZE=%d", BLOCK_SIZE); #endif // Create an executable from the kernel error = DIVIDEND_CL_WRAP(clBuildProgram)(program, 1, &device, clOptions, NULL, NULL); // Show compiler warnings/errors static char log[65536]; memset(log, 0, sizeof(log)); clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, sizeof(log)-1, log, NULL); if (strstr(log,"warning:") || strstr(log, "error:")) printf("<<<<\n%s\n>>>>\n", log); if (error != CL_SUCCESS) fatal_CL(error, __LINE__); kernel = clCreateKernel(program, "hotspot", &error); if (error != CL_SUCCESS) fatal_CL(error, __LINE__); long long start_time = get_time(); // Create two temperature matrices and copy the temperature input data cl_mem MatrixTemp[2]; // Create input memory buffers on device MatrixTemp[0] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, sizeof(float) * size, FilesavingTemp, &error); if (error != CL_SUCCESS) fatal_CL(error, __LINE__); // Lingjie Zhang modifited at Nov 1, 2015 //MatrixTemp[1] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, sizeof(float) * size, NULL, &error); MatrixTemp[1] = clCreateBuffer(context, CL_MEM_READ_WRITE , sizeof(float) * size, NULL, &error); // end Lingjie Zhang modification if (error != CL_SUCCESS) fatal_CL(error, __LINE__); // Copy the power input data cl_mem MatrixPower = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(float) * size, FilesavingPower, &error); if (error != CL_SUCCESS) fatal_CL(error, __LINE__); // Perform the computation int ret = compute_tran_temp(MatrixPower, MatrixTemp, grid_cols, grid_rows, total_iterations, pyramid_height, blockCols, blockRows, borderCols, borderRows, FilesavingTemp, FilesavingPower); // Copy final temperature data back cl_float *MatrixOut = (cl_float *) clEnqueueMapBuffer(command_queue, MatrixTemp[ret], CL_TRUE, CL_MAP_READ, 0, sizeof(float) * size, 0, NULL, NULL, &error); if (error != CL_SUCCESS) fatal_CL(error, __LINE__); long long end_time = get_time(); printf("Total time: %.3f seconds\n", ((float) (end_time - start_time)) / (1000*1000)); // Write final output to output file writeoutput(MatrixOut, grid_rows, grid_cols, ofile); error = clEnqueueUnmapMemObject(command_queue, MatrixTemp[ret], (void *) MatrixOut, 0, NULL, NULL); if (error != CL_SUCCESS) fatal_CL(error, __LINE__); clReleaseMemObject(MatrixTemp[0]); clReleaseMemObject(MatrixTemp[1]); clReleaseMemObject(MatrixPower); clReleaseContext(context); return 0; }
/** * The test kernels are assumed to: * * 1) called 'test_kernel' * 2) no inputs or outputs, only work item id printfs to verify the correct * workgroup transformations * 3) executable with any local and global dimensions and sizes * * Usage: * * ./run_kernel somekernel.cl 2 2 3 4 * * Where the first integer is the number of work groups to execute and the * rest are the local dimensions. */ int main (int argc, char **argv) { FILE *source_file; char *source; int source_size; cl_context context; size_t cb; cl_device_id *devices; cl_command_queue cmd_queue; cl_program program; cl_int err; cl_kernel kernel; size_t global_work_size[3]; size_t local_work_size[3]; char kernel_path[2048]; snprintf (kernel_path, 2048, "%s/%s", SRCDIR, argv[1]); source_file = fopen(kernel_path, "r"); assert(source_file != NULL && "Kernel .cl not found."); fseek (source_file, 0, SEEK_END); source_size = ftell (source_file); fseek (source_file, 0, SEEK_SET); source = malloc (source_size + 1); assert (source != NULL); fread (source, source_size, 1, source_file); source[source_size] = '\0'; fclose(source_file); local_work_size[0] = atoi(argv[3]); local_work_size[1] = atoi(argv[4]); local_work_size[2] = atoi(argv[5]); global_work_size[0] = local_work_size[0] * atoi(argv[2]); global_work_size[1] = local_work_size[1]; global_work_size[2] = local_work_size[2]; context = poclu_create_any_context(); if (context == (cl_context)0) return -1; clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &cb); devices = malloc(cb); clGetContextInfo(context, CL_CONTEXT_DEVICES, cb, devices, NULL); cmd_queue = clCreateCommandQueue(context, devices[0], 0, NULL); if (cmd_queue == (cl_command_queue)0) { clReleaseContext(context); free(devices); return -1; } free(devices); program = clCreateProgramWithSource(context, 1, (const char**)&source, NULL, NULL); if (program == (cl_program)0) { clReleaseCommandQueue(cmd_queue); clReleaseContext(context); return -1; } err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); if (err != CL_SUCCESS) { clReleaseProgram(program); clReleaseCommandQueue(cmd_queue); clReleaseContext(context); return -1; } kernel = clCreateKernel(program, "test_kernel", NULL); if (kernel == (cl_kernel)0) { clReleaseProgram(program); clReleaseCommandQueue(cmd_queue); clReleaseContext(context); return -1; } err = clEnqueueNDRangeKernel(cmd_queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL); if(err != CL_SUCCESS) { clReleaseKernel(kernel); clReleaseProgram(program); clReleaseCommandQueue(cmd_queue); clReleaseContext(context); return -1; } clFinish(cmd_queue); clReleaseKernel(kernel); clReleaseProgram(program); clReleaseCommandQueue(cmd_queue); clReleaseContext(context); return 0; }
int main(int argc, char **argv) { /* test name */ char name[] = "test_sampler_address_clamp"; size_t global_work_size[1] = { 1 }, local_work_size[1]= { 1 }; size_t srcdir_length, name_length, filename_size; char *filename = NULL; char *source = NULL; cl_device_id devices[1]; cl_context context = NULL; cl_command_queue queue = NULL; cl_program program = NULL; cl_kernel kernel = NULL; cl_int result; int retval = -1; /* image parameters */ cl_uchar4 *imageData; cl_image_format image_format; cl_image_desc image_desc; printf("Running test %s...\n", name); memset(&image_desc, 0, sizeof(cl_image_desc)); image_desc.image_type = CL_MEM_OBJECT_IMAGE2D; image_desc.image_width = 4; image_desc.image_height = 4; image_format.image_channel_order = CL_RGBA; image_format.image_channel_data_type = CL_UNSIGNED_INT8; imageData = (cl_uchar4*)malloc (4 * 4 * sizeof(cl_uchar4)); if (imageData == NULL) { puts("out of host memory\n"); goto error; } memset (imageData, 1, 4*4*sizeof(cl_uchar4)); /* determine file name of kernel source to load */ srcdir_length = strlen(SRCDIR); name_length = strlen(name); filename_size = srcdir_length + name_length + 16; filename = (char *)malloc(filename_size + 1); if (!filename) { puts("out of memory"); goto error; } snprintf(filename, filename_size, "%s/%s.cl", SRCDIR, name); /* read source code */ source = poclu_read_file (filename); TEST_ASSERT (source != NULL && "Kernel .cl not found."); /* setup an OpenCL context and command queue using default device */ context = poclu_create_any_context(); if (!context) { puts("clCreateContextFromType call failed\n"); goto error; } result = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(cl_device_id), devices, NULL); if (result != CL_SUCCESS) { puts("clGetContextInfo call failed\n"); goto error; } queue = clCreateCommandQueue(context, devices[0], 0, NULL); if (!queue) { puts("clCreateCommandQueue call failed\n"); goto error; } /* Create image */ cl_mem image = clCreateImage (context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &image_format, &image_desc, imageData, &result); if (result != CL_SUCCESS) { puts("image creation failed\n"); goto error; } /* create and build program */ program = clCreateProgramWithSource (context, 1, (const char **)&source, NULL, NULL); if (!program) { puts("clCreateProgramWithSource call failed\n"); goto error; } result = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); if (result != CL_SUCCESS) { puts("clBuildProgram call failed\n"); goto error; } /* execute the kernel with give name */ kernel = clCreateKernel(program, name, NULL); if (!kernel) { puts("clCreateKernel call failed\n"); goto error; } result = clSetKernelArg( kernel, 0, sizeof(cl_mem), &image); if (result) { puts("clSetKernelArg failed\n"); goto error; } result = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL); if (result != CL_SUCCESS) { puts("clEnqueueNDRangeKernel call failed\n"); goto error; } result = clFinish(queue); if (result == CL_SUCCESS) retval = 0; error: if (image) { clReleaseMemObject (image); } if (kernel) { clReleaseKernel(kernel); } if (program) { clReleaseProgram(program); } if (queue) { clReleaseCommandQueue(queue); } if (context) { clUnloadCompiler (); clReleaseContext (context); } if (source) { free(source); } if (filename) { free(filename); } if (imageData) { free(imageData); } if (retval) { printf("FAIL\n"); return 1; } printf("OK\n"); return 0; }