int main(void){ cl_int err; cl_platform_id platforms[MAX_PLATFORMS]; cl_uint nplatforms; cl_device_id devices[MAX_DEVICES + 1]; // + 1 for duplicate test cl_uint num_devices; cl_program program = NULL; err = clGetPlatformIDs(MAX_PLATFORMS, platforms, &nplatforms); CHECK_OPENCL_ERROR_IN("clGetPlatformIDs"); if (!nplatforms) return EXIT_FAILURE; err = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, MAX_DEVICES, devices, &num_devices); CHECK_OPENCL_ERROR_IN("clGetDeviceIDs"); cl_context context = clCreateContext(NULL, num_devices, devices, NULL, NULL, &err); CHECK_OPENCL_ERROR_IN("clCreateContext"); size_t program_size = strlen(program_src); char* program_buffer = program_src; program = clCreateProgramWithSource(context, 1, (const char**)&program_buffer, &program_size, &err); //clCreateProgramWithSource for the program with #include failed CHECK_OPENCL_ERROR_IN("clCreateProgramWithSource"); err = clBuildProgram(program, num_devices, devices, NULL, NULL, NULL); TEST_ASSERT(err == CL_BUILD_PROGRAM_FAILURE); return EXIT_SUCCESS; }
int main(void){ cl_int err; cl_context context; cl_device_id did; cl_command_queue queue; CHECK_CL_ERROR(poclu_get_any_device(&context, &did, &queue)); TEST_ASSERT( context ); TEST_ASSERT( did ); TEST_ASSERT( queue ); size_t program_size = strlen(program_src); char* program_buffer = program_src; cl_program program = clCreateProgramWithSource(context, 1, (const char**)&program_buffer, &program_size, &err); //clCreateProgramWithSource for the program with #include failed CHECK_OPENCL_ERROR_IN("clCreateProgramWithSource"); err = clBuildProgram(program, 1, &did, NULL, NULL, NULL); TEST_ASSERT(err == CL_BUILD_PROGRAM_FAILURE); CHECK_CL_ERROR (clReleaseCommandQueue (queue)); CHECK_CL_ERROR (clReleaseProgram (program)); CHECK_CL_ERROR (clReleaseContext (context)); CHECK_CL_ERROR (clUnloadCompiler ()); return EXIT_SUCCESS; }
int main(void) { cl_int err; cl_platform_id platforms[MAX_PLATFORMS]; cl_uint nplatforms; cl_device_id devices[MAX_DEVICES]; cl_uint ndevices; cl_uint i, j; err = clGetPlatformIDs(MAX_PLATFORMS, platforms, &nplatforms); CHECK_OPENCL_ERROR_IN("clGetPlatformIDs"); if (!nplatforms) return EXIT_FAILURE; for (i = 0; i < nplatforms; i++) { err = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, MAX_DEVICES, devices, &ndevices); CHECK_OPENCL_ERROR_IN("clGetDeviceIDs"); for (j = 0; j < ndevices; j++) { cl_long global_memsize, max_mem_alloc_size, min_max_mem_alloc_size; err = clGetDeviceInfo(devices[j], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(global_memsize), &global_memsize, NULL); CHECK_OPENCL_ERROR_IN("clGetDeviceInfo"); err = clGetDeviceInfo(devices[j], CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_mem_alloc_size), &max_mem_alloc_size, NULL); CHECK_OPENCL_ERROR_IN("clGetDeviceInfo"); TEST_ASSERT(global_memsize > 0); min_max_mem_alloc_size = 128*1024*1024; if (min_max_mem_alloc_size < global_memsize/4) min_max_mem_alloc_size = global_memsize/4; TEST_ASSERT(max_mem_alloc_size >= min_max_mem_alloc_size); } } return EXIT_SUCCESS; }
int main() { cl_int err; cl_event user_evt = NULL; int i; // An user event can be set to either complete or a negative value, indicating error; // additionally, no objects involved in a command that waits on the user event should // be released before the event status is set; however, it should be possible to release // everything even if the status is set to something which is NOT CL_COMPLETE. So // try both CL_COMPLETE and a negative value cl_int status[] = {CL_INVALID_EVENT, CL_COMPLETE }; // We also query for profiling info of the event, which according to the standard // should return CL_PROFILING_INFO_NOT_AVAILABLE cl_ulong queued, submitted, started, endtime; for (i = 0; i < ARRAY_SIZE(status); ++i) { cl_context context; cl_command_queue queue; cl_device_id device; CHECK_CL_ERROR(poclu_get_any_device(&context, &device, &queue)); TEST_ASSERT( context ); TEST_ASSERT( device ); TEST_ASSERT( queue ); user_evt = clCreateUserEvent(context, &err); CHECK_OPENCL_ERROR_IN("clCreateUserEvent"); TEST_ASSERT( user_evt ); CHECK_CL_ERROR(clSetUserEventStatus(user_evt, status[i])); err = clGetEventProfilingInfo(user_evt, CL_PROFILING_COMMAND_QUEUED, sizeof(queued), &queued, NULL); TEST_ASSERT(err == CL_PROFILING_INFO_NOT_AVAILABLE); err = clGetEventProfilingInfo(user_evt, CL_PROFILING_COMMAND_SUBMIT, sizeof(submitted), &submitted, NULL); TEST_ASSERT(err == CL_PROFILING_INFO_NOT_AVAILABLE); err = clGetEventProfilingInfo(user_evt, CL_PROFILING_COMMAND_START, sizeof(started), &started, NULL); TEST_ASSERT(err == CL_PROFILING_INFO_NOT_AVAILABLE); err = clGetEventProfilingInfo(user_evt, CL_PROFILING_COMMAND_END, sizeof(endtime), &endtime, NULL); TEST_ASSERT(err == CL_PROFILING_INFO_NOT_AVAILABLE); CHECK_CL_ERROR(clReleaseEvent(user_evt)); CHECK_CL_ERROR(clReleaseCommandQueue(queue)); CHECK_CL_ERROR(clReleaseContext(context)); } return EXIT_SUCCESS; }
int main(int argc, char **argv) { cl_int err; const char *krn_src; cl_program program; cl_context ctx; cl_command_queue queue; cl_device_id did; cl_uint num_krn; cl_kernel kernel; poclu_get_any_device(&ctx, &did, &queue); TEST_ASSERT(ctx); TEST_ASSERT(did); TEST_ASSERT(queue); krn_src = poclu_read_file(SRCDIR "/tests/runtime/test_clCreateKernelsInProgram.cl"); TEST_ASSERT(krn_src); program = clCreateProgramWithSource(ctx, 1, &krn_src, NULL, &err); CHECK_OPENCL_ERROR_IN("clCreateProgramWithSource"); err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); CHECK_OPENCL_ERROR_IN("clBuildProgram"); kernel = clCreateKernel(program, NULL, &err); TEST_ASSERT(err == CL_INVALID_VALUE); TEST_ASSERT(kernel == NULL); kernel = clCreateKernel(program, "nonexistent_kernel", &err); TEST_ASSERT(err == CL_INVALID_KERNEL_NAME); TEST_ASSERT(kernel == NULL); printf("OK\n"); return 0; }
int main(int argc, char **argv) { cl_int err; const char *krn_src; cl_program program; cl_context ctx; cl_device_id did; cl_command_queue queue; cl_uint num_krn; cl_kernel kernels[2]; poclu_get_any_device(&ctx, &did, &queue); TEST_ASSERT( ctx ); TEST_ASSERT( did ); TEST_ASSERT( queue ); krn_src = poclu_read_file(SRCDIR "/tests/runtime/test_clCreateKernelsInProgram.cl"); TEST_ASSERT(krn_src); program = clCreateProgramWithSource(ctx, 1, &krn_src, NULL, &err); CHECK_OPENCL_ERROR_IN("clCreateProgramWithSource"); err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); CHECK_OPENCL_ERROR_IN("clBuildProgram"); err = clCreateKernelsInProgram(program, 0, NULL, &num_krn); CHECK_OPENCL_ERROR_IN("clCreateKernelsInProgram"); // test_clCreateKernelsInProgram.cl has two kernel functions. TEST_ASSERT(num_krn == 2); err = clCreateKernelsInProgram(program, 2, kernels, NULL); CHECK_OPENCL_ERROR_IN("clCreateKernelsInProgram"); // make sure the kernels were actually created // Note: nothing in the specification says which kernel function // is kernels[0], which is kernels[1]. For now assume pocl/LLVM // orders these deterministacally err = clEnqueueTask(queue, kernels[0], 0, NULL, NULL); CHECK_OPENCL_ERROR_IN("clEnqueueTask"); err = clEnqueueTask(queue, kernels[1], 0, NULL, NULL); CHECK_OPENCL_ERROR_IN("clEnqueueTask"); err = clFinish(queue); CHECK_OPENCL_ERROR_IN("clFinish"); return EXIT_SUCCESS; }
int main(void){ cl_int err; cl_platform_id platforms[MAX_PLATFORMS]; cl_uint nplatforms; cl_device_id devices[MAX_DEVICES + 1]; // + 1 for duplicate test cl_device_id device_id0; cl_uint num_devices; size_t i; size_t num_binaries; const unsigned char **binaries = NULL; size_t *binary_sizes = NULL; size_t num_bytes_copied; cl_int binary_statuses[MAX_BINARIES]; cl_int binary_statuses2[MAX_BINARIES]; cl_program program = NULL; cl_program program_with_binary = NULL; err = clGetPlatformIDs(MAX_PLATFORMS, platforms, &nplatforms); CHECK_OPENCL_ERROR_IN("clGetPlatformIDs"); if (!nplatforms) return EXIT_FAILURE; err = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, MAX_DEVICES, devices, &num_devices); CHECK_OPENCL_ERROR_IN("clGetDeviceIDs"); cl_context context = clCreateContext(NULL, num_devices, devices, NULL, NULL, &err); CHECK_OPENCL_ERROR_IN("clCreateContext"); size_t kernel_size = strlen(kernel); char* kernel_buffer = kernel; program = clCreateProgramWithSource(context, 1, (const char**)&kernel_buffer, &kernel_size, &err); CHECK_OPENCL_ERROR_IN("clCreateProgramWithSource"); err = clBuildProgram(program, num_devices, devices, NULL, NULL, NULL); CHECK_OPENCL_ERROR_IN("clBuildProgram"); err = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, 0, 0, &num_binaries); CHECK_OPENCL_ERROR_IN("clGetProgramInfo"); num_binaries = num_binaries/sizeof(size_t); binary_sizes = (size_t*)malloc(num_binaries * sizeof(size_t)); binaries = (const unsigned char**)calloc(num_binaries, sizeof(unsigned char*)); err = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, num_binaries*sizeof(size_t), binary_sizes , &num_bytes_copied); CHECK_OPENCL_ERROR_IN("clGetProgramInfo"); for (i = 0; i < num_binaries; ++i) binaries[i] = (const unsigned char*) malloc(binary_sizes[i] * sizeof(const unsigned char)); err = clGetProgramInfo(program, CL_PROGRAM_BINARIES, num_binaries*sizeof(char*), binaries, &num_bytes_copied); CHECK_OPENCL_ERROR_IN("clGetProgramInfo"); cl_uint num = num_binaries < num_devices ? 
num_binaries : num_devices; if (num == 0) { err = !CL_SUCCESS; goto FREE_AND_EXIT; } program_with_binary = clCreateProgramWithBinary(context, num, devices, binary_sizes, binaries, binary_statuses, &err); CHECK_OPENCL_ERROR_IN("clCreateProgramWithBinary"); for (i = 0; i < num; ++i) { cl_program_binary_type bin_type = 0; err = clGetProgramBuildInfo(program_with_binary, devices[i], CL_PROGRAM_BINARY_TYPE, sizeof(bin_type), (void *)&bin_type, NULL); CHECK_OPENCL_ERROR_IN("get program binary type"); /* cl_program_binary_type */ switch(bin_type) { case CL_PROGRAM_BINARY_TYPE_NONE: /*0x0*/ fprintf(stderr, "program binary type: CL_PROGRAM_BINARY_TYPE_NONE\n"); break; case CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT: /*0x1*/ fprintf(stderr, "program binary type: CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT\n"); break; case CL_PROGRAM_BINARY_TYPE_LIBRARY: /*0x2*/ fprintf(stderr, "program binary type: CL_PROGRAM_BINARY_TYPE_LIBRARY\n"); break; case CL_PROGRAM_BINARY_TYPE_EXECUTABLE: /*0x4*/ fprintf(stderr, "program binary type: CL_PROGRAM_BINARY_TYPE_EXECUTABLE\n"); break; } } err = clReleaseProgram(program_with_binary); CHECK_OPENCL_ERROR_IN("clReleaseProgram"); for (i = 0; i < num; i++) { if (binary_statuses[i] != CL_SUCCESS) { err = !CL_SUCCESS; goto FREE_AND_EXIT; } } // negative test1: invalid device device_id0 = devices[0]; devices[0] = NULL; // invalid device program_with_binary = clCreateProgramWithBinary(context, num, devices, binary_sizes, binaries, binary_statuses, &err); if (err != CL_INVALID_DEVICE || program_with_binary != NULL) { err = !CL_SUCCESS; goto FREE_AND_EXIT; } err = CL_SUCCESS; devices[0] = device_id0; for (i = 0; i < num_binaries; ++i) free((void*)binaries[i]); free(binary_sizes); free(binaries); // negative test2: duplicate device num_binaries = 2; devices[1] = devices[0]; // duplicate binary_sizes = (size_t*)malloc(num_binaries * sizeof(size_t)); binaries = (const unsigned char**)calloc(num_binaries, sizeof(unsigned char*)); err = clGetProgramInfo(program, 
CL_PROGRAM_BINARY_SIZES, 1*sizeof(size_t), binary_sizes , &num_bytes_copied); CHECK_OPENCL_ERROR_IN("clGetProgramInfo"); binary_sizes[1] = binary_sizes[0]; binaries[0] = (const unsigned char*) malloc(binary_sizes[0] * sizeof(const unsigned char)); binaries[1] = (const unsigned char*) malloc(binary_sizes[1] * sizeof(const unsigned char)); err = clGetProgramInfo(program, CL_PROGRAM_BINARIES, 1 * sizeof(char*), binaries, &num_bytes_copied); CHECK_OPENCL_ERROR_IN("clGetProgramInfo"); memcpy((void*)binaries[1], (void*)binaries[0], binary_sizes[0]); program_with_binary = clCreateProgramWithBinary(context, 2, devices, binary_sizes, binaries, binary_statuses2, &err); if (err != CL_INVALID_DEVICE || program_with_binary != NULL) { err = !CL_SUCCESS; goto FREE_AND_EXIT; } err = CL_SUCCESS; FREE_AND_EXIT: // Free resources for (i = 0; i < num_binaries; ++i) if (binaries) if(binaries[i]) free((void*)binaries[i]); if (binary_sizes) free(binary_sizes); if (binaries) free(binaries); if (program) CHECK_CL_ERROR (clReleaseProgram (program)); if (program_with_binary) CHECK_CL_ERROR (clReleaseProgram (program_with_binary)); if (context) CHECK_CL_ERROR (clReleaseContext (context)); CHECK_CL_ERROR (clUnloadCompiler ()); return err == CL_SUCCESS ? EXIT_SUCCESS : EXIT_FAILURE; }
int main(int argc, char **argv) { cl_int err; const char *krn_src; cl_program program, program2; cl_context ctx; cl_command_queue queue; cl_device_id did; cl_kernel kernel, kernel2; poclu_get_any_device(&ctx, &did, &queue); TEST_ASSERT(ctx); TEST_ASSERT(did); TEST_ASSERT(queue); krn_src = poclu_read_file(SRCDIR "/tests/runtime/test_kernel_cache_includes.cl"); TEST_ASSERT(krn_src); err = poclu_write_file(SRCDIR "/tests/runtime/test_include.h", first_include, sizeof(first_include)-1); TEST_ASSERT(err == 0); program = clCreateProgramWithSource(ctx, 1, &krn_src, NULL, &err); CHECK_OPENCL_ERROR_IN("clCreateProgramWithSource"); err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); CHECK_OPENCL_ERROR_IN("clBuildProgram 1"); kernel = clCreateKernel(program, "testk", &err); CHECK_OPENCL_ERROR_IN("clCreateKernel 1"); size_t off[3] = {0,0,0}; size_t ws[3] = {1,1,1}; err = clEnqueueNDRangeKernel(queue, kernel, 3, off, ws, ws, 0, NULL, 0); CHECK_OPENCL_ERROR_IN("clEnqueueNDRangeKernel 1"); err = clFinish(queue); CHECK_OPENCL_ERROR_IN("clFinish 1"); /***************************************/ program2 = clCreateProgramWithSource(ctx, 1, &krn_src, NULL, &err); CHECK_OPENCL_ERROR_IN("clCreateProgramWithSource 2"); err = poclu_write_file(SRCDIR "/tests/runtime/test_include.h", second_include, sizeof(second_include)-1); TEST_ASSERT(err == 0); err = clBuildProgram(program2, 0, NULL, NULL, NULL, NULL); CHECK_OPENCL_ERROR_IN("clBuildProgram 2"); kernel2 = clCreateKernel(program2, "testk", &err); CHECK_OPENCL_ERROR_IN("clCreateKernel 2"); err = clEnqueueNDRangeKernel(queue, kernel2, 3, off, ws, ws, 0, NULL, 0); CHECK_OPENCL_ERROR_IN("clEnqueueNDRangeKernel 2"); err = clFinish(queue); CHECK_OPENCL_ERROR_IN("clFinish 2"); return 0; }
int main(int argc, char **argv) { /* test name */ char name[] = "test_image_query_funcs"; size_t global_work_size[1] = { 1 }, local_work_size[1]= { 1 }; size_t srcdir_length, name_length, filename_size; char *filename = NULL; char *source = NULL; cl_device_id devices[1]; cl_context context = NULL; cl_command_queue queue = NULL; cl_program program = NULL; cl_kernel kernel = NULL; cl_int err; /* image parameters */ cl_uchar4 *imageData; cl_image_format image_format; cl_image_desc image2_desc, image3_desc; printf("Running test %s...\n", name); memset(&image2_desc, 0, sizeof(cl_image_desc)); image2_desc.image_type = CL_MEM_OBJECT_IMAGE2D; image2_desc.image_width = 2; image2_desc.image_height = 4; memset(&image3_desc, 0, sizeof(cl_image_desc)); image3_desc.image_type = CL_MEM_OBJECT_IMAGE3D; image3_desc.image_width = 2; image3_desc.image_height = 4; image3_desc.image_depth = 8; image_format.image_channel_order = CL_RGBA; image_format.image_channel_data_type = CL_UNSIGNED_INT8; imageData = (cl_uchar4*)malloc (4 * 4 * sizeof(cl_uchar4)); TEST_ASSERT (imageData != NULL && "out of host memory\n"); memset (imageData, 1, 4*4*sizeof(cl_uchar4)); /* determine file name of kernel source to load */ srcdir_length = strlen(SRCDIR); name_length = strlen(name); filename_size = srcdir_length + name_length + 16; filename = (char *)malloc(filename_size + 1); TEST_ASSERT (filename != NULL && "out of host memory\n"); snprintf(filename, filename_size, "%s/%s.cl", SRCDIR, name); /* read source code */ source = poclu_read_file (filename); TEST_ASSERT (source != NULL && "Kernel .cl not found."); /* setup an OpenCL context and command queue using default device */ context = poclu_create_any_context(); TEST_ASSERT (context != NULL && "clCreateContextFromType call failed\n"); cl_sampler external_sampler = clCreateSampler ( context, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &err); CHECK_OPENCL_ERROR_IN ("clCreateSampler"); CHECK_CL_ERROR (clGetContextInfo (context, CL_CONTEXT_DEVICES, sizeof 
(cl_device_id), devices, NULL)); queue = clCreateCommandQueue (context, devices[0], 0, &err); CHECK_OPENCL_ERROR_IN ("clCreateCommandQueue"); /* Create image */ cl_mem image2 = clCreateImage (context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &image_format, &image2_desc, imageData, &err); CHECK_OPENCL_ERROR_IN ("clCreateImage image2"); cl_mem image3 = clCreateImage (context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &image_format, &image3_desc, imageData, &err); CHECK_OPENCL_ERROR_IN ("clCreateImage image3"); unsigned color[4] = { 2, 9, 11, 7 }; size_t orig[3] = { 0, 0, 0 }; size_t reg[3] = { 2, 4, 1 }; err = clEnqueueFillImage (queue, image2, color, orig, reg, 0, NULL, NULL); CHECK_OPENCL_ERROR_IN ("clCreateImage image3"); /* create and build program */ program = clCreateProgramWithSource (context, 1, (const char **)&source, NULL, &err); CHECK_OPENCL_ERROR_IN ("clCreateProgramWithSource"); err = clBuildProgram (program, 0, NULL, NULL, NULL, NULL); CHECK_OPENCL_ERROR_IN ("clBuildProgram"); /* execute the kernel with give name */ kernel = clCreateKernel (program, name, NULL); CHECK_OPENCL_ERROR_IN ("clCreateKernel"); err = clSetKernelArg (kernel, 0, sizeof (cl_mem), &image2); CHECK_OPENCL_ERROR_IN ("clSetKernelArg 0"); err = clSetKernelArg (kernel, 1, sizeof (cl_mem), &image3); CHECK_OPENCL_ERROR_IN ("clSetKernelArg 1"); err = clSetKernelArg (kernel, 2, sizeof (cl_sampler), &external_sampler); CHECK_OPENCL_ERROR_IN ("clSetKernelArg 2"); err = clEnqueueNDRangeKernel (queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL); CHECK_OPENCL_ERROR_IN ("clEnqueueNDRangeKernel"); err = clFinish (queue); CHECK_OPENCL_ERROR_IN ("clFinish"); clReleaseMemObject (image2); clReleaseMemObject (image3); clReleaseKernel (kernel); clReleaseProgram (program); clReleaseCommandQueue (queue); clReleaseSampler (external_sampler); clUnloadCompiler (); clReleaseContext (context); free (source); free (filename); free (imageData); printf("OK\n"); return 0; }
/*
 * Builds `prog_src` in `ctx` and runs its "setidx" kernel once on each of
 * the `ndevs` devices in `devs`, then verifies the shared result buffer.
 *
 * ctx      context containing all devices in `devs`; it is RELEASED by this
 *          function on the successful path
 * prog_src OpenCL C source providing the kernel "setidx"
 * mul      expected multiplier: element i of the buffer must equal i*mul
 * ndevs    number of entries in `devs`; used as array length and for
 *          indexing queue[ndevs - 1], so it is expected to be at least 1
 * devs     devices to create one command queue each for
 *
 * The task for device i waits on the events of tasks 0..i-1, so the tasks
 * run as a chain. Returns CL_SUCCESS when everything succeeded; failures
 * are reported through the CHECK_* / TEST_ASSERT macros.
 */
int test_context(cl_context ctx, const char *prog_src, int mul,
                 int ndevs, cl_device_id *devs)
{
  cl_int err;
  cl_command_queue queue[ndevs];
  cl_program prog;
  cl_kernel krn;
  cl_mem buf;
  cl_event evt[ndevs];
  cl_int i;

  prog = clCreateProgramWithSource(ctx, 1, &prog_src, NULL, &err);
  CHECK_OPENCL_ERROR_IN("create program");

  CHECK_CL_ERROR(clBuildProgram(prog, 0, NULL, NULL, NULL, NULL));

  krn = clCreateKernel(prog, "setidx", &err);
  CHECK_OPENCL_ERROR_IN("create kernel");

  /* one cl_int per device */
  buf = clCreateBuffer(ctx,
                       CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE
                           | CL_MEM_HOST_READ_ONLY,
                       ndevs*sizeof(cl_int), NULL, &err);
  CHECK_OPENCL_ERROR_IN("create buffer");

  CHECK_CL_ERROR(clSetKernelArg(krn, 0, sizeof(cl_mem), &buf));

  /* create one queue per device, and submit task, waiting for all
   * previous */
  for (i = 0; i < ndevs; ++i)
    {
      queue[i] = clCreateCommandQueue(ctx, devs[i], 0, &err);
      CHECK_OPENCL_ERROR_IN("create queue");

      /* argument 1 is the index of the device the task runs on */
      err = clSetKernelArg(krn, 1, sizeof(i), &i);
      CHECK_OPENCL_ERROR_IN("set kernel arg 1");

      // no wait list for first (root) device
      err = clEnqueueTask(queue[i], krn, i, i ? evt : NULL, evt + i);
      CHECK_OPENCL_ERROR_IN("submit task");
    }

  /* enqueue map on last */
  cl_int *buf_host = clEnqueueMapBuffer(queue[ndevs - 1], buf, CL_TRUE,
                                        CL_MAP_READ, 0, ndevs*sizeof(cl_int),
                                        ndevs, evt, NULL, &err);
  CHECK_OPENCL_ERROR_IN("map buffer");

  int mismatch = 0;
  for (i = 0; i < ndevs; ++i)
    {
      mismatch += !!(buf_host[i] != i*mul);
    }
  TEST_ASSERT(mismatch == 0);

  /* enqueue unmap on first */
  CHECK_CL_ERROR(clEnqueueUnmapMemObject(queue[0], buf, buf_host,
                                         0, NULL, NULL));

  /* Accumulate the cleanup results of ALL devices: start from a clean
   * value and only ever OR into it, so a failure on an earlier device is
   * not overwritten by a later one. */
  err = CL_SUCCESS;
  for (i = 0 ; i < ndevs; ++i)
    {
      err |= clFinish(queue[i]);
      err |= clReleaseCommandQueue(queue[i]);
      err |= clReleaseEvent(evt[i]);
    }
  err |= clReleaseKernel(krn);
  err |= clReleaseMemObject(buf);
  err |= clReleaseProgram(prog);
  err |= clReleaseContext(ctx);

  CHECK_OPENCL_ERROR_IN("cleanup");
  return CL_SUCCESS;
}
int main(int argc, char **argv) { cl_context ctx; cl_command_queue q; // root device, all devices #define NUMDEVS 6 cl_device_id rootdev, alldevs[NUMDEVS]; // pointers to the sub devices of the partitions EQUALLY and BY_COUNTS // respectively cl_device_id *eqdev = alldevs + 1, *countdev = alldevs + 4; cl_uint max_cus, max_subs, split; cl_uint i, j; cl_int err = poclu_get_any_device(&ctx, &rootdev, &q); CHECK_OPENCL_ERROR_IN("poclu_get_any_device"); TEST_ASSERT( ctx ); TEST_ASSERT( rootdev ); TEST_ASSERT( q ); alldevs[0] = rootdev; err = clGetDeviceInfo(rootdev, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(max_cus), &max_cus, NULL); CHECK_OPENCL_ERROR_IN("CL_DEVICE_MAX_COMPUTE_UNITS"); if (max_cus < 2) { printf("This test requires a cl device with at least 2 compute units" " (a dual-core or better CPU)\n"); return 1; } err = clGetDeviceInfo(rootdev, CL_DEVICE_PARTITION_MAX_SUB_DEVICES, sizeof(max_subs), &max_subs, NULL); CHECK_OPENCL_ERROR_IN("CL_DEVICE_PARTITION_MAX_SUB_DEVICES"); // test fails without possible sub-devices, e.g. 
with basic pocl device TEST_ASSERT(max_subs > 1); cl_device_partition_property *dev_pt; size_t dev_pt_size; err = clGetDeviceInfo(rootdev, CL_DEVICE_PARTITION_PROPERTIES, 0, NULL, &dev_pt_size); CHECK_OPENCL_ERROR_IN("CL_DEVICE_PARTITION_PROPERTIES size"); dev_pt = malloc(dev_pt_size); TEST_ASSERT(dev_pt); err = clGetDeviceInfo(rootdev, CL_DEVICE_PARTITION_PROPERTIES, dev_pt_size, dev_pt, NULL); CHECK_OPENCL_ERROR_IN("CL_DEVICE_PARTITION_PROPERTIES"); j = dev_pt_size / sizeof (*dev_pt); // number of partition types // check that partition types EQUALLY and BY_COUNTS are supported int found = 0; for (i = 0; i < j; ++i) { if (dev_pt[i] == CL_DEVICE_PARTITION_EQUALLY || dev_pt[i] == CL_DEVICE_PARTITION_BY_COUNTS) ++found; } TEST_ASSERT(found == 2); // here we will store the partition types returned by the subdevices cl_device_partition_property *ptype = NULL; size_t ptype_size; cl_uint numdevs = 0; cl_device_id parent; cl_uint sub_cus; /* CL_DEVICE_PARTITION_EQUALLY */ printf("Max CUs: %u\n", max_cus); /* if the device has 3 CUs, 3 subdevices will be created, otherwise 2. 
*/ if (max_cus == 3) split = 3; else split = 2; const cl_device_partition_property equal_splitter[] = { CL_DEVICE_PARTITION_EQUALLY, max_cus/split, 0 }; err = clCreateSubDevices(rootdev, equal_splitter, 0, NULL, &numdevs); CHECK_OPENCL_ERROR_IN("count sub devices"); TEST_ASSERT(numdevs == split); err = clCreateSubDevices(rootdev, equal_splitter, split, eqdev, NULL); CHECK_OPENCL_ERROR_IN("partition equally"); if (split == 2) eqdev[2] = NULL; cl_uint refc; err = clGetDeviceInfo (eqdev[0], CL_DEVICE_REFERENCE_COUNT, sizeof (refc), &refc, NULL); CHECK_OPENCL_ERROR_IN ("get refcount"); TEST_ASSERT (refc == 1); /* First, check that the root device is untouched */ err = clGetDeviceInfo(rootdev, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(sub_cus), &sub_cus, NULL); CHECK_OPENCL_ERROR_IN("parenty CU"); TEST_ASSERT(sub_cus == max_cus); err = clGetDeviceInfo(rootdev, CL_DEVICE_PARENT_DEVICE, sizeof(parent), &parent, NULL); CHECK_OPENCL_ERROR_IN("root parent device"); TEST_ASSERT(parent == NULL); /* partition type may either be NULL or contain a 0 entry */ err = clGetDeviceInfo(rootdev, CL_DEVICE_PARTITION_TYPE, 0, NULL, &ptype_size); CHECK_OPENCL_ERROR_IN("root partition type"); if (ptype_size != 0) { /* abuse dev_pt which should be large enough */ TEST_ASSERT(ptype_size == sizeof(cl_device_partition_property)); TEST_ASSERT(ptype_size <= dev_pt_size); err = clGetDeviceInfo(rootdev, CL_DEVICE_PARTITION_TYPE, ptype_size, dev_pt, NULL); CHECK_OPENCL_ERROR_IN("root partition type #2"); TEST_ASSERT(dev_pt[0] == 0); } /* now test the subdevices */ for (i = 0; i < split; ++i) { err = clGetDeviceInfo(eqdev[i], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(sub_cus), &sub_cus, NULL); CHECK_OPENCL_ERROR_IN("sub CU"); TEST_ASSERT(sub_cus == max_cus/split); err = clGetDeviceInfo(eqdev[i], CL_DEVICE_PARENT_DEVICE, sizeof(parent), &parent, NULL); CHECK_OPENCL_ERROR_IN("sub parent device"); TEST_ASSERT(parent == rootdev); err = clGetDeviceInfo(eqdev[i], CL_DEVICE_PARTITION_TYPE, 0, NULL, &ptype_size); 
CHECK_OPENCL_ERROR_IN("sub partition type"); TEST_ASSERT(ptype_size == sizeof(equal_splitter)); ptype = malloc(ptype_size); TEST_ASSERT(ptype); err = clGetDeviceInfo(eqdev[i], CL_DEVICE_PARTITION_TYPE, ptype_size, ptype, NULL); CHECK_OPENCL_ERROR_IN("sub partition type #2"); TEST_ASSERT(memcmp(ptype, equal_splitter, ptype_size) == 0); /* free the partition type */ free(ptype) ; ptype = NULL; } /* CL_DEVICE_PARTITION_BY_COUNTS */ /* Note that the platform will only read this to the first 0, * which is actually CL_DEVICE_PARTITION_BY_COUNTS_LIST_END; * the test is structured with an additional final 0 intentionally, * to follow the Khoronos doc example */ const cl_device_partition_property count_splitter[] = { CL_DEVICE_PARTITION_BY_COUNTS, 1, max_cus - 1, CL_DEVICE_PARTITION_BY_COUNTS_LIST_END, 0 }; err = clCreateSubDevices(rootdev, count_splitter, 0, NULL, &numdevs); CHECK_OPENCL_ERROR_IN("count sub devices"); TEST_ASSERT(numdevs == 2); err = clCreateSubDevices(rootdev, count_splitter, 2, countdev, NULL); CHECK_OPENCL_ERROR_IN("partition by counts"); /* First, check that the root device is untouched */ err = clGetDeviceInfo(rootdev, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(sub_cus), &sub_cus, NULL); CHECK_OPENCL_ERROR_IN("parenty CU"); TEST_ASSERT(sub_cus == max_cus); err = clGetDeviceInfo(rootdev, CL_DEVICE_PARENT_DEVICE, sizeof(parent), &parent, NULL); CHECK_OPENCL_ERROR_IN("root parent device"); TEST_ASSERT(parent == NULL); /* partition type may either be NULL or contain a 0 entry */ err = clGetDeviceInfo(rootdev, CL_DEVICE_PARTITION_TYPE, 0, NULL, &ptype_size); CHECK_OPENCL_ERROR_IN("root partition type"); if (ptype_size != 0) { /* abuse dev_pt which should be large enough */ TEST_ASSERT(ptype_size == sizeof(cl_device_partition_property)); TEST_ASSERT(ptype_size <= dev_pt_size); err = clGetDeviceInfo(rootdev, CL_DEVICE_PARTITION_TYPE, ptype_size, dev_pt, NULL); CHECK_OPENCL_ERROR_IN("root partition type #2"); TEST_ASSERT(dev_pt[0] == 0); } // devices might be 
returned in different order than the counts // in the count_splitter int found_cus[2] = {0, 0}; /* now test the subdevices */ for (i = 0; i < 2; ++i) { err = clGetDeviceInfo(countdev[i], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(sub_cus), &sub_cus, NULL); CHECK_OPENCL_ERROR_IN("sub CU"); if (sub_cus == count_splitter[1]) found_cus[0] += 1; else if (sub_cus == count_splitter[2]) found_cus[1] += 1; err = clGetDeviceInfo(countdev[i], CL_DEVICE_PARENT_DEVICE, sizeof(parent), &parent, NULL); CHECK_OPENCL_ERROR_IN("sub parent device"); TEST_ASSERT(parent == rootdev); /* The partition type returned is up to the first 0, * which happens to be the CL_DEVICE_PARTITION_BY_COUNTS_LIST_END, * not the final terminating 0 in count_splitter, so it has one less * element. It should be otherwise equal */ err = clGetDeviceInfo(countdev[i], CL_DEVICE_PARTITION_TYPE, 0, NULL, &ptype_size); CHECK_OPENCL_ERROR_IN("sub partition type"); TEST_ASSERT(ptype_size == sizeof(count_splitter) - sizeof(*count_splitter)); ptype = malloc(ptype_size); TEST_ASSERT(ptype); err = clGetDeviceInfo(countdev[i], CL_DEVICE_PARTITION_TYPE, ptype_size, ptype, NULL); CHECK_OPENCL_ERROR_IN("sub partition type #2"); TEST_ASSERT(memcmp(ptype, count_splitter, ptype_size) == 0); /* free the partition type */ free(ptype) ; ptype = NULL; } /* the previous loop finds 1+1 subdevices only on >dual core systems; * on dual cores, the count_splitter is [1, 1] and the above * "(sub_cus == count_splitter[x])" results in 2+0 subdevices found */ if (max_cus > 2) TEST_ASSERT(found_cus[0] == 1 && found_cus[1] == 1); else TEST_ASSERT((found_cus[0] + found_cus[1]) == 2); /* So far, so good. Let's now try and use these devices, * by building a program for all of them and launching kernels on them. * * Note that there's a discrepancy in behavior between implementations: * some assume you can treat sub-devices as their parent device, and thus * e.g. 
using them through any context which includes their parent devices, * other fail miserably if you try this. * * For the time being we will test the stricter behavior, where * sub-devices should be added manually to a context. */ err = clReleaseCommandQueue(q); CHECK_OPENCL_ERROR_IN("clReleaseCommandQueue"); err = clReleaseContext(ctx); CHECK_OPENCL_ERROR_IN("clReleaseContext"); /* if we split into 2 equal parts, third pointer is NULL. Let's copy the * previous device to it */ if (split == 2) eqdev[2] = eqdev[1]; ctx = clCreateContext(NULL, NUMDEVS, alldevs, NULL, NULL, &err); CHECK_OPENCL_ERROR_IN("clCreateContext"); TEST_ASSERT( test_context(ctx, prog_src_all, 1, NUMDEVS, alldevs) == CL_SUCCESS ); ctx = clCreateContext(NULL, NUMDEVS - 1, alldevs + 1, NULL, NULL, &err); CHECK_OPENCL_ERROR_IN("clCreateContext"); TEST_ASSERT( test_context(ctx, prog_src_two, -1, NUMDEVS - 1, alldevs + 1) == CL_SUCCESS ); /* Don't release the same device twice. clReleaseDevice(NULL) should return * an error but not crash. */ if (split == 2) eqdev[2] = NULL; for (i = 0; i < NUMDEVS; i++) clReleaseDevice (alldevs[i]); CHECK_CL_ERROR (clUnloadCompiler ()); free (dev_pt); printf ("OK\n"); return 0; }
int main(void) { cl_int err; cl_platform_id platforms[MAX_PLATFORMS]; cl_uint nplatforms; cl_device_id devices[MAX_DEVICES]; cl_uint ndevices; cl_uint i, j; size_t el, row, col; CHECK_CL_ERROR(clGetPlatformIDs(MAX_PLATFORMS, platforms, &nplatforms)); for (i = 0; i < nplatforms; i++) { CHECK_CL_ERROR(clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, MAX_DEVICES, devices, &ndevices)); /* Only test the devices we actually have room for */ if (ndevices > MAX_DEVICES) ndevices = MAX_DEVICES; for (j = 0; j < ndevices; j++) { /* skip devices that do not support images */ cl_bool has_img; CHECK_CL_ERROR(clGetDeviceInfo(devices[j], CL_DEVICE_IMAGE_SUPPORT, sizeof(has_img), &has_img, NULL)); if (!has_img) continue; cl_context context = clCreateContext(NULL, 1, &devices[j], NULL, NULL, &err); CHECK_OPENCL_ERROR_IN("clCreateContext"); cl_command_queue queue = clCreateCommandQueue(context, devices[j], 0, &err); CHECK_OPENCL_ERROR_IN("clCreateCommandQueue"); cl_ulong alloc; size_t max_height; size_t max_width; #define MAXALLOC (1024U*1024U) CHECK_CL_ERROR(clGetDeviceInfo(devices[j], CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(alloc), &alloc, NULL)); CHECK_CL_ERROR(clGetDeviceInfo(devices[j], CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(max_width), &max_width, NULL)); CHECK_CL_ERROR(clGetDeviceInfo(devices[j], CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(max_height), &max_height, NULL)); while (alloc > MAXALLOC) alloc /= 2; // fit at least one max_width inside the alloc (shrink max_width for this) while (max_width*pixel_size > alloc) max_width /= 2; // round number of elements to next multiple of max_width elements const size_t nels = (alloc/pixel_size/max_width)*max_width; const size_t buf_size = nels*pixel_size; cl_image_desc img_desc; memset(&img_desc, 0, sizeof(img_desc)); img_desc.image_type = CL_MEM_OBJECT_IMAGE2D; img_desc.image_width = max_width; img_desc.image_height = nels/max_width; img_desc.image_depth = 1; cl_ushort null_pixel[4] = {0, 0, 0, 0}; cl_ushort *host_buf = malloc(buf_size); 
TEST_ASSERT(host_buf); for (el = 0; el < nels; el+=4) { host_buf[el] = el & CHANNEL_MAX; host_buf[el+1] = (CHANNEL_MAX - el) & CHANNEL_MAX; host_buf[el+2] = (CHANNEL_MAX/((el & 1) + 1) - el) & CHANNEL_MAX; host_buf[el+3] = (CHANNEL_MAX - el/((el & 1) + 1)) & CHANNEL_MAX; } cl_mem buf = clCreateBuffer(context, CL_MEM_READ_WRITE, buf_size, NULL, &err); CHECK_OPENCL_ERROR_IN("clCreateBuffer"); cl_mem img = clCreateImage(context, CL_MEM_READ_WRITE, &img_format, &img_desc, NULL, &err); CHECK_OPENCL_ERROR_IN("clCreateImage"); CHECK_CL_ERROR(clEnqueueWriteBuffer(queue, buf, CL_TRUE, 0, buf_size, host_buf, 0, NULL, NULL)); const size_t offset = 0; const size_t origin[] = {0, 0, 0}; const size_t region[] = {img_desc.image_width, img_desc.image_height, 1}; CHECK_CL_ERROR(clEnqueueCopyBufferToImage(queue, buf, img, offset, origin, region, 0, NULL, NULL)); size_t row_pitch, slice_pitch; cl_ushort *img_map = clEnqueueMapImage(queue, img, CL_TRUE, CL_MAP_READ, origin, region, &row_pitch, &slice_pitch, 0, NULL, NULL, &err); CHECK_OPENCL_ERROR_IN("clEnqueueMapImage"); CHECK_CL_ERROR(clFinish(queue)); for (row = 0; row < img_desc.image_height; ++row) { for (col = 0; col < img_desc.image_width; ++col) { cl_ushort *img_pixel = (cl_ushort*)((char*)img_map + row*row_pitch) + col*4; cl_ushort *buf_pixel = host_buf + (row*img_desc.image_width + col)*4; if (memcmp(img_pixel, buf_pixel, pixel_size) != 0) printf("%zu %zu %zu : %x %x %x %x | %x %x %x %x\n", row, col, (size_t)(buf_pixel - host_buf), buf_pixel[0], buf_pixel[1], buf_pixel[2], buf_pixel[3], img_pixel[0], img_pixel[1], img_pixel[2], img_pixel[3]); TEST_ASSERT(memcmp(img_pixel, buf_pixel, pixel_size) == 0); } } CHECK_CL_ERROR(clEnqueueUnmapMemObject(queue, img, img_map, 0, NULL, NULL)); /* Clear the buffer, and ensure it has been cleared */ CHECK_CL_ERROR(clEnqueueFillBuffer(queue, buf, null_pixel, sizeof(null_pixel), 0, buf_size, 0, NULL, NULL)); cl_ushort *buf_map = clEnqueueMapBuffer(queue, buf, CL_TRUE, CL_MAP_READ, 0, 
buf_size, 0, NULL, NULL, &err); CHECK_OPENCL_ERROR_IN("clEnqueueMapBuffer"); CHECK_CL_ERROR(clFinish(queue)); for (el = 0; el < nels; ++el) { #if 0 // debug if (buf_map[el] != 0) { printf("%zu/%zu => %u\n", el, nels, buf_map[el]); } #endif TEST_ASSERT(buf_map[el] == 0); } CHECK_CL_ERROR(clEnqueueUnmapMemObject(queue, buf, buf_map, 0, NULL, NULL)); /* Copy data from image to buffer, and check that it's again equal to the original buffer */ CHECK_CL_ERROR(clEnqueueCopyImageToBuffer(queue, img, buf, origin, region, offset, 0, NULL, NULL)); buf_map = clEnqueueMapBuffer(queue, buf, CL_TRUE, CL_MAP_READ, 0, buf_size, 0, NULL, NULL, &err); CHECK_CL_ERROR(clFinish(queue)); TEST_ASSERT(memcmp(buf_map, host_buf, buf_size) == 0); CHECK_CL_ERROR ( clEnqueueUnmapMemObject (queue, buf, buf_map, 0, NULL, NULL)); CHECK_CL_ERROR (clFinish (queue)); free(host_buf); CHECK_CL_ERROR (clReleaseMemObject (img)); CHECK_CL_ERROR (clReleaseMemObject (buf)); CHECK_CL_ERROR (clReleaseCommandQueue (queue)); CHECK_CL_ERROR (clReleaseContext (context)); } } return EXIT_SUCCESS; }
int main(int argc, char **argv) { unsigned int n = 100; double *h_a; double *h_b; double *h_c; cl_mem mem_list[3]; const void *args_mem_loc[3]; struct native_kernel_args args; cl_mem d_a; cl_mem d_b; cl_mem d_c; cl_context ctx; cl_device_id did; cl_command_queue queue; size_t bytes = n * sizeof(double); h_a = (double *) malloc(bytes); h_b = (double *) malloc(bytes); h_c = (double *) malloc(bytes); size_t i; for( i = 0; i < n; i++ ) { h_a[i] = (double)i; h_b[i] = (double)i; } cl_int err; CHECK_CL_ERROR(poclu_get_any_device(&ctx, &did, &queue)); TEST_ASSERT( ctx ); TEST_ASSERT( did ); TEST_ASSERT( queue ); d_a = clCreateBuffer(ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, bytes, h_a, &err); CHECK_OPENCL_ERROR_IN("clCreateBuffer"); TEST_ASSERT(d_a); d_b = clCreateBuffer(ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, bytes, h_b, &err); CHECK_OPENCL_ERROR_IN("clCreateBuffer"); TEST_ASSERT(d_b); d_c = clCreateBuffer(ctx, CL_MEM_WRITE_ONLY, bytes, NULL, &err); CHECK_OPENCL_ERROR_IN("clCreateBuffer"); TEST_ASSERT(d_c); args.size = n; args.a = 0; args.b = 0; args.c = 0; mem_list[0] = d_a; mem_list[1] = d_b; mem_list[2] = d_c; args_mem_loc[0] = &args.a; args_mem_loc[1] = &args.b; args_mem_loc[2] = &args.c; err = clEnqueueNativeKernel ( queue, native_vec_add, &args, sizeof(struct native_kernel_args), 3, mem_list, args_mem_loc, 0, NULL, NULL); CHECK_OPENCL_ERROR_IN("clEnqueueNativeKernel"); err = clEnqueueReadBuffer(queue, d_c, CL_TRUE, 0, bytes, h_c, 0, NULL, NULL ); CHECK_OPENCL_ERROR_IN("clEnqueueReadBuffer"); err = clFinish(queue); CHECK_OPENCL_ERROR_IN("clFinish"); for(i = 0; i < n; i++) if(h_c[i] != 2 * i) { printf("Fail to validate vector\n"); goto error; } CHECK_CL_ERROR (clReleaseMemObject (d_a)); CHECK_CL_ERROR (clReleaseMemObject (d_b)); CHECK_CL_ERROR (clReleaseMemObject (d_c)); CHECK_CL_ERROR (clReleaseCommandQueue (queue)); CHECK_CL_ERROR (clReleaseContext (ctx)); free(h_a); free(h_b); free(h_c); return EXIT_SUCCESS; error: return EXIT_FAILURE; }
int main(void) { cl_int err; cl_platform_id platforms[MAX_PLATFORMS]; cl_uint nplatforms; cl_device_id devices[MAX_DEVICES]; cl_uint ndevices; cl_uint i, j; /* set up a signal handler for ALRM that will kill * the program with EXIT_FAILURE on timeout */ struct sigaction sa; memset(&sa, 0, sizeof(sa)); sa.sa_handler = timeout; sigaction(SIGALRM, &sa, NULL); err = clGetPlatformIDs(MAX_PLATFORMS, platforms, &nplatforms); CHECK_OPENCL_ERROR_IN("clGetPlatformIDs"); for (i = 0; i < nplatforms; i++) { err = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, MAX_DEVICES, devices, &ndevices); CHECK_OPENCL_ERROR_IN("clGetDeviceIDs"); for (j = 0; j < ndevices; j++) { cl_context context = clCreateContext(NULL, 1, &devices[j], NULL, NULL, &err); CHECK_OPENCL_ERROR_IN("clCreateContext"); cl_command_queue queue = clCreateCommandQueue(context, devices[j], 0, &err); CHECK_OPENCL_ERROR_IN("clCreateCommandQueue"); cl_ulong alloc; #define MAXALLOC (128*1024U*1024U) if (clGetDeviceInfo(devices[j], CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(alloc), &alloc, NULL) != CL_SUCCESS) CHECK_OPENCL_ERROR_IN("get max alloc"); while (alloc > MAXALLOC) alloc /= 2; const size_t buf_size = alloc; cl_int *host_buf1 = malloc(buf_size); if (host_buf1 == NULL) return EXIT_FAILURE; cl_int *host_buf2 = malloc(buf_size); if (host_buf2 == NULL) return EXIT_FAILURE; memset(host_buf1, 1, buf_size); memset(host_buf2, 2, buf_size); cl_mem buf1 = clCreateBuffer(context, CL_MEM_READ_WRITE, buf_size, NULL, &err); CHECK_OPENCL_ERROR_IN("create buf1"); cl_mem buf2 = clCreateBuffer(context, CL_MEM_READ_WRITE, buf_size, NULL, &err); CHECK_OPENCL_ERROR_IN("create buf2"); cl_event buf1_event, bufcp_event, buf2_event; /* we test if recycling the wait list leads to neverending loops */ cl_event wait_list[1]; /* Note that this must be CL_TRUE because to trigger the bug the next * command must have a completed event in the waiting lists */ err = clEnqueueWriteBuffer(queue, buf1, CL_TRUE, 0, buf_size, host_buf1, 0, NULL, 
&buf1_event); CHECK_OPENCL_ERROR_IN("write buf1"); *wait_list = buf1_event; err = clEnqueueCopyBuffer(queue, buf1, buf2, 0, 0, buf_size, 1, wait_list, &bufcp_event); CHECK_OPENCL_ERROR_IN("copy buffers"); *wait_list = bufcp_event; err = clEnqueueReadBuffer(queue, buf2, CL_FALSE, 0, buf_size, host_buf2, 1, wait_list, &buf2_event); CHECK_OPENCL_ERROR_IN("read buf"); /* timeout after 30 seconds: if we're not done by then, timeout() will be * invoked and terminate the program with an EXIT_FAILURE */ alarm(30); err = clFinish(queue); CHECK_OPENCL_ERROR_IN("clFinish"); if (memcmp(host_buf2, host_buf1, buf_size) != 0) return EXIT_FAILURE; free(host_buf2); free(host_buf1); clReleaseEvent(buf2_event); clReleaseEvent(bufcp_event); clReleaseEvent(buf1_event); clReleaseMemObject(buf2); clReleaseMemObject(buf1); clReleaseCommandQueue(queue); } } return EXIT_SUCCESS; }