int DeviceFission::cleanup() { // Releases all OpenCL resources of root device cl_int status; status = clReleaseProgram(subProgram); CHECK_OPENCL_ERROR(status, "clReleaseProgram failed. (subProgram)"); status = clReleaseProgram(gpuProgram); CHECK_OPENCL_ERROR(status, "clReleaseProgram failed. (gpuProgram)"); status = clReleaseMemObject(InBuf); CHECK_OPENCL_ERROR(status, "clReleaseMemObject failed. (InBuf)"); status = clReleaseCommandQueue(gpuCmdQueue); CHECK_OPENCL_ERROR(status, "clReleaseCommandQueue failed. (gpuCmdQueue)"); for(cl_uint i = 0; i < numSubDevices; ++i) { status = clReleaseCommandQueue(subCmdQueue[i]); CHECK_OPENCL_ERROR(status, "clReleaseCommandQueue failed. (subCmdQueue)"); status = clReleaseDevice(subDevices[i]); CHECK_OPENCL_ERROR(status, "clReleaseDevice failed. (subDevices)"); status = clReleaseKernel(subKernel[i]); CHECK_OPENCL_ERROR(status, "clReleaseKernel failed. (subKernel)"); status = clReleaseKernel(gpuKernel[i]); CHECK_OPENCL_ERROR(status, "clReleaseKernel failed. (gpuKernel)"); status = clReleaseMemObject(subOutBuf[i]); CHECK_OPENCL_ERROR(status, "clReleaseMemObject failed. (subOutBuf)"); status = clReleaseMemObject(gpuOutBuf[i]); CHECK_OPENCL_ERROR(status, "clReleaseMemObject failed. (gpuOutBuf)"); } for(cl_uint i = 0; i < deviceListSize / sizeof(cl_device_id) ; ++i) { status = clReleaseDevice(Devices[i]); CHECK_OPENCL_ERROR(status, "clReleaseDevice failed. (Devices)"); } status = clReleaseContext(rContext); CHECK_OPENCL_ERROR(status, "clReleaseContext failed. (rContext)"); return SDK_SUCCESS; }
/**
 * Destructor: calls clReleaseDevice on each of the numDevices_ entries of
 * devices_, then frees the devices_ and platforms_ arrays.
 */
OpenCLInfo::~OpenCLInfo()
{
    int idx = 0;
    while (idx < numDevices_)
    {
        clReleaseDevice(devices_[idx]);
        ++idx;
    }

    // The arrays themselves are released with free().
    free(devices_);
    free(platforms_);
}
/**
 * Tears down the OpenCL objects bundled in a CLcontext.
 *
 * Waits for all work queued on c.queue to complete, then releases the queue,
 * the device and the context, in that order.
 *
 * @param c  bundle whose queue, device and context handles are released; the
 *           handles are not reset, so the bundle must not be used afterwards.
 */
inline void releaseContext(CLcontext& c)
{
    // clFinish submits every queued command and blocks until all of them have
    // completed, so no separate clFlush is needed afterwards.
    clFinish(c.queue);

    clReleaseCommandQueue(c.queue);
    clReleaseDevice(c.device);
    clReleaseContext(c.context);
}
/**
 * Releases the command queue, context and (where the API exists) the device
 * stored in an oclHardware record.
 *
 * @param hardware  record providing mQueue, mContext, mDevice and the
 *                  platform version as mMajorVersion / mMinorVersion.
 */
void release(oclHardware& hardware)
{
    clReleaseCommandQueue(hardware.mQueue);
    clReleaseContext(hardware.mContext);

    // clReleaseDevice was introduced in OpenCL 1.2. Compare the version as a
    // (major, minor) pair so that 2.0, 2.1 and 3.0 - whose minor number is
    // below 2 - are recognised as newer than 1.2 as well.
    const bool hasReleaseDevice =
        (hardware.mMajorVersion > 1) ||
        (hardware.mMajorVersion == 1 && hardware.mMinorVersion >= 2);

    if (hasReleaseDevice)
    {
        clReleaseDevice(hardware.mDevice);
    }
}
/**
 * Destroys an OpenCLObject: releases its program, context, command queue and
 * device, then frees the structure itself.
 *
 * @param self  object to destroy; NULL is accepted and ignored, mirroring
 *              free(). The caller's pointer is left dangling and must not be
 *              used after this call.
 */
void OpenCLObject_delete(OpenCLObject *self)
{
    if (self == NULL)
    {
        return;
    }

    clReleaseProgram(self->program);
    clReleaseContext(self->context);
    clReleaseCommandQueue(self->commandQueue);
    clReleaseDevice(self->device);

    /* `self` is a by-value parameter, so assigning NULL to it here would not
     * reach the caller; the structure is simply freed. */
    free(self);
}
/**
 * Releases the command queue, context and device owned by this ClSetup.
 *
 * Each handle is released only when it is non-NULL and is reset to NULL
 * afterwards, so calling Destroy() more than once does not release the same
 * object twice.
 */
void ClSetup::Destroy()
{
    if (queue != NULL)
    {
        clReleaseCommandQueue(queue);
        queue = NULL;
    }

    if (context != NULL)
    {
        clReleaseContext(context);
        context = NULL;
    }

    if (device != NULL)
    {
        clReleaseDevice(device);
        device = NULL;
    }
}
/*
 * Shuts the OpenCL layer down: releases the context, then the command queue
 * and device of each of the ndevices entries, and finally frees the queues
 * and devices arrays. Every release status is passed to clCheckError with a
 * short description of the step.
 */
extern void opencl_done()
{
    uint dev = 0;

    clCheckError(clReleaseContext(context), "releasing context");

    while (dev < ndevices)
    {
        clCheckError(clReleaseCommandQueue(queues[dev]), "releasing queues");
        clCheckError(clReleaseDevice(devices[dev]), "releasing devices");
        dev++;
    }

    free(queues);
    free(devices);
}
// DONE: clean up memory allocated on the GPU void cleanup() { clFinish(commandQueue); openCLErrorID = clReleaseKernel(kernel); openCLErrorID = clReleaseProgram(kernelProgramm); // Free device memory openCLErrorID = clReleaseMemObject(sourceDevPtr); openCLErrorID = clReleaseMemObject(swirlDevPtr); openCLErrorID = clReleaseCommandQueue(commandQueue); openCLErrorID = clReleaseContext(contextHandle); openCLErrorID = clReleaseDevice(deviceHandle); }
int main () { cl_int err; // get first platform cl_platform_id platform; err = clGetPlatformIDs(1, &platform, NULL); // get device count cl_uint deviceCount; err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &deviceCount); printf("deviceCount: %d\n", deviceCount); // get all devices cl_device_id* devices; devices = new cl_device_id[deviceCount]; err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, deviceCount, devices, NULL); // for each device create a separate context AND queue cl_context* contexts = new cl_context[deviceCount]; cl_command_queue* queues = new cl_command_queue[deviceCount]; for (int i = 0; i < deviceCount; i++) { char stringOfDevice[1024]; err = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(stringOfDevice), &stringOfDevice, NULL); printf("context and queue id: %d, device_name: %s\n", i, stringOfDevice); contexts[i] = clCreateContext(NULL, deviceCount, devices, NULL, NULL, &err); queues[i] = clCreateCommandQueue(contexts[i], devices[i], 0, &err); } /* * Here you have one context and one command queue per device. * You can choose to send your tasks to any of these queues. */ // cleanup for(int i = 0; i < deviceCount; i++) { clReleaseDevice(devices[i]); clReleaseContext(contexts[i]); clReleaseCommandQueue(queues[i]); } delete[] devices; delete[] contexts; delete[] queues; return 0; }
void Cleanup(void) { clFinish(commandQueue); openCLErrorID = clReleaseKernel(kernel); openCLErrorID = clReleaseProgram(kernelProgramm); // Free device memory openCLErrorID = clReleaseMemObject(d_E); openCLErrorID = clReleaseMemObject(d_D); openCLErrorID = clReleaseMemObject(d_C); openCLErrorID = clReleaseMemObject(d_B); openCLErrorID = clReleaseMemObject(d_A); openCLErrorID = clReleaseCommandQueue(commandQueue); openCLErrorID = clReleaseContext(contextHandle); openCLErrorID = clReleaseDevice(deviceHandle); // Free host memory if (h_A) free(h_A); if (h_B) free(h_B); if (h_C) free(h_C); // DONE: Free host memory of D and E. if (h_D) free(h_D); if (h_E) free(h_E); // printf("\nPress ENTER to exit...\n"); // fflush( stdout); // fflush( stderr); // getchar(); exit(0); }
void clwTerminate() { clFinish(g_cl_Command_Queue); size_t bufN = g_cl_Outputs.size(); clReleaseMemObject(g_cl_Outputs[0]); for (size_t i=1; i<bufN; i++) { clReleaseMemObject(g_cl_Outputs[i]); clReleaseMemObject(g_cl_Weights[i]); clReleaseMemObject(g_cl_WCorrec[i]); clReleaseMemObject(g_cl_PartDer[i]); } g_cl_Outputs.clear(); g_cl_Weights.clear(); g_cl_WCorrec.clear(); g_cl_PartDer.clear(); for (int i=0; i<NUM_KERNELS; i++) clReleaseKernel(g_cl_Kernels[i]); clReleaseCommandQueue(g_cl_Command_Queue); clReleaseDevice(g_cl_Device); clReleaseContext(g_cl_Context); }
bool cl_runner::release(bool bForce /* = false */) { if (!m_bInitCL && !bForce) return false; if (m_clKernel) { clReleaseKernel(m_clKernel); m_clKernel = NULL; } if (m_clKernel1) { clReleaseKernel(m_clKernel1); m_clKernel1 = NULL; } if (m_clProgram) { clReleaseProgram(m_clProgram); m_clProgram = NULL; } if (m_clCmdQueue) { clReleaseCommandQueue(m_clCmdQueue); m_clCmdQueue = NULL; } if (m_clContext) { clReleaseContext(m_clContext); m_clContext = NULL; } if (m_clDeviceId) { #if defined(CL_VERSION_1_1) || defined(CL_VERSION_1_2) clReleaseDevice(m_clDeviceId); #endif m_clDeviceId = NULL; } m_bInitCL = false; return true; }
/*
 * Sub-device test driver.
 *
 * Partitions the root device twice - once with CL_DEVICE_PARTITION_EQUALLY
 * and once with CL_DEVICE_PARTITION_BY_COUNTS - and checks, through
 * clGetDeviceInfo, that the root device is unchanged and that every
 * sub-device reports the expected compute-unit count, parent and partition
 * type. It then builds contexts over the devices and hands them to
 * test_context().
 *
 * Returns 0 after printing "OK", or 1 when the root device has fewer than two
 * compute units. Failed checks go through CHECK_OPENCL_ERROR_IN / TEST_ASSERT
 * (defined elsewhere).
 */
int main(int argc, char **argv)
{
  cl_context ctx;
  cl_command_queue q;
  // root device, all devices
  // Layout of alldevs: [0] root device, [1..3] EQUALLY sub-devices,
  // [4..5] BY_COUNTS sub-devices.
#define NUMDEVS 6
  cl_device_id rootdev, alldevs[NUMDEVS];
  // pointers to the sub devices of the partitions EQUALLY and BY_COUNTS
  // respectively
  cl_device_id *eqdev = alldevs + 1, *countdev = alldevs + 4;
  cl_uint max_cus, max_subs, split;
  cl_uint i, j;

  cl_int err = poclu_get_any_device(&ctx, &rootdev, &q);
  CHECK_OPENCL_ERROR_IN("poclu_get_any_device");
  TEST_ASSERT( ctx );
  TEST_ASSERT( rootdev );
  TEST_ASSERT( q );

  alldevs[0] = rootdev;

  err = clGetDeviceInfo(rootdev, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(max_cus), &max_cus, NULL);
  CHECK_OPENCL_ERROR_IN("CL_DEVICE_MAX_COMPUTE_UNITS");
  if (max_cus < 2)
    {
      printf("This test requires a cl device with at least 2 compute units" " (a dual-core or better CPU)\n");
      return 1;
    }

  err = clGetDeviceInfo(rootdev, CL_DEVICE_PARTITION_MAX_SUB_DEVICES, sizeof(max_subs), &max_subs, NULL);
  CHECK_OPENCL_ERROR_IN("CL_DEVICE_PARTITION_MAX_SUB_DEVICES");

  // test fails without possible sub-devices, e.g. with basic pocl device
  TEST_ASSERT(max_subs > 1);

  // Supported partition types of the root device (two-step size/value query).
  cl_device_partition_property *dev_pt;
  size_t dev_pt_size;

  err = clGetDeviceInfo(rootdev, CL_DEVICE_PARTITION_PROPERTIES, 0, NULL, &dev_pt_size);
  CHECK_OPENCL_ERROR_IN("CL_DEVICE_PARTITION_PROPERTIES size");

  dev_pt = malloc(dev_pt_size);
  TEST_ASSERT(dev_pt);

  err = clGetDeviceInfo(rootdev, CL_DEVICE_PARTITION_PROPERTIES, dev_pt_size, dev_pt, NULL);
  CHECK_OPENCL_ERROR_IN("CL_DEVICE_PARTITION_PROPERTIES");

  j = dev_pt_size / sizeof (*dev_pt); // number of partition types

  // check that partition types EQUALLY and BY_COUNTS are supported
  int found = 0;
  for (i = 0; i < j; ++i)
    {
      if (dev_pt[i] == CL_DEVICE_PARTITION_EQUALLY || dev_pt[i] == CL_DEVICE_PARTITION_BY_COUNTS)
        ++found;
    }
  TEST_ASSERT(found == 2);

  // here we will store the partition types returned by the subdevices
  cl_device_partition_property *ptype = NULL;
  size_t ptype_size;
  cl_uint numdevs = 0;

  cl_device_id parent;
  cl_uint sub_cus;

  /* CL_DEVICE_PARTITION_EQUALLY */

  printf("Max CUs: %u\n", max_cus);

  /* if the device has 3 CUs, 3 subdevices will be created, otherwise 2. */
  if (max_cus == 3)
    split = 3;
  else
    split = 2;

  const cl_device_partition_property equal_splitter[] = { CL_DEVICE_PARTITION_EQUALLY, max_cus/split, 0 };
  // First call only counts the sub-devices, second call creates them.
  err = clCreateSubDevices(rootdev, equal_splitter, 0, NULL, &numdevs);
  CHECK_OPENCL_ERROR_IN("count sub devices");
  TEST_ASSERT(numdevs == split);

  err = clCreateSubDevices(rootdev, equal_splitter, split, eqdev, NULL);
  CHECK_OPENCL_ERROR_IN("partition equally");
  // With only two sub-devices the third EQUALLY slot stays unused.
  if (split == 2)
    eqdev[2] = NULL;

  // A freshly created sub-device is expected to report a reference count of 1.
  cl_uint refc;
  err = clGetDeviceInfo (eqdev[0], CL_DEVICE_REFERENCE_COUNT, sizeof (refc), &refc, NULL);
  CHECK_OPENCL_ERROR_IN ("get refcount");
  TEST_ASSERT (refc == 1);

  /* First, check that the root device is untouched */

  err = clGetDeviceInfo(rootdev, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(sub_cus), &sub_cus, NULL);
  CHECK_OPENCL_ERROR_IN("parenty CU");
  TEST_ASSERT(sub_cus == max_cus);

  err = clGetDeviceInfo(rootdev, CL_DEVICE_PARENT_DEVICE, sizeof(parent), &parent, NULL);
  CHECK_OPENCL_ERROR_IN("root parent device");
  TEST_ASSERT(parent == NULL);

  /* partition type may either be NULL or contain a 0 entry */
  err = clGetDeviceInfo(rootdev, CL_DEVICE_PARTITION_TYPE, 0, NULL, &ptype_size);
  CHECK_OPENCL_ERROR_IN("root partition type");

  if (ptype_size != 0)
    {
      /* abuse dev_pt which should be large enough */
      TEST_ASSERT(ptype_size == sizeof(cl_device_partition_property));
      TEST_ASSERT(ptype_size <= dev_pt_size);
      err = clGetDeviceInfo(rootdev, CL_DEVICE_PARTITION_TYPE, ptype_size, dev_pt, NULL);
      CHECK_OPENCL_ERROR_IN("root partition type #2");
      TEST_ASSERT(dev_pt[0] == 0);
    }

  /* now test the subdevices */
  for (i = 0; i < split; ++i)
    {
      err = clGetDeviceInfo(eqdev[i], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(sub_cus), &sub_cus, NULL);
      CHECK_OPENCL_ERROR_IN("sub CU");
      TEST_ASSERT(sub_cus == max_cus/split);

      err = clGetDeviceInfo(eqdev[i], CL_DEVICE_PARENT_DEVICE, sizeof(parent), &parent, NULL);
      CHECK_OPENCL_ERROR_IN("sub parent device");
      TEST_ASSERT(parent == rootdev);

      err = clGetDeviceInfo(eqdev[i], CL_DEVICE_PARTITION_TYPE, 0, NULL, &ptype_size);
      CHECK_OPENCL_ERROR_IN("sub partition type");
      // The sub-device must report exactly the property list it was created with.
      TEST_ASSERT(ptype_size == sizeof(equal_splitter));

      ptype = malloc(ptype_size);
      TEST_ASSERT(ptype);
      err = clGetDeviceInfo(eqdev[i], CL_DEVICE_PARTITION_TYPE, ptype_size, ptype, NULL);
      CHECK_OPENCL_ERROR_IN("sub partition type #2");

      TEST_ASSERT(memcmp(ptype, equal_splitter, ptype_size) == 0);

      /* free the partition type */
      free(ptype) ; ptype = NULL;
    }

  /* CL_DEVICE_PARTITION_BY_COUNTS */

  /* Note that the platform will only read this to the first 0,
   * which is actually CL_DEVICE_PARTITION_BY_COUNTS_LIST_END;
   * the test is structured with an additional final 0 intentionally,
   * to follow the Khronos doc example
   */
  const cl_device_partition_property count_splitter[] = { CL_DEVICE_PARTITION_BY_COUNTS, 1, max_cus - 1, CL_DEVICE_PARTITION_BY_COUNTS_LIST_END, 0 };
  err = clCreateSubDevices(rootdev, count_splitter, 0, NULL, &numdevs);
  CHECK_OPENCL_ERROR_IN("count sub devices");
  TEST_ASSERT(numdevs == 2);

  err = clCreateSubDevices(rootdev, count_splitter, 2, countdev, NULL);
  CHECK_OPENCL_ERROR_IN("partition by counts");

  /* First, check that the root device is untouched */

  err = clGetDeviceInfo(rootdev, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(sub_cus), &sub_cus, NULL);
  CHECK_OPENCL_ERROR_IN("parenty CU");
  TEST_ASSERT(sub_cus == max_cus);

  err = clGetDeviceInfo(rootdev, CL_DEVICE_PARENT_DEVICE, sizeof(parent), &parent, NULL);
  CHECK_OPENCL_ERROR_IN("root parent device");
  TEST_ASSERT(parent == NULL);

  /* partition type may either be NULL or contain a 0 entry */
  err = clGetDeviceInfo(rootdev, CL_DEVICE_PARTITION_TYPE, 0, NULL, &ptype_size);
  CHECK_OPENCL_ERROR_IN("root partition type");

  if (ptype_size != 0)
    {
      /* abuse dev_pt which should be large enough */
      TEST_ASSERT(ptype_size == sizeof(cl_device_partition_property));
      TEST_ASSERT(ptype_size <= dev_pt_size);
      err = clGetDeviceInfo(rootdev, CL_DEVICE_PARTITION_TYPE, ptype_size, dev_pt, NULL);
      CHECK_OPENCL_ERROR_IN("root partition type #2");
      TEST_ASSERT(dev_pt[0] == 0);
    }

  // devices might be returned in different order than the counts
  // in the count_splitter
  // found_cus[0] counts sub-devices whose CU count equals count_splitter[1],
  // found_cus[1] those equal to count_splitter[2].
  int found_cus[2] = {0, 0};

  /* now test the subdevices */
  for (i = 0; i < 2; ++i)
    {
      err = clGetDeviceInfo(countdev[i], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(sub_cus), &sub_cus, NULL);
      CHECK_OPENCL_ERROR_IN("sub CU");
      if (sub_cus == count_splitter[1])
        found_cus[0] += 1;
      else if (sub_cus == count_splitter[2])
        found_cus[1] += 1;

      err = clGetDeviceInfo(countdev[i], CL_DEVICE_PARENT_DEVICE, sizeof(parent), &parent, NULL);
      CHECK_OPENCL_ERROR_IN("sub parent device");
      TEST_ASSERT(parent == rootdev);

      /* The partition type returned is up to the first 0,
       * which happens to be the CL_DEVICE_PARTITION_BY_COUNTS_LIST_END,
       * not the final terminating 0 in count_splitter, so it has one less
       * element. It should be otherwise equal */
      err = clGetDeviceInfo(countdev[i], CL_DEVICE_PARTITION_TYPE, 0, NULL, &ptype_size);
      CHECK_OPENCL_ERROR_IN("sub partition type");
      TEST_ASSERT(ptype_size == sizeof(count_splitter) - sizeof(*count_splitter));

      ptype = malloc(ptype_size);
      TEST_ASSERT(ptype);
      err = clGetDeviceInfo(countdev[i], CL_DEVICE_PARTITION_TYPE, ptype_size, ptype, NULL);
      CHECK_OPENCL_ERROR_IN("sub partition type #2");

      TEST_ASSERT(memcmp(ptype, count_splitter, ptype_size) == 0);

      /* free the partition type */
      free(ptype) ; ptype = NULL;
    }

  /* the previous loop finds 1+1 subdevices only on >dual core systems;
   * on dual cores, the count_splitter is [1, 1] and the above
   * "(sub_cus == count_splitter[x])" results in 2+0 subdevices found */
  if (max_cus > 2)
    TEST_ASSERT(found_cus[0] == 1 && found_cus[1] == 1);
  else
    TEST_ASSERT((found_cus[0] + found_cus[1]) == 2);

  /* So far, so good. Let's now try and use these devices,
   * by building a program for all of them and launching kernels on them.
   *
   * Note that there's a discrepancy in behavior between implementations:
   * some assume you can treat sub-devices as their parent device, and thus
   * e.g. using them through any context which includes their parent devices,
   * other fail miserably if you try this.
   *
   * For the time being we will test the stricter behavior, where
   * sub-devices should be added manually to a context.
   */

  // The queue and context from poclu_get_any_device are no longer needed.
  err = clReleaseCommandQueue(q);
  CHECK_OPENCL_ERROR_IN("clReleaseCommandQueue");
  err = clReleaseContext(ctx);
  CHECK_OPENCL_ERROR_IN("clReleaseContext");

  /* if we split into 2 equal parts, third pointer is NULL. Let's copy the
   * previous device to it */
  if (split == 2)
    eqdev[2] = eqdev[1];

  // Context over all six slots: root device plus every sub-device.
  // NOTE(review): ctx is overwritten below without a visible release;
  // presumably test_context() releases it - confirm.
  ctx = clCreateContext(NULL, NUMDEVS, alldevs, NULL, NULL, &err);
  CHECK_OPENCL_ERROR_IN("clCreateContext");
  TEST_ASSERT( test_context(ctx, prog_src_all, 1, NUMDEVS, alldevs) == CL_SUCCESS );

  // Context over the sub-devices only (root device skipped).
  ctx = clCreateContext(NULL, NUMDEVS - 1, alldevs + 1, NULL, NULL, &err);
  CHECK_OPENCL_ERROR_IN("clCreateContext");
  TEST_ASSERT( test_context(ctx, prog_src_two, -1, NUMDEVS - 1, alldevs + 1) == CL_SUCCESS );

  /* Don't release the same device twice. clReleaseDevice(NULL) should return
   * an error but not crash. */
  if (split == 2)
    eqdev[2] = NULL;

  for (i = 0; i < NUMDEVS; i++)
    clReleaseDevice (alldevs[i]);

  CHECK_CL_ERROR (clUnloadCompiler ());

  free (dev_pt);

  printf ("OK\n");

  return 0;
}
void run_vec_add(size_t num_elems, size_t buf_size, cl_int* data) { cl_int err; // Query platforms and devices cl_platform_id platform; err = clGetPlatformIDs(1, &platform, NULL); cl_device_id device; err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL); const cl_context_properties prop[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 }; // Create context cl_context ctx = clCreateContext(prop, 1, &device, NULL, NULL, &err); // Create program unsigned char* program_file = NULL; size_t program_size = 0; read_file(&program_file, &program_size, "src/vec_add.cl"); cl_program program = clCreateProgramWithSource(ctx, 1, (const char **)&program_file, &program_size, &err); err = clBuildProgram(program, 1, &device, NULL, NULL, NULL); free(program_file); // Allocate memory buffers (on the device) cl_mem a = clCreateBuffer(ctx, CL_MEM_READ_ONLY, buf_size, NULL, &err); cl_mem b = clCreateBuffer(ctx, CL_MEM_READ_ONLY, buf_size, NULL, &err); cl_mem c = clCreateBuffer(ctx, CL_MEM_WRITE_ONLY, buf_size, NULL, &err); // Create command queue cl_command_queue queue = clCreateCommandQueue(ctx, device, 0, NULL); // Enqueue the write buffer commands cl_event wb_events[2]; err = clEnqueueWriteBuffer(queue, a, CL_FALSE, 0, buf_size, data, 0, NULL, &wb_events[0]); err = clEnqueueWriteBuffer(queue, b, CL_FALSE, 0, buf_size, data, 0, NULL, &wb_events[1]); // Enqueue the kernel execution command cl_kernel kernel = clCreateKernel(program, "vec_add", &err); err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &c); err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &a); err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &b); const size_t global_offset = 0; cl_event kernel_event; err = clEnqueueNDRangeKernel(queue, kernel, 1, &global_offset, &num_elems, NULL, 2, wb_events, &kernel_event); // Enqueue the read buffer command err = clEnqueueReadBuffer(queue, c, CL_TRUE, 0, buf_size, data, 1, &kernel_event, NULL); // Wait until every commands are finished err = clFinish(queue); // 
Release the resources clReleaseMemObject(a); clReleaseMemObject(b); clReleaseMemObject(c); clReleaseKernel(kernel); clReleaseProgram(program); clReleaseCommandQueue(queue); clReleaseContext(ctx); clReleaseDevice(device); }
/*
 * JNI bridge for CL12.nclReleaseDevice.
 *
 * Both jlong arguments carry native addresses: `function_pointer` is the
 * address of the clReleaseDevice entry point and `device` is the cl_device_id
 * handle. The entry point is invoked with the handle and its cl_int result is
 * returned to Java. `env` and `clazz` are unused.
 */
JNIEXPORT jint JNICALL Java_org_lwjgl_opencl_CL12_nclReleaseDevice(JNIEnv *env, jclass clazz, jlong device, jlong function_pointer) {
	const intptr_t entryAddress = (intptr_t)function_pointer;
	const intptr_t deviceAddress = (intptr_t)device;
	clReleaseDevicePROC releaseFn = (clReleaseDevicePROC)entryAddress;

	return releaseFn((cl_device_id)deviceAddress);
}
int main(int argc, char **argv) { cl_int err; int generations = 0; if (argc < 3) { fprintf(stderr, "Usage: %s <bbbattle_file> <bbbout_file>\n", argv[0]); exit(1); } /* create buffers and load bbbattle file */ int width = 0; int height = 0; int teams = 0; char *alive_h; char *dying_h; struct rgb24 team_colors[256]; int team_counts[256]; FILE *bbbf = fopen(argv[1], "r"); if (bbbf == NULL) { perror(argv[1]); return 1; } int bbberr = read_bbbattle(&width, &height, &teams, &alive_h, &dying_h, team_colors, bbbf); fclose(bbbf); assert(bbberr == READ_BBBATTLE_SUCCESS); /* open bbbout stream */ bbbout_stream *bbbo = bbbout_open_write(argv[2], width, height, teams, team_colors); if (bbbo == NULL) { perror(argv[2]); return 1; } bbbout_write_generation(bbbo, 0, alive_h, dying_h, team_counts); if (check_winner(teams, team_counts, team_colors) != 0) { fputs("Error: the initial generation was already won, i.e., only one team had alive cells. Check the input file.\n", stderr); return 1; } /* create platform */ cl_uint n_platforms = 0; err = clGetPlatformIDs(1, &platform, &n_platforms); if (n_platforms == 0) return 1; char platform_name[256]; size_t platform_name_size; char platform_vendor[256]; size_t platform_vendor_size; err = clGetPlatformInfo(platform, CL_PLATFORM_NAME, 256, platform_name, &platform_name_size); err = clGetPlatformInfo(platform, CL_PLATFORM_VENDOR, 256, platform_vendor, &platform_vendor_size); platform_name[platform_name_size] = '\0'; platform_vendor[platform_vendor_size] = '\0'; printf("Platform Name: %s, Vendor: %s\n", platform_name, platform_vendor); /* create device */ cl_uint n_devices = 0; #ifdef OPENCL_CPU err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &device, &n_devices); #else err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, &n_devices); #endif if (n_devices == 0) return 1; char device_name[256]; size_t device_name_size; char device_vendor[256]; size_t device_vendor_size; err = clGetDeviceInfo(device, CL_DEVICE_NAME, 256, 
device_name, &device_name_size); err = clGetDeviceInfo(device, CL_DEVICE_VENDOR, 256, device_vendor, &device_vendor_size); device_name[device_name_size] = '\0'; device_vendor[device_vendor_size] = '\0'; #ifdef OPENCL_CPU printf("CPU Name: %s, Vendor: %s\n\n", device_name, device_vendor); #else printf("GPU Name: %s, Vendor: %s\n\n", device_name, device_vendor); #endif /* create context */ cl_context_properties cprops[3]; cprops[0] = CL_CONTEXT_PLATFORM; cprops[1] = (cl_context_properties) platform; cprops[2] = 0; context = clCreateContext(cprops, 1, &device, NULL, NULL, &err); assert(err == CL_SUCCESS); const size_t program_source_len = strlen(program_source); cl_program program = clCreateProgramWithSource(context, 1, (const char **) &program_source, &program_source_len, &err); assert(err == CL_SUCCESS); char options[64]; #ifdef OPENCL_CPU sprintf(options, "-DWIDTH=%i -DHEIGHT=%i -DOPENCL_CPU", width, height); #else sprintf(options, "-DWIDTH=%i -DHEIGHT=%i", width, height); #endif err = clBuildProgram(program, 1, &device, options, NULL, NULL); if (err != CL_SUCCESS) { char log[65536]; size_t log_size; clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 65536, log, &log_size); fwrite(log, 1, log_size, stderr); assert(err == CL_SUCCESS); } /* create command queue */ queue = clCreateCommandQueue(context, device, 0, &err); /* create device buffers */ const size_t mem_size = width * height * sizeof(char); dimensions[0] = width; dimensions[1] = height; alive_d = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR, mem_size, alive_h, &err); assert(err == CL_SUCCESS); dying_d = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR, mem_size, dying_h, &err); assert(err == CL_SUCCESS); new_alive_d = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, mem_size, NULL, &err); assert(err == CL_SUCCESS); /* get the kernel */ step_bbbattle = clCreateKernel(program, "step_bbbattle", 
&err); assert(err == CL_SUCCESS); err = clSetKernelArg(step_bbbattle, 0, sizeof(cl_mem), &alive_d); assert(err == CL_SUCCESS); err = clSetKernelArg(step_bbbattle, 1, sizeof(cl_mem), &dying_d); assert(err == CL_SUCCESS); err = clSetKernelArg(step_bbbattle, 2, sizeof(cl_mem), &new_alive_d); assert(err == CL_SUCCESS); /* run kernel and stream to bbbout */ char *alive_target; int gen = 1; while (1) { step(); alive_target = clEnqueueMapBuffer(queue, alive_d, CL_TRUE, CL_MAP_READ, 0, mem_size, 0, NULL, NULL, &err); assert(err == CL_SUCCESS); memcpy(alive_h, alive_target, mem_size); clEnqueueUnmapMemObject(queue, alive_d, alive_target, 0, NULL, NULL); bbbout_write_generation(bbbo, gen, alive_h, NULL, team_counts); print_status(gen, teams, team_counts, team_colors); if (check_winner(teams, team_counts, team_colors) != 0) { break; } gen++; } bbbout_close(bbbo); free(alive_h); free(dying_h); clReleaseCommandQueue(queue); clReleaseKernel(step_bbbattle); clReleaseProgram(program); clReleaseMemObject(alive_d); clReleaseMemObject(dying_d); clReleaseMemObject(new_alive_d); clReleaseContext(context); #ifdef CL_VERSION_1_2 clReleaseDevice(device); #endif return 0; }
/**
 * Creates an OpenCL context that shares media surfaces with the given VA-API
 * display and installs it into the default Context.
 *
 * Without HAVE_VAAPI or HAVE_OPENCL the corresponding NO_*_SUPPORT_ERROR
 * macro is expanded instead. Otherwise every platform is tried in order; the
 * first one that exposes all four Intel VA-API sharing entry points, reports
 * a preferred device for `display` and accepts context creation is used.
 *
 * @param display  VA display the context is bound to.
 * @return the default Context, initialised from the created handles.
 */
Context& initializeContextFromVA(VADisplay display)
{
    (void)display;
#if !defined(HAVE_VAAPI)
    NO_VAAPI_SUPPORT_ERROR;
#elif !defined(HAVE_OPENCL)
    NO_OPENCL_SUPPORT_ERROR;
#else
    contextInitialized = false;

    cl_uint platformCount;
    cl_int status = clGetPlatformIDs(0, NULL, &platformCount);
    if (status != CL_SUCCESS)
        CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't get number of platforms");
    if (platformCount == 0)
        CV_Error(cv::Error::OpenCLInitError, "OpenCL: No available platforms");

    std::vector<cl_platform_id> platforms(platformCount);
    status = clGetPlatformIDs(platformCount, &platforms[0], NULL);
    if (status != CL_SUCCESS)
        CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't get platform Id list");

    // The "cl_intel_va_api_media_sharing" extension shows up in the device
    // extension string rather than the platform one, so (following Intel's
    // tutorial) devices are obtained through the extension call itself. Its
    // function pointers are platform specific and cannot be fetched up front,
    // hence the per-platform lookup below: the first platform that yields
    // non-NULL pointers, a device and a context wins.

    int chosenPlatform = -1;
    cl_context context = 0;
    cl_device_id device = 0;

    for (int p = 0; p < (int)platformCount; ++p)
    {
        // Resolve the extension entry points for this platform.
        clGetDeviceIDsFromVA_APIMediaAdapterINTEL = (clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn)
            clGetExtensionFunctionAddressForPlatform(platforms[p], "clGetDeviceIDsFromVA_APIMediaAdapterINTEL");
        clCreateFromVA_APIMediaSurfaceINTEL = (clCreateFromVA_APIMediaSurfaceINTEL_fn)
            clGetExtensionFunctionAddressForPlatform(platforms[p], "clCreateFromVA_APIMediaSurfaceINTEL");
        clEnqueueAcquireVA_APIMediaSurfacesINTEL = (clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn)
            clGetExtensionFunctionAddressForPlatform(platforms[p], "clEnqueueAcquireVA_APIMediaSurfacesINTEL");
        clEnqueueReleaseVA_APIMediaSurfacesINTEL = (clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn)
            clGetExtensionFunctionAddressForPlatform(platforms[p], "clEnqueueReleaseVA_APIMediaSurfacesINTEL");

        const bool allEntryPointsPresent =
            ((void*)clGetDeviceIDsFromVA_APIMediaAdapterINTEL != NULL) &&
            ((void*)clCreateFromVA_APIMediaSurfaceINTEL != NULL) &&
            ((void*)clEnqueueAcquireVA_APIMediaSurfacesINTEL != NULL) &&
            ((void*)clEnqueueReleaseVA_APIMediaSurfacesINTEL != NULL);
        if (!allEntryPointsPresent)
            continue;

        // Ask how many devices are preferred for this display.
        cl_uint deviceCount = 0;
        status = clGetDeviceIDsFromVA_APIMediaAdapterINTEL(platforms[p], CL_VA_API_DISPLAY_INTEL, display,
                                                           CL_PREFERRED_DEVICES_FOR_VA_API_INTEL, 0, NULL, &deviceCount);
        if (status != CL_SUCCESS)
            continue;
        if (!(deviceCount > 0))
            continue;

        deviceCount = 1; // initializeContextFromHandle() expects only 1 device
        status = clGetDeviceIDsFromVA_APIMediaAdapterINTEL(platforms[p], CL_VA_API_DISPLAY_INTEL, display,
                                                           CL_PREFERRED_DEVICES_FOR_VA_API_INTEL, deviceCount, &device, NULL);
        if (status != CL_SUCCESS)
            continue;

        // Properties of the CL-VA media sharing context.
        cl_context_properties props[] = {
            CL_CONTEXT_VA_API_DISPLAY_INTEL, (cl_context_properties) display,
            CL_CONTEXT_INTEROP_USER_SYNC, CL_FALSE, // no explicit sync required
            0
        };

        context = clCreateContext(props, deviceCount, &device, NULL, NULL, &status);
        if (status == CL_SUCCESS)
        {
            chosenPlatform = p;
            break;
        }

        // Context creation failed on this platform: drop the device, try the next.
        clReleaseDevice(device);
    }

    if (chosenPlatform < 0)
        CV_Error(cv::Error::OpenCLInitError, "OpenCL: Can't create context for VA-API interop");

    Context& ctx = Context::getDefault(false);
    initializeContextFromHandle(ctx, platforms[chosenPlatform], context, device);
    contextInitialized = true;
    return ctx;
#endif
}
/*
 * Destructor - releases every OpenCL object this structure holds.
 *
 * The same sequence can also be used in the middle of an application (rather
 * than only at the end) when the OpenCL runtime is no longer needed, e.g. to
 * free memory or to recreate the objects with different parameters.
 *
 * Each handle is released only if it is set; a failing release is logged
 * through LogError and the teardown continues with the next object.
 */
ocl_args_d_t::~ocl_args_d_t()
{
    cl_int rc = CL_SUCCESS;

    if (kernel)
    {
        rc = clReleaseKernel(kernel);
        if (rc != CL_SUCCESS)
            LogError("Error: clReleaseKernel returned '%s'.\n", TranslateOpenCLError(rc));
    }

    if (program)
    {
        rc = clReleaseProgram(program);
        if (rc != CL_SUCCESS)
            LogError("Error: clReleaseProgram returned '%s'.\n", TranslateOpenCLError(rc));
    }

    // Input buffers A and B, then the destination buffer.
    if (srcA)
    {
        rc = clReleaseMemObject(srcA);
        if (rc != CL_SUCCESS)
            LogError("Error: clReleaseMemObject returned '%s'.\n", TranslateOpenCLError(rc));
    }

    if (srcB)
    {
        rc = clReleaseMemObject(srcB);
        if (rc != CL_SUCCESS)
            LogError("Error: clReleaseMemObject returned '%s'.\n", TranslateOpenCLError(rc));
    }

    if (dstMem)
    {
        rc = clReleaseMemObject(dstMem);
        if (rc != CL_SUCCESS)
            LogError("Error: clReleaseMemObject returned '%s'.\n", TranslateOpenCLError(rc));
    }

    if (commandQueue)
    {
        rc = clReleaseCommandQueue(commandQueue);
        if (rc != CL_SUCCESS)
            LogError("Error: clReleaseCommandQueue returned '%s'.\n", TranslateOpenCLError(rc));
    }

    if (device)
    {
        rc = clReleaseDevice(device);
        if (rc != CL_SUCCESS)
            LogError("Error: clReleaseDevice returned '%s'.\n", TranslateOpenCLError(rc));
    }

    if (context)
    {
        rc = clReleaseContext(context);
        if (rc != CL_SUCCESS)
            LogError("Error: clReleaseContext returned '%s'.\n", TranslateOpenCLError(rc));
    }

    /*
     * The platform is not released here: it was only queried from the
     * OpenCL runtime at startup, never created.
     */
}
/**
 * Initialises the OpenCL state of the runner: platform, context, device and
 * command queue.
 *
 * The platform whose vendor string is "Advanced Micro Devices, Inc." is
 * preferred; if none matches, the last platform enumerated is used. The
 * context is created with clCreateContextFromType(CL_DEVICE_TYPE_DEFAULT),
 * the names of its devices are printed, and the first device is used for the
 * command queue.
 *
 * @return CL_SUCCESS if already initialised or on success; otherwise the
 *         status of the OpenCL call that failed (m_bInitCL stays false).
 */
cl_int cl_runner::init_cl()
{
    if (m_bInitCL)
        return CL_SUCCESS;

    // Error code
    cl_int err_num = CL_SUCCESS;
    cl_uint num_platforms = 0;
    size_t i;

    // Get the number of PlatformIDs
    // (a missing ICD is reported as CL_PLATFORM_NOT_FOUND_KHR, -1001)
    err_num = clGetPlatformIDs(0, 0, &num_platforms);
    if (err_num != CL_SUCCESS) {
        DOL_TRACE("cl_runner: Unable to get platforms \n");
        return err_num;
    }

    // Get the PlatformIDs. The vector is value-initialised (all NULL); the
    // spare slot keeps &platformIds[0] valid when no platform is reported.
    std::vector<cl_platform_id> platformIds(num_platforms + 1);
    err_num = clGetPlatformIDs(num_platforms, &platformIds[0], &num_platforms);
    if (err_num != CL_SUCCESS) {
        DOL_TRACE("cl_runner: Error getting platform id \n");
        return err_num;
    }

    if (num_platforms > 0) {
        char pbuf[256];
        for (i = 0; i < num_platforms; ++i) {
            err_num = clGetPlatformInfo(platformIds[i], CL_PLATFORM_VENDOR, sizeof(pbuf), pbuf, NULL);
            if (err_num != CL_SUCCESS) {
                DOL_TRACE("Error getting platform vendor info \n");
                return err_num;
            }
            // Remember every candidate so the last one is used as a fallback
            // when the preferred vendor is not present.
            m_clPlatformId = platformIds[i];
            if (!strcmp(pbuf, "Advanced Micro Devices, Inc.")) {
                printf("cl_runner: Find platform: %s\n\n", pbuf);
                break;
            }
            else
                printf("cl_runner: Find unknown platform: %s\n\n", pbuf);
        }
    }
    else {
        printf("cl_runner: num_platforms = %d\n\n", num_platforms);
    }

    /*
     * If we could find our platform, use it.
     * Otherwise use just available platform.
     */
    cl_context_properties cps[3] = {
        CL_CONTEXT_PLATFORM, (cl_context_properties)m_clPlatformId, 0
    };

    // Create the Context
    m_clContext = clCreateContextFromType(cps, CL_DEVICE_TYPE_DEFAULT, NULL, NULL, &err_num);
    if (err_num != CL_SUCCESS) {
        DOL_TRACE("cl_runner: Error creating context \n");
        return err_num;
    }

    size_t num_devices, cb, cb_devices = 0;

    // Get number of context devices - first step (size in bytes)
    err_num = clGetContextInfo(m_clContext, CL_CONTEXT_DEVICES, 0, NULL, &cb_devices);
    if (err_num != CL_SUCCESS) {
        DOL_TRACE("cl_runner: Error getting context info \n");
        return err_num;
    }

    num_devices = cb_devices / sizeof(cl_device_id);

    // Value-initialised (all NULL) with one spare slot, as for the platforms.
    std::vector<cl_device_id> devices(num_devices + 1);

    // Get number of context devices - second step (the ids themselves)
    err_num = clGetContextInfo(m_clContext, CL_CONTEXT_DEVICES, cb_devices, &devices[0], 0);
    if (err_num != CL_SUCCESS) {
        DOL_TRACE("cl_runner: Error getting context info \n");
        return err_num;
    }

    std::string dev_name;
    for (i = 0; i < num_devices; ++i) {
        cb = 0;
        // Get device name - first step
        err_num = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 0, NULL, &cb);
        if (err_num == CL_SUCCESS) {
            dev_name = "";
            dev_name.resize(cb);
            // Get device name - second step
            err_num = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, cb, &dev_name[0], 0);
            if (err_num == CL_SUCCESS)
                std::cout << "cl_runner: Device Name: " << dev_name.c_str() << endl;
            else
                std::cout << "cl_runner: Device Name: unknown device name." << endl;
        }
    }

    if (num_devices > 0) {
        // Use the first device of the context. The remaining ids were only
        // queried with clGetContextInfo, which does not retain them, so they
        // must not be passed to clReleaseDevice here.
        m_clDeviceId = devices[0];
    }
    else {
        DOL_TRACE1("cl_runner: num_devices = %d\n", (int)num_devices);
    }

    // Create the command-queue
    m_clCmdQueue = clCreateCommandQueue(m_clContext, m_clDeviceId, 0, &err_num);
    if (err_num != CL_SUCCESS || m_clCmdQueue == NULL) {
        DOL_TRACE("cl_runner: Error creating command queue \n");
        return err_num;
    }

    m_bInitCL = true;
    return CL_SUCCESS;
}