/// Partitions the device into multiple sub-devices according to /// \p properties. /// /// \opencl_version_warning{1,2} std::vector<device> partition(const cl_device_partition_property *properties) const { // get sub-device count uint_ count = 0; int_ ret = clCreateSubDevices(m_id, properties, 0, 0, &count); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } // get sub-device ids std::vector<cl_device_id> ids(count); ret = clCreateSubDevices(m_id, properties, count, &ids[0], 0); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } // convert ids to device objects std::vector<device> devices(count); for(size_t i = 0; i < count; i++){ devices[i] = device(ids[i], false); } return devices; }
std::vector<Device> Device::partition( std::vector<cl_device_partition_property> const& properties, error::ErrorMap error_map ) { auto error = cl_int{CL_INVALID_VALUE}; auto count_elems = cl_uint{0}; error = clCreateSubDevices(m_id, properties.data(), 0, nullptr, std::addressof(count_elems)); error::handle<DeviceException>(error, error_map); auto subdevices = std::vector<Device>(count_elems, m_id); error = clCreateSubDevices( m_id, properties.data(), count_elems, reinterpret_cast<cl_device_id*>(subdevices.data()), nullptr ); error::handle<DeviceException>(error, error_map); return std::move(subdevices); }
/*
 * JNI entry point for org.lwjgl.opencl.CL12.nclCreateSubDevices.
 *
 * Every jlong argument carries a native address (or handle) that is cast
 * back to the corresponding OpenCL type. function_pointer is the address of
 * the clCreateSubDevices implementation to invoke. The cl_int status of that
 * call is returned to Java unchanged. env and clazz are not used.
 */
JNIEXPORT jint JNICALL Java_org_lwjgl_opencl_CL12_nclCreateSubDevices(JNIEnv *env, jclass clazz, jlong in_device, jlong properties, jint num_devices, jlong out_devices, jlong num_devices_ret, jlong function_pointer) {
	/* Resolve the native function to call. */
	clCreateSubDevicesPROC create_sub_devices = (clCreateSubDevicesPROC)((intptr_t)function_pointer);

	/* Turn the Java-side addresses back into typed native pointers. */
	cl_device_id parent_device = (cl_device_id)(intptr_t)in_device;
	const cl_device_partition_property *props = (const cl_device_partition_property *)(intptr_t)properties;
	cl_device_id *devices_out = (cl_device_id *)(intptr_t)out_devices;
	cl_uint *count_out = (cl_uint *)(intptr_t)num_devices_ret;

	cl_int status = create_sub_devices(parent_device, props, num_devices, devices_out, count_out);
	return status;
}
/**
 * Partitions cdDevices[0] into equally sized sub-devices of one compute unit
 * each and creates a context that contains the first sub-device.
 *
 * Status codes are passed to checkClError(); the process exits if no
 * sub-device is produced or the id array cannot be allocated.
 *
 * @param platform      not used by this function.
 * @param ciDeviceCount number of entries in cdDevices; only logged here.
 * @param cdDevices     device list; only element 0 is partitioned.
 * @param cores         not used by this function.
 * @return the context created for the first sub-device.
 */
cl_context OclHost::partitionDevice(cl_platform_id platform,
		cl_uint ciDeviceCount, cl_device_id *cdDevices, cl_int cores) {
	cl_uint numSubDevices = 0;
	cl_int ciErrNum = CL_SUCCESS;

	Log.Message("%d", ciDeviceCount);

	// CL_DEVICE_PARTITION_EQUALLY with 1 compute unit per sub-device;
	// the trailing 0 terminates the property list.
	cl_device_partition_property partitionPrty[3];
	partitionPrty[0] = CL_DEVICE_PARTITION_EQUALLY;
	partitionPrty[1] = 1;
	partitionPrty[2] = 0;

	// First call: query how many sub-devices the scheme yields.
	ciErrNum = clCreateSubDevices(cdDevices[0], partitionPrty, 0, NULL,
			&numSubDevices);
	checkClError("Couldn't query number of sub-devices. Error: ", ciErrNum);
	Log.Message("%d", numSubDevices);

	// Without at least one sub-device there is nothing to build a context on.
	if (numSubDevices == 0) {
		Log.Error("Device partitioning yielded no sub-devices.");
		exit(-1);
	}

	cl_device_id *subDevices = (cl_device_id*) (malloc(
			numSubDevices * sizeof(cl_device_id)));
	if (subDevices == NULL) {
		Log.Error("Couldn't allocate memory for sub-device ids.");
		exit(-1);
	}

	// Second call: actually create the sub-devices.
	ciErrNum = clCreateSubDevices(cdDevices[0], partitionPrty, numSubDevices,
			subDevices, NULL);
	checkClError("Couldn't create sub-devices. Error: ", ciErrNum);

	// Create context for the first sub-device only.
	cl_context context = clCreateContext(0, 1, subDevices, NULL, NULL,
			&ciErrNum);
	checkClError("Couldn't create context. Error: ", ciErrNum);

	Log.Verbose("Dividing CPU into %d devices.", numSubDevices);

	// The remaining sub-devices are not part of the context and their
	// handles are lost once the array is freed, so release them here.
	// NOTE(review): subDevices[0] is left un-released on purpose; whether
	// the context alone should keep it alive is a lifetime decision for
	// the caller - confirm.
	for (cl_uint i = 1; i < numSubDevices; ++i) {
		clReleaseDevice(subDevices[i]);
	}

	free(subDevices);

	return context;
}
/*
 * Sub-device test driver.
 *
 * Obtains a device through poclu_get_any_device(), partitions it once with
 * CL_DEVICE_PARTITION_EQUALLY and once with CL_DEVICE_PARTITION_BY_COUNTS,
 * checks the device-info queries of the root device and of every sub-device,
 * and finally creates two contexts from the collected devices and passes
 * them to test_context().
 *
 * Returns 1 when the device reports fewer than 2 compute units and 0 after
 * printing "OK".
 *
 * NOTE(review): CHECK_OPENCL_ERROR_IN, CHECK_CL_ERROR and TEST_ASSERT are
 * macros defined outside this function; they presumably inspect the local
 * `err` and bail out on failure - confirm before renaming `err`.
 */
int main(int argc, char **argv)
{
  cl_context ctx;
  cl_command_queue q;
  // root device, all devices
  // Layout of alldevs: [0] root device, [1..3] EQUALLY sub-devices,
  // [4..5] BY_COUNTS sub-devices.
#define NUMDEVS 6
  cl_device_id rootdev, alldevs[NUMDEVS];
  // pointers to the sub devices of the partitions EQUALLY and BY_COUNTS
  // respectively
  cl_device_id
    *eqdev = alldevs + 1,
    *countdev = alldevs + 4;
  cl_uint max_cus, max_subs, split;
  cl_uint i, j;

  cl_int err = poclu_get_any_device(&ctx, &rootdev, &q);
  CHECK_OPENCL_ERROR_IN("poclu_get_any_device");
  TEST_ASSERT( ctx );
  TEST_ASSERT( rootdev );
  TEST_ASSERT( q );

  alldevs[0] = rootdev;

  // The partitions below need at least two compute units to split.
  err = clGetDeviceInfo(rootdev, CL_DEVICE_MAX_COMPUTE_UNITS,
    sizeof(max_cus), &max_cus, NULL);
  CHECK_OPENCL_ERROR_IN("CL_DEVICE_MAX_COMPUTE_UNITS");
  if (max_cus < 2)
    {
      printf("This test requires a cl device with at least 2 compute units"
             " (a dual-core or better CPU)\n");
      return 1;
    }

  err = clGetDeviceInfo(rootdev, CL_DEVICE_PARTITION_MAX_SUB_DEVICES,
    sizeof(max_subs), &max_subs, NULL);
  CHECK_OPENCL_ERROR_IN("CL_DEVICE_PARTITION_MAX_SUB_DEVICES");

  // test fails without possible sub-devices, e.g. with basic pocl device
  TEST_ASSERT(max_subs > 1);

  // Two-step query: first the size in bytes of the supported partition
  // property list, then the list itself.
  cl_device_partition_property *dev_pt;
  size_t dev_pt_size;

  err = clGetDeviceInfo(rootdev, CL_DEVICE_PARTITION_PROPERTIES,
    0, NULL, &dev_pt_size);
  CHECK_OPENCL_ERROR_IN("CL_DEVICE_PARTITION_PROPERTIES size");

  dev_pt = malloc(dev_pt_size);
  TEST_ASSERT(dev_pt);
  err = clGetDeviceInfo(rootdev, CL_DEVICE_PARTITION_PROPERTIES,
    dev_pt_size, dev_pt, NULL);
  CHECK_OPENCL_ERROR_IN("CL_DEVICE_PARTITION_PROPERTIES");

  j = dev_pt_size / sizeof (*dev_pt); // number of partition types

  // check that partition types EQUALLY and BY_COUNTS are supported
  int found = 0;
  for (i = 0; i < j; ++i)
    {
      if (dev_pt[i] == CL_DEVICE_PARTITION_EQUALLY
          || dev_pt[i] == CL_DEVICE_PARTITION_BY_COUNTS)
        ++found;
    }

  TEST_ASSERT(found == 2);

  // here we will store the partition types returned by the subdevices
  cl_device_partition_property *ptype = NULL;
  size_t ptype_size;
  cl_uint numdevs = 0;

  cl_device_id parent;
  cl_uint sub_cus;

  /* CL_DEVICE_PARTITION_EQUALLY */

  printf("Max CUs: %u\n", max_cus);

  /* if the device has 3 CUs, 3 subdevices will be created, otherwise 2. */
  if (max_cus == 3)
    split = 3;
  else
    split = 2;

  const cl_device_partition_property equal_splitter[] = {
    CL_DEVICE_PARTITION_EQUALLY, max_cus/split, 0 };

  // Count query first, then the real creation into eqdev (alldevs[1..]).
  err = clCreateSubDevices(rootdev, equal_splitter, 0, NULL, &numdevs);
  CHECK_OPENCL_ERROR_IN("count sub devices");
  TEST_ASSERT(numdevs == split);

  err = clCreateSubDevices(rootdev, equal_splitter, split, eqdev, NULL);
  CHECK_OPENCL_ERROR_IN("partition equally");
  // With only two sub-devices the third EQUALLY slot has no device yet.
  if (split == 2)
    eqdev[2] = NULL;

  // A freshly created sub-device is expected to report a refcount of 1.
  cl_uint refc;
  err = clGetDeviceInfo (eqdev[0], CL_DEVICE_REFERENCE_COUNT, sizeof (refc),
                         &refc, NULL);
  CHECK_OPENCL_ERROR_IN ("get refcount");
  TEST_ASSERT (refc == 1);

  /* First, check that the root device is untouched */

  err = clGetDeviceInfo(rootdev, CL_DEVICE_MAX_COMPUTE_UNITS,
    sizeof(sub_cus), &sub_cus, NULL);
  CHECK_OPENCL_ERROR_IN("parenty CU");
  TEST_ASSERT(sub_cus == max_cus);

  err = clGetDeviceInfo(rootdev, CL_DEVICE_PARENT_DEVICE,
    sizeof(parent), &parent, NULL);
  CHECK_OPENCL_ERROR_IN("root parent device");
  TEST_ASSERT(parent == NULL);

  /* partition type may either be NULL or contain a 0 entry */
  err = clGetDeviceInfo(rootdev, CL_DEVICE_PARTITION_TYPE,
    0, NULL, &ptype_size);
  CHECK_OPENCL_ERROR_IN("root partition type");

  if (ptype_size != 0)
    {
      /* abuse dev_pt which should be large enough */
      TEST_ASSERT(ptype_size == sizeof(cl_device_partition_property));
      TEST_ASSERT(ptype_size <= dev_pt_size);
      err = clGetDeviceInfo(rootdev, CL_DEVICE_PARTITION_TYPE,
        ptype_size, dev_pt, NULL);
      CHECK_OPENCL_ERROR_IN("root partition type #2");
      TEST_ASSERT(dev_pt[0] == 0);
    }

  /* now test the subdevices */
  for (i = 0; i < split; ++i)
    {
      // Each EQUALLY sub-device must own max_cus/split compute units.
      err = clGetDeviceInfo(eqdev[i], CL_DEVICE_MAX_COMPUTE_UNITS,
        sizeof(sub_cus), &sub_cus, NULL);
      CHECK_OPENCL_ERROR_IN("sub CU");
      TEST_ASSERT(sub_cus == max_cus/split);

      err = clGetDeviceInfo(eqdev[i], CL_DEVICE_PARENT_DEVICE,
        sizeof(parent), &parent, NULL);
      CHECK_OPENCL_ERROR_IN("sub parent device");
      TEST_ASSERT(parent == rootdev);

      // The reported partition type must match equal_splitter byte for byte.
      err = clGetDeviceInfo(eqdev[i], CL_DEVICE_PARTITION_TYPE,
        0, NULL, &ptype_size);
      CHECK_OPENCL_ERROR_IN("sub partition type");
      TEST_ASSERT(ptype_size == sizeof(equal_splitter));

      ptype = malloc(ptype_size);
      TEST_ASSERT(ptype);
      err = clGetDeviceInfo(eqdev[i], CL_DEVICE_PARTITION_TYPE,
        ptype_size, ptype, NULL);
      CHECK_OPENCL_ERROR_IN("sub partition type #2");

      TEST_ASSERT(memcmp(ptype, equal_splitter, ptype_size) == 0);

      /* free the partition type */
      free(ptype) ; ptype = NULL;
    }

  /* CL_DEVICE_PARTITION_BY_COUNTS */

  /* Note that the platform will only read this to the first 0,
   * which is actually CL_DEVICE_PARTITION_BY_COUNTS_LIST_END;
   * the test is structured with an additional final 0 intentionally,
   * to follow the Khronos doc example
   */
  const cl_device_partition_property count_splitter[] = {
    CL_DEVICE_PARTITION_BY_COUNTS, 1, max_cus - 1,
    CL_DEVICE_PARTITION_BY_COUNTS_LIST_END, 0 };

  // Count query first, then the real creation into countdev (alldevs[4..5]).
  err = clCreateSubDevices(rootdev, count_splitter, 0, NULL, &numdevs);
  CHECK_OPENCL_ERROR_IN("count sub devices");
  TEST_ASSERT(numdevs == 2);

  err = clCreateSubDevices(rootdev, count_splitter, 2, countdev, NULL);
  CHECK_OPENCL_ERROR_IN("partition by counts");

  /* First, check that the root device is untouched */

  err = clGetDeviceInfo(rootdev, CL_DEVICE_MAX_COMPUTE_UNITS,
    sizeof(sub_cus), &sub_cus, NULL);
  CHECK_OPENCL_ERROR_IN("parenty CU");
  TEST_ASSERT(sub_cus == max_cus);

  err = clGetDeviceInfo(rootdev, CL_DEVICE_PARENT_DEVICE,
    sizeof(parent), &parent, NULL);
  CHECK_OPENCL_ERROR_IN("root parent device");
  TEST_ASSERT(parent == NULL);

  /* partition type may either be NULL or contain a 0 entry */
  err = clGetDeviceInfo(rootdev, CL_DEVICE_PARTITION_TYPE,
    0, NULL, &ptype_size);
  CHECK_OPENCL_ERROR_IN("root partition type");

  if (ptype_size != 0)
    {
      /* abuse dev_pt which should be large enough */
      TEST_ASSERT(ptype_size == sizeof(cl_device_partition_property));
      TEST_ASSERT(ptype_size <= dev_pt_size);
      err = clGetDeviceInfo(rootdev, CL_DEVICE_PARTITION_TYPE,
        ptype_size, dev_pt, NULL);
      CHECK_OPENCL_ERROR_IN("root partition type #2");
      TEST_ASSERT(dev_pt[0] == 0);
    }

  // devices might be returned in different order than the counts
  // in the count_splitter
  int found_cus[2] = {0, 0};

  /* now test the subdevices */
  for (i = 0; i < 2; ++i)
    {
      // Tally which requested count (1 or max_cus - 1) this device matches.
      err = clGetDeviceInfo(countdev[i], CL_DEVICE_MAX_COMPUTE_UNITS,
        sizeof(sub_cus), &sub_cus, NULL);
      CHECK_OPENCL_ERROR_IN("sub CU");
      if (sub_cus == count_splitter[1])
        found_cus[0] += 1;
      else if (sub_cus == count_splitter[2])
        found_cus[1] += 1;

      err = clGetDeviceInfo(countdev[i], CL_DEVICE_PARENT_DEVICE,
        sizeof(parent), &parent, NULL);
      CHECK_OPENCL_ERROR_IN("sub parent device");
      TEST_ASSERT(parent == rootdev);

      /* The partition type returned is up to the first 0,
       * which happens to be the CL_DEVICE_PARTITION_BY_COUNTS_LIST_END,
       * not the final terminating 0 in count_splitter, so it has one less
       * element. It should be otherwise equal */
      err = clGetDeviceInfo(countdev[i], CL_DEVICE_PARTITION_TYPE,
        0, NULL, &ptype_size);
      CHECK_OPENCL_ERROR_IN("sub partition type");
      TEST_ASSERT(ptype_size == sizeof(count_splitter) - sizeof(*count_splitter));

      ptype = malloc(ptype_size);
      TEST_ASSERT(ptype);
      err = clGetDeviceInfo(countdev[i], CL_DEVICE_PARTITION_TYPE,
        ptype_size, ptype, NULL);
      CHECK_OPENCL_ERROR_IN("sub partition type #2");

      TEST_ASSERT(memcmp(ptype, count_splitter, ptype_size) == 0);

      /* free the partition type */
      free(ptype) ; ptype = NULL;
    }

  /* the previous loop finds 1+1 subdevices only on >dual core systems;
   * on dual cores, the count_splitter is [1, 1] and the above
   * "(sub_cus == count_splitter[x])" results in 2+0 subdevices found */
  if (max_cus > 2)
    TEST_ASSERT(found_cus[0] == 1 && found_cus[1] == 1);
  else
    TEST_ASSERT((found_cus[0] + found_cus[1]) == 2);

  /* So far, so good. Let's now try and use these devices,
   * by building a program for all of them and launching kernels on them.
   *
   * Note that there's a discrepancy in behavior between implementations:
   * some assume you can treat sub-devices as their parent device, and thus
   * e.g. using them through any context which includes their parent devices,
   * other fail miserably if you try this.
   *
   * For the time being we will test the stricter behavior, where
   * sub-devices should be added manually to a context.
   */

  // Drop the queue and context obtained from poclu_get_any_device().
  err = clReleaseCommandQueue(q);
  CHECK_OPENCL_ERROR_IN("clReleaseCommandQueue");
  err = clReleaseContext(ctx);
  CHECK_OPENCL_ERROR_IN("clReleaseContext");

  /* if we split into 2 equal parts, third pointer is NULL. Let's copy the
   * previous device to it */
  if (split == 2)
    eqdev[2] = eqdev[1];

  // Context over all NUMDEVS entries: root device plus every sub-device.
  ctx = clCreateContext(NULL, NUMDEVS, alldevs, NULL, NULL, &err);
  CHECK_OPENCL_ERROR_IN("clCreateContext");

  TEST_ASSERT( test_context(ctx, prog_src_all, 1, NUMDEVS, alldevs) == CL_SUCCESS );

  // Context over the sub-devices only (root device skipped).
  // NOTE(review): ctx is overwritten here without a visible release of the
  // previous context - presumably test_context() releases it; confirm.
  ctx = clCreateContext(NULL, NUMDEVS - 1, alldevs + 1, NULL, NULL, &err);
  CHECK_OPENCL_ERROR_IN("clCreateContext");

  TEST_ASSERT( test_context(ctx, prog_src_two, -1, NUMDEVS - 1, alldevs + 1)
    == CL_SUCCESS );

  /* Don't release the same device twice. clReleaseDevice(NULL) should return
   * an error but not crash. */
  if (split == 2)
    eqdev[2] = NULL;

  for (i = 0; i < NUMDEVS; i++)
    clReleaseDevice (alldevs[i]);

  CHECK_CL_ERROR (clUnloadCompiler ());

  free (dev_pt);

  printf ("OK\n");

  return 0;
}
int DeviceFission::setupCLPlatform() { cl_int status = CL_SUCCESS; /* * Have a look at the available platforms and pick either * the AMD one if available or a reasonable default. */ cl_platform_id platform = NULL; int retValue = sampleCommon->getPlatform(platform, platformId, isPlatformEnabled()); CHECK_ERROR(retValue, SDK_SUCCESS, "sampleCommon::getPlatform(rootplatform) failed"); // Display available devices. retValue = sampleCommon->displayDevices(platform, CL_DEVICE_TYPE_ALL); CHECK_ERROR(retValue, SDK_SUCCESS, "sampleCommon::displayDevices(rootplatform) failed"); /* * If we could find our platform, use it. Otherwise use just available platform. */ cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 }; rContext = clCreateContextFromType(platform ? cps : NULL, CL_DEVICE_TYPE_ALL, NULL, NULL, &status); CHECK_OPENCL_ERROR( status, "clCreateContextFromType failed."); // getting devices on which to run the sample status = sampleCommon->getDevices(rContext, &Devices, 0, isDeviceIdEnabled()); CHECK_ERROR(status, SDK_SUCCESS, "sampleCommon::getDevices() failed"); // Set deviceListSize from clGetContextInfo status = clGetContextInfo(rContext, CL_CONTEXT_DEVICES, 0, 0, &deviceListSize); CHECK_ERROR(status, SDK_SUCCESS, "clGetContextInfo failed. (deviceListSize)"); // Get GPU device and CPU devices by the deviceInfo. for (cl_uint i = 0 ; i < deviceListSize / sizeof(cl_device_id) ; i++) { retValue = deviceInfo.setDeviceInfo(Devices[i]); CHECK_ERROR(retValue, 0, "SDKDeviceInfo::setDeviceInfo() failed"); if (deviceInfo.dType == CL_DEVICE_TYPE_GPU) { gpuAvailable = CL_TRUE; gpuDevice = Devices[i]; groupSize = deviceInfo.maxWorkGroupSize; } else if (deviceInfo.dType == CL_DEVICE_TYPE_CPU) { cpuDevice = Devices[i]; } } // Using CPU to replace GPU if unable to find GPU. 
if(gpuAvailable == CL_FALSE) { std::cout << "\nUnable to find GPU, disable cpu2gpu mode."<< std::endl; gpuDevice = cpuDevice; cpu2gpu = CL_FALSE; } // Get allocate memory for subDevices subDevices = (cl_device_id*)malloc(numSubDevices * sizeof(cl_device_id)); CHECK_ALLOCATION(subDevices, "Failed to allocate memory. (subDevices)"); // Get allocate memory for subKernel subKernel = (cl_kernel*)malloc(numSubDevices * sizeof(cl_kernel)); CHECK_ALLOCATION(subKernel, "Failed to allocate memory. (subKernel)"); // Get allocate memory for gpuKernel gpuKernel = (cl_kernel*)malloc(numSubDevices * sizeof(cl_kernel)); CHECK_ALLOCATION(gpuKernel, "Failed to allocate memory. (gpuKernel)"); // Get maxSubDevices from clGetDeviceInfo cl_uint maxSubDevices; status = clGetDeviceInfo(cpuDevice, CL_DEVICE_PARTITION_MAX_SUB_DEVICES, sizeof(maxSubDevices), &maxSubDevices, NULL); CHECK_OPENCL_ERROR(status, "clGetDeviceInfo failed. (maxSubDevices)") if(maxSubDevices <= 1) { std::cout<<"Error: The CPU should have than one core to run this sample."<<std::endl; return SDK_FAILURE; } // Initialize required partition property cl_device_partition_property partitionPrty[5] = { CL_DEVICE_PARTITION_BY_COUNTS, maxSubDevices / 2, maxSubDevices / 2, CL_DEVICE_PARTITION_BY_COUNTS_LIST_END, 0 }; // Create sub-devices status = clCreateSubDevices(cpuDevice, partitionPrty, numSubDevices, subDevices, NULL); CHECK_OPENCL_ERROR( status, "clCreateSubDevices failed."); return SDK_SUCCESS; }
/**
 * Initializes the OpenCL state held in `cv`: platform, device list, context,
 * one profiling-enabled command queue per device, and the memset kernel.
 *
 * The target devices are chosen by the first of these environment variables
 * that is set: HM_CPU0, HM_CPU0_SUB1, HM_GPU0, HM_GPU01, HM_GPU1. If none is
 * set, or any required OpenCL call fails, a message is printed to std::cout
 * and the process exits with status 1.
 *
 * @param cv structure that receives platform, device_ids, num_devices,
 *           context, commands and the memset program/kernel; cv.err holds
 *           the status of the most recent OpenCL call.
 */
void initialize_ocl(cl_vars_t& cv)
{
  cl_uint num_platforms;
  cv.err = clGetPlatformIDs(1, &(cv.platform), &(num_platforms));
  if(cv.err != CL_SUCCESS)
    {
      std::cout << "Could not get platform ID" << std::endl;
      exit(1);
    }

  if(getenv("HM_CPU0"))
    {
      std::cout << "Running on CPU 0" << std::endl;
      cl_uint max_devices = 1;
      cv.err = clGetDeviceIDs(cv.platform, CL_DEVICE_TYPE_CPU, max_devices, cv.device_ids, &(cv.num_devices));
      cv.num_devices = 1;
    }
  else if(getenv("HM_CPU0_SUB1"))
    {
      std::cout << "Running on Subdivided1 CPU 0" << std::endl;
      cl_uint max_devices = 1;
      cl_device_id dev0;
      cv.err = clGetDeviceIDs(cv.platform, CL_DEVICE_TYPE_CPU, max_devices, &dev0, &(cv.num_devices));
      // Check now: the status would otherwise be overwritten by the
      // clCreateSubDevices call below and dev0 could be used uninitialized.
      if(cv.err != CL_SUCCESS)
        {
          std::cout << "Could not get CPU device ID" << std::endl;
          exit(1);
        }

      cl_uint num_subdevices = 0;
      cl_device_partition_property props[3];
      props[0] = CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN;
      props[1] = CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE;
      props[2] = 0;

      cl_device_id id4[4];
      // Offer the whole array so that up to four sub-devices can be returned.
      const cl_uint id4_capacity = sizeof(id4) / sizeof(id4[0]);
      cv.err = clCreateSubDevices(dev0, props, id4_capacity, id4, &num_subdevices);
      if(cv.err != CL_SUCCESS)
        {
          std::cout << "Could not create sub-devices" << std::endl;
          exit(1);
        }
      std::cout << "num subdevices: " << num_subdevices << std::endl;

      // This mode runs on the second sub-device, so two are required.
      if(num_subdevices < 2)
        {
          std::cout << "Need at least two sub-devices" << std::endl;
          exit(1);
        }
      cv.device_ids[0] = id4[1];
      cv.num_devices = 1;
    }
  else if(getenv("HM_GPU0"))
    {
      std::cout << "Running on GPU 0" << std::endl;
      cl_uint max_devices = 1;
      cv.err = clGetDeviceIDs(cv.platform, CL_DEVICE_TYPE_GPU, max_devices, cv.device_ids, &(cv.num_devices));
      cv.num_devices = 1;
    }
  else if(getenv("HM_GPU01"))
    {
      std::cout << "Running on GPU 0 and GPU 1" << std::endl;
      cl_uint max_devices = 2;
      cv.err = clGetDeviceIDs(cv.platform, CL_DEVICE_TYPE_GPU, max_devices, cv.device_ids, &(cv.num_devices));
      // Both GPUs must really exist before num_devices is forced to 2.
      if(cv.err == CL_SUCCESS && cv.num_devices < 2)
        {
          std::cout << "Need at least two GPU devices" << std::endl;
          exit(1);
        }
      cv.num_devices = 2;
    }
  else if(getenv("HM_GPU1"))
    {
      std::cout << "Running on GPU 1" << std::endl;
      cl_uint max_devices = 2;
      cv.err = clGetDeviceIDs(cv.platform, CL_DEVICE_TYPE_GPU, max_devices, cv.device_ids, &(cv.num_devices));
      // Runtime check (instead of assert) so it also holds with NDEBUG.
      if(cv.err == CL_SUCCESS && cv.num_devices < 2)
        {
          std::cout << "Need at least two GPU devices" << std::endl;
          exit(1);
        }
      if(cv.err == CL_SUCCESS)
        {
          cv.device_ids[0] = cv.device_ids[1];
        }
      cv.num_devices = 1;
    }
  else
    {
      std::cout << "Error: Specify target either HM_CPU0, HM_GPU0, HM_GPU01, or HM_GPU1" << std::endl;
      // No device was selected, so there is nothing to build a context on.
      exit(1);
    }

  if(cv.err != CL_SUCCESS)
    {
      std::cout << "Could not get GPU device ID" << std::endl;
      exit(1);
    }

  cv.context = clCreateContext(0, cv.num_devices, cv.device_ids, NULL, NULL, &(cv.err));
  if(!cv.context)
    {
      std::cout << "Could not create context" << std::endl;
      exit(1);
    }

  // One command queue per selected device, with profiling enabled.
  for(size_t devId = 0 ; devId < cv.num_devices ; devId++)
    {
      cv.commands[devId] = clCreateCommandQueue(cv.context, cv.device_ids[devId],
                                                CL_QUEUE_PROFILING_ENABLE, &(cv.err));
      if(!cv.commands[devId])
        {
          std::cout << "Could not create command queue" << std::endl;
          exit(1);
        }
    }

  compile_ocl_program(cv.memset_program, cv.memset_kernel, cv, memset_kernel_str, "memset_kernel");

#ifdef VERBOSE_COMPILATION
  docs.opencl_ss << "CL fill vars success" << std::endl;

  // Device info
  for(size_t devId = 0 ; devId < cv.num_devices ; devId++)
    {
      docs.opencl_ss << "Device ID: " << devId << std::endl;
      char device_name[255];
      cv.err = clGetDeviceInfo(cv.device_ids[devId], CL_DEVICE_NAME, 255, device_name, NULL);
      docs.opencl_ss << "Device Name: " << device_name << std::endl;

      cl_ulong mem_size;
      cv.err = clGetDeviceInfo(cv.device_ids[devId], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(cl_ulong), &mem_size, NULL);
      docs.opencl_ss << "Global mem size: " << mem_size << std::endl;

      size_t max_work_item[3];
      cv.err = clGetDeviceInfo(cv.device_ids[devId], CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_work_item), max_work_item, NULL);
      docs.opencl_ss << "Max work item sizes: " << max_work_item[0] << ", " << max_work_item[1] << ", " << max_work_item[2] << std::endl;
    }
#endif
}
OclHost::OclHost(int const device_type, int gpu_id, int const cpu_cores) : devType(device_type), maxGlobalMem(0), maxLocalMem(0) { // if (!isGPU()) { // gpu_id = 0; // } cl_int ciErrNum = CL_SUCCESS; Log.Verbose("Using device number %d", gpu_id); //#pragma omp critical // { if (contextUserCount == 0) { Log.Verbose("Creating ocl context."); // cl_uint ciDeviceCount = 0; cl_platform_id cpPlatform = NULL; cpPlatform = getPlatform(); //Get the devices //Get number of devices ciErrNum = clGetDeviceIDs(cpPlatform, devType, 0, NULL, &ciDeviceCount); checkClError("Couldn't get number of OpenCl devices. Error: ", ciErrNum); if (isGPU()) { //Getting device ids devices = (cl_device_id *) malloc( ciDeviceCount * sizeof(cl_device_id)); ciErrNum = clGetDeviceIDs(cpPlatform, devType, ciDeviceCount, devices, NULL); checkClError("Couldn't get OpenCl device ids. Error: ", ciErrNum); //Create context oclGpuContext = clCreateContext(0, ciDeviceCount, devices, NULL, NULL, &ciErrNum); checkClError("Couldn't create context. Error: ", ciErrNum); Log.Message("Context for GPU devices created."); Log.Message("%d GPU device(s) found: ", ciDeviceCount); for (int i = 0; i < ciDeviceCount; ++i) { char device_string[1024]; char driver_string[1024]; clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(device_string), &device_string, NULL); clGetDeviceInfo(devices[i], CL_DRIVER_VERSION, sizeof(driver_string), &driver_string, NULL); Log.Message("Device %d: %s (Driver: %s)", i, device_string, driver_string); } } else { if (ciDeviceCount > 1) { Log.Error("More than one CPU device found."); exit(-1); } cl_device_id device_id; ciErrNum = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_CPU, 1, &device_id, NULL); checkClError("Couldn't get CPU device id. 
Error: ", ciErrNum); Log.Message("%d CPU device found.", ciDeviceCount); char device_string[1024]; char driver_string[1024]; clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(device_string), &device_string, NULL); clGetDeviceInfo(device_id, CL_DRIVER_VERSION, sizeof(driver_string), &driver_string, NULL); Log.Message("Device %d: %s (Driver: %s)", 0, device_string, driver_string); cl_device_partition_property props[3]; props[0] = CL_DEVICE_PARTITION_EQUALLY; // Equally props[1] = 1; // 4 compute units per sub-device props[2] = 0; devices = (cl_device_id *) malloc(256 * sizeof(cl_device_id)); ciErrNum = clCreateSubDevices(device_id, props, 256, devices, &ciDeviceCount); if (ciErrNum == -18) { ciDeviceCount = 1; devices[0] = device_id; } else { checkClError("Couldn't create sub-devices. Error: ", ciErrNum); } Log.Message("%d CPU cores available.", ciDeviceCount); //Create context oclGpuContext = clCreateContext(0, ciDeviceCount, devices, NULL, NULL, &ciErrNum); checkClError("Couldn't create context. Error: ", ciErrNum); } } contextUserCount += 1; //} if (!isGPU()) { gpu_id = gpu_id % ciDeviceCount; } oclDevice = devices[gpu_id]; //Create context //oclGpuContext = clCreateContext(0, 1, &oclDevice, NULL, NULL, &ciErrNum); //checkClError("Couldn't create context. Error: ", ciErrNum); // create command queue oclCommandQueue = clCreateCommandQueue(oclGpuContext, oclDevice, 0, &ciErrNum); checkClError("Couldn't create command queue for device: ", ciErrNum); }