int main(void) { hwloc_topology_t topology; struct ibv_device **dev_list, *dev; int count, i; int err; dev_list = ibv_get_device_list(&count); if (!dev_list) { fprintf(stderr, "ibv_get_device_list failed\n"); return 0; } printf("ibv_get_device_list found %d devices\n", count); hwloc_topology_init(&topology); hwloc_topology_set_type_filter(topology, HWLOC_OBJ_PCI_DEVICE, HWLOC_TYPE_FILTER_KEEP_IMPORTANT); hwloc_topology_set_type_filter(topology, HWLOC_OBJ_OS_DEVICE, HWLOC_TYPE_FILTER_KEEP_IMPORTANT); hwloc_topology_load(topology); for(i=0; i<count; i++) { hwloc_bitmap_t set; dev = dev_list[i]; set = hwloc_bitmap_alloc(); err = hwloc_ibv_get_device_cpuset(topology, dev, set); if (err < 0) { printf("failed to get cpuset for device %d (%s)\n", i, ibv_get_device_name(dev)); } else { char *cpuset_string = NULL; hwloc_obj_t os; hwloc_bitmap_asprintf(&cpuset_string, set); printf("got cpuset %s for device %d (%s)\n", cpuset_string, i, ibv_get_device_name(dev)); free(cpuset_string); os = hwloc_ibv_get_device_osdev(topology, dev); if (os) { assert(os->type == HWLOC_OBJ_OS_DEVICE); printf("found OS object subtype %u lindex %u name %s\n", (unsigned) os->attr->osdev.type, os->logical_index, os->name); assert(os->attr->osdev.type == HWLOC_OBJ_OSDEV_OPENFABRICS); if (strcmp(ibv_get_device_name(dev), os->name)) assert(0); } } hwloc_bitmap_free(set); } hwloc_topology_destroy(topology); ibv_free_device_list(dev_list); return 0; }
int pocl_topology_detect_device_info(cl_device_id device) { hwloc_topology_t pocl_topology; int ret = 0; #ifdef HWLOC_API_2 if (hwloc_get_api_version () < 0x20000) POCL_MSG_ERR ("pocl was compiled against libhwloc 2.x but is" "actually running against libhwloc 1.x \n"); #else if (hwloc_get_api_version () >= 0x20000) POCL_MSG_ERR ("pocl was compiled against libhwloc 1.x but is" "actually running against libhwloc 2.x \n"); #endif /* * hwloc's OpenCL backend causes problems at the initialization stage * because it reloads libpocl.so via the ICD loader. * * See: https://github.com/pocl/pocl/issues/261 * * The only trick to stop hwloc from initializing the OpenCL plugin * I could find is to point the plugin search path to a place where there * are no plugins to be found. */ setenv ("HWLOC_PLUGINS_PATH", "/dev/null", 1); ret = hwloc_topology_init (&pocl_topology); if (ret == -1) { POCL_MSG_ERR ("Cannot initialize the topology.\n"); return ret; } #ifdef HWLOC_API_2 hwloc_topology_set_io_types_filter(pocl_topology, HWLOC_TYPE_FILTER_KEEP_NONE); hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_SYSTEM, HWLOC_TYPE_FILTER_KEEP_NONE); hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_GROUP, HWLOC_TYPE_FILTER_KEEP_NONE); hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_BRIDGE, HWLOC_TYPE_FILTER_KEEP_NONE); hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_MISC, HWLOC_TYPE_FILTER_KEEP_NONE); hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_PCI_DEVICE, HWLOC_TYPE_FILTER_KEEP_NONE); hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_OS_DEVICE, HWLOC_TYPE_FILTER_KEEP_NONE); #else hwloc_topology_ignore_type (pocl_topology, HWLOC_TOPOLOGY_FLAG_WHOLE_IO); hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_SYSTEM); hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_GROUP); hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_BRIDGE); hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_MISC); hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_PCI_DEVICE); hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_OS_DEVICE); #endif ret = hwloc_topology_load (pocl_topology); if (ret == -1) { POCL_MSG_ERR ("Cannot load the topology.\n"); goto exit_destroy; } #ifdef HWLOC_API_2 device->global_mem_size = hwloc_get_root_obj(pocl_topology)->total_memory; #else device->global_mem_size = hwloc_get_root_obj(pocl_topology)->memory.total_memory; #endif // Try to get the number of CPU cores from topology int depth = hwloc_get_type_depth(pocl_topology, HWLOC_OBJ_PU); if(depth != HWLOC_TYPE_DEPTH_UNKNOWN) device->max_compute_units = hwloc_get_nbobjs_by_depth(pocl_topology, depth); /* Find information about global memory cache by looking at the first * cache covering the first PU */ do { size_t cache_size = 0, cacheline_size = 0; hwloc_obj_t core = hwloc_get_next_obj_by_type (pocl_topology, HWLOC_OBJ_CORE, NULL); if (core) { hwloc_obj_t cache = hwloc_get_shared_cache_covering_obj (pocl_topology, core); if ((cache) && (cache->attr)) { cacheline_size = cache->attr->cache.linesize; cache_size = cache->attr->cache.size; } else core = NULL; /* fallback to L1 cache size */ } hwloc_obj_t pu = hwloc_get_next_obj_by_type (pocl_topology, HWLOC_OBJ_PU, NULL); if (!core && pu) { hwloc_obj_t cache = hwloc_get_shared_cache_covering_obj (pocl_topology, pu); if ((cache) && (cache->attr)) { cacheline_size = cache->attr->cache.linesize; cache_size = cache->attr->cache.size; } } if (!cache_size || !cacheline_size) break; device->global_mem_cache_type = 0x2; // CL_READ_WRITE_CACHE, without including all of CL/cl.h device->global_mem_cacheline_size = cacheline_size; device->global_mem_cache_size = cache_size; } while (0); // Destroy topology object and return exit_destroy: hwloc_topology_destroy (pocl_topology); return ret; }
int main(void) { hwloc_topology_t topology; unsigned depth; char buffer[1024]; int err; /* check a synthetic topology */ hwloc_topology_init(&topology); err = hwloc_topology_set_synthetic(topology, "pack:2 numa:3 l2:4 core:5 pu:6"); assert(!err); hwloc_topology_load(topology); assert(hwloc_get_memory_parents_depth(topology) == 2); /* internal checks */ hwloc_topology_check(topology); /* local checks */ depth = hwloc_topology_get_depth(topology); assert(depth == 6); check_level(topology, 0, 1, 2); check_level(topology, 1, 2, 3); check_level(topology, 2, 6, 4); check_level(topology, 3, 24, 5); check_level(topology, 4, 120, 6); check_level(topology, 5, 720, 0); check_level(topology, HWLOC_TYPE_DEPTH_NUMANODE, 6, 0); err = hwloc_topology_export_synthetic(topology, buffer, sizeof(buffer), 0); assert(err == 83); err = strcmp("Package:2 Group:3 [NUMANode(memory=1073741824)] L2Cache:4(size=4194304) Core:5 PU:6", buffer); assert(!err); assert(hwloc_get_memory_parents_depth(topology) == 2); err = hwloc_topology_export_synthetic(topology, buffer, sizeof(buffer), HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES|HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS|HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_V1); assert(err == 47); err = strcmp("Socket:2 Group:3 NUMANode:1 Cache:4 Core:5 PU:6", buffer); assert(!err); hwloc_topology_destroy(topology); hwloc_topology_init(&topology); err = hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L1ICACHE, HWLOC_TYPE_FILTER_KEEP_ALL); err = hwloc_topology_set_synthetic(topology, "pack:2(indexes=3,5) numa:2(memory=256GB indexes=pack) l3u:1(size=20mb) l2:2 l1i:1(size=16kB) l1dcache:2 core:1 pu:2(indexes=l2)"); assert(!err); hwloc_topology_load(topology); assert(hwloc_get_memory_parents_depth(topology) == 2); err = hwloc_topology_export_synthetic(topology, buffer, sizeof(buffer), 0); assert(err == 181); err = strcmp("Package:2 L3Cache:2(size=20971520) [NUMANode(memory=274877906944 indexes=2*2:1*2)] L2Cache:2(size=4194304) L1iCache:1(size=16384) L1dCache:2(size=32768) Core:1 PU:2(indexes=4*8:1*4)", buffer); assert(!err); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PACKAGE, 1)->os_index == 5); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, 1)->os_index == 2); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 12)->os_index == 3); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 13)->os_index == 11); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 14)->os_index == 19); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 15)->os_index == 27); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 16)->os_index == 4); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 17)->os_index == 12); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 18)->os_index == 20); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 19)->os_index == 28); hwloc_topology_destroy(topology); hwloc_topology_init(&topology); err = hwloc_topology_set_synthetic(topology, "pack:2 core:2 pu:2(indexes=0,4,2,6,1,5,3,7)"); assert(!err); hwloc_topology_load(topology); assert(hwloc_get_memory_parents_depth(topology) == 0); err = hwloc_topology_export_synthetic(topology, buffer, sizeof(buffer), 0); assert(err == 72); err = strcmp("[NUMANode(memory=1073741824)] Package:2 Core:2 PU:2(indexes=4*2:2*2:1*2)", buffer); assert(!err); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 0)->os_index == 0); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 1)->os_index == 4); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 2)->os_index == 2); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 3)->os_index == 6); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 4)->os_index == 1); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 5)->os_index == 5); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 6)->os_index == 3); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 7)->os_index == 7); hwloc_topology_destroy(topology); hwloc_topology_init(&topology); err = hwloc_topology_set_synthetic(topology, "pack:2 numa:2 core:1 pu:2(indexes=0,4,2,6,1,3,5,7)"); assert(!err); hwloc_topology_load(topology); assert(hwloc_get_memory_parents_depth(topology) == 2); err = hwloc_topology_export_synthetic(topology, buffer, sizeof(buffer), 0); assert(err == 76); err = strcmp("Package:2 Core:2 [NUMANode(memory=1073741824)] PU:2(indexes=0,4,2,6,1,3,5,7)", buffer); assert(!err); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 0)->os_index == 0); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 1)->os_index == 4); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 2)->os_index == 2); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 3)->os_index == 6); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 4)->os_index == 1); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 5)->os_index == 3); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 6)->os_index == 5); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 7)->os_index == 7); hwloc_topology_destroy(topology); hwloc_topology_init(&topology); err = hwloc_topology_set_synthetic(topology, "pack:2 [numa(memory=1GB)] [numa(memory=1MB)] core:2 [numa(indexes=8,7,5,6,4,3,1,2)] pu:4"); assert(!err); hwloc_topology_load(topology); assert(hwloc_get_memory_parents_depth(topology) == HWLOC_TYPE_DEPTH_MULTIPLE); err = hwloc_topology_export_synthetic(topology, buffer, sizeof(buffer), 0); assert(err == 114); err = strcmp("Package:2 [NUMANode(memory=1073741824)] [NUMANode(memory=1048576)] Core:2 [NUMANode(indexes=8,7,5,6,4,3,1,2)] PU:4", buffer); assert(!err); err = hwloc_topology_export_synthetic(topology, buffer, sizeof(buffer), HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_V1); assert(err == -1); assert(errno == EINVAL); err = hwloc_topology_export_synthetic(topology, buffer, sizeof(buffer), HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_IGNORE_MEMORY); assert(err == 21); err = strcmp("Package:2 Core:2 PU:4", buffer); assert(!err); hwloc_topology_destroy(topology); return 0; }
int main(void) { hwloc_topology_t topology; cl_int clret; cl_platform_id *platform_ids; unsigned nrp, nrd, count, i, j; int err; hwloc_topology_init(&topology); hwloc_topology_set_type_filter(topology, HWLOC_OBJ_PCI_DEVICE, HWLOC_TYPE_FILTER_KEEP_IMPORTANT); hwloc_topology_set_type_filter(topology, HWLOC_OBJ_OS_DEVICE, HWLOC_TYPE_FILTER_KEEP_IMPORTANT); hwloc_topology_load(topology); clret = clGetPlatformIDs(0, NULL, &nrp); if (CL_SUCCESS != clret || !nrp) return 0; platform_ids = malloc(nrp * sizeof(*platform_ids)); if (!platform_ids) return 0; clret = clGetPlatformIDs(nrp, platform_ids, &nrp); if (CL_SUCCESS != clret || !nrp) return 0; count = 0; for(i=0; i<nrp; i++) { cl_device_id *device_ids; clret = clGetDeviceIDs(platform_ids[i], CL_DEVICE_TYPE_ALL, 0, NULL, &nrd); if (CL_SUCCESS != clret || !nrd) continue; device_ids = malloc(nrd * sizeof(*device_ids)); if (!device_ids) continue; clret = clGetDeviceIDs(platform_ids[i], CL_DEVICE_TYPE_ALL, nrd, device_ids, &nrd); if (CL_SUCCESS != clret || !nrd) continue; for(j=0; j<nrd; j++) { hwloc_bitmap_t set; hwloc_obj_t osdev, osdev2, ancestor; const char *value; unsigned p, d; osdev = hwloc_opencl_get_device_osdev_by_index(topology, i, j); /* we currently insert all OpenCL devices, except CPU devices */ if (!osdev) { cl_device_type type; clGetDeviceInfo(device_ids[j], CL_DEVICE_TYPE, sizeof(type), &type, NULL); assert(type == CL_DEVICE_TYPE_CPU); continue; } /* try to get it from PCI locality (only works with AMD extensions) */ osdev2 = hwloc_opencl_get_device_osdev(topology, device_ids[j]); if (osdev2) { assert(osdev == osdev2); } ancestor = hwloc_get_non_io_ancestor_obj(topology, osdev); set = hwloc_bitmap_alloc(); err = hwloc_opencl_get_device_cpuset(topology, device_ids[j], set); if (err < 0) { printf("no cpuset for platform %u device %u\n", i, j); } else { char *cpuset_string = NULL; hwloc_bitmap_asprintf(&cpuset_string, set); printf("got cpuset %s for platform %u device %u\n", cpuset_string, i, j); free(cpuset_string); if (hwloc_bitmap_isequal(hwloc_topology_get_complete_cpuset(topology), hwloc_topology_get_topology_cpuset(topology))) /* only compare if the topology is complete, otherwise things can be significantly different */ assert(hwloc_bitmap_isequal(set, ancestor->cpuset)); } hwloc_bitmap_free(set); printf("found OSDev %s\n", osdev->name); err = sscanf(osdev->name, "opencl%ud%u", &p, &d); assert(err == 2); assert(p == i); assert(d == j); value = hwloc_obj_get_info_by_name(osdev, "Backend"); err = strcmp(value, "OpenCL"); assert(!err); assert(osdev->attr->osdev.type == HWLOC_OBJ_OSDEV_COPROC); value = osdev->subtype; assert(value); err = strcmp(value, "OpenCL"); assert(!err); value = hwloc_obj_get_info_by_name(osdev, "GPUModel"); printf("found OSDev model %s\n", value); count++; } } hwloc_topology_destroy(topology); return 0; }
int main(void) { hwloc_topology_t topology; int i; int err; hwloc_topology_init(&topology); hwloc_topology_set_type_filter(topology, HWLOC_OBJ_PCI_DEVICE, HWLOC_TYPE_FILTER_KEEP_IMPORTANT); hwloc_topology_set_type_filter(topology, HWLOC_OBJ_OS_DEVICE, HWLOC_TYPE_FILTER_KEEP_IMPORTANT); hwloc_topology_load(topology); for(i=0; ; i++) { hwloc_bitmap_t set; hwloc_obj_t osdev, ancestor; const char *value; osdev = hwloc_intel_mic_get_device_osdev_by_index(topology, i); if (!osdev) break; assert(osdev); ancestor = hwloc_get_non_io_ancestor_obj(topology, osdev); printf("found OSDev %s\n", osdev->name); err = strncmp(osdev->name, "mic", 3); assert(!err); assert(atoi(osdev->name+3) == (int) i); assert(osdev->attr->osdev.type == HWLOC_OBJ_OSDEV_COPROC); value = hwloc_obj_get_info_by_name(osdev, "CoProcType"); err = strcmp(value, "MIC"); assert(!err); value = hwloc_obj_get_info_by_name(osdev, "MICFamily"); printf("found MICFamily %s\n", value); value = hwloc_obj_get_info_by_name(osdev, "MICSKU"); printf("found MICSKU %s\n", value); value = hwloc_obj_get_info_by_name(osdev, "MICActiveCores"); printf("found MICActiveCores %s\n", value); value = hwloc_obj_get_info_by_name(osdev, "MICMemorySize"); printf("found MICMemorySize %s\n", value); set = hwloc_bitmap_alloc(); err = hwloc_intel_mic_get_device_cpuset(topology, i, set); if (err < 0) { printf("failed to get cpuset for device %d\n", i); } else { char *cpuset_string = NULL; hwloc_bitmap_asprintf(&cpuset_string, set); printf("got cpuset %s for device %d\n", cpuset_string, i); if (hwloc_bitmap_isequal(hwloc_topology_get_complete_cpuset(topology), hwloc_topology_get_topology_cpuset(topology))) /* only compare if the topology is complete, otherwise things can be significantly different */ assert(hwloc_bitmap_isequal(set, ancestor->cpuset)); free(cpuset_string); } hwloc_bitmap_free(set); } hwloc_topology_destroy(topology); return 0; }
int main(void) { hwloc_topology_t topology; unsigned depth; unsigned i,j, width; char buffer[1024]; int err; /* check a synthetic topology */ hwloc_topology_init(&topology); err = hwloc_topology_set_synthetic(topology, "2 3 4 5 6"); assert(!err); hwloc_topology_load(topology); /* internal checks */ hwloc_topology_check(topology); /* local checks */ depth = hwloc_topology_get_depth(topology); assert(depth == 6); width = 1; for(i=0; i<6; i++) { /* check arities */ assert(hwloc_get_nbobjs_by_depth(topology, i) == width); for(j=0; j<width; j++) { hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, i, j); assert(obj); assert(obj->arity == (i<5 ? i+2 : 0)); } width *= i+2; } err = hwloc_topology_export_synthetic(topology, buffer, sizeof(buffer), 0); assert(err == 75); err = strcmp("Package:2 NUMANode:3(memory=1073741824) L2Cache:4(size=4194304) Core:5 PU:6", buffer); assert(!err); err = hwloc_topology_export_synthetic(topology, buffer, sizeof(buffer), HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES|HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS); assert(err == 42); err = strcmp("Package:2 NUMANode:3 L2Cache:4 Core:5 PU:6", buffer); assert(!err); hwloc_topology_destroy(topology); hwloc_topology_init(&topology); err = hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L1ICACHE, HWLOC_TYPE_FILTER_KEEP_ALL); err = hwloc_topology_set_synthetic(topology, "pack:2(indexes=3,5) numa:2(memory=256GB indexes=pack) l3u:1(size=20mb) l2:2 l1i:1(size=16kB) l1dcache:2 core:1 pu:2(indexes=l2)"); assert(!err); hwloc_topology_load(topology); err = hwloc_topology_export_synthetic(topology, buffer, sizeof(buffer), 0); assert(err == 181); err = strcmp("Package:2 NUMANode:2(memory=274877906944 indexes=2*2:1*2) L3Cache:1(size=20971520) L2Cache:2(size=4194304) L1iCache:1(size=16384) L1dCache:2(size=32768) Core:1 PU:2(indexes=4*8:1*4)", buffer); assert(!err); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PACKAGE, 1)->os_index == 5); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, 1)->os_index == 2); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 12)->os_index == 3); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 13)->os_index == 11); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 14)->os_index == 19); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 15)->os_index == 27); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 16)->os_index == 4); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 17)->os_index == 12); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 18)->os_index == 20); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 19)->os_index == 28); hwloc_topology_destroy(topology); hwloc_topology_init(&topology); err = hwloc_topology_set_synthetic(topology, "pack:2 core:2 pu:2(indexes=0,4,2,6,1,5,3,7)"); assert(!err); hwloc_topology_load(topology); err = hwloc_topology_export_synthetic(topology, buffer, sizeof(buffer), 0); assert(err == 72); err = strcmp("NUMANode:1(memory=1073741824) Package:2 Core:2 PU:2(indexes=4*2:2*2:1*2)", buffer); assert(!err); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 0)->os_index == 0); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 1)->os_index == 4); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 2)->os_index == 2); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 3)->os_index == 6); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 4)->os_index == 1); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 5)->os_index == 5); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 6)->os_index == 3); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 7)->os_index == 7); hwloc_topology_destroy(topology); hwloc_topology_init(&topology); err = hwloc_topology_set_synthetic(topology, "pack:2 numa:2 core:1 pu:2(indexes=0,4,2,6,1,3,5,7)"); assert(!err); hwloc_topology_load(topology); err = hwloc_topology_export_synthetic(topology, buffer, sizeof(buffer), 0); assert(err == 76); err = strcmp("Package:2 NUMANode:2(memory=1073741824) Core:1 PU:2(indexes=0,4,2,6,1,3,5,7)", buffer); assert(!err); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 0)->os_index == 0); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 1)->os_index == 4); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 2)->os_index == 2); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 3)->os_index == 6); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 4)->os_index == 1); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 5)->os_index == 3); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 6)->os_index == 5); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 7)->os_index == 7); hwloc_topology_destroy(topology); return 0; }
extern int get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards, uint16_t *p_sockets, uint16_t *p_cores, uint16_t *p_threads, uint16_t *p_block_map_size, uint16_t **p_block_map, uint16_t **p_block_map_inv) { enum { SOCKET=0, CORE=1, PU=2, LAST_OBJ=3 }; hwloc_topology_t topology; hwloc_obj_t obj; hwloc_obj_type_t objtype[LAST_OBJ]; unsigned idx[LAST_OBJ]; int nobj[LAST_OBJ]; bitstr_t *used_socket = NULL; int *cores_per_socket; int actual_cpus; int macid; int absid; int actual_boards = 1, depth, sock_cnt, tot_socks = 0; int i, used_core_idx, used_sock_idx; debug2("hwloc_topology_init"); if (hwloc_topology_init(&topology)) { /* error in initialize hwloc library */ debug("hwloc_topology_init() failed."); return 1; } /* parse all system */ hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM); /* ignores cache, misc */ #if HWLOC_API_VERSION < 0x00020000 hwloc_topology_ignore_type(topology, HWLOC_OBJ_CACHE); hwloc_topology_ignore_type(topology, HWLOC_OBJ_MISC); #else hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L1CACHE, HWLOC_TYPE_FILTER_KEEP_NONE); hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L2CACHE, HWLOC_TYPE_FILTER_KEEP_NONE); hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L3CACHE, HWLOC_TYPE_FILTER_KEEP_NONE); hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L4CACHE, HWLOC_TYPE_FILTER_KEEP_NONE); hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L5CACHE, HWLOC_TYPE_FILTER_KEEP_NONE); hwloc_topology_set_type_filter(topology, HWLOC_OBJ_MISC, HWLOC_TYPE_FILTER_KEEP_NONE); #endif /* load topology */ debug2("hwloc_topology_load"); if (hwloc_topology_load(topology)) { /* error in load hardware topology */ debug("hwloc_topology_load() failed."); hwloc_topology_destroy(topology); return 2; } #if _DEBUG _hwloc_children(topology, hwloc_get_root_obj(topology), 0); #endif /* * Some processors (e.g. AMD Opteron 6000 series) contain multiple * NUMA nodes per socket. This is a configuration which does not map * into the hardware entities that Slurm optimizes resource allocation * for (PU/thread, core, socket, baseboard, node and network switch). * In order to optimize resource allocations on such hardware, Slurm * will consider each NUMA node within the socket as a separate socket. * You can disable this configuring "SchedulerParameters=Ignore_NUMA", * in which case Slurm will report the correct socket count on the node, * but not be able to optimize resource allocations on the NUMA nodes. */ objtype[SOCKET] = HWLOC_OBJ_SOCKET; objtype[CORE] = HWLOC_OBJ_CORE; objtype[PU] = HWLOC_OBJ_PU; if (hwloc_get_type_depth(topology, HWLOC_OBJ_NODE) > hwloc_get_type_depth(topology, HWLOC_OBJ_SOCKET)) { char *sched_params = slurm_get_sched_params(); if (sched_params && strcasestr(sched_params, "Ignore_NUMA")) { info("Ignoring NUMA nodes within a socket"); } else { info("Considering each NUMA node as a socket"); objtype[SOCKET] = HWLOC_OBJ_NODE; } xfree(sched_params); } /* number of objects */ depth = hwloc_get_type_depth(topology, HWLOC_OBJ_GROUP); if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) { actual_boards = MAX(hwloc_get_nbobjs_by_depth(topology, depth), 1); } /* * Count sockets/NUMA containing any cores. * KNL NUMA with no cores are NOT counted. */ nobj[SOCKET] = 0; depth = hwloc_get_type_depth(topology, objtype[SOCKET]); used_socket = bit_alloc(_MAX_SOCKET_INX); cores_per_socket = xmalloc(sizeof(int) * _MAX_SOCKET_INX); sock_cnt = hwloc_get_nbobjs_by_depth(topology, depth); for (i = 0; i < sock_cnt; i++) { obj = hwloc_get_obj_by_depth(topology, depth, i); if (obj->type == objtype[SOCKET]) { cores_per_socket[i] = _core_child_count(topology, obj); if (cores_per_socket[i] > 0) { nobj[SOCKET]++; bit_set(used_socket, tot_socks); } if (++tot_socks >= _MAX_SOCKET_INX) { /* Bitmap size */ fatal("Socket count exceeds %d, expand data structure size", _MAX_SOCKET_INX); break; } } } nobj[CORE] = hwloc_get_nbobjs_by_type(topology, objtype[CORE]); /* * Workaround for hwloc bug, in some cases the topology "children" array * does not get populated, so _core_child_count() always returns 0 */ if (nobj[SOCKET] == 0) { nobj[SOCKET] = hwloc_get_nbobjs_by_type(topology, objtype[SOCKET]); if (nobj[SOCKET] == 0) { debug("get_cpuinfo() fudging nobj[SOCKET] from 0 to 1"); nobj[SOCKET] = 1; } if (nobj[SOCKET] >= _MAX_SOCKET_INX) { /* Bitmap size */ fatal("Socket count exceeds %d, expand data structure size", _MAX_SOCKET_INX); } bit_nset(used_socket, 0, nobj[SOCKET] - 1); } /* * Workaround for hwloc * hwloc_get_nbobjs_by_type() returns 0 on some architectures. */ if ( nobj[CORE] == 0 ) { debug("get_cpuinfo() fudging nobj[CORE] from 0 to 1"); nobj[CORE] = 1; } if ( nobj[SOCKET] == -1 ) fatal("get_cpuinfo() can not handle nobj[SOCKET] = -1"); if ( nobj[CORE] == -1 ) fatal("get_cpuinfo() can not handle nobj[CORE] = -1"); actual_cpus = hwloc_get_nbobjs_by_type(topology, objtype[PU]); #if 0 /* Used to find workaround above */ info("CORE = %d SOCKET = %d actual_cpus = %d nobj[CORE] = %d", CORE, SOCKET, actual_cpus, nobj[CORE]); #endif if ((actual_cpus % nobj[CORE]) != 0) { error("Thread count (%d) not multiple of core count (%d)", actual_cpus, nobj[CORE]); } nobj[PU] = actual_cpus / nobj[CORE]; /* threads per core */ if ((nobj[CORE] % nobj[SOCKET]) != 0) { error("Core count (%d) not multiple of socket count (%d)", nobj[CORE], nobj[SOCKET]); } nobj[CORE] /= nobj[SOCKET]; /* cores per socket */ debug("CPUs:%d Boards:%d Sockets:%d CoresPerSocket:%d ThreadsPerCore:%d", actual_cpus, actual_boards, nobj[SOCKET], nobj[CORE], nobj[PU]); /* allocate block_map */ if (p_block_map_size) *p_block_map_size = (uint16_t)actual_cpus; if (p_block_map && p_block_map_inv) { *p_block_map = xmalloc(actual_cpus * sizeof(uint16_t)); *p_block_map_inv = xmalloc(actual_cpus * sizeof(uint16_t)); /* initialize default as linear mapping */ for (i = 0; i < actual_cpus; i++) { (*p_block_map)[i] = i; (*p_block_map_inv)[i] = i; } /* create map with hwloc */ used_sock_idx = -1; used_core_idx = -1; for (idx[SOCKET] = 0; (used_sock_idx + 1) < nobj[SOCKET]; idx[SOCKET]++) { if (!bit_test(used_socket, idx[SOCKET])) continue; used_sock_idx++; for (idx[CORE] = 0; idx[CORE] < cores_per_socket[idx[SOCKET]]; idx[CORE]++) { used_core_idx++; for (idx[PU]=0; idx[PU]<nobj[PU]; ++idx[PU]) { /* get hwloc_obj by indexes */ obj=hwloc_get_obj_below_array_by_type( topology, 3, objtype, idx); if (!obj) continue; macid = obj->os_index; absid = used_core_idx * nobj[PU] + idx[PU]; if ((macid >= actual_cpus) || (absid >= actual_cpus)) { /* physical or logical ID are * out of range */ continue; } debug4("CPU map[%d]=>%d S:C:T %d:%d:%d", absid, macid, used_sock_idx, idx[CORE], idx[PU]); (*p_block_map)[absid] = macid; (*p_block_map_inv)[macid] = absid; } } } } FREE_NULL_BITMAP(used_socket); xfree(cores_per_socket); hwloc_topology_destroy(topology); /* update output parameters */ *p_cpus = actual_cpus; *p_boards = actual_boards; *p_sockets = nobj[SOCKET]; *p_cores = nobj[CORE]; *p_threads = nobj[PU]; #if _DEBUG /*** Display raw data ***/ debug("CPUs:%u Boards:%u Sockets:%u CoresPerSocket:%u ThreadsPerCore:%u", *p_cpus, *p_boards, *p_sockets, *p_cores, *p_threads); /* Display the mapping tables */ if (p_block_map && p_block_map_inv) { debug("------"); debug("Abstract -> Machine logical CPU ID block mapping:"); debug("AbstractId PhysicalId Inverse"); for (i = 0; i < *p_cpus; i++) { debug3(" %4d %4u %4u", i, (*p_block_map)[i], (*p_block_map_inv)[i]); } debug("------"); } #endif return SLURM_SUCCESS; }