/******************* FUNCTION *********************/
int TopoHwloc::getAbsDepth(int level, int depth) const
{
	//vars
	int curDepth = 0;

	//get max depth
	int topodepth = hwloc_topology_get_depth(topology);

	//scan every depth, counting those whose type matches the requested level
	for (int i = 0; i < topodepth; i++) {
		//get current type
		hwloc_obj_type_t type = hwloc_get_depth_type(topology, i);
		if (type == level) {
			curDepth++;
			if (curDepth == depth)
				return i;
		}
	}

	//error
	allocFatal("Failed to find the depth of requested level levelType=%d, depth=%d in hwloc.", level, depth);
	return -1;
}
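The pattern above (scan every depth and match on hwloc_get_depth_type()) is the standard way to map an object type to an absolute hwloc depth. A minimal, self-contained sketch of the same traversal, assuming only the public hwloc API:

/* Sketch: enumerate every depth of the local topology and print the
 * object count and type found there. */
#include <hwloc.h>
#include <stdio.h>

int main(void)
{
    hwloc_topology_t topology;
    int depth, topodepth;

    hwloc_topology_init(&topology);
    hwloc_topology_load(topology);

    topodepth = hwloc_topology_get_depth(topology);
    for (depth = 0; depth < topodepth; depth++)
        printf("depth %d: %u x %s\n", depth,
               hwloc_get_nbobjs_by_depth(topology, depth),
               hwloc_obj_type_string(hwloc_get_depth_type(topology, depth)));

    hwloc_topology_destroy(topology);
    return 0;
}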
void output_console(struct lstopo_output *loutput, const char *filename)
{
  hwloc_topology_t topology = loutput->topology;
  unsigned topodepth;
  int verbose_mode = loutput->verbose_mode;
  int logical = loutput->logical;
  FILE *output;

  output = open_output(filename, loutput->overwrite);
  if (!output) {
    fprintf(stderr, "Failed to open %s for writing (%s)\n", filename, strerror(errno));
    return;
  }

  topodepth = hwloc_topology_get_depth(topology);

  /*
   * if verbose_mode == 0, only print the summary.
   * if verbose_mode == 1, only print the topology tree.
   * if verbose_mode > 1, print both.
   */

  if (lstopo_show_only != (hwloc_obj_type_t)-1) {
    if (verbose_mode > 1)
      fprintf(output, "Only showing %s objects\n", hwloc_obj_type_string(lstopo_show_only));
    output_only(topology, hwloc_get_root_obj(topology), output, logical, verbose_mode);
  } else if (verbose_mode >= 1) {
    output_topology(topology, hwloc_get_root_obj(topology), NULL, output, 0, logical, verbose_mode);
    fprintf(output, "\n");
  }

  if ((verbose_mode > 1 || !verbose_mode) && lstopo_show_only == (hwloc_obj_type_t)-1) {
    hwloc_lstopo_show_summary(output, topology);
  }

  if (verbose_mode > 1 && lstopo_show_only == (hwloc_obj_type_t)-1) {
    const struct hwloc_distances_s *distances;
    unsigned depth;

    for (depth = 0; depth < topodepth; depth++) {
      distances = hwloc_get_whole_distance_matrix_by_depth(topology, depth);
      if (!distances || !distances->latency)
        continue;
      fprintf(output, "relative latency matrix between %ss (depth %u) by %s indexes:\n",
              hwloc_obj_type_string(hwloc_get_depth_type(topology, depth)),
              depth,
              logical ? "logical" : "physical");
      hwloc_utils_print_distance_matrix(output, topology, hwloc_get_root_obj(topology),
                                        distances->nbobjs, depth, distances->latency, logical);
    }
  }

  if (verbose_mode > 1 && lstopo_show_only == (hwloc_obj_type_t)-1) {
    hwloc_const_bitmap_t complete = hwloc_topology_get_complete_cpuset(topology);
    hwloc_const_bitmap_t topo = hwloc_topology_get_topology_cpuset(topology);
    hwloc_const_bitmap_t allowed = hwloc_topology_get_allowed_cpuset(topology);

    if (!hwloc_bitmap_isequal(topo, complete)) {
      hwloc_bitmap_t unknown = hwloc_bitmap_alloc();
      char *unknownstr;
      hwloc_bitmap_copy(unknown, complete);
      hwloc_bitmap_andnot(unknown, unknown, topo);
      hwloc_bitmap_asprintf(&unknownstr, unknown);
      fprintf(output, "%d processors not represented in topology: %s\n",
              hwloc_bitmap_weight(unknown), unknownstr);
      free(unknownstr);
      hwloc_bitmap_free(unknown);
    }
    if (!hwloc_bitmap_isequal(topo, allowed)) {
      hwloc_bitmap_t disallowed = hwloc_bitmap_alloc();
      char *disallowedstr;
      hwloc_bitmap_copy(disallowed, topo);
      hwloc_bitmap_andnot(disallowed, disallowed, allowed);
      hwloc_bitmap_asprintf(&disallowedstr, disallowed);
      fprintf(output, "%d processors represented but not allowed: %s\n",
              hwloc_bitmap_weight(disallowed), disallowedstr);
      free(disallowedstr);
      hwloc_bitmap_free(disallowed);
    }
    if (!hwloc_topology_is_thissystem(topology))
      fprintf(output, "Topology not from this system\n");
  }

  if (output != stdout)
    fclose(output);
}
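The distances loop above relies on the hwloc 1.x whole-matrix API (hwloc_get_whole_distance_matrix_by_depth() and a flat latency array; this interface was replaced in hwloc 2.0). A minimal standalone sketch of that API, under that 1.x assumption:

/* Sketch: dump the raw relative-latency matrix for each depth that
 * provides one. */
#include <hwloc.h>
#include <stdio.h>

int main(void)
{
    hwloc_topology_t topology;
    unsigned depth, topodepth, i, j;

    hwloc_topology_init(&topology);
    hwloc_topology_load(topology);

    topodepth = hwloc_topology_get_depth(topology);
    for (depth = 0; depth < topodepth; depth++) {
        const struct hwloc_distances_s *d =
            hwloc_get_whole_distance_matrix_by_depth(topology, depth);
        if (!d || !d->latency)
            continue;
        printf("%u x %u latency matrix at depth %u:\n",
               d->nbobjs, d->nbobjs, depth);
        for (i = 0; i < d->nbobjs; i++) {
            for (j = 0; j < d->nbobjs; j++)
                printf(" %.2f", d->latency[i * d->nbobjs + j]);
            printf("\n");
        }
    }

    hwloc_topology_destroy(topology);
    return 0;
}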
#include <hwloc.h>
#include <assert.h>
#include <errno.h>
#include <stdio.h>

int main(void)
{
  hwloc_topology_t local, global;
  hwloc_obj_t sw1, sw2, sw11, sw12, sw21, sw22, root;
  int err;

  printf("Loading the local topology...\n");
  hwloc_topology_init(&local);
  hwloc_topology_set_synthetic(local, "n:2 s:2 ca:1 core:2 ca:2 pu:2");
  hwloc_topology_load(local);

  printf("Try to create an empty custom topology...\n");
  hwloc_topology_init(&global);
  hwloc_topology_set_custom(global);
  err = hwloc_topology_load(global);
  assert(err == -1);
  assert(errno == EINVAL);
  hwloc_topology_destroy(global);

  printf("Creating a custom topology...\n");
  hwloc_topology_init(&global);
  hwloc_topology_set_custom(global);

  printf("Inserting the local topology into the global one...\n");
  root = hwloc_get_root_obj(global);

  sw1 = hwloc_custom_insert_group_object_by_parent(global, root, 0);
  sw11 = hwloc_custom_insert_group_object_by_parent(global, sw1, 1);
  hwloc_custom_insert_topology(global, sw11, local, NULL);
  hwloc_custom_insert_topology(global, sw11, local, NULL);
  sw12 = hwloc_custom_insert_group_object_by_parent(global, sw1, 1);
  hwloc_custom_insert_topology(global, sw12, local, NULL);
  hwloc_custom_insert_topology(global, sw12, local, NULL);

  sw2 = hwloc_custom_insert_group_object_by_parent(global, root, 0);
  sw21 = hwloc_custom_insert_group_object_by_parent(global, sw2, 1);
  hwloc_custom_insert_topology(global, sw21, local, NULL);
  hwloc_custom_insert_topology(global, sw21, local, NULL);
  hwloc_custom_insert_topology(global, sw21, local, NULL);
  sw22 = hwloc_custom_insert_group_object_by_parent(global, sw2, 1);
  hwloc_custom_insert_topology(global, sw22, local, NULL); /* only one to check that it won't get merged */

  hwloc_topology_destroy(local);

  printf("Building the global topology...\n");
  hwloc_topology_load(global);
  hwloc_topology_check(global);

  assert(hwloc_topology_get_depth(global) == 10);
  assert(hwloc_get_depth_type(global, 0) == HWLOC_OBJ_SYSTEM);
  assert(hwloc_get_nbobjs_by_type(global, HWLOC_OBJ_SYSTEM) == 1);
  assert(hwloc_get_depth_type(global, 1) == HWLOC_OBJ_GROUP);
  assert(hwloc_get_nbobjs_by_depth(global, 1) == 2);
  assert(hwloc_get_depth_type(global, 2) == HWLOC_OBJ_GROUP);
  assert(hwloc_get_nbobjs_by_depth(global, 2) == 4); /* the last group of this level shouldn't be merged */
  assert(hwloc_get_depth_type(global, 3) == HWLOC_OBJ_MACHINE);
  assert(hwloc_get_nbobjs_by_type(global, HWLOC_OBJ_MACHINE) == 8);
  assert(hwloc_get_depth_type(global, 4) == HWLOC_OBJ_NODE);
  assert(hwloc_get_nbobjs_by_type(global, HWLOC_OBJ_NODE) == 16);
  assert(hwloc_get_depth_type(global, 5) == HWLOC_OBJ_SOCKET);
  assert(hwloc_get_nbobjs_by_type(global, HWLOC_OBJ_SOCKET) == 32);
  assert(hwloc_get_depth_type(global, 6) == HWLOC_OBJ_CACHE);
  assert(hwloc_get_nbobjs_by_depth(global, 6) == 32);
  assert(hwloc_get_depth_type(global, 7) == HWLOC_OBJ_CORE);
  assert(hwloc_get_nbobjs_by_type(global, HWLOC_OBJ_CORE) == 64);
  assert(hwloc_get_depth_type(global, 8) == HWLOC_OBJ_CACHE);
  assert(hwloc_get_nbobjs_by_depth(global, 8) == 128);
  assert(hwloc_get_depth_type(global, 9) == HWLOC_OBJ_PU);
  assert(hwloc_get_nbobjs_by_type(global, HWLOC_OBJ_PU) == 256);

  hwloc_topology_destroy(global);

  return 0;
}
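For reference on the asserts: the synthetic description "n:2 s:2 ca:1 core:2 ca:2 pu:2" expands to 2 NUMA nodes x 2 sockets x 1 cache x 2 cores x 2 caches x 2 PUs = 32 PUs per inserted machine, and the test inserts the local topology 2 + 2 + 3 + 1 = 8 times, which yields the expected 8 machines, 16 NUMA nodes, 32 sockets, 64 cores, and 256 PUs. Note also that this test uses the hwloc 1.x custom-topology API (hwloc_topology_set_custom() and friends), which was removed in hwloc 2.0.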
void output_console(hwloc_topology_t topology, const char *filename, int logical,
                    int legend __hwloc_attribute_unused, int verbose_mode)
{
  unsigned topodepth;
  FILE *output;

  if (!filename || !strcmp(filename, "-"))
    output = stdout;
  else {
    output = open_file(filename, "w");
    if (!output) {
      fprintf(stderr, "Failed to open %s for writing (%s)\n", filename, strerror(errno));
      return;
    }
  }

  topodepth = hwloc_topology_get_depth(topology);

  /*
   * if verbose_mode == 0, only print the summary.
   * if verbose_mode == 1, only print the topology tree.
   * if verbose_mode > 1, print both.
   */

  if (lstopo_show_only != (hwloc_obj_type_t)-1) {
    if (verbose_mode > 1)
      fprintf(output, "Only showing %s objects\n", hwloc_obj_type_string(lstopo_show_only));
    output_only(topology, hwloc_get_root_obj(topology), output, logical, verbose_mode);
  } else if (verbose_mode >= 1) {
    output_topology(topology, hwloc_get_root_obj(topology), NULL, output, 0, logical, verbose_mode);
    fprintf(output, "\n");
  }

  if ((verbose_mode > 1 || !verbose_mode) && lstopo_show_only == (hwloc_obj_type_t)-1) {
    hwloc_lstopo_show_summary(output, topology);
  }

  if (verbose_mode > 1 && lstopo_show_only == (hwloc_obj_type_t)-1) {
    const struct hwloc_distances_s *distances;
    unsigned depth;

    for (depth = 0; depth < topodepth; depth++) {
      distances = hwloc_get_whole_distance_matrix_by_depth(topology, depth);
      if (!distances || !distances->latency)
        continue;
      printf("latency matrix between %ss (depth %u) by %s indexes:\n",
             hwloc_obj_type_string(hwloc_get_depth_type(topology, depth)),
             depth,
             logical ? "logical" : "physical");
      hwloc_utils_print_distance_matrix(topology, hwloc_get_root_obj(topology),
                                        distances->nbobjs, depth, distances->latency, logical);
    }
  }

  if (verbose_mode > 1 && lstopo_show_only == (hwloc_obj_type_t)-1) {
    hwloc_const_bitmap_t complete = hwloc_topology_get_complete_cpuset(topology);
    hwloc_const_bitmap_t topo = hwloc_topology_get_topology_cpuset(topology);
    hwloc_const_bitmap_t online = hwloc_topology_get_online_cpuset(topology);
    hwloc_const_bitmap_t allowed = hwloc_topology_get_allowed_cpuset(topology);

    if (complete && !hwloc_bitmap_isequal(topo, complete)) {
      hwloc_bitmap_t unknown = hwloc_bitmap_alloc();
      char *unknownstr;
      hwloc_bitmap_copy(unknown, complete);
      hwloc_bitmap_andnot(unknown, unknown, topo);
      hwloc_bitmap_asprintf(&unknownstr, unknown);
      fprintf(output, "%d processors not represented in topology: %s\n",
              hwloc_bitmap_weight(unknown), unknownstr);
      free(unknownstr);
      hwloc_bitmap_free(unknown);
    }
    if (complete && !hwloc_bitmap_isequal(online, complete)) {
      hwloc_bitmap_t offline = hwloc_bitmap_alloc();
      char *offlinestr;
      hwloc_bitmap_copy(offline, complete);
      hwloc_bitmap_andnot(offline, offline, online);
      hwloc_bitmap_asprintf(&offlinestr, offline);
      fprintf(output, "%d processors offline: %s\n",
              hwloc_bitmap_weight(offline), offlinestr);
      free(offlinestr);
      hwloc_bitmap_free(offline);
    }
    if (complete && !hwloc_bitmap_isequal(allowed, online)) {
      if (!hwloc_bitmap_isincluded(online, allowed)) {
        hwloc_bitmap_t forbidden = hwloc_bitmap_alloc();
        char *forbiddenstr;
        hwloc_bitmap_copy(forbidden, online);
        hwloc_bitmap_andnot(forbidden, forbidden, allowed);
        hwloc_bitmap_asprintf(&forbiddenstr, forbidden);
        fprintf(output, "%d processors online but not allowed: %s\n",
                hwloc_bitmap_weight(forbidden), forbiddenstr);
        free(forbiddenstr);
        hwloc_bitmap_free(forbidden);
      }
      if (!hwloc_bitmap_isincluded(allowed, online)) {
        hwloc_bitmap_t potential = hwloc_bitmap_alloc();
        char *potentialstr;
        hwloc_bitmap_copy(potential, allowed);
        hwloc_bitmap_andnot(potential, potential, online);
        hwloc_bitmap_asprintf(&potentialstr, potential);
        fprintf(output, "%d processors allowed but not online: %s\n",
                hwloc_bitmap_weight(potential), potentialstr);
        free(potentialstr);
        hwloc_bitmap_free(potential);
      }
    }
    if (!hwloc_topology_is_thissystem(topology))
      fprintf(output, "Topology not from this system\n");
  }

  if (output != stdout)
    fclose(output);
}
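Both versions of output_console() compute cpuset differences with the same copy/andnot idiom. A minimal sketch of that idiom in isolation, reporting PUs that exist in the complete cpuset but are not represented in the topology:

/* Sketch: compute complete-cpuset minus topology-cpuset and print the
 * result, as the lstopo code above does for its summary lines. */
#include <hwloc.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    hwloc_topology_t topology;
    hwloc_bitmap_t missing;
    char *str;

    hwloc_topology_init(&topology);
    hwloc_topology_load(topology);

    missing = hwloc_bitmap_alloc();
    hwloc_bitmap_copy(missing, hwloc_topology_get_complete_cpuset(topology));
    hwloc_bitmap_andnot(missing, missing,
                        hwloc_topology_get_topology_cpuset(topology));

    hwloc_bitmap_asprintf(&str, missing);
    printf("%d processors not represented in topology: %s\n",
           hwloc_bitmap_weight(missing), str);

    free(str);
    hwloc_bitmap_free(missing);
    hwloc_topology_destroy(topology);
    return 0;
}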
void hwloc_init_cacheTopology(void)
{
    int maxNumLevels = 0;
    int id = 0;
    CacheLevel* cachePool = NULL;
    hwloc_obj_t obj;
    int depth;
    int d;

    /* Sum up all depths with caches */
    depth = hwloc_topology_get_depth(hwloc_topology);
    for (d = 0; d < depth; d++)
    {
        if (hwloc_get_depth_type(hwloc_topology, d) == HWLOC_OBJ_CACHE)
            maxNumLevels++;
    }
    cachePool = (CacheLevel*) malloc(maxNumLevels * sizeof(CacheLevel));

    /* Start at the bottom of the tree to get all cache levels in order */
    depth = hwloc_topology_get_depth(hwloc_topology);
    id = 0;
    for (d = depth - 1; d >= 0; d--)
    {
        /* We only need caches, so skip other levels */
        if (hwloc_get_depth_type(hwloc_topology, d) != HWLOC_OBJ_CACHE)
        {
            continue;
        }
        /* Get the cache object */
        obj = hwloc_get_obj_by_depth(hwloc_topology, d, 0);
        /* All caches have this attribute, so safe to access */
        switch (obj->attr->cache.type)
        {
            case HWLOC_OBJ_CACHE_DATA:
                cachePool[id].type = DATACACHE;
                break;
            case HWLOC_OBJ_CACHE_INSTRUCTION:
                cachePool[id].type = INSTRUCTIONCACHE;
                break;
            case HWLOC_OBJ_CACHE_UNIFIED:
                cachePool[id].type = UNIFIEDCACHE;
                break;
            default:
                cachePool[id].type = NOCACHE;
                break;
        }

        cachePool[id].associativity = obj->attr->cache.associativity;
        cachePool[id].level = obj->attr->cache.depth;
        cachePool[id].lineSize = obj->attr->cache.linesize;
        cachePool[id].size = obj->attr->cache.size;
        cachePool[id].sets = 0;
        if ((cachePool[id].associativity * cachePool[id].lineSize) != 0)
        {
            cachePool[id].sets = cachePool[id].size /
                (cachePool[id].associativity * cachePool[id].lineSize);
        }

        /* Count all HWThreads below the current cache */
        cachePool[id].threads = hwloc_record_objs_of_type_below_obj(
                hwloc_topology, obj, HWLOC_OBJ_PU, NULL, NULL);

        /* We need to read the inclusiveness from CPUID, no possibility in hwloc */
        switch (cpuid_info.family)
        {
            case MIC_FAMILY:
            case P6_FAMILY:
                cachePool[id].inclusive = readCacheInclusiveIntel(cachePool[id].level);
                break;
            case K16_FAMILY:
            case K15_FAMILY:
                cachePool[id].inclusive = readCacheInclusiveAMD(cachePool[id].level);
                break;
            /* For K8 and K10 it is known that they are inclusive */
            case K8_FAMILY:
            case K10_FAMILY:
                cachePool[id].inclusive = 1;
                break;
            default:
                ERROR_PLAIN_PRINT(Processor is not supported);
                break;
        }
        id++;
    }

    cpuid_topology.numCacheLevels = maxNumLevels;
    cpuid_topology.cacheLevels = cachePool;
    return;
}
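A stripped-down sketch of the bottom-up cache walk above, assuming the hwloc 1.x API it uses (the unified HWLOC_OBJ_CACHE type was split into per-level types in hwloc 2.0):

/* Sketch: walk the tree from the deepest level upward and print one
 * line per cache level found. */
#include <hwloc.h>
#include <stdio.h>

int main(void)
{
    hwloc_topology_t topology;
    int d, depth;

    hwloc_topology_init(&topology);
    hwloc_topology_load(topology);

    depth = hwloc_topology_get_depth(topology);
    for (d = depth - 1; d >= 0; d--) {
        hwloc_obj_t obj;
        if (hwloc_get_depth_type(topology, d) != HWLOC_OBJ_CACHE)
            continue;
        obj = hwloc_get_obj_by_depth(topology, d, 0);
        printf("L%u cache: %llu bytes, %u-byte lines\n",
               obj->attr->cache.depth,
               (unsigned long long) obj->attr->cache.size,
               obj->attr->cache.linesize);
    }

    hwloc_topology_destroy(topology);
    return 0;
}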
void chpl_topo_init(void) {
  //
  // We only load hwloc topology information in configurations where
  // the locale model is other than "flat" or the tasking is based on
  // Qthreads (which will use the topology we load).  We don't use
  // it otherwise (so far) because loading it is somewhat expensive.
  //
  if (strcmp(CHPL_LOCALE_MODEL, "flat") != 0
      || strcmp(CHPL_TASKS, "qthreads") == 0) {
    haveTopology = true;
  } else {
    haveTopology = false;
    return;
  }

  // Check hwloc API version.
  // Require at least hwloc version 1.11 (we need 1.11.5 to not crash
  // in some NUMA configurations).
  // Check both at build time and run time.
#define REQUIRE_HWLOC_VERSION 0x00010b00

#if HWLOC_API_VERSION < REQUIRE_HWLOC_VERSION
#error hwloc version 1.11.5 or newer is required
#endif

  CHK_ERR(hwloc_get_api_version() >= REQUIRE_HWLOC_VERSION);

  //
  // Allocate and initialize topology object.
  //
  CHK_ERR_ERRNO(hwloc_topology_init(&topology) == 0);

  //
  // Perform the topology detection.
  //
  CHK_ERR_ERRNO(hwloc_topology_load(topology) == 0);

  //
  // What is supported?
  //
  topoSupport = hwloc_topology_get_support(topology);

  //
  // TODO: update comment
  // For now, don't support setting memory locality when comm=ugni or
  // comm=gasnet, seg!=everything.  Those are the two configurations in
  // which we use hugepages and/or memory registered with the comm
  // interface, both of which may be a problem for the set-membind call.
  // We will have other ways to achieve locality for these configs in
  // the future.
  //
  do_set_area_membind = true;
  if ((strcmp(CHPL_COMM, "gasnet") == 0
       && strcmp(CHPL_GASNET_SEGMENT, "everything") != 0)) {
    do_set_area_membind = false;
  }

  //
  // We need depth information.
  //
  topoDepth = hwloc_topology_get_depth(topology);

  //
  // How many NUMA domains do we have?
  //
  {
    int level;

    //
    // Note: If there are multiple levels with NUMA nodes, this finds
    // only the uppermost.
    //
    for (level = 0, numaLevel = -1;
         level < topoDepth && numaLevel == -1;
         level++) {
      if (hwloc_get_depth_type(topology, level) == HWLOC_OBJ_NUMANODE) {
        numaLevel = level;
      }
    }
  }

  //
  // Find the NUMA nodes, that is, the objects at numaLevel that also
  // have CPUs.  This is as opposed to things like Xeon Phi HBM, which
  // is memory-only, no CPUs.
  //
  {
    const hwloc_cpuset_t cpusetAll = hwloc_get_root_obj(topology)->cpuset;
    numNumaDomains =
      hwloc_get_nbobjs_inside_cpuset_by_depth(topology, cpusetAll, numaLevel);
  }
}
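The topoSupport query above returns a structure of feature flags that can be consulted before attempting binding calls. A minimal sketch of reading one such flag (set_area_membind, which the do_set_area_membind logic above gates separately for configuration reasons):

/* Sketch: check whether the loaded topology supports binding memory
 * areas before relying on it. */
#include <hwloc.h>
#include <stdio.h>

int main(void)
{
    hwloc_topology_t topology;
    const struct hwloc_topology_support *support;

    hwloc_topology_init(&topology);
    hwloc_topology_load(topology);

    support = hwloc_topology_get_support(topology);
    printf("set_area_membind supported: %d\n",
           (int) support->membind->set_area_membind);

    hwloc_topology_destroy(topology);
    return 0;
}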
void chpl_topo_init(void) {
  //
  // For now we don't load topology information for locModel=flat, since
  // we won't use it in that case and loading it is somewhat expensive.
  // Eventually we will probably load it even for locModel=flat and use
  // it as the information source for what's currently in chplsys, and
  // also pass it to Qthreads when we use that (so it doesn't load it
  // again), but that's work for the future.
  //
  haveTopology = (strcmp(CHPL_LOCALE_MODEL, "flat") != 0);
  if (!haveTopology) {
    return;
  }

  // Check hwloc API version.
  // Require at least hwloc version 1.11 (we need 1.11.5 to not crash
  // in some NUMA configurations).
  // Check both at build time and run time.
#define REQUIRE_HWLOC_VERSION 0x00010b00

#if HWLOC_API_VERSION < REQUIRE_HWLOC_VERSION
#error hwloc version 1.11.5 or newer is required
#else
  {
    unsigned version = hwloc_get_api_version();
    // check that the version is at least REQUIRE_HWLOC_VERSION
    if (version < REQUIRE_HWLOC_VERSION)
      chpl_internal_error("hwloc version 1.11.5 or newer is required");
  }
#endif

  //
  // Allocate and initialize topology object.
  //
  if (hwloc_topology_init(&topology)) {
    report_error("hwloc_topology_init()", errno);
  }

  //
  // Perform the topology detection.
  //
  if (hwloc_topology_load(topology)) {
    report_error("hwloc_topology_load()", errno);
  }

  //
  // What is supported?
  //
  topoSupport = hwloc_topology_get_support(topology);

  //
  // TODO: update comment
  // For now, don't support setting memory locality when comm=ugni or
  // comm=gasnet, seg!=everything.  Those are the two configurations in
  // which we use hugepages and/or memory registered with the comm
  // interface, both of which may be a problem for the set-membind call.
  // We will have other ways to achieve locality for these configs in
  // the future.
  //
  do_set_area_membind = true;
  if ((strcmp(CHPL_COMM, "gasnet") == 0
       && strcmp(CHPL_GASNET_SEGMENT, "everything") != 0)) {
    do_set_area_membind = false;
  }

  //
  // We need depth information.
  //
  topoDepth = hwloc_topology_get_depth(topology);

  //
  // How many NUMA domains do we have?
  //
  {
    int level;

    //
    // Note: If there are multiple levels with NUMA nodes, this finds
    // only the uppermost.
    //
    for (level = 0, numaLevel = -1;
         level < topoDepth && numaLevel == -1;
         level++) {
      if (hwloc_get_depth_type(topology, level) == HWLOC_OBJ_NUMANODE) {
        numaLevel = level;
      }
    }
  }

  //
  // Find the NUMA nodes, that is, the objects at numaLevel that also
  // have CPUs.  This is as opposed to things like Xeon Phi HBM, which
  // is memory-only, no CPUs.
  //
  {
    const hwloc_cpuset_t cpusetAll = hwloc_get_root_obj(topology)->cpuset;
    numNumaDomains =
      hwloc_get_nbobjs_inside_cpuset_by_depth(topology, cpusetAll, numaLevel);
  }
}
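The NUMA-level search shared by both versions of chpl_topo_init() can be exercised standalone. A minimal sketch, assuming hwloc 1.11 as the code itself requires (where NUMA nodes still occupy a normal tree depth):

/* Sketch: find the uppermost depth holding NUMA nodes, then count the
 * NUMA domains whose objects actually contain CPUs. */
#include <hwloc.h>
#include <stdio.h>

int main(void)
{
    hwloc_topology_t topology;
    int level, depth, numaLevel = -1;

    hwloc_topology_init(&topology);
    hwloc_topology_load(topology);

    depth = hwloc_topology_get_depth(topology);
    for (level = 0; level < depth && numaLevel == -1; level++)
        if (hwloc_get_depth_type(topology, level) == HWLOC_OBJ_NUMANODE)
            numaLevel = level;

    if (numaLevel != -1) {
        const hwloc_cpuset_t all = hwloc_get_root_obj(topology)->cpuset;
        printf("%u NUMA domains with CPUs\n",
               hwloc_get_nbobjs_inside_cpuset_by_depth(topology, all,
                                                       numaLevel));
    }

    hwloc_topology_destroy(topology);
    return 0;
}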