static int _get_cpuinfo(uint32_t *nsockets, uint32_t *ncores, uint32_t *nthreads, uint32_t *npus) { hwloc_topology_t topology; if (hwloc_topology_init(&topology)) { /* error in initialize hwloc library */ error("%s: hwloc_topology_init() failed", __func__); return -1; } /* parse full system info */ hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM); /* ignores cache, misc */ hwloc_topology_ignore_type (topology, HWLOC_OBJ_CACHE); hwloc_topology_ignore_type (topology, HWLOC_OBJ_MISC); /* load topology */ if (hwloc_topology_load(topology)) { error("%s: hwloc_topology_load() failed", __func__); hwloc_topology_destroy(topology); return -1; } *nsockets = (uint32_t) hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_SOCKET); *ncores = (uint32_t) hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_CORE); *nthreads = (uint32_t) hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU); *npus = (uint32_t) hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU); hwloc_topology_destroy(topology); return 0; }
/*
 * lstopo entry point: parse command-line options, build the hwloc topology
 * (possibly from an XML/synthetic input instead of the live system), apply
 * restrictions/filters, then render the topology in the selected output
 * format (console, graphical window, or a file whose format is inferred
 * from its extension).
 */
int main (int argc, char *argv[])
{
  int err;
  hwloc_topology_t topology;
  const char *filename = NULL;                  /* output file, if any */
  unsigned long flags = HWLOC_TOPOLOGY_FLAG_IO_DEVICES | HWLOC_TOPOLOGY_FLAG_IO_BRIDGES | HWLOC_TOPOLOGY_FLAG_ICACHES;
  unsigned long restrict_flags = 0;             /* for --restrict-flags */
  int merge = 0;                                /* --merge: collapse identical levels */
  int ignorecache = 0;                          /* 1 = useless caches only, 2 = all caches */
  char * callname;                              /* basename of argv[0], for messages */
  char * input = NULL;                          /* topology input source (XML/synthetic) */
  enum hwloc_utils_input_format input_format = HWLOC_UTILS_INPUT_DEFAULT;
  enum output_format output_format = LSTOPO_OUTPUT_DEFAULT;
  char *restrictstring = NULL;                  /* cpuset string or "binding" */
  struct lstopo_output loutput;
  int opt;                                      /* number of extra argv consumed by an option */
  unsigned i;

  /* output defaults; logical == -1 means "decide later per output format" */
  loutput.overwrite = 0;
  loutput.logical = -1;
  loutput.legend = 1;
  loutput.verbose_mode = LSTOPO_VERBOSE_MODE_DEFAULT;

  /* default orientations: PUs, caches and NUMA nodes drawn horizontally */
  for(i=0; i<HWLOC_OBJ_TYPE_MAX; i++)
    force_orient[i] = LSTOPO_ORIENT_NONE;
  force_orient[HWLOC_OBJ_PU] = LSTOPO_ORIENT_HORIZ;
  force_orient[HWLOC_OBJ_CACHE] = LSTOPO_ORIENT_HORIZ;
  force_orient[HWLOC_OBJ_NUMANODE] = LSTOPO_ORIENT_HORIZ;

  /* enable verbose backends */
  putenv("HWLOC_XML_VERBOSE=1");
  putenv("HWLOC_SYNTHETIC_VERBOSE=1");

#ifdef HAVE_SETLOCALE
  setlocale(LC_ALL, "");
#endif

  /* callname = basename of the invoked binary */
  callname = strrchr(argv[0], '/');
  if (!callname)
    callname = argv[0];
  else
    callname++;
  /* skip argv[0], handle options */
  argc--;
  argv++;

  err = hwloc_topology_init (&topology);
  if (err)
    return EXIT_FAILURE;

  /* Option parsing loop: each iteration consumes argv[0] plus `opt`
   * extra arguments. */
  while (argc >= 1) {
    opt = 0;
    if (!strcmp (argv[0], "-v") || !strcmp (argv[0], "--verbose")) {
      loutput.verbose_mode++;
    } else if (!strcmp (argv[0], "-s") || !strcmp (argv[0], "--silent")) {
      loutput.verbose_mode--;
    } else if (!strcmp (argv[0], "-h") || !strcmp (argv[0], "--help")) {
      usage(callname, stdout);
      exit(EXIT_SUCCESS);
    } else if (!strcmp (argv[0], "-f") || !strcmp (argv[0], "--force"))
      loutput.overwrite = 1;
    else if (!strcmp (argv[0], "-l") || !strcmp (argv[0], "--logical"))
      loutput.logical = 1;
    else if (!strcmp (argv[0], "-p") || !strcmp (argv[0], "--physical"))
      loutput.logical = 0;
    else if (!strcmp (argv[0], "-c") || !strcmp (argv[0], "--cpuset"))
      lstopo_show_cpuset = 1;
    else if (!strcmp (argv[0], "-C") || !strcmp (argv[0], "--cpuset-only"))
      lstopo_show_cpuset = 2;
    else if (!strcmp (argv[0], "--taskset")) {
      lstopo_show_taskset = 1;
      if (!lstopo_show_cpuset)
        lstopo_show_cpuset = 1;
    } else if (!strcmp (argv[0], "--only")) {
      if (argc < 2) {
        usage (callname, stderr);
        exit(EXIT_FAILURE);
      }
      if (hwloc_obj_type_sscanf(argv[1], &lstopo_show_only, NULL, NULL, 0) < 0)
        fprintf(stderr, "Unsupported type `%s' passed to --only, ignoring.\n", argv[1]);
      opt = 1;
    } else if (!strcmp (argv[0], "--ignore")) {
      hwloc_obj_type_t type;
      if (argc < 2) {
        usage (callname, stderr);
        exit(EXIT_FAILURE);
      }
      if (hwloc_obj_type_sscanf(argv[1], &type, NULL, NULL, 0) < 0)
        fprintf(stderr, "Unsupported type `%s' passed to --ignore, ignoring.\n", argv[1]);
      else if (type == HWLOC_OBJ_PU)
        /* PUs cannot be removed from the topology; just hide them */
        lstopo_ignore_pus = 1;
      else
        hwloc_topology_ignore_type(topology, type);
      opt = 1;
    } else if (!strcmp (argv[0], "--no-caches"))
      ignorecache = 2;
    else if (!strcmp (argv[0], "--no-useless-caches"))
      ignorecache = 1;
    else if (!strcmp (argv[0], "--no-icaches"))
      flags &= ~HWLOC_TOPOLOGY_FLAG_ICACHES;
    else if (!strcmp (argv[0], "--whole-system"))
      flags |= HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM;
    else if (!strcmp (argv[0], "--no-io"))
      flags &= ~(HWLOC_TOPOLOGY_FLAG_IO_DEVICES | HWLOC_TOPOLOGY_FLAG_IO_BRIDGES);
    else if (!strcmp (argv[0], "--no-bridges"))
      flags &= ~(HWLOC_TOPOLOGY_FLAG_IO_BRIDGES);
    else if (!strcmp (argv[0], "--whole-io"))
      flags |= HWLOC_TOPOLOGY_FLAG_WHOLE_IO;
    else if (!strcmp (argv[0], "--merge"))
      merge = 1;
    else if (!strcmp (argv[0], "--no-collapse"))
      lstopo_collapse = 0;
    else if (!strcmp (argv[0], "--thissystem"))
      flags |= HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM;
    else if (!strcmp (argv[0], "--restrict")) {
      if (argc < 2) {
        usage (callname, stderr);
        exit(EXIT_FAILURE);
      }
      restrictstring = strdup(argv[1]);
      opt = 1;
    } else if (!strcmp (argv[0], "--restrict-flags")) {
      if (argc < 2) {
        usage (callname, stderr);
        exit(EXIT_FAILURE);
      }
      restrict_flags = (unsigned long) strtoull(argv[1], NULL, 0);
      opt = 1;
    } else if (!strcmp (argv[0], "--export-synthetic-flags")) {
      if (argc < 2) {
        usage (callname, stderr);
        exit(EXIT_FAILURE);
      }
      lstopo_export_synthetic_flags = (unsigned long) strtoull(argv[1], NULL, 0);
      opt = 1;
    } else if (!strcmp (argv[0], "--horiz"))
      for(i=0; i<HWLOC_OBJ_TYPE_MAX; i++)
        force_orient[i] = LSTOPO_ORIENT_HORIZ;
    else if (!strcmp (argv[0], "--vert"))
      for(i=0; i<HWLOC_OBJ_TYPE_MAX; i++)
        force_orient[i] = LSTOPO_ORIENT_VERT;
    else if (!strcmp (argv[0], "--rect"))
      for(i=0; i<HWLOC_OBJ_TYPE_MAX; i++)
        force_orient[i] = LSTOPO_ORIENT_RECT;
    else if (!strncmp (argv[0], "--horiz=", 8)
             || !strncmp (argv[0], "--vert=", 7)
             || !strncmp (argv[0], "--rect=", 7)) {
      /* per-type orientation: --horiz=type1,type2,... ; argv[0][2]
       * distinguishes horiz ('h') / vert ('v') / rect */
      enum lstopo_orient_e orient = (argv[0][2] == 'h') ? LSTOPO_ORIENT_HORIZ : (argv[0][2] == 'v') ? LSTOPO_ORIENT_VERT : LSTOPO_ORIENT_RECT;
      char *tmp = argv[0] + ((argv[0][2] == 'h') ? 8 : 7);
      /* walk the comma-separated type list in place */
      while (tmp) {
        char *end = strchr(tmp, ',');
        hwloc_obj_type_t type;
        if (end)
          *end = '\0';
        if (hwloc_obj_type_sscanf(tmp, &type, NULL, NULL, 0) < 0)
          fprintf(stderr, "Unsupported type `%s' passed to %s, ignoring.\n", tmp, argv[0]);
        else
          force_orient[type] = orient;
        if (!end)
          break;
        tmp = end+1;
      }
    } else if (!strcmp (argv[0], "--fontsize")) {
      if (argc < 2) {
        usage (callname, stderr);
        exit(EXIT_FAILURE);
      }
      fontsize = atoi(argv[1]);
      opt = 1;
    } else if (!strcmp (argv[0], "--gridsize")) {
      if (argc < 2) {
        usage (callname, stderr);
        exit(EXIT_FAILURE);
      }
      gridsize = atoi(argv[1]);
      opt = 1;
    } else if (!strcmp (argv[0], "--no-legend")) {
      loutput.legend = 0;
    } else if (!strcmp (argv[0], "--append-legend")) {
      char **tmp;
      if (argc < 2) {
        usage (callname, stderr);
        exit(EXIT_FAILURE);
      }
      /* grow the legend array; on realloc failure keep the old array
       * and just drop this line */
      tmp = realloc(lstopo_append_legends, (lstopo_append_legends_nr+1) * sizeof(*lstopo_append_legends));
      if (!tmp) {
        fprintf(stderr, "Failed to realloc legend append array, legend ignored.\n");
      } else {
        lstopo_append_legends = tmp;
        lstopo_append_legends[lstopo_append_legends_nr] = strdup(argv[1]);
        lstopo_append_legends_nr++;
      }
      opt = 1;
    } else if (hwloc_utils_lookup_input_option(argv, argc, &opt, &input, &input_format, callname)) {
      /* nothing to do anymore */
    } else if (!strcmp (argv[0], "--pid")) {
      if (argc < 2) {
        usage (callname, stderr);
        exit(EXIT_FAILURE);
      }
      lstopo_pid_number = atoi(argv[1]);
      opt = 1;
    } else if (!strcmp (argv[0], "--ps") || !strcmp (argv[0], "--top"))
      top = 1;
    else if (!strcmp (argv[0], "--version")) {
      printf("%s %s\n", callname, HWLOC_VERSION);
      exit(EXIT_SUCCESS);
    } else if (!strcmp (argv[0], "--output-format") || !strcmp (argv[0], "--of")) {
      if (argc < 2) {
        usage (callname, stderr);
        exit(EXIT_FAILURE);
      }
      output_format = parse_output_format(argv[1], callname);
      opt = 1;
    } else {
      /* not an option: the single positional argument is the output file */
      if (filename) {
        fprintf (stderr, "Unrecognized option: %s\n", argv[0]);
        usage (callname, stderr);
        exit(EXIT_FAILURE);
      } else
        filename = argv[0];
    }
    argc -= opt+1;
    argv += opt+1;
  }

  /* merging levels makes no sense when only a single type is displayed */
  if (lstopo_show_only != (hwloc_obj_type_t)-1)
    merge = 0;

  hwloc_topology_set_flags(topology, flags);

  if (ignorecache > 1) {
    hwloc_topology_ignore_type(topology, HWLOC_OBJ_CACHE);
  } else if (ignorecache) {
    hwloc_topology_ignore_type_keep_structure(topology, HWLOC_OBJ_CACHE);
  }
  if (merge)
    hwloc_topology_ignore_all_keep_structure(topology);

  /* use an XML/synthetic/fsroot input instead of the live system if requested */
  if (input) {
    err = hwloc_utils_enable_input_format(topology, input, &input_format, loutput.verbose_mode > 1, callname);
    if (err)
      return err;
  }

  /* inspect another process' binding instead of the whole machine */
  if (lstopo_pid_number > 0) {
    lstopo_pid = hwloc_pid_from_number(lstopo_pid_number, 0);
    if (hwloc_topology_set_pid(topology, lstopo_pid)) {
      perror("Setting target pid");
      return EXIT_FAILURE;
    }
  }

  /* if the output format wasn't enforced, look at the filename */
  if (filename && output_format == LSTOPO_OUTPUT_DEFAULT) {
    if (!strcmp(filename, "-") || !strcmp(filename, "/dev/stdout")) {
      output_format = LSTOPO_OUTPUT_CONSOLE;
    } else {
      char *dot = strrchr(filename, '.');
      if (dot)
        output_format = parse_output_format(dot+1, callname);
      else {
        fprintf(stderr, "Cannot infer output type for file `%s' without any extension, using default output.\n", filename);
        filename = NULL;
      }
    }
  }

  /* if the output format wasn't enforced, think a bit about what the user
   * probably wants */
  if (output_format == LSTOPO_OUTPUT_DEFAULT) {
    if (lstopo_show_cpuset
        || lstopo_show_only != (hwloc_obj_type_t)-1
        || loutput.verbose_mode != LSTOPO_VERBOSE_MODE_DEFAULT)
      output_format = LSTOPO_OUTPUT_CONSOLE;
  }

  if (input_format == HWLOC_UTILS_INPUT_XML && output_format == LSTOPO_OUTPUT_XML) {
    /* must be after parsing output format and before loading the topology */
    putenv("HWLOC_XML_USERDATA_NOT_DECODED=1");
    hwloc_topology_set_userdata_import_callback(topology, hwloc_utils_userdata_import_cb);
    hwloc_topology_set_userdata_export_callback(topology, hwloc_utils_userdata_export_cb);
  }

  err = hwloc_topology_load (topology);
  if (err) {
    fprintf(stderr, "hwloc_topology_load() failed (%s).\n", strerror(errno));
    return EXIT_FAILURE;
  }

  /* --top: add misc objects representing running processes */
  if (top)
    add_process_objects(topology);

  /* --restrict: keep only the given cpuset (or the current binding) */
  if (restrictstring) {
    hwloc_bitmap_t restrictset = hwloc_bitmap_alloc();
    if (!strcmp (restrictstring, "binding")) {
      if (lstopo_pid_number > 0)
        hwloc_get_proc_cpubind(topology, lstopo_pid, restrictset, HWLOC_CPUBIND_PROCESS);
      else
        hwloc_get_cpubind(topology, restrictset, HWLOC_CPUBIND_PROCESS);
    } else {
      hwloc_bitmap_sscanf(restrictset, restrictstring);
    }
    err = hwloc_topology_restrict (topology, restrictset, restrict_flags);
    if (err) {
      perror("Restricting the topology");
      /* fallthrough */
    }
    hwloc_bitmap_free(restrictset);
    free(restrictstring);
  }

  /* choose logical vs physical indexes if the user didn't: logical for
   * console output, physical for any other explicit format */
  if (loutput.logical == -1) {
    if (output_format == LSTOPO_OUTPUT_CONSOLE)
      loutput.logical = 1;
    else if (output_format != LSTOPO_OUTPUT_DEFAULT)
      loutput.logical = 0;
  }

  loutput.topology = topology;
  loutput.file = NULL;

  lstopo_populate_userdata(hwloc_get_root_obj(topology));

  if (output_format != LSTOPO_OUTPUT_XML && lstopo_collapse)
    lstopo_add_collapse_attributes(topology);

  /* dispatch to the renderer for the selected format */
  switch (output_format) {
    case LSTOPO_OUTPUT_DEFAULT:
      /* default: graphical window when available (X11 or Windows),
       * console otherwise */
#ifdef LSTOPO_HAVE_GRAPHICS
#if CAIRO_HAS_XLIB_SURFACE && defined HWLOC_HAVE_X11_KEYSYM
      if (getenv("DISPLAY")) {
        if (loutput.logical == -1)
          loutput.logical = 0;
        output_x11(&loutput, NULL);
      } else
#endif /* CAIRO_HAS_XLIB_SURFACE */
#ifdef HWLOC_WIN_SYS
      {
        if (loutput.logical == -1)
          loutput.logical = 0;
        output_windows(&loutput, NULL);
      }
#endif
#endif /* !LSTOPO_HAVE_GRAPHICS */
#if !defined HWLOC_WIN_SYS || !defined LSTOPO_HAVE_GRAPHICS
      {
        if (loutput.logical == -1)
          loutput.logical = 1;
        output_console(&loutput, NULL);
      }
#endif
      break;
    case LSTOPO_OUTPUT_CONSOLE:
      output_console(&loutput, filename);
      break;
    case LSTOPO_OUTPUT_SYNTHETIC:
      output_synthetic(&loutput, filename);
      break;
    case LSTOPO_OUTPUT_ASCII:
      output_ascii(&loutput, filename);
      break;
    case LSTOPO_OUTPUT_FIG:
      output_fig(&loutput, filename);
      break;
#ifdef LSTOPO_HAVE_GRAPHICS
# if CAIRO_HAS_PNG_FUNCTIONS
    case LSTOPO_OUTPUT_PNG:
      output_png(&loutput, filename);
      break;
# endif /* CAIRO_HAS_PNG_FUNCTIONS */
# if CAIRO_HAS_PDF_SURFACE
    case LSTOPO_OUTPUT_PDF:
      output_pdf(&loutput, filename);
      break;
# endif /* CAIRO_HAS_PDF_SURFACE */
# if CAIRO_HAS_PS_SURFACE
    case LSTOPO_OUTPUT_PS:
      output_ps(&loutput, filename);
      break;
#endif /* CAIRO_HAS_PS_SURFACE */
#if CAIRO_HAS_SVG_SURFACE
    case LSTOPO_OUTPUT_SVG:
      output_svg(&loutput, filename);
      break;
#endif /* CAIRO_HAS_SVG_SURFACE */
#endif /* LSTOPO_HAVE_GRAPHICS */
    case LSTOPO_OUTPUT_XML:
      output_xml(&loutput, filename);
      break;
    default:
      fprintf(stderr, "file format not supported\n");
      usage(callname, stderr);
      exit(EXIT_FAILURE);
  }

  /* cleanup */
  lstopo_destroy_userdata(hwloc_get_root_obj(topology));
  hwloc_utils_userdata_free_recursive(hwloc_get_root_obj(topology));
  hwloc_topology_destroy (topology);

  for(i=0; i<lstopo_append_legends_nr; i++)
    free(lstopo_append_legends[i]);
  free(lstopo_append_legends);

  return EXIT_SUCCESS;
}
/*
 * get_cpuinfo - query hwloc for this node's hardware layout.
 *
 * OUT p_cpus           - total PU (hardware thread) count
 * OUT p_boards         - board count (hwloc GROUP objects, minimum 1)
 * OUT p_sockets        - socket (or NUMA node, see below) count
 * OUT p_cores          - cores per socket
 * OUT p_threads        - threads per core
 * OUT p_block_map_size - size of the CPU ID maps (== *p_cpus)
 * OUT p_block_map      - abstract (block) ID -> machine (OS) ID map,
 *                        xmalloc'ed here; caller owns and frees
 * OUT p_block_map_inv  - inverse map, xmalloc'ed here; caller owns and frees
 * RET 0 on success, 1 if hwloc init failed, 2 if topology load failed
 */
extern int get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards, uint16_t *p_sockets, uint16_t *p_cores, uint16_t *p_threads, uint16_t *p_block_map_size, uint16_t **p_block_map, uint16_t **p_block_map_inv)
{
	enum { SOCKET=0, CORE=1, PU=2, LAST_OBJ=3 };
	hwloc_topology_t topology;
	hwloc_obj_t obj;
	hwloc_obj_type_t objtype[LAST_OBJ];
	unsigned idx[LAST_OBJ];
	int nobj[LAST_OBJ];
	int actual_cpus;
	int macid;		/* machine (OS) CPU id */
	int absid;		/* abstract (block) CPU id */
	int actual_boards = 1, depth;
	int i;

	debug2("hwloc_topology_init");
	if (hwloc_topology_init(&topology)) {
		/* error in initialize hwloc library */
		debug("hwloc_topology_init() failed.");
		return 1;
	}

	/* parse all system */
	hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM);

	/* ignores cache, misc */
	hwloc_topology_ignore_type (topology, HWLOC_OBJ_CACHE);
	hwloc_topology_ignore_type (topology, HWLOC_OBJ_MISC);

	/* load topology */
	debug2("hwloc_topology_load");
	if (hwloc_topology_load(topology)) {
		/* error in load hardware topology */
		debug("hwloc_topology_load() failed.");
		hwloc_topology_destroy(topology);
		return 2;
	}

	/* Some processors (e.g. AMD Opteron 6000 series) contain multiple
	 * NUMA nodes per socket. This is a configuration which does not map
	 * into the hardware entities that Slurm optimizes resource allocation
	 * for (PU/thread, core, socket, baseboard, node and network switch).
	 * In order to optimize resource allocations on such hardware, Slurm
	 * will consider each NUMA node within the socket as a separate socket.
	 * You can disable this configuring "SchedulerParameters=Ignore_NUMA",
	 * in which case Slurm will report the correct socket count on the node,
	 * but not be able to optimize resource allocations on the NUMA nodes.
	 */
	objtype[SOCKET] = HWLOC_OBJ_SOCKET;
	objtype[CORE]   = HWLOC_OBJ_CORE;
	objtype[PU]     = HWLOC_OBJ_PU;
	/* NUMA depth below socket depth means multiple NUMA nodes per socket */
	if (hwloc_get_type_depth(topology, HWLOC_OBJ_NODE) >
	    hwloc_get_type_depth(topology, HWLOC_OBJ_SOCKET)) {
		char *sched_params = slurm_get_sched_params();
		if (sched_params && strcasestr(sched_params, "Ignore_NUMA")) {
			info("Ignoring NUMA nodes within a socket");
		} else {
			info("Considering each NUMA node as a socket");
			objtype[SOCKET] = HWLOC_OBJ_NODE;
		}
		xfree(sched_params);
	}

	/* number of objects; GROUP objects stand in for boards */
	depth = hwloc_get_type_depth(topology, HWLOC_OBJ_GROUP);
	if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) {
		actual_boards = MAX(hwloc_get_nbobjs_by_depth(topology, depth), 1);
	}
	nobj[SOCKET] = hwloc_get_nbobjs_by_type(topology, objtype[SOCKET]);
	nobj[CORE]   = hwloc_get_nbobjs_by_type(topology, objtype[CORE]);

	/*
	 * Workaround for hwloc
	 * hwloc_get_nbobjs_by_type() returns 0 on some architectures.
	 * Fudge to 1 so the divisions below cannot divide by zero.
	 */
	if ( nobj[SOCKET] == 0 ) {
		debug("get_cpuinfo() fudging nobj[SOCKET] from 0 to 1");
		nobj[SOCKET] = 1;
	}
	if ( nobj[CORE] == 0 ) {
		debug("get_cpuinfo() fudging nobj[CORE] from 0 to 1");
		nobj[CORE] = 1;
	}
	/* -1 means the type exists at multiple depths; cannot recover */
	if ( nobj[SOCKET] == -1 )
		fatal("get_cpuinfo() can not handle nobj[SOCKET] = -1");
	if ( nobj[CORE] == -1 )
		fatal("get_cpuinfo() can not handle nobj[CORE] = -1");

	actual_cpus = hwloc_get_nbobjs_by_type(topology, objtype[PU]);
#if 0
	/* Used to find workaround above */
	info("CORE = %d SOCKET = %d actual_cpus = %d nobj[CORE] = %d", CORE, SOCKET, actual_cpus, nobj[CORE]);
#endif
	nobj[PU] = actual_cpus/nobj[CORE];	/* threads per core */
	nobj[CORE] /= nobj[SOCKET];		/* cores per socket */

	debug("CPUs:%d Boards:%u Sockets:%d CoresPerSocket:%d ThreadsPerCore:%d", actual_cpus, actual_boards, nobj[SOCKET], nobj[CORE], nobj[PU]);

	/* allocate block_map */
	*p_block_map_size = (uint16_t)actual_cpus;
	if (p_block_map && p_block_map_inv) {
		*p_block_map     = xmalloc(actual_cpus * sizeof(uint16_t));
		*p_block_map_inv = xmalloc(actual_cpus * sizeof(uint16_t));

		/* initialize default as linear mapping */
		for (i = 0; i < actual_cpus; i++) {
			(*p_block_map)[i]     = i;
			(*p_block_map_inv)[i] = i;
		}

		/* create map with hwloc: walk socket/core/PU index space and
		 * record the OS index hwloc reports for each triple */
		for (idx[SOCKET]=0; idx[SOCKET]<nobj[SOCKET]; ++idx[SOCKET]) {
			for (idx[CORE]=0; idx[CORE]<nobj[CORE]; ++idx[CORE]) {
				for (idx[PU]=0; idx[PU]<nobj[PU]; ++idx[PU]) {
					/* get hwloc_obj by indexes */
					obj=hwloc_get_obj_below_array_by_type(
						topology, 3, objtype, idx);
					if (!obj)
						continue;
					macid = obj->os_index;
					absid = idx[SOCKET]*nobj[CORE]*nobj[PU] + idx[CORE]*nobj[PU] + idx[PU];
					if ((macid >= actual_cpus) ||
					    (absid >= actual_cpus)) {
						/* physical or logical ID are
						 * out of range */
						continue;
					}
					debug4("CPU map[%d]=>%d", absid, macid);
					(*p_block_map)[absid]     = macid;
					(*p_block_map_inv)[macid] = absid;
				}
			}
		}
	}
	hwloc_topology_destroy(topology);

	/* update output parameters */
	*p_cpus    = actual_cpus;
	*p_boards  = actual_boards;
	*p_sockets = nobj[SOCKET];
	*p_cores   = nobj[CORE];
	*p_threads = nobj[PU];

#if DEBUG_DETAIL
	/*** Display raw data ***/
	debug("CPUs:%u Boards:%u Sockets:%u CoresPerSocket:%u ThreadsPerCore:%u", *p_cpus, *p_boards, *p_sockets, *p_cores, *p_threads);

	/* Display the mapping tables */
	if (p_block_map && p_block_map_inv) {
		debug("------");
		debug("Abstract -> Machine logical CPU ID block mapping:");
		debug("AbstractId PhysicalId Inverse");
		for (i = 0; i < *p_cpus; i++) {
			debug3(" %4d %4u %4u", i, (*p_block_map)[i], (*p_block_map_inv)[i]);
		}
		debug("------");
	}
#endif
	return 0;
}
/*
 * Detect CPU topology facts for DEVICE via libhwloc and fill in:
 *   - global_mem_size (total physical memory reported by the root object)
 *   - max_compute_units (number of PUs / hardware threads)
 *   - global_mem_cache_{type,line_size,size} (first shared cache covering
 *     the first core, falling back to the cache covering the first PU)
 *
 * Returns 0 on success, or the hwloc error code (-1) if the topology
 * could not be initialized or loaded.
 */
int pocl_topology_detect_device_info(cl_device_id device)
{
  hwloc_topology_t pocl_topology;
  int ret = 0;

  /* Sanity check: a 1.x/2.x mismatch between compile-time and runtime
   * hwloc makes the API calls below invalid.
   * FIX: the two-part string literals previously concatenated to
   * "...but isactually running..." (missing space). */
#ifdef HWLOC_API_2
  if (hwloc_get_api_version () < 0x20000)
    POCL_MSG_ERR ("pocl was compiled against libhwloc 2.x but is "
                  "actually running against libhwloc 1.x \n");
#else
  if (hwloc_get_api_version () >= 0x20000)
    POCL_MSG_ERR ("pocl was compiled against libhwloc 1.x but is "
                  "actually running against libhwloc 2.x \n");
#endif

  /*
   * hwloc's OpenCL backend causes problems at the initialization stage
   * because it reloads libpocl.so via the ICD loader.
   *
   * See: https://github.com/pocl/pocl/issues/261
   *
   * The only trick to stop hwloc from initializing the OpenCL plugin
   * I could find is to point the plugin search path to a place where there
   * are no plugins to be found.
   */
  setenv ("HWLOC_PLUGINS_PATH", "/dev/null", 1);

  ret = hwloc_topology_init (&pocl_topology);
  if (ret == -1)
    {
      POCL_MSG_ERR ("Cannot initialize the topology.\n");
      return ret;
    }

  /* Only CPU objects matter here; filter out everything else to keep
   * topology discovery cheap. */
#ifdef HWLOC_API_2
  hwloc_topology_set_io_types_filter(pocl_topology, HWLOC_TYPE_FILTER_KEEP_NONE);
  hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_SYSTEM, HWLOC_TYPE_FILTER_KEEP_NONE);
  hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_GROUP, HWLOC_TYPE_FILTER_KEEP_NONE);
  hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_BRIDGE, HWLOC_TYPE_FILTER_KEEP_NONE);
  hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_MISC, HWLOC_TYPE_FILTER_KEEP_NONE);
  hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_PCI_DEVICE, HWLOC_TYPE_FILTER_KEEP_NONE);
  hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_OS_DEVICE, HWLOC_TYPE_FILTER_KEEP_NONE);
#else
  /* FIX: the previous code also called
   *   hwloc_topology_ignore_type (pocl_topology, HWLOC_TOPOLOGY_FLAG_WHOLE_IO);
   * passing a topology *flag* where an hwloc_obj_type_t is expected. In
   * hwloc 1.x, I/O objects are only added when an I/O topology flag is set
   * (which we never do), so no ignore call is needed for them; the bogus
   * call is dropped. */
  hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_SYSTEM);
  hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_GROUP);
  hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_BRIDGE);
  hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_MISC);
  hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_PCI_DEVICE);
  hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_OS_DEVICE);
#endif

  ret = hwloc_topology_load (pocl_topology);
  if (ret == -1)
    {
      POCL_MSG_ERR ("Cannot load the topology.\n");
      goto exit_destroy;
    }

#ifdef HWLOC_API_2
  device->global_mem_size =
      hwloc_get_root_obj(pocl_topology)->total_memory;
#else
  device->global_mem_size =
      hwloc_get_root_obj(pocl_topology)->memory.total_memory;
#endif

  // Try to get the number of CPU cores from topology
  int depth = hwloc_get_type_depth(pocl_topology, HWLOC_OBJ_PU);
  if (depth != HWLOC_TYPE_DEPTH_UNKNOWN)
    device->max_compute_units = hwloc_get_nbobjs_by_depth(pocl_topology, depth);

  /* Find information about global memory cache by looking at the first
   * cache covering the first core; if that fails, fall back to the cache
   * covering the first PU. */
  do {
      size_t cache_size = 0, cacheline_size = 0;

      hwloc_obj_t core
          = hwloc_get_next_obj_by_type (pocl_topology, HWLOC_OBJ_CORE, NULL);
      if (core)
        {
          hwloc_obj_t cache
              = hwloc_get_shared_cache_covering_obj (pocl_topology, core);
          if ((cache) && (cache->attr))
            {
              cacheline_size = cache->attr->cache.linesize;
              cache_size = cache->attr->cache.size;
            }
          else
            core = NULL; /* fallback to L1 cache size */
        }

      hwloc_obj_t pu
          = hwloc_get_next_obj_by_type (pocl_topology, HWLOC_OBJ_PU, NULL);
      if (!core && pu)
        {
          hwloc_obj_t cache
              = hwloc_get_shared_cache_covering_obj (pocl_topology, pu);
          if ((cache) && (cache->attr))
            {
              cacheline_size = cache->attr->cache.linesize;
              cache_size = cache->attr->cache.size;
            }
        }

      if (!cache_size || !cacheline_size)
        break;

      device->global_mem_cache_type
          = 0x2; // CL_READ_WRITE_CACHE, without including all of CL/cl.h
      device->global_mem_cacheline_size = cacheline_size;
      device->global_mem_cache_size = cache_size;
  } while (0);

  // Destroy topology object and return
exit_destroy:
  hwloc_topology_destroy (pocl_topology);
  return ret;
}
extern int get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards, uint16_t *p_sockets, uint16_t *p_cores, uint16_t *p_threads, uint16_t *p_block_map_size, uint16_t **p_block_map, uint16_t **p_block_map_inv) { enum { SOCKET=0, CORE=1, PU=2, LAST_OBJ=3 }; hwloc_topology_t topology; hwloc_obj_t obj; hwloc_obj_type_t objtype[LAST_OBJ]; unsigned idx[LAST_OBJ]; int nobj[LAST_OBJ]; int actual_cpus; int macid; int absid; int actual_boards = 1, depth; int i; debug2("hwloc_topology_init"); if (hwloc_topology_init(&topology)) { /* error in initialize hwloc library */ debug("hwloc_topology_init() failed."); return 1; } /* parse all system */ hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM); /* ignores cache, misc */ hwloc_topology_ignore_type (topology, HWLOC_OBJ_CACHE); hwloc_topology_ignore_type (topology, HWLOC_OBJ_MISC); /* load topology */ debug2("hwloc_topology_load"); if (hwloc_topology_load(topology)) { /* error in load hardware topology */ debug("hwloc_topology_load() failed."); hwloc_topology_destroy(topology); return 2; } /* At least on a temporary basis, one could map AMD Bulldozer entities * onto the entities that Slurm does optimize placement for today (e.g. * map each Bulldozer core to a thread and each Bulldozer module to a * Slurm core, alternately map the Bulldozer module to a Slurm socket * and the Bulldozer socket to a Slurm board). Perhaps not ideal, but * it would achieve the desired locality. */ if ( hwloc_get_type_depth(topology, HWLOC_OBJ_NODE) > hwloc_get_type_depth(topology, HWLOC_OBJ_SOCKET) ) { /* One socket contains multiple NUMA-nodes * like AMD Opteron 6000 series etc. * In such case, use NUMA-node instead of socket. 
*/ objtype[SOCKET] = HWLOC_OBJ_NODE; objtype[CORE] = HWLOC_OBJ_CORE; objtype[PU] = HWLOC_OBJ_PU; } else { objtype[SOCKET] = HWLOC_OBJ_SOCKET; objtype[CORE] = HWLOC_OBJ_CORE; objtype[PU] = HWLOC_OBJ_PU; } /* number of objects */ depth = hwloc_get_type_depth(topology, HWLOC_OBJ_GROUP); if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) { actual_boards = MAX(hwloc_get_nbobjs_by_depth(topology, depth), 1); } nobj[SOCKET] = hwloc_get_nbobjs_by_type(topology, objtype[SOCKET]); nobj[CORE] = hwloc_get_nbobjs_by_type(topology, objtype[CORE]); actual_cpus = hwloc_get_nbobjs_by_type(topology, objtype[PU]); nobj[PU] = actual_cpus/nobj[CORE]; /* threads per core */ nobj[CORE] /= nobj[SOCKET]; /* cores per socket */ debug("CPUs:%d Boards:%u Sockets:%d CoresPerSocket:%d ThreadsPerCore:%d", actual_cpus, actual_boards, nobj[SOCKET], nobj[CORE], nobj[PU]); /* allocate block_map */ *p_block_map_size = (uint16_t)actual_cpus; if (p_block_map && p_block_map_inv) { *p_block_map = xmalloc(actual_cpus * sizeof(uint16_t)); *p_block_map_inv = xmalloc(actual_cpus * sizeof(uint16_t)); /* initialize default as linear mapping */ for (i = 0; i < actual_cpus; i++) { (*p_block_map)[i] = i; (*p_block_map_inv)[i] = i; } /* create map with hwloc */ for (idx[SOCKET]=0; idx[SOCKET]<nobj[SOCKET]; ++idx[SOCKET]) { for (idx[CORE]=0; idx[CORE]<nobj[CORE]; ++idx[CORE]) { for (idx[PU]=0; idx[PU]<nobj[PU]; ++idx[PU]) { /* get hwloc_obj by indexes */ obj=hwloc_get_obj_below_array_by_type( topology, 3, objtype, idx); if (!obj) continue; macid = obj->os_index; absid = idx[SOCKET]*nobj[CORE]*nobj[PU] + idx[CORE]*nobj[PU] + idx[PU]; if ((macid >= actual_cpus) || (absid >= actual_cpus)) { /* physical or logical ID are * out of range */ continue; } debug4("CPU map[%d]=>%d", absid, macid); (*p_block_map)[absid] = macid; (*p_block_map_inv)[macid] = absid; } } } } hwloc_topology_destroy(topology); /* update output parameters */ *p_cpus = actual_cpus; *p_boards = actual_boards; *p_sockets = nobj[SOCKET]; *p_cores = nobj[CORE]; 
*p_threads = nobj[PU]; #if DEBUG_DETAIL /*** Display raw data ***/ debug("CPUs:%u Boards:%u Sockets:%u CoresPerSocket:%u ThreadsPerCore:%u", *p_cpus, *p_boards, *p_sockets, *p_cores, *p_threads); /* Display the mapping tables */ if (p_block_map && p_block_map_inv) { debug("------"); debug("Abstract -> Machine logical CPU ID block mapping:"); debug("AbstractId PhysicalId Inverse"); for (i = 0; i < *p_cpus; i++) { debug3(" %4d %4u %4u", i, (*p_block_map)[i], (*p_block_map_inv)[i]); } debug("------"); } #endif return 0; }
/*
 * hwloc test program: load the topology with full I/O discovery enabled,
 * enumerate bridges, PCI devices and OS devices, and assert the invariants
 * hwloc guarantees for I/O objects (they cannot be ignored, they live at
 * virtual depths, and their type ordering is bridge < pcidev < osdev).
 */
int main(void)
{
  hwloc_topology_t topology;
  hwloc_obj_t obj;

  hwloc_topology_init(&topology);
  hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_IO);
  /* I/O object types must be rejected by the ignore API (returns -1) */
  assert(-1 == hwloc_topology_ignore_type(topology, HWLOC_OBJ_PCI_DEVICE));
  assert(-1 == hwloc_topology_ignore_type_keep_structure(topology, HWLOC_OBJ_BRIDGE));
  assert(-1 == hwloc_topology_ignore_type(topology, HWLOC_OBJ_OS_DEVICE));
  hwloc_topology_load(topology);

  /* enumerate bridges and print their upstream/downstream endpoints */
  printf("Found %d bridges\n", hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_BRIDGE));
  obj = NULL;
  while ((obj = hwloc_get_next_bridge(topology, obj)) != NULL) {
    assert(obj->type == HWLOC_OBJ_BRIDGE);

    /* only host->pci and pci->pci bridge supported so far */
    if (obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST) {
      assert(obj->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI);
      printf(" Found host->PCI bridge for domain %04x bus %02x-%02x\n",
             obj->attr->bridge.downstream.pci.domain,
             obj->attr->bridge.downstream.pci.secondary_bus,
             obj->attr->bridge.downstream.pci.subordinate_bus);
    } else {
      assert(obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI);
      assert(obj->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI);
      printf(" Found PCI->PCI bridge [%04x:%04x] for domain %04x bus %02x-%02x\n",
             obj->attr->bridge.upstream.pci.vendor_id,
             obj->attr->bridge.upstream.pci.device_id,
             obj->attr->bridge.downstream.pci.domain,
             obj->attr->bridge.downstream.pci.secondary_bus,
             obj->attr->bridge.downstream.pci.subordinate_bus);
    }
  }

  /* enumerate PCI devices */
  printf("Found %d PCI devices\n", hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PCI_DEVICE));
  obj = NULL;
  while ((obj = hwloc_get_next_pcidev(topology, obj)) != NULL) {
    assert(obj->type == HWLOC_OBJ_PCI_DEVICE);
    printf(" Found PCI device class %04x vendor %04x model %04x\n",
           obj->attr->pcidev.class_id, obj->attr->pcidev.vendor_id, obj->attr->pcidev.device_id);
  }

  /* enumerate OS devices */
  printf("Found %d OS devices\n", hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_OS_DEVICE));
  obj = NULL;
  while ((obj = hwloc_get_next_osdev(topology, obj)) != NULL) {
    assert(obj->type == HWLOC_OBJ_OS_DEVICE);
    printf(" Found OS device %s subtype %d\n", obj->name, obj->attr->osdev.type);
  }

  /* I/O objects live at fixed virtual depths, and their types compare in
   * the order bridge < pcidev < osdev */
  assert(HWLOC_TYPE_DEPTH_BRIDGE == hwloc_get_type_depth(topology, HWLOC_OBJ_BRIDGE));
  assert(HWLOC_TYPE_DEPTH_PCI_DEVICE == hwloc_get_type_depth(topology, HWLOC_OBJ_PCI_DEVICE));
  assert(HWLOC_TYPE_DEPTH_OS_DEVICE == hwloc_get_type_depth(topology, HWLOC_OBJ_OS_DEVICE));
  assert(hwloc_compare_types(HWLOC_OBJ_BRIDGE, HWLOC_OBJ_PCI_DEVICE) < 0);
  assert(hwloc_compare_types(HWLOC_OBJ_BRIDGE, HWLOC_OBJ_OS_DEVICE) < 0);
  assert(hwloc_compare_types(HWLOC_OBJ_PCI_DEVICE, HWLOC_OBJ_OS_DEVICE) < 0);

  /* check that hwloc_get_hostbridge_by_pcibus() and
   * hwloc_get_non_io_ancestor_obj work fine */
  obj = NULL;
  while ((obj = hwloc_get_next_pcidev(topology, obj)) != NULL) {
    assert(hwloc_get_hostbridge_by_pcibus(topology, obj->attr->pcidev.domain, obj->attr->pcidev.bus)->parent
           == hwloc_get_non_io_ancestor_obj(topology, obj));
  }

  hwloc_topology_destroy(topology);

  return 0;
}
extern int get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards, uint16_t *p_sockets, uint16_t *p_cores, uint16_t *p_threads, uint16_t *p_block_map_size, uint16_t **p_block_map, uint16_t **p_block_map_inv) { enum { SOCKET=0, CORE=1, PU=2, LAST_OBJ=3 }; hwloc_topology_t topology; hwloc_obj_t obj; hwloc_obj_type_t objtype[LAST_OBJ]; unsigned idx[LAST_OBJ]; int nobj[LAST_OBJ]; bitstr_t *used_socket = NULL; int *cores_per_socket; int actual_cpus; int macid; int absid; int actual_boards = 1, depth, sock_cnt, tot_socks = 0; int i, used_core_idx, used_sock_idx; debug2("hwloc_topology_init"); if (hwloc_topology_init(&topology)) { /* error in initialize hwloc library */ debug("hwloc_topology_init() failed."); return 1; } /* parse all system */ hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM); /* ignores cache, misc */ #if HWLOC_API_VERSION < 0x00020000 hwloc_topology_ignore_type(topology, HWLOC_OBJ_CACHE); hwloc_topology_ignore_type(topology, HWLOC_OBJ_MISC); #else hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L1CACHE, HWLOC_TYPE_FILTER_KEEP_NONE); hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L2CACHE, HWLOC_TYPE_FILTER_KEEP_NONE); hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L3CACHE, HWLOC_TYPE_FILTER_KEEP_NONE); hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L4CACHE, HWLOC_TYPE_FILTER_KEEP_NONE); hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L5CACHE, HWLOC_TYPE_FILTER_KEEP_NONE); hwloc_topology_set_type_filter(topology, HWLOC_OBJ_MISC, HWLOC_TYPE_FILTER_KEEP_NONE); #endif /* load topology */ debug2("hwloc_topology_load"); if (hwloc_topology_load(topology)) { /* error in load hardware topology */ debug("hwloc_topology_load() failed."); hwloc_topology_destroy(topology); return 2; } #if _DEBUG _hwloc_children(topology, hwloc_get_root_obj(topology), 0); #endif /* * Some processors (e.g. AMD Opteron 6000 series) contain multiple * NUMA nodes per socket. 
This is a configuration which does not map
 * into the hardware entities that Slurm optimizes resource allocation
 * for (PU/thread, core, socket, baseboard, node and network switch).
 * In order to optimize resource allocations on such hardware, Slurm
 * will consider each NUMA node within the socket as a separate socket.
 * You can disable this configuring "SchedulerParameters=Ignore_NUMA",
 * in which case Slurm will report the correct socket count on the node,
 * but not be able to optimize resource allocations on the NUMA nodes.
 */
	/* Map Slurm's SOCKET/CORE/PU levels onto the corresponding hwloc
	 * object types; SOCKET may be overridden to NODE below. */
	objtype[SOCKET] = HWLOC_OBJ_SOCKET;
	objtype[CORE] = HWLOC_OBJ_CORE;
	objtype[PU] = HWLOC_OBJ_PU;
	/* In hwloc a larger depth value means deeper in the tree, so this
	 * condition detects NUMA nodes nested INSIDE sockets.  Unless the
	 * administrator configured SchedulerParameters=Ignore_NUMA, each
	 * NUMA node is then treated as a socket for allocation purposes. */
	if (hwloc_get_type_depth(topology, HWLOC_OBJ_NODE) >
	    hwloc_get_type_depth(topology, HWLOC_OBJ_SOCKET)) {
		char *sched_params = slurm_get_sched_params();
		if (sched_params && strcasestr(sched_params, "Ignore_NUMA")) {
			info("Ignoring NUMA nodes within a socket");
		} else {
			info("Considering each NUMA node as a socket");
			objtype[SOCKET] = HWLOC_OBJ_NODE;
		}
		xfree(sched_params);
	}

	/* number of objects */
	/* hwloc GROUP objects are used as the board count here; report at
	 * least one board even if none are present in the topology. */
	depth = hwloc_get_type_depth(topology, HWLOC_OBJ_GROUP);
	if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) {
		actual_boards =
			MAX(hwloc_get_nbobjs_by_depth(topology, depth), 1);
	}

	/*
	 * Count sockets/NUMA containing any cores.
	 * KNL NUMA with no cores are NOT counted.
	 */
	nobj[SOCKET] = 0;
	depth = hwloc_get_type_depth(topology, objtype[SOCKET]);
	used_socket = bit_alloc(_MAX_SOCKET_INX);
	cores_per_socket = xmalloc(sizeof(int) * _MAX_SOCKET_INX);
	sock_cnt = hwloc_get_nbobjs_by_depth(topology, depth);
	for (i = 0; i < sock_cnt; i++) {
		obj = hwloc_get_obj_by_depth(topology, depth, i);
		if (obj->type == objtype[SOCKET]) {
			/* Only sockets that actually contain cores count
			 * (filters out core-less KNL NUMA nodes). */
			cores_per_socket[i] = _core_child_count(topology, obj);
			if (cores_per_socket[i] > 0) {
				nobj[SOCKET]++;
				/* NOTE(review): the bitmap is set at index
				 * tot_socks while cores_per_socket[] uses the
				 * depth index i; these agree only when every
				 * object at this depth has the socket type —
				 * confirm for topologies with mixed objects
				 * at the socket depth. */
				bit_set(used_socket, tot_socks);
			}
			if (++tot_socks >= _MAX_SOCKET_INX) { /* Bitmap size */
				fatal("Socket count exceeds %d, expand data structure size",
				      _MAX_SOCKET_INX);
				break;
			}
		}
	}
	nobj[CORE] = hwloc_get_nbobjs_by_type(topology, objtype[CORE]);

	/*
	 * Workaround for hwloc bug, in some cases the topology "children" array
	 * does not get populated, so _core_child_count() always returns 0
	 */
	if (nobj[SOCKET] == 0) {
		/* Fall back to a direct by-type count, and mark every socket
		 * as used so the block-map loops below still run. */
		nobj[SOCKET] = hwloc_get_nbobjs_by_type(topology,
							objtype[SOCKET]);
		if (nobj[SOCKET] == 0) {
			debug("get_cpuinfo() fudging nobj[SOCKET] from 0 to 1");
			nobj[SOCKET] = 1;
		}
		if (nobj[SOCKET] >= _MAX_SOCKET_INX) { /* Bitmap size */
			fatal("Socket count exceeds %d, expand data structure size",
			      _MAX_SOCKET_INX);
		}
		bit_nset(used_socket, 0, nobj[SOCKET] - 1);
	}

	/*
	 * Workaround for hwloc
	 * hwloc_get_nbobjs_by_type() returns 0 on some architectures.
	 */
	if ( nobj[CORE] == 0 ) {
		debug("get_cpuinfo() fudging nobj[CORE] from 0 to 1");
		nobj[CORE] = 1;
	}
	/* -1 means the object type spans multiple depths; the divisions
	 * below would be meaningless, so bail out hard. */
	if ( nobj[SOCKET] == -1 )
		fatal("get_cpuinfo() can not handle nobj[SOCKET] = -1");
	if ( nobj[CORE] == -1 )
		fatal("get_cpuinfo() can not handle nobj[CORE] = -1");
	actual_cpus = hwloc_get_nbobjs_by_type(topology, objtype[PU]);
#if 0
	/* Used to find workaround above */
	info("CORE = %d SOCKET = %d actual_cpus = %d nobj[CORE] = %d",
	     CORE, SOCKET, actual_cpus, nobj[CORE]);
#endif
	/* Convert the machine-wide totals into per-parent ratios:
	 * PU count -> threads per core, core count -> cores per socket.
	 * Non-divisible counts are reported but the division proceeds
	 * (integer truncation). */
	if ((actual_cpus % nobj[CORE]) != 0) {
		error("Thread count (%d) not multiple of core count (%d)",
		      actual_cpus, nobj[CORE]);
	}
	nobj[PU] = actual_cpus / nobj[CORE]; /* threads per core */

	if ((nobj[CORE] % nobj[SOCKET]) != 0) {
		error("Core count (%d) not multiple of socket count (%d)",
		      nobj[CORE], nobj[SOCKET]);
	}
	nobj[CORE] /= nobj[SOCKET]; /* cores per socket */

	debug("CPUs:%d Boards:%d Sockets:%d CoresPerSocket:%d ThreadsPerCore:%d",
	      actual_cpus, actual_boards, nobj[SOCKET], nobj[CORE], nobj[PU]);

	/* allocate block_map */
	if (p_block_map_size)
		*p_block_map_size = (uint16_t)actual_cpus;
	if (p_block_map && p_block_map_inv) {
		/* Build abstract<->machine CPU id maps (both directions). */
		*p_block_map = xmalloc(actual_cpus * sizeof(uint16_t));
		*p_block_map_inv = xmalloc(actual_cpus * sizeof(uint16_t));

		/* initialize default as linear mapping */
		for (i = 0; i < actual_cpus; i++) {
			(*p_block_map)[i] = i;
			(*p_block_map_inv)[i] = i;
		}

		/* create map with hwloc */
		/* Walk socket -> core -> PU, skipping sockets that were not
		 * marked used (i.e. had no cores).  used_sock_idx and
		 * used_core_idx number only the sockets/cores actually
		 * visited, producing dense abstract ids. */
		used_sock_idx = -1;
		used_core_idx = -1;
		for (idx[SOCKET] = 0; (used_sock_idx + 1) < nobj[SOCKET];
		     idx[SOCKET]++) {
			if (!bit_test(used_socket, idx[SOCKET]))
				continue;
			used_sock_idx++;
			for (idx[CORE] = 0;
			     idx[CORE] < cores_per_socket[idx[SOCKET]];
			     idx[CORE]++) {
				used_core_idx++;
				for (idx[PU]=0; idx[PU]<nobj[PU]; ++idx[PU]) {
					/* get hwloc_obj by indexes */
					obj=hwloc_get_obj_below_array_by_type(
						topology, 3, objtype, idx);
					if (!obj)
						continue;
					/* macid: OS (machine) CPU id;
					 * absid: dense abstract id. */
					macid = obj->os_index;
					absid = used_core_idx * nobj[PU] +
						idx[PU];
					if ((macid >= actual_cpus) ||
					    (absid >= actual_cpus)) {
						/* physical or logical ID are
						 * out of range */
						continue;
					}
					debug4("CPU map[%d]=>%d S:C:T %d:%d:%d",
					       absid, macid, used_sock_idx,
					       idx[CORE], idx[PU]);
					(*p_block_map)[absid] = macid;
					(*p_block_map_inv)[macid] = absid;
				}
			}
		}
	}
	FREE_NULL_BITMAP(used_socket);
	xfree(cores_per_socket);
	hwloc_topology_destroy(topology);

	/* update output parameters */
	*p_cpus = actual_cpus;
	*p_boards = actual_boards;
	*p_sockets = nobj[SOCKET];
	*p_cores = nobj[CORE];
	*p_threads = nobj[PU];

#if _DEBUG
	/*** Display raw data ***/
	debug("CPUs:%u Boards:%u Sockets:%u CoresPerSocket:%u ThreadsPerCore:%u",
	      *p_cpus, *p_boards, *p_sockets, *p_cores, *p_threads);

	/* Display the mapping tables */
	if (p_block_map && p_block_map_inv) {
		debug("------");
		debug("Abstract -> Machine logical CPU ID block mapping:");
		debug("AbstractId PhysicalId Inverse");
		for (i = 0; i < *p_cpus; i++) {
			debug3(" %4d %4u %4u",
			       i, (*p_block_map)[i], (*p_block_map_inv)[i]);
		}
		debug("------");
	}
#endif
	return SLURM_SUCCESS;
}