static int rte_init(void) { int ret; char *error = NULL; char *envar, *ev1, *ev2; uint64_t unique_key[2]; char *string_key; char *rmluri; opal_value_t *kv; char *val; int u32, *u32ptr; uint16_t u16, *u16ptr; char **peers=NULL, *mycpuset, **cpusets=NULL; opal_process_name_t name; size_t i; /* run the prolog */ if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { error = "orte_ess_base_std_prolog"; goto error; } /* get an async event base - we use the opal_async one so * we don't startup extra threads if not needed */ orte_event_base = opal_progress_thread_init(NULL); progress_thread_running = true; /* open and setup pmix */ if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) { ORTE_ERROR_LOG(ret); /* we cannot run */ error = "pmix init"; goto error; } if (OPAL_SUCCESS != (ret = opal_pmix_base_select())) { /* we cannot run */ error = "pmix init"; goto error; } /* set the event base */ opal_pmix_base_set_evbase(orte_event_base); /* initialize the selected module */ if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init()))) { /* we cannot run */ error = "pmix init"; goto error; } u32ptr = &u32; u16ptr = &u16; /**** THE FOLLOWING ARE REQUIRED VALUES ***/ /* pmix.init set our process name down in the OPAL layer, * so carry it forward here */ ORTE_PROC_MY_NAME->jobid = OPAL_PROC_MY_NAME.jobid; ORTE_PROC_MY_NAME->vpid = OPAL_PROC_MY_NAME.vpid; /* get our local rank from PMI */ OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_RANK, ORTE_PROC_MY_NAME, &u16ptr, OPAL_UINT16); if (OPAL_SUCCESS != ret) { error = "getting local rank"; goto error; } orte_process_info.my_local_rank = u16; /* get our node rank from PMI */ OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_NODE_RANK, ORTE_PROC_MY_NAME, &u16ptr, OPAL_UINT16); if (OPAL_SUCCESS != ret) { error = "getting node rank"; goto error; } orte_process_info.my_node_rank = u16; /* get max procs */ OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_MAX_PROCS, ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32); if 
(OPAL_SUCCESS != ret) { error = "getting max procs"; goto error; } orte_process_info.max_procs = u32; /* get job size */ OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_JOB_SIZE, ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32); if (OPAL_SUCCESS != ret) { error = "getting job size"; goto error; } orte_process_info.num_procs = u32; /* push into the environ for pickup in MPI layer for * MPI-3 required info key */ if (NULL == getenv(OPAL_MCA_PREFIX"orte_ess_num_procs")) { asprintf(&ev1, OPAL_MCA_PREFIX"orte_ess_num_procs=%d", orte_process_info.num_procs); putenv(ev1); added_num_procs = true; } if (NULL == getenv("OMPI_APP_CTX_NUM_PROCS")) { asprintf(&ev2, "OMPI_APP_CTX_NUM_PROCS=%d", orte_process_info.num_procs); putenv(ev2); added_app_ctx = true; } /* get our app number from PMI - ok if not found */ OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_APPNUM, ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32); if (OPAL_SUCCESS == ret) { orte_process_info.app_num = u32; } else { orte_process_info.app_num = 0; } /* get the number of local peers - required for wireup of * shared memory BTL */ OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_SIZE, ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32); if (OPAL_SUCCESS == ret) { orte_process_info.num_local_peers = u32 - 1; // want number besides ourselves } else { orte_process_info.num_local_peers = 0; } /* setup transport keys in case the MPI layer needs them - * we can use the jobfam and stepid as unique keys * because they are unique values assigned by the RM */ if (NULL == getenv(OPAL_MCA_PREFIX"orte_precondition_transports")) { unique_key[0] = ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid); unique_key[1] = ORTE_LOCAL_JOBID(ORTE_PROC_MY_NAME->jobid); if (NULL == (string_key = orte_pre_condition_transports_print(unique_key))) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); return ORTE_ERR_OUT_OF_RESOURCE; } opal_output_verbose(2, orte_ess_base_framework.framework_output, "%s transport key %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), string_key); asprintf(&envar, 
OPAL_MCA_PREFIX"orte_precondition_transports=%s", string_key); putenv(envar); added_transport_keys = true; /* cannot free the envar as that messes up our environ */ free(string_key); } /* retrieve our topology */ val = NULL; OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCAL_TOPO, ORTE_PROC_MY_NAME, &val, OPAL_STRING); if (OPAL_SUCCESS == ret && NULL != val) { /* load the topology */ if (0 != hwloc_topology_init(&opal_hwloc_topology)) { ret = OPAL_ERROR; free(val); error = "setting topology"; goto error; } if (0 != hwloc_topology_set_xmlbuffer(opal_hwloc_topology, val, strlen(val))) { ret = OPAL_ERROR; free(val); hwloc_topology_destroy(opal_hwloc_topology); error = "setting topology"; goto error; } /* since we are loading this from an external source, we have to * explicitly set a flag so hwloc sets things up correctly */ if (0 != hwloc_topology_set_flags(opal_hwloc_topology, (HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM | HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM | HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) { ret = OPAL_ERROR; hwloc_topology_destroy(opal_hwloc_topology); free(val); error = "setting topology"; goto error; } /* now load the topology */ if (0 != hwloc_topology_load(opal_hwloc_topology)) { ret = OPAL_ERROR; hwloc_topology_destroy(opal_hwloc_topology); free(val); error = "setting topology"; goto error; } free(val); /* filter the cpus thru any default cpu set */ if (OPAL_SUCCESS != (ret = opal_hwloc_base_filter_cpus(opal_hwloc_topology))) { error = "filtering topology"; goto error; } } else { /* it wasn't passed down to us, so go get it */ if (OPAL_SUCCESS != (ret = opal_hwloc_base_get_topology())) { error = "topology discovery"; goto error; } /* push it into the PMIx database in case someone * tries to retrieve it so we avoid an attempt to * get it again */ kv = OBJ_NEW(opal_value_t); kv->key = strdup(OPAL_PMIX_LOCAL_TOPO); kv->type = OPAL_STRING; if (0 != (ret = hwloc_topology_export_xmlbuffer(opal_hwloc_topology, &kv->data.string, &u32))) { error = "topology export"; goto 
error; } if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_NAME, kv))) { error = "topology store"; goto error; } OBJ_RELEASE(kv); } /* get our local peers */ if (0 < orte_process_info.num_local_peers) { /* if my local rank if too high, then that's an error */ if (orte_process_info.num_local_peers < orte_process_info.my_local_rank) { ret = ORTE_ERR_BAD_PARAM; error = "num local peers"; goto error; } /* retrieve the local peers */ OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_PEERS, ORTE_PROC_MY_NAME, &val, OPAL_STRING); if (OPAL_SUCCESS == ret && NULL != val) { peers = opal_argv_split(val, ','); free(val); /* and their cpusets, if available */ OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCAL_CPUSETS, ORTE_PROC_MY_NAME, &val, OPAL_STRING); if (OPAL_SUCCESS == ret && NULL != val) { cpusets = opal_argv_split(val, ':'); free(val); } else { cpusets = NULL; } } else { peers = NULL; cpusets = NULL; } } else { peers = NULL; cpusets = NULL; } /* set the locality */ if (NULL != peers) { /* indentify our cpuset */ if (NULL != cpusets) { mycpuset = cpusets[orte_process_info.my_local_rank]; } else { mycpuset = NULL; } name.jobid = ORTE_PROC_MY_NAME->jobid; for (i=0; NULL != peers[i]; i++) { kv = OBJ_NEW(opal_value_t); kv->key = strdup(OPAL_PMIX_LOCALITY); kv->type = OPAL_UINT16; name.vpid = strtoul(peers[i], NULL, 10); if (name.vpid == ORTE_PROC_MY_NAME->vpid) { /* we are fully local to ourselves */ u16 = OPAL_PROC_ALL_LOCAL; } else if (NULL == mycpuset || NULL == cpusets[i] || 0 == strcmp(cpusets[i], "UNBOUND")) { /* all we can say is that it shares our node */ u16 = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE; } else { /* we have it, so compute the locality */ u16 = opal_hwloc_base_get_relative_locality(opal_hwloc_topology, mycpuset, cpusets[i]); } OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output, "%s ess:pmi:locality: proc %s locality %x", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&name), u16)); kv->data.uint16 = u16; ret = 
opal_pmix.store_local(&name, kv); if (OPAL_SUCCESS != ret) { error = "local store of locality"; opal_argv_free(peers); opal_argv_free(cpusets); goto error; } OBJ_RELEASE(kv); } opal_argv_free(peers); opal_argv_free(cpusets); } /* now that we have all required info, complete the setup */ if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup(false))) { ORTE_ERROR_LOG(ret); error = "orte_ess_base_app_setup"; goto error; } /* setup process binding */ if (ORTE_SUCCESS != (ret = orte_ess_base_proc_binding())) { error = "proc_binding"; goto error; } /* this needs to be set to enable debugger use when direct launched */ if (NULL == orte_process_info.my_daemon_uri) { orte_standalone_operation = true; } /* set max procs */ if (orte_process_info.max_procs < orte_process_info.num_procs) { orte_process_info.max_procs = orte_process_info.num_procs; } /*** PUSH DATA FOR OTHERS TO FIND ***/ /* push our RML URI in case others need to talk directly to us */ rmluri = orte_rml.get_contact_info(); /* push it out for others to use */ OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_PROC_URI, rmluri, OPAL_STRING); if (ORTE_SUCCESS != ret) { error = "pmix put uri"; goto error; } free(rmluri); /* push our hostname so others can find us, if they need to */ OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_HOSTNAME, orte_process_info.nodename, OPAL_STRING); if (ORTE_SUCCESS != ret) { error = "db store hostname"; goto error; } /* if we are an ORTE app - and not an MPI app - then * we need to exchange our connection info here. * MPI_Init has its own modex, so we don't need to do * two of them. However, if we don't do a modex at all, * then processes have no way to communicate * * NOTE: only do this when the process originally launches. 
* Cannot do this on a restart as the rest of the processes * in the job won't be executing this step, so we would hang */ if (ORTE_PROC_IS_NON_MPI && !orte_do_not_barrier) { opal_pmix.fence(NULL, 0); } return ORTE_SUCCESS; error: if (!progress_thread_running) { /* can't send the help message, so ensure it * comes out locally */ orte_show_help_finalize(); } if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) { orte_show_help("help-orte-runtime.txt", "orte_init:startup:internal-failure", true, error, ORTE_ERROR_NAME(ret), ret); } return ret; }
/**
 * Create and load an hwloc topology that covers the whole system and
 * includes instruction caches.
 *
 * @param topology  Output: receives the topology handle. Must not be NULL.
 * @return 0 on success, -1 on failure. On failure no topology is left
 *         allocated: if configuration or loading fails after a successful
 *         hwloc_topology_init(), the topology is destroyed before returning,
 *         so the caller must not use or destroy *topology.
 */
int topology_init(hwloc_topology_t *topology)
{
    if (topology == NULL) {
        return -1;
    }

    if (hwloc_topology_init(topology) != 0) {
        return -1;
    }

    if (hwloc_topology_set_flags(*topology,
                                 HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM |
                                 HWLOC_TOPOLOGY_FLAG_ICACHES) != 0 ||
        hwloc_topology_load(*topology) != 0) {
        hwloc_topology_destroy(*topology);
        return -1;
    }

    return 0;
}
int main(int argc, char *argv[]) { hwloc_obj_type_t type; unsigned old_index, new_index; const char *callname = argv[0]; hwloc_topology_t topology; int err; if (argc < 6) { usage(stderr, callname); exit(EXIT_FAILURE); } #ifdef HWLOC2 err = hwloc_type_sscanf(argv[3], &type, NULL, 0); #else err = hwloc_obj_type_sscanf(argv[3], &type, NULL, NULL, 0); #endif if (err < 0) { fprintf(stderr, "Failed to recognize type `%s'\n", argv[3]); usage(stderr, callname); exit(EXIT_FAILURE); } if (type != HWLOC_OBJ_PU && type != HWLOC_OBJ_NUMANODE) { fprintf(stderr, "Invalid type `%s', should be PU or NUMA node\n", argv[3]); usage(stderr, callname); exit(EXIT_FAILURE); } old_index = atoi(argv[4]); new_index = atoi(argv[5]); if (old_index == new_index) { fprintf(stderr, "Nothing to do\n"); exit(EXIT_SUCCESS); } err = hwloc_topology_init(&topology); if (err < 0) { fprintf(stderr, "hwloc_topology_init() failed (%s)\n", strerror(errno)); usage(stderr, callname); exit(EXIT_FAILURE); } err = hwloc_topology_set_xml(topology, argv[1]); if (err < 0) { fprintf(stderr, "hwloc_topology_set_xml() on file `%s' failed (%s)\n", argv[1], strerror(errno)); usage(stderr, callname); exit(EXIT_FAILURE); } #ifdef HWLOC2 err = hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM); err = hwloc_topology_set_all_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_ALL); #else err = hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM | HWLOC_TOPOLOGY_FLAG_WHOLE_IO | HWLOC_TOPOLOGY_FLAG_ICACHES); #endif err = hwloc_topology_load(topology); if (err < 0) { fprintf(stderr, "hwloc_topology_load() failed (%s)\n", strerror(errno)); usage(stderr, callname); exit(EXIT_FAILURE); } if (HWLOC_OBJ_PU == type) { hwloc_const_bitmap_t cpset = hwloc_topology_get_complete_cpuset(topology); if (!hwloc_bitmap_isset(cpset, old_index)) { fprintf(stderr, "Old PU os_index %u doesn't exist\n", old_index); usage(stderr, callname); exit(EXIT_FAILURE); } if (hwloc_bitmap_isset(cpset, new_index)) { fprintf(stderr, 
"New PU os_index %u already exists\n", new_index); usage(stderr, callname); exit(EXIT_FAILURE); } switch_pu_index(hwloc_get_root_obj(topology), old_index, new_index); } else if (HWLOC_OBJ_NUMANODE == type) { hwloc_const_bitmap_t cnset = hwloc_topology_get_complete_nodeset(topology); if (!cnset || hwloc_bitmap_isfull(cnset)) { fprintf(stderr, "Topology doesn't have NUMA nodes\n"); usage(stderr, callname); exit(EXIT_FAILURE); } if (!hwloc_bitmap_isset(cnset, old_index)) { fprintf(stderr, "Old NUMA node os_index %u doesn't exist\n", old_index); usage(stderr, callname); exit(EXIT_FAILURE); } if (hwloc_bitmap_isset(cnset, new_index)) { fprintf(stderr, "New NUMA node os_index %u already exists\n", new_index); usage(stderr, callname); exit(EXIT_FAILURE); } switch_numa_index(hwloc_get_root_obj(topology), old_index, new_index); } err = hwloc_topology_export_xml(topology, argv[2], 0); if (err < 0) { fprintf(stderr, "hwloc_topology_export_xml() on file `%s' failed (%s)\n", argv[2], strerror(errno)); usage(stderr, callname); exit(EXIT_FAILURE); } hwloc_topology_destroy(topology); printf("Beware that hwloc may warn about out-of-order objects when reloading %s\n", argv[2]); return 0; }
extern int get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards, uint16_t *p_sockets, uint16_t *p_cores, uint16_t *p_threads, uint16_t *p_block_map_size, uint16_t **p_block_map, uint16_t **p_block_map_inv) { enum { SOCKET=0, CORE=1, PU=2, LAST_OBJ=3 }; hwloc_topology_t topology; hwloc_obj_t obj; hwloc_obj_type_t objtype[LAST_OBJ]; unsigned idx[LAST_OBJ]; int nobj[LAST_OBJ]; int actual_cpus; int macid; int absid; int actual_boards = 1, depth; int i; debug2("hwloc_topology_init"); if (hwloc_topology_init(&topology)) { /* error in initialize hwloc library */ debug("hwloc_topology_init() failed."); return 1; } /* parse all system */ hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM); /* ignores cache, misc */ hwloc_topology_ignore_type (topology, HWLOC_OBJ_CACHE); hwloc_topology_ignore_type (topology, HWLOC_OBJ_MISC); /* load topology */ debug2("hwloc_topology_load"); if (hwloc_topology_load(topology)) { /* error in load hardware topology */ debug("hwloc_topology_load() failed."); hwloc_topology_destroy(topology); return 2; } /* At least on a temporary basis, one could map AMD Bulldozer entities * onto the entities that Slurm does optimize placement for today (e.g. * map each Bulldozer core to a thread and each Bulldozer module to a * Slurm core, alternately map the Bulldozer module to a Slurm socket * and the Bulldozer socket to a Slurm board). Perhaps not ideal, but * it would achieve the desired locality. */ if ( hwloc_get_type_depth(topology, HWLOC_OBJ_NODE) > hwloc_get_type_depth(topology, HWLOC_OBJ_SOCKET) ) { /* One socket contains multiple NUMA-nodes * like AMD Opteron 6000 series etc. * In such case, use NUMA-node instead of socket. 
*/ objtype[SOCKET] = HWLOC_OBJ_NODE; objtype[CORE] = HWLOC_OBJ_CORE; objtype[PU] = HWLOC_OBJ_PU; } else { objtype[SOCKET] = HWLOC_OBJ_SOCKET; objtype[CORE] = HWLOC_OBJ_CORE; objtype[PU] = HWLOC_OBJ_PU; } /* number of objects */ depth = hwloc_get_type_depth(topology, HWLOC_OBJ_GROUP); if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) { actual_boards = MAX(hwloc_get_nbobjs_by_depth(topology, depth), 1); } nobj[SOCKET] = hwloc_get_nbobjs_by_type(topology, objtype[SOCKET]); nobj[CORE] = hwloc_get_nbobjs_by_type(topology, objtype[CORE]); actual_cpus = hwloc_get_nbobjs_by_type(topology, objtype[PU]); nobj[PU] = actual_cpus/nobj[CORE]; /* threads per core */ nobj[CORE] /= nobj[SOCKET]; /* cores per socket */ debug("CPUs:%d Boards:%u Sockets:%d CoresPerSocket:%d ThreadsPerCore:%d", actual_cpus, actual_boards, nobj[SOCKET], nobj[CORE], nobj[PU]); /* allocate block_map */ *p_block_map_size = (uint16_t)actual_cpus; if (p_block_map && p_block_map_inv) { *p_block_map = xmalloc(actual_cpus * sizeof(uint16_t)); *p_block_map_inv = xmalloc(actual_cpus * sizeof(uint16_t)); /* initialize default as linear mapping */ for (i = 0; i < actual_cpus; i++) { (*p_block_map)[i] = i; (*p_block_map_inv)[i] = i; } /* create map with hwloc */ for (idx[SOCKET]=0; idx[SOCKET]<nobj[SOCKET]; ++idx[SOCKET]) { for (idx[CORE]=0; idx[CORE]<nobj[CORE]; ++idx[CORE]) { for (idx[PU]=0; idx[PU]<nobj[PU]; ++idx[PU]) { /* get hwloc_obj by indexes */ obj=hwloc_get_obj_below_array_by_type( topology, 3, objtype, idx); if (!obj) continue; macid = obj->os_index; absid = idx[SOCKET]*nobj[CORE]*nobj[PU] + idx[CORE]*nobj[PU] + idx[PU]; if ((macid >= actual_cpus) || (absid >= actual_cpus)) { /* physical or logical ID are * out of range */ continue; } debug4("CPU map[%d]=>%d", absid, macid); (*p_block_map)[absid] = macid; (*p_block_map_inv)[macid] = absid; } } } } hwloc_topology_destroy(topology); /* update output parameters */ *p_cpus = actual_cpus; *p_boards = actual_boards; *p_sockets = nobj[SOCKET]; *p_cores = nobj[CORE]; 
*p_threads = nobj[PU]; #if DEBUG_DETAIL /*** Display raw data ***/ debug("CPUs:%u Boards:%u Sockets:%u CoresPerSocket:%u ThreadsPerCore:%u", *p_cpus, *p_boards, *p_sockets, *p_cores, *p_threads); /* Display the mapping tables */ if (p_block_map && p_block_map_inv) { debug("------"); debug("Abstract -> Machine logical CPU ID block mapping:"); debug("AbstractId PhysicalId Inverse"); for (i = 0; i < *p_cpus; i++) { debug3(" %4d %4u %4u", i, (*p_block_map)[i], (*p_block_map_inv)[i]); } debug("------"); } #endif return 0; }
int main (int argc, char *argv[]) { int err; int verbose_mode = LSTOPO_VERBOSE_MODE_DEFAULT; hwloc_topology_t topology; const char *filename = NULL; unsigned long flags = HWLOC_TOPOLOGY_FLAG_IO_DEVICES | HWLOC_TOPOLOGY_FLAG_IO_BRIDGES | HWLOC_TOPOLOGY_FLAG_ICACHES; int merge = 0; int ignorecache = 0; char * callname; char * input = NULL; enum hwloc_utils_input_format input_format = HWLOC_UTILS_INPUT_DEFAULT; enum output_format output_format = LSTOPO_OUTPUT_DEFAULT; char *restrictstring = NULL; int opt; unsigned i; for(i=0; i<HWLOC_OBJ_TYPE_MAX; i++) force_orient[i] = LSTOPO_ORIENT_NONE; force_orient[HWLOC_OBJ_PU] = LSTOPO_ORIENT_HORIZ; force_orient[HWLOC_OBJ_CACHE] = LSTOPO_ORIENT_HORIZ; force_orient[HWLOC_OBJ_NODE] = LSTOPO_ORIENT_HORIZ; /* enable verbose backends */ putenv("HWLOC_XML_VERBOSE=1"); putenv("HWLOC_SYNTHETIC_VERBOSE=1"); #ifdef HAVE_SETLOCALE setlocale(LC_ALL, ""); #endif callname = strrchr(argv[0], '/'); if (!callname) callname = argv[0]; else callname++; /* skip argv[0], handle options */ argc--; argv++; err = hwloc_topology_init (&topology); if (err) return EXIT_FAILURE; while (argc >= 1) { opt = 0; if (!strcmp (argv[0], "-v") || !strcmp (argv[0], "--verbose")) { verbose_mode++; } else if (!strcmp (argv[0], "-s") || !strcmp (argv[0], "--silent")) { verbose_mode--; } else if (!strcmp (argv[0], "-h") || !strcmp (argv[0], "--help")) { usage(callname, stdout); exit(EXIT_SUCCESS); } else if (!strcmp (argv[0], "-f") || !strcmp (argv[0], "--force")) overwrite = 1; else if (!strcmp (argv[0], "-l") || !strcmp (argv[0], "--logical")) logical = 1; else if (!strcmp (argv[0], "-p") || !strcmp (argv[0], "--physical")) logical = 0; else if (!strcmp (argv[0], "-c") || !strcmp (argv[0], "--cpuset")) lstopo_show_cpuset = 1; else if (!strcmp (argv[0], "-C") || !strcmp (argv[0], "--cpuset-only")) lstopo_show_cpuset = 2; else if (!strcmp (argv[0], "--taskset")) { lstopo_show_taskset = 1; if (!lstopo_show_cpuset) lstopo_show_cpuset = 1; } else if (!strcmp (argv[0], 
"--only")) { if (argc < 2) { usage (callname, stderr); exit(EXIT_FAILURE); } if (hwloc_obj_type_sscanf(argv[1], &lstopo_show_only, NULL, NULL, 0) < 0) fprintf(stderr, "Unsupported type `%s' passed to --only, ignoring.\n", argv[1]); opt = 1; } else if (!strcmp (argv[0], "--ignore")) { hwloc_obj_type_t type; if (argc < 2) { usage (callname, stderr); exit(EXIT_FAILURE); } if (hwloc_obj_type_sscanf(argv[1], &type, NULL, NULL, 0) < 0) fprintf(stderr, "Unsupported type `%s' passed to --ignore, ignoring.\n", argv[1]); else if (type == HWLOC_OBJ_PU) lstopo_ignore_pus = 1; else hwloc_topology_ignore_type(topology, type); opt = 1; } else if (!strcmp (argv[0], "--no-caches")) ignorecache = 2; else if (!strcmp (argv[0], "--no-useless-caches")) ignorecache = 1; else if (!strcmp (argv[0], "--no-icaches")) flags &= ~HWLOC_TOPOLOGY_FLAG_ICACHES; else if (!strcmp (argv[0], "--whole-system")) flags |= HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM; else if (!strcmp (argv[0], "--no-io")) flags &= ~(HWLOC_TOPOLOGY_FLAG_IO_DEVICES | HWLOC_TOPOLOGY_FLAG_IO_BRIDGES); else if (!strcmp (argv[0], "--no-bridges")) flags &= ~(HWLOC_TOPOLOGY_FLAG_IO_BRIDGES); else if (!strcmp (argv[0], "--whole-io")) flags |= HWLOC_TOPOLOGY_FLAG_WHOLE_IO; else if (!strcmp (argv[0], "--merge")) merge = 1; else if (!strcmp (argv[0], "--thissystem")) flags |= HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM; else if (!strcmp (argv[0], "--restrict")) { if (argc < 2) { usage (callname, stderr); exit(EXIT_FAILURE); } restrictstring = strdup(argv[1]); opt = 1; } else if (!strcmp (argv[0], "--horiz")) for(i=0; i<HWLOC_OBJ_TYPE_MAX; i++) force_orient[i] = LSTOPO_ORIENT_HORIZ; else if (!strcmp (argv[0], "--vert")) for(i=0; i<HWLOC_OBJ_TYPE_MAX; i++) force_orient[i] = LSTOPO_ORIENT_VERT; else if (!strncmp (argv[0], "--horiz=", 8) || !strncmp (argv[0], "--vert=", 7)) { enum lstopo_orient_e orient = (argv[0][2] == 'h') ? LSTOPO_ORIENT_HORIZ : LSTOPO_ORIENT_VERT; char *tmp = argv[0] + ((argv[0][2] == 'h') ? 
8 : 7); while (tmp) { char *end = strchr(tmp, ','); hwloc_obj_type_t type; if (end) *end = '\0'; if (hwloc_obj_type_sscanf(tmp, &type, NULL, NULL, 0) < 0) fprintf(stderr, "Unsupported type `%s' passed to %s, ignoring.\n", tmp, argv[0]); else force_orient[type] = orient; if (!end) break; tmp = end+1; } } else if (!strcmp (argv[0], "--fontsize")) { if (argc < 2) { usage (callname, stderr); exit(EXIT_FAILURE); } fontsize = atoi(argv[1]); opt = 1; } else if (!strcmp (argv[0], "--gridsize")) { if (argc < 2) { usage (callname, stderr); exit(EXIT_FAILURE); } gridsize = atoi(argv[1]); opt = 1; } else if (!strcmp (argv[0], "--no-legend")) { legend = 0; } else if (!strcmp (argv[0], "--append-legend")) { if (argc < 2) { usage (callname, stderr); exit(EXIT_FAILURE); } lstopo_append_legends = realloc(lstopo_append_legends, (lstopo_append_legends_nr+1) * sizeof(*lstopo_append_legends)); lstopo_append_legends[lstopo_append_legends_nr] = strdup(argv[1]); lstopo_append_legends_nr++; opt = 1; } else if (hwloc_utils_lookup_input_option(argv, argc, &opt, &input, &input_format, callname)) { /* nothing to do anymore */ } else if (!strcmp (argv[0], "--pid")) { if (argc < 2) { usage (callname, stderr); exit(EXIT_FAILURE); } lstopo_pid_number = atoi(argv[1]); opt = 1; } else if (!strcmp (argv[0], "--ps") || !strcmp (argv[0], "--top")) top = 1; else if (!strcmp (argv[0], "--version")) { printf("%s %s\n", callname, VERSION); exit(EXIT_SUCCESS); } else if (!strcmp (argv[0], "--output-format") || !strcmp (argv[0], "--of")) { if (argc < 2) { usage (callname, stderr); exit(EXIT_FAILURE); } output_format = parse_output_format(argv[1], callname); opt = 1; } else { if (filename) { fprintf (stderr, "Unrecognized option: %s\n", argv[0]); usage (callname, stderr); exit(EXIT_FAILURE); } else filename = argv[0]; } argc -= opt+1; argv += opt+1; } if (lstopo_show_only != (hwloc_obj_type_t)-1) merge = 0; hwloc_topology_set_flags(topology, flags); if (ignorecache > 1) { hwloc_topology_ignore_type(topology, 
HWLOC_OBJ_CACHE); } else if (ignorecache) { hwloc_topology_ignore_type_keep_structure(topology, HWLOC_OBJ_CACHE); } if (merge) hwloc_topology_ignore_all_keep_structure(topology); if (input) { err = hwloc_utils_enable_input_format(topology, input, input_format, verbose_mode > 1, callname); if (err) return err; } if (lstopo_pid_number != -1 && lstopo_pid_number != 0) { lstopo_pid = hwloc_pid_from_number(lstopo_pid_number, 0); if (hwloc_topology_set_pid(topology, lstopo_pid)) { perror("Setting target pid"); return EXIT_FAILURE; } } err = hwloc_topology_load (topology); if (err) { fprintf(stderr, "hwloc_topology_load() failed (%s).\n", strerror(errno)); return EXIT_FAILURE; } if (top) add_process_objects(topology); if (restrictstring) { hwloc_bitmap_t restrictset = hwloc_bitmap_alloc(); if (!strcmp (restrictstring, "binding")) { if (lstopo_pid_number != -1 && lstopo_pid_number != 0) hwloc_get_proc_cpubind(topology, lstopo_pid, restrictset, HWLOC_CPUBIND_PROCESS); else hwloc_get_cpubind(topology, restrictset, HWLOC_CPUBIND_PROCESS); } else { hwloc_bitmap_sscanf(restrictset, restrictstring); } err = hwloc_topology_restrict (topology, restrictset, 0); if (err) { perror("Restricting the topology"); /* fallthrough */ } hwloc_bitmap_free(restrictset); free(restrictstring); } /* if the output format wasn't enforced, look at the filename */ if (filename && output_format == LSTOPO_OUTPUT_DEFAULT) { if (!strcmp(filename, "-") || !strcmp(filename, "/dev/stdout")) { output_format = LSTOPO_OUTPUT_CONSOLE; } else { char *dot = strrchr(filename, '.'); if (dot) output_format = parse_output_format(dot+1, callname); else { fprintf(stderr, "Cannot infer output type for file `%s' without any extension, using default output.\n", filename); filename = NULL; } } } /* if the output format wasn't enforced, think a bit about what the user probably want */ if (output_format == LSTOPO_OUTPUT_DEFAULT) { if (lstopo_show_cpuset || lstopo_show_only != (hwloc_obj_type_t)-1 || verbose_mode != 
LSTOPO_VERBOSE_MODE_DEFAULT) output_format = LSTOPO_OUTPUT_CONSOLE; } if (logical == -1) { if (output_format == LSTOPO_OUTPUT_CONSOLE) logical = 1; else if (output_format != LSTOPO_OUTPUT_DEFAULT) logical = 0; } switch (output_format) { case LSTOPO_OUTPUT_DEFAULT: #ifdef LSTOPO_HAVE_GRAPHICS #if CAIRO_HAS_XLIB_SURFACE && defined HWLOC_HAVE_X11_KEYSYM if (getenv("DISPLAY")) { if (logical == -1) logical = 0; output_x11(topology, NULL, overwrite, logical, legend, verbose_mode); } else #endif /* CAIRO_HAS_XLIB_SURFACE */ #ifdef HWLOC_WIN_SYS { if (logical == -1) logical = 0; output_windows(topology, NULL, overwrite, logical, legend, verbose_mode); } #endif #endif /* !LSTOPO_HAVE_GRAPHICS */ #if !defined HWLOC_WIN_SYS || !defined LSTOPO_HAVE_GRAPHICS { if (logical == -1) logical = 1; output_console(topology, NULL, overwrite, logical, legend, verbose_mode); } #endif break; case LSTOPO_OUTPUT_CONSOLE: output_console(topology, filename, overwrite, logical, legend, verbose_mode); break; case LSTOPO_OUTPUT_SYNTHETIC: output_synthetic(topology, filename, overwrite, logical, legend, verbose_mode); break; case LSTOPO_OUTPUT_TEXT: output_text(topology, filename, overwrite, logical, legend, verbose_mode); break; case LSTOPO_OUTPUT_FIG: output_fig(topology, filename, overwrite, logical, legend, verbose_mode); break; #ifdef LSTOPO_HAVE_GRAPHICS # if CAIRO_HAS_PNG_FUNCTIONS case LSTOPO_OUTPUT_PNG: output_png(topology, filename, overwrite, logical, legend, verbose_mode); break; # endif /* CAIRO_HAS_PNG_FUNCTIONS */ # if CAIRO_HAS_PDF_SURFACE case LSTOPO_OUTPUT_PDF: output_pdf(topology, filename, overwrite, logical, legend, verbose_mode); break; # endif /* CAIRO_HAS_PDF_SURFACE */ # if CAIRO_HAS_PS_SURFACE case LSTOPO_OUTPUT_PS: output_ps(topology, filename, overwrite, logical, legend, verbose_mode); break; #endif /* CAIRO_HAS_PS_SURFACE */ #if CAIRO_HAS_SVG_SURFACE case LSTOPO_OUTPUT_SVG: output_svg(topology, filename, overwrite, logical, legend, verbose_mode); break; #endif /* 
CAIRO_HAS_SVG_SURFACE */ #endif /* LSTOPO_HAVE_GRAPHICS */ case LSTOPO_OUTPUT_XML: output_xml(topology, filename, overwrite, logical, legend, verbose_mode); break; default: fprintf(stderr, "file format not supported\n"); usage(callname, stderr); exit(EXIT_FAILURE); } hwloc_topology_destroy (topology); for(i=0; i<lstopo_append_legends_nr; i++) free(lstopo_append_legends[i]); free(lstopo_append_legends); return EXIT_SUCCESS; }
//Initializes HWLOC and load the machine architecture int hw_topology_init (struct arch_topology *topo) { hwloc_obj_t obj, core1, core2; int count, i, j, error; //Create the machine representation error = hwloc_topology_init(&topology); //Go throught the topology only if HWLOC is //successifully initialized if(!error) { hwloc_topology_load(topology); local_topo = malloc(sizeof(struct arch_topology)); #if defined (__DBCSR_ACC) || defined (__PW_CUDA) int nDev; ma_get_ndevices_cu(&nDev); #endif //Extract number of NUMA nodes if (hwloc_get_type_depth (topology, HWLOC_OBJ_NODE)) topo->nnodes = hwloc_get_nbobjs_by_depth (topology, hwloc_get_type_depth (topology, HWLOC_OBJ_NODE)); else topo->nnodes = 0; //Get number of cores, sockets and processing units topo->ncores = hwloc_get_nbobjs_by_depth (topology, hwloc_get_type_depth (topology, HWLOC_OBJ_CORE)); topo->nsockets = hwloc_get_nbobjs_by_depth (topology, hwloc_get_type_depth (topology, HWLOC_OBJ_SOCKET)); topo->npus = hwloc_get_nbobjs_by_depth (topology, hwloc_get_type_depth (topology, HWLOC_OBJ_PU)); //Compute number of memory controlers per socket //basically the number of NUMA nodes per socket if (topo->nnodes > topo->nsockets) topo->nmemcontroller = topo->nnodes/topo->nsockets; else topo->nmemcontroller = 1; count = 0; topo->nshared_caches = 0; //Get derivate information - get number of cache per PU for(obj = hwloc_get_obj_by_type(topology,HWLOC_OBJ_PU,0); obj; obj = obj->parent) { if (obj->type == HWLOC_OBJ_CACHE) { if (obj->arity>1) topo->nshared_caches++; else { count++; topo->ncaches = count; } } } //Number of direct siblings //Siblings cores are the ones that share at least one component //level of the architecture count = 0; core1 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, 0); core2 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, 1); obj = hwloc_get_common_ancestor_obj(topology, core1, core2); if (obj) topo->nsiblings = obj->arity; //Machine node and core representation machine_nodes = (struct node*) 
malloc (topo->nnodes*sizeof(struct node)); machine_cores = (struct core*) malloc (topo->ncores*sizeof(struct core)); phys_cpus = malloc (topo->ncores*sizeof(int)); get_phys_id(topology, topo->ncores, 0); //Get the caches sizes and other information for each core for (i = 0; i < topo->ncores ; i++) { machine_cores[i].caches = malloc (topo->ncaches*sizeof(size_t)); machine_cores[i].shared_caches = malloc (topo->ncaches*sizeof(int)); for (j = 0; j < topo->ncaches; j++) machine_cores[i].shared_caches[j] = 0; for (j = topo->ncaches ; j > topo->ncaches - topo->nshared_caches; j--) machine_cores[i].shared_caches[j-1] = 1; machine_cores[i].nsiblings = topo->nsiblings; machine_cores[i].siblings_id = malloc (topo->nsiblings*sizeof(unsigned)); if(topo->ncores == topo->npus){ core1 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, i); machine_cores[i].id = core1->os_index; count = 0; for(obj = hwloc_get_obj_by_type(topology,HWLOC_OBJ_PU,i); obj; obj = obj->parent) { if (obj->type == HWLOC_OBJ_CACHE){ machine_cores[i].caches[count] = obj->attr->cache.size / 1024; count++; } if (obj->type == HWLOC_OBJ_NODE) machine_cores[i].numaNode = obj->logical_index; } } else{ core1 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, i); machine_cores[i].id = core1->os_index; count = 0; for(obj = hwloc_get_obj_by_type(topology,HWLOC_OBJ_CORE,i); obj; obj = obj->parent) { if (obj->type == HWLOC_OBJ_CACHE) { machine_cores[i].caches[count] = obj->attr->cache.size / 1024; count++; } if (obj->type == HWLOC_OBJ_NODE) machine_cores[i].numaNode = obj->logical_index; } } } //Get siblings id - so each core knows its siblings for (i = 0; i < topo->ncores ; i++) { if(topo->ncores == topo->npus){ core1 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, i); set_phys_siblings(i,machine_cores[i].id,core1,topo->ncores,topo->nsiblings,HWLOC_OBJ_PU); } else{ core1 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, i); set_phys_siblings(i,machine_cores[i].id,core1,topo->ncores,topo->nsiblings,HWLOC_OBJ_CORE); } } int 
ncore_node = topo->ncores/topo->nnodes; int count_cores; //Get the information for each NUMAnode for (i = 0; i < topo->nnodes ; i++) { obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NODE, i); machine_nodes[i].id = obj->os_index; machine_nodes[i].memory = obj->memory.total_memory; machine_nodes[i].ncores = ncore_node; machine_nodes[i].mycores = malloc (ncore_node*sizeof(unsigned)); //Get the cores id of each NUMAnode count_cores = 0; set_node_cores(topology, obj, i, &count_cores); //GPU support #if defined (__DBCSR_ACC) || defined (__PW_CUDA) int *devIds; devIds = malloc (nDev*sizeof(int)); topo->ngpus = nDev; ma_get_cu(i,devIds); machine_nodes[i].mygpus = devIds; #endif } //counting network cards count = 0; hwloc_topology_t topo_net; error = hwloc_topology_init(&topo_net); hwloc_topology_set_flags(topo_net, HWLOC_TOPOLOGY_FLAG_IO_DEVICES); if (!error){ hwloc_topology_load(topo_net); for (obj = hwloc_get_obj_by_type(topo_net, HWLOC_OBJ_OS_DEVICE, 0); obj; obj = hwloc_get_next_osdev(topo_net,obj)) if (obj->attr->osdev.type == HWLOC_OBJ_OSDEV_NETWORK || obj->attr->osdev.type == HWLOC_OBJ_OSDEV_OPENFABRICS) count++; topo->nnetcards = count; } else //if can not load I/O devices topo->nnetcards = 0; hwloc_topology_destroy(topo_net); /*Local copy of the machine topology components*/ local_topo->nnodes = topo->nnodes; local_topo->nsockets = topo->nsockets; local_topo->ncores = topo->ncores; local_topo->npus = topo->npus; local_topo->ngpus = topo->ngpus; local_topo->ncaches = topo->ncaches; local_topo->nshared_caches = topo->nshared_caches; local_topo->nsiblings = topo->nsiblings; local_topo->nmemcontroller = topo->nmemcontroller; local_topo->nnetcards = topo->nnetcards; } return error; }
int main(void) { hwloc_topology_t topology; cl_int clret; cl_platform_id *platform_ids; unsigned nrp, nrd, count, i, j; int err; hwloc_topology_init(&topology); hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_IO_DEVICES); hwloc_topology_load(topology); clret = clGetPlatformIDs(0, NULL, &nrp); if (CL_SUCCESS != clret || !nrp) return 0; platform_ids = malloc(nrp * sizeof(*platform_ids)); if (!platform_ids) return 0; clret = clGetPlatformIDs(nrp, platform_ids, &nrp); if (CL_SUCCESS != clret || !nrp) return 0; count = 0; for(i=0; i<nrp; i++) { cl_device_id *device_ids; clret = clGetDeviceIDs(platform_ids[i], CL_DEVICE_TYPE_ALL, 0, NULL, &nrd); if (CL_SUCCESS != clret || !nrd) continue; device_ids = malloc(nrd * sizeof(*device_ids)); if (!device_ids) continue; clret = clGetDeviceIDs(platform_ids[i], CL_DEVICE_TYPE_ALL, nrd, device_ids, &nrd); if (CL_SUCCESS != clret || !nrd) continue; for(j=0; j<nrd; j++) { hwloc_bitmap_t set; hwloc_obj_t osdev, osdev2, ancestor; const char *value; osdev = hwloc_opencl_get_device_osdev(topology, device_ids[j]); osdev2 = hwloc_opencl_get_device_osdev_by_index(topology, i, j); assert(osdev == osdev2); if (!osdev) { printf("no osdev for platform %d device %d\n", i, j); continue; } ancestor = hwloc_get_non_io_ancestor_obj(topology, osdev); set = hwloc_bitmap_alloc(); err = hwloc_opencl_get_device_cpuset(topology, device_ids[j], set); if (err < 0) { printf("no cpuset for platform %d device %d\n", i, j); } else { char *cpuset_string = NULL; hwloc_bitmap_asprintf(&cpuset_string, set); printf("got cpuset %s for platform %d device %d\n", cpuset_string, i, j); free(cpuset_string); assert(hwloc_bitmap_isequal(set, ancestor->cpuset)); } hwloc_bitmap_free(set); printf("found OSDev %s\n", osdev->name); err = strncmp(osdev->name, "opencl", 6); assert(!err); assert(atoi(osdev->name+6) == (int) count); value = hwloc_obj_get_info_by_name(osdev, "Backend"); err = strcmp(value, "OpenCL"); assert(!err); value = hwloc_obj_get_info_by_name(osdev, 
"Name"); printf("found OSDev name %s\n", value); count++; } } hwloc_topology_destroy(topology); return 0; }
int main(void) { hwloc_topology_t topology; CUresult cres; CUdevice device; int count, i; int err; cres = cuInit(0); if (cres != CUDA_SUCCESS) { printf("cuInit failed %d\n", cres); return 0; } cres = cuDeviceGetCount(&count); if (cres != CUDA_SUCCESS) { printf("cuDeviceGetCount failed %d\n", cres); return 0; } printf("cuDeviceGetCount found %d devices\n", count); hwloc_topology_init(&topology); hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_IO_DEVICES); hwloc_topology_load(topology); for(i=0; i<count; i++) { hwloc_bitmap_t set; hwloc_obj_t osdev, osdev2, ancestor; const char *value; cres = cuDeviceGet(&device, i); if (cres != CUDA_SUCCESS) { printf("failed to get device %d\n", i); continue; } osdev = hwloc_cuda_get_device_osdev(topology, device); assert(osdev); osdev2 = hwloc_cuda_get_device_osdev_by_index(topology, i); assert(osdev == osdev2); ancestor = hwloc_get_non_io_ancestor_obj(topology, osdev); printf("found OSDev %s\n", osdev->name); err = strncmp(osdev->name, "cuda", 4); assert(!err); assert(atoi(osdev->name+4) == (int) i); value = hwloc_obj_get_info_by_name(osdev, "Backend"); err = strcmp(value, "CUDA"); assert(!err); assert(osdev->attr->osdev.type == HWLOC_OBJ_OSDEV_COPROC); value = hwloc_obj_get_info_by_name(osdev, "CoProcType"); err = strcmp(value, "CUDA"); assert(!err); value = hwloc_obj_get_info_by_name(osdev, "GPUModel"); printf("found OSDev model %s\n", value); set = hwloc_bitmap_alloc(); err = hwloc_cuda_get_device_cpuset(topology, device, set); if (err < 0) { printf("failed to get cpuset for device %d\n", i); } else { char *cpuset_string = NULL; hwloc_bitmap_asprintf(&cpuset_string, set); printf("got cpuset %s for device %d\n", cpuset_string, i); assert(hwloc_bitmap_isequal(set, ancestor->cpuset)); free(cpuset_string); } hwloc_bitmap_free(set); } hwloc_topology_destroy(topology); return 0; }
/*
 * Unpack up to *num_vals hwloc topologies from a buffer.
 *
 * Each topology is read as an XML string followed by the raw bytes of the
 * three hwloc support structures (discovery, cpubind, membind), because the
 * XML import does not carry the support information.
 *
 * buffer    buffer to unpack from
 * dest      array of hwloc_topology_t receiving the loaded topologies
 * num_vals  in: number of topologies requested;
 *           out: number of topologies actually stored in dest
 * type      not used by this function
 *
 * Returns PMIX_SUCCESS, the error returned by the string/byte unpack
 * routines, or PMIX_ERROR when hwloc rejects the XML.  A NULL XML string
 * stops the unpacking early without changing the return code.  On failure
 * the topology being built is destroyed; topologies already stored in
 * dest remain valid.
 */
int pmix_bfrop_unpack_topo(pmix_buffer_t *buffer, void *dest,
                           int32_t *num_vals, pmix_data_type_t type)
{
    /* NOTE: hwloc defines topology_t as a pointer to a struct! */
    hwloc_topology_t t, *tarray = (hwloc_topology_t*)dest;
    int rc=PMIX_SUCCESS, i, cnt, j;
    char *xmlbuffer;
    struct hwloc_topology_support *support;

    for (i=0, j=0; i < *num_vals; i++) {
        /* unpack the xml string */
        cnt=1;
        xmlbuffer = NULL;
        if (PMIX_SUCCESS != (rc = pmix_bfrop_unpack_string(buffer, &xmlbuffer, &cnt, PMIX_STRING))) {
            goto cleanup;
        }
        if (NULL == xmlbuffer) {
            goto cleanup;
        }
        /* convert the xml */
        if (0 != hwloc_topology_init(&t)) {
            rc = PMIX_ERROR;
            free(xmlbuffer);
            goto cleanup;
        }
        if (0 != hwloc_topology_set_xmlbuffer(t, xmlbuffer, strlen(xmlbuffer))) {
            rc = PMIX_ERROR;
            free(xmlbuffer);
            hwloc_topology_destroy(t);
            goto cleanup;
        }
        /* since we are loading this from an external source, we have to
         * explicitly set a flag so hwloc sets things up correctly
         */
        if (0 != hwloc_topology_set_flags(t, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM | HWLOC_TOPOLOGY_FLAG_IO_DEVICES)) {
            free(xmlbuffer);
            rc = PMIX_ERROR;
            hwloc_topology_destroy(t);
            goto cleanup;
        }
        /* now load the topology */
        if (0 != hwloc_topology_load(t)) {
            free(xmlbuffer);
            rc = PMIX_ERROR;
            hwloc_topology_destroy(t);
            goto cleanup;
        }
        /* the xml text is no longer needed once the topology is loaded */
        free(xmlbuffer);

        /* get the available support - hwloc unfortunately does
         * not include this info in its xml import!
         */
        support = (struct hwloc_topology_support*)hwloc_topology_get_support(t);
        cnt = sizeof(struct hwloc_topology_discovery_support);
        if (PMIX_SUCCESS != (rc = pmix_bfrop_unpack_byte(buffer, support->discovery, &cnt, PMIX_BYTE))) {
            /* the topology is not handed back, so release it here */
            hwloc_topology_destroy(t);
            goto cleanup;
        }
        cnt = sizeof(struct hwloc_topology_cpubind_support);
        if (PMIX_SUCCESS != (rc = pmix_bfrop_unpack_byte(buffer, support->cpubind, &cnt, PMIX_BYTE))) {
            hwloc_topology_destroy(t);
            goto cleanup;
        }
        cnt = sizeof(struct hwloc_topology_membind_support);
        if (PMIX_SUCCESS != (rc = pmix_bfrop_unpack_byte(buffer, support->membind, &cnt, PMIX_BYTE))) {
            hwloc_topology_destroy(t);
            goto cleanup;
        }

        /* pass it back */
        tarray[i] = t;

        /* track the number added */
        j++;
    }

 cleanup:
    /* report how many topologies were actually stored */
    *num_vals = j;
    return rc;
}
int main(int argc, char *argv[]) { hwloc_topology_t topology; unsigned depth; hwloc_bitmap_t cpubind_set, membind_set; int cpubind = 1; /* membind if 0 */ int get_binding = 0; int get_last_cpu_location = 0; int single = 0; int verbose = 0; int logical = 1; int taskset = 0; int cpubind_flags = 0; hwloc_membind_policy_t membind_policy = HWLOC_MEMBIND_BIND; int membind_flags = 0; int opt; int ret; hwloc_pid_t pid = 0; char **orig_argv = argv; cpubind_set = hwloc_bitmap_alloc(); membind_set = hwloc_bitmap_alloc(); hwloc_topology_init(&topology); hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_IO); hwloc_topology_load(topology); depth = hwloc_topology_get_depth(topology); /* skip argv[0], handle options */ argv++; argc--; while (argc >= 1) { if (!strcmp(argv[0], "--")) { argc--; argv++; break; } opt = 0; if (*argv[0] == '-') { if (!strcmp(argv[0], "-v")) { verbose = 1; goto next; } else if (!strcmp(argv[0], "--help")) { usage(stdout); return EXIT_SUCCESS; } else if (!strcmp(argv[0], "--single")) { single = 1; goto next; } else if (!strcmp(argv[0], "--strict")) { cpubind_flags |= HWLOC_CPUBIND_STRICT; membind_flags |= HWLOC_MEMBIND_STRICT; goto next; } else if (!strcmp(argv[0], "--pid")) { if (argc < 2) { usage (stderr); exit(EXIT_FAILURE); } pid = atoi(argv[1]); opt = 1; goto next; } else if (!strcmp (argv[0], "--version")) { printf("%s %s\n", orig_argv[0], VERSION); exit(EXIT_SUCCESS); } if (!strcmp(argv[0], "-l") || !strcmp(argv[0], "--logical")) { logical = 1; goto next; } if (!strcmp(argv[0], "-p") || !strcmp(argv[0], "--physical")) { logical = 0; goto next; } if (!strcmp(argv[0], "--taskset")) { taskset = 1; goto next; } else if (!strncmp (argv[0], "--get-last-cpu-location", 10)) { get_last_cpu_location = 1; goto next; } else if (!strcmp (argv[0], "--get")) { get_binding = 1; goto next; } else if (!strcmp (argv[0], "--cpubind")) { cpubind = 1; goto next; } else if (!strcmp (argv[0], "--membind")) { cpubind = 0; goto next; } else if (!strcmp (argv[0], 
"--mempolicy")) { if (!strncmp(argv[1], "default", 2)) membind_policy = HWLOC_MEMBIND_DEFAULT; else if (!strncmp(argv[1], "firsttouch", 2)) membind_policy = HWLOC_MEMBIND_FIRSTTOUCH; else if (!strncmp(argv[1], "bind", 2)) membind_policy = HWLOC_MEMBIND_BIND; else if (!strncmp(argv[1], "interleave", 2)) membind_policy = HWLOC_MEMBIND_INTERLEAVE; else if (!strncmp(argv[1], "replicate", 2)) membind_policy = HWLOC_MEMBIND_REPLICATE; else if (!strncmp(argv[1], "nexttouch", 2)) membind_policy = HWLOC_MEMBIND_NEXTTOUCH; else { fprintf(stderr, "Unrecognized memory binding policy %s\n", argv[1]); usage (stderr); exit(EXIT_FAILURE); } opt = 1; goto next; } fprintf (stderr, "Unrecognized option: %s\n", argv[0]); usage(stderr); return EXIT_FAILURE; } ret = hwloc_calc_process_arg(topology, depth, argv[0], logical, cpubind ? cpubind_set : membind_set, verbose); if (ret < 0) { if (verbose) fprintf(stderr, "assuming the command starts at %s\n", argv[0]); break; } next: argc -= opt+1; argv += opt+1; } if (get_binding || get_last_cpu_location) { char *s; const char *policystr = NULL; int err; if (cpubind) { if (get_last_cpu_location) { if (pid) err = hwloc_get_proc_last_cpu_location(topology, pid, cpubind_set, 0); else err = hwloc_get_last_cpu_location(topology, cpubind_set, 0); } else { if (pid) err = hwloc_get_proc_cpubind(topology, pid, cpubind_set, 0); else err = hwloc_get_cpubind(topology, cpubind_set, 0); } if (err) { const char *errmsg = strerror(errno); if (pid) fprintf(stderr, "hwloc_get_proc_%s %ld failed (errno %d %s)\n", get_last_cpu_location ? "last_cpu_location" : "cpubind", (long) pid, errno, errmsg); else fprintf(stderr, "hwloc_get_%s failed (errno %d %s)\n", get_last_cpu_location ? 
"last_cpu_location" : "cpubind", errno, errmsg); return EXIT_FAILURE; } if (taskset) hwloc_bitmap_taskset_asprintf(&s, cpubind_set); else hwloc_bitmap_asprintf(&s, cpubind_set); } else { hwloc_membind_policy_t policy; if (pid) err = hwloc_get_proc_membind(topology, pid, membind_set, &policy, 0); else err = hwloc_get_membind(topology, membind_set, &policy, 0); if (err) { const char *errmsg = strerror(errno); if (pid) fprintf(stderr, "hwloc_get_proc_membind %ld failed (errno %d %s)\n", (long) pid, errno, errmsg); else fprintf(stderr, "hwloc_get_membind failed (errno %d %s)\n", errno, errmsg); return EXIT_FAILURE; } if (taskset) hwloc_bitmap_taskset_asprintf(&s, membind_set); else hwloc_bitmap_asprintf(&s, membind_set); switch (policy) { case HWLOC_MEMBIND_DEFAULT: policystr = "default"; break; case HWLOC_MEMBIND_FIRSTTOUCH: policystr = "firsttouch"; break; case HWLOC_MEMBIND_BIND: policystr = "bind"; break; case HWLOC_MEMBIND_INTERLEAVE: policystr = "interleave"; break; case HWLOC_MEMBIND_REPLICATE: policystr = "replicate"; break; case HWLOC_MEMBIND_NEXTTOUCH: policystr = "nexttouch"; break; default: fprintf(stderr, "unknown memory policy %d\n", policy); assert(0); break; } } if (policystr) printf("%s (%s)\n", s, policystr); else printf("%s\n", s); free(s); return EXIT_SUCCESS; } if (!hwloc_bitmap_iszero(membind_set)) { if (verbose) { char *s; hwloc_bitmap_asprintf(&s, membind_set); fprintf(stderr, "binding on memory set %s\n", s); free(s); } if (single) hwloc_bitmap_singlify(membind_set); if (pid) ret = hwloc_set_proc_membind(topology, pid, membind_set, membind_policy, membind_flags); else ret = hwloc_set_membind(topology, membind_set, membind_policy, membind_flags); if (ret) { int bind_errno = errno; const char *errmsg = strerror(bind_errno); char *s; hwloc_bitmap_asprintf(&s, membind_set); if (pid) fprintf(stderr, "hwloc_set_proc_membind %s %ld failed (errno %d %s)\n", s, (long) pid, bind_errno, errmsg); else fprintf(stderr, "hwloc_set_membind %s failed (errno %d 
%s)\n", s, bind_errno, errmsg); free(s); } } if (!hwloc_bitmap_iszero(cpubind_set)) { if (verbose) { char *s; hwloc_bitmap_asprintf(&s, cpubind_set); fprintf(stderr, "binding on cpu set %s\n", s); free(s); } if (single) hwloc_bitmap_singlify(cpubind_set); if (pid) ret = hwloc_set_proc_cpubind(topology, pid, cpubind_set, cpubind_flags); else ret = hwloc_set_cpubind(topology, cpubind_set, cpubind_flags); if (ret) { int bind_errno = errno; const char *errmsg = strerror(bind_errno); char *s; hwloc_bitmap_asprintf(&s, cpubind_set); if (pid) fprintf(stderr, "hwloc_set_proc_cpubind %s %ld failed (errno %d %s)\n", s, (long) pid, bind_errno, errmsg); else fprintf(stderr, "hwloc_set_cpubind %s failed (errno %d %s)\n", s, bind_errno, errmsg); free(s); } } hwloc_bitmap_free(cpubind_set); hwloc_bitmap_free(membind_set); hwloc_topology_destroy(topology); if (pid) return EXIT_SUCCESS; if (0 == argc) { fprintf(stderr, "%s: nothing to do!\n", orig_argv[0]); return EXIT_FAILURE; } ret = execvp(argv[0], argv); if (ret) { fprintf(stderr, "%s: Failed to launch executable \"%s\"\n", orig_argv[0], argv[0]); perror("execvp"); } return EXIT_FAILURE; }
int main(void) { hwloc_topology_t topology; cl_int clret; cl_platform_id *platform_ids; unsigned nrp, nrd, count, i, j; int err; hwloc_topology_init(&topology); hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_IO_DEVICES); hwloc_topology_load(topology); clret = clGetPlatformIDs(0, NULL, &nrp); if (CL_SUCCESS != clret || !nrp) return 0; platform_ids = malloc(nrp * sizeof(*platform_ids)); if (!platform_ids) return 0; clret = clGetPlatformIDs(nrp, platform_ids, &nrp); if (CL_SUCCESS != clret || !nrp) return 0; count = 0; for(i=0; i<nrp; i++) { cl_device_id *device_ids; clret = clGetDeviceIDs(platform_ids[i], CL_DEVICE_TYPE_ALL, 0, NULL, &nrd); if (CL_SUCCESS != clret || !nrd) continue; device_ids = malloc(nrd * sizeof(*device_ids)); if (!device_ids) continue; clret = clGetDeviceIDs(platform_ids[i], CL_DEVICE_TYPE_ALL, nrd, device_ids, &nrd); if (CL_SUCCESS != clret || !nrd) continue; for(j=0; j<nrd; j++) { hwloc_bitmap_t set; hwloc_obj_t osdev, osdev2, ancestor; const char *value; unsigned p, d; osdev = hwloc_opencl_get_device_osdev(topology, device_ids[j]); osdev2 = hwloc_opencl_get_device_osdev_by_index(topology, i, j); assert(osdev == osdev2); if (!osdev) { printf("no osdev for platform %u device %u\n", i, j); continue; } ancestor = hwloc_get_non_io_ancestor_obj(topology, osdev); set = hwloc_bitmap_alloc(); err = hwloc_opencl_get_device_cpuset(topology, device_ids[j], set); if (err < 0) { printf("no cpuset for platform %u device %u\n", i, j); } else { char *cpuset_string = NULL; hwloc_bitmap_asprintf(&cpuset_string, set); printf("got cpuset %s for platform %u device %u\n", cpuset_string, i, j); free(cpuset_string); if (hwloc_bitmap_isequal(hwloc_topology_get_complete_cpuset(topology), hwloc_topology_get_topology_cpuset(topology))) /* only compare if the topology is complete, otherwise things can be significantly different */ assert(hwloc_bitmap_isequal(set, ancestor->cpuset)); } hwloc_bitmap_free(set); printf("found OSDev %s\n", osdev->name); err = 
sscanf(osdev->name, "opencl%ud%u", &p, &d); assert(err == 2); assert(p == i); assert(d == j); value = hwloc_obj_get_info_by_name(osdev, "Backend"); err = strcmp(value, "OpenCL"); assert(!err); assert(osdev->attr->osdev.type == HWLOC_OBJ_OSDEV_COPROC); value = hwloc_obj_get_info_by_name(osdev, "CoProcType"); err = strcmp(value, "OpenCL"); assert(!err); value = hwloc_obj_get_info_by_name(osdev, "GPUModel"); printf("found OSDev model %s\n", value); count++; } } hwloc_topology_destroy(topology); return 0; }
int32_t Pipe::_getAutoAffinity() const { #ifdef EQ_USE_HWLOC_GL uint32_t port = getPort(); uint32_t device = getDevice(); if( port == LB_UNDEFINED_UINT32 && device == LB_UNDEFINED_UINT32 ) return lunchbox::Thread::NONE; if( port == LB_UNDEFINED_UINT32 ) port = 0; if( device == LB_UNDEFINED_UINT32 ) device = 0; hwloc_topology_t topology; hwloc_topology_init( &topology ); // Flags used for loading the I/O devices, bridges and their relevant info const unsigned long loading_flags = HWLOC_TOPOLOGY_FLAG_IO_BRIDGES | HWLOC_TOPOLOGY_FLAG_IO_DEVICES; // Set discovery flags if( hwloc_topology_set_flags( topology, loading_flags ) < 0 ) { LBINFO << "Automatic pipe thread placement failed: " << "hwloc_topology_set_flags() failed" << std::endl; hwloc_topology_destroy( topology ); return lunchbox::Thread::NONE; } if( hwloc_topology_load( topology ) < 0 ) { LBINFO << "Automatic pipe thread placement failed: " << "hwloc_topology_load() failed" << std::endl; hwloc_topology_destroy( topology ); return lunchbox::Thread::NONE; } const hwloc_obj_t osdev = hwloc_gl_get_display_osdev_by_port_device( topology, int( port ), int( device )); if( !osdev ) { LBINFO << "Automatic pipe thread placement failed: GPU not found" << std::endl; hwloc_topology_destroy( topology ); return lunchbox::Thread::NONE; } const hwloc_obj_t pcidev = osdev->parent; const hwloc_obj_t parent = hwloc_get_non_io_ancestor_obj( topology, pcidev ); const int numCpus = hwloc_get_nbobjs_inside_cpuset_by_type( topology, parent->cpuset, HWLOC_OBJ_SOCKET ); if( numCpus != 1 ) { LBINFO << "Automatic pipe thread placement failed: GPU attached to " << numCpus << " processors?" 
<< std::endl; hwloc_topology_destroy( topology ); return lunchbox::Thread::NONE; } const hwloc_obj_t cpuObj = hwloc_get_obj_inside_cpuset_by_type( topology, parent->cpuset, HWLOC_OBJ_SOCKET, 0 ); if( cpuObj == 0 ) { LBINFO << "Automatic pipe thread placement failed: " << "hwloc_get_obj_inside_cpuset_by_type() failed" << std::endl; hwloc_topology_destroy( topology ); return lunchbox::Thread::NONE; } const int cpuIndex = cpuObj->logical_index; hwloc_topology_destroy( topology ); return cpuIndex + lunchbox::Thread::SOCKET; #else LBINFO << "Automatic thread placement not supported, no hwloc GL support" << std::endl; #endif return lunchbox::Thread::NONE; }
int main(int argc, char* argv[]) { hwloc_obj_t obj; unsigned j, k; struct hwloc_topology_support *support; int rc; if (2 != argc) { fprintf(stderr, "Usage: opal_hwloc <topofile>\n"); exit(1); } if (0 > (rc = opal_init(&argc, &argv))) { fprintf(stderr, "opal_hwloc: couldn't init opal - error code %d\n", rc); return rc; } if (0 != hwloc_topology_init(&my_topology)) { return OPAL_ERR_NOT_SUPPORTED; } if (0 != hwloc_topology_set_xml(my_topology, argv[1])) { hwloc_topology_destroy(my_topology); return OPAL_ERR_NOT_SUPPORTED; } /* since we are loading this from an external source, we have to * explicitly set a flag so hwloc sets things up correctly */ if (0 != hwloc_topology_set_flags(my_topology, (HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM | HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM | HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) { hwloc_topology_destroy(my_topology); return OPAL_ERR_NOT_SUPPORTED; } if (0 != hwloc_topology_load(my_topology)) { hwloc_topology_destroy(my_topology); return OPAL_ERR_NOT_SUPPORTED; } /* remove the hostname from the topology. Unfortunately, hwloc * decided to add the source hostname to the "topology", thus * rendering it unusable as a pure topological description. So * we remove that information here. 
*/ obj = hwloc_get_root_obj(my_topology); for (k=0; k < obj->infos_count; k++) { if (NULL == obj->infos[k].name || NULL == obj->infos[k].value) { continue; } if (0 == strncmp(obj->infos[k].name, "HostName", strlen("HostName"))) { free(obj->infos[k].name); free(obj->infos[k].value); /* left justify the array */ for (j=k; j < obj->infos_count-1; j++) { obj->infos[j] = obj->infos[j+1]; } obj->infos[obj->infos_count-1].name = NULL; obj->infos[obj->infos_count-1].value = NULL; obj->infos_count--; break; } } /* unfortunately, hwloc does not include support info in its * xml output :-(( We default to assuming it is present as * systems that use this option are likely to provide * binding support */ support = (struct hwloc_topology_support*)hwloc_topology_get_support(my_topology); support->cpubind->set_thisproc_cpubind = true; /* filter the cpus thru any default cpu set */ opal_hwloc_base_filter_cpus(my_topology); /* fill opal_cache_line_size global with the smallest L1 cache line size */ fill_cache_line_size(); /* test it */ if (NULL == hwloc_get_obj_by_type(my_topology, HWLOC_OBJ_CORE, 0)) { fprintf(stderr, "DIDN'T FIND A CORE\n"); } hwloc_topology_destroy(my_topology); opal_finalize(); return 0; }
/*
 * get_cpuinfo - discover the node's processor layout through hwloc.
 *
 * OUT p_cpus           - number of processing units (PUs) found
 * OUT p_boards         - number of hwloc GROUP objects (at least 1), or 1
 *                        when the GROUP depth is unknown
 * OUT p_sockets        - number of sockets (or NUMA nodes treated as
 *                        sockets) that contain at least one core
 * OUT p_cores          - cores per socket
 * OUT p_threads        - threads (PUs) per core
 * OUT p_block_map_size - if not NULL, set to the PU count
 * OUT p_block_map      - abstract CPU id -> machine (OS) CPU id
 * OUT p_block_map_inv  - machine (OS) CPU id -> abstract CPU id
 *                        Both maps are allocated with xmalloc() and filled
 *                        only when both pointers are non-NULL; presumably
 *                        the caller releases them - confirm against callers.
 *
 * RET 1 if hwloc_topology_init() fails, 2 if hwloc_topology_load() fails,
 *     SLURM_SUCCESS otherwise.
 */
extern int get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards,
		       uint16_t *p_sockets, uint16_t *p_cores,
		       uint16_t *p_threads, uint16_t *p_block_map_size,
		       uint16_t **p_block_map, uint16_t **p_block_map_inv)
{
	/* indexes into objtype[], idx[] and nobj[] */
	enum { SOCKET=0, CORE=1, PU=2, LAST_OBJ=3 };

	hwloc_topology_t topology;
	hwloc_obj_t obj;
	hwloc_obj_type_t objtype[LAST_OBJ];
	unsigned idx[LAST_OBJ];
	int nobj[LAST_OBJ];
	bitstr_t *used_socket = NULL;
	int *cores_per_socket;
	int actual_cpus;
	int macid;
	int absid;
	int actual_boards = 1, depth, sock_cnt, tot_socks = 0;
	int i, used_core_idx, used_sock_idx;

	debug2("hwloc_topology_init");
	if (hwloc_topology_init(&topology)) {
		/* error initializing the hwloc library */
		debug("hwloc_topology_init() failed.");
		return 1;
	}

	/* parse all system */
	hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM);

	/* ignores cache, misc (the API for this depends on the hwloc version) */
#if HWLOC_API_VERSION < 0x00020000
	hwloc_topology_ignore_type(topology, HWLOC_OBJ_CACHE);
	hwloc_topology_ignore_type(topology, HWLOC_OBJ_MISC);
#else
	hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L1CACHE,
				       HWLOC_TYPE_FILTER_KEEP_NONE);
	hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L2CACHE,
				       HWLOC_TYPE_FILTER_KEEP_NONE);
	hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L3CACHE,
				       HWLOC_TYPE_FILTER_KEEP_NONE);
	hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L4CACHE,
				       HWLOC_TYPE_FILTER_KEEP_NONE);
	hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L5CACHE,
				       HWLOC_TYPE_FILTER_KEEP_NONE);
	hwloc_topology_set_type_filter(topology, HWLOC_OBJ_MISC,
				       HWLOC_TYPE_FILTER_KEEP_NONE);
#endif

	/* load topology */
	debug2("hwloc_topology_load");
	if (hwloc_topology_load(topology)) {
		/* error loading the hardware topology */
		debug("hwloc_topology_load() failed.");
		hwloc_topology_destroy(topology);
		return 2;
	}
#if _DEBUG
	_hwloc_children(topology, hwloc_get_root_obj(topology), 0);
#endif
	/*
	 * Some processors (e.g. AMD Opteron 6000 series) contain multiple
	 * NUMA nodes per socket. This is a configuration which does not map
	 * into the hardware entities that Slurm optimizes resource allocation
	 * for (PU/thread, core, socket, baseboard, node and network switch).
	 * In order to optimize resource allocations on such hardware, Slurm
	 * will consider each NUMA node within the socket as a separate socket.
	 * You can disable this configuring "SchedulerParameters=Ignore_NUMA",
	 * in which case Slurm will report the correct socket count on the node,
	 * but not be able to optimize resource allocations on the NUMA nodes.
	 */
	objtype[SOCKET] = HWLOC_OBJ_SOCKET;
	objtype[CORE]   = HWLOC_OBJ_CORE;
	objtype[PU]     = HWLOC_OBJ_PU;
	/* a NUMA node deeper than the socket means NUMA nodes inside sockets */
	if (hwloc_get_type_depth(topology, HWLOC_OBJ_NODE) >
	    hwloc_get_type_depth(topology, HWLOC_OBJ_SOCKET)) {
		char *sched_params = slurm_get_sched_params();
		if (sched_params &&
		    strcasestr(sched_params, "Ignore_NUMA")) {
			info("Ignoring NUMA nodes within a socket");
		} else {
			info("Considering each NUMA node as a socket");
			objtype[SOCKET] = HWLOC_OBJ_NODE;
		}
		xfree(sched_params);
	}

	/* number of objects */
	depth = hwloc_get_type_depth(topology, HWLOC_OBJ_GROUP);
	if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) {
		actual_boards = MAX(hwloc_get_nbobjs_by_depth(topology, depth),
				    1);
	}

	/*
	 * Count sockets/NUMA containing any cores.
	 * KNL NUMA with no cores are NOT counted.
	 *
	 * used_socket gets one bit per socket-level object visited, set only
	 * for those with cores; cores_per_socket[] holds the per-object core
	 * count used later when building the block map.
	 */
	nobj[SOCKET] = 0;
	depth = hwloc_get_type_depth(topology, objtype[SOCKET]);
	used_socket = bit_alloc(_MAX_SOCKET_INX);
	cores_per_socket = xmalloc(sizeof(int) * _MAX_SOCKET_INX);
	sock_cnt = hwloc_get_nbobjs_by_depth(topology, depth);
	for (i = 0; i < sock_cnt; i++) {
		obj = hwloc_get_obj_by_depth(topology, depth, i);
		if (obj->type == objtype[SOCKET]) {
			cores_per_socket[i] = _core_child_count(topology, obj);
			if (cores_per_socket[i] > 0) {
				nobj[SOCKET]++;
				bit_set(used_socket, tot_socks);
			}
			if (++tot_socks >= _MAX_SOCKET_INX) {	/* Bitmap size */
				fatal("Socket count exceeds %d, expand data structure size", _MAX_SOCKET_INX);
				break;
			}
		}
	}

	nobj[CORE] = hwloc_get_nbobjs_by_type(topology, objtype[CORE]);

	/*
	 * Workaround for hwloc bug, in some cases the topology "children" array
	 * does not get populated, so _core_child_count() always returns 0
	 */
	if (nobj[SOCKET] == 0) {
		nobj[SOCKET] = hwloc_get_nbobjs_by_type(topology,
							objtype[SOCKET]);
		if (nobj[SOCKET] == 0) {
			debug("get_cpuinfo() fudging nobj[SOCKET] from 0 to 1");
			nobj[SOCKET] = 1;
		}
		if (nobj[SOCKET] >= _MAX_SOCKET_INX) {	/* Bitmap size */
			fatal("Socket count exceeds %d, expand data structure size", _MAX_SOCKET_INX);
		}
		/* treat every socket as used */
		bit_nset(used_socket, 0, nobj[SOCKET] - 1);
	}

	/*
	 * Workaround for hwloc
	 * hwloc_get_nbobjs_by_type() returns 0 on some architectures.
	 */
	if ( nobj[CORE] == 0 ) {
		debug("get_cpuinfo() fudging nobj[CORE] from 0 to 1");
		nobj[CORE] = 1;
	}
	if ( nobj[SOCKET] == -1 )
		fatal("get_cpuinfo() can not handle nobj[SOCKET] = -1");
	if ( nobj[CORE] == -1 )
		fatal("get_cpuinfo() can not handle nobj[CORE] = -1");
	actual_cpus  = hwloc_get_nbobjs_by_type(topology, objtype[PU]);
#if 0
	/* Used to find workaround above */
	info("CORE = %d SOCKET = %d actual_cpus = %d nobj[CORE] = %d",
	     CORE, SOCKET, actual_cpus, nobj[CORE]);
#endif
	/*
	 * From here on nobj[PU] and nobj[CORE] hold per-parent counts:
	 * threads per core and cores per socket.
	 */
	if ((actual_cpus % nobj[CORE]) != 0) {
		error("Thread count (%d) not multiple of core count (%d)",
		      actual_cpus, nobj[CORE]);
	}
	nobj[PU] = actual_cpus / nobj[CORE];	/* threads per core */

	if ((nobj[CORE] % nobj[SOCKET]) != 0) {
		error("Core count (%d) not multiple of socket count (%d)",
		      nobj[CORE], nobj[SOCKET]);
	}
	nobj[CORE] /= nobj[SOCKET];		/* cores per socket */

	debug("CPUs:%d Boards:%d Sockets:%d CoresPerSocket:%d ThreadsPerCore:%d",
	      actual_cpus, actual_boards, nobj[SOCKET], nobj[CORE], nobj[PU]);

	/* allocate block_map */
	if (p_block_map_size)
		*p_block_map_size = (uint16_t)actual_cpus;
	if (p_block_map && p_block_map_inv) {
		*p_block_map     = xmalloc(actual_cpus * sizeof(uint16_t));
		*p_block_map_inv = xmalloc(actual_cpus * sizeof(uint16_t));

		/* initialize default as linear mapping */
		for (i = 0; i < actual_cpus; i++) {
			(*p_block_map)[i]     = i;
			(*p_block_map_inv)[i] = i;
		}
		/*
		 * create map with hwloc
		 *
		 * idx[] walks the hwloc socket/core/PU indexes, while
		 * used_sock_idx and used_core_idx count only the sockets
		 * flagged in used_socket and the cores below them, so the
		 * abstract ids stay dense when some sockets are skipped.
		 */
		used_sock_idx = -1;
		used_core_idx = -1;
		for (idx[SOCKET] = 0; (used_sock_idx + 1) < nobj[SOCKET];
		     idx[SOCKET]++) {
			if (!bit_test(used_socket, idx[SOCKET]))
				continue;
			used_sock_idx++;
			for (idx[CORE] = 0;
			     idx[CORE] < cores_per_socket[idx[SOCKET]];
			     idx[CORE]++) {
				used_core_idx++;
				for (idx[PU]=0; idx[PU]<nobj[PU]; ++idx[PU]) {
					/* get hwloc_obj by indexes */
					obj=hwloc_get_obj_below_array_by_type(
						topology, 3, objtype, idx);
					if (!obj)
						continue;
					macid = obj->os_index;
					absid = used_core_idx * nobj[PU] + idx[PU];

					if ((macid >= actual_cpus) ||
					    (absid >= actual_cpus)) {
						/* physical or logical ID are
						 * out of range */
						continue;
					}
					debug4("CPU map[%d]=>%d S:C:T %d:%d:%d", absid, macid,
					       used_sock_idx, idx[CORE], idx[PU]);
					(*p_block_map)[absid]     = macid;
					(*p_block_map_inv)[macid] = absid;
				}
			}
		}
	}
	FREE_NULL_BITMAP(used_socket);
	xfree(cores_per_socket);
	hwloc_topology_destroy(topology);

	/* update output parameters */
	*p_cpus    = actual_cpus;
	*p_boards  = actual_boards;
	*p_sockets = nobj[SOCKET];
	*p_cores   = nobj[CORE];
	*p_threads = nobj[PU];

#if _DEBUG
	/*** Display raw data ***/
	debug("CPUs:%u Boards:%u Sockets:%u CoresPerSocket:%u ThreadsPerCore:%u",
	      *p_cpus, *p_boards, *p_sockets, *p_cores, *p_threads);

	/* Display the mapping tables */
	if (p_block_map && p_block_map_inv) {
		debug("------");
		debug("Abstract -> Machine logical CPU ID block mapping:");
		debug("AbstractId PhysicalId Inverse");
		for (i = 0; i < *p_cpus; i++) {
			debug3("   %4d      %4u       %4u",
				i, (*p_block_map)[i], (*p_block_map_inv)[i]);
		}
		debug("------");
	}
#endif
	return SLURM_SUCCESS;

}
static int rte_init(void) { int rc, ret; char *error = NULL; char *envar, *ev1, *ev2; uint64_t unique_key[2]; char *string_key; opal_value_t *kv; char *val; int u32, *u32ptr; uint16_t u16, *u16ptr; /* run the prolog */ if (ORTE_SUCCESS != (rc = orte_ess_base_std_prolog())) { ORTE_ERROR_LOG(rc); return rc; } u32ptr = &u32; u16ptr = &u16; if (NULL != orte_ess_singleton_server_uri) { /* we are going to connect to a server HNP */ if (0 == strncmp(orte_ess_singleton_server_uri, "file", strlen("file")) || 0 == strncmp(orte_ess_singleton_server_uri, "FILE", strlen("FILE"))) { char input[1024], *filename; FILE *fp; /* it is a file - get the filename */ filename = strchr(orte_ess_singleton_server_uri, ':'); if (NULL == filename) { /* filename is not correctly formatted */ orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-bad", true, "singleton", orte_ess_singleton_server_uri); return ORTE_ERROR; } ++filename; /* space past the : */ if (0 >= strlen(filename)) { /* they forgot to give us the name! */ orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-missing", true, "singleton", orte_ess_singleton_server_uri); return ORTE_ERROR; } /* open the file and extract the uri */ fp = fopen(filename, "r"); if (NULL == fp) { /* can't find or read file! 
*/ orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-access", true, "singleton", orte_ess_singleton_server_uri); return ORTE_ERROR; } memset(input, 0, 1024); // initialize the array to ensure a NULL termination if (NULL == fgets(input, 1023, fp)) { /* something malformed about file */ fclose(fp); orte_show_help("help-orterun.txt", "orterun:ompi-server-file-bad", true, "singleton", orte_ess_singleton_server_uri, "singleton"); return ORTE_ERROR; } fclose(fp); input[strlen(input)-1] = '\0'; /* remove newline */ orte_process_info.my_hnp_uri = strdup(input); } else { orte_process_info.my_hnp_uri = strdup(orte_ess_singleton_server_uri); } /* save the daemon uri - we will process it later */ orte_process_info.my_daemon_uri = strdup(orte_process_info.my_hnp_uri); /* construct our name - we are in their job family, so we know that * much. However, we cannot know how many other singletons and jobs * this HNP is running. Oh well - if someone really wants to use this * option, they can try to figure it out. 
For now, we'll just assume * we are the only ones */ ORTE_PROC_MY_NAME->jobid = ORTE_CONSTRUCT_LOCAL_JOBID(ORTE_PROC_MY_HNP->jobid, 1); /* obviously, we are vpid=0 for this job */ ORTE_PROC_MY_NAME->vpid = 0; /* for convenience, push the pubsub version of this param into the environ */ opal_setenv (OPAL_MCA_PREFIX"pubsub_orte_server", orte_process_info.my_hnp_uri, 1, &environ); } else { /* spawn our very own HNP to support us */ if (ORTE_SUCCESS != (rc = fork_hnp())) { ORTE_ERROR_LOG(rc); return rc; } /* our name was given to us by the HNP */ } /* get an async event base - we use the opal_async one so * we don't startup extra threads if not needed */ orte_event_base = opal_progress_thread_init(NULL); progress_thread_running = true; /* open and setup pmix */ if (NULL == opal_pmix.initialized) { if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) { error = "opening pmix"; goto error; } if (OPAL_SUCCESS != (ret = opal_pmix_base_select())) { error = "select pmix"; goto error; } } /* initialize the selected module */ if (!opal_pmix.initialized() && (OPAL_SUCCESS != (ret = opal_pmix.init()))) { error = "init pmix"; goto error; } /* pmix.init set our process name down in the OPAL layer, * so carry it forward here */ ORTE_PROC_MY_NAME->jobid = OPAL_PROC_MY_NAME.jobid; ORTE_PROC_MY_NAME->vpid = OPAL_PROC_MY_NAME.vpid; /* get our local rank from PMI */ OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_RANK, ORTE_PROC_MY_NAME, &u16ptr, OPAL_UINT16); if (OPAL_SUCCESS != ret) { error = "getting local rank"; goto error; } orte_process_info.my_local_rank = u16; /* get our node rank from PMI */ OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_NODE_RANK, ORTE_PROC_MY_NAME, &u16ptr, OPAL_UINT16); if (OPAL_SUCCESS != ret) { error = "getting node rank"; goto error; } orte_process_info.my_node_rank = u16; /* get universe size */ OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_UNIV_SIZE, ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32); if (OPAL_SUCCESS != ret) { error = "getting univ size"; 
goto error; } orte_process_info.num_procs = u32; /* push into the environ for pickup in MPI layer for * MPI-3 required info key */ if (NULL == getenv(OPAL_MCA_PREFIX"orte_ess_num_procs")) { asprintf(&ev1, OPAL_MCA_PREFIX"orte_ess_num_procs=%d", orte_process_info.num_procs); putenv(ev1); added_num_procs = true; } if (NULL == getenv("OMPI_APP_CTX_NUM_PROCS")) { asprintf(&ev2, "OMPI_APP_CTX_NUM_PROCS=%d", orte_process_info.num_procs); putenv(ev2); added_app_ctx = true; } /* get our app number from PMI - ok if not found */ OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_APPNUM, ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32); if (OPAL_SUCCESS == ret) { orte_process_info.app_num = u32; } else { orte_process_info.app_num = 0; } /* set some other standard values */ orte_process_info.num_local_peers = 0; /* setup transport keys in case the MPI layer needs them - * we can use the jobfam and stepid as unique keys * because they are unique values assigned by the RM */ if (NULL == getenv(OPAL_MCA_PREFIX"orte_precondition_transports")) { unique_key[0] = ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid); unique_key[1] = ORTE_LOCAL_JOBID(ORTE_PROC_MY_NAME->jobid); if (NULL == (string_key = orte_pre_condition_transports_print(unique_key))) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); return ORTE_ERR_OUT_OF_RESOURCE; } asprintf(&envar, OPAL_MCA_PREFIX"orte_precondition_transports=%s", string_key); putenv(envar); added_transport_keys = true; /* cannot free the envar as that messes up our environ */ free(string_key); } /* retrieve our topology */ OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_TOPO, ORTE_PROC_MY_NAME, &val, OPAL_STRING); if (OPAL_SUCCESS == ret && NULL != val) { /* load the topology */ if (0 != hwloc_topology_init(&opal_hwloc_topology)) { ret = OPAL_ERROR; free(val); error = "setting topology"; goto error; } if (0 != hwloc_topology_set_xmlbuffer(opal_hwloc_topology, val, strlen(val))) { ret = OPAL_ERROR; free(val); hwloc_topology_destroy(opal_hwloc_topology); error = "setting topology"; goto error; 
} /* since we are loading this from an external source, we have to * explicitly set a flag so hwloc sets things up correctly */ if (0 != hwloc_topology_set_flags(opal_hwloc_topology, (HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM | HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM | HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) { ret = OPAL_ERROR; hwloc_topology_destroy(opal_hwloc_topology); free(val); error = "setting topology"; goto error; } /* now load the topology */ if (0 != hwloc_topology_load(opal_hwloc_topology)) { ret = OPAL_ERROR; hwloc_topology_destroy(opal_hwloc_topology); free(val); error = "setting topology"; goto error; } free(val); } else { /* it wasn't passed down to us, so go get it */ if (OPAL_SUCCESS != (ret = opal_hwloc_base_get_topology())) { error = "topology discovery"; goto error; } /* push it into the PMIx database in case someone * tries to retrieve it so we avoid an attempt to * get it again */ kv = OBJ_NEW(opal_value_t); kv->key = strdup(OPAL_PMIX_LOCAL_TOPO); kv->type = OPAL_STRING; if (0 != (ret = hwloc_topology_export_xmlbuffer(opal_hwloc_topology, &kv->data.string, &u32))) { error = "topology export"; goto error; } if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_NAME, kv))) { error = "topology store"; goto error; } OBJ_RELEASE(kv); } /* use the std app init to complete the procedure */ if (ORTE_SUCCESS != (rc = orte_ess_base_app_setup(true))) { ORTE_ERROR_LOG(rc); return rc; } /* push our hostname so others can find us, if they need to */ OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_HOSTNAME, orte_process_info.nodename, OPAL_STRING); if (ORTE_SUCCESS != ret) { error = "db store hostname"; goto error; } return ORTE_SUCCESS; error: if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) { orte_show_help("help-orte-runtime.txt", "orte_init:startup:internal-failure", true, error, ORTE_ERROR_NAME(ret), ret); } return ret; }
int main(void) { hwloc_topology_t topology; hwloc_obj_t obj; hwloc_topology_init(&topology); hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_IO); assert(-1 == hwloc_topology_ignore_type(topology, HWLOC_OBJ_PCI_DEVICE)); assert(-1 == hwloc_topology_ignore_type_keep_structure(topology, HWLOC_OBJ_BRIDGE)); assert(-1 == hwloc_topology_ignore_type(topology, HWLOC_OBJ_OS_DEVICE)); hwloc_topology_load(topology); printf("Found %d bridges\n", hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_BRIDGE)); obj = NULL; while ((obj = hwloc_get_next_bridge(topology, obj)) != NULL) { assert(obj->type == HWLOC_OBJ_BRIDGE); /* only host->pci and pci->pci bridge supported so far */ if (obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST) { assert(obj->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI); printf(" Found host->PCI bridge for domain %04x bus %02x-%02x\n", obj->attr->bridge.downstream.pci.domain, obj->attr->bridge.downstream.pci.secondary_bus, obj->attr->bridge.downstream.pci.subordinate_bus); } else { assert(obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI); assert(obj->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI); printf(" Found PCI->PCI bridge [%04x:%04x] for domain %04x bus %02x-%02x\n", obj->attr->bridge.upstream.pci.vendor_id, obj->attr->bridge.upstream.pci.device_id, obj->attr->bridge.downstream.pci.domain, obj->attr->bridge.downstream.pci.secondary_bus, obj->attr->bridge.downstream.pci.subordinate_bus); } } printf("Found %d PCI devices\n", hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PCI_DEVICE)); obj = NULL; while ((obj = hwloc_get_next_pcidev(topology, obj)) != NULL) { assert(obj->type == HWLOC_OBJ_PCI_DEVICE); printf(" Found PCI device class %04x vendor %04x model %04x\n", obj->attr->pcidev.class_id, obj->attr->pcidev.vendor_id, obj->attr->pcidev.device_id); } printf("Found %d OS devices\n", hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_OS_DEVICE)); obj = NULL; while ((obj = hwloc_get_next_osdev(topology, obj)) != NULL) { 
assert(obj->type == HWLOC_OBJ_OS_DEVICE); printf(" Found OS device %s subtype %d\n", obj->name, obj->attr->osdev.type); } assert(HWLOC_TYPE_DEPTH_BRIDGE == hwloc_get_type_depth(topology, HWLOC_OBJ_BRIDGE)); assert(HWLOC_TYPE_DEPTH_PCI_DEVICE == hwloc_get_type_depth(topology, HWLOC_OBJ_PCI_DEVICE)); assert(HWLOC_TYPE_DEPTH_OS_DEVICE == hwloc_get_type_depth(topology, HWLOC_OBJ_OS_DEVICE)); assert(hwloc_compare_types(HWLOC_OBJ_BRIDGE, HWLOC_OBJ_PCI_DEVICE) < 0); assert(hwloc_compare_types(HWLOC_OBJ_BRIDGE, HWLOC_OBJ_OS_DEVICE) < 0); assert(hwloc_compare_types(HWLOC_OBJ_PCI_DEVICE, HWLOC_OBJ_OS_DEVICE) < 0); /* check that hwloc_get_hostbridge_by_pcibus() and hwloc_get_non_io_ancestor_obj work fine */ obj = NULL; while ((obj = hwloc_get_next_pcidev(topology, obj)) != NULL) { assert(hwloc_get_hostbridge_by_pcibus(topology, obj->attr->pcidev.domain, obj->attr->pcidev.bus)->parent == hwloc_get_non_io_ancestor_obj(topology, obj)); } hwloc_topology_destroy(topology); return 0; }
extern int get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards, uint16_t *p_sockets, uint16_t *p_cores, uint16_t *p_threads, uint16_t *p_block_map_size, uint16_t **p_block_map, uint16_t **p_block_map_inv) { enum { SOCKET=0, CORE=1, PU=2, LAST_OBJ=3 }; hwloc_topology_t topology; hwloc_obj_t obj; hwloc_obj_type_t objtype[LAST_OBJ]; unsigned idx[LAST_OBJ]; int nobj[LAST_OBJ]; int actual_cpus; int macid; int absid; int actual_boards = 1, depth; int i; debug2("hwloc_topology_init"); if (hwloc_topology_init(&topology)) { /* error in initialize hwloc library */ debug("hwloc_topology_init() failed."); return 1; } /* parse all system */ hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM); /* ignores cache, misc */ hwloc_topology_ignore_type (topology, HWLOC_OBJ_CACHE); hwloc_topology_ignore_type (topology, HWLOC_OBJ_MISC); /* load topology */ debug2("hwloc_topology_load"); if (hwloc_topology_load(topology)) { /* error in load hardware topology */ debug("hwloc_topology_load() failed."); hwloc_topology_destroy(topology); return 2; } /* Some processors (e.g. AMD Opteron 6000 series) contain multiple * NUMA nodes per socket. This is a configuration which does not map * into the hardware entities that Slurm optimizes resource allocation * for (PU/thread, core, socket, baseboard, node and network switch). * In order to optimize resource allocations on such hardware, Slurm * will consider each NUMA node within the socket as a separate socket. * You can disable this configuring "SchedulerParameters=Ignore_NUMA", * in which case Slurm will report the correct socket count on the node, * but not be able to optimize resource allocations on the NUMA nodes. 
*/ objtype[SOCKET] = HWLOC_OBJ_SOCKET; objtype[CORE] = HWLOC_OBJ_CORE; objtype[PU] = HWLOC_OBJ_PU; if (hwloc_get_type_depth(topology, HWLOC_OBJ_NODE) > hwloc_get_type_depth(topology, HWLOC_OBJ_SOCKET)) { char *sched_params = slurm_get_sched_params(); if (sched_params && strcasestr(sched_params, "Ignore_NUMA")) { info("Ignoring NUMA nodes within a socket"); } else { info("Considering each NUMA node as a socket"); objtype[SOCKET] = HWLOC_OBJ_NODE; } xfree(sched_params); } /* number of objects */ depth = hwloc_get_type_depth(topology, HWLOC_OBJ_GROUP); if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) { actual_boards = MAX(hwloc_get_nbobjs_by_depth(topology, depth), 1); } nobj[SOCKET] = hwloc_get_nbobjs_by_type(topology, objtype[SOCKET]); nobj[CORE] = hwloc_get_nbobjs_by_type(topology, objtype[CORE]); /* * Workaround for hwloc * hwloc_get_nbobjs_by_type() returns 0 on some architectures. */ if ( nobj[SOCKET] == 0 ) { debug("get_cpuinfo() fudging nobj[SOCKET] from 0 to 1"); nobj[SOCKET] = 1; } if ( nobj[CORE] == 0 ) { debug("get_cpuinfo() fudging nobj[CORE] from 0 to 1"); nobj[CORE] = 1; } if ( nobj[SOCKET] == -1 ) fatal("get_cpuinfo() can not handle nobj[SOCKET] = -1"); if ( nobj[CORE] == -1 ) fatal("get_cpuinfo() can not handle nobj[CORE] = -1"); actual_cpus = hwloc_get_nbobjs_by_type(topology, objtype[PU]); #if 0 /* Used to find workaround above */ info("CORE = %d SOCKET = %d actual_cpus = %d nobj[CORE] = %d", CORE, SOCKET, actual_cpus, nobj[CORE]); #endif nobj[PU] = actual_cpus/nobj[CORE]; /* threads per core */ nobj[CORE] /= nobj[SOCKET]; /* cores per socket */ debug("CPUs:%d Boards:%u Sockets:%d CoresPerSocket:%d ThreadsPerCore:%d", actual_cpus, actual_boards, nobj[SOCKET], nobj[CORE], nobj[PU]); /* allocate block_map */ *p_block_map_size = (uint16_t)actual_cpus; if (p_block_map && p_block_map_inv) { *p_block_map = xmalloc(actual_cpus * sizeof(uint16_t)); *p_block_map_inv = xmalloc(actual_cpus * sizeof(uint16_t)); /* initialize default as linear mapping */ for (i = 0; i < 
actual_cpus; i++) { (*p_block_map)[i] = i; (*p_block_map_inv)[i] = i; } /* create map with hwloc */ for (idx[SOCKET]=0; idx[SOCKET]<nobj[SOCKET]; ++idx[SOCKET]) { for (idx[CORE]=0; idx[CORE]<nobj[CORE]; ++idx[CORE]) { for (idx[PU]=0; idx[PU]<nobj[PU]; ++idx[PU]) { /* get hwloc_obj by indexes */ obj=hwloc_get_obj_below_array_by_type( topology, 3, objtype, idx); if (!obj) continue; macid = obj->os_index; absid = idx[SOCKET]*nobj[CORE]*nobj[PU] + idx[CORE]*nobj[PU] + idx[PU]; if ((macid >= actual_cpus) || (absid >= actual_cpus)) { /* physical or logical ID are * out of range */ continue; } debug4("CPU map[%d]=>%d", absid, macid); (*p_block_map)[absid] = macid; (*p_block_map_inv)[macid] = absid; } } } } hwloc_topology_destroy(topology); /* update output parameters */ *p_cpus = actual_cpus; *p_boards = actual_boards; *p_sockets = nobj[SOCKET]; *p_cores = nobj[CORE]; *p_threads = nobj[PU]; #if DEBUG_DETAIL /*** Display raw data ***/ debug("CPUs:%u Boards:%u Sockets:%u CoresPerSocket:%u ThreadsPerCore:%u", *p_cpus, *p_boards, *p_sockets, *p_cores, *p_threads); /* Display the mapping tables */ if (p_block_map && p_block_map_inv) { debug("------"); debug("Abstract -> Machine logical CPU ID block mapping:"); debug("AbstractId PhysicalId Inverse"); for (i = 0; i < *p_cpus; i++) { debug3(" %4d %4u %4u", i, (*p_block_map)[i], (*p_block_map_inv)[i]); } debug("------"); } #endif return 0; }
/*
 * Build a simulated allocation.
 *
 * The component parameters num_nodes, slots and slots_max are comma-separated
 * lists; entry n describes the n-th group of nodes. When slots or slots_max
 * has fewer entries than num_nodes, its last entry is repeated for the
 * remaining groups. With hwloc support, topofiles (if given) must name one
 * XML topology per group; otherwise the local topology is used for every node.
 *
 * @param jdata  job being allocated (not referenced by this function)
 * @param nodes  list that receives one orte_node_t per simulated node
 *
 * @return ORTE_SUCCESS, or ORTE_ERR_SILENT after the problem has been
 *         reported through orte_show_help().
 */
static int allocate(orte_job_t *jdata, opal_list_t *nodes)
{
    int i, n, val, dig, num_nodes;
    int num_groups, cnt;
    int rc = ORTE_SUCCESS;
    orte_node_t *node;
#if OPAL_HAVE_HWLOC
    orte_topology_t *t;
    hwloc_topology_t topo;
    hwloc_obj_t obj;
    unsigned j, k;
    struct hwloc_topology_support *support;
    char **files=NULL;
    bool use_local_topology = false;
#endif
    char **node_cnt=NULL;
    char **slot_cnt=NULL;
    char **max_slot_cnt=NULL;
    char *tmp;
    char prefix[6];

    node_cnt = opal_argv_split(mca_ras_simulator_component.num_nodes, ',');
    slot_cnt = opal_argv_split(mca_ras_simulator_component.slots, ',');
    max_slot_cnt = opal_argv_split(mca_ras_simulator_component.slots_max, ',');
    num_groups = opal_argv_count(node_cnt);

    /* backfill the slot_cnt as reqd so we don't have to
     * specify slot_cnt for each set of nodes - every missing
     * entry repeats the last value that was given. Nothing can
     * be repeated when the list is empty, so skip it then.
     */
    cnt = opal_argv_count(slot_cnt);
    if (0 < cnt) {
        tmp = slot_cnt[cnt-1];
        for (n=cnt; n < num_groups; n++) {
            opal_argv_append_nosize(&slot_cnt, tmp);
        }
    }
    /* backfill the max_slot_cnt as reqd - indexed and bounded by
     * its own length and the number of node groups */
    cnt = opal_argv_count(max_slot_cnt);
    if (0 < cnt) {
        tmp = max_slot_cnt[cnt-1];
        for (n=cnt; n < num_groups; n++) {
            opal_argv_append_nosize(&max_slot_cnt, tmp);
        }
    }

#if OPAL_HAVE_HWLOC
    if (NULL == mca_ras_simulator_component.topofiles) {
        /* use our topology */
        use_local_topology = true;
    } else {
        files = opal_argv_split(mca_ras_simulator_component.topofiles, ',');
        if (opal_argv_count(files) != num_groups) {
            orte_show_help("help-ras-base.txt", "ras-sim:mismatch", true);
            goto error_silent;
        }
    }
#else
    /* If we don't have hwloc and hwloc files were specified, then
       error out (because we can't deliver that functionality) */
    if (NULL != mca_ras_simulator_component.topofiles) {
        orte_show_help("help-ras-simulator.txt",
                       "no hwloc support for topofiles", true);
        goto error_silent;
    }
#endif

    /* setup the prefix to the node names */
    snprintf(prefix, sizeof(prefix), "nodeA");

    /* process the request */
    for (n=0; n < num_groups; n++) {
        num_nodes = strtol(node_cnt[n], NULL, 10);

        /* get number of digits */
        val = num_nodes;
        for (dig=0; 0 != val; dig++) {
            val /= 10;
        }

        /* set the prefix for this group of nodes.
         * NOTE(review): the increment accumulates, so the letters run
         * A, B, D, G, ... rather than A, B, C, ...; left as-is because
         * changing it would rename the generated nodes - confirm intent.
         */
        prefix[4] += n;

        /* check for topology */
#if OPAL_HAVE_HWLOC
        if (use_local_topology) {
            /* use our topology */
            topo = opal_hwloc_topology;
        } else {
            if (0 != hwloc_topology_init(&topo)) {
                orte_show_help("help-ras-simulator.txt",
                               "hwloc API fail", true,
                               __FILE__, __LINE__, "hwloc_topology_init");
                goto error_silent;
            }
            if (0 != hwloc_topology_set_xml(topo, files[n])) {
                orte_show_help("help-ras-simulator.txt",
                               "hwloc failed to load xml", true, files[n]);
                hwloc_topology_destroy(topo);
                goto error_silent;
            }
            /* since we are loading this from an external source, we have to
             * explicitly set a flag so hwloc sets things up correctly
             */
            if (0 != hwloc_topology_set_flags(topo, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM)) {
                orte_show_help("help-ras-simulator.txt",
                               "hwloc API fail", true,
                               __FILE__, __LINE__, "hwloc_topology_set_flags");
                hwloc_topology_destroy(topo);
                goto error_silent;
            }
            if (0 != hwloc_topology_load(topo)) {
                orte_show_help("help-ras-simulator.txt",
                               "hwloc API fail", true,
                               __FILE__, __LINE__, "hwloc_topology_load");
                hwloc_topology_destroy(topo);
                goto error_silent;
            }
            /* remove the hostname from the topology. Unfortunately, hwloc
             * decided to add the source hostname to the "topology", thus
             * rendering it unusable as a pure topological description. So
             * we remove that information here.
             */
            obj = hwloc_get_root_obj(topo);
            for (k=0; k < obj->infos_count; k++) {
                if (NULL == obj->infos[k].name ||
                    NULL == obj->infos[k].value) {
                    continue;
                }
                if (0 == strncmp(obj->infos[k].name, "HostName", strlen("HostName"))) {
                    free(obj->infos[k].name);
                    free(obj->infos[k].value);
                    /* left justify the array */
                    for (j=k; j < obj->infos_count-1; j++) {
                        obj->infos[j] = obj->infos[j+1];
                    }
                    obj->infos[obj->infos_count-1].name = NULL;
                    obj->infos[obj->infos_count-1].value = NULL;
                    obj->infos_count--;
                    break;
                }
            }
            /* unfortunately, hwloc does not include support info in its
             * xml output :-(( To aid in debugging, we set it here
             */
            support = (struct hwloc_topology_support*)hwloc_topology_get_support(topo);
            support->cpubind->set_thisproc_cpubind = mca_ras_simulator_component.have_cpubind;
            support->membind->set_thisproc_membind = mca_ras_simulator_component.have_membind;
            /* add it to our array */
            t = OBJ_NEW(orte_topology_t);
            t->topo = topo;
            t->sig = opal_hwloc_base_get_topo_signature(topo);
            opal_pointer_array_add(orte_node_topologies, t);
        }
#endif

        for (i=0; i < num_nodes; i++) {
            node = OBJ_NEW(orte_node_t);
            asprintf(&node->name, "%s%0*d", prefix, dig, i);
            node->state = ORTE_NODE_STATE_UP;
            node->slots_inuse = 0;
            /* a list that was not provided at all yields 0 */
            node->slots_max = ((NULL == max_slot_cnt || NULL == max_slot_cnt[n]) ?
                               0 : atoi(max_slot_cnt[n]));
            node->slots = ((NULL == slot_cnt || NULL == slot_cnt[n]) ?
                           0 : atoi(slot_cnt[n]));
#if OPAL_HAVE_HWLOC
            node->topology = topo;
#endif
            opal_output_verbose(1, orte_ras_base_framework.framework_output,
                                "Created Node <%10s> [%3d : %3d]",
                                node->name, node->slots, node->slots_max);
            opal_list_append(nodes, &node->super);
        }
    }

    /* record the number of allocated nodes */
    orte_num_allocated_nodes = opal_list_get_size(nodes);
    goto cleanup;

error_silent:
    rc = ORTE_ERR_SILENT;

cleanup:
    /* the split lists are only needed while building the nodes */
    if (NULL != max_slot_cnt) {
        opal_argv_free(max_slot_cnt);
    }
    if (NULL != slot_cnt) {
        opal_argv_free(slot_cnt);
    }
    if (NULL != node_cnt) {
        opal_argv_free(node_cnt);
    }
#if OPAL_HAVE_HWLOC
    if (NULL != files) {
        opal_argv_free(files);
    }
#endif
    return rc;
}
int main(int argc, char *argv[]) { hwloc_topology_t topology; unsigned depth; hwloc_bitmap_t cpubind_set, membind_set; int got_cpubind = 0, got_membind = 0; int working_on_cpubind = 1; /* membind if 0 */ int get_binding = 0; int get_last_cpu_location = 0; unsigned long flags = HWLOC_TOPOLOGY_FLAG_WHOLE_IO|HWLOC_TOPOLOGY_FLAG_ICACHES; int force = 0; int single = 0; int verbose = 0; int logical = 1; int taskset = 0; int cpubind_flags = 0; hwloc_membind_policy_t membind_policy = HWLOC_MEMBIND_BIND; int membind_flags = 0; int opt; int ret; int pid_number = -1; hwloc_pid_t pid = 0; /* only valid when pid_number > 0, but gcc-4.8 still reports uninitialized warnings */ char *callname; cpubind_set = hwloc_bitmap_alloc(); membind_set = hwloc_bitmap_alloc(); hwloc_topology_init(&topology); hwloc_topology_set_flags(topology, flags); hwloc_topology_load(topology); depth = hwloc_topology_get_depth(topology); callname = argv[0]; /* skip argv[0], handle options */ argv++; argc--; while (argc >= 1) { if (!strcmp(argv[0], "--")) { argc--; argv++; break; } opt = 0; if (*argv[0] == '-') { if (!strcmp(argv[0], "-v") || !strcmp(argv[0], "--verbose")) { verbose++; goto next; } else if (!strcmp(argv[0], "-q") || !strcmp(argv[0], "--quiet")) { verbose--; goto next; } else if (!strcmp(argv[0], "--help")) { usage("hwloc-bind", stdout); return EXIT_SUCCESS; } else if (!strcmp(argv[0], "--single")) { single = 1; goto next; } else if (!strcmp(argv[0], "-f") || !strcmp(argv[0], "--force")) { force = 1; goto next; } else if (!strcmp(argv[0], "--strict")) { cpubind_flags |= HWLOC_CPUBIND_STRICT; membind_flags |= HWLOC_MEMBIND_STRICT; goto next; } else if (!strcmp(argv[0], "--pid")) { if (argc < 2) { usage ("hwloc-bind", stderr); exit(EXIT_FAILURE); } pid_number = atoi(argv[1]); opt = 1; goto next; } else if (!strcmp (argv[0], "--version")) { printf("%s %s\n", callname, HWLOC_VERSION); exit(EXIT_SUCCESS); } if (!strcmp(argv[0], "-l") || !strcmp(argv[0], "--logical")) { logical = 1; goto next; } if 
(!strcmp(argv[0], "-p") || !strcmp(argv[0], "--physical")) { logical = 0; goto next; } if (!strcmp(argv[0], "--taskset")) { taskset = 1; goto next; } else if (!strcmp (argv[0], "-e") || !strncmp (argv[0], "--get-last-cpu-location", 10)) { get_last_cpu_location = 1; goto next; } else if (!strcmp (argv[0], "--get")) { get_binding = 1; goto next; } else if (!strcmp (argv[0], "--cpubind")) { working_on_cpubind = 1; goto next; } else if (!strcmp (argv[0], "--membind")) { working_on_cpubind = 0; goto next; } else if (!strcmp (argv[0], "--mempolicy")) { if (!strncmp(argv[1], "default", 2)) membind_policy = HWLOC_MEMBIND_DEFAULT; else if (!strncmp(argv[1], "firsttouch", 2)) membind_policy = HWLOC_MEMBIND_FIRSTTOUCH; else if (!strncmp(argv[1], "bind", 2)) membind_policy = HWLOC_MEMBIND_BIND; else if (!strncmp(argv[1], "interleave", 2)) membind_policy = HWLOC_MEMBIND_INTERLEAVE; else if (!strncmp(argv[1], "replicate", 2)) membind_policy = HWLOC_MEMBIND_REPLICATE; else if (!strncmp(argv[1], "nexttouch", 2)) membind_policy = HWLOC_MEMBIND_NEXTTOUCH; else { fprintf(stderr, "Unrecognized memory binding policy %s\n", argv[1]); usage ("hwloc-bind", stderr); exit(EXIT_FAILURE); } opt = 1; goto next; } else if (!strcmp (argv[0], "--whole-system")) { flags |= HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM; hwloc_topology_destroy(topology); hwloc_topology_init(&topology); hwloc_topology_set_flags(topology, flags); hwloc_topology_load(topology); depth = hwloc_topology_get_depth(topology); goto next; } else if (!strcmp (argv[0], "--restrict")) { hwloc_bitmap_t restrictset; int err; if (argc < 2) { usage (callname, stdout); exit(EXIT_FAILURE); } restrictset = hwloc_bitmap_alloc(); hwloc_bitmap_sscanf(restrictset, argv[1]); err = hwloc_topology_restrict (topology, restrictset, 0); if (err) { perror("Restricting the topology"); /* fallthrough */ } hwloc_bitmap_free(restrictset); argc--; argv++; goto next; } fprintf (stderr, "Unrecognized option: %s\n", argv[0]); usage("hwloc-bind", stderr); return 
EXIT_FAILURE; } ret = hwloc_calc_process_arg(topology, depth, argv[0], logical, working_on_cpubind ? cpubind_set : membind_set, verbose); if (ret < 0) { if (verbose > 0) fprintf(stderr, "assuming the command starts at %s\n", argv[0]); break; } if (working_on_cpubind) got_cpubind = 1; else got_membind = 1; next: argc -= opt+1; argv += opt+1; } if (pid_number > 0) { pid = hwloc_pid_from_number(pid_number, !(get_binding || get_last_cpu_location)); /* no need to set_pid() * the doc just says we're operating on pid, not that we're retrieving the topo/cpuset as seen from inside pid */ } if (get_last_cpu_location && !working_on_cpubind) { fprintf(stderr, "Options --membind and --get-last-cpu-location cannot be combined.\n"); return EXIT_FAILURE; } if ((get_binding || get_last_cpu_location) && (got_cpubind || got_membind)) { /* doesn't work because get_binding/get_last_cpu_location overwrites cpubind_set */ fprintf(stderr, "Cannot display and set binding at the same time.\n"); return EXIT_FAILURE; } if (get_binding || get_last_cpu_location) { char *s; const char *policystr = NULL; int err; if (working_on_cpubind) { if (get_last_cpu_location) { if (pid_number > 0) err = hwloc_get_proc_last_cpu_location(topology, pid, cpubind_set, 0); else err = hwloc_get_last_cpu_location(topology, cpubind_set, 0); } else { if (pid_number > 0) err = hwloc_get_proc_cpubind(topology, pid, cpubind_set, 0); else err = hwloc_get_cpubind(topology, cpubind_set, 0); } if (err) { const char *errmsg = strerror(errno); if (pid_number > 0) fprintf(stderr, "hwloc_get_proc_%s %d failed (errno %d %s)\n", get_last_cpu_location ? "last_cpu_location" : "cpubind", pid_number, errno, errmsg); else fprintf(stderr, "hwloc_get_%s failed (errno %d %s)\n", get_last_cpu_location ? 
"last_cpu_location" : "cpubind", errno, errmsg); return EXIT_FAILURE; } if (taskset) hwloc_bitmap_taskset_asprintf(&s, cpubind_set); else hwloc_bitmap_asprintf(&s, cpubind_set); } else { hwloc_membind_policy_t policy; if (pid_number > 0) err = hwloc_get_proc_membind(topology, pid, membind_set, &policy, 0); else err = hwloc_get_membind(topology, membind_set, &policy, 0); if (err) { const char *errmsg = strerror(errno); if (pid_number > 0) fprintf(stderr, "hwloc_get_proc_membind %d failed (errno %d %s)\n", pid_number, errno, errmsg); else fprintf(stderr, "hwloc_get_membind failed (errno %d %s)\n", errno, errmsg); return EXIT_FAILURE; } if (taskset) hwloc_bitmap_taskset_asprintf(&s, membind_set); else hwloc_bitmap_asprintf(&s, membind_set); switch (policy) { case HWLOC_MEMBIND_DEFAULT: policystr = "default"; break; case HWLOC_MEMBIND_FIRSTTOUCH: policystr = "firsttouch"; break; case HWLOC_MEMBIND_BIND: policystr = "bind"; break; case HWLOC_MEMBIND_INTERLEAVE: policystr = "interleave"; break; case HWLOC_MEMBIND_REPLICATE: policystr = "replicate"; break; case HWLOC_MEMBIND_NEXTTOUCH: policystr = "nexttouch"; break; default: fprintf(stderr, "unknown memory policy %d\n", policy); assert(0); break; } } if (policystr) printf("%s (%s)\n", s, policystr); else printf("%s\n", s); free(s); } if (got_membind) { if (hwloc_bitmap_iszero(membind_set)) { if (verbose >= 0) fprintf(stderr, "cannot membind to empty set\n"); if (!force) goto failed_binding; } if (verbose > 0) { char *s; hwloc_bitmap_asprintf(&s, membind_set); fprintf(stderr, "binding on memory set %s\n", s); free(s); } if (single) hwloc_bitmap_singlify(membind_set); if (pid_number > 0) ret = hwloc_set_proc_membind(topology, pid, membind_set, membind_policy, membind_flags); else ret = hwloc_set_membind(topology, membind_set, membind_policy, membind_flags); if (ret && verbose >= 0) { int bind_errno = errno; const char *errmsg = strerror(bind_errno); char *s; hwloc_bitmap_asprintf(&s, membind_set); if (pid_number > 0) 
fprintf(stderr, "hwloc_set_proc_membind %s %d failed (errno %d %s)\n", s, pid_number, bind_errno, errmsg); else fprintf(stderr, "hwloc_set_membind %s failed (errno %d %s)\n", s, bind_errno, errmsg); free(s); } if (ret && !force) goto failed_binding; } if (got_cpubind) { if (hwloc_bitmap_iszero(cpubind_set)) { if (verbose >= 0) fprintf(stderr, "cannot cpubind to empty set\n"); if (!force) goto failed_binding; } if (verbose > 0) { char *s; hwloc_bitmap_asprintf(&s, cpubind_set); fprintf(stderr, "binding on cpu set %s\n", s); free(s); } if (single) hwloc_bitmap_singlify(cpubind_set); if (pid_number > 0) ret = hwloc_set_proc_cpubind(topology, pid, cpubind_set, cpubind_flags); else ret = hwloc_set_cpubind(topology, cpubind_set, cpubind_flags); if (ret && verbose >= 0) { int bind_errno = errno; const char *errmsg = strerror(bind_errno); char *s; hwloc_bitmap_asprintf(&s, cpubind_set); if (pid_number > 0) fprintf(stderr, "hwloc_set_proc_cpubind %s %d failed (errno %d %s)\n", s, pid_number, bind_errno, errmsg); else fprintf(stderr, "hwloc_set_cpubind %s failed (errno %d %s)\n", s, bind_errno, errmsg); free(s); } if (ret && !force) goto failed_binding; } hwloc_bitmap_free(cpubind_set); hwloc_bitmap_free(membind_set); hwloc_topology_destroy(topology); if (pid_number > 0) return EXIT_SUCCESS; if (0 == argc) { if (get_binding || get_last_cpu_location) return EXIT_SUCCESS; fprintf(stderr, "%s: nothing to do!\n", callname); return EXIT_FAILURE; } ret = execvp(argv[0], argv); if (ret) { fprintf(stderr, "%s: Failed to launch executable \"%s\"\n", callname, argv[0]); perror("execvp"); } return EXIT_FAILURE; failed_binding: hwloc_bitmap_free(cpubind_set); hwloc_bitmap_free(membind_set); hwloc_topology_destroy(topology); return EXIT_FAILURE; }
int main(int argc, char *argv[]) { const struct hwloc_topology_support *support; hwloc_topology_t topology; hwloc_const_bitmap_t topocpuset; hwloc_bitmap_t cpuset; unsigned long flags = 0; DIR *dir; struct dirent *dirent; int show_all = 0; int show_threads = 0; int get_last_cpu_location = 0; char *callname; char *pidcmd = NULL; int err; int opt; callname = strrchr(argv[0], '/'); if (!callname) callname = argv[0]; else callname++; /* skip argv[0], handle options */ argc--; argv++; while (argc >= 1) { opt = 0; if (!strcmp(argv[0], "-a")) show_all = 1; else if (!strcmp(argv[0], "-l") || !strcmp(argv[0], "--logical")) { logical = 1; } else if (!strcmp(argv[0], "-p") || !strcmp(argv[0], "--physical")) { logical = 0; } else if (!strcmp(argv[0], "-c") || !strcmp(argv[0], "--cpuset")) { show_cpuset = 1; } else if (!strcmp(argv[0], "-e") || !strncmp(argv[0], "--get-last-cpu-location", 10)) { get_last_cpu_location = 1; } else if (!strcmp(argv[0], "-t") || !strcmp(argv[0], "--threads")) { #ifdef HWLOC_LINUX_SYS show_threads = 1; #else fprintf (stderr, "Listing threads is currently only supported on Linux\n"); #endif } else if (!strcmp (argv[0], "--whole-system")) { flags |= HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM; } else if (!strcmp (argv[0], "--pid-cmd")) { if (argc < 2) { usage(callname, stdout); exit(EXIT_FAILURE); } pidcmd = argv[1]; opt = 1; } else { fprintf (stderr, "Unrecognized option: %s\n", argv[0]); usage (callname, stderr); exit(EXIT_FAILURE); } argc -= opt+1; argv += opt+1; } err = hwloc_topology_init(&topology); if (err) goto out; hwloc_topology_set_flags(topology, flags); err = hwloc_topology_load(topology); if (err) goto out_with_topology; support = hwloc_topology_get_support(topology); if (get_last_cpu_location) { if (!support->cpubind->get_proc_last_cpu_location) goto out_with_topology; } else { if (!support->cpubind->get_proc_cpubind) goto out_with_topology; } topocpuset = hwloc_topology_get_topology_cpuset(topology); dir = opendir("/proc"); if (!dir) goto 
out_with_topology; cpuset = hwloc_bitmap_alloc(); if (!cpuset) goto out_with_dir; while ((dirent = readdir(dir))) { long pid_number; hwloc_pid_t pid; char pidoutput[1024]; char *end; char name[64] = ""; /* management of threads */ unsigned boundthreads = 0, i; long *tids = NULL; /* NULL if process is not threaded */ hwloc_bitmap_t *tidcpusets = NULL; pid_number = strtol(dirent->d_name, &end, 10); if (*end) /* Not a number */ continue; pid = hwloc_pid_from_number(pid_number, 0); #ifdef HWLOC_LINUX_SYS { unsigned pathlen = 6 + strlen(dirent->d_name) + 1 + 7 + 1; char *path; int file; ssize_t n; path = malloc(pathlen); snprintf(path, pathlen, "/proc/%s/cmdline", dirent->d_name); file = open(path, O_RDONLY); free(path); if (file >= 0) { n = read(file, name, sizeof(name) - 1); close(file); if (n <= 0) /* Ignore kernel threads and errors */ continue; name[n] = 0; } } #endif /* HWLOC_LINUX_SYS */ if (show_threads) { #ifdef HWLOC_LINUX_SYS /* check if some threads must be displayed */ unsigned pathlen = 6 + strlen(dirent->d_name) + 1 + 4 + 1; char *path; DIR *taskdir; path = malloc(pathlen); snprintf(path, pathlen, "/proc/%s/task", dirent->d_name); taskdir = opendir(path); if (taskdir) { struct dirent *taskdirent; long tid; unsigned n = 0; /* count threads */ while ((taskdirent = readdir(taskdir))) { tid = strtol(taskdirent->d_name, &end, 10); if (*end) /* Not a number */ continue; n++; } if (n > 1) { /* if there's more than one thread, see if some are bound */ tids = malloc(n * sizeof(*tids)); tidcpusets = calloc(n+1, sizeof(*tidcpusets)); if (tids && tidcpusets) { /* reread the directory but gather info now */ rewinddir(taskdir); i = 0; while ((taskdirent = readdir(taskdir))) { tid = strtol(taskdirent->d_name, &end, 10); if (*end) /* Not a number */ continue; if (get_last_cpu_location) { if (hwloc_linux_get_tid_last_cpu_location(topology, tid, cpuset)) continue; } else { if (hwloc_linux_get_tid_cpubind(topology, tid, cpuset)) continue; } hwloc_bitmap_and(cpuset, cpuset, 
topocpuset); tids[i] = tid; tidcpusets[i] = hwloc_bitmap_dup(cpuset); i++; if (hwloc_bitmap_iszero(cpuset)) continue; if (hwloc_bitmap_isequal(cpuset, topocpuset) && !show_all) continue; boundthreads++; } } else { /* failed to alloc, behave as if there were no threads */ free(tids); tids = NULL; free(tidcpusets); tidcpusets = NULL; } } closedir(taskdir); } #endif /* HWLOC_LINUX_SYS */ } if (get_last_cpu_location) { if (hwloc_get_proc_last_cpu_location(topology, pid, cpuset, 0)) continue; } else { if (hwloc_get_proc_cpubind(topology, pid, cpuset, 0)) continue; } hwloc_bitmap_and(cpuset, cpuset, topocpuset); if (hwloc_bitmap_iszero(cpuset)) continue; /* don't print anything if the process isn't bound and if no threads are bound and if not showing all */ if (hwloc_bitmap_isequal(cpuset, topocpuset) && (!tids || !boundthreads) && !show_all) continue; pidoutput[0] = '\0'; if (pidcmd) { char *cmd; FILE *file; cmd = malloc(strlen(pidcmd)+1+5+2+1); sprintf(cmd, "%s %u", pidcmd, pid); file = popen(cmd, "r"); if (file) { if (fgets(pidoutput, sizeof(pidoutput), file)) { end = strchr(pidoutput, '\n'); if (end) *end = '\0'; } pclose(file); } free(cmd); } /* print the process */ print_task(topology, pid_number, name, cpuset, pidoutput[0] == '\0' ? NULL : pidoutput, 0); if (tids) /* print each tid we found (it's tidcpuset isn't NULL anymore) */ for(i=0; tidcpusets[i] != NULL; i++) { print_task(topology, tids[i], "", tidcpusets[i], NULL, 1); hwloc_bitmap_free(tidcpusets[i]); } /* free threads stuff */ free(tidcpusets); free(tids); } err = 0; hwloc_bitmap_free(cpuset); out_with_dir: closedir(dir); out_with_topology: hwloc_topology_destroy(topology); out: return err; }