int opal_hwloc_copy(hwloc_topology_t *dest, hwloc_topology_t src, opal_data_type_t type) { char *xml; int len; struct hwloc_topology_support *support, *destsupport; if (0 != hwloc_topology_export_xmlbuffer(src, &xml, &len)) { return OPAL_ERROR; } if (0 != hwloc_topology_init(dest)) { free(xml); return OPAL_ERROR; } if (0 != hwloc_topology_set_xmlbuffer(*dest, xml, len)) { hwloc_topology_destroy(*dest); free(xml); return OPAL_ERROR; } if (0 != hwloc_topology_load(*dest)) { hwloc_topology_destroy(*dest); free(xml); return OPAL_ERROR; } free(xml); /* get the available support - hwloc unfortunately does * not include this info in its xml support! */ support = (struct hwloc_topology_support*)hwloc_topology_get_support(src); destsupport = (struct hwloc_topology_support*)hwloc_topology_get_support(*dest); *destsupport = *support; return OPAL_SUCCESS; }
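/*
 * A minimal sketch (not from the OPAL sources) of the XML round-trip that
 * opal_hwloc_copy() above relies on; the helper name clone_topology() is
 * hypothetical and the hwloc 1.x export signature (no flags argument) is
 * assumed.  The point the OPAL code works around is that the support flags
 * returned by hwloc_topology_get_support() are not carried in the XML, so
 * they must be copied separately after the load, as done above.
 */
#include <hwloc.h>
#include <stdlib.h>

static int clone_topology(hwloc_topology_t src, hwloc_topology_t *dst)
{
    char *xml;
    int len;

    if (0 != hwloc_topology_export_xmlbuffer(src, &xml, &len)) {
        return -1;
    }
    if (0 != hwloc_topology_init(dst)) {
        free(xml);
        return -1;
    }
    if (0 != hwloc_topology_set_xmlbuffer(*dst, xml, len) ||
        0 != hwloc_topology_load(*dst)) {
        hwloc_topology_destroy(*dst);
        free(xml);
        return -1;
    }
    free(xml);
    return 0;
}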
int opal_hwloc_pack(opal_buffer_t *buffer, const void *src, int32_t num_vals, opal_data_type_t type) { /* NOTE: hwloc defines topology_t as a pointer to a struct! */ hwloc_topology_t t, *tarray = (hwloc_topology_t*)src; int rc, i; char *xmlbuffer=NULL; int len; struct hwloc_topology_support *support; for (i=0; i < num_vals; i++) { t = tarray[i]; /* extract an xml-buffer representation of the tree */ if (0 != hwloc_topology_export_xmlbuffer(t, &xmlbuffer, &len)) { return OPAL_ERROR; } /* add to buffer */ if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &xmlbuffer, 1, OPAL_STRING))) { free(xmlbuffer); return rc; } /* cleanup */ if (NULL != xmlbuffer) { free(xmlbuffer); } /* get the available support - hwloc unfortunately does * not include this info in its xml export! */ support = (struct hwloc_topology_support*)hwloc_topology_get_support(t); /* pack the discovery support */ if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, support->discovery, sizeof(struct hwloc_topology_discovery_support), OPAL_BYTE))) { return rc; } /* pack the cpubind support */ if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, support->cpubind, sizeof(struct hwloc_topology_cpubind_support), OPAL_BYTE))) { return rc; } /* pack the membind support */ if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, support->membind, sizeof(struct hwloc_topology_membind_support), OPAL_BYTE))) { return rc; } } return OPAL_SUCCESS; }
/****** binding_support/has_core_binding() *****************************************
*  NAME
*     has_core_binding() -- Check if core binding system call is supported.
*
*  SYNOPSIS
*     bool has_core_binding()
*
*  FUNCTION
*     Checks whether core binding is supported on this machine.  Even when it
*     is supported, topology information (the number of sockets and cores) may
*     still be unavailable; that information is needed by internal functions
*     in order to perform a correct core binding.
*
*  RESULT
*     bool - True if core binding can be done, false if not.
*
*  NOTES
*     MT-NOTE: has_core_binding() is not MT safe
*
*******************************************************************************/
bool has_core_binding(void)
{
#if HAVE_HWLOC
   const struct hwloc_topology_support *support;

   if (!initialized)
      init_topology();
   if (!sge_hwloc_topology)
      return false;
   support = hwloc_topology_get_support(sge_hwloc_topology);
   if (support->cpubind->set_proc_cpubind)
      return true;
#endif
   return false;
}
/****** binding_support/has_topology_information() *********************************
*  NAME
*     has_topology_information() -- Checks if current arch offers topology.
*
*  SYNOPSIS
*     bool has_topology_information()
*
*  FUNCTION
*     Checks whether the architecture this function is called on offers
*     processor topology information.
*
*  RESULT
*     bool - true if the arch offers topology information, false if not
*
*  NOTES
*     MT-NOTE: has_topology_information() is not MT safe
*
*******************************************************************************/
bool has_topology_information(void)
{
#if HAVE_HWLOC
   const struct hwloc_topology_support *support;

   if (!initialized)
      init_topology();
   if (!sge_hwloc_topology)
      return false;
   support = hwloc_topology_get_support(sge_hwloc_topology);
   if (support->discovery->pu)
      return true;
#endif
   return false;
}
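/*
 * A self-contained sketch (not part of the SGE sources) printing the same two
 * capabilities that has_core_binding() and has_topology_information() test:
 * whether another process can be bound, and whether PU discovery worked.
 * Assumes the hwloc 1.x API; error handling is kept minimal.
 */
#include <hwloc.h>
#include <stdio.h>

int main(void)
{
    hwloc_topology_t topology;
    const struct hwloc_topology_support *support;

    if (0 != hwloc_topology_init(&topology))
        return 1;
    if (0 != hwloc_topology_load(topology)) {
        hwloc_topology_destroy(topology);
        return 1;
    }

    support = hwloc_topology_get_support(topology);
    printf("can bind other processes: %s\n",
           support->cpubind->set_proc_cpubind ? "yes" : "no");
    printf("PU discovery available:   %s\n",
           support->discovery->pu ? "yes" : "no");

    hwloc_topology_destroy(topology);
    return 0;
}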
static void add_process_objects(hwloc_topology_t topology) { #ifdef HAVE_DIRENT_H hwloc_obj_t root; hwloc_bitmap_t cpuset; #ifdef HWLOC_LINUX_SYS hwloc_bitmap_t task_cpuset; #endif /* HWLOC_LINUX_SYS */ DIR *dir; struct dirent *dirent; const struct hwloc_topology_support *support; root = hwloc_get_root_obj(topology); support = hwloc_topology_get_support(topology); if (!support->cpubind->get_proc_cpubind) return; dir = opendir("/proc"); if (!dir) return; cpuset = hwloc_bitmap_alloc(); #ifdef HWLOC_LINUX_SYS task_cpuset = hwloc_bitmap_alloc(); #endif /* HWLOC_LINUX_SYS */ while ((dirent = readdir(dir))) { long local_pid_number; hwloc_pid_t local_pid; char *end; char name[64]; int proc_cpubind; local_pid_number = strtol(dirent->d_name, &end, 10); if (*end) /* Not a number */ continue; snprintf(name, sizeof(name), "%ld", local_pid_number); local_pid = hwloc_pid_from_number(local_pid_number, 0); proc_cpubind = hwloc_get_proc_cpubind(topology, local_pid, cpuset, 0) != -1; #ifdef HWLOC_LINUX_SYS { /* Get the process name */ char *path; unsigned pathlen = 6 + strlen(dirent->d_name) + 1 + 7 + 1; char cmd[64], *c; int file; ssize_t n; path = malloc(pathlen); snprintf(path, pathlen, "/proc/%s/cmdline", dirent->d_name); file = open(path, O_RDONLY); free(path); if (file >= 0) { n = read(file, cmd, sizeof(cmd) - 1); close(file); if (n <= 0) /* Ignore kernel threads and errors */ continue; cmd[n] = 0; if ((c = strchr(cmd, ' '))) *c = 0; snprintf(name, sizeof(name), "%ld %s", local_pid_number, cmd); } } { /* Get threads */ char *path; unsigned pathlen = 6+strlen(dirent->d_name) + 1 + 4 + 1; DIR *task_dir; struct dirent *task_dirent; path = malloc(pathlen); snprintf(path, pathlen, "/proc/%s/task", dirent->d_name); task_dir = opendir(path); free(path); if (task_dir) { while ((task_dirent = readdir(task_dir))) { long local_tid; char *task_end; char task_name[64]; local_tid = strtol(task_dirent->d_name, &task_end, 10); if (*task_end) /* Not a number, or the main task */ continue; if (hwloc_linux_get_tid_cpubind(topology, local_tid, task_cpuset)) continue; if (proc_cpubind && hwloc_bitmap_isequal(task_cpuset, cpuset)) continue; snprintf(task_name, sizeof(task_name), "%s %li", name, local_tid); insert_task(topology, task_cpuset, task_name); } closedir(task_dir); } } #endif /* HWLOC_LINUX_SYS */ if (!proc_cpubind) continue; if (hwloc_bitmap_isincluded(root->cpuset, cpuset)) continue; insert_task(topology, cpuset, name); } hwloc_bitmap_free(cpuset); #ifdef HWLOC_LINUX_SYS hwloc_bitmap_free(task_cpuset); #endif /* HWLOC_LINUX_SYS */ closedir(dir); #endif /* HAVE_DIRENT_H */ }
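/*
 * Hypothetical, stripped-down version of the /proc walk performed by
 * add_process_objects() above: it only checks the get_proc_cpubind support
 * flag and prints the binding of every numeric /proc entry.  The helper name
 * list_process_bindings() is an assumption, as is the Linux /proc layout;
 * hwloc 1.x API assumed.
 */
#include <hwloc.h>
#include <dirent.h>
#include <stdio.h>
#include <stdlib.h>

static void list_process_bindings(hwloc_topology_t topology)
{
    const struct hwloc_topology_support *support = hwloc_topology_get_support(topology);
    hwloc_bitmap_t cpuset;
    struct dirent *dirent;
    DIR *dir;

    if (!support->cpubind->get_proc_cpubind)
        return;                         /* cannot query other processes */
    if (!(dir = opendir("/proc")))
        return;

    cpuset = hwloc_bitmap_alloc();
    while ((dirent = readdir(dir))) {
        char *end, *s;
        long pid = strtol(dirent->d_name, &end, 10);
        if (*end)
            continue;                   /* not a pid */
        if (hwloc_get_proc_cpubind(topology, (hwloc_pid_t)pid, cpuset, 0) < 0)
            continue;                   /* process vanished or not permitted */
        hwloc_bitmap_list_asprintf(&s, cpuset);
        printf("pid %ld bound to %s\n", pid, s);
        free(s);
    }
    hwloc_bitmap_free(cpuset);
    closedir(dir);
}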
int orte_ess_base_proc_binding(void) { hwloc_obj_t node, obj; hwloc_cpuset_t cpus, nodeset; hwloc_obj_type_t target; unsigned int cache_level = 0; struct hwloc_topology_support *support; char *map; int ret; char *error=NULL; hwloc_cpuset_t mycpus; /* Determine if we were pre-bound or not */ if (NULL != getenv(OPAL_MCA_PREFIX"orte_bound_at_launch")) { orte_proc_is_bound = true; if (NULL != (map = getenv(OPAL_MCA_PREFIX"orte_base_applied_binding"))) { orte_proc_applied_binding = hwloc_bitmap_alloc(); if (0 != (ret = hwloc_bitmap_list_sscanf(orte_proc_applied_binding, map))) { error = "applied_binding parse"; goto error; } } } /* see if we were bound when launched */ if (!orte_proc_is_bound) { OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output, "%s Not bound at launch", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); /* we were not bound at launch */ if (NULL == opal_hwloc_topology) { /* there is nothing we can do, so just return */ return ORTE_SUCCESS; } support = (struct hwloc_topology_support*)hwloc_topology_get_support(opal_hwloc_topology); /* get our node object */ node = hwloc_get_root_obj(opal_hwloc_topology); nodeset = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, node); /* get our bindings */ cpus = hwloc_bitmap_alloc(); if (hwloc_get_cpubind(opal_hwloc_topology, cpus, HWLOC_CPUBIND_PROCESS) < 0) { /* we are NOT bound if get_cpubind fails, nor can we be bound - the * environment does not support it */ hwloc_bitmap_free(cpus); OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output, "%s Binding not supported", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); goto MOVEON; } /* we are bound if the two cpusets are not equal, * or if there is only ONE cpu available to us */ if (0 != hwloc_bitmap_compare(cpus, nodeset) || opal_hwloc_base_single_cpu(nodeset) || opal_hwloc_base_single_cpu(cpus)) { /* someone external set it - indicate it is set * so that we know */ orte_proc_is_bound = true; hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus); hwloc_bitmap_free(cpus); OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output, "%s Process was externally bound", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); } else if (support->cpubind->set_thisproc_cpubind && OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy) && OPAL_BIND_TO_NONE != OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { /* the system is capable of doing processor affinity, but it * has not yet been set - see if a slot_list was given */ hwloc_bitmap_zero(cpus); if (OPAL_BIND_TO_CPUSET == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { if (OPAL_SUCCESS != (ret = opal_hwloc_base_slot_list_parse(opal_hwloc_base_slot_list, opal_hwloc_topology, OPAL_HWLOC_LOGICAL, cpus))) { error = "Setting processor affinity failed"; hwloc_bitmap_free(cpus); goto error; } if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) { error = "Setting processor affinity failed"; hwloc_bitmap_free(cpus); goto error; } hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus); hwloc_bitmap_free(cpus); orte_proc_is_bound = true; OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output, "%s Process bound according to slot_list", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); } else { /* cleanup */ hwloc_bitmap_free(cpus); /* get the node rank */ if (ORTE_NODE_RANK_INVALID == orte_process_info.my_node_rank) { /* this is not an error - could be due to being * direct launched - so just ignore and leave * us unbound */ OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output, "%s Process not bound - no node rank available", 
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); goto MOVEON; } /* if the binding policy is hwthread, then we bind to the nrank-th * hwthread on this node */ if (OPAL_BIND_TO_HWTHREAD == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_PU, 0, orte_process_info.my_node_rank, OPAL_HWLOC_LOGICAL))) { ret = ORTE_ERR_NOT_FOUND; error = "Getting hwthread object"; goto error; } cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj); if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) { ret = ORTE_ERROR; error = "Setting processor affinity failed"; goto error; } hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus); hwloc_bitmap_free(cpus); OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output, "%s Process bound to hwthread", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); } else if (OPAL_BIND_TO_CORE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { /* if the binding policy is core, then we bind to the nrank-th * core on this node */ if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_CORE, 0, orte_process_info.my_node_rank, OPAL_HWLOC_LOGICAL))) { ret = ORTE_ERR_NOT_FOUND; error = "Getting core object"; goto error; } cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj); if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) { error = "Setting processor affinity failed"; ret = ORTE_ERROR; goto error; } hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus); OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output, "%s Process bound to core", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); } else { /* for all higher binding policies, we bind to the specified * object that the nrank-th core belongs to */ if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_CORE, 0, orte_process_info.my_node_rank, OPAL_HWLOC_LOGICAL))) { ret = ORTE_ERR_NOT_FOUND; error = "Getting core object"; goto error; } if (OPAL_BIND_TO_L1CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { target = HWLOC_OBJ_CACHE; cache_level = 1; } else if (OPAL_BIND_TO_L2CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { target = HWLOC_OBJ_CACHE; cache_level = 2; } else if (OPAL_BIND_TO_L3CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { target = HWLOC_OBJ_CACHE; cache_level = 3; } else if (OPAL_BIND_TO_SOCKET == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { target = HWLOC_OBJ_SOCKET; } else if (OPAL_BIND_TO_NUMA == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { target = HWLOC_OBJ_NODE; } else { ret = ORTE_ERR_NOT_FOUND; error = "Binding policy not known"; goto error; } for (obj = obj->parent; NULL != obj; obj = obj->parent) { if (target == obj->type) { if (HWLOC_OBJ_CACHE == target && cache_level != obj->attr->cache.depth) { continue; } /* this is the place! 
*/ cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj); if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) { ret = ORTE_ERROR; error = "Setting processor affinity failed"; goto error; } hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus); orte_proc_is_bound = true; OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output, "%s Process bound to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hwloc_obj_type_string(target))); break; } } if (!orte_proc_is_bound) { ret = ORTE_ERROR; error = "Setting processor affinity failed"; goto error; } } } } } else { OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output, "%s Process bound at launch", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); } MOVEON: /* get or update our local cpuset - it will get used multiple * times, so it's more efficient to keep a global copy */ opal_hwloc_base_get_local_cpuset(); /* get the cpus we are bound to */ mycpus = hwloc_bitmap_alloc(); if (hwloc_get_cpubind(opal_hwloc_topology, mycpus, HWLOC_CPUBIND_PROCESS) < 0) { if (NULL != orte_process_info.cpuset) { free(orte_process_info.cpuset); orte_process_info.cpuset = NULL; } if (opal_hwloc_report_bindings || 4 < opal_output_get_verbosity(orte_ess_base_framework.framework_output)) { opal_output(0, "MCW rank %d is not bound", ORTE_PROC_MY_NAME->vpid); } } else { /* store/update the string representation of our local binding */ if (NULL != orte_process_info.cpuset) { free(orte_process_info.cpuset); orte_process_info.cpuset = NULL; } hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, mycpus); /* report the binding, if requested */ if (opal_hwloc_report_bindings || 4 < opal_output_get_verbosity(orte_ess_base_framework.framework_output)) { char tmp1[1024], tmp2[1024]; if (OPAL_ERR_NOT_BOUND == opal_hwloc_base_cset2str(tmp1, sizeof(tmp1), opal_hwloc_topology, mycpus)) { opal_output(0, "MCW rank %d is not bound (or bound to all available processors)", ORTE_PROC_MY_NAME->vpid); } else { opal_hwloc_base_cset2mapstr(tmp2, sizeof(tmp2), opal_hwloc_topology, mycpus); opal_output(0, "MCW rank %d bound to %s: %s", ORTE_PROC_MY_NAME->vpid, tmp1, tmp2); } } } hwloc_bitmap_free(mycpus); /* push our cpuset so others can calculate our locality */ if (NULL != orte_process_info.cpuset) { OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_CPUSET, orte_process_info.cpuset, OPAL_STRING); } return ORTE_SUCCESS; error: if (ORTE_ERR_SILENT != ret) { orte_show_help("help-orte-runtime", "orte_init:startup:internal-failure", true, error, ORTE_ERROR_NAME(ret), ret); } return ORTE_ERR_SILENT; }
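/*
 * Condensed, hypothetical sketch of the external-binding detection used in
 * orte_ess_base_proc_binding() above: a process considers itself externally
 * bound when its current cpubind differs from the full set of CPUs allowed
 * on the node, or when only one CPU is available.  The OPAL helper
 * opal_hwloc_base_get_available_cpus() is replaced here by
 * hwloc_topology_get_allowed_cpuset(); hwloc 1.x API assumed.
 */
#include <hwloc.h>
#include <stdbool.h>

static bool externally_bound(hwloc_topology_t topology)
{
    hwloc_bitmap_t cpus = hwloc_bitmap_alloc();
    hwloc_const_cpuset_t allowed = hwloc_topology_get_allowed_cpuset(topology);
    bool bound;

    if (hwloc_get_cpubind(topology, cpus, HWLOC_CPUBIND_PROCESS) < 0) {
        hwloc_bitmap_free(cpus);
        return false;                   /* binding not supported here */
    }
    bound = !hwloc_bitmap_isequal(cpus, allowed) ||
            hwloc_bitmap_weight(cpus) == 1;
    hwloc_bitmap_free(cpus);
    return bound;
}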
}

/* print the corresponding NUMA nodes */
hwloc_bitmap_asprintf(&s, set);
printf("bound to nodeset %s which contains:\n", s);
free(s);
hwloc_bitmap_foreach_begin(i, set) {
    obj = hwloc_get_numanode_obj_by_os_index(topology, i);
    printf("  node #%u (OS index %u) with %lld bytes of memory\n",
           obj->logical_index, i, (unsigned long long) obj->memory.local_memory);
} hwloc_bitmap_foreach_end();
hwloc_bitmap_free(set);

/* check alloc+bind support */
support = hwloc_topology_get_support(topology);
if (support->membind->bind_membind) {
    printf("BIND memory binding policy is supported\n");
} else {
    printf("BIND memory binding policy is NOT supported\n");
}
if (support->membind->alloc_membind) {
    printf("Allocating bound memory is supported\n");
} else {
    printf("Allocating bound memory is NOT supported\n");
}

/* allocate memory on each node */
printf("allocating memory on each node\n");
obj = NULL;
buffer = NULL;
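/*
 * Hedged sketch of the "allocate on each node" step the fragment above leads
 * into: check the membind support flags, then allocate memory bound to one
 * NUMA node.  The helper name alloc_on_first_node() is hypothetical; the
 * hwloc 1.x nodeset API is assumed, and error handling is minimal.  Memory
 * returned here would later be released with hwloc_free().
 */
#include <hwloc.h>

static void *alloc_on_first_node(hwloc_topology_t topology, size_t len)
{
    const struct hwloc_topology_support *support = hwloc_topology_get_support(topology);
    hwloc_obj_t node;

    if (!support->membind->alloc_membind)
        return NULL;                    /* bound allocation not supported */

    node = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, 0);
    if (!node)
        return NULL;

    return hwloc_alloc_membind_nodeset(topology, len, node->nodeset,
                                       HWLOC_MEMBIND_BIND, 0);
}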
static int bind_to_cpuset(orte_job_t *jdata) { /* bind each process to opal_hwloc_base_cpu_set */ int i, j; orte_job_map_t *map; orte_node_t *node; orte_proc_t *proc; struct hwloc_topology_support *support; opal_hwloc_topo_data_t *sum; hwloc_obj_t root; opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps: bind job %s to cpuset %s", ORTE_JOBID_PRINT(jdata->jobid), opal_hwloc_base_cpu_set); /* initialize */ map = jdata->map; for (i=0; i < map->nodes->size; i++) { if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) { continue; } if (!orte_do_not_launch) { /* if we don't want to launch, then we are just testing the system, * so ignore questions about support capabilities */ support = (struct hwloc_topology_support*)hwloc_topology_get_support(node->topology); /* check if topology supports cpubind - have to be careful here * as Linux doesn't currently support thread-level binding. This * may change in the future, though, and it isn't clear how hwloc * interprets the current behavior. So check both flags to be sure. */ if (!support->cpubind->set_thisproc_cpubind && !support->cpubind->set_thisthread_cpubind) { if (!OPAL_BINDING_REQUIRED(opal_hwloc_binding_policy)) { /* we are not required to bind, so ignore this */ continue; } orte_show_help("help-orte-rmaps-base.txt", "rmaps:cpubind-not-supported", true, node->name); return ORTE_ERR_SILENT; } /* check if topology supports membind - have to be careful here * as hwloc treats this differently than I (at least) would have * expected. Per hwloc, Linux memory binding is at the thread, * and not process, level. Thus, hwloc sets the "thisproc" flag * to "false" on all Linux systems, and uses the "thisthread" flag * to indicate binding capability */ if (!support->membind->set_thisproc_membind && !support->membind->set_thisthread_membind) { if (OPAL_HWLOC_BASE_MBFA_WARN == opal_hwloc_base_mbfa && !membind_warned) { orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported", true, node->name); membind_warned = true; } else if (OPAL_HWLOC_BASE_MBFA_ERROR == opal_hwloc_base_mbfa) { orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported-fatal", true, node->name); return ORTE_ERR_SILENT; } } } root = hwloc_get_root_obj(node->topology); if (NULL == root->userdata) { /* something went wrong */ ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } sum = (opal_hwloc_topo_data_t*)root->userdata; if (NULL == sum->available) { /* another error */ ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } for (j=0; j < node->procs->size; j++) { if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) { continue; } /* ignore procs from other jobs */ if (proc->name.jobid != jdata->jobid) { continue; } /* ignore procs that have already been bound - should * never happen, but safer */ if (NULL != proc->cpu_bitmap) { continue; } hwloc_bitmap_list_asprintf(&proc->cpu_bitmap, sum->available); } } return ORTE_SUCCESS; }
int main(int argc, char *argv[]) { const struct hwloc_topology_support *support; hwloc_topology_t topology; hwloc_const_bitmap_t topocpuset; hwloc_bitmap_t cpuset; unsigned long flags = 0; DIR *dir; struct dirent *dirent; int show_all = 0; int show_threads = 0; int get_last_cpu_location = 0; char *callname; char *pidcmd = NULL; int err; int opt; callname = strrchr(argv[0], '/'); if (!callname) callname = argv[0]; else callname++; /* skip argv[0], handle options */ argc--; argv++; hwloc_utils_check_api_version(callname); while (argc >= 1) { opt = 0; if (!strcmp(argv[0], "-a")) show_all = 1; else if (!strcmp(argv[0], "-l") || !strcmp(argv[0], "--logical")) { logical = 1; } else if (!strcmp(argv[0], "-p") || !strcmp(argv[0], "--physical")) { logical = 0; } else if (!strcmp(argv[0], "-c") || !strcmp(argv[0], "--cpuset")) { show_cpuset = 1; } else if (!strcmp(argv[0], "-e") || !strncmp(argv[0], "--get-last-cpu-location", 10)) { get_last_cpu_location = 1; } else if (!strcmp(argv[0], "-t") || !strcmp(argv[0], "--threads")) { #ifdef HWLOC_LINUX_SYS show_threads = 1; #else fprintf (stderr, "Listing threads is currently only supported on Linux\n"); #endif } else if (!strcmp (argv[0], "--whole-system")) { flags |= HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM; } else if (!strcmp (argv[0], "--pid-cmd")) { if (argc < 2) { usage(callname, stdout); exit(EXIT_FAILURE); } pidcmd = argv[1]; opt = 1; } else { fprintf (stderr, "Unrecognized option: %s\n", argv[0]); usage (callname, stderr); exit(EXIT_FAILURE); } argc -= opt+1; argv += opt+1; } err = hwloc_topology_init(&topology); if (err) goto out; hwloc_topology_set_flags(topology, flags); err = hwloc_topology_load(topology); if (err) goto out_with_topology; support = hwloc_topology_get_support(topology); if (get_last_cpu_location) { if (!support->cpubind->get_proc_last_cpu_location) goto out_with_topology; } else { if (!support->cpubind->get_proc_cpubind) goto out_with_topology; } topocpuset = hwloc_topology_get_topology_cpuset(topology); dir = opendir("/proc"); if (!dir) goto out_with_topology; cpuset = hwloc_bitmap_alloc(); if (!cpuset) goto out_with_dir; while ((dirent = readdir(dir))) { long pid_number; hwloc_pid_t pid; char pidoutput[1024]; char *end; char name[64] = ""; /* management of threads */ unsigned boundthreads = 0, i; long *tids = NULL; /* NULL if process is not threaded */ hwloc_bitmap_t *tidcpusets = NULL; pid_number = strtol(dirent->d_name, &end, 10); if (*end) /* Not a number */ continue; pid = hwloc_pid_from_number(pid_number, 0); #ifdef HWLOC_LINUX_SYS { unsigned pathlen = 6 + strlen(dirent->d_name) + 1 + 7 + 1; char *path; int file; ssize_t n; path = malloc(pathlen); snprintf(path, pathlen, "/proc/%s/cmdline", dirent->d_name); file = open(path, O_RDONLY); free(path); if (file >= 0) { n = read(file, name, sizeof(name) - 1); close(file); if (n <= 0) /* Ignore kernel threads and errors */ continue; name[n] = 0; } } #endif /* HWLOC_LINUX_SYS */ if (show_threads) { #ifdef HWLOC_LINUX_SYS /* check if some threads must be displayed */ unsigned pathlen = 6 + strlen(dirent->d_name) + 1 + 4 + 1; char *path; DIR *taskdir; path = malloc(pathlen); snprintf(path, pathlen, "/proc/%s/task", dirent->d_name); taskdir = opendir(path); if (taskdir) { struct dirent *taskdirent; long tid; unsigned n = 0; /* count threads */ while ((taskdirent = readdir(taskdir))) { tid = strtol(taskdirent->d_name, &end, 10); if (*end) /* Not a number */ continue; n++; } if (n > 1) { /* if there's more than one thread, see if some are bound */ tids = malloc(n * sizeof(*tids)); 
tidcpusets = calloc(n+1, sizeof(*tidcpusets)); if (tids && tidcpusets) { /* reread the directory but gather info now */ rewinddir(taskdir); i = 0; while ((taskdirent = readdir(taskdir))) { tid = strtol(taskdirent->d_name, &end, 10); if (*end) /* Not a number */ continue; if (get_last_cpu_location) { if (hwloc_linux_get_tid_last_cpu_location(topology, tid, cpuset)) continue; } else { if (hwloc_linux_get_tid_cpubind(topology, tid, cpuset)) continue; } hwloc_bitmap_and(cpuset, cpuset, topocpuset); tids[i] = tid; tidcpusets[i] = hwloc_bitmap_dup(cpuset); i++; if (hwloc_bitmap_iszero(cpuset)) continue; if (hwloc_bitmap_isequal(cpuset, topocpuset) && !show_all) continue; boundthreads++; } } else { /* failed to alloc, behave as if there were no threads */ free(tids); tids = NULL; free(tidcpusets); tidcpusets = NULL; } } closedir(taskdir); } #endif /* HWLOC_LINUX_SYS */ } if (get_last_cpu_location) { if (hwloc_get_proc_last_cpu_location(topology, pid, cpuset, 0)) continue; } else { if (hwloc_get_proc_cpubind(topology, pid, cpuset, 0)) continue; } hwloc_bitmap_and(cpuset, cpuset, topocpuset); if (hwloc_bitmap_iszero(cpuset)) continue; /* don't print anything if the process isn't bound and if no threads are bound and if not showing all */ if (hwloc_bitmap_isequal(cpuset, topocpuset) && (!tids || !boundthreads) && !show_all) continue; pidoutput[0] = '\0'; if (pidcmd) { char *cmd; FILE *file; cmd = malloc(strlen(pidcmd)+1+5+2+1); sprintf(cmd, "%s %u", pidcmd, pid); file = popen(cmd, "r"); if (file) { if (fgets(pidoutput, sizeof(pidoutput), file)) { end = strchr(pidoutput, '\n'); if (end) *end = '\0'; } pclose(file); } free(cmd); } /* print the process */ print_task(topology, pid_number, name, cpuset, pidoutput[0] == '\0' ? NULL : pidoutput, 0); if (tids) /* print each tid we found (it's tidcpuset isn't NULL anymore) */ for(i=0; tidcpusets[i] != NULL; i++) { print_task(topology, tids[i], "", tidcpusets[i], NULL, 1); hwloc_bitmap_free(tidcpusets[i]); } /* free threads stuff */ free(tidcpusets); free(tids); } err = 0; hwloc_bitmap_free(cpuset); out_with_dir: closedir(dir); out_with_topology: hwloc_topology_destroy(topology); out: return err; }
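/*
 * Hypothetical Linux-only helper distilled from the thread handling in the
 * hwloc-ps code above: report the binding of every thread of one process via
 * /proc/<pid>/task.  Uses hwloc_linux_get_tid_cpubind() from hwloc/linux.h;
 * the helper name and the /proc path layout are assumptions.
 */
#include <hwloc.h>
#include <hwloc/linux.h>
#include <dirent.h>
#include <stdio.h>
#include <stdlib.h>

static void list_thread_bindings(hwloc_topology_t topology, long pid)
{
    char path[64];
    DIR *dir;
    struct dirent *dirent;
    hwloc_bitmap_t set = hwloc_bitmap_alloc();

    snprintf(path, sizeof(path), "/proc/%ld/task", pid);
    if ((dir = opendir(path))) {
        while ((dirent = readdir(dir))) {
            char *end, *s;
            long tid = strtol(dirent->d_name, &end, 10);
            if (*end)
                continue;               /* skips "." and ".." */
            if (hwloc_linux_get_tid_cpubind(topology, tid, set))
                continue;
            hwloc_bitmap_list_asprintf(&s, set);
            printf("  tid %ld bound to %s\n", tid, s);
            free(s);
        }
        closedir(dir);
    }
    hwloc_bitmap_free(set);
}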
static void print_hwloc_obj(char **output, char *prefix, hwloc_topology_t topo, hwloc_obj_t obj) { hwloc_obj_t obj2; char string[1024], *tmp, *tmp2, *pfx; unsigned i; struct hwloc_topology_support *support; /* print the object type */ hwloc_obj_type_snprintf(string, 1024, obj, 1); asprintf(&pfx, "\n%s\t", (NULL == prefix) ? "" : prefix); asprintf(&tmp, "%sType: %s Number of child objects: %u%sName=%s", (NULL == prefix) ? "" : prefix, string, obj->arity, pfx, (NULL == obj->name) ? "NULL" : obj->name); if (0 < hwloc_obj_attr_snprintf(string, 1024, obj, pfx, 1)) { /* print the attributes */ asprintf(&tmp2, "%s%s%s", tmp, pfx, string); free(tmp); tmp = tmp2; } /* print the cpusets - apparently, some new HWLOC types don't * have cpusets, so protect ourselves here */ if (NULL != obj->cpuset) { hwloc_bitmap_snprintf(string, OPAL_HWLOC_MAX_STRING, obj->cpuset); asprintf(&tmp2, "%s%sCpuset: %s", tmp, pfx, string); free(tmp); tmp = tmp2; } if (NULL != obj->online_cpuset) { hwloc_bitmap_snprintf(string, OPAL_HWLOC_MAX_STRING, obj->online_cpuset); asprintf(&tmp2, "%s%sOnline: %s", tmp, pfx, string); free(tmp); tmp = tmp2; } if (NULL != obj->allowed_cpuset) { hwloc_bitmap_snprintf(string, OPAL_HWLOC_MAX_STRING, obj->allowed_cpuset); asprintf(&tmp2, "%s%sAllowed: %s", tmp, pfx, string); free(tmp); tmp = tmp2; } if (HWLOC_OBJ_MACHINE == obj->type) { /* root level object - add support values */ support = (struct hwloc_topology_support*)hwloc_topology_get_support(topo); asprintf(&tmp2, "%s%sBind CPU proc: %s%sBind CPU thread: %s", tmp, pfx, (support->cpubind->set_thisproc_cpubind) ? "TRUE" : "FALSE", pfx, (support->cpubind->set_thisthread_cpubind) ? "TRUE" : "FALSE"); free(tmp); tmp = tmp2; asprintf(&tmp2, "%s%sBind MEM proc: %s%sBind MEM thread: %s", tmp, pfx, (support->membind->set_thisproc_membind) ? "TRUE" : "FALSE", pfx, (support->membind->set_thisthread_membind) ? "TRUE" : "FALSE"); free(tmp); tmp = tmp2; } asprintf(&tmp2, "%s%s\n", (NULL == *output) ? "" : *output, tmp); free(tmp); free(pfx); asprintf(&pfx, "%s\t", (NULL == prefix) ? "" : prefix); for (i=0; i < obj->arity; i++) { obj2 = obj->children[i]; /* print the object */ print_hwloc_obj(&tmp2, pfx, topo, obj2); } free(pfx); if (NULL != *output) { free(*output); } *output = tmp2; }
int orte_ess_base_proc_binding(void) { #if OPAL_HAVE_HWLOC hwloc_obj_t node, obj; hwloc_cpuset_t cpus, nodeset; hwloc_obj_type_t target; unsigned int cache_level = 0; struct hwloc_topology_support *support; char *map; int ret; char *error; /* Determine if we were pre-bound or not */ if (NULL != getenv("OMPI_MCA_orte_bound_at_launch")) { orte_proc_is_bound = true; if (NULL != (map = getenv("OMPI_MCA_orte_base_applied_binding"))) { orte_proc_applied_binding = hwloc_bitmap_alloc(); if (0 != (ret = hwloc_bitmap_list_sscanf(orte_proc_applied_binding, map))) { error = "applied_binding parse"; goto error; } } } /* see if we were bound when launched */ if (!orte_proc_is_bound) { OPAL_OUTPUT_VERBOSE((5, orte_ess_base_output, "%s Not bound at launch", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); /* we were not bound at launch */ if (NULL != opal_hwloc_topology) { support = (struct hwloc_topology_support*)hwloc_topology_get_support(opal_hwloc_topology); /* get our node object */ node = hwloc_get_root_obj(opal_hwloc_topology); nodeset = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, node); /* get our bindings */ cpus = hwloc_bitmap_alloc(); if (hwloc_get_cpubind(opal_hwloc_topology, cpus, HWLOC_CPUBIND_PROCESS) < 0) { /* we are NOT bound if get_cpubind fails, nor can we be bound - the * environment does not support it */ hwloc_bitmap_free(cpus); OPAL_OUTPUT_VERBOSE((5, orte_ess_base_output, "%s Binding not supported", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); goto MOVEON; } /* we are bound if the two cpusets are not equal, * or if there is only ONE cpu available to us */ if (0 != hwloc_bitmap_compare(cpus, nodeset) || opal_hwloc_base_single_cpu(nodeset) || opal_hwloc_base_single_cpu(cpus)) { /* someone external set it - indicate it is set * so that we know */ orte_proc_is_bound = true; hwloc_bitmap_free(cpus); OPAL_OUTPUT_VERBOSE((5, orte_ess_base_output, "%s Process was externally bound", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); } else if (support->cpubind->set_thisproc_cpubind && OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy) && OPAL_BIND_TO_NONE != OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { /* the system is capable of doing processor affinity, but it * has not yet been set - see if a slot_list was given */ hwloc_bitmap_zero(cpus); if (OPAL_BIND_TO_CPUSET == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { if (OPAL_SUCCESS != (ret = opal_hwloc_base_slot_list_parse(opal_hwloc_base_slot_list, opal_hwloc_topology, cpus))) { error = "Setting processor affinity failed"; hwloc_bitmap_free(cpus); goto error; } if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) { error = "Setting processor affinity failed"; hwloc_bitmap_free(cpus); goto error; } /* try to find a level and index for this location */ opal_hwloc_base_get_level_and_index(cpus, &orte_process_info.bind_level, &orte_process_info.bind_idx); /* cleanup */ hwloc_bitmap_free(cpus); orte_proc_is_bound = true; OPAL_OUTPUT_VERBOSE((5, orte_ess_base_output, "%s Process bound according to slot_list", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); } else { /* cleanup */ hwloc_bitmap_free(cpus); /* get the node rank */ if (ORTE_NODE_RANK_INVALID == orte_process_info.my_node_rank) { /* this is not an error - could be due to being * direct launched - so just ignore and leave * us unbound */ OPAL_OUTPUT_VERBOSE((5, orte_ess_base_output, "%s Process not bound - no node rank available", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); goto MOVEON; } /* if the binding policy is hwthread, then we bind to the nrank-th * hwthread on this node */ if 
(OPAL_BIND_TO_HWTHREAD == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_PU, 0, orte_process_info.my_node_rank, OPAL_HWLOC_LOGICAL))) { ret = ORTE_ERR_NOT_FOUND; error = "Getting hwthread object"; goto error; } cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj); if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) { ret = ORTE_ERROR; error = "Setting processor affinity failed"; goto error; } orte_process_info.bind_level = OPAL_HWLOC_HWTHREAD_LEVEL; orte_process_info.bind_idx = orte_process_info.my_node_rank; OPAL_OUTPUT_VERBOSE((5, orte_ess_base_output, "%s Process bound to hwthread", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); } else if (OPAL_BIND_TO_CORE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { /* if the binding policy is core, then we bind to the nrank-th * core on this node */ if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_CORE, 0, orte_process_info.my_node_rank, OPAL_HWLOC_LOGICAL))) { ret = ORTE_ERR_NOT_FOUND; error = "Getting core object"; goto error; } cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj); if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) { error = "Setting processor affinity failed"; ret = ORTE_ERROR; goto error; } orte_process_info.bind_level = OPAL_HWLOC_CORE_LEVEL; orte_process_info.bind_idx = orte_process_info.my_node_rank; OPAL_OUTPUT_VERBOSE((5, orte_ess_base_output, "%s Process bound to core", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); } else { /* for all higher binding policies, we bind to the specified * object that the nrank-th core belongs to */ if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_CORE, 0, orte_process_info.my_node_rank, OPAL_HWLOC_LOGICAL))) { ret = ORTE_ERR_NOT_FOUND; error = "Getting core object"; goto error; } if (OPAL_BIND_TO_L1CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { target = HWLOC_OBJ_CACHE; cache_level = 1; orte_process_info.bind_level = OPAL_HWLOC_L1CACHE_LEVEL; } else if (OPAL_BIND_TO_L2CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { target = HWLOC_OBJ_CACHE; cache_level = 2; orte_process_info.bind_level = OPAL_HWLOC_L2CACHE_LEVEL; } else if (OPAL_BIND_TO_L3CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { target = HWLOC_OBJ_CACHE; cache_level = 3; orte_process_info.bind_level = OPAL_HWLOC_L3CACHE_LEVEL; } else if (OPAL_BIND_TO_SOCKET == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { target = HWLOC_OBJ_SOCKET; orte_process_info.bind_level = OPAL_HWLOC_SOCKET_LEVEL; } else if (OPAL_BIND_TO_NUMA == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { target = HWLOC_OBJ_NODE; orte_process_info.bind_level = OPAL_HWLOC_NUMA_LEVEL; } else { ret = ORTE_ERR_NOT_FOUND; error = "Binding policy not known"; goto error; } for (obj = obj->parent; NULL != obj; obj = obj->parent) { if (target == obj->type) { if (HWLOC_OBJ_CACHE == target && cache_level != obj->attr->cache.depth) { continue; } /* this is the place! 
*/ cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, obj); if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) { ret = ORTE_ERROR; error = "Setting processor affinity failed"; goto error; } orte_process_info.bind_idx = opal_hwloc_base_get_obj_idx(opal_hwloc_topology, obj, OPAL_HWLOC_LOGICAL); orte_proc_is_bound = true; OPAL_OUTPUT_VERBOSE((5, orte_ess_base_output, "%s Process bound to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), opal_hwloc_base_print_level(orte_process_info.bind_level))); break; } } if (!orte_proc_is_bound) { ret = ORTE_ERROR; error = "Setting processor affinity failed"; goto error; } } } } } } else { OPAL_OUTPUT_VERBOSE((5, orte_ess_base_output, "%s Process bound at launch", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); } MOVEON: /* get or update our local cpuset - it will get used multiple * times, so it's more efficient to keep a global copy */ opal_hwloc_base_get_local_cpuset(); /* report bindings, if requested */ if (opal_hwloc_report_bindings) { char bindings[64]; hwloc_obj_t root; hwloc_cpuset_t cpus; /* get the root object for this node */ root = hwloc_get_root_obj(opal_hwloc_topology); cpus = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, root); /* we are not bound if this equals our cpuset */ if (0 == hwloc_bitmap_compare(cpus, opal_hwloc_my_cpuset)) { opal_output(0, "%s is not bound", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); } else { hwloc_bitmap_list_snprintf(bindings, 64, opal_hwloc_my_cpuset); opal_output(0, "%s is bound to cpus %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), bindings); } } return ORTE_SUCCESS; error: if (ORTE_ERR_SILENT != ret) { orte_show_help("help-orte-runtime", "orte_init:startup:internal-failure", true, error, ORTE_ERROR_NAME(ret), ret); } return ORTE_ERR_SILENT; #else return ORTE_SUCCESS; #endif }
void chpl_topo_init(void) { // // We only load hwloc topology information in configurations where // the locale model is other than "flat" or the tasking is based on // Qthreads (which will use the topology we load). We don't use // it otherwise (so far) because loading it is somewhat expensive. // if (strcmp(CHPL_LOCALE_MODEL, "flat") != 0 || strcmp(CHPL_TASKS, "qthreads") == 0) { haveTopology = true; } else { haveTopology = false; return; } // Check hwloc API version. // Require at least hwloc version 1.11 (we need 1.11.5 to not crash // in some NUMA configurations). // Check both at build time and run time. #define REQUIRE_HWLOC_VERSION 0x00010b00 #if HWLOC_API_VERSION < REQUIRE_HWLOC_VERSION #error hwloc version 1.11.5 or newer is required #endif CHK_ERR(hwloc_get_api_version() >= REQUIRE_HWLOC_VERSION); // // Allocate and initialize topology object. // CHK_ERR_ERRNO(hwloc_topology_init(&topology) == 0); // // Perform the topology detection. // CHK_ERR_ERRNO(hwloc_topology_load(topology) == 0); // // What is supported? // topoSupport = hwloc_topology_get_support(topology); // // TODO: update comment // For now, don't support setting memory locality when comm=ugni or // comm=gasnet, seg!=everything. Those are the two configurations in // which we use hugepages and/or memory registered with the comm // interface, both of which may be a problem for the set-membind call. // We will have other ways to achieve locality for these configs in // the future. // do_set_area_membind = true; if ((strcmp(CHPL_COMM, "gasnet") == 0 && strcmp(CHPL_GASNET_SEGMENT, "everything") != 0)) { do_set_area_membind = false; } // // We need depth information. // topoDepth = hwloc_topology_get_depth(topology); // // How many NUMA domains do we have? // { int level; // // Note: If there are multiple levels with NUMA nodes, this finds // only the uppermost. // for (level = 0, numaLevel = -1; level < topoDepth && numaLevel == -1; level++) { if (hwloc_get_depth_type(topology, level) == HWLOC_OBJ_NUMANODE) { numaLevel = level; } } } // // Find the NUMA nodes, that is, the objects at numaLevel that also // have CPUs. This is as opposed to things like Xeon Phi HBM, which // is memory-only, no CPUs. // { const hwloc_cpuset_t cpusetAll = hwloc_get_root_obj(topology)->cpuset; numNumaDomains = hwloc_get_nbobjs_inside_cpuset_by_depth(topology, cpusetAll, numaLevel); } }
void chpl_topo_init(void) { // // For now we don't load topology information for locModel=flat, since // we won't use it in that case and loading it is somewhat expensive. // Eventually we will probably load it even for locModel=flat and use // it as the information source for what's currently in chplsys, and // also pass it to Qthreads when we use that (so it doesn't load it // again), but that's work for the future. // haveTopology = (strcmp(CHPL_LOCALE_MODEL, "flat") != 0) ? true : false; if (!haveTopology) { return; } // Check hwloc API version. // Require at least hwloc version 1.11 (we need 1.11.5 to not crash // in some NUMA configurations). // Check both at build time and run time. #define REQUIRE_HWLOC_VERSION 0x00010b00 #if HWLOC_API_VERSION < REQUIRE_HWLOC_VERSION #error hwloc version 1.11.5 or newer is required #else { unsigned version = hwloc_get_api_version(); // check that the version is at least REQUIRE_HWLOC_VERSION if (version < REQUIRE_HWLOC_VERSION) chpl_internal_error("hwloc version 1.11.5 or newer is required"); } #endif // // Allocate and initialize topology object. // if (hwloc_topology_init(&topology)) { report_error("hwloc_topology_init()", errno); } // // Perform the topology detection. // if (hwloc_topology_load(topology)) { report_error("hwloc_topology_load()", errno); } // // What is supported? // topoSupport = hwloc_topology_get_support(topology); // // TODO: update comment // For now, don't support setting memory locality when comm=ugni or // comm=gasnet, seg!=everything. Those are the two configurations in // which we use hugepages and/or memory registered with the comm // interface, both of which may be a problem for the set-membind call. // We will have other ways to achieve locality for these configs in // the future. // do_set_area_membind = true; if ((strcmp(CHPL_COMM, "gasnet") == 0 && strcmp(CHPL_GASNET_SEGMENT, "everything") != 0)) { do_set_area_membind = false; } // // We need depth information. // topoDepth = hwloc_topology_get_depth(topology); // // How many NUMA domains do we have? // { int level; // // Note: If there are multiple levels with NUMA nodes, this finds // only the uppermost. // for (level = 0, numaLevel = -1; level < topoDepth && numaLevel == -1; level++) { if (hwloc_get_depth_type(topology, level) == HWLOC_OBJ_NUMANODE) { numaLevel = level; } } } // // Find the NUMA nodes, that is, the objects at numaLevel that also // have CPUs. This is as opposed to things like Xeon Phi HBM, which // is memory-only, no CPUs. // { const hwloc_cpuset_t cpusetAll = hwloc_get_root_obj(topology)->cpuset; numNumaDomains = hwloc_get_nbobjs_inside_cpuset_by_depth(topology, cpusetAll, numaLevel); } }
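/*
 * A compact, hypothetical sketch of the NUMA-domain discovery performed in
 * chpl_topo_init() above: walk the topology depths until the NUMANODE level
 * is found, then count the NUMA objects that actually own CPUs (skipping
 * CPU-less memory such as Xeon Phi HBM).  The helper name is an assumption;
 * hwloc 1.x API assumed (in hwloc 2.x, NUMA nodes leave the main hierarchy).
 */
#include <hwloc.h>

static int count_cpu_numa_domains(hwloc_topology_t topology)
{
    int depth, numa_depth = -1;
    int topo_depth = hwloc_topology_get_depth(topology);
    hwloc_const_cpuset_t all = hwloc_get_root_obj(topology)->cpuset;

    for (depth = 0; depth < topo_depth; depth++) {
        if (hwloc_get_depth_type(topology, depth) == HWLOC_OBJ_NUMANODE) {
            numa_depth = depth;
            break;
        }
    }
    if (numa_depth < 0)
        return 1;                       /* no NUMA level reported: treat as one domain */

    /* only count NUMA objects that have CPUs */
    return hwloc_get_nbobjs_inside_cpuset_by_depth(topology, all, numa_depth);
}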
int main(void) { const struct hwloc_topology_support *support; char *buffer; hwloc_topology_t topology; hwloc_bitmap_t set = hwloc_bitmap_alloc(); hwloc_bitmap_t total = hwloc_bitmap_alloc(); hwloc_obj_t node; char *s; int err; err = hwloc_topology_init(&topology); assert(!err); err = hwloc_topology_load(topology); assert(!err); support = hwloc_topology_get_support(topology); if (!support->membind->get_area_memlocation) goto out; buffer = hwloc_alloc(topology, LEN); assert(buffer); printf("buffer %p length %u\n", buffer, LEN); err = hwloc_get_area_memlocation(topology, buffer, LEN, set, HWLOC_MEMBIND_BYNODESET); if (err < 0 && errno == ENOSYS) { fprintf(stderr, "hwloc_get_area_memlocation() failed with ENOSYS, aborting\n"); goto out_with_buffer; } assert(!err); hwloc_bitmap_asprintf(&s, set); printf("address %p length %u allocated in nodeset %s\n", buffer, LEN, s); free(s); hwloc_bitmap_copy(total, set); node = NULL; next1: node = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, node); if (!node) goto out_with_buffer; if (!node->memory.local_memory) goto next1; printf("binding to 1st node and touching 1st quarter\n"); err = hwloc_set_area_membind(topology, buffer, LEN, node->nodeset, HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET); if (err < 0 && errno == ENOSYS) { fprintf(stderr, "hwloc_set_area_membind() failed with ENOSYS, aborting\n"); goto out_with_buffer; } assert(!err); memset(buffer, 0, LEN/4); err = hwloc_get_area_memlocation(topology, buffer, 1, set, HWLOC_MEMBIND_BYNODESET); assert(!err); hwloc_bitmap_asprintf(&s, set); printf("address %p length %u allocated in nodeset %s\n", buffer, LEN/4, s); free(s); hwloc_bitmap_or(total, total, set); next2: node = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, node); if (!node) goto out_with_nomorenodes; if (!node->memory.local_memory) goto next2; printf("binding to 2nd node and touching 2nd quarter\n"); err = hwloc_set_area_membind(topology, buffer, LEN, node->nodeset, HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET); assert(!err); memset(buffer+LEN/4, 0, LEN/4); err = hwloc_get_area_memlocation(topology, buffer+LEN/4, LEN/4, set, HWLOC_MEMBIND_BYNODESET); assert(!err); hwloc_bitmap_asprintf(&s, set); printf("address %p length %u allocated in nodeset %s\n", buffer+LEN/4, LEN/4, s); free(s); hwloc_bitmap_or(total, total, set); next3: node = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, node); if (!node) goto out_with_nomorenodes; if (!node->memory.local_memory) goto next3; printf("binding to 3rd node and touching 3rd quarter\n"); err = hwloc_set_area_membind(topology, buffer, LEN, node->nodeset, HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET); assert(!err); memset(buffer+LEN/2, 0, LEN/4); err = hwloc_get_area_memlocation(topology, buffer+LEN/2, LEN/4, set, HWLOC_MEMBIND_BYNODESET); assert(!err); hwloc_bitmap_asprintf(&s, set); printf("address %p length %u allocated in nodeset %s\n", buffer+LEN/2, LEN/4, s); free(s); hwloc_bitmap_or(total, total, set); next4: node = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, node); if (!node) goto out_with_nomorenodes; if (!node->memory.local_memory) goto next4; printf("binding to 4th node and touching 4th quarter\n"); err = hwloc_set_area_membind(topology, buffer, LEN, node->nodeset, HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET); assert(!err); memset(buffer+3*LEN/4, 0, LEN/4); err = hwloc_get_area_memlocation(topology, buffer+3*LEN/4, LEN/4, set, HWLOC_MEMBIND_BYNODESET); assert(!err); hwloc_bitmap_asprintf(&s, set); printf("address %p length %u allocated in nodeset %s\n", 
buffer+3*LEN/4, LEN/4, s); free(s); hwloc_bitmap_or(total, total, set); out_with_nomorenodes: err = hwloc_get_area_memlocation(topology, buffer, LEN, set, HWLOC_MEMBIND_BYNODESET); assert(!err); hwloc_bitmap_asprintf(&s, set); printf("address %p length %u located on %s\n", buffer, LEN, s); free(s); assert(hwloc_bitmap_isincluded(total, set)); out_with_buffer: hwloc_free(topology, buffer, LEN); out: hwloc_topology_destroy(topology); hwloc_bitmap_free(set); hwloc_bitmap_free(total); return 0; }
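/*
 * Small sketch of the touch-then-query pattern exercised by the test above:
 * after first-touching a buffer, hwloc_get_area_memlocation() reports which
 * NUMA node(s) back it.  Requires support->membind->get_area_memlocation;
 * the helper name is hypothetical and the hwloc 1.11+ API is assumed.
 */
#include <hwloc.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void report_memlocation(hwloc_topology_t topology, void *buf, size_t len)
{
    const struct hwloc_topology_support *support = hwloc_topology_get_support(topology);
    hwloc_bitmap_t nodes;
    char *s;

    if (!support->membind->get_area_memlocation)
        return;

    memset(buf, 0, len);                /* first touch allocates the pages */
    nodes = hwloc_bitmap_alloc();
    if (hwloc_get_area_memlocation(topology, buf, len, nodes,
                                   HWLOC_MEMBIND_BYNODESET) == 0) {
        hwloc_bitmap_asprintf(&s, nodes);
        printf("%p..+%zu backed by nodeset %s\n", buf, len, s);
        free(s);
    }
    hwloc_bitmap_free(nodes);
}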
int opal_hwloc_compare(const hwloc_topology_t topo1, const hwloc_topology_t topo2, opal_data_type_t type) { hwloc_topology_t t1, t2; unsigned d1, d2; struct hwloc_topology_support *s1, *s2; char *x1=NULL, *x2=NULL; int l1, l2; int s; /* stop stupid compiler warnings */ t1 = (hwloc_topology_t)topo1; t2 = (hwloc_topology_t)topo2; /* do something quick first */ d1 = hwloc_topology_get_depth(t1); d2 = hwloc_topology_get_depth(t2); if (d1 > d2) { return OPAL_VALUE1_GREATER; } else if (d2 > d1) { return OPAL_VALUE2_GREATER; } /* do the comparison the "cheat" way - get an xml representation * of each tree, and strcmp! This will work fine for inventory * comparisons, but might not meet the need for comparing topology * where we really need to do a tree-wise search so we only compare * the things we care about, and ignore stuff like MAC addresses */ if (0 != hwloc_topology_export_xmlbuffer(t1, &x1, &l1)) { return OPAL_EQUAL; } if (0 != hwloc_topology_export_xmlbuffer(t2, &x2, &l2)) { free(x1); return OPAL_EQUAL; } s = strcmp(x1, x2); free(x1); free(x2); if (s > 0) { return OPAL_VALUE1_GREATER; } else if (s < 0) { return OPAL_VALUE2_GREATER; } /* compare the available support - hwloc unfortunately does * not include this info in its xml support! */ if (NULL == (s1 = (struct hwloc_topology_support*)hwloc_topology_get_support(t1)) || NULL == s1->cpubind || NULL == s1->membind) { return OPAL_EQUAL; } if (NULL == (s2 = (struct hwloc_topology_support*)hwloc_topology_get_support(t2)) || NULL == s2->cpubind || NULL == s2->membind) { return OPAL_EQUAL; } /* compare the fields we care about */ if (s1->cpubind->set_thisproc_cpubind != s2->cpubind->set_thisproc_cpubind || s1->cpubind->set_thisthread_cpubind != s2->cpubind->set_thisthread_cpubind || s1->membind->set_thisproc_membind != s2->membind->set_thisproc_membind || s1->membind->set_thisthread_membind != s2->membind->set_thisthread_membind) { OPAL_OUTPUT_VERBOSE((5, opal_hwloc_base_framework.framework_output, "hwloc:base:compare BINDING CAPABILITIES DIFFER")); return OPAL_VALUE1_GREATER; } return OPAL_EQUAL; }
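/*
 * Minimal sketch of the "cheat" comparison described in opal_hwloc_compare()
 * above: export both topologies to XML and strcmp the buffers.  This is only
 * a rough equality test and ignores the support flags, which is why the OPAL
 * code checks them separately.  The helper name is hypothetical; hwloc 1.x
 * export signature assumed.
 */
#include <hwloc.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

static bool same_topology_xml(hwloc_topology_t t1, hwloc_topology_t t2)
{
    char *x1 = NULL, *x2 = NULL;
    int l1, l2;
    bool same = false;

    if (0 == hwloc_topology_export_xmlbuffer(t1, &x1, &l1) &&
        0 == hwloc_topology_export_xmlbuffer(t2, &x2, &l2)) {
        same = (0 == strcmp(x1, x2));
    }
    free(x1);
    free(x2);
    return same;
}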
static int bind_downwards(orte_job_t *jdata, hwloc_obj_type_t target, unsigned cache_level) { int i, j; orte_job_map_t *map; orte_node_t *node; orte_proc_t *proc; hwloc_obj_t trg_obj, nxt_obj; hwloc_cpuset_t cpus; unsigned int ncpus; struct hwloc_topology_support *support; opal_hwloc_obj_data_t *data; int total_cpus; hwloc_cpuset_t totalcpuset; opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps: bind downward for job %s with bindings %s", ORTE_JOBID_PRINT(jdata->jobid), opal_hwloc_base_print_binding(jdata->map->binding)); /* initialize */ map = jdata->map; totalcpuset = hwloc_bitmap_alloc(); for (i=0; i < map->nodes->size; i++) { if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) { continue; } if (!orte_do_not_launch) { /* if we don't want to launch, then we are just testing the system, * so ignore questions about support capabilities */ support = (struct hwloc_topology_support*)hwloc_topology_get_support(node->topology); /* check if topology supports cpubind - have to be careful here * as Linux doesn't currently support thread-level binding. This * may change in the future, though, and it isn't clear how hwloc * interprets the current behavior. So check both flags to be sure. */ if (!support->cpubind->set_thisproc_cpubind && !support->cpubind->set_thisthread_cpubind) { if (!OPAL_BINDING_REQUIRED(opal_hwloc_binding_policy)) { /* we are not required to bind, so ignore this */ continue; } orte_show_help("help-orte-rmaps-base.txt", "rmaps:cpubind-not-supported", true, node->name); hwloc_bitmap_free(totalcpuset); return ORTE_ERR_SILENT; } /* check if topology supports membind - have to be careful here * as hwloc treats this differently than I (at least) would have * expected. Per hwloc, Linux memory binding is at the thread, * and not process, level. Thus, hwloc sets the "thisproc" flag * to "false" on all Linux systems, and uses the "thisthread" flag * to indicate binding capability */ if (!support->membind->set_thisproc_membind && !support->membind->set_thisthread_membind) { if (OPAL_HWLOC_BASE_MBFA_WARN == opal_hwloc_base_mbfa && !membind_warned) { orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported", true, node->name); membind_warned = true; } else if (OPAL_HWLOC_BASE_MBFA_ERROR == opal_hwloc_base_mbfa) { orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported-fatal", true, node->name); hwloc_bitmap_free(totalcpuset); return ORTE_ERR_SILENT; } } } /* clear the topology of any prior usage numbers */ opal_hwloc_base_clear_usage(node->topology); /* cycle thru the procs */ for (j=0; j < node->procs->size; j++) { if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) { continue; } /* ignore procs from other jobs */ if (proc->name.jobid != jdata->jobid) { continue; } /* ignore procs that have already been bound - should * never happen, but safer */ if (NULL != proc->cpu_bitmap) { continue; } /* we don't know if the target is a direct child of this locale, * or if it is some depth below it, so we have to conduct a bit * of a search. Let hwloc find the min usage one for us. 
*/ trg_obj = opal_hwloc_base_find_min_bound_target_under_obj(node->topology, proc->locale, target, cache_level); if (NULL == trg_obj) { /* there aren't any such targets under this object */ orte_show_help("help-orte-rmaps-base.txt", "rmaps:no-available-cpus", true, node->name); hwloc_bitmap_free(totalcpuset); return ORTE_ERR_SILENT; } /* start with a clean slate */ hwloc_bitmap_zero(totalcpuset); total_cpus = 0; nxt_obj = trg_obj; do { if (NULL == nxt_obj) { /* could not find enough cpus to meet request */ orte_show_help("help-orte-rmaps-base.txt", "rmaps:no-available-cpus", true, node->name); hwloc_bitmap_free(totalcpuset); return ORTE_ERR_SILENT; } trg_obj = nxt_obj; /* get the number of cpus under this location */ ncpus = opal_hwloc_base_get_npus(node->topology, trg_obj); opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "%s GOT %d CPUS", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ncpus); /* track the number bound */ if (NULL == (data = (opal_hwloc_obj_data_t*)trg_obj->userdata)) { data = OBJ_NEW(opal_hwloc_obj_data_t); trg_obj->userdata = data; } data->num_bound++; /* error out if adding a proc would cause overload and that wasn't allowed */ if (ncpus < data->num_bound && !OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding)) { orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true, opal_hwloc_base_print_binding(map->binding), node->name, data->num_bound, ncpus); hwloc_bitmap_free(totalcpuset); return ORTE_ERR_SILENT; } /* bind the proc here */ cpus = opal_hwloc_base_get_available_cpus(node->topology, trg_obj); hwloc_bitmap_or(totalcpuset, totalcpuset, cpus); total_cpus += ncpus; /* move to the next location, in case we need it */ nxt_obj = trg_obj->next_cousin; } while (total_cpus < orte_rmaps_base.cpus_per_rank); hwloc_bitmap_list_asprintf(&proc->cpu_bitmap, totalcpuset); if (4 < opal_output_get_verbosity(orte_rmaps_base_framework.framework_output)) { char tmp1[1024], tmp2[1024]; opal_hwloc_base_cset2str(tmp1, sizeof(tmp1), totalcpuset); opal_hwloc_base_cset2mapstr(tmp2, sizeof(tmp2), totalcpuset); opal_output(orte_rmaps_base_framework.framework_output, "%s BOUND PROC %s[%s] TO %s: %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&proc->name), node->name, tmp1, tmp2); } } } hwloc_bitmap_free(totalcpuset); return ORTE_SUCCESS; }
static int bind_upwards(orte_job_t *jdata, hwloc_obj_type_t target, unsigned cache_level) { /* traverse the hwloc topology tree on each node upwards * until we find an object of type target - and then bind * the process to that target */ int i, j; orte_job_map_t *map; orte_node_t *node; orte_proc_t *proc; hwloc_obj_t obj; hwloc_cpuset_t cpus; unsigned int idx, ncpus; struct hwloc_topology_support *support; opal_hwloc_obj_data_t *data; opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps: bind upwards for job %s with bindings %s", ORTE_JOBID_PRINT(jdata->jobid), opal_hwloc_base_print_binding(jdata->map->binding)); /* initialize */ map = jdata->map; for (i=0; i < map->nodes->size; i++) { if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) { continue; } if (!orte_do_not_launch) { /* if we don't want to launch, then we are just testing the system, * so ignore questions about support capabilities */ support = (struct hwloc_topology_support*)hwloc_topology_get_support(node->topology); /* check if topology supports cpubind - have to be careful here * as Linux doesn't currently support thread-level binding. This * may change in the future, though, and it isn't clear how hwloc * interprets the current behavior. So check both flags to be sure. */ if (!support->cpubind->set_thisproc_cpubind && !support->cpubind->set_thisthread_cpubind) { if (!OPAL_BINDING_REQUIRED(opal_hwloc_binding_policy)) { /* we are not required to bind, so ignore this */ continue; } orte_show_help("help-orte-rmaps-base.txt", "rmaps:cpubind-not-supported", true, node->name); return ORTE_ERR_SILENT; } /* check if topology supports membind - have to be careful here * as hwloc treats this differently than I (at least) would have * expected. Per hwloc, Linux memory binding is at the thread, * and not process, level. 
Thus, hwloc sets the "thisproc" flag * to "false" on all Linux systems, and uses the "thisthread" flag * to indicate binding capability */ if (!support->membind->set_thisproc_membind && !support->membind->set_thisthread_membind) { if (OPAL_HWLOC_BASE_MBFA_WARN == opal_hwloc_base_mbfa && !membind_warned) { orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported", true, node->name); membind_warned = true; } else if (OPAL_HWLOC_BASE_MBFA_ERROR == opal_hwloc_base_mbfa) { orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported-fatal", true, node->name); return ORTE_ERR_SILENT; } } } /* clear the topology of any prior usage numbers */ opal_hwloc_base_clear_usage(node->topology); /* cycle thru the procs */ for (j=0; j < node->procs->size; j++) { if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) { continue; } /* ignore procs from other jobs */ if (proc->name.jobid != jdata->jobid) { continue; } /* ignore procs that have already been bound - should * never happen, but safer */ if (NULL != proc->cpu_bitmap) { continue; } /* bozo check */ if (NULL == proc->locale) { opal_output(0, "BIND UPWARDS: LOCALE FOR PROC %s IS NULL", ORTE_NAME_PRINT(&proc->name)); return ORTE_ERR_SILENT; } /* starting at the locale, move up thru the parents * to find the target object type */ for (obj = proc->locale->parent; NULL != obj; obj = obj->parent) { opal_output(0, "%s bind:upward target %s type %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hwloc_obj_type_string(target), hwloc_obj_type_string(obj->type)); if (target == obj->type) { if (HWLOC_OBJ_CACHE == target && cache_level != obj->attr->cache.depth) { continue; } /* get its index */ if (UINT_MAX == (idx = opal_hwloc_base_get_obj_idx(node->topology, obj, OPAL_HWLOC_AVAILABLE))) { return ORTE_ERR_SILENT; } /* track the number bound */ data = (opal_hwloc_obj_data_t*)obj->userdata; data->num_bound++; /* get the number of cpus under this location */ if (0 == (ncpus = opal_hwloc_base_get_npus(node->topology, obj))) { orte_show_help("help-orte-rmaps-base.txt", "rmaps:no-available-cpus", true, node->name); return ORTE_ERR_SILENT; } /* error out if adding a proc would cause overload and that wasn't allowed */ if (ncpus < data->num_bound && !OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding)) { orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true, opal_hwloc_base_print_binding(map->binding), node->name, data->num_bound, ncpus); return ORTE_ERR_SILENT; } /* bind it here */ cpus = opal_hwloc_base_get_available_cpus(node->topology, obj); hwloc_bitmap_list_asprintf(&proc->cpu_bitmap, cpus); opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "%s BOUND PROC %s TO %s[%s:%u] on node %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&proc->name), proc->cpu_bitmap, hwloc_obj_type_string(target), idx, node->name); break; } } if (NULL == proc->cpu_bitmap && OPAL_BINDING_REQUIRED(jdata->map->binding)) { /* didn't find anyone to bind to - this is an error * unless the user specified if-supported */ orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-target-not-found", true, opal_hwloc_base_print_binding(map->binding), node->name); return ORTE_ERR_SILENT; } } } return ORTE_SUCCESS; }
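/*
 * Minimal sketch of the upward walk used in bind_upwards() above: starting
 * from a leaf object, climb the parents until an object of the requested
 * type (and cache depth, when the target is a cache) is found.  The helper
 * name is hypothetical; hwloc 1.x object model assumed.
 */
#include <hwloc.h>

static hwloc_obj_t find_ancestor_of_type(hwloc_obj_t start,
                                         hwloc_obj_type_t target,
                                         unsigned cache_level)
{
    hwloc_obj_t obj;

    for (obj = start->parent; NULL != obj; obj = obj->parent) {
        if (obj->type != target)
            continue;
        if (HWLOC_OBJ_CACHE == target && obj->attr->cache.depth != cache_level)
            continue;
        return obj;
    }
    return NULL;
}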
int opal_hwloc_unpack(opal_buffer_t *buffer, void *dest, int32_t *num_vals, opal_data_type_t type) { /* NOTE: hwloc defines topology_t as a pointer to a struct! */ hwloc_topology_t t, *tarray = (hwloc_topology_t*)dest; int rc=OPAL_SUCCESS, i, cnt, j; char *xmlbuffer; struct hwloc_topology_support *support; for (i=0, j=0; i < *num_vals; i++) { /* unpack the xml string */ cnt=1; if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &xmlbuffer, &cnt, OPAL_STRING))) { goto cleanup; } /* convert the xml */ if (0 != hwloc_topology_init(&t)) { rc = OPAL_ERROR; free(xmlbuffer); goto cleanup; } if (0 != hwloc_topology_set_xmlbuffer(t, xmlbuffer, strlen(xmlbuffer))) { rc = OPAL_ERROR; free(xmlbuffer); hwloc_topology_destroy(t); goto cleanup; } free(xmlbuffer); /* since we are loading this from an external source, we have to * explicitly set a flag so hwloc sets things up correctly */ if (0 != hwloc_topology_set_flags(t, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM | HWLOC_TOPOLOGY_FLAG_IO_DEVICES)) { rc = OPAL_ERROR; hwloc_topology_destroy(t); goto cleanup; } /* now load the topology */ if (0 != hwloc_topology_load(t)) { rc = OPAL_ERROR; hwloc_topology_destroy(t); goto cleanup; } /* get the available support - hwloc unfortunately does * not include this info in its xml import! */ support = (struct hwloc_topology_support*)hwloc_topology_get_support(t); cnt = sizeof(struct hwloc_topology_discovery_support); if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, support->discovery, &cnt, OPAL_BYTE))) { goto cleanup; } cnt = sizeof(struct hwloc_topology_cpubind_support); if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, support->cpubind, &cnt, OPAL_BYTE))) { goto cleanup; } cnt = sizeof(struct hwloc_topology_membind_support); if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, support->membind, &cnt, OPAL_BYTE))) { goto cleanup; } /* pass it back */ tarray[i] = t; /* track the number added */ j++; } cleanup: *num_vals = j; return rc; }
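/* Illustrative sketch (not part of the routine above): why the support structs
 * are shipped as raw bytes. hwloc's XML export/import does not carry the
 * support information, so after loading a packed topology the flags must be
 * restored by hand. This hypothetical helper copies them from another live
 * topology; the unpack code above instead fills them from the buffer. Assumes
 * the hwloc 1.x API used throughout this code. */
#include <hwloc.h>
#include <string.h>

static void copy_support_flags(hwloc_topology_t from, hwloc_topology_t to)
{
    const struct hwloc_topology_support *s = hwloc_topology_get_support(from);
    const struct hwloc_topology_support *d = hwloc_topology_get_support(to);

    /* get_support() hands back const pointers into the topology object, so
     * cast away const to overwrite the sub-structs, as the code above does */
    memcpy((void*)d->discovery, s->discovery, sizeof(*s->discovery));
    memcpy((void*)d->cpubind,   s->cpubind,   sizeof(*s->cpubind));
    memcpy((void*)d->membind,   s->membind,   sizeof(*s->membind));
}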
static int bind_in_place(orte_job_t *jdata, hwloc_obj_type_t target, unsigned cache_level) { /* traverse the hwloc topology tree on each node downwards * until we find an unused object of type target - and then bind * the process to that target */ int i, j; orte_job_map_t *map; orte_node_t *node; orte_proc_t *proc; hwloc_cpuset_t cpus; unsigned int idx, ncpus; struct hwloc_topology_support *support; opal_hwloc_obj_data_t *data; hwloc_obj_t locale, sib; char *cpu_bitmap; bool found; opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps: bind in place for job %s with bindings %s", ORTE_JOBID_PRINT(jdata->jobid), opal_hwloc_base_print_binding(jdata->map->binding)); /* initialize */ map = jdata->map; for (i=0; i < map->nodes->size; i++) { if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) { continue; } if (!orte_do_not_launch) { /* if we don't want to launch, then we are just testing the system, * so ignore questions about support capabilities */ support = (struct hwloc_topology_support*)hwloc_topology_get_support(node->topology); /* check if topology supports cpubind - have to be careful here * as Linux doesn't currently support thread-level binding. This * may change in the future, though, and it isn't clear how hwloc * interprets the current behavior. So check both flags to be sure. */ if (!support->cpubind->set_thisproc_cpubind && !support->cpubind->set_thisthread_cpubind) { if (!OPAL_BINDING_REQUIRED(map->binding) || !OPAL_BINDING_POLICY_IS_SET(map->binding)) { /* we are not required to bind, so ignore this */ continue; } orte_show_help("help-orte-rmaps-base.txt", "rmaps:cpubind-not-supported", true, node->name); return ORTE_ERR_SILENT; } /* check if topology supports membind - have to be careful here * as hwloc treats this differently than I (at least) would have * expected. Per hwloc, Linux memory binding is at the thread, * and not process, level. Thus, hwloc sets the "thisproc" flag * to "false" on all Linux systems, and uses the "thisthread" flag * to indicate binding capability - don't warn if the user didn't * specifically request binding */ if (!support->membind->set_thisproc_membind && !support->membind->set_thisthread_membind && OPAL_BINDING_POLICY_IS_SET(map->binding)) { if (OPAL_HWLOC_BASE_MBFA_WARN == opal_hwloc_base_mbfa && !membind_warned) { orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported", true, node->name); membind_warned = true; } else if (OPAL_HWLOC_BASE_MBFA_ERROR == opal_hwloc_base_mbfa) { orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported-fatal", true, node->name); return ORTE_ERR_SILENT; } } } /* some systems do not report cores, and so we can get a situation where our * default binding policy will fail for no necessary reason. 
So if we are * computing a binding due to our default policy, and no cores are found * on this node, just silently skip it - we will not bind */ if (!OPAL_BINDING_POLICY_IS_SET(map->binding) && HWLOC_TYPE_DEPTH_UNKNOWN == hwloc_get_type_depth(node->topology, HWLOC_OBJ_CORE)) { opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "Unable to bind-to core by default on node %s as no cores detected", node->name); continue; } /* we share topologies in order * to save space, so we need to reset the usage info to reflect * our own current state */ reset_usage(node, jdata->jobid); /* cycle thru the procs */ for (j=0; j < node->procs->size; j++) { if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) { continue; } /* ignore procs from other jobs */ if (proc->name.jobid != jdata->jobid) { continue; } /* bozo check */ if (!orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&locale, OPAL_PTR)) { orte_show_help("help-orte-rmaps-base.txt", "rmaps:no-locale", true, ORTE_NAME_PRINT(&proc->name)); return ORTE_ERR_SILENT; } /* get the index of this location */ if (UINT_MAX == (idx = opal_hwloc_base_get_obj_idx(node->topology, locale, OPAL_HWLOC_AVAILABLE))) { ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); return ORTE_ERR_SILENT; } data = (opal_hwloc_obj_data_t*)locale->userdata; /* get the number of cpus under this location */ if (0 == (ncpus = opal_hwloc_base_get_npus(node->topology, locale))) { orte_show_help("help-orte-rmaps-base.txt", "rmaps:no-available-cpus", true, node->name); return ORTE_ERR_SILENT; } /* if we don't have enough cpus to support this additional proc, try * shifting the location to a cousin that can support it - the important * thing is that we maintain the same level in the topology */ if (ncpus < (data->num_bound+1)) { opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "%s bind_in_place: searching right", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); sib = locale; found = false; while (NULL != (sib = sib->next_cousin)) { data = (opal_hwloc_obj_data_t*)sib->userdata; ncpus = opal_hwloc_base_get_npus(node->topology, sib); if (data->num_bound < ncpus) { found = true; locale = sib; break; } } if (!found) { /* try the other direction */ opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "%s bind_in_place: searching left", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); sib = locale; while (NULL != (sib = sib->prev_cousin)) { data = (opal_hwloc_obj_data_t*)sib->userdata; ncpus = opal_hwloc_base_get_npus(node->topology, sib); if (data->num_bound < ncpus) { found = true; locale = sib; break; } } } if (!found) { /* no place to put this - see if overload is allowed */ if (!OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding)) { if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) { /* if the user specified a binding policy, then we cannot meet * it since overload isn't allowed, so error out - have the * message indicate that setting overload allowed will remove * this restriction */ orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true, opal_hwloc_base_print_binding(map->binding), node->name, data->num_bound, ncpus); return ORTE_ERR_SILENT; } else { /* if we have the default binding policy, then just don't bind */ OPAL_SET_BINDING_POLICY(map->binding, OPAL_BIND_TO_NONE); unbind_procs(jdata); return ORTE_SUCCESS; } } } } /* track the number bound */ data = (opal_hwloc_obj_data_t*)locale->userdata; // just in case it changed data->num_bound++; opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "BINDING 
PROC %s TO %s NUMBER %u", ORTE_NAME_PRINT(&proc->name), hwloc_obj_type_string(locale->type), idx); /* bind the proc here */ cpus = opal_hwloc_base_get_available_cpus(node->topology, locale); hwloc_bitmap_list_asprintf(&cpu_bitmap, cpus); orte_set_attribute(&proc->attributes, ORTE_PROC_CPU_BITMAP, ORTE_ATTR_GLOBAL, cpu_bitmap, OPAL_STRING); /* update the location, in case it changed */ orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_BOUND, ORTE_ATTR_LOCAL, locale, OPAL_PTR); opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "%s BOUND PROC %s TO %s[%s:%u] on node %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&proc->name), cpu_bitmap, hwloc_obj_type_string(locale->type), idx, node->name); if (NULL != cpu_bitmap) { free(cpu_bitmap); } } } return ORTE_SUCCESS; }
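/* Illustrative sketch (not from the code above): the sideways search that
 * bind_in_place performs when an object is already full - walk next_cousin
 * and then prev_cousin links, staying at the same topology depth, until an
 * object with enough processing units is found. Here "room" is approximated
 * by the raw PU count; the real code compares against its num_bound usage
 * counter. The helper name is hypothetical; hwloc 1.x API assumed. */
#include <hwloc.h>

static hwloc_obj_t find_cousin_with_room(hwloc_obj_t obj, int min_pus)
{
    hwloc_obj_t sib;

    /* search to the right first */
    for (sib = obj->next_cousin; NULL != sib; sib = sib->next_cousin) {
        if (NULL != sib->cpuset && hwloc_bitmap_weight(sib->cpuset) >= min_pus) {
            return sib;
        }
    }
    /* then try the other direction */
    for (sib = obj->prev_cousin; NULL != sib; sib = sib->prev_cousin) {
        if (NULL != sib->cpuset && hwloc_bitmap_weight(sib->cpuset) >= min_pus) {
            return sib;
        }
    }
    return NULL;  /* nothing at this depth has room */
}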
static int bind_in_place(orte_job_t *jdata, hwloc_obj_type_t target, unsigned cache_level) { /* traverse the hwloc topology tree on each node downwards * until we find an unused object of type target - and then bind * the process to that target */ int i, j; orte_job_map_t *map; orte_node_t *node; orte_proc_t *proc; hwloc_cpuset_t cpus; unsigned int idx, ncpus; struct hwloc_topology_support *support; opal_hwloc_obj_data_t *data; opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps: bind in place for job %s with bindings %s", ORTE_JOBID_PRINT(jdata->jobid), opal_hwloc_base_print_binding(jdata->map->binding)); /* initialize */ map = jdata->map; for (i=0; i < map->nodes->size; i++) { if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) { continue; } if (!orte_do_not_launch) { /* if we don't want to launch, then we are just testing the system, * so ignore questions about support capabilities */ support = (struct hwloc_topology_support*)hwloc_topology_get_support(node->topology); /* check if topology supports cpubind - have to be careful here * as Linux doesn't currently support thread-level binding. This * may change in the future, though, and it isn't clear how hwloc * interprets the current behavior. So check both flags to be sure. */ if (!support->cpubind->set_thisproc_cpubind && !support->cpubind->set_thisthread_cpubind) { if (!OPAL_BINDING_REQUIRED(map->binding) || !OPAL_BINDING_POLICY_IS_SET(map->binding)) { /* we are not required to bind, so ignore this */ continue; } orte_show_help("help-orte-rmaps-base.txt", "rmaps:cpubind-not-supported", true, node->name); return ORTE_ERR_SILENT; } /* check if topology supports membind - have to be careful here * as hwloc treats this differently than I (at least) would have * expected. Per hwloc, Linux memory binding is at the thread, * and not process, level. Thus, hwloc sets the "thisproc" flag * to "false" on all Linux systems, and uses the "thisthread" flag * to indicate binding capability - don't warn if the user didn't * specifically request binding */ if (!support->membind->set_thisproc_membind && !support->membind->set_thisthread_membind && OPAL_BINDING_POLICY_IS_SET(map->binding)) { if (OPAL_HWLOC_BASE_MBFA_WARN == opal_hwloc_base_mbfa && !membind_warned) { orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported", true, node->name); membind_warned = true; } else if (OPAL_HWLOC_BASE_MBFA_ERROR == opal_hwloc_base_mbfa) { orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported-fatal", true, node->name); return ORTE_ERR_SILENT; } } } /* some systems do not report cores, and so we can get a situation where our * default binding policy will fail for no necessary reason. 
So if we are * computing a binding due to our default policy, and no cores are found * on this node, just silently skip it - we will not bind */ if (!OPAL_BINDING_POLICY_IS_SET(map->binding) && HWLOC_TYPE_DEPTH_UNKNOWN == hwloc_get_type_depth(node->topology, HWLOC_OBJ_CORE)) { opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "Unable to bind-to core by default on node %s as no cores detected", node->name); continue; } /* we share topologies in order * to save space, so we need to reset the usage info to reflect * our own current state */ reset_usage(node, jdata->jobid); /* cycle thru the procs */ for (j=0; j < node->procs->size; j++) { if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) { continue; } /* ignore procs from other jobs */ if (proc->name.jobid != jdata->jobid) { continue; } /* ignore procs that have already been bound - should * never happen, but safer */ if (NULL != proc->cpu_bitmap) { continue; } /* get the index of this location */ if (UINT_MAX == (idx = opal_hwloc_base_get_obj_idx(node->topology, proc->locale, OPAL_HWLOC_AVAILABLE))) { ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); return ORTE_ERR_SILENT; } /* track the number bound */ data = (opal_hwloc_obj_data_t*)proc->locale->userdata; data->num_bound++; opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "BINDING PROC %s TO %s NUMBER %u", ORTE_NAME_PRINT(&proc->name), hwloc_obj_type_string(proc->locale->type), idx); /* get the number of cpus under this location */ if (0 == (ncpus = opal_hwloc_base_get_npus(node->topology, proc->locale))) { orte_show_help("help-orte-rmaps-base.txt", "rmaps:no-available-cpus", true, node->name); return ORTE_ERR_SILENT; } /* error out if adding a proc would cause overload and that wasn't allowed, * and it wasn't a default binding policy (i.e., the user requested it) */ if (ncpus < data->num_bound && !OPAL_BIND_OVERLOAD_ALLOWED(jdata->map->binding) && OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) { orte_show_help("help-orte-rmaps-base.txt", "rmaps:binding-overload", true, opal_hwloc_base_print_binding(map->binding), node->name, data->num_bound, ncpus); return ORTE_ERR_SILENT; } /* bind the proc here */ cpus = opal_hwloc_base_get_available_cpus(node->topology, proc->locale); hwloc_bitmap_list_asprintf(&proc->cpu_bitmap, cpus); /* record the location */ proc->bind_location = proc->locale; opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "%s BOUND PROC %s TO %s[%s:%u] on node %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&proc->name), proc->cpu_bitmap, hwloc_obj_type_string(proc->locale->type), idx, node->name); } } return ORTE_SUCCESS; }
int main(void)
{
    hwloc_bitmap_t set;
    hwloc_obj_t obj;
    char *str = NULL;

    hwloc_topology_init(&topology);
    hwloc_topology_load(topology);
    support = hwloc_topology_get_support(topology);

    obj = hwloc_get_root_obj(topology);
    set = hwloc_bitmap_dup(obj->cpuset);

    while (hwloc_bitmap_isequal(obj->cpuset, set)) {
        if (!obj->arity)
            break;
        obj = obj->children[0];
    }

    hwloc_bitmap_asprintf(&str, set);
    printf("system set is %s\n", str);
    free(str);
    test(set, 0);
    printf("now strict\n");
    test(set, HWLOC_CPUBIND_STRICT);
    hwloc_bitmap_free(set);

    set = hwloc_bitmap_dup(obj->cpuset);
    hwloc_bitmap_asprintf(&str, set);
    printf("obj set is %s\n", str);
    free(str);
    test(set, 0);
    printf("now strict\n");
    test(set, HWLOC_CPUBIND_STRICT);
    hwloc_bitmap_singlify(set);
    hwloc_bitmap_asprintf(&str, set);
    printf("singlified to %s\n", str);
    free(str);
    test(set, 0);
    printf("now strict\n");
    test(set, HWLOC_CPUBIND_STRICT);
    hwloc_bitmap_free(set);

    printf("\n\nmemory tests\n\n");
    printf("complete node set\n");
    set = hwloc_bitmap_dup(hwloc_get_root_obj(topology)->cpuset);
    hwloc_bitmap_asprintf(&str, set);
    printf("i.e. cpuset %s\n", str);
    free(str);
    testmem3(set);
    hwloc_bitmap_free(set);

    obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NODE, 0);
    if (obj) {
        set = hwloc_bitmap_dup(obj->cpuset);
        hwloc_bitmap_asprintf(&str, set);
        printf("cpuset set is %s\n", str);
        free(str);
        testmem3(set);
        obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NODE, 1);
        if (obj) {
            hwloc_bitmap_or(set, set, obj->cpuset);
            hwloc_bitmap_asprintf(&str, set);
            printf("cpuset set is %s\n", str);
            free(str);
            testmem3(set);
        }
        hwloc_bitmap_free(set);
    }

    hwloc_topology_destroy(topology);
    return 0;
}
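/* Illustrative sketch (not part of the test above): the support check that the
 * binding code in this document relies on - consult hwloc_topology_get_support()
 * before attempting to bind, instead of depending on the error return alone.
 * The function name bind_self_to_first_core is hypothetical; hwloc 1.x API
 * assumed. */
#include <hwloc.h>
#include <stdio.h>

static int bind_self_to_first_core(hwloc_topology_t topo)
{
    const struct hwloc_topology_support *sup = hwloc_topology_get_support(topo);
    hwloc_obj_t core;

    /* check both flags, as the rmaps code above does */
    if (!sup->cpubind->set_thisproc_cpubind &&
        !sup->cpubind->set_thisthread_cpubind) {
        fprintf(stderr, "cpu binding not supported on this system\n");
        return -1;
    }
    core = hwloc_get_obj_by_type(topo, HWLOC_OBJ_CORE, 0);
    if (NULL == core) {
        return -1;  /* topology exposes no cores */
    }
    return hwloc_set_cpubind(topo, core->cpuset, 0);
}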
int orte_rmaps_base_compute_bindings(orte_job_t *jdata) { hwloc_obj_type_t hwb, hwm; unsigned clvl=0, clvm=0; opal_binding_policy_t bind; orte_mapping_policy_t map; orte_node_t *node; int i, rc; struct hwloc_topology_support *support; bool force_down = false; hwloc_cpuset_t totalcpuset; int bind_depth, map_depth; opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps: compute bindings for job %s with policy %s", ORTE_JOBID_PRINT(jdata->jobid), opal_hwloc_base_print_binding(jdata->map->binding)); map = ORTE_GET_MAPPING_POLICY(jdata->map->mapping); bind = OPAL_GET_BINDING_POLICY(jdata->map->binding); if (ORTE_MAPPING_BYUSER == map) { /* user specified binding by rankfile - nothing for us to do */ return ORTE_SUCCESS; } if (OPAL_BIND_TO_CPUSET == bind) { int rc; /* cpuset was given - setup the bindings */ if (ORTE_SUCCESS != (rc = bind_to_cpuset(jdata))) { ORTE_ERROR_LOG(rc); } return rc; } if (OPAL_BIND_TO_NONE == bind) { /* no binding requested */ return ORTE_SUCCESS; } if (OPAL_BIND_TO_BOARD == bind) { /* doesn't do anything at this time */ return ORTE_SUCCESS; } /* binding requested - convert the binding level to the hwloc obj type */ switch (bind) { case OPAL_BIND_TO_NUMA: hwb = HWLOC_OBJ_NODE; break; case OPAL_BIND_TO_SOCKET: hwb = HWLOC_OBJ_SOCKET; break; case OPAL_BIND_TO_L3CACHE: hwb = HWLOC_OBJ_CACHE; clvl = 3; break; case OPAL_BIND_TO_L2CACHE: hwb = HWLOC_OBJ_CACHE; clvl = 2; break; case OPAL_BIND_TO_L1CACHE: hwb = HWLOC_OBJ_CACHE; clvl = 1; break; case OPAL_BIND_TO_CORE: hwb = HWLOC_OBJ_CORE; break; case OPAL_BIND_TO_HWTHREAD: hwb = HWLOC_OBJ_PU; break; default: ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); return ORTE_ERR_BAD_PARAM; } /* do the same for the mapping policy */ switch (map) { case ORTE_MAPPING_BYNODE: case ORTE_MAPPING_BYSLOT: case ORTE_MAPPING_SEQ: hwm = HWLOC_OBJ_MACHINE; break; case ORTE_MAPPING_BYDIST: case ORTE_MAPPING_BYNUMA: hwm = HWLOC_OBJ_NODE; break; case ORTE_MAPPING_BYSOCKET: hwm = HWLOC_OBJ_SOCKET; break; case ORTE_MAPPING_BYL3CACHE: hwm = HWLOC_OBJ_CACHE; clvm = 3; break; case ORTE_MAPPING_BYL2CACHE: hwm = HWLOC_OBJ_CACHE; clvm = 2; break; case ORTE_MAPPING_BYL1CACHE: hwm = HWLOC_OBJ_CACHE; clvm = 1; break; case ORTE_MAPPING_BYCORE: hwm = HWLOC_OBJ_CORE; break; case ORTE_MAPPING_BYHWTHREAD: hwm = HWLOC_OBJ_PU; break; default: ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); return ORTE_ERR_BAD_PARAM; } /* if the job was mapped by the corresponding target, then * we bind in place * * otherwise, we have to bind either up or down the hwloc * tree. If we are binding upwards (e.g., mapped to hwthread * but binding to core), then we just climb the tree to find * the first matching object. * * if we are binding downwards (e.g., mapped to node and bind * to core), then we have to do a round-robin assigment of * procs to the resources below. 
*/ if (ORTE_MAPPING_BYDIST == map) { int rc = ORTE_SUCCESS; if (OPAL_BIND_TO_NUMA == bind) { opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps: bindings for job %s - dist to numa", ORTE_JOBID_PRINT(jdata->jobid)); if (ORTE_SUCCESS != (rc = bind_in_place(jdata, HWLOC_OBJ_NODE, 0))) { ORTE_ERROR_LOG(rc); } } else if (OPAL_BIND_TO_NUMA < bind) { /* bind every proc downwards */ force_down = true; goto execute; } /* if the binding policy is less than numa, then we are unbound - so * just ignore this and return (should have been caught in prior * tests anyway as only options meeting that criteria are "none" * and "board") */ return rc; } /* now deal with the remaining binding policies based on hardware */ if (bind == map) { opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps: bindings for job %s - bind in place", ORTE_JOBID_PRINT(jdata->jobid)); if (ORTE_SUCCESS != (rc = bind_in_place(jdata, hwb, clvl))) { ORTE_ERROR_LOG(rc); } return rc; } /* we need to handle the remaining binding options on a per-node * basis because different nodes could potentially have different * topologies, with different relative depths for the two levels */ execute: /* initialize */ totalcpuset = hwloc_bitmap_alloc(); for (i=0; i < jdata->map->nodes->size; i++) { if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, i))) { continue; } if (!orte_do_not_launch) { /* if we don't want to launch, then we are just testing the system, * so ignore questions about support capabilities */ support = (struct hwloc_topology_support*)hwloc_topology_get_support(node->topology); /* check if topology supports cpubind - have to be careful here * as Linux doesn't currently support thread-level binding. This * may change in the future, though, and it isn't clear how hwloc * interprets the current behavior. So check both flags to be sure. */ if (!support->cpubind->set_thisproc_cpubind && !support->cpubind->set_thisthread_cpubind) { if (!OPAL_BINDING_REQUIRED(jdata->map->binding) || !OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) { /* we are not required to bind, so ignore this */ continue; } orte_show_help("help-orte-rmaps-base.txt", "rmaps:cpubind-not-supported", true, node->name); hwloc_bitmap_free(totalcpuset); return ORTE_ERR_SILENT; } /* check if topology supports membind - have to be careful here * as hwloc treats this differently than I (at least) would have * expected. Per hwloc, Linux memory binding is at the thread, * and not process, level. Thus, hwloc sets the "thisproc" flag * to "false" on all Linux systems, and uses the "thisthread" flag * to indicate binding capability - don't warn if the user didn't * specifically request binding */ if (!support->membind->set_thisproc_membind && !support->membind->set_thisthread_membind && OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) { if (OPAL_HWLOC_BASE_MBFA_WARN == opal_hwloc_base_mbfa && !membind_warned) { orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported", true, node->name); membind_warned = true; } else if (OPAL_HWLOC_BASE_MBFA_ERROR == opal_hwloc_base_mbfa) { orte_show_help("help-orte-rmaps-base.txt", "rmaps:membind-not-supported-fatal", true, node->name); hwloc_bitmap_free(totalcpuset); return ORTE_ERR_SILENT; } } } /* some systems do not report cores, and so we can get a situation where our * default binding policy will fail for no necessary reason. 
So if we are * computing a binding due to our default policy, and no cores are found * on this node, just silently skip it - we will not bind */ if (!OPAL_BINDING_POLICY_IS_SET(jdata->map->binding) && HWLOC_TYPE_DEPTH_UNKNOWN == hwloc_get_type_depth(node->topology, HWLOC_OBJ_CORE)) { opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "Unable to bind-to core by default on node %s as no cores detected", node->name); continue; } /* we share topologies in order * to save space, so we need to reset the usage info to reflect * our own current state */ reset_usage(node, jdata->jobid); if (force_down) { if (ORTE_SUCCESS != (rc = bind_downwards(jdata, node, hwb, clvl))) { ORTE_ERROR_LOG(rc); return rc; } } else { /* determine the relative depth on this node */ if (HWLOC_OBJ_CACHE == hwb) { /* must use a unique function because blasted hwloc * just doesn't deal with caches very well...sigh */ bind_depth = hwloc_get_cache_type_depth(node->topology, clvl, -1); } else { bind_depth = hwloc_get_type_depth(node->topology, hwb); } if (0 > bind_depth) { /* didn't find such an object */ orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-objects", true, hwloc_obj_type_string(hwb), node->name); return ORTE_ERR_SILENT; } if (HWLOC_OBJ_CACHE == hwm) { /* must use a unique function because blasted hwloc * just doesn't deal with caches very well...sigh */ map_depth = hwloc_get_cache_type_depth(node->topology, clvm, -1); } else { map_depth = hwloc_get_type_depth(node->topology, hwm); } if (0 > map_depth) { /* didn't find such an object */ orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-objects", true, hwloc_obj_type_string(hwm), node->name); return ORTE_ERR_SILENT; } opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "%s bind_depth: %d map_depth %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), bind_depth, map_depth); if (bind_depth > map_depth) { if (ORTE_SUCCESS != (rc = bind_downwards(jdata, node, hwb, clvl))) { ORTE_ERROR_LOG(rc); return rc; } } else { if (ORTE_SUCCESS != (rc = bind_upwards(jdata, node, hwb, clvl))) { ORTE_ERROR_LOG(rc); return rc; } } } } return ORTE_SUCCESS; }
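/* Illustrative sketch (not from the routine above): the depth comparison used
 * to decide between bind_upwards and bind_downwards. A larger hwloc depth means
 * an object farther from the root, i.e. a smaller piece of hardware. Function
 * and variable names are hypothetical; hwloc 1.x API assumed. */
#include <hwloc.h>

/* returns 1 if objects of type "bind" sit below objects of type "map" in this
 * topology (bind downwards), 0 if not, -1 if either level is missing/ambiguous */
static int bind_is_below_map(hwloc_topology_t topo,
                             hwloc_obj_type_t bind, hwloc_obj_type_t map)
{
    int bind_depth = hwloc_get_type_depth(topo, bind);
    int map_depth  = hwloc_get_type_depth(topo, map);

    if (bind_depth < 0 || map_depth < 0) {
        return -1;  /* HWLOC_TYPE_DEPTH_UNKNOWN or _MULTIPLE */
    }
    return bind_depth > map_depth ? 1 : 0;
}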
static int allocate(orte_job_t *jdata, opal_list_t *nodes) { int i, n, val, dig, num_nodes; orte_node_t *node; #if OPAL_HAVE_HWLOC orte_topology_t *t; hwloc_topology_t topo; hwloc_obj_t obj; unsigned j, k; struct hwloc_topology_support *support; char **files=NULL; char **topos = NULL; bool use_local_topology = false; #endif char **node_cnt=NULL; char **slot_cnt=NULL; char **max_slot_cnt=NULL; char *tmp; char prefix[6]; node_cnt = opal_argv_split(mca_ras_simulator_component.num_nodes, ','); if (NULL != mca_ras_simulator_component.slots) { slot_cnt = opal_argv_split(mca_ras_simulator_component.slots, ','); /* backfile the slot_cnt so every topology has a cnt */ tmp = slot_cnt[opal_argv_count(slot_cnt)-1]; for (n=opal_argv_count(slot_cnt); n < opal_argv_count(node_cnt); n++) { opal_argv_append_nosize(&slot_cnt, tmp); } } if (NULL != mca_ras_simulator_component.slots_max) { max_slot_cnt = opal_argv_split(mca_ras_simulator_component.slots_max, ','); /* backfill the max_slot_cnt as reqd */ tmp = max_slot_cnt[opal_argv_count(slot_cnt)-1]; for (n=opal_argv_count(max_slot_cnt); n < opal_argv_count(max_slot_cnt); n++) { opal_argv_append_nosize(&max_slot_cnt, tmp); } } #if OPAL_HAVE_HWLOC if (NULL != mca_ras_simulator_component.topofiles) { files = opal_argv_split(mca_ras_simulator_component.topofiles, ','); if (opal_argv_count(files) != opal_argv_count(node_cnt)) { orte_show_help("help-ras-base.txt", "ras-sim:mismatch", true); goto error_silent; } } else if (NULL != mca_ras_simulator_component.topologies) { topos = opal_argv_split(mca_ras_simulator_component.topologies, ','); if (opal_argv_count(topos) != opal_argv_count(node_cnt)) { orte_show_help("help-ras-base.txt", "ras-sim:mismatch", true); goto error_silent; } } else { /* use our topology */ use_local_topology = true; } #else /* If we don't have hwloc and hwloc files were specified, then error out (because we can't deliver that functionality) */ if (NULL == mca_ras_simulator_component.topofiles) { orte_show_help("help-ras-simulator.txt", "no hwloc support for topofiles", true); goto error_silent; } #endif /* setup the prefix to the node names */ snprintf(prefix, 6, "nodeA"); /* process the request */ for (n=0; NULL != node_cnt[n]; n++) { num_nodes = strtol(node_cnt[n], NULL, 10); /* get number of digits */ val = num_nodes; for (dig=0; 0 != val; dig++) { val /= 10; } /* set the prefix for this group of nodes */ prefix[4] += n; /* check for topology */ #if OPAL_HAVE_HWLOC if (use_local_topology) { /* use our topology */ topo = opal_hwloc_topology; } else if (NULL != files) { if (0 != hwloc_topology_init(&topo)) { orte_show_help("help-ras-simulator.txt", "hwloc API fail", true, __FILE__, __LINE__, "hwloc_topology_init"); goto error_silent; } if (0 != hwloc_topology_set_xml(topo, files[n])) { orte_show_help("help-ras-simulator.txt", "hwloc failed to load xml", true, files[n]); hwloc_topology_destroy(topo); goto error_silent; } /* since we are loading this from an external source, we have to * explicitly set a flag so hwloc sets things up correctly */ if (0 != hwloc_topology_set_flags(topo, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM)) { orte_show_help("help-ras-simulator.txt", "hwloc API fail", true, __FILE__, __LINE__, "hwloc_topology_set_flags"); hwloc_topology_destroy(topo); goto error_silent; } if (0 != hwloc_topology_load(topo)) { orte_show_help("help-ras-simulator.txt", "hwloc API fail", true, __FILE__, __LINE__, "hwloc_topology_load"); hwloc_topology_destroy(topo); goto error_silent; } /* remove the hostname from the topology. 
Unfortunately, hwloc * decided to add the source hostname to the "topology", thus * rendering it unusable as a pure topological description. So * we remove that information here. */ obj = hwloc_get_root_obj(topo); for (k=0; k < obj->infos_count; k++) { if (NULL == obj->infos[k].name || NULL == obj->infos[k].value) { continue; } if (0 == strncmp(obj->infos[k].name, "HostName", strlen("HostName"))) { free(obj->infos[k].name); free(obj->infos[k].value); /* left justify the array */ for (j=k; j < obj->infos_count-1; j++) { obj->infos[j] = obj->infos[j+1]; } obj->infos[obj->infos_count-1].name = NULL; obj->infos[obj->infos_count-1].value = NULL; obj->infos_count--; break; } } /* unfortunately, hwloc does not include support info in its * xml output :-(( To aid in debugging, we set it here */ support = (struct hwloc_topology_support*)hwloc_topology_get_support(topo); support->cpubind->set_thisproc_cpubind = mca_ras_simulator_component.have_cpubind; support->membind->set_thisproc_membind = mca_ras_simulator_component.have_membind; /* add it to our array */ t = OBJ_NEW(orte_topology_t); t->topo = topo; t->sig = opal_hwloc_base_get_topo_signature(topo); opal_pointer_array_add(orte_node_topologies, t); } else { if (0 != hwloc_topology_init(&topo)) { orte_show_help("help-ras-simulator.txt", "hwloc API fail", true, __FILE__, __LINE__, "hwloc_topology_init"); goto error_silent; } if (0 != hwloc_topology_set_synthetic(topo, topos[n])) { orte_show_help("help-ras-simulator.txt", "hwloc API fail", true, __FILE__, __LINE__, "hwloc_topology_set_synthetic"); hwloc_topology_destroy(topo); goto error_silent; } if (0 != hwloc_topology_load(topo)) { orte_show_help("help-ras-simulator.txt", "hwloc API fail", true, __FILE__, __LINE__, "hwloc_topology_load"); hwloc_topology_destroy(topo); goto error_silent; } if (OPAL_SUCCESS != opal_hwloc_base_filter_cpus(topo)) { orte_show_help("help-ras-simulator.txt", "hwloc API fail", true, __FILE__, __LINE__, "opal_hwloc_base_filter_cpus"); hwloc_topology_destroy(topo); goto error_silent; } /* remove the hostname from the topology. Unfortunately, hwloc * decided to add the source hostname to the "topology", thus * rendering it unusable as a pure topological description. So * we remove that information here. 
*/ obj = hwloc_get_root_obj(topo); for (k=0; k < obj->infos_count; k++) { if (NULL == obj->infos[k].name || NULL == obj->infos[k].value) { continue; } if (0 == strncmp(obj->infos[k].name, "HostName", strlen("HostName"))) { free(obj->infos[k].name); free(obj->infos[k].value); /* left justify the array */ for (j=k; j < obj->infos_count-1; j++) { obj->infos[j] = obj->infos[j+1]; } obj->infos[obj->infos_count-1].name = NULL; obj->infos[obj->infos_count-1].value = NULL; obj->infos_count--; break; } } /* unfortunately, hwloc does not include support info in its * xml output :-(( To aid in debugging, we set it here */ support = (struct hwloc_topology_support*)hwloc_topology_get_support(topo); support->cpubind->set_thisproc_cpubind = mca_ras_simulator_component.have_cpubind; support->membind->set_thisproc_membind = mca_ras_simulator_component.have_membind; /* add it to our array */ t = OBJ_NEW(orte_topology_t); t->topo = topo; t->sig = opal_hwloc_base_get_topo_signature(topo); opal_pointer_array_add(orte_node_topologies, t); } #endif for (i=0; i < num_nodes; i++) { node = OBJ_NEW(orte_node_t); asprintf(&node->name, "%s%0*d", prefix, dig, i); node->state = ORTE_NODE_STATE_UP; node->slots_inuse = 0; #if OPAL_HAVE_HWLOC if (NULL == max_slot_cnt || NULL == max_slot_cnt[n]) { node->slots_max = 0; } else { obj = hwloc_get_root_obj(topo); node->slots_max = opal_hwloc_base_get_npus(topo, obj); } if (NULL == slot_cnt || NULL == slot_cnt[n]) { node->slots = 0; } else { obj = hwloc_get_root_obj(topo); node->slots = opal_hwloc_base_get_npus(topo, obj); } node->topology = topo; #endif opal_output_verbose(1, orte_ras_base_framework.framework_output, "Created Node <%10s> [%3d : %3d]", node->name, node->slots, node->slots_max); opal_list_append(nodes, &node->super); } } /* record the number of allocated nodes */ orte_num_allocated_nodes = opal_list_get_size(nodes); if (NULL != max_slot_cnt) { opal_argv_free(max_slot_cnt); } if (NULL != slot_cnt) { opal_argv_free(slot_cnt); } if (NULL != node_cnt) { opal_argv_free(node_cnt); } return ORTE_SUCCESS; error_silent: if (NULL != max_slot_cnt) { opal_argv_free(max_slot_cnt); } if (NULL != slot_cnt) { opal_argv_free(slot_cnt); } if (NULL != node_cnt) { opal_argv_free(node_cnt); } return ORTE_ERR_SILENT; }
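/* Illustrative sketch (not from the module above): the synthetic-topology path
 * the simulator uses to fake nodes it does not have. A plain numeric synthetic
 * description gives the arity of each level and lets hwloc choose suitable
 * object types; the string "2 4 2" is just an example. The function name is
 * hypothetical; hwloc 1.x API assumed. */
#include <hwloc.h>
#include <stdio.h>

static int count_synthetic_pus(void)
{
    hwloc_topology_t topo;
    int npus;

    if (0 != hwloc_topology_init(&topo)) {
        return -1;
    }
    if (0 != hwloc_topology_set_synthetic(topo, "2 4 2") ||
        0 != hwloc_topology_load(topo)) {
        hwloc_topology_destroy(topo);
        return -1;
    }
    npus = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_PU);
    printf("synthetic topology exposes %d PUs\n", npus);
    hwloc_topology_destroy(topo);
    return npus;
}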
static void add_process_objects(hwloc_topology_t topology) { #ifdef HAVE_DIRENT_H hwloc_obj_t root; hwloc_bitmap_t cpuset; #ifdef HWLOC_LINUX_SYS hwloc_bitmap_t task_cpuset; #endif /* HWLOC_LINUX_SYS */ DIR *dir; struct dirent *dirent; const struct hwloc_topology_support *support; root = hwloc_get_root_obj(topology); support = hwloc_topology_get_support(topology); if (!support->cpubind->get_proc_cpubind) return; dir = opendir("/proc"); if (!dir) return; cpuset = hwloc_bitmap_alloc(); #ifdef HWLOC_LINUX_SYS task_cpuset = hwloc_bitmap_alloc(); #endif /* HWLOC_LINUX_SYS */ while ((dirent = readdir(dir))) { long local_pid_number; hwloc_pid_t local_pid; char *end; char name[80]; int proc_cpubind; local_pid_number = strtol(dirent->d_name, &end, 10); if (*end) /* Not a number */ continue; snprintf(name, sizeof(name), "%ld", local_pid_number); local_pid = hwloc_pid_from_number(local_pid_number, 0); proc_cpubind = hwloc_get_proc_cpubind(topology, local_pid, cpuset, 0) != -1; #ifdef HWLOC_LINUX_SYS { char comm[16]; char *path; size_t pathlen = 6 + strlen(dirent->d_name) + 1 + 7 + 1; path = malloc(pathlen); { /* Get the process name */ char cmd[64]; int file; ssize_t n; snprintf(path, pathlen, "/proc/%s/cmdline", dirent->d_name); file = open(path, O_RDONLY); if (file < 0) { /* Ignore errors */ free(path); continue; } n = read(file, cmd, sizeof(cmd)); close(file); if (n <= 0) { /* Ignore kernel threads and errors */ free(path); continue; } snprintf(path, pathlen, "/proc/%s/comm", dirent->d_name); file = open(path, O_RDONLY); if (file >= 0) { n = read(file, comm, sizeof(comm) - 1); close(file); if (n > 0) { comm[n] = 0; if (n > 1 && comm[n-1] == '\n') comm[n-1] = 0; } else { snprintf(comm, sizeof(comm), "(unknown)"); } } else { /* Old kernel, have to look at old file */ char stats[32]; char *parenl = NULL, *parenr; snprintf(path, pathlen, "/proc/%s/stat", dirent->d_name); file = open(path, O_RDONLY); if (file < 0) { /* Ignore errors */ free(path); continue; } /* "pid (comm) ..." 
*/ n = read(file, stats, sizeof(stats) - 1); close(file); if (n > 0) { stats[n] = 0; parenl = strchr(stats, '('); parenr = strchr(stats, ')'); if (!parenr) parenr = &stats[sizeof(stats)-1]; *parenr = 0; } if (!parenl) { snprintf(comm, sizeof(comm), "(unknown)"); } else { snprintf(comm, sizeof(comm), "%s", parenl+1); } } snprintf(name, sizeof(name), "%ld %s", local_pid_number, comm); } { /* Get threads */ DIR *task_dir; struct dirent *task_dirent; snprintf(path, pathlen, "/proc/%s/task", dirent->d_name); task_dir = opendir(path); if (task_dir) { while ((task_dirent = readdir(task_dir))) { long local_tid; char *task_end; const size_t tid_len = sizeof(local_tid)*3+1; size_t task_pathlen = 6 + strlen(dirent->d_name) + 1 + 4 + 1 + strlen(task_dirent->d_name) + 1 + 4 + 1; char *task_path; int comm_file; char task_comm[16] = ""; char task_name[sizeof(name) + 1 + tid_len + 1 + sizeof(task_comm) + 1]; ssize_t n; local_tid = strtol(task_dirent->d_name, &task_end, 10); if (*task_end) /* Not a number, or the main task */ continue; task_path = malloc(task_pathlen); snprintf(task_path, task_pathlen, "/proc/%s/task/%s/comm", dirent->d_name, task_dirent->d_name); comm_file = open(task_path, O_RDONLY); free(task_path); if (comm_file >= 0) { n = read(comm_file, task_comm, sizeof(task_comm) - 1); if (n < 0) n = 0; close(comm_file); task_comm[n] = 0; if (n > 1 && task_comm[n-1] == '\n') task_comm[n-1] = 0; if (!strcmp(comm, task_comm)) /* Same as process comm, do not show it again */ n = 0; } else { n = 0; } if (hwloc_linux_get_tid_cpubind(topology, local_tid, task_cpuset)) continue; if (proc_cpubind && hwloc_bitmap_isequal(task_cpuset, cpuset)) continue; if (n) { snprintf(task_name, sizeof(task_name), "%s %li %s", name, local_tid, task_comm); } else { snprintf(task_name, sizeof(task_name), "%s %li", name, local_tid); } insert_task(topology, task_cpuset, task_name); } closedir(task_dir); } } free(path); } #endif /* HWLOC_LINUX_SYS */ if (!proc_cpubind) continue; if (hwloc_bitmap_isincluded(root->cpuset, cpuset)) continue; insert_task(topology, cpuset, name); } hwloc_bitmap_free(cpuset); #ifdef HWLOC_LINUX_SYS hwloc_bitmap_free(task_cpuset); #endif /* HWLOC_LINUX_SYS */ closedir(dir); #endif /* HAVE_DIRENT_H */ }
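/* Illustrative sketch (not from the function above): the per-process query that
 * add_process_objects builds on - check the get_proc_cpubind support flag, then
 * ask where a given pid is currently bound. The function name is hypothetical;
 * hwloc 1.x API assumed. */
#include <hwloc.h>
#include <stdio.h>
#include <stdlib.h>

static void print_pid_binding(hwloc_topology_t topo, hwloc_pid_t pid)
{
    const struct hwloc_topology_support *sup = hwloc_topology_get_support(topo);
    hwloc_bitmap_t set;
    char *str = NULL;

    if (!sup->cpubind->get_proc_cpubind) {
        fprintf(stderr, "querying other processes' bindings is not supported\n");
        return;
    }
    set = hwloc_bitmap_alloc();
    if (0 == hwloc_get_proc_cpubind(topo, pid, set, 0)) {
        hwloc_bitmap_list_asprintf(&str, set);
        printf("pid is bound to cpus %s\n", str);
        free(str);
    }
    hwloc_bitmap_free(set);
}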
int main(int argc, char* argv[]) { hwloc_obj_t obj; unsigned j, k; struct hwloc_topology_support *support; int rc; if (2 != argc) { fprintf(stderr, "Usage: opal_hwloc <topofile>\n"); exit(1); } if (0 > (rc = opal_init(&argc, &argv))) { fprintf(stderr, "opal_hwloc: couldn't init opal - error code %d\n", rc); return rc; } if (0 != hwloc_topology_init(&my_topology)) { return OPAL_ERR_NOT_SUPPORTED; } if (0 != hwloc_topology_set_xml(my_topology, argv[1])) { hwloc_topology_destroy(my_topology); return OPAL_ERR_NOT_SUPPORTED; } /* since we are loading this from an external source, we have to * explicitly set a flag so hwloc sets things up correctly */ if (0 != hwloc_topology_set_flags(my_topology, (HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM | HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM | HWLOC_TOPOLOGY_FLAG_IO_DEVICES))) { hwloc_topology_destroy(my_topology); return OPAL_ERR_NOT_SUPPORTED; } if (0 != hwloc_topology_load(my_topology)) { hwloc_topology_destroy(my_topology); return OPAL_ERR_NOT_SUPPORTED; } /* remove the hostname from the topology. Unfortunately, hwloc * decided to add the source hostname to the "topology", thus * rendering it unusable as a pure topological description. So * we remove that information here. */ obj = hwloc_get_root_obj(my_topology); for (k=0; k < obj->infos_count; k++) { if (NULL == obj->infos[k].name || NULL == obj->infos[k].value) { continue; } if (0 == strncmp(obj->infos[k].name, "HostName", strlen("HostName"))) { free(obj->infos[k].name); free(obj->infos[k].value); /* left justify the array */ for (j=k; j < obj->infos_count-1; j++) { obj->infos[j] = obj->infos[j+1]; } obj->infos[obj->infos_count-1].name = NULL; obj->infos[obj->infos_count-1].value = NULL; obj->infos_count--; break; } } /* unfortunately, hwloc does not include support info in its * xml output :-(( We default to assuming it is present as * systems that use this option are likely to provide * binding support */ support = (struct hwloc_topology_support*)hwloc_topology_get_support(my_topology); support->cpubind->set_thisproc_cpubind = true; /* filter the cpus thru any default cpu set */ opal_hwloc_base_filter_cpus(my_topology); /* fill opal_cache_line_size global with the smallest L1 cache line size */ fill_cache_line_size(); /* test it */ if (NULL == hwloc_get_obj_by_type(my_topology, HWLOC_OBJ_CORE, 0)) { fprintf(stderr, "DIDN'T FIND A CORE\n"); } hwloc_topology_destroy(my_topology); opal_finalize(); return 0; }