/* Return the OS index of the single NUMA node whose cpuset is exactly
 * hwloc_set, or -1 when the set does not correspond to exactly one node
 * (or when the system reports no ccNUMA support). */
static ldom_t
hwloc_hpux_find_ldom(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set)
{
  hwloc_obj_t cur;

  if (sysconf(_SC_CCNUMA_SUPPORT) != 1)
    return -1;

  cur = hwloc_get_first_largest_obj_inside_cpuset(topology, hwloc_set);
  if (!hwloc_bitmap_isequal(cur->cpuset, hwloc_set))
    /* Does not correspond to exactly one node */
    return -1;

  /* cur is the highest possibly matching object, but some (single) child
   * with the same cpuset could match too: walk down until a NUMA node. */
  for (; cur->type != HWLOC_OBJ_NUMANODE; cur = cur->first_child) {
    /* only keep descending through a single child carrying the same cpuset */
    if (!cur->first_child || !cur->first_child->cpuset
        || !hwloc_bitmap_isequal(cur->cpuset, cur->first_child->cpuset))
      return -1;
  }
  return cur->os_index;
}
/* Unit test for hwloc_bitmap_singlify(): it must keep only the lowest
 * set bit of a bitmap (and leave an empty bitmap empty). */
int main(void)
{
  hwloc_bitmap_t set, ref;

  set = hwloc_bitmap_alloc();
  ref = hwloc_bitmap_alloc();

  /* singlifying an empty set leaves it empty */
  hwloc_bitmap_singlify(set);
  assert(hwloc_bitmap_iszero(set));

  /* singlifying the full set keeps only the first bit */
  hwloc_bitmap_fill(set);
  hwloc_bitmap_singlify(set);
  hwloc_bitmap_zero(ref);
  hwloc_bitmap_set(ref, 0);
  assert(hwloc_bitmap_isequal(set, ref));
  assert(!hwloc_bitmap_compare(set, ref));

  /* an actual non-trivial set keeps only its lowest bit */
  hwloc_bitmap_zero(set);
  hwloc_bitmap_set(set, 45);
  hwloc_bitmap_set(set, 46);
  hwloc_bitmap_set(set, 517);
  hwloc_bitmap_singlify(set);
  hwloc_bitmap_zero(ref);
  hwloc_bitmap_set(ref, 45);
  assert(hwloc_bitmap_isequal(set, ref));
  assert(!hwloc_bitmap_compare(set, ref));

  hwloc_bitmap_free(set);
  hwloc_bitmap_free(ref);
  return 0;
}
/* Call hwloc_bitmap_compare_inclusion() on (set1, set2) and verify its
 * result against the individual predicate functions; returns the result. */
static int check_compare(hwloc_const_bitmap_t set1, hwloc_const_bitmap_t set2)
{
  int res = hwloc_bitmap_compare_inclusion(set1, set2);
  int expected;

  /* derive the expected relation from the elementary predicates,
   * checking them in the same order as the function under test */
  if (hwloc_bitmap_iszero(set1))
    expected = hwloc_bitmap_iszero(set2) ? HWLOC_BITMAP_EQUAL : HWLOC_BITMAP_INCLUDED;
  else if (hwloc_bitmap_iszero(set2))
    expected = HWLOC_BITMAP_CONTAINS;
  else if (hwloc_bitmap_isequal(set1, set2))
    expected = HWLOC_BITMAP_EQUAL;
  else if (hwloc_bitmap_isincluded(set1, set2))
    expected = HWLOC_BITMAP_INCLUDED;
  else if (hwloc_bitmap_isincluded(set2, set1))
    expected = HWLOC_BITMAP_CONTAINS;
  else if (hwloc_bitmap_intersects(set1, set2))
    expected = HWLOC_BITMAP_INTERSECTS;
  else
    expected = HWLOC_BITMAP_DIFFERENT;

  assert(res == expected);
  return res;
}
/* Allocate len bytes with the given memory-binding policy on HP-UX,
 * implemented with policy-specific mmap() flags.
 * Only the complete nodeset is supported: a specific set of nodes is
 * rejected with EXDEV (falling back to hwloc_alloc_or_fail). */
static void *
hwloc_hpux_alloc_membind(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
{
  int mmap_flags;

  /* Can not give a set of nodes. */
  if (!hwloc_bitmap_isequal(nodeset, hwloc_topology_get_complete_nodeset(topology))) {
    errno = EXDEV;
    return hwloc_alloc_or_fail(topology, len, flags);
  }

  /* translate the hwloc policy into mmap() memory-placement flags */
  switch (policy) {
  case HWLOC_MEMBIND_DEFAULT:
  case HWLOC_MEMBIND_BIND:
    mmap_flags = 0;
    break;
  case HWLOC_MEMBIND_FIRSTTOUCH:
    mmap_flags = MAP_MEM_FIRST_TOUCH;
    break;
  case HWLOC_MEMBIND_INTERLEAVE:
    mmap_flags = MAP_MEM_INTERLEAVED;
    break;
  default:
    errno = ENOSYS;
    return NULL;
  }

  return mmap(NULL, len, PROT_READ | PROT_WRITE,
              MAP_PRIVATE | MAP_ANONYMOUS | mmap_flags, -1, 0);
}
/* Fill objs[] with up to max objects at src's topology level, ordered from
 * closest to furthest from src; returns the number of objects stored.
 * Works by climbing src's ancestors: each time the ancestor's cpuset
 * actually grows, the same-level objects newly covered are appended. */
unsigned hwloc_get_closest_objs (struct hwloc_topology *topology, struct hwloc_obj *src, struct hwloc_obj **objs, unsigned max)
{
  struct hwloc_obj *parent, *nextparent, **src_objs;
  int i,src_nbobjects;
  unsigned stored = 0;
  /* all objects at src's level, scanned repeatedly below */
  src_nbobjects = topology->level_nbobjects[src->depth];
  src_objs = topology->levels[src->depth];
  parent = src;
  while (stored < max) {
    /* climb until an ancestor whose cpuset is strictly larger (or stop at the root) */
    while (1) {
      nextparent = parent->parent;
      if (!nextparent)
        goto out;
      if (!hwloc_bitmap_isequal(parent->cpuset, nextparent->cpuset))
        break;
      parent = nextparent;
    }
    /* traverse src's objects and find those that are in nextparent and were not in parent */
    for(i=0; i<src_nbobjects; i++) {
      if (hwloc_bitmap_isincluded(src_objs[i]->cpuset, nextparent->cpuset)
          && !hwloc_bitmap_isincluded(src_objs[i]->cpuset, parent->cpuset)) {
        objs[stored++] = src_objs[i];
        if (stored == max)
          goto out;
      }
    }
    parent = nextparent;
  }
 out:
  return stored;
}
/* Bind a thread to the given cpuset on OSF/Tru64 using radsets.
 * Binding to the complete cpuset detaches the thread instead.
 * Returns 0 on success, -1 with errno set on error. */
static int
hwloc_osf_set_thread_cpubind(hwloc_topology_t topology, hwloc_thread_t thread, hwloc_const_bitmap_t hwloc_set, int flags)
{
  radset_t radset;

  /* Complete cpuset: simply detach the thread from any RAD. */
  if (hwloc_bitmap_isequal(hwloc_set, hwloc_topology_get_complete_cpuset(topology))) {
    if ((errno = pthread_rad_detach(thread)))
      return -1;
    return 0;
  }

  /* Apparently OSF migrates pages */
  if (flags & HWLOC_CPUBIND_NOMEMBIND) {
    errno = ENOSYS;
    return -1;
  }

  if (!prepare_radset(topology, &radset, hwloc_set))
    return -1;

  if (flags & HWLOC_CPUBIND_STRICT) {
    if ((errno = pthread_rad_bind(thread, radset, RAD_INSIST | RAD_WAIT))) {
      /* BUGFIX: destroy the radset on the error path too, it was leaked */
      radsetdestroy(&radset);
      return -1;
    }
  } else {
    if ((errno = pthread_rad_attach(thread, radset, RAD_WAIT))) {
      /* BUGFIX: destroy the radset on the error path too, it was leaked */
      radsetdestroy(&radset);
      return -1;
    }
  }
  radsetdestroy(&radset);

  return 0;
}
/* Apply a memory-binding policy to the area [addr, addr+len) on Solaris,
 * implemented via madvise() access advice.
 * Only the complete nodeset is supported: a specific set of nodes is
 * rejected with EXDEV. Returns madvise()'s result. */
static int
hwloc_solaris_set_area_membind(hwloc_topology_t topology, const void *addr, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags __hwloc_attribute_unused)
{
  size_t pgoff;
  int advice;

  /* Can not give a set of nodes just for an area. */
  if (!hwloc_bitmap_isequal(nodeset, hwloc_topology_get_complete_nodeset(topology))) {
    errno = EXDEV;
    return -1;
  }

  /* translate the hwloc policy into a madvise() access advice */
  switch (policy) {
  case HWLOC_MEMBIND_DEFAULT:
  case HWLOC_MEMBIND_BIND:
    advice = MADV_ACCESS_DEFAULT;
    break;
  case HWLOC_MEMBIND_FIRSTTOUCH:
  case HWLOC_MEMBIND_NEXTTOUCH:
    advice = MADV_ACCESS_LWP;
    break;
  case HWLOC_MEMBIND_INTERLEAVE:
    advice = MADV_ACCESS_MANY;
    break;
  default:
    errno = ENOSYS;
    return -1;
  }

  /* madvise() wants a page-aligned start address: round down and extend len */
  pgoff = (uintptr_t) addr & (sysconf(_SC_PAGESIZE)-1);
  addr = (char*) addr - pgoff;
  len += pgoff;
  return madvise((void*) addr, len, advice);
}
int main(void) { hwloc_topology_t topology; int i; int err; hwloc_topology_init(&topology); hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_IO_DEVICES); hwloc_topology_load(topology); for(i=0; ; i++) { hwloc_bitmap_t set; hwloc_obj_t osdev, ancestor; const char *value; osdev = hwloc_intel_mic_get_device_osdev_by_index(topology, i); if (!osdev) break; assert(osdev); ancestor = hwloc_get_non_io_ancestor_obj(topology, osdev); printf("found OSDev %s\n", osdev->name); err = strncmp(osdev->name, "mic", 3); assert(!err); assert(atoi(osdev->name+3) == (int) i); assert(osdev->attr->osdev.type == HWLOC_OBJ_OSDEV_COPROC); value = hwloc_obj_get_info_by_name(osdev, "CoProcType"); err = strcmp(value, "MIC"); assert(!err); value = hwloc_obj_get_info_by_name(osdev, "MICFamily"); printf("found MICFamily %s\n", value); value = hwloc_obj_get_info_by_name(osdev, "MICSKU"); printf("found MICSKU %s\n", value); value = hwloc_obj_get_info_by_name(osdev, "MICActiveCores"); printf("found MICActiveCores %s\n", value); value = hwloc_obj_get_info_by_name(osdev, "MICMemorySize"); printf("found MICMemorySize %s\n", value); set = hwloc_bitmap_alloc(); err = hwloc_intel_mic_get_device_cpuset(topology, i, set); if (err < 0) { printf("failed to get cpuset for device %d\n", i); } else { char *cpuset_string = NULL; hwloc_bitmap_asprintf(&cpuset_string, set); printf("got cpuset %s for device %d\n", cpuset_string, i); assert(hwloc_bitmap_isequal(set, ancestor->cpuset)); free(cpuset_string); } hwloc_bitmap_free(set); } hwloc_topology_destroy(topology); return 0; }
/* Bind "who" (of resource kind "what") to the given cpuset on AIX
 * using a resource set (rset). The resulting binding is always strict. */
static int
hwloc_aix_set_sth_cpubind(hwloc_topology_t topology, rstype_t what, rsid_t who, hwloc_const_bitmap_t hwloc_set, int flags __hwloc_attribute_unused)
{
  rsethandle_t rset;
  unsigned cpuidx;
  int ret;

  /* CPU binding cannot be decoupled from memory binding here. */
  if (flags & HWLOC_CPUBIND_NOMEMBIND) {
    errno = ENOSYS;
    return -1;
  }

  /* The resulting binding is always strict; the complete cpuset
   * simply means detaching any existing rset. */
  if (hwloc_bitmap_isequal(hwloc_set, hwloc_topology_get_complete_cpuset(topology)))
    return ra_detachrset(what, who, 0) ? -1 : 0;

  /* build an rset containing one resource per requested processor */
  rset = rs_alloc(RS_EMPTY);
  hwloc_bitmap_foreach_begin(cpuidx, hwloc_set)
    rs_op(RS_ADDRESOURCE, rset, NULL, R_PROCS, cpuidx);
  hwloc_bitmap_foreach_end();

  ret = ra_attachrset(what, who, rset, 0);
  rs_free(rset);
  return ret;
}
int main(void) { hwloc_topology_t topology; hwloc_bitmap_t set, set2, nocpunomemnodeset, nocpubutmemnodeset, nomembutcpunodeset, nomembutcpucpuset; hwloc_obj_t node; struct bitmask *bitmask, *bitmask2; unsigned long mask; unsigned long maxnode; int i; if (numa_available() < 0) /* libnuma has inconsistent behavior when the kernel isn't NUMA-aware. * don't try to check everything precisely. */ exit(77); hwloc_topology_init(&topology); hwloc_topology_load(topology); /* convert full stuff between cpuset and libnuma */ set = hwloc_bitmap_alloc(); nocpunomemnodeset = hwloc_bitmap_alloc(); nocpubutmemnodeset = hwloc_bitmap_alloc(); nomembutcpunodeset = hwloc_bitmap_alloc(); nomembutcpucpuset = hwloc_bitmap_alloc(); /* gather all nodes if any, or the whole system if no nodes */ if (hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_NUMANODE)) { node = NULL; while ((node = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, node)) != NULL) { hwloc_bitmap_or(set, set, node->cpuset); if (hwloc_bitmap_iszero(node->cpuset)) { if (node->memory.local_memory) hwloc_bitmap_set(nocpubutmemnodeset, node->os_index); else hwloc_bitmap_set(nocpunomemnodeset, node->os_index); } else if (!node->memory.local_memory) { hwloc_bitmap_set(nomembutcpunodeset, node->os_index); hwloc_bitmap_or(nomembutcpucpuset, nomembutcpucpuset, node->cpuset); } } } else { hwloc_bitmap_or(set, set, hwloc_topology_get_complete_cpuset(topology)); } set2 = hwloc_bitmap_alloc(); hwloc_cpuset_from_linux_libnuma_bitmask(topology, set2, numa_all_nodes_ptr); /* numa_all_nodes_ptr doesn't contain NODES with CPU but no memory */ hwloc_bitmap_or(set2, set2, nomembutcpucpuset); assert(hwloc_bitmap_isequal(set, set2)); hwloc_bitmap_free(set2); bitmask = hwloc_cpuset_to_linux_libnuma_bitmask(topology, set); /* numa_all_nodes_ptr contains NODES with no CPU but with memory */ hwloc_bitmap_foreach_begin(i, nocpubutmemnodeset) { numa_bitmask_setbit(bitmask, i); } hwloc_bitmap_foreach_end();
/* Report the outcome of a get-binding test: prints FAILED on error
 * ("X" prefix when the operation is not even supported), OK when the
 * result matches the expectation (or no expectation was given), and the
 * expected/actual bitmaps otherwise. */
static void result_get(const char *msg, hwloc_const_bitmap_t expected, hwloc_const_bitmap_t result, int err, int supported)
{
  const char *errmsg = strerror(errno);
  if (err)
    printf("%-40s: %sFAILED (%d, %s)\n", msg, supported?"":"X", errno, errmsg);
  else if (!expected || hwloc_bitmap_isequal(expected, result))
    printf("%-40s: OK\n", msg);
  else {
    char *expected_s, *result_s;
    hwloc_bitmap_asprintf(&expected_s, expected);
    hwloc_bitmap_asprintf(&result_s, result);
    printf("%-40s: expected %s, got %s\n", msg, expected_s, result_s);
    /* BUGFIX: free the strings allocated by hwloc_bitmap_asprintf() */
    free(expected_s);
    free(result_s);
  }
}
/* Return the OS index of the single NUMA node whose cpuset is exactly
 * hwloc_set, or -1 when there is no such node (or no ccNUMA support). */
static ldom_t
hwloc_hpux_find_ldom(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set)
{
  hwloc_obj_t largest;

  /* nothing to look up unless the system is ccNUMA */
  if (sysconf(_SC_CCNUMA_SUPPORT) != 1)
    return -1;

  largest = hwloc_get_first_largest_obj_inside_cpuset(topology, hwloc_set);
  /* must correspond to exactly one node */
  if (largest->type != HWLOC_OBJ_NODE
      || !hwloc_bitmap_isequal(largest->cpuset, hwloc_set))
    return -1;
  return largest->os_index;
}
/* Check that the current thread memory binding equals nodeset.
 * When print is nonzero, also display the policy and the largest object
 * covered by the binding. Returns 0 on match, -1 on mismatch or error. */
static int chk_mem_bind(hwloc_topology_t topology, hwloc_nodeset_t nodeset, int print)
{
  hwloc_membind_policy_t policy;
  hwloc_bitmap_t checkset = hwloc_bitmap_alloc();
  if(hwloc_get_membind(topology, checkset, &policy, HWLOC_MEMBIND_THREAD|HWLOC_MEMBIND_BYNODESET) == -1){
    perror("get_membind");
    hwloc_bitmap_free(checkset);
    return -1;
  }
  if(print){
    const char * policy_name;
    switch(policy){
    case HWLOC_MEMBIND_DEFAULT:
      policy_name = "DEFAULT";
      break;
    case HWLOC_MEMBIND_FIRSTTOUCH:
      policy_name = "FIRSTTOUCH";
      break;
    case HWLOC_MEMBIND_BIND:
      policy_name = "BIND";
      break;
    case HWLOC_MEMBIND_INTERLEAVE:
      policy_name = "INTERLEAVE";
      break;
    case HWLOC_MEMBIND_NEXTTOUCH:
      policy_name = "NEXTTOUCH";
      break;
    case HWLOC_MEMBIND_MIXED:
      policy_name = "MIXED";
      break;
    default:
      policy_name = NULL;
      break;
    }
    hwloc_obj_t mem_obj = hwloc_get_first_largest_obj_inside_cpuset(topology, checkset);
    /* BUGFIX: printf("%s", NULL) is undefined behavior for unknown policies */
    printf("membind(%s)=%s:%d\n", policy_name ? policy_name : "UNKNOWN",
           hwloc_obj_type_string(mem_obj->type), mem_obj->logical_index);
  }
  if(nodeset == NULL){
    /* BUGFIX: checkset was leaked on this early return */
    hwloc_bitmap_free(checkset);
    return -1;
  }
  int ret = hwloc_bitmap_isequal(nodeset,checkset);
  hwloc_bitmap_free(checkset);
  return ret ? 0 : -1;
}
/* Check that the current thread CPU binding equals cpuset.
 * When print is nonzero, also display the largest object covered by the
 * binding. Returns 0 on match, -1 on mismatch or error. */
static int chk_cpu_bind(hwloc_topology_t topology, hwloc_cpuset_t cpuset, int print)
{
  hwloc_bitmap_t checkset = hwloc_bitmap_alloc();
  if(hwloc_get_cpubind(topology, checkset, HWLOC_CPUBIND_THREAD) == -1){
    perror("get_cpubind");
    hwloc_bitmap_free(checkset);
    return -1;
  }
  if(print){
    hwloc_obj_t cpu_obj = hwloc_get_first_largest_obj_inside_cpuset(topology, checkset);
    printf("cpubind=%s:%d\n",hwloc_obj_type_string(cpu_obj->type),cpu_obj->logical_index);
  }
  if(cpuset == NULL){
    /* BUGFIX: checkset was leaked on this early return */
    hwloc_bitmap_free(checkset);
    return -1;
  }
  int ret = hwloc_bitmap_isequal(cpuset,checkset);
  hwloc_bitmap_free(checkset);
  return ret ? 0 : -1;
}
/* Recursive helper: store into *res (at most *max entries) the largest
 * objects below "current" that exactly cover the part of "set" they
 * intersect; *res and *max are advanced/decremented as objects are stored.
 * Returns the number of objects stored by this call. */
static int
hwloc__get_largest_objs_inside_cpuset (struct hwloc_obj *current, hwloc_const_bitmap_t set, struct hwloc_obj ***res, int *max)
{
  int gotten = 0;
  unsigned i;
  /* the caller must ensure this */
  if (*max <= 0)
    return 0;
  /* current exactly covers the remaining set: store it and stop recursing */
  if (hwloc_bitmap_isequal(current->cpuset, set)) {
    **res = current;
    (*res)++;
    (*max)--;
    return 1;
  }
  for (i=0; i<current->arity; i++) {
    hwloc_bitmap_t subset = hwloc_bitmap_dup(set);
    int ret;
    /* split out the cpuset part corresponding to this child and see if there's anything to do */
    if (current->children[i]->cpuset) {
      hwloc_bitmap_and(subset, subset, current->children[i]->cpuset);
      if (hwloc_bitmap_iszero(subset)) {
        hwloc_bitmap_free(subset);
        continue;
      }
    }
    ret = hwloc__get_largest_objs_inside_cpuset (current->children[i], subset, res, max);
    gotten += ret;
    hwloc_bitmap_free(subset);
    /* if no more room to store remaining objects, return what we got so far */
    if (!*max)
      break;
  }
  return gotten;
}
/* Recursively output topology in a console fashion */
static void
output_topology (hwloc_topology_t topology, hwloc_obj_t l, hwloc_obj_t parent, FILE *output, int i, int logical, int verbose_mode)
{
  hwloc_obj_t child;
  /* in non-verbose mode (and without cpusets shown), identical
   * parent/child pairs get merged onto one line */
  int group_identical = (verbose_mode <= 1) && !lstopo_show_cpuset;
  unsigned collapse = 1;
  if (l->type == HWLOC_OBJ_PCI_DEVICE) {
    /* "lstopoCollapse" is a precomputed factor for identical sibling PCI
     * devices; 0 means this device was folded into a previous sibling */
    const char *collapsestr = hwloc_obj_get_info_by_name(l, "lstopoCollapse");
    if (collapsestr)
      collapse = atoi(collapsestr);
    if (!collapse)
      return;
  }
  if (group_identical
      && parent && parent->arity == 1
      && l->cpuset && parent->cpuset
      && hwloc_bitmap_isequal(l->cpuset, parent->cpuset)) {
    /* in non-verbose mode, merge objects with their parent if they are exactly identical */
    fprintf(output, " + ");
  } else {
    if (parent)
      fprintf(output, "\n");
    indent (output, 2*i);
    i++;
  }
  /* print the object itself, wrapped in "N x { ... }" when collapsed */
  if (collapse > 1)
    fprintf(output, "%u x { ", collapse);
  output_console_obj(topology, l, output, logical, verbose_mode, collapse > 1);
  if (collapse > 1)
    fprintf(output, " }");
  /* recurse over normal, I/O and Misc children */
  for(child = l->first_child; child; child = child->next_sibling)
    if (child->type != HWLOC_OBJ_PU || !lstopo_ignore_pus)
      output_topology (topology, child, l, output, i, logical, verbose_mode);
  for(child = l->io_first_child; child; child = child->next_sibling)
    output_topology (topology, child, l, output, i, logical, verbose_mode);
  for(child = l->misc_first_child; child; child = child->next_sibling)
    output_topology (topology, child, l, output, i, logical, verbose_mode);
}
/* Bind "who" (of resource kind "what", belonging to process "pid") to the
 * given cpuset on AIX, using a resource set (rset).
 * Returns 0 on success, -1 with errno set on error. */
static int
hwloc_aix_set_sth_cpubind(hwloc_topology_t topology, rstype_t what, rsid_t who, pid_t pid, hwloc_const_bitmap_t hwloc_set, int flags __hwloc_attribute_unused)
{
  rsethandle_t rad;
  int res;
  unsigned cpu;
  /* CPU binding cannot be decoupled from memory binding here */
  if (flags & HWLOC_CPUBIND_NOMEMBIND) {
    errno = ENOSYS;
    return -1;
  }
  /* The resulting binding is always strict */
  if (hwloc_bitmap_isequal(hwloc_set, hwloc_topology_get_complete_cpuset(topology))) {
    /* complete cpuset: simply detach any existing rset */
    if (ra_detachrset(what, who, 0))
      return -1;
    return 0;
  }
  /* build an rset with one resource per requested processor */
  rad = rs_alloc(RS_EMPTY);
  hwloc_bitmap_foreach_begin(cpu, hwloc_set)
    rs_op(RS_ADDRESOURCE, rad, NULL, R_PROCS, cpu);
  hwloc_bitmap_foreach_end();
  res = ra_attachrset(what, who, rad, 0);
  if (res < 0 && errno == EPERM) {
    /* EPERM may mean that one thread has been bound with bindprocessor().
     * Unbind the entire process (we can't unbind individual threads)
     * and try again.
     */
    bindprocessor(BINDPROCESS, pid, PROCESSOR_CLASS_ANY);
    res = ra_attachrset(what, who, rad, 0);
  }
  rs_free(rad);
  return res;
}
/* Note: get_cpubind not available on HP-UX */
/* Bind process "pid" to the given cpuset via mpctl(): first as a single
 * locality domain, then as a single processor; other sets are rejected
 * with EXDEV. The complete cpuset simply leaves the process unbound. */
static int
hwloc_hpux_set_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_bitmap_t hwloc_set, int flags)
{
  spu_t cpu;
  ldom_t ldom;

  /* Drop previous binding */
  mpctl(MPC_SETLDOM, MPC_LDOMFLOAT, pid);
  mpctl(MPC_SETPROCESS, MPC_SPUFLOAT, pid);

  /* complete cpuset: the process stays floating */
  if (hwloc_bitmap_isequal(hwloc_set, hwloc_topology_get_complete_cpuset(topology)))
    return 0;

  /* prefer binding to a single locality domain */
  ldom = hwloc_hpux_find_ldom(topology, hwloc_set);
  if (ldom != -1)
    return mpctl(MPC_SETLDOM, ldom, pid);

  /* otherwise try a single processor */
  cpu = hwloc_hpux_find_spu(topology, hwloc_set);
  if (cpu != -1)
    return mpctl(flags & HWLOC_CPUBIND_STRICT ? MPC_SETPROCESS_FORCE : MPC_SETPROCESS, cpu, pid);

  /* no single ldom/spu covers the requested set */
  errno = EXDEV;
  return -1;
}
/* Recursively output topology in a console fashion */
static void
output_topology (hwloc_topology_t topology, hwloc_obj_t l, hwloc_obj_t parent, FILE *output, int i, int logical, int verbose_mode)
{
  unsigned x;
  /* in non-verbose mode (and without cpusets shown), identical
   * parent/child pairs get merged onto one line */
  int group_identical = (verbose_mode <= 1) && !lstopo_show_cpuset;
  if (group_identical
      && parent && parent->arity == 1
      && l->cpuset && parent->cpuset
      && hwloc_bitmap_isequal(l->cpuset, parent->cpuset)) {
    /* in non-verbose mode, merge objects with their parent if they are exactly identical */
    fprintf(output, " + ");
  } else {
    if (parent)
      fprintf(output, "\n");
    indent (output, 2*i);
    i++;
  }
  output_console_obj(topology, l, output, logical, verbose_mode);
  /* NOTE(review): the second half of this condition looks redundant —
   * when l->arity is 0 the loop below runs zero times anyway */
  if (l->arity || (!i && !l->arity)) {
    for (x=0; x<l->arity; x++)
      if (l->children[x]->type != HWLOC_OBJ_PU || !lstopo_ignore_pus)
        output_topology (topology, l->children[x], l, output, i, logical, verbose_mode);
  }
}
/* Bind thread "pthread" to the given cpuset on HP-UX: first as a single
 * locality domain, then as a single processor; other sets are rejected
 * with EXDEV. The complete cpuset simply leaves the thread unbound. */
static int
hwloc_hpux_set_thread_cpubind(hwloc_topology_t topology, hwloc_thread_t pthread, hwloc_const_bitmap_t hwloc_set, int flags)
{
  spu_t spu, prevspu;
  ldom_t ldom, prevldom;

  /* Drop previous binding */
  pthread_ldom_bind_np(&prevldom, PTHREAD_LDOMFLOAT_NP, pthread);
  pthread_processor_bind_np(PTHREAD_BIND_ADVISORY_NP, &prevspu, PTHREAD_SPUFLOAT_NP, pthread);

  /* complete cpuset: the thread stays floating */
  if (hwloc_bitmap_isequal(hwloc_set, hwloc_topology_get_complete_cpuset(topology)))
    return 0;

  /* prefer binding to a single locality domain */
  ldom = hwloc_hpux_find_ldom(topology, hwloc_set);
  if (ldom != -1)
    return pthread_ldom_bind_np(&prevldom, ldom, pthread);

  /* otherwise try a single processor */
  spu = hwloc_hpux_find_spu(topology, hwloc_set);
  if (spu != -1)
    return pthread_processor_bind_np(flags & HWLOC_CPUBIND_STRICT ? PTHREAD_BIND_FORCED_NP : PTHREAD_BIND_ADVISORY_NP, &prevspu, spu, pthread);

  /* no single ldom/spu covers the requested set */
  errno = EXDEV;
  return -1;
}
/* hwloc-bind(1) entry point.
 * Parses command-line options, lazily loads the topology, then either
 * prints the current CPU/memory binding (--get / --get-last-cpu-location)
 * or applies the requested CPU and/or memory binding, and finally execs
 * the remaining command line (if any). */
int main(int argc, char *argv[])
{
  hwloc_topology_t topology;
  int loaded = 0;
  int depth;
  hwloc_bitmap_t cpubind_set, membind_set;
  int got_cpubind = 0, got_membind = 0;
  int working_on_cpubind = 1; /* membind if 0 */
  int get_binding = 0;
  int use_nodeset = 0;
  int get_last_cpu_location = 0;
  unsigned long flags = 0;
  int force = 0;
  int single = 0;
  int verbose = 0;
  int only_hbm = -1;
  int logical = 1;
  int taskset = 0;
  unsigned cpubind_flags = 0;
  hwloc_membind_policy_t membind_policy = HWLOC_MEMBIND_BIND;
  int got_mempolicy = 0;
  unsigned membind_flags = 0;
  int opt;
  int ret;
  int pid_number = -1;
  int tid_number = -1;
  hwloc_pid_t pid = 0; /* only valid when pid_number > 0, but gcc-4.8 still reports uninitialized warnings */
  char *callname;
  struct hwloc_calc_location_context_s lcontext;
  struct hwloc_calc_set_context_s scontext;

  callname = argv[0];
  /* skip argv[0], handle options */
  argv++;
  argc--;

  hwloc_utils_check_api_version(callname);

  cpubind_set = hwloc_bitmap_alloc();
  membind_set = hwloc_bitmap_alloc();

  /* don't load now, in case some options change the config before the topology is actually used */
#define LOADED() (loaded)
#define ENSURE_LOADED() do { \
  if (!loaded) { \
    hwloc_topology_init(&topology); \
    hwloc_topology_set_all_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_ALL); \
    hwloc_topology_set_flags(topology, flags); \
    hwloc_topology_load(topology); \
    depth = hwloc_topology_get_depth(topology); \
    loaded = 1; \
  } \
} while (0)

  /* option-parsing loop: anything starting with '-' is an option,
   * anything else is a binding location, "--" ends option parsing */
  while (argc >= 1) {
    if (!strcmp(argv[0], "--")) {
      argc--;
      argv++;
      break;
    }
    opt = 0;
    if (*argv[0] == '-') {
      if (!strcmp(argv[0], "-v") || !strcmp(argv[0], "--verbose")) { verbose++; goto next; }
      if (!strcmp(argv[0], "-q") || !strcmp(argv[0], "--quiet")) { verbose--; goto next; }
      if (!strcmp(argv[0], "--help")) { usage("hwloc-bind", stdout); return EXIT_SUCCESS; }
      if (!strcmp(argv[0], "--single")) { single = 1; goto next; }
      if (!strcmp(argv[0], "-f") || !strcmp(argv[0], "--force")) { force = 1; goto next; }
      if (!strcmp(argv[0], "--strict")) {
        /* make both CPU and memory binding strict */
        cpubind_flags |= HWLOC_CPUBIND_STRICT;
        membind_flags |= HWLOC_MEMBIND_STRICT;
        goto next;
      }
      if (!strcmp(argv[0], "--pid")) {
        if (argc < 2) { usage ("hwloc-bind", stderr); exit(EXIT_FAILURE); }
        pid_number = atoi(argv[1]);
        opt = 1;
        goto next;
      }
#ifdef HWLOC_LINUX_SYS
      if (!strcmp(argv[0], "--tid")) {
        if (argc < 2) { usage ("hwloc-bind", stderr); exit(EXIT_FAILURE); }
        tid_number = atoi(argv[1]);
        opt = 1;
        goto next;
      }
#endif
      if (!strcmp (argv[0], "--version")) {
        printf("%s %s\n", callname, HWLOC_VERSION);
        exit(EXIT_SUCCESS);
      }
      if (!strcmp(argv[0], "-l") || !strcmp(argv[0], "--logical")) { logical = 1; goto next; }
      if (!strcmp(argv[0], "-p") || !strcmp(argv[0], "--physical")) { logical = 0; goto next; }
      if (!strcmp(argv[0], "--taskset")) { taskset = 1; goto next; }
      if (!strcmp (argv[0], "-e") || !strncmp (argv[0], "--get-last-cpu-location", 10)) { get_last_cpu_location = 1; goto next; }
      if (!strcmp (argv[0], "--get")) { get_binding = 1; goto next; }
      if (!strcmp (argv[0], "--nodeset")) { use_nodeset = 1; goto next; }
      if (!strcmp (argv[0], "--cpubind")) { working_on_cpubind = 1; goto next; }
      if (!strcmp (argv[0], "--membind")) { working_on_cpubind = 0; goto next; }
      if (!strcmp (argv[0], "--mempolicy")) {
        /* policies are matched by their first 2 characters (abbreviations allowed) */
        if (!strncmp(argv[1], "default", 2))
          membind_policy = HWLOC_MEMBIND_DEFAULT;
        else if (!strncmp(argv[1], "firsttouch", 2))
          membind_policy = HWLOC_MEMBIND_FIRSTTOUCH;
        else if (!strncmp(argv[1], "bind", 2))
          membind_policy = HWLOC_MEMBIND_BIND;
        else if (!strncmp(argv[1], "interleave", 2))
          membind_policy = HWLOC_MEMBIND_INTERLEAVE;
        else if (!strncmp(argv[1], "nexttouch", 2))
          membind_policy = HWLOC_MEMBIND_NEXTTOUCH;
        else {
          fprintf(stderr, "Unrecognized memory binding policy %s\n", argv[1]);
          usage ("hwloc-bind", stderr);
          exit(EXIT_FAILURE);
        }
        got_mempolicy = 1;
        opt = 1;
        goto next;
      }
      if (!strcmp(argv[0], "--hbm")) { only_hbm = 1; goto next; }
      if (!strcmp(argv[0], "--no-hbm")) { only_hbm = 0; goto next; }
      if (!strcmp (argv[0], "--whole-system")) {
        /* topology flags must be set before the topology is loaded */
        if (loaded) {
          fprintf(stderr, "Input option %s disallowed after options using the topology\n", argv[0]);
          exit(EXIT_FAILURE);
        }
        flags |= HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM;
        goto next;
      }
      if (!strcmp (argv[0], "--restrict")) {
        hwloc_bitmap_t restrictset;
        int err;
        if (argc < 2) { usage (callname, stdout); exit(EXIT_FAILURE); }
        restrictset = hwloc_bitmap_alloc();
        hwloc_bitmap_sscanf(restrictset, argv[1]);
        ENSURE_LOADED();
        err = hwloc_topology_restrict (topology, restrictset, 0);
        if (err) {
          perror("Restricting the topology");
          /* FALLTHRU */
        }
        hwloc_bitmap_free(restrictset);
        argc--;
        argv++;
        goto next;
      }
      fprintf (stderr, "Unrecognized option: %s\n", argv[0]);
      usage("hwloc-bind", stderr);
      return EXIT_FAILURE;
    }
    /* not an option: parse it as a binding location expression */
    ENSURE_LOADED();
    lcontext.topology = topology;
    lcontext.topodepth = depth;
    lcontext.only_hbm = only_hbm;
    lcontext.logical = logical;
    lcontext.verbose = verbose;
    scontext.nodeset_input = use_nodeset;
    scontext.nodeset_output = working_on_cpubind ? 0 : 1;
    scontext.output_set = working_on_cpubind ? cpubind_set : membind_set;
    ret = hwloc_calc_process_location_as_set(&lcontext, &scontext, argv[0]);
    if (ret < 0) {
      /* unparseable location: assume the command to exec starts here */
      if (verbose > 0)
        fprintf(stderr, "assuming the command starts at %s\n", argv[0]);
      break;
    }
    if (working_on_cpubind)
      got_cpubind = 1;
    else
      got_membind = 1;

  next:
    argc -= opt+1;
    argv += opt+1;
  }

  ENSURE_LOADED();

  if (pid_number > 0 && tid_number > 0) {
    fprintf(stderr, "cannot operate both on tid and pid\n");
    return EXIT_FAILURE;
  }

  if (pid_number > 0) {
    pid = hwloc_pid_from_number(pid_number, !(get_binding || get_last_cpu_location));
    /* no need to set_pid()
     * the doc just says we're operating on pid, not that we're retrieving the topo/cpuset as seen from inside pid */
  }

  if (get_last_cpu_location && !working_on_cpubind) {
    fprintf(stderr, "Options --membind and --get-last-cpu-location cannot be combined.\n");
    return EXIT_FAILURE;
  }
  if ((get_binding || get_last_cpu_location) && (got_cpubind || got_membind)) {
    /* doesn't work because get_binding/get_last_cpu_location overwrites cpubind_set */
    fprintf(stderr, "Cannot display and set binding at the same time.\n");
    return EXIT_FAILURE;
  }

  /* display mode: retrieve the current binding (or last CPU location) and print it */
  if (get_binding || get_last_cpu_location) {
    char *s;
    const char *policystr = NULL;
    int err;
    if (working_on_cpubind) {
      if (get_last_cpu_location) {
        if (pid_number > 0)
          err = hwloc_get_proc_last_cpu_location(topology, pid, cpubind_set, 0);
#ifdef HWLOC_LINUX_SYS
        else if (tid_number > 0)
          err = hwloc_linux_get_tid_last_cpu_location(topology, tid_number, cpubind_set);
#endif
        else
          err = hwloc_get_last_cpu_location(topology, cpubind_set, 0);
      } else {
        if (pid_number > 0)
          err = hwloc_get_proc_cpubind(topology, pid, cpubind_set, 0);
#ifdef HWLOC_LINUX_SYS
        else if (tid_number > 0)
          err = hwloc_linux_get_tid_cpubind(topology, tid_number, cpubind_set);
#endif
        else
          err = hwloc_get_cpubind(topology, cpubind_set, 0);
      }
      if (err) {
        const char *errmsg = strerror(errno);
        if (pid_number > 0)
          fprintf(stderr, "hwloc_get_proc_%s %d failed (errno %d %s)\n", get_last_cpu_location ? "last_cpu_location" : "cpubind", pid_number, errno, errmsg);
        else if (tid_number > 0)
          fprintf(stderr, "hwloc_get_tid_%s %d failed (errno %d %s)\n", get_last_cpu_location ? "last_cpu_location" : "cpubind", tid_number, errno, errmsg);
        else
          fprintf(stderr, "hwloc_get_%s failed (errno %d %s)\n", get_last_cpu_location ? "last_cpu_location" : "cpubind", errno, errmsg);
        return EXIT_FAILURE;
      }
      /* format the result, optionally converted to a nodeset, in hwloc or taskset syntax */
      if (use_nodeset) {
        hwloc_bitmap_t nset = hwloc_bitmap_alloc();
        hwloc_cpuset_to_nodeset(topology, cpubind_set, nset);
        if (taskset)
          hwloc_bitmap_taskset_asprintf(&s, nset);
        else
          hwloc_bitmap_asprintf(&s, nset);
        hwloc_bitmap_free(nset);
      } else {
        if (taskset)
          hwloc_bitmap_taskset_asprintf(&s, cpubind_set);
        else
          hwloc_bitmap_asprintf(&s, cpubind_set);
      }
    } else {
      hwloc_membind_policy_t policy;
      if (pid_number > 0) {
        err = hwloc_get_proc_membind(topology, pid, membind_set, &policy, use_nodeset ? HWLOC_MEMBIND_BYNODESET : 0);
      } else if (tid_number > 0) {
        /* no per-thread membind retrieval available */
        err = -1;
        errno = ENOSYS;
      } else {
        err = hwloc_get_membind(topology, membind_set, &policy, use_nodeset ? HWLOC_MEMBIND_BYNODESET : 0);
      }
      if (err) {
        const char *errmsg = strerror(errno);
        if (pid_number > 0)
          fprintf(stderr, "hwloc_get_proc_membind %d failed (errno %d %s)\n", pid_number, errno, errmsg);
        else
          fprintf(stderr, "hwloc_get_membind failed (errno %d %s)\n", errno, errmsg);
        return EXIT_FAILURE;
      }
      if (taskset)
        hwloc_bitmap_taskset_asprintf(&s, membind_set);
      else
        hwloc_bitmap_asprintf(&s, membind_set);
      switch (policy) {
      case HWLOC_MEMBIND_FIRSTTOUCH:
        policystr = "firsttouch";
        break;
      case HWLOC_MEMBIND_BIND:
        policystr = "bind";
        break;
      case HWLOC_MEMBIND_INTERLEAVE:
        policystr = "interleave";
        break;
      case HWLOC_MEMBIND_NEXTTOUCH:
        policystr = "nexttouch";
        break;
      default:
        fprintf(stderr, "unknown memory policy %d\n", policy);
        assert(0);
        break;
      }
    }
    if (policystr)
      printf("%s (%s)\n", s, policystr);
    else
      printf("%s\n", s);
    free(s);
  }

  /* apply the memory binding, if one was requested */
  if (got_membind) {
    if (hwloc_bitmap_iszero(membind_set)) {
      if (verbose >= 0)
        fprintf(stderr, "cannot membind to empty set\n");
      if (!force)
        goto failed_binding;
    }
    if (verbose > 0) {
      char *s;
      hwloc_bitmap_asprintf(&s, membind_set);
      fprintf(stderr, "binding on memory set %s\n", s);
      free(s);
    }
    if (single)
      hwloc_bitmap_singlify(membind_set);
    if (pid_number > 0)
      ret = hwloc_set_proc_membind(topology, pid, membind_set, membind_policy, membind_flags | HWLOC_MEMBIND_BYNODESET);
    else if (tid_number > 0) {
      ret = -1;
      errno = ENOSYS;
    } else
      ret = hwloc_set_membind(topology, membind_set, membind_policy, membind_flags | HWLOC_MEMBIND_BYNODESET);
    if (ret && verbose >= 0) {
      int bind_errno = errno;
      const char *errmsg = strerror(bind_errno);
      char *s;
      hwloc_bitmap_asprintf(&s, membind_set);
      if (pid_number > 0)
        fprintf(stderr, "hwloc_set_proc_membind %s (policy %d flags %x) PID %d failed (errno %d %s)\n", s, membind_policy, membind_flags, pid_number, bind_errno, errmsg);
      else
        fprintf(stderr, "hwloc_set_membind %s (policy %d flags %x) failed (errno %d %s)\n", s, membind_policy, membind_flags, bind_errno, errmsg);
      free(s);
    }
    if (ret && !force)
      goto failed_binding;
  } else {
    if (got_mempolicy)
      fprintf(stderr, "--mempolicy ignored unless memory binding is also requested with --membind.\n");
  }

  /* apply the CPU binding, if one was requested */
  if (got_cpubind) {
    if (hwloc_bitmap_iszero(cpubind_set)) {
      if (verbose >= 0)
        fprintf(stderr, "cannot cpubind to empty set\n");
      if (!force)
        goto failed_binding;
    }
    if (verbose > 0) {
      char *s;
      hwloc_bitmap_asprintf(&s, cpubind_set);
      fprintf(stderr, "binding on cpu set %s\n", s);
      free(s);
    }
    if (got_membind && !hwloc_bitmap_isequal(membind_set, cpubind_set)) {
      if (verbose)
        fprintf(stderr, "Conflicting CPU and memory binding requested, adding HWLOC_CPUBIND_NOMEMBIND flag.\n");
      cpubind_flags |= HWLOC_CPUBIND_NOMEMBIND;
    }
    if (single)
      hwloc_bitmap_singlify(cpubind_set);
    if (pid_number > 0)
      ret = hwloc_set_proc_cpubind(topology, pid, cpubind_set, cpubind_flags);
#ifdef HWLOC_LINUX_SYS
    else if (tid_number > 0)
      ret = hwloc_linux_set_tid_cpubind(topology, tid_number, cpubind_set);
#endif
    else
      ret = hwloc_set_cpubind(topology, cpubind_set, cpubind_flags);
    if (ret && verbose >= 0) {
      int bind_errno = errno;
      const char *errmsg = strerror(bind_errno);
      char *s;
      hwloc_bitmap_asprintf(&s, cpubind_set);
      if (pid_number > 0)
        fprintf(stderr, "hwloc_set_proc_cpubind %s (flags %x) PID %d failed (errno %d %s)\n", s, cpubind_flags, pid_number, bind_errno, errmsg);
      else if (tid_number > 0)
        fprintf(stderr, "hwloc_set_tid_cpubind %s (flags %x) PID %d failed (errno %d %s)\n", s, cpubind_flags, tid_number, bind_errno, errmsg);
      else
        fprintf(stderr, "hwloc_set_cpubind %s (flags %x) failed (errno %d %s)\n", s, cpubind_flags, bind_errno, errmsg);
      free(s);
    }
    if (ret && !force)
      goto failed_binding;
  }

  hwloc_bitmap_free(cpubind_set);
  hwloc_bitmap_free(membind_set);

  hwloc_topology_destroy(topology);

  /* when operating on another pid/tid there is no command to exec */
  if (pid_number > 0 || tid_number > 0)
    return EXIT_SUCCESS;

  if (0 == argc) {
    /* no command given: only fine when a display mode was requested */
    if (get_binding || get_last_cpu_location)
      return EXIT_SUCCESS;
    fprintf(stderr, "%s: nothing to do!\n", callname);
    return EXIT_FAILURE;
  }

  /* FIXME: check whether Windows execvp() passes INHERIT_PARENT_AFFINITY to CreateProcess()
   * because we need to propagate processor group affinity. However process-wide affinity
   * isn't supported with processor groups so far. */
  ret = execvp(argv[0], argv);
  if (ret) {
    fprintf(stderr, "%s: Failed to launch executable \"%s\"\n", callname, argv[0]);
    perror("execvp");
  }
  return EXIT_FAILURE;

failed_binding:
  hwloc_bitmap_free(cpubind_set);
  hwloc_bitmap_free(membind_set);
  hwloc_topology_destroy(topology);
  return EXIT_FAILURE;
}
/* Walk /proc and insert one misc object per process (and per distinctly-bound
 * thread, on Linux) whose CPU binding is interesting enough to display.
 * No-op when <dirent.h> or get_proc_cpubind support is unavailable. */
static void add_process_objects(hwloc_topology_t topology)
{
#ifdef HAVE_DIRENT_H
  hwloc_obj_t root;
  hwloc_bitmap_t cpuset;          /* per-process binding, reused each iteration */
#ifdef HWLOC_LINUX_SYS
  hwloc_bitmap_t task_cpuset;     /* per-thread binding, reused each iteration */
#endif /* HWLOC_LINUX_SYS */
  DIR *dir;
  struct dirent *dirent;
  const struct hwloc_topology_support *support;

  root = hwloc_get_root_obj(topology);
  support = hwloc_topology_get_support(topology);
  /* cannot show bindings if the backend cannot retrieve them */
  if (!support->cpubind->get_proc_cpubind)
    return;
  dir = opendir("/proc");
  if (!dir)
    return;
  cpuset = hwloc_bitmap_alloc();
#ifdef HWLOC_LINUX_SYS
  task_cpuset = hwloc_bitmap_alloc();
#endif /* HWLOC_LINUX_SYS */

  while ((dirent = readdir(dir))) {
    long local_pid_number;
    hwloc_pid_t local_pid;
    char *end;
    char name[80];               /* display name: "<pid>" or "<pid> <comm>" */
    int proc_cpubind;            /* nonzero if the process binding was readable */

    local_pid_number = strtol(dirent->d_name, &end, 10);
    if (*end)
      /* Not a number */
      continue;
    snprintf(name, sizeof(name), "%ld", local_pid_number);
    local_pid = hwloc_pid_from_number(local_pid_number, 0);
    proc_cpubind = hwloc_get_proc_cpubind(topology, local_pid, cpuset, 0) != -1;

#ifdef HWLOC_LINUX_SYS
    {
      char comm[16];             /* kernel comm names are at most 15 chars + NUL */
      char *path;
      /* strlen("/proc/") + pid + '/' + strlen("cmdline") + NUL */
      size_t pathlen = 6 + strlen(dirent->d_name) + 1 + 7 + 1;

      path = malloc(pathlen);

      {
        /* Get the process name */
        char cmd[64];
        int file;
        ssize_t n;

        /* cmdline is only read to detect kernel threads (empty cmdline) */
        snprintf(path, pathlen, "/proc/%s/cmdline", dirent->d_name);
        file = open(path, O_RDONLY);
        if (file < 0) {
          /* Ignore errors */
          free(path);
          continue;
        }
        n = read(file, cmd, sizeof(cmd));
        close(file);
        if (n <= 0) {
          /* Ignore kernel threads and errors */
          free(path);
          continue;
        }

        snprintf(path, pathlen, "/proc/%s/comm", dirent->d_name);
        file = open(path, O_RDONLY);
        if (file >= 0) {
          n = read(file, comm, sizeof(comm) - 1);
          close(file);
          if (n > 0) {
            comm[n] = 0;
            /* strip trailing newline that the kernel appends */
            if (n > 1 && comm[n-1] == '\n')
              comm[n-1] = 0;
          } else {
            snprintf(comm, sizeof(comm), "(unknown)");
          }
        } else {
          /* Old kernel, have to look at old file */
          char stats[32];
          char *parenl = NULL, *parenr;

          snprintf(path, pathlen, "/proc/%s/stat", dirent->d_name);
          file = open(path, O_RDONLY);
          if (file < 0) {
            /* Ignore errors */
            free(path);
            continue;
          }

          /* stat format is "pid (comm) ..." — extract the text between parens */
          n = read(file, stats, sizeof(stats) - 1);
          close(file);
          if (n > 0) {
            stats[n] = 0;
            parenl = strchr(stats, '(');
            parenr = strchr(stats, ')');
            /* clamp to the buffer end if the closing paren was truncated */
            if (!parenr)
              parenr = &stats[sizeof(stats)-1];
            *parenr = 0;
          }
          if (!parenl) {
            snprintf(comm, sizeof(comm), "(unknown)");
          } else {
            snprintf(comm, sizeof(comm), "%s", parenl+1);
          }
        }

        snprintf(name, sizeof(name), "%ld %s", local_pid_number, comm);
      }

      {
        /* Get threads */
        DIR *task_dir;
        struct dirent *task_dirent;

        snprintf(path, pathlen, "/proc/%s/task", dirent->d_name);
        task_dir = opendir(path);

        if (task_dir) {
          while ((task_dirent = readdir(task_dir))) {
            long local_tid;
            char *task_end;
            /* worst-case decimal digits of a long, roughly 3 per byte, + sign */
            const size_t tid_len = sizeof(local_tid)*3+1;
            size_t task_pathlen = 6 + strlen(dirent->d_name) + 1 + 4 + 1
                                  + strlen(task_dirent->d_name) + 1 + 4 + 1;
            char *task_path;
            int comm_file;
            char task_comm[16] = "";
            char task_name[sizeof(name) + 1 + tid_len + 1 + sizeof(task_comm) + 1];
            ssize_t n;

            local_tid = strtol(task_dirent->d_name, &task_end, 10);
            if (*task_end)
              /* Not a number, or the main task */
              continue;

            task_path = malloc(task_pathlen);
            snprintf(task_path, task_pathlen, "/proc/%s/task/%s/comm",
                     dirent->d_name, task_dirent->d_name);
            comm_file = open(task_path, O_RDONLY);
            free(task_path);

            if (comm_file >= 0) {
              n = read(comm_file, task_comm, sizeof(task_comm) - 1);
              if (n < 0)
                n = 0;
              close(comm_file);
              task_comm[n] = 0;
              if (n > 1 && task_comm[n-1] == '\n')
                task_comm[n-1] = 0;
              if (!strcmp(comm, task_comm))
                /* Same as process comm, do not show it again */
                n = 0;
            } else {
              n = 0;
            }

            /* skip threads whose binding cannot be read */
            if (hwloc_linux_get_tid_cpubind(topology, local_tid, task_cpuset))
              continue;

            /* skip threads bound exactly like their whole process */
            if (proc_cpubind && hwloc_bitmap_isequal(task_cpuset, cpuset))
              continue;

            if (n) {
              snprintf(task_name, sizeof(task_name), "%s %li %s",
                       name, local_tid, task_comm);
            } else {
              snprintf(task_name, sizeof(task_name), "%s %li",
                       name, local_tid);
            }

            insert_task(topology, task_cpuset, task_name);
          }
          closedir(task_dir);
        }
      }

      free(path);
    }
#endif /* HWLOC_LINUX_SYS */

    if (!proc_cpubind)
      continue;

    /* skip processes that are not actually restricted below the root cpuset */
    if (hwloc_bitmap_isincluded(root->cpuset, cpuset))
      continue;

    insert_task(topology, cpuset, name);
  }

  hwloc_bitmap_free(cpuset);
#ifdef HWLOC_LINUX_SYS
  hwloc_bitmap_free(task_cpuset);
#endif /* HWLOC_LINUX_SYS */

  closedir(dir);
#endif /* HAVE_DIRENT_H */
}
/* Test program: round-trips CPU sets between hwloc bitmaps and glibc
 * cpu_set_t via hwloc_cpuset_to/from_glibc_sched_affinity, checking that
 * sched_setaffinity/sched_getaffinity preserve them.
 * Exercised only when HWLOC_HAVE_CPU_SET is defined. */
int main(void)
{
  hwloc_topology_t topology;
#ifdef HWLOC_HAVE_CPU_SET
  unsigned depth;
  hwloc_bitmap_t hwlocset;
  cpu_set_t schedset;
  hwloc_obj_t obj;
  int err;
#endif /* HWLOC_HAVE_CPU_SET */

  hwloc_topology_init(&topology);
  hwloc_topology_load(topology);

#ifdef HWLOC_HAVE_CPU_SET
  depth = hwloc_topology_get_depth(topology);

  /* 1) bind to the complete cpuset of the machine */
  hwlocset = hwloc_bitmap_dup(hwloc_topology_get_complete_cpuset(topology));
  hwloc_cpuset_to_glibc_sched_affinity(topology, hwlocset, &schedset, sizeof(schedset));
#ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY
  /* ancient glibc prototype without the cpu_set_t pointer */
  err = sched_setaffinity(0, sizeof(schedset));
#else
  err = sched_setaffinity(0, sizeof(schedset), &schedset);
#endif
  assert(!err);
  hwloc_bitmap_free(hwlocset);

  /* read the binding back and convert it into a hwloc bitmap */
#ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY
  err = sched_getaffinity(0, sizeof(schedset));
#else
  err = sched_getaffinity(0, sizeof(schedset), &schedset);
#endif
  assert(!err);
  hwlocset = hwloc_bitmap_alloc();
  hwloc_cpuset_from_glibc_sched_affinity(topology, hwlocset, &schedset, sizeof(schedset));
  /* result must stay within the complete cpuset ... */
  assert(hwloc_bitmap_isincluded(hwlocset, hwloc_topology_get_complete_cpuset(topology)));
  /* ... and contain nothing outside online+allowed PUs */
  hwloc_bitmap_andnot(hwlocset, hwlocset, hwloc_topology_get_online_cpuset(topology));
  hwloc_bitmap_andnot(hwlocset, hwlocset, hwloc_topology_get_allowed_cpuset(topology));
  assert(hwloc_bitmap_iszero(hwlocset));
  hwloc_bitmap_free(hwlocset);

  /* 2) bind to a single PU: the last object of the deepest level */
  obj = hwloc_get_obj_by_depth(topology, depth-1, hwloc_get_nbobjs_by_depth(topology, depth-1) - 1);
  assert(obj);
  assert(obj->type == HWLOC_OBJ_PU);

  hwlocset = hwloc_bitmap_dup(obj->cpuset);
  hwloc_cpuset_to_glibc_sched_affinity(topology, hwlocset, &schedset, sizeof(schedset));
#ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY
  err = sched_setaffinity(0, sizeof(schedset));
#else
  err = sched_setaffinity(0, sizeof(schedset), &schedset);
#endif
  assert(!err);
  hwloc_bitmap_free(hwlocset);

  /* the binding read back must be exactly that PU's cpuset */
#ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY
  err = sched_getaffinity(0, sizeof(schedset));
#else
  err = sched_getaffinity(0, sizeof(schedset), &schedset);
#endif
  assert(!err);
  hwlocset = hwloc_bitmap_alloc();
  hwloc_cpuset_from_glibc_sched_affinity(topology, hwlocset, &schedset, sizeof(schedset));
  assert(hwloc_bitmap_isequal(hwlocset, obj->cpuset));
  hwloc_bitmap_free(hwlocset);
#endif /* HWLOC_HAVE_CPU_SET */

  hwloc_topology_destroy(topology);
  return 0;
}
int main(int argc, char *argv[]) { const struct hwloc_topology_support *support; hwloc_topology_t topology; hwloc_const_bitmap_t topocpuset; hwloc_bitmap_t cpuset; unsigned long flags = 0; DIR *dir; struct dirent *dirent; int show_all = 0; int show_threads = 0; int get_last_cpu_location = 0; char *callname; char *pidcmd = NULL; int err; int opt; callname = strrchr(argv[0], '/'); if (!callname) callname = argv[0]; else callname++; /* skip argv[0], handle options */ argc--; argv++; hwloc_utils_check_api_version(callname); while (argc >= 1) { opt = 0; if (!strcmp(argv[0], "-a")) show_all = 1; else if (!strcmp(argv[0], "-l") || !strcmp(argv[0], "--logical")) { logical = 1; } else if (!strcmp(argv[0], "-p") || !strcmp(argv[0], "--physical")) { logical = 0; } else if (!strcmp(argv[0], "-c") || !strcmp(argv[0], "--cpuset")) { show_cpuset = 1; } else if (!strcmp(argv[0], "-e") || !strncmp(argv[0], "--get-last-cpu-location", 10)) { get_last_cpu_location = 1; } else if (!strcmp(argv[0], "-t") || !strcmp(argv[0], "--threads")) { #ifdef HWLOC_LINUX_SYS show_threads = 1; #else fprintf (stderr, "Listing threads is currently only supported on Linux\n"); #endif } else if (!strcmp (argv[0], "--whole-system")) { flags |= HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM; } else if (!strcmp (argv[0], "--pid-cmd")) { if (argc < 2) { usage(callname, stdout); exit(EXIT_FAILURE); } pidcmd = argv[1]; opt = 1; } else { fprintf (stderr, "Unrecognized option: %s\n", argv[0]); usage (callname, stderr); exit(EXIT_FAILURE); } argc -= opt+1; argv += opt+1; } err = hwloc_topology_init(&topology); if (err) goto out; hwloc_topology_set_flags(topology, flags); err = hwloc_topology_load(topology); if (err) goto out_with_topology; support = hwloc_topology_get_support(topology); if (get_last_cpu_location) { if (!support->cpubind->get_proc_last_cpu_location) goto out_with_topology; } else { if (!support->cpubind->get_proc_cpubind) goto out_with_topology; } topocpuset = hwloc_topology_get_topology_cpuset(topology); dir = 
opendir("/proc"); if (!dir) goto out_with_topology; cpuset = hwloc_bitmap_alloc(); if (!cpuset) goto out_with_dir; while ((dirent = readdir(dir))) { long pid_number; hwloc_pid_t pid; char pidoutput[1024]; char *end; char name[64] = ""; /* management of threads */ unsigned boundthreads = 0, i; long *tids = NULL; /* NULL if process is not threaded */ hwloc_bitmap_t *tidcpusets = NULL; pid_number = strtol(dirent->d_name, &end, 10); if (*end) /* Not a number */ continue; pid = hwloc_pid_from_number(pid_number, 0); #ifdef HWLOC_LINUX_SYS { unsigned pathlen = 6 + strlen(dirent->d_name) + 1 + 7 + 1; char *path; int file; ssize_t n; path = malloc(pathlen); snprintf(path, pathlen, "/proc/%s/cmdline", dirent->d_name); file = open(path, O_RDONLY); free(path); if (file >= 0) { n = read(file, name, sizeof(name) - 1); close(file); if (n <= 0) /* Ignore kernel threads and errors */ continue; name[n] = 0; } } #endif /* HWLOC_LINUX_SYS */ if (show_threads) { #ifdef HWLOC_LINUX_SYS /* check if some threads must be displayed */ unsigned pathlen = 6 + strlen(dirent->d_name) + 1 + 4 + 1; char *path; DIR *taskdir; path = malloc(pathlen); snprintf(path, pathlen, "/proc/%s/task", dirent->d_name); taskdir = opendir(path); if (taskdir) { struct dirent *taskdirent; long tid; unsigned n = 0; /* count threads */ while ((taskdirent = readdir(taskdir))) { tid = strtol(taskdirent->d_name, &end, 10); if (*end) /* Not a number */ continue; n++; } if (n > 1) { /* if there's more than one thread, see if some are bound */ tids = malloc(n * sizeof(*tids)); tidcpusets = calloc(n+1, sizeof(*tidcpusets)); if (tids && tidcpusets) { /* reread the directory but gather info now */ rewinddir(taskdir); i = 0; while ((taskdirent = readdir(taskdir))) { tid = strtol(taskdirent->d_name, &end, 10); if (*end) /* Not a number */ continue; if (get_last_cpu_location) { if (hwloc_linux_get_tid_last_cpu_location(topology, tid, cpuset)) continue; } else { if (hwloc_linux_get_tid_cpubind(topology, tid, cpuset)) continue; } 
hwloc_bitmap_and(cpuset, cpuset, topocpuset); tids[i] = tid; tidcpusets[i] = hwloc_bitmap_dup(cpuset); i++; if (hwloc_bitmap_iszero(cpuset)) continue; if (hwloc_bitmap_isequal(cpuset, topocpuset) && !show_all) continue; boundthreads++; } } else { /* failed to alloc, behave as if there were no threads */ free(tids); tids = NULL; free(tidcpusets); tidcpusets = NULL; } } closedir(taskdir); } #endif /* HWLOC_LINUX_SYS */ } if (get_last_cpu_location) { if (hwloc_get_proc_last_cpu_location(topology, pid, cpuset, 0)) continue; } else { if (hwloc_get_proc_cpubind(topology, pid, cpuset, 0)) continue; } hwloc_bitmap_and(cpuset, cpuset, topocpuset); if (hwloc_bitmap_iszero(cpuset)) continue; /* don't print anything if the process isn't bound and if no threads are bound and if not showing all */ if (hwloc_bitmap_isequal(cpuset, topocpuset) && (!tids || !boundthreads) && !show_all) continue; pidoutput[0] = '\0'; if (pidcmd) { char *cmd; FILE *file; cmd = malloc(strlen(pidcmd)+1+5+2+1); sprintf(cmd, "%s %u", pidcmd, pid); file = popen(cmd, "r"); if (file) { if (fgets(pidoutput, sizeof(pidoutput), file)) { end = strchr(pidoutput, '\n'); if (end) *end = '\0'; } pclose(file); } free(cmd); } /* print the process */ print_task(topology, pid_number, name, cpuset, pidoutput[0] == '\0' ? NULL : pidoutput, 0); if (tids) /* print each tid we found (it's tidcpuset isn't NULL anymore) */ for(i=0; tidcpusets[i] != NULL; i++) { print_task(topology, tids[i], "", tidcpusets[i], NULL, 1); hwloc_bitmap_free(tidcpusets[i]); } /* free threads stuff */ free(tidcpusets); free(tids); } err = 0; hwloc_bitmap_free(cpuset); out_with_dir: closedir(dir); out_with_topology: hwloc_topology_destroy(topology); out: return err; }
/* Walk /proc and insert one misc object per process (and, on Linux, per
 * thread whose binding differs from its process) for display in lstopo.
 * No-op when <dirent.h> or get_proc_cpubind support is unavailable. */
static void add_process_objects(hwloc_topology_t topology)
{
#ifdef HAVE_DIRENT_H
  hwloc_obj_t root;
  hwloc_bitmap_t cpuset;        /* per-process binding, reused each iteration */
#ifdef HWLOC_LINUX_SYS
  hwloc_bitmap_t task_cpuset;   /* per-thread binding, reused each iteration */
#endif /* HWLOC_LINUX_SYS */
  DIR *dir;
  struct dirent *dirent;
  const struct hwloc_topology_support *support;

  root = hwloc_get_root_obj(topology);
  support = hwloc_topology_get_support(topology);
  /* cannot show bindings if the backend cannot retrieve them */
  if (!support->cpubind->get_proc_cpubind)
    return;
  dir = opendir("/proc");
  if (!dir)
    return;
  cpuset = hwloc_bitmap_alloc();
#ifdef HWLOC_LINUX_SYS
  task_cpuset = hwloc_bitmap_alloc();
#endif /* HWLOC_LINUX_SYS */

  while ((dirent = readdir(dir))) {
    long local_pid_number;
    hwloc_pid_t local_pid;
    char *end;
    char name[64];              /* display name: "<pid>" or "<pid> <cmd>" */
    int proc_cpubind;           /* nonzero if the process binding was readable */

    local_pid_number = strtol(dirent->d_name, &end, 10);
    if (*end)
      /* Not a number */
      continue;
    snprintf(name, sizeof(name), "%ld", local_pid_number);
    local_pid = hwloc_pid_from_number(local_pid_number, 0);
    proc_cpubind = hwloc_get_proc_cpubind(topology, local_pid, cpuset, 0) != -1;

#ifdef HWLOC_LINUX_SYS
    {
      /* Get the process name */
      char *path;
      /* strlen("/proc/") + pid + '/' + strlen("cmdline") + NUL */
      unsigned pathlen = 6 + strlen(dirent->d_name) + 1 + 7 + 1;
      char cmd[64], *c;
      int file;
      ssize_t n;

      path = malloc(pathlen);
      snprintf(path, pathlen, "/proc/%s/cmdline", dirent->d_name);
      file = open(path, O_RDONLY);
      free(path);

      if (file >= 0) {
	n = read(file, cmd, sizeof(cmd) - 1);
	close(file);

	if (n <= 0)
	  /* Ignore kernel threads and errors */
	  continue;

	cmd[n] = 0;
	/* keep only argv[0]: truncate at the first space */
	if ((c = strchr(cmd, ' ')))
	  *c = 0;
	snprintf(name, sizeof(name), "%ld %s", local_pid_number, cmd);
      }
    }

    {
      /* Get threads */
      char *path;
      unsigned pathlen = 6+strlen(dirent->d_name) + 1 + 4 + 1;
      DIR *task_dir;
      struct dirent *task_dirent;

      path = malloc(pathlen);
      snprintf(path, pathlen, "/proc/%s/task", dirent->d_name);
      task_dir = opendir(path);
      free(path);

      if (task_dir) {
	while ((task_dirent = readdir(task_dir))) {
	  long local_tid;
	  char *task_end;
	  char task_name[64];

	  local_tid = strtol(task_dirent->d_name, &task_end, 10);
	  if (*task_end)
	    /* Not a number, or the main task */
	    continue;

	  /* skip threads whose binding cannot be read */
	  if (hwloc_linux_get_tid_cpubind(topology, local_tid, task_cpuset))
	    continue;

	  /* skip threads bound exactly like their whole process */
	  if (proc_cpubind && hwloc_bitmap_isequal(task_cpuset, cpuset))
	    continue;

	  snprintf(task_name, sizeof(task_name), "%s %li", name, local_tid);

	  insert_task(topology, task_cpuset, task_name);
	}
	closedir(task_dir);
      }
    }
#endif /* HWLOC_LINUX_SYS */

    if (!proc_cpubind)
      continue;

    /* skip processes that are not actually restricted below the root cpuset */
    if (hwloc_bitmap_isincluded(root->cpuset, cpuset))
      continue;

    insert_task(topology, cpuset, name);
  }

  hwloc_bitmap_free(cpuset);
#ifdef HWLOC_LINUX_SYS
  hwloc_bitmap_free(task_cpuset);
#endif /* HWLOC_LINUX_SYS */

  closedir(dir);
#endif /* HAVE_DIRENT_H */
}
/* Render the topology as text to stdout or to the given file.
 * verbose_mode == 0: summary only; == 1: topology tree only; > 1: both,
 * plus distance matrices and cpuset anomaly reports.
 * Fix: the latency-matrix header was written with printf() — i.e. to stdout —
 * while everything else in this function writes to the selected FILE*;
 * it now uses fprintf(output, ...) like the rest of the output. */
void output_console(hwloc_topology_t topology, const char *filename, int logical, int legend __hwloc_attribute_unused, int verbose_mode)
{
  unsigned topodepth;
  FILE *output;

  /* "-" or no filename means stdout */
  if (!filename || !strcmp(filename, "-"))
    output = stdout;
  else {
    output = open_file(filename, "w");
    if (!output) {
      fprintf(stderr, "Failed to open %s for writing (%s)\n", filename, strerror(errno));
      return;
    }
  }

  topodepth = hwloc_topology_get_depth(topology);

  /*
   * if verbose_mode == 0, only print the summary.
   * if verbose_mode == 1, only print the topology tree.
   * if verbose_mode > 1, print both.
   */

  if (lstopo_show_only != (hwloc_obj_type_t)-1) {
    if (verbose_mode > 1)
      fprintf(output, "Only showing %s objects\n", hwloc_obj_type_string(lstopo_show_only));
    output_only (topology, hwloc_get_root_obj(topology), output, logical, verbose_mode);
  } else if (verbose_mode >= 1) {
    output_topology (topology, hwloc_get_root_obj(topology), NULL, output, 0, logical, verbose_mode);
    fprintf(output, "\n");
  }

  if ((verbose_mode > 1 || !verbose_mode) && lstopo_show_only == (hwloc_obj_type_t)-1) {
    hwloc_lstopo_show_summary(output, topology);
  }

  if (verbose_mode > 1 && lstopo_show_only == (hwloc_obj_type_t)-1) {
    /* print one latency matrix per depth that provides one */
    const struct hwloc_distances_s * distances;
    unsigned depth;

    for (depth = 0; depth < topodepth; depth++) {
      distances = hwloc_get_whole_distance_matrix_by_depth(topology, depth);
      if (!distances || !distances->latency)
        continue;
      /* was printf(): header must follow the rest of the report into `output` */
      fprintf(output, "latency matrix between %ss (depth %u) by %s indexes:\n",
              hwloc_obj_type_string(hwloc_get_depth_type(topology, depth)),
              depth,
              logical ? "logical" : "physical");
      hwloc_utils_print_distance_matrix(topology, hwloc_get_root_obj(topology), distances->nbobjs, depth, distances->latency, logical);
    }
  }

  if (verbose_mode > 1 && lstopo_show_only == (hwloc_obj_type_t)-1) {
    /* report PUs that are missing, offline, or allowed/online mismatches */
    hwloc_const_bitmap_t complete = hwloc_topology_get_complete_cpuset(topology);
    hwloc_const_bitmap_t topo = hwloc_topology_get_topology_cpuset(topology);
    hwloc_const_bitmap_t online = hwloc_topology_get_online_cpuset(topology);
    hwloc_const_bitmap_t allowed = hwloc_topology_get_allowed_cpuset(topology);

    if (complete && !hwloc_bitmap_isequal(topo, complete)) {
      hwloc_bitmap_t unknown = hwloc_bitmap_alloc();
      char *unknownstr;
      hwloc_bitmap_copy(unknown, complete);
      hwloc_bitmap_andnot(unknown, unknown, topo);
      hwloc_bitmap_asprintf(&unknownstr, unknown);
      fprintf (output, "%d processors not represented in topology: %s\n", hwloc_bitmap_weight(unknown), unknownstr);
      free(unknownstr);
      hwloc_bitmap_free(unknown);
    }
    if (complete && !hwloc_bitmap_isequal(online, complete)) {
      hwloc_bitmap_t offline = hwloc_bitmap_alloc();
      char *offlinestr;
      hwloc_bitmap_copy(offline, complete);
      hwloc_bitmap_andnot(offline, offline, online);
      hwloc_bitmap_asprintf(&offlinestr, offline);
      fprintf (output, "%d processors offline: %s\n", hwloc_bitmap_weight(offline), offlinestr);
      free(offlinestr);
      hwloc_bitmap_free(offline);
    }
    if (complete && !hwloc_bitmap_isequal(allowed, online)) {
      if (!hwloc_bitmap_isincluded(online, allowed)) {
        hwloc_bitmap_t forbidden = hwloc_bitmap_alloc();
        char *forbiddenstr;
        hwloc_bitmap_copy(forbidden, online);
        hwloc_bitmap_andnot(forbidden, forbidden, allowed);
        hwloc_bitmap_asprintf(&forbiddenstr, forbidden);
        fprintf(output, "%d processors online but not allowed: %s\n", hwloc_bitmap_weight(forbidden), forbiddenstr);
        free(forbiddenstr);
        hwloc_bitmap_free(forbidden);
      }
      if (!hwloc_bitmap_isincluded(allowed, online)) {
        hwloc_bitmap_t potential = hwloc_bitmap_alloc();
        char *potentialstr;
        hwloc_bitmap_copy(potential, allowed);
        hwloc_bitmap_andnot(potential, potential, online);
        hwloc_bitmap_asprintf(&potentialstr, potential);
        fprintf(output, "%d processors allowed but not online: %s\n", hwloc_bitmap_weight(potential), potentialstr);
        free(potentialstr);
        hwloc_bitmap_free(potential);
      }
    }
    if (!hwloc_topology_is_thissystem(topology))
      fprintf (output, "Topology not from this system\n");
  }

  if (output != stdout)
    fclose(output);
}
/* Render the topology as text to the output selected in `loutput`.
 * verbose_mode == 0: summary only; == 1: topology tree only; > 1: both,
 * plus relative-latency matrices and cpuset anomaly reports. */
void output_console(struct lstopo_output *loutput, const char *filename)
{
  hwloc_topology_t topology = loutput->topology;
  unsigned topodepth;
  int verbose_mode = loutput->verbose_mode;
  int logical = loutput->logical;
  FILE *output;

  output = open_output(filename, loutput->overwrite);
  if (!output) {
    fprintf(stderr, "Failed to open %s for writing (%s)\n", filename, strerror(errno));
    return;
  }

  topodepth = hwloc_topology_get_depth(topology);

  /*
   * if verbose_mode == 0, only print the summary.
   * if verbose_mode == 1, only print the topology tree.
   * if verbose_mode > 1, print both.
   */

  if (lstopo_show_only != (hwloc_obj_type_t)-1) {
    if (verbose_mode > 1)
      fprintf(output, "Only showing %s objects\n", hwloc_obj_type_string(lstopo_show_only));
    output_only (topology, hwloc_get_root_obj(topology), output, logical, verbose_mode);
  } else if (verbose_mode >= 1) {
    output_topology (topology, hwloc_get_root_obj(topology), NULL, output, 0, logical, verbose_mode);
    fprintf(output, "\n");
  }

  if ((verbose_mode > 1 || !verbose_mode) && lstopo_show_only == (hwloc_obj_type_t)-1) {
    hwloc_lstopo_show_summary(output, topology);
  }

  if (verbose_mode > 1 && lstopo_show_only == (hwloc_obj_type_t)-1) {
    /* print one relative-latency matrix per depth that provides one */
    const struct hwloc_distances_s * distances;
    unsigned depth;

    for (depth = 0; depth < topodepth; depth++) {
      distances = hwloc_get_whole_distance_matrix_by_depth(topology, depth);
      if (!distances || !distances->latency)
        continue;
      fprintf(output, "relative latency matrix between %ss (depth %u) by %s indexes:\n",
	      hwloc_obj_type_string(hwloc_get_depth_type(topology, depth)),
	      depth,
	      logical ? "logical" : "physical");
      hwloc_utils_print_distance_matrix(output, topology, hwloc_get_root_obj(topology), distances->nbobjs, depth, distances->latency, logical);
    }
  }

  if (verbose_mode > 1 && lstopo_show_only == (hwloc_obj_type_t)-1) {
    /* report PUs that are missing from the topology or disallowed */
    hwloc_const_bitmap_t complete = hwloc_topology_get_complete_cpuset(topology);
    hwloc_const_bitmap_t topo = hwloc_topology_get_topology_cpuset(topology);
    hwloc_const_bitmap_t allowed = hwloc_topology_get_allowed_cpuset(topology);

    if (!hwloc_bitmap_isequal(topo, complete)) {
      hwloc_bitmap_t unknown = hwloc_bitmap_alloc();
      char *unknownstr;
      hwloc_bitmap_copy(unknown, complete);
      hwloc_bitmap_andnot(unknown, unknown, topo);
      hwloc_bitmap_asprintf(&unknownstr, unknown);
      fprintf (output, "%d processors not represented in topology: %s\n", hwloc_bitmap_weight(unknown), unknownstr);
      free(unknownstr);
      hwloc_bitmap_free(unknown);
    }
    if (!hwloc_bitmap_isequal(topo, allowed)) {
      hwloc_bitmap_t disallowed = hwloc_bitmap_alloc();
      char *disallowedstr;
      hwloc_bitmap_copy(disallowed, topo);
      hwloc_bitmap_andnot(disallowed, disallowed, allowed);
      hwloc_bitmap_asprintf(&disallowedstr, disallowed);
      fprintf(output, "%d processors represented but not allowed: %s\n", hwloc_bitmap_weight(disallowed), disallowedstr);
      free(disallowedstr);
      hwloc_bitmap_free(disallowed);
    }
    if (!hwloc_topology_is_thissystem(topology))
      fprintf (output, "Topology not from this system\n");
  }

  if (output != stdout)
    fclose(output);
}
/* user to have to play with the cgroup hierarchy to modify it */
/* Bind the current task (job->envtp->task_pid) to CPUs according to the job's
 * cpu_bind options, using hwloc to pick a binding granularity (thread, core,
 * socket/NUMA-node, ldom, board) and a distribution (block/cyclic), then
 * applying it with sched_setaffinity().
 * Returns SLURM_SUCCESS/SLURM_ERROR (0 when no affinity is requested).
 * Fix: the CPU_BIND_NONE branch returned without freeing the hwloc bitmap
 * and topology allocated just above, leaking them on every such call. */
extern int task_cgroup_cpuset_set_task_affinity(stepd_step_rec_t *job)
{
	int fstatus = SLURM_ERROR;

#ifndef HAVE_HWLOC
	error("task/cgroup: plugin not compiled with hwloc support, "
	      "skipping affinity.");
	return fstatus;
#else
	char mstr[1 + CPU_SETSIZE / 4];	/* hex string of a cpu_set_t */
	cpu_bind_type_t bind_type;
	cpu_set_t ts;
	hwloc_obj_t obj;
	hwloc_obj_type_t socket_or_node;
	hwloc_topology_t topology;
	hwloc_bitmap_t cpuset;
	hwloc_obj_type_t hwtype;	/* granularity actually used */
	hwloc_obj_type_t req_hwtype;	/* granularity requested by cpu_bind */
	int bind_verbose = 0;
	int rc = SLURM_SUCCESS, match;
	pid_t pid = job->envtp->task_pid;
	size_t tssize;
	uint32_t nldoms;
	uint32_t nsockets;
	uint32_t ncores;
	uint32_t npus;
	uint32_t nobj;
	uint32_t taskid = job->envtp->localid;
	uint32_t jntasks = job->node_tasks;
	uint32_t jnpus;

	/* Allocate and initialize hwloc objects */
	hwloc_topology_init(&topology);
	hwloc_topology_load(topology);
	cpuset = hwloc_bitmap_alloc();

	int spec_threads = 0;

	if (job->batch) {
		jnpus = job->cpus;
		job->cpus_per_task = job->cpus;
	} else
		jnpus = jntasks * job->cpus_per_task;

	bind_type = job->cpu_bind_type;
	if ((conf->task_plugin_param & CPU_BIND_VERBOSE) ||
	    (bind_type & CPU_BIND_VERBOSE))
		bind_verbose = 1;

	if ( hwloc_get_type_depth(topology, HWLOC_OBJ_NODE) >
	     hwloc_get_type_depth(topology, HWLOC_OBJ_SOCKET) ) {
		/* One socket contains multiple NUMA-nodes
		 * like AMD Opteron 6000 series etc.
		 * In such case, use NUMA-node instead of socket. */
		socket_or_node = HWLOC_OBJ_NODE;
	} else {
		socket_or_node = HWLOC_OBJ_SOCKET;
	}

	/* map the cpu_bind request onto an hwloc object type */
	if (bind_type & CPU_BIND_NONE) {
		if (bind_verbose)
			info("task/cgroup: task[%u] is requesting no affinity",
			     taskid);
		/* release the objects allocated above (was leaked before) */
		hwloc_bitmap_free(cpuset);
		hwloc_topology_destroy(topology);
		return 0;
	} else if (bind_type & CPU_BIND_TO_THREADS) {
		if (bind_verbose)
			info("task/cgroup: task[%u] is requesting "
			     "thread level binding",taskid);
		req_hwtype = HWLOC_OBJ_PU;
	} else if (bind_type & CPU_BIND_TO_CORES) {
		if (bind_verbose)
			info("task/cgroup: task[%u] is requesting "
			     "core level binding",taskid);
		req_hwtype = HWLOC_OBJ_CORE;
	} else if (bind_type & CPU_BIND_TO_SOCKETS) {
		if (bind_verbose)
			info("task/cgroup: task[%u] is requesting "
			     "socket level binding",taskid);
		req_hwtype = socket_or_node;
	} else if (bind_type & CPU_BIND_TO_LDOMS) {
		if (bind_verbose)
			info("task/cgroup: task[%u] is requesting "
			     "ldom level binding",taskid);
		req_hwtype = HWLOC_OBJ_NODE;
	} else if (bind_type & CPU_BIND_TO_BOARDS) {
		if (bind_verbose)
			info("task/cgroup: task[%u] is requesting "
			     "board level binding",taskid);
		req_hwtype = HWLOC_OBJ_GROUP;
	} else if (bind_type & bind_mode_ldom) {
		req_hwtype = HWLOC_OBJ_NODE;
	} else {
		if (bind_verbose)
			info("task/cgroup: task[%u] using core level binding"
			     " by default",taskid);
		req_hwtype = HWLOC_OBJ_CORE;
	}

	/*
	 * Perform the topology detection. It will only get allowed PUs.
	 * Detect in the same time the granularity to use for binding.
	 * The granularity can be relaxed from threads to cores if enough
	 * cores are available as with hyperthread support, ntasks-per-core
	 * param can let us have access to more threads per core for each
	 * task
	 * Revert back to machine granularity if no finer-grained granularity
	 * matching the request is found. This will result in no affinity
	 * applied.
	 * The detected granularity will be used to find where to best place
	 * the task, then the cpu_bind option will be used to relax the
	 * affinity constraint and use more PUs. (i.e. use a core granularity
	 * to dispatch the tasks across the sockets and then provide access
	 * to each task to the cores of its socket.)
	 */
	npus = (uint32_t) hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU);
	ncores = (uint32_t) hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_CORE);
	nsockets = (uint32_t) hwloc_get_nbobjs_by_type(topology,
						       socket_or_node);
	nldoms = (uint32_t) hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_NODE);
	//info("PU:%d CORE:%d SOCK:%d LDOM:%d", npus, ncores, nsockets, nldoms);

	hwtype = HWLOC_OBJ_MACHINE;
	nobj = 1;
	/* number of specialized (reserved) threads excluded from jnpus */
	if ((job->job_core_spec != (uint16_t) NO_VAL) &&
	    (job->job_core_spec &  CORE_SPEC_THREAD)  &&
	    (job->job_core_spec != CORE_SPEC_THREAD)) {
		spec_threads = job->job_core_spec & (~CORE_SPEC_THREAD);
	}
	/* pick the finest granularity with enough objects for the job;
	 * later tests overwrite earlier ones, so coarser levels win */
	if (npus >= (jnpus + spec_threads) || bind_type & CPU_BIND_TO_THREADS) {
		hwtype = HWLOC_OBJ_PU;
		nobj = npus;
	}
	if (ncores >= jnpus || bind_type & CPU_BIND_TO_CORES) {
		hwtype = HWLOC_OBJ_CORE;
		nobj = ncores;
	}
	if (nsockets >= jntasks &&
	    bind_type & CPU_BIND_TO_SOCKETS) {
		hwtype = socket_or_node;
		nobj = nsockets;
	}
	/*
	 * HWLOC returns all the NUMA nodes available regardless of the
	 * number of underlying sockets available (regardless of the allowed
	 * resources). So there is no guarantee that each ldom will be
	 * populated with usable sockets. So add a simple check that at least
	 * ensure that we have as many sockets as ldoms before moving to
	 * ldoms granularity
	 */
	if (nldoms >= jntasks &&
	    nsockets >= nldoms &&
	    bind_type & (CPU_BIND_TO_LDOMS | bind_mode_ldom)) {
		hwtype = HWLOC_OBJ_NODE;
		nobj = nldoms;
	}

	/*
	 * If not enough objects to do the job, revert to no affinity mode
	 */
	if (hwloc_compare_types(hwtype, HWLOC_OBJ_MACHINE) == 0) {
		info("task/cgroup: task[%u] disabling affinity because of %s "
		     "granularity",taskid, hwloc_obj_type_string(hwtype));
	} else if ((hwloc_compare_types(hwtype, HWLOC_OBJ_CORE) >= 0) &&
		   (nobj < jnpus)) {
		info("task/cgroup: task[%u] not enough %s objects (%d < %d), "
		     "disabling affinity",
		     taskid, hwloc_obj_type_string(hwtype), nobj, jnpus);
	} else if (bind_type & bind_mode) {
		/* Explicit binding mode specified by the user
		 * Bind the taskid in accordance with the specified mode
		 */
		obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_MACHINE, 0);
		match = hwloc_bitmap_isequal(obj->complete_cpuset,
					     obj->allowed_cpuset);
		if ((job->job_core_spec == (uint16_t) NO_VAL) && !match) {
			info("task/cgroup: entire node must be allocated, "
			     "disabling affinity, task[%u]", taskid);
			fprintf(stderr, "Requested cpu_bind option requires "
				"entire node to be allocated; disabling "
				"affinity\n");
		} else {
			if (bind_verbose) {
				info("task/cgroup: task[%u] is requesting "
				     "explicit binding mode", taskid);
			}
			_get_sched_cpuset(topology, hwtype, req_hwtype, &ts,
					  job);
			tssize = sizeof(cpu_set_t);
			fstatus = SLURM_SUCCESS;
			/* drop specialized threads from the mask if any */
			if (job->job_core_spec != (uint16_t) NO_VAL)
				_validate_mask(taskid, obj, &ts);
			if ((rc = sched_setaffinity(pid, tssize, &ts))) {
				error("task/cgroup: task[%u] unable to set "
				      "mask 0x%s", taskid,
				      cpuset_to_str(&ts, mstr));
				error("sched_setaffinity rc = %d", rc);
				fstatus = SLURM_ERROR;
			} else if (bind_verbose) {
				info("task/cgroup: task[%u] mask 0x%s",
				     taskid, cpuset_to_str(&ts, mstr));
			}
			_slurm_chkaffinity(&ts, job, rc);
		}
	} else {
		/* Bind the detected object to the taskid, respecting the
		 * granularity, using the designated or default distribution
		 * method (block or cyclic). */
		char *str;

		if (bind_verbose) {
			info("task/cgroup: task[%u] using %s granularity dist %u",
			     taskid, hwloc_obj_type_string(hwtype),
			     job->task_dist);
		}

		/* See srun man page for detailed information on
		 * --distribution option.
		 *
		 * You can see the equivalent code for the
		 * task/affinity plugin in
		 * src/plugins/task/affinity/dist_tasks.c, around line 368
		 */
		switch (job->task_dist & SLURM_DIST_NODESOCKMASK) {
		case SLURM_DIST_BLOCK_BLOCK:
		case SLURM_DIST_CYCLIC_BLOCK:
		case SLURM_DIST_PLANE:
			/* tasks are distributed in blocks within a plane */
			_task_cgroup_cpuset_dist_block(topology,
				hwtype, req_hwtype,
				nobj, job, bind_verbose, cpuset);
			break;
		case SLURM_DIST_ARBITRARY:
		case SLURM_DIST_BLOCK:
		case SLURM_DIST_CYCLIC:
		case SLURM_DIST_UNKNOWN:
			if (slurm_get_select_type_param()
			    & CR_CORE_DEFAULT_DIST_BLOCK) {
				_task_cgroup_cpuset_dist_block(topology,
					hwtype, req_hwtype,
					nobj, job, bind_verbose, cpuset);
				break;
			}
			/* We want to fall through here if we aren't doing a
			   default dist block. */
		default:
			_task_cgroup_cpuset_dist_cyclic(topology,
				hwtype, req_hwtype,
				job, bind_verbose, cpuset);
			break;
		}

		hwloc_bitmap_asprintf(&str, cpuset);

		/* convert the hwloc bitmap into a glibc mask and apply it */
		tssize = sizeof(cpu_set_t);
		if (hwloc_cpuset_to_glibc_sched_affinity(topology, cpuset,
							 &ts, tssize) == 0) {
			fstatus = SLURM_SUCCESS;
			if ((rc = sched_setaffinity(pid, tssize, &ts))) {
				error("task/cgroup: task[%u] unable to set "
				      "taskset '%s'", taskid, str);
				fstatus = SLURM_ERROR;
			} else if (bind_verbose) {
				info("task/cgroup: task[%u] set taskset '%s'",
				     taskid, str);
			}
			_slurm_chkaffinity(&ts, job, rc);
		} else {
			error("task/cgroup: task[%u] unable to build "
			      "taskset '%s'",taskid,str);
			fstatus = SLURM_ERROR;
		}
		/* str was allocated by hwloc_bitmap_asprintf() with malloc */
		free(str);
	}

	/* Destroy hwloc objects */
	hwloc_bitmap_free(cpuset);
	hwloc_topology_destroy(topology);

	return fstatus;
#endif
}
/* user to have to play with the cgroup hierarchy to modify it */
/*
 * Apply a CPU affinity mask to the current task of the job step,
 * derived from the node's hwloc topology and the job's cpu_bind
 * options.  The binding granularity (PU / core / socket / NUMA node)
 * is chosen from the requested bind_type and the number of available
 * objects, then the mask is installed with sched_setaffinity().
 *
 * Returns SLURM_SUCCESS if the mask was applied, SLURM_ERROR on
 * failure (or when compiled without hwloc), and 0 when binding was
 * explicitly disabled with CPU_BIND_NONE.
 */
extern int task_cgroup_cpuset_set_task_affinity(stepd_step_rec_t *job)
{
	int fstatus = SLURM_ERROR;

#ifndef HAVE_HWLOC
	error("task/cgroup: plugin not compiled with hwloc support, "
	      "skipping affinity.");
	return fstatus;
#else
	char mstr[1 + CPU_SETSIZE / 4];
	cpu_bind_type_t bind_type;
	cpu_set_t ts;
	hwloc_obj_t obj;
	hwloc_obj_type_t socket_or_node;
	hwloc_topology_t topology;
	hwloc_bitmap_t cpuset;
	hwloc_obj_type_t hwtype;	/* granularity actually used */
	hwloc_obj_type_t req_hwtype;	/* granularity requested by user */
	int bind_verbose = 0;
	int rc = SLURM_SUCCESS;
	pid_t pid = job->envtp->task_pid;
	size_t tssize;
	uint32_t nldoms;
	uint32_t nsockets;
	uint32_t ncores;
	uint32_t npus;
	uint32_t nobj;
	uint32_t taskid = job->envtp->localid;
	uint32_t jntasks = job->node_tasks;
	uint32_t jnpus = jntasks * job->cpus_per_task;

	bind_type = job->cpu_bind_type;
	if (conf->task_plugin_param & CPU_BIND_VERBOSE ||
	    bind_type & CPU_BIND_VERBOSE)
		bind_verbose = 1 ;

	/* Allocate and initialize hwloc objects */
	hwloc_topology_init(&topology);
	hwloc_topology_load(topology);
	cpuset = hwloc_bitmap_alloc();

	if ( hwloc_get_type_depth(topology, HWLOC_OBJ_NODE) >
	     hwloc_get_type_depth(topology, HWLOC_OBJ_SOCKET) ) {
		/* One socket contains multiple NUMA-nodes
		 * like AMD Opteron 6000 series etc.
		 * In such case, use NUMA-node instead of socket. */
		socket_or_node = HWLOC_OBJ_NODE;
	} else {
		socket_or_node = HWLOC_OBJ_SOCKET;
	}

	/* Translate the cpu_bind request into an hwloc object type. */
	if (bind_type & CPU_BIND_NONE) {
		if (bind_verbose)
			info("task/cgroup: task[%u] is requesting no affinity",
			     taskid);
		/* BUGFIX: the early return previously leaked both the
		 * cpuset bitmap and the loaded topology allocated above;
		 * release them before returning. */
		hwloc_bitmap_free(cpuset);
		hwloc_topology_destroy(topology);
		return 0;
	} else if (bind_type & CPU_BIND_TO_THREADS) {
		if (bind_verbose)
			info("task/cgroup: task[%u] is requesting "
			     "thread level binding",taskid);
		req_hwtype = HWLOC_OBJ_PU;
	} else if (bind_type & CPU_BIND_TO_CORES) {
		if (bind_verbose)
			info("task/cgroup: task[%u] is requesting "
			     "core level binding",taskid);
		req_hwtype = HWLOC_OBJ_CORE;
	} else if (bind_type & CPU_BIND_TO_SOCKETS) {
		if (bind_verbose)
			info("task/cgroup: task[%u] is requesting "
			     "socket level binding",taskid);
		req_hwtype = socket_or_node;
	} else if (bind_type & CPU_BIND_TO_LDOMS) {
		if (bind_verbose)
			info("task/cgroup: task[%u] is requesting "
			     "ldom level binding",taskid);
		req_hwtype = HWLOC_OBJ_NODE;
	} else if (bind_type & CPU_BIND_TO_BOARDS) {
		if (bind_verbose)
			info("task/cgroup: task[%u] is requesting "
			     "board level binding",taskid);
		req_hwtype = HWLOC_OBJ_GROUP;
	} else if (bind_type & bind_mode_ldom) {
		req_hwtype = HWLOC_OBJ_NODE;
	} else {
		if (bind_verbose)
			info("task/cgroup: task[%u] using core level binding"
			     " by default",taskid);
		req_hwtype = HWLOC_OBJ_CORE;
	}

	/*
	 * Perform the topology detection. It will only get allowed PUs.
	 * Detect in the same time the granularity to use for binding.
	 * The granularity can be relaxed from threads to cores if enough
	 * cores are available as with hyperthread support, ntasks-per-core
	 * param can let us have access to more threads per core for each
	 * task
	 * Revert back to machine granularity if no finer-grained granularity
	 * matching the request is found. This will result in no affinity
	 * applied.
	 * The detected granularity will be used to find where to best place
	 * the task, then the cpu_bind option will be used to relax the
	 * affinity constraint and use more PUs. (i.e. use a core granularity
	 * to dispatch the tasks across the sockets and then provide access
	 * to each task to the cores of its socket.)
	 */
	npus = (uint32_t) hwloc_get_nbobjs_by_type(topology,
						   HWLOC_OBJ_PU);
	ncores = (uint32_t) hwloc_get_nbobjs_by_type(topology,
						     HWLOC_OBJ_CORE);
	nsockets = (uint32_t) hwloc_get_nbobjs_by_type(topology,
						       socket_or_node);
	nldoms = (uint32_t) hwloc_get_nbobjs_by_type(topology,
						     HWLOC_OBJ_NODE);

	/* Walk from finest to coarsest granularity; the last level with
	 * enough objects (or explicitly requested) wins. */
	hwtype = HWLOC_OBJ_MACHINE;
	nobj = 1;
	if (npus >= jnpus || bind_type & CPU_BIND_TO_THREADS) {
		hwtype = HWLOC_OBJ_PU;
		nobj = npus;
	}
	if (ncores >= jnpus || bind_type & CPU_BIND_TO_CORES) {
		hwtype = HWLOC_OBJ_CORE;
		nobj = ncores;
	}
	if (nsockets >= jntasks &&
	    bind_type & CPU_BIND_TO_SOCKETS) {
		hwtype = socket_or_node;
		nobj = nsockets;
	}
	/*
	 * HWLOC returns all the NUMA nodes available regardless of the
	 * number of underlying sockets available (regardless of the allowed
	 * resources). So there is no guarantee that each ldom will be populated
	 * with usable sockets. So add a simple check that at least ensure that
	 * we have as many sockets as ldoms before moving to ldoms granularity
	 */
	if (nldoms >= jntasks &&
	    nsockets >= nldoms &&
	    bind_type & (CPU_BIND_TO_LDOMS | bind_mode_ldom)) {
		hwtype = HWLOC_OBJ_NODE;
		nobj = nldoms;
	}

	/*
	 * If not enough objects to do the job, revert to no affinity mode
	 */
	if (hwloc_compare_types(hwtype,HWLOC_OBJ_MACHINE) == 0) {
		info("task/cgroup: task[%u] disabling affinity because of %s "
		     "granularity",taskid,hwloc_obj_type_string(hwtype));
	} else if (hwloc_compare_types(hwtype,HWLOC_OBJ_CORE) >= 0 &&
		   jnpus > nobj) {
		info("task/cgroup: task[%u] not enough %s objects, disabling "
		     "affinity",taskid,hwloc_obj_type_string(hwtype));
	} else if (bind_type & bind_mode) {
		/* Explicit binding mode specified by the user
		 * Bind the taskid in accordance with the specified mode
		 */
		obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_MACHINE, 0);
		if (!hwloc_bitmap_isequal(obj->complete_cpuset,
					  obj->allowed_cpuset)) {
			info("task/cgroup: entire node must be allocated, "
			     "disabling affinity, task[%u]", taskid);
			fprintf(stderr, "Requested cpu_bind option requires "
				"entire node to be allocated; disabling "
				"affinity\n");
		} else {
			if (bind_verbose)
				info("task/cgroup: task[%u] is requesting "
				     "explicit binding mode",taskid);
			_get_sched_cpuset(topology, hwtype, req_hwtype, &ts,
					  job);
			tssize = sizeof(cpu_set_t);
			fstatus = SLURM_SUCCESS;
			if ((rc = sched_setaffinity(pid, tssize, &ts))) {
				error("task/cgroup: task[%u] unable to set "
				      "mask 0x%s", taskid,
				      cpuset_to_str(&ts, mstr));
				fstatus = SLURM_ERROR;
			} else if (bind_verbose)
				info("task/cgroup: task[%u] mask 0x%s",
				     taskid, cpuset_to_str(&ts, mstr));
			slurm_chkaffinity(&ts, job, rc);
		}
	} else {
		/* Bind the detected object to the taskid, respecting the
		 * granularity, using the designated or default distribution
		 * method (block or cyclic). */
		char *str;

		if (bind_verbose) {
			info("task/cgroup: task[%u] using %s granularity",
			     taskid,hwloc_obj_type_string(hwtype));
		}

		/* There are two "distributions," controlled by the
		 * -m option of srun and friends. The first is the
		 * distribution of tasks to nodes. The second is the
		 * distribution of allocated cpus to tasks for
		 * binding. This code is handling the second
		 * distribution. Here's how the values get set, based
		 * on the value of -m
		 *
		 * SLURM_DIST_CYCLIC = srun -m cyclic
		 * SLURM_DIST_BLOCK = srun -m block
		 * SLURM_DIST_CYCLIC_CYCLIC = srun -m cyclic:cyclic
		 * SLURM_DIST_BLOCK_CYCLIC = srun -m block:cyclic
		 *
		 * In the first two cases, the user only specified the
		 * first distribution. The second distribution
		 * defaults to cyclic. In the second two cases, the
		 * user explicitly requested a second distribution of
		 * cyclic. So all these four cases correspond to a
		 * second distribution of cyclic. So we want to call
		 * _task_cgroup_cpuset_dist_cyclic.
		 *
		 * If the user explicitly specifies a second
		 * distribution of block, or if
		 * CR_CORE_DEFAULT_DIST_BLOCK is configured and the
		 * user does not explicitly specify a second
		 * distribution of cyclic, the second distribution is
		 * block, and we need to call
		 * _task_cgroup_cpuset_dist_block. In these cases,
		 * task_dist would be set to SLURM_DIST_CYCLIC_BLOCK
		 * or SLURM_DIST_BLOCK_BLOCK.
		 *
		 * You can see the equivalent code for the
		 * task/affinity plugin in
		 * src/plugins/task/affinity/dist_tasks.c, around line 384.
		 */
		switch (job->task_dist) {
		case SLURM_DIST_CYCLIC:
		case SLURM_DIST_BLOCK:
		case SLURM_DIST_CYCLIC_CYCLIC:
		case SLURM_DIST_BLOCK_CYCLIC:
			_task_cgroup_cpuset_dist_cyclic(
				topology, hwtype, req_hwtype,
				job, bind_verbose, cpuset);
			break;
		default:
			_task_cgroup_cpuset_dist_block(
				topology, hwtype, req_hwtype,
				nobj, job, bind_verbose, cpuset);
		}

		hwloc_bitmap_asprintf(&str, cpuset);

		tssize = sizeof(cpu_set_t);
		if (hwloc_cpuset_to_glibc_sched_affinity(topology,cpuset,
							 &ts,tssize) == 0) {
			fstatus = SLURM_SUCCESS;
			if ((rc = sched_setaffinity(pid,tssize,&ts))) {
				error("task/cgroup: task[%u] unable to set "
				      "taskset '%s'",taskid,str);
				fstatus = SLURM_ERROR;
			} else if (bind_verbose) {
				info("task/cgroup: task[%u] taskset '%s' is set"
				     ,taskid,str);
			}
			slurm_chkaffinity(&ts, job, rc);
		} else {
			error("task/cgroup: task[%u] unable to build "
			      "taskset '%s'",taskid,str);
			fstatus = SLURM_ERROR;
		}
		free(str);
	}

	/* Destroy hwloc objects */
	hwloc_bitmap_free(cpuset);
	hwloc_topology_destroy(topology);

	return fstatus;
#endif
}
int main(void) { hwloc_bitmap_t set; hwloc_obj_t obj; char *str = NULL; hwloc_topology_init(&topology); hwloc_topology_load(topology); support = hwloc_topology_get_support(topology); obj = hwloc_get_root_obj(topology); set = hwloc_bitmap_dup(obj->cpuset); while (hwloc_bitmap_isequal(obj->cpuset, set)) { if (!obj->arity) break; obj = obj->children[0]; } hwloc_bitmap_asprintf(&str, set); printf("system set is %s\n", str); free(str); test(set, 0); printf("now strict\n"); test(set, HWLOC_CPUBIND_STRICT); hwloc_bitmap_free(set); set = hwloc_bitmap_dup(obj->cpuset); hwloc_bitmap_asprintf(&str, set); printf("obj set is %s\n", str); free(str); test(set, 0); printf("now strict\n"); test(set, HWLOC_CPUBIND_STRICT); hwloc_bitmap_singlify(set); hwloc_bitmap_asprintf(&str, set); printf("singlified to %s\n", str); free(str); test(set, 0); printf("now strict\n"); test(set, HWLOC_CPUBIND_STRICT); hwloc_bitmap_free(set); printf("\n\nmemory tests\n\n"); printf("complete node set\n"); set = hwloc_bitmap_dup(hwloc_get_root_obj(topology)->cpuset); hwloc_bitmap_asprintf(&str, set); printf("i.e. cpuset %s\n", str); free(str); testmem3(set); hwloc_bitmap_free(set); obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NODE, 0); if (obj) { set = hwloc_bitmap_dup(obj->cpuset); hwloc_bitmap_asprintf(&str, set); printf("cpuset set is %s\n", str); free(str); testmem3(set); obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NODE, 1); if (obj) { hwloc_bitmap_or(set, set, obj->cpuset); hwloc_bitmap_asprintf(&str, set); printf("cpuset set is %s\n", str); free(str); testmem3(set); } hwloc_bitmap_free(set); } hwloc_topology_destroy(topology); return 0; }