c_sublocid_t chpl_topo_getThreadLocality(void) { hwloc_cpuset_t cpuset; hwloc_nodeset_t nodeset; int flags; int node; if (!haveTopology) { return c_sublocid_any; } if (!topoSupport->cpubind->get_thread_cpubind) { return c_sublocid_any; } CHK_ERR_ERRNO((cpuset = hwloc_bitmap_alloc()) != NULL); CHK_ERR_ERRNO((nodeset = hwloc_bitmap_alloc()) != NULL); flags = HWLOC_CPUBIND_THREAD; CHK_ERR_ERRNO(hwloc_set_cpubind(topology, cpuset, flags) == 0); hwloc_cpuset_to_nodeset(topology, cpuset, nodeset); node = hwloc_bitmap_first(nodeset); hwloc_bitmap_free(nodeset); hwloc_bitmap_free(cpuset); return node; }
/******************* FUNCTION *********************/ int TopoHwloc::getCurrentIdFromNUMABinding(void) const { hwloc_nodeset_t nodeset = hwloc_bitmap_alloc(); hwloc_cpuset_t cpuset = hwloc_bitmap_alloc(); hwloc_membind_policy_t policy; int res = -1; int weight; int status; #if defined(SCTK_ALLOC_DEBUG) && defined(hwloc_bitmap_list_snprintf) char buffer[4096]; #endif //if no numa node, return immediately if (getNbNumaEntities() == 1) return -1; //nodes // flags = 0 fallback on PROCESS if THREAD is not supported (as for windows). status = hwloc_get_membind_nodeset(topology,nodeset,&policy,0); assert(status == 0); if (status == 0) return -1; #if defined(SCTK_ALLOC_DEBUG) && defined(hwloc_bitmap_list_snprintf) status = hwloc_bitmap_list_snprintf(buffer,4096,nodeset); sprintf(stderr,"Current nodes : %s\n",buffer); #endif //cores // flags = 0 fallback on PROCESS if THREAD is not supported (as for windows). status = hwloc_get_membind(topology,cpuset,&policy,0); assert(status == 0); if (status == 0) return -1; #if defined(SCTK_ALLOC_DEBUG) && defined(hwloc_bitmap_list_snprintf) status = hwloc_bitmap_list_snprintf(buffer,4096,cpuset); sprintf(stderr,"Current cores : %s\n",buffer); #endif //nodes from cores hwloc_cpuset_to_nodeset(topology,cpuset,nodeset); #if defined(SCTK_ALLOC_DEBUG) && defined(hwloc_bitmap_list_snprintf) status = hwloc_bitmap_list_snprintf(buffer,4096,nodeset); sprintf(stderr,"Current nodes from cores : %s\n",buffer); #endif //calc res weight = hwloc_bitmap_weight(nodeset); assert(weight != 0); if (weight == 1) res = getFirstBitInBitmap(nodeset); hwloc_bitmap_free(cpuset); hwloc_bitmap_free(nodeset); return res; }
/* Approximate the current thread's memory binding from its CPU binding:
 * the thread is assumed to allocate from the NUMA nodes containing the
 * CPUs it is bound to. */
static int
hwloc_win_get_thisthread_membind(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags)
{
  hwloc_cpuset_t cpuset = hwloc_bitmap_alloc();
  int err = hwloc_win_get_thread_cpubind(topology, GetCurrentThread(), cpuset, flags);
  if (err == 0) {
    /* Success: report a BIND policy over the nodes covering the cpuset. */
    *policy = HWLOC_MEMBIND_BIND;
    hwloc_cpuset_to_nodeset(topology, cpuset, nodeset);
  }
  hwloc_bitmap_free(cpuset);
  return err;
}
/* Approximate a process's memory binding from its CPU binding (Windows
 * reports no separate memory binding). */
static int
hwloc_win_get_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags)
{
  hwloc_cpuset_t cpuset = hwloc_bitmap_alloc();
  /* Only STRICT translates from membind flags to cpubind flags here. */
  int cpubind_flags = (flags & HWLOC_MEMBIND_STRICT) ? HWLOC_CPUBIND_STRICT : 0;
  int err = hwloc_win_get_proc_cpubind(topology, pid, cpuset, cpubind_flags);
  if (err == 0) {
    *policy = HWLOC_MEMBIND_BIND;
    hwloc_cpuset_to_nodeset(topology, cpuset, nodeset);
  }
  hwloc_bitmap_free(cpuset);
  return err;
}
void THardwareLocalityHelper::BindThreadForDevice(int deviceId) { if (!HasContext) { return; } THwlocSet deviceCpu; THwlocSet numaNode; int errCode = hwloc_cudart_get_device_cpuset(Context, deviceId, deviceCpu.Set); hwloc_cpuset_to_nodeset(Context, deviceCpu.Set, numaNode.Set); errCode = hwloc_set_cpubind(Context, deviceCpu.Set, HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT); if (errCode == -1) { MATRIXNET_ERROR_LOG << "Can't bind thread for " << deviceId << " with err " << errno << Endl; } errCode = hwloc_set_membind_nodeset(Context, numaNode.Set, HWLOC_MEMBIND_BIND, HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT); if (errCode == -1) { MATRIXNET_ERROR_LOG << "Can't bind memory for " << deviceId << " with err " << errno << Endl; } }
/*
 * Get the node where the current thread is running
 * return the node of the core, or -1 on a machine without NUMA nodes
 */
int hw_my_node() {
  int node = -1;

  if (local_topo->nnodes != 0) {
    hwloc_cpuset_t cpus = hwloc_bitmap_alloc();
    hwloc_nodeset_t nodes = hwloc_bitmap_alloc();

    /* Query the thread's CPU binding and map it to NUMA nodes. */
    hwloc_get_cpubind(topology, cpus, HWLOC_CPUBIND_THREAD);
    hwloc_cpuset_to_nodeset(topology, cpus, nodes);
    node = hwloc_bitmap_first(nodes);

    hwloc_bitmap_free(cpus);
    hwloc_bitmap_free(nodes);
  }

  return node;
}
/******************* FUNCTION *********************/ int TopoHwloc::getCurrentIdFromThreadBinding(void) const { hwloc_nodeset_t nodeset = hwloc_bitmap_alloc(); hwloc_cpuset_t cpuset = hwloc_bitmap_alloc(); int res = -1; int weight; #if defined(SCTK_ALLOC_DEBUG) && defined(hwloc_bitmap_list_snprintf) char buffer[4096]; #endif //get current core binding //for windows use 0 instead of HWLOC_CPUBIND_THREAD int status = hwloc_get_cpubind (topology, cpuset, 0); assert(status == 0); if (status == 0) return -1; #if defined(SCTK_ALLOC_DEBUG) && defined(hwloc_bitmap_list_snprintf) status = hwloc_bitmap_list_snprintf(buffer,4096,cpuset); sprintf(stderr,"Current cores : %s\n",buffer); #endif //nodes from cores hwloc_cpuset_to_nodeset(topology,cpuset,nodeset); #if defined(SCTK_ALLOC_DEBUG) && defined(hwloc_bitmap_list_snprintf) status = hwloc_bitmap_list_snprintf(buffer,4096,nodeset); sprintf(stderr,"Current nodes from cores : %s\n",buffer); #endif //calc res weight = hwloc_bitmap_weight(nodeset); assert(weight != 0); if (weight == 1) res = getFirstBitInBitmap(nodeset); hwloc_bitmap_free(cpuset); hwloc_bitmap_free(nodeset); return res; }
/*
 * Validate a cpuset given to a memory-binding call and convert it into the
 * nodeset actually used for binding.  On success fills `nodeset` and returns
 * 0; on failure returns -1 with errno set (EXDEV: multi-system topology,
 * ENODEV: no NUMA nodes, EINVAL: empty or out-of-topology cpuset).
 */
static int hwloc_fix_membind_cpuset(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_const_cpuset_t cpuset)
{
  hwloc_const_bitmap_t topology_set = hwloc_topology_get_topology_cpuset(topology);
  hwloc_const_bitmap_t complete_set = hwloc_topology_get_complete_cpuset(topology);
  hwloc_const_bitmap_t complete_nodeset = hwloc_topology_get_complete_nodeset(topology);

  if (!topology_set) {
    /* The topology is composed of several systems, the cpuset is thus
     * ambiguous. */
    errno = EXDEV;
    return -1;
  }

  if (!complete_nodeset) {
    /* There is no NUMA node */
    errno = ENODEV;
    return -1;
  }

  if (hwloc_bitmap_iszero(cpuset)) {
    /* Binding to an empty cpuset is meaningless. */
    errno = EINVAL;
    return -1;
  }

  if (!hwloc_bitmap_isincluded(cpuset, complete_set)) {
    /* Caller asked for CPUs that do not exist in this topology. */
    errno = EINVAL;
    return -1;
  }

  if (hwloc_bitmap_isincluded(topology_set, cpuset)) {
    /* The cpuset covers the whole topology: bind to all nodes rather than
     * converting, so CPU-less nodes are included too. */
    hwloc_bitmap_copy(nodeset, complete_nodeset);
    return 0;
  }

  /* General case: nodes containing at least one of the requested CPUs. */
  hwloc_cpuset_to_nodeset(topology, cpuset, nodeset);
  return 0;
}
c_sublocid_t chpl_topo_getThreadLocality(void) { hwloc_cpuset_t cpuset; hwloc_nodeset_t nodeset; int flags; int node; if (!haveTopology) { return c_sublocid_any; } if (!topoSupport->cpubind->get_thread_cpubind) { return c_sublocid_any; } if ((cpuset = hwloc_bitmap_alloc()) == NULL) { report_error("hwloc_bitmap_alloc()", errno); } if ((nodeset = hwloc_bitmap_alloc()) == NULL) { report_error("hwloc_bitmap_alloc()", errno); } flags = HWLOC_CPUBIND_THREAD; if (hwloc_get_cpubind(topology, cpuset, flags)) { report_error("hwloc_get_cpubind()", errno); } hwloc_cpuset_to_nodeset(topology, cpuset, nodeset); node = hwloc_bitmap_first(nodeset); hwloc_bitmap_free(nodeset); hwloc_bitmap_free(cpuset); return node; }
/*
 * hwloc-bind(1) entry point.  Phases:
 *   1. declare state and lazily-loaded topology (ENSURE_LOADED macro);
 *   2. parse options, accumulating cpubind_set/membind_set from location args;
 *   3. either display the current binding/last-cpu-location (--get / -e),
 *      or apply the requested memory and/or CPU binding;
 *   4. exec the target command, or exit if operating on a --pid/--tid.
 */
int main(int argc, char *argv[]) { hwloc_topology_t topology; int loaded = 0; int depth; hwloc_bitmap_t cpubind_set, membind_set; int got_cpubind = 0, got_membind = 0; int working_on_cpubind = 1; /* membind if 0 */ int get_binding = 0; int use_nodeset = 0; int get_last_cpu_location = 0; unsigned long flags = 0; int force = 0; int single = 0; int verbose = 0; int only_hbm = -1; int logical = 1; int taskset = 0; unsigned cpubind_flags = 0; hwloc_membind_policy_t membind_policy = HWLOC_MEMBIND_BIND; int got_mempolicy = 0; unsigned membind_flags = 0; int opt; int ret; int pid_number = -1; int tid_number = -1; hwloc_pid_t pid = 0; /* only valid when pid_number > 0, but gcc-4.8 still reports uninitialized warnings */ char *callname; struct hwloc_calc_location_context_s lcontext; struct hwloc_calc_set_context_s scontext; callname = argv[0]; /* skip argv[0], handle options */ argv++; argc--; hwloc_utils_check_api_version(callname); cpubind_set = hwloc_bitmap_alloc(); membind_set = hwloc_bitmap_alloc(); /* don't load now, in case some options change the config before the topology is actually used */ #define LOADED() (loaded) #define ENSURE_LOADED() do { \ if (!loaded) { \ hwloc_topology_init(&topology); \ hwloc_topology_set_all_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_ALL); \ hwloc_topology_set_flags(topology, flags); \ hwloc_topology_load(topology); \ depth = hwloc_topology_get_depth(topology); \ loaded = 1; \ } \ } while (0) while (argc >= 1) { if (!strcmp(argv[0], "--")) { argc--; argv++; break; } opt = 0; if (*argv[0] == '-') { if (!strcmp(argv[0], "-v") || !strcmp(argv[0], "--verbose")) { verbose++; goto next; } if (!strcmp(argv[0], "-q") || !strcmp(argv[0], "--quiet")) { verbose--; goto next; } if (!strcmp(argv[0], "--help")) { usage("hwloc-bind", stdout); return EXIT_SUCCESS; } if (!strcmp(argv[0], "--single")) { single = 1; goto next; } if (!strcmp(argv[0], "-f") || !strcmp(argv[0], "--force")) { force = 1; goto next; } if (!strcmp(argv[0], "--strict")) { 
/* --strict applies strict semantics to both CPU and memory binding. */
cpubind_flags |= HWLOC_CPUBIND_STRICT; membind_flags |= HWLOC_MEMBIND_STRICT; goto next; } if (!strcmp(argv[0], "--pid")) { if (argc < 2) { usage ("hwloc-bind", stderr); exit(EXIT_FAILURE); } pid_number = atoi(argv[1]); opt = 1; goto next; } #ifdef HWLOC_LINUX_SYS if (!strcmp(argv[0], "--tid")) { if (argc < 2) { usage ("hwloc-bind", stderr); exit(EXIT_FAILURE); } tid_number = atoi(argv[1]); opt = 1; goto next; } #endif if (!strcmp (argv[0], "--version")) { printf("%s %s\n", callname, HWLOC_VERSION); exit(EXIT_SUCCESS); } if (!strcmp(argv[0], "-l") || !strcmp(argv[0], "--logical")) { logical = 1; goto next; } if (!strcmp(argv[0], "-p") || !strcmp(argv[0], "--physical")) { logical = 0; goto next; } if (!strcmp(argv[0], "--taskset")) { taskset = 1; goto next; } if (!strcmp (argv[0], "-e") || !strncmp (argv[0], "--get-last-cpu-location", 10)) { get_last_cpu_location = 1; goto next; } if (!strcmp (argv[0], "--get")) { get_binding = 1; goto next; } if (!strcmp (argv[0], "--nodeset")) { use_nodeset = 1; goto next; } if (!strcmp (argv[0], "--cpubind")) { working_on_cpubind = 1; goto next; } if (!strcmp (argv[0], "--membind")) { working_on_cpubind = 0; goto next; } if (!strcmp (argv[0], "--mempolicy")) { if (!strncmp(argv[1], "default", 2)) membind_policy = HWLOC_MEMBIND_DEFAULT; else if (!strncmp(argv[1], "firsttouch", 2)) membind_policy = HWLOC_MEMBIND_FIRSTTOUCH; else if (!strncmp(argv[1], "bind", 2)) membind_policy = HWLOC_MEMBIND_BIND; else if (!strncmp(argv[1], "interleave", 2)) membind_policy = HWLOC_MEMBIND_INTERLEAVE; else if (!strncmp(argv[1], "nexttouch", 2)) membind_policy = HWLOC_MEMBIND_NEXTTOUCH; else { fprintf(stderr, "Unrecognized memory binding policy %s\n", argv[1]); usage ("hwloc-bind", stderr); exit(EXIT_FAILURE); } got_mempolicy = 1; opt = 1; goto next; } if (!strcmp(argv[0], "--hbm")) { only_hbm = 1; goto next; } if (!strcmp(argv[0], "--no-hbm")) { only_hbm = 0; goto next; } if (!strcmp (argv[0], "--whole-system")) { if (loaded) { fprintf(stderr, 
/* Topology-configuration options are rejected once the topology is loaded. */
"Input option %s disallowed after options using the topology\n", argv[0]); exit(EXIT_FAILURE); } flags |= HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM; goto next; } if (!strcmp (argv[0], "--restrict")) { hwloc_bitmap_t restrictset; int err; if (argc < 2) { usage (callname, stdout); exit(EXIT_FAILURE); } restrictset = hwloc_bitmap_alloc(); hwloc_bitmap_sscanf(restrictset, argv[1]); ENSURE_LOADED(); err = hwloc_topology_restrict (topology, restrictset, 0); if (err) { perror("Restricting the topology"); /* FALLTHRU */ } hwloc_bitmap_free(restrictset); argc--; argv++; goto next; } fprintf (stderr, "Unrecognized option: %s\n", argv[0]); usage("hwloc-bind", stderr); return EXIT_FAILURE; } ENSURE_LOADED(); lcontext.topology = topology; lcontext.topodepth = depth; lcontext.only_hbm = only_hbm; lcontext.logical = logical; lcontext.verbose = verbose; scontext.nodeset_input = use_nodeset; scontext.nodeset_output = working_on_cpubind ? 0 : 1; scontext.output_set = working_on_cpubind ? cpubind_set : membind_set; ret = hwloc_calc_process_location_as_set(&lcontext, &scontext, argv[0]); if (ret < 0) { if (verbose > 0) fprintf(stderr, "assuming the command starts at %s\n", argv[0]); break; } if (working_on_cpubind) got_cpubind = 1; else got_membind = 1; next: argc -= opt+1; argv += opt+1; } ENSURE_LOADED(); if (pid_number > 0 && tid_number > 0) { fprintf(stderr, "cannot operate both on tid and pid\n"); return EXIT_FAILURE; } if (pid_number > 0) { pid = hwloc_pid_from_number(pid_number, !(get_binding || get_last_cpu_location)); /* no need to set_pid() * the doc just says we're operating on pid, not that we're retrieving the topo/cpuset as seen from inside pid */ } if (get_last_cpu_location && !working_on_cpubind) { fprintf(stderr, "Options --membind and --get-last-cpu-location cannot be combined.\n"); return EXIT_FAILURE; } if ((get_binding || get_last_cpu_location) && (got_cpubind || got_membind)) { /* doesn't work because get_binding/get_last_cpu_location overwrites cpubind_set */ 
/* Display mode: query and print the current binding or last cpu location. */
fprintf(stderr, "Cannot display and set binding at the same time.\n"); return EXIT_FAILURE; } if (get_binding || get_last_cpu_location) { char *s; const char *policystr = NULL; int err; if (working_on_cpubind) { if (get_last_cpu_location) { if (pid_number > 0) err = hwloc_get_proc_last_cpu_location(topology, pid, cpubind_set, 0); #ifdef HWLOC_LINUX_SYS else if (tid_number > 0) err = hwloc_linux_get_tid_last_cpu_location(topology, tid_number, cpubind_set); #endif else err = hwloc_get_last_cpu_location(topology, cpubind_set, 0); } else { if (pid_number > 0) err = hwloc_get_proc_cpubind(topology, pid, cpubind_set, 0); #ifdef HWLOC_LINUX_SYS else if (tid_number > 0) err = hwloc_linux_get_tid_cpubind(topology, tid_number, cpubind_set); #endif else err = hwloc_get_cpubind(topology, cpubind_set, 0); } if (err) { const char *errmsg = strerror(errno); if (pid_number > 0) fprintf(stderr, "hwloc_get_proc_%s %d failed (errno %d %s)\n", get_last_cpu_location ? "last_cpu_location" : "cpubind", pid_number, errno, errmsg); else if (tid_number > 0) fprintf(stderr, "hwloc_get_tid_%s %d failed (errno %d %s)\n", get_last_cpu_location ? "last_cpu_location" : "cpubind", tid_number, errno, errmsg); else fprintf(stderr, "hwloc_get_%s failed (errno %d %s)\n", get_last_cpu_location ? "last_cpu_location" : "cpubind", errno, errmsg); return EXIT_FAILURE; } if (use_nodeset) { hwloc_bitmap_t nset = hwloc_bitmap_alloc(); hwloc_cpuset_to_nodeset(topology, cpubind_set, nset); if (taskset) hwloc_bitmap_taskset_asprintf(&s, nset); else hwloc_bitmap_asprintf(&s, nset); hwloc_bitmap_free(nset); } else { if (taskset) hwloc_bitmap_taskset_asprintf(&s, cpubind_set); else hwloc_bitmap_asprintf(&s, cpubind_set); } } else { hwloc_membind_policy_t policy; if (pid_number > 0) { err = hwloc_get_proc_membind(topology, pid, membind_set, &policy, use_nodeset ? 
/* (continuation of the membind query flags expression above) */
HWLOC_MEMBIND_BYNODESET : 0); } else if (tid_number > 0) { err = -1; errno = ENOSYS; } else { err = hwloc_get_membind(topology, membind_set, &policy, use_nodeset ? HWLOC_MEMBIND_BYNODESET : 0); } if (err) { const char *errmsg = strerror(errno); if (pid_number > 0) fprintf(stderr, "hwloc_get_proc_membind %d failed (errno %d %s)\n", pid_number, errno, errmsg); else fprintf(stderr, "hwloc_get_membind failed (errno %d %s)\n", errno, errmsg); return EXIT_FAILURE; } if (taskset) hwloc_bitmap_taskset_asprintf(&s, membind_set); else hwloc_bitmap_asprintf(&s, membind_set); switch (policy) { case HWLOC_MEMBIND_FIRSTTOUCH: policystr = "firsttouch"; break; case HWLOC_MEMBIND_BIND: policystr = "bind"; break; case HWLOC_MEMBIND_INTERLEAVE: policystr = "interleave"; break; case HWLOC_MEMBIND_NEXTTOUCH: policystr = "nexttouch"; break; default: fprintf(stderr, "unknown memory policy %d\n", policy); assert(0); break; } } if (policystr) printf("%s (%s)\n", s, policystr); else printf("%s\n", s); free(s); } if (got_membind) { if (hwloc_bitmap_iszero(membind_set)) { if (verbose >= 0) fprintf(stderr, "cannot membind to empty set\n"); if (!force) goto failed_binding; } if (verbose > 0) { char *s; hwloc_bitmap_asprintf(&s, membind_set); fprintf(stderr, "binding on memory set %s\n", s); free(s); } if (single) hwloc_bitmap_singlify(membind_set); if (pid_number > 0) ret = hwloc_set_proc_membind(topology, pid, membind_set, membind_policy, membind_flags | HWLOC_MEMBIND_BYNODESET); else if (tid_number > 0) { ret = -1; errno = ENOSYS; } else ret = hwloc_set_membind(topology, membind_set, membind_policy, membind_flags | HWLOC_MEMBIND_BYNODESET); if (ret && verbose >= 0) { int bind_errno = errno; const char *errmsg = strerror(bind_errno); char *s; hwloc_bitmap_asprintf(&s, membind_set); if (pid_number > 0) fprintf(stderr, "hwloc_set_proc_membind %s (policy %d flags %x) PID %d failed (errno %d %s)\n", s, membind_policy, membind_flags, pid_number, bind_errno, errmsg); else fprintf(stderr, 
/* (continuation of the fprintf above); then apply the CPU binding */
"hwloc_set_membind %s (policy %d flags %x) failed (errno %d %s)\n", s, membind_policy, membind_flags, bind_errno, errmsg); free(s); } if (ret && !force) goto failed_binding; } else { if (got_mempolicy) fprintf(stderr, "--mempolicy ignored unless memory binding is also requested with --membind.\n"); } if (got_cpubind) { if (hwloc_bitmap_iszero(cpubind_set)) { if (verbose >= 0) fprintf(stderr, "cannot cpubind to empty set\n"); if (!force) goto failed_binding; } if (verbose > 0) { char *s; hwloc_bitmap_asprintf(&s, cpubind_set); fprintf(stderr, "binding on cpu set %s\n", s); free(s); } if (got_membind && !hwloc_bitmap_isequal(membind_set, cpubind_set)) { if (verbose) fprintf(stderr, "Conflicting CPU and memory binding requested, adding HWLOC_CPUBIND_NOMEMBIND flag.\n"); cpubind_flags |= HWLOC_CPUBIND_NOMEMBIND; } if (single) hwloc_bitmap_singlify(cpubind_set); if (pid_number > 0) ret = hwloc_set_proc_cpubind(topology, pid, cpubind_set, cpubind_flags); #ifdef HWLOC_LINUX_SYS else if (tid_number > 0) ret = hwloc_linux_set_tid_cpubind(topology, tid_number, cpubind_set); #endif else ret = hwloc_set_cpubind(topology, cpubind_set, cpubind_flags); if (ret && verbose >= 0) { int bind_errno = errno; const char *errmsg = strerror(bind_errno); char *s; hwloc_bitmap_asprintf(&s, cpubind_set); if (pid_number > 0) fprintf(stderr, "hwloc_set_proc_cpubind %s (flags %x) PID %d failed (errno %d %s)\n", s, cpubind_flags, pid_number, bind_errno, errmsg); else if (tid_number > 0) fprintf(stderr, "hwloc_set_tid_cpubind %s (flags %x) PID %d failed (errno %d %s)\n", s, cpubind_flags, tid_number, bind_errno, errmsg); else fprintf(stderr, "hwloc_set_cpubind %s (flags %x) failed (errno %d %s)\n", s, cpubind_flags, bind_errno, errmsg); free(s); } if (ret && !force) goto failed_binding; } hwloc_bitmap_free(cpubind_set); hwloc_bitmap_free(membind_set); hwloc_topology_destroy(topology); if (pid_number > 0 || tid_number > 0) return EXIT_SUCCESS; if (0 == argc) { if (get_binding || 
/* (continuation of the if() above); finally exec the target command */
get_last_cpu_location) return EXIT_SUCCESS; fprintf(stderr, "%s: nothing to do!\n", callname); return EXIT_FAILURE; } /* FIXME: check whether Windows execvp() passes INHERIT_PARENT_AFFINITY to CreateProcess() * because we need to propagate processor group affinity. However process-wide affinity * isn't supported with processor groups so far. */ ret = execvp(argv[0], argv); if (ret) { fprintf(stderr, "%s: Failed to launch executable \"%s\"\n", callname, argv[0]); perror("execvp"); } return EXIT_FAILURE; failed_binding: hwloc_bitmap_free(cpubind_set); hwloc_bitmap_free(membind_set); hwloc_topology_destroy(topology); return EXIT_FAILURE; }
/*
 * Build one hwloc cpuset per worker thread from the process's CPU binding,
 * distributing PUs either round-robin ("strided", default) or in contiguous
 * blocks ("chunked") per the HCLIB_AFFINITY env var.  Then, for each worker,
 * find the contiguous range [base, limit) of workers sharing a NUMA node
 * with it (for NUMA-aware work stealing); falls back to the full worker
 * range when no contiguous chunk exists.  Entire body is USE_HWLOC-only.
 */
static void create_hwloc_cpusets() { #ifdef USE_HWLOC int i; int err = hwloc_topology_init(&topology); assert(err == 0); err = hwloc_topology_load(topology); assert(err == 0); hwloc_bitmap_t cpuset = hwloc_bitmap_alloc(); assert(cpuset); err = hwloc_get_cpubind(topology, cpuset, HWLOC_CPUBIND_PROCESS); assert(err == 0); const int available_pus = hwloc_bitmap_weight(cpuset); const int last_set_index = hwloc_bitmap_last(cpuset); const int num_workers = hc_context->nworkers; hclib_affinity_t selected_affinity = HCLIB_AFFINITY_STRIDED; const char *user_selected_affinity = getenv("HCLIB_AFFINITY"); if (user_selected_affinity) { if (strcmp(user_selected_affinity, "strided") == 0) { selected_affinity = HCLIB_AFFINITY_STRIDED; } else if (strcmp(user_selected_affinity, "chunked") == 0) { selected_affinity = HCLIB_AFFINITY_CHUNKED; } else { fprintf(stderr, "Unsupported thread affinity \"%s\" specified with " "HCLIB_AFFINITY.\n", user_selected_affinity); exit(1); } } thread_cpusets = (hwloc_bitmap_t *)malloc(hc_context->nworkers * sizeof(*thread_cpusets)); assert(thread_cpusets); for (i = 0; i < hc_context->nworkers; i++) { thread_cpusets[i] = hwloc_bitmap_alloc(); assert(thread_cpusets[i]); } switch (selected_affinity) { case (HCLIB_AFFINITY_STRIDED): { if (available_pus < num_workers) { fprintf(stderr, "ERROR Available PUs (%d) was less than number " "of workers (%d), don't currently support " "oversubscription with strided thread pinning\n", available_pus, num_workers); exit(1); } int count = 0; int index = 0; while (index <= last_set_index) { if (hwloc_bitmap_isset(cpuset, index)) { hwloc_bitmap_set(thread_cpusets[count % num_workers], index); count++; } index++; } break; } case (HCLIB_AFFINITY_CHUNKED): { const int chunk_size = (available_pus + num_workers - 1) / num_workers; int count = 0; int index = 0; while (index <= last_set_index) { if (hwloc_bitmap_isset(cpuset, index)) { hwloc_bitmap_set(thread_cpusets[count / chunk_size], index); count++; } index++; } break; } 
/* Unknown affinity mode is a programming error; then compute NUMA-sharing worker ranges. */
default: assert(false); } hwloc_bitmap_t nodeset = hwloc_bitmap_alloc(); hwloc_bitmap_t other_nodeset = hwloc_bitmap_alloc(); assert(nodeset && other_nodeset); /* * Here, we look for contiguous ranges of worker threads that share any NUMA * nodes with us. In theory, this should be more hierarchical but isn't yet. * This is also super inefficient... O(T^2) where T is the number of * workers. */ bool revert_to_naive_stealing = false; for (i = 0; i < hc_context->nworkers; i++) { // Get the NUMA nodes for this CPU set hwloc_cpuset_to_nodeset(topology, thread_cpusets[i], nodeset); int base = -1; int limit = -1; int j; for (j = 0; j < hc_context->nworkers; j++) { hwloc_cpuset_to_nodeset(topology, thread_cpusets[j], other_nodeset); // Take the intersection, see if there is any overlap hwloc_bitmap_and(other_nodeset, nodeset, other_nodeset); if (base < 0) { // Haven't found a contiguous chunk of workers yet. if (!hwloc_bitmap_iszero(other_nodeset)) { base = j; } } else { /* * Have a contiguous chunk of workers, either still inside it or * after it. */ if (limit < 0) { // Inside the contiguous chunk of workers if (hwloc_bitmap_iszero(other_nodeset)) { // Found the end limit = j; } } else { // After the contiguous chunk of workers if (!hwloc_bitmap_iszero(other_nodeset)) { // No contiguous chunk to find, just do something naive. revert_to_naive_stealing = true; break; } } } } if (revert_to_naive_stealing) { fprintf(stderr, "WARNING: Using naive work-stealing patterns.\n"); base = 0; limit = hc_context->nworkers; } else { assert(base >= 0); if (limit < 0) { limit = hc_context->nworkers; } } hc_context->workers[i]->base_intra_socket_workers = base; hc_context->workers[i]->limit_intra_socket_workers = limit; #ifdef VERBOSE char *nbuf; hwloc_bitmap_asprintf(&nbuf, nodeset); char *buffer; hwloc_bitmap_asprintf(&buffer, thread_cpusets[i]); fprintf(stderr, "Worker %d has access to %d PUs (%s), %d NUMA nodes " "(%s). 
Shared NUMA nodes with [%d, %d).\n", i, hwloc_bitmap_weight(thread_cpusets[i]), buffer, hwloc_bitmap_weight(nodeset), nbuf, base, limit); free(buffer); #endif } #endif }