static void *
hwloc_win_alloc_membind(hwloc_topology_t topology __hwloc_attribute_unused, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
{
  int node;

  switch (policy) {
    case HWLOC_MEMBIND_DEFAULT:
    case HWLOC_MEMBIND_BIND:
      break;
    default:
      errno = ENOSYS;
      return hwloc_alloc_or_fail(topology, len, flags);
  }

  if (flags & HWLOC_MEMBIND_STRICT) {
    errno = ENOSYS;
    return NULL;
  }

  if (hwloc_bitmap_weight(nodeset) != 1) {
    /* Not a single node, can't do this */
    errno = EXDEV;
    return hwloc_alloc_or_fail(topology, len, flags);
  }

  node = hwloc_bitmap_first(nodeset);
  return VirtualAllocExNumaProc(GetCurrentProcess(), NULL, len, MEM_COMMIT|MEM_RESERVE, PAGE_EXECUTE_READWRITE, node);
}
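/* A minimal sketch (an assumption, not part of the source above) of how
 * VirtualAllocExNumaProc is presumably obtained: VirtualAllocExNuma only
 * exists on Windows Vista and later, so it is typically resolved at runtime
 * with GetProcAddress rather than linked directly. The names below are
 * illustrative; assumes <windows.h>. */
typedef LPVOID (WINAPI *PFN_VirtualAllocExNuma)(HANDLE hProcess, LPVOID lpAddress, SIZE_T dwSize, DWORD flAllocationType, DWORD flProtect, DWORD nndPreferred);
static PFN_VirtualAllocExNuma VirtualAllocExNumaProc;

static void resolve_numa_alloc(void)
{
  HMODULE kernel32 = GetModuleHandle(TEXT("kernel32.dll"));
  if (kernel32)
    VirtualAllocExNumaProc = (PFN_VirtualAllocExNuma) GetProcAddress(kernel32, "VirtualAllocExNuma");
  /* callers must check VirtualAllocExNumaProc for NULL before using it */
}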
END_TEST

START_TEST(add_extra_memory_nodes_if_needed_test)
  {
  long long mem_requested;
  long long mem_reserved;
  std::set<int> current_mem_ids;
  hwloc_bitmap_t job_mems = hwloc_bitmap_alloc();
  hwloc_bitmap_t torque_root_mems = hwloc_bitmap_alloc();
  char buf[1024];

  hwloc_bitmap_set(job_mems, 0);
  current_mem_ids.insert(0);
  hwloc_bitmap_set(torque_root_mems, 0);
  hwloc_bitmap_set(torque_root_mems, 1);

  mem_requested = 16 * 1024;
  mem_requested *= 1024;
  mem_requested *= 1024;

  mem_reserved = 15 * 1024;
  mem_reserved *= 1024;
  mem_reserved *= 1024;

  add_extra_memory_nodes_if_needed(mem_requested, mem_reserved, job_mems, torque_root_mems, current_mem_ids);
  fail_unless(hwloc_bitmap_weight(job_mems) == 2);

  hwloc_bitmap_displaylist(buf, sizeof(buf), job_mems);
  fail_unless(strchr(buf, '0') != NULL);
  fail_unless(strchr(buf, '1') != NULL);
  }
/******************* FUNCTION *********************/
int TopoHwloc::getCurrentIdFromNUMABinding(void) const
{
    hwloc_nodeset_t nodeset = hwloc_bitmap_alloc();
    hwloc_cpuset_t cpuset = hwloc_bitmap_alloc();
    hwloc_membind_policy_t policy;
    int res = -1;
    int weight;
    int status;
    #if defined(SCTK_ALLOC_DEBUG) && defined(hwloc_bitmap_list_snprintf)
    char buffer[4096];
    #endif

    //if there is only one numa node, return immediately
    if (getNbNumaEntities() == 1)
        return -1;

    //nodes
    // flags = 0 fallback on PROCESS if THREAD is not supported (as for windows).
    status = hwloc_get_membind_nodeset(topology, nodeset, &policy, 0);
    assert(status == 0);
    if (status != 0)
        return -1;

    #if defined(SCTK_ALLOC_DEBUG) && defined(hwloc_bitmap_list_snprintf)
    status = hwloc_bitmap_list_snprintf(buffer, 4096, nodeset);
    fprintf(stderr, "Current nodes : %s\n", buffer);
    #endif

    //cores
    // flags = 0 fallback on PROCESS if THREAD is not supported (as for windows).
    status = hwloc_get_membind(topology, cpuset, &policy, 0);
    assert(status == 0);
    if (status != 0)
        return -1;

    #if defined(SCTK_ALLOC_DEBUG) && defined(hwloc_bitmap_list_snprintf)
    status = hwloc_bitmap_list_snprintf(buffer, 4096, cpuset);
    fprintf(stderr, "Current cores : %s\n", buffer);
    #endif

    //nodes from cores
    hwloc_cpuset_to_nodeset(topology, cpuset, nodeset);

    #if defined(SCTK_ALLOC_DEBUG) && defined(hwloc_bitmap_list_snprintf)
    status = hwloc_bitmap_list_snprintf(buffer, 4096, nodeset);
    fprintf(stderr, "Current nodes from cores : %s\n", buffer);
    #endif

    //calc res
    weight = hwloc_bitmap_weight(nodeset);
    assert(weight != 0);
    if (weight == 1)
        res = getFirstBitInBitmap(nodeset);

    hwloc_bitmap_free(cpuset);
    hwloc_bitmap_free(nodeset);

    return res;
}
static spu_t
hwloc_hpux_find_spu(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_bitmap_t hwloc_set)
{
  spu_t cpu;

  cpu = hwloc_bitmap_first(hwloc_set);
  if (cpu != -1 && hwloc_bitmap_weight(hwloc_set) == 1)
    return cpu;
  return -1;
}
/* convert set into index+mask if all set bits are in the same ULONG.
 * otherwise return -1.
 */
static int hwloc_bitmap_to_single_ULONG_PTR(hwloc_const_bitmap_t set, unsigned *index, ULONG_PTR *mask)
{
  unsigned first_ulp, last_ulp;
  if (hwloc_bitmap_weight(set) == -1)
    return -1;
  first_ulp = hwloc_bitmap_first(set) / (sizeof(ULONG_PTR)*8);
  last_ulp = hwloc_bitmap_last(set) / (sizeof(ULONG_PTR)*8);
  if (first_ulp != last_ulp)
    return -1;
  *mask = hwloc_bitmap_to_ith_ULONG_PTR(set, first_ulp);
  *index = first_ulp;
  return 0;
}
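/* A minimal usage sketch, not part of the source above: the index+mask pair
 * maps naturally onto a Windows processor group and its affinity mask, e.g.
 * for SetThreadGroupAffinity(). The helper name and the assumption that
 * `index` equals the processor-group number are illustrative only; assumes
 * <windows.h> and <string.h>. */
static int bind_current_thread_single_group(hwloc_const_bitmap_t set)
{
  unsigned group;
  ULONG_PTR mask;
  GROUP_AFFINITY aff;

  if (hwloc_bitmap_to_single_ULONG_PTR(set, &group, &mask) < 0)
    return -1; /* bits span several ULONG_PTRs (or set is infinite): caller must fall back */

  memset(&aff, 0, sizeof(aff));
  aff.Group = (WORD) group;
  aff.Mask = mask;
  return SetThreadGroupAffinity(GetCurrentThread(), &aff, NULL) ? 0 : -1;
}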
/******************* FUNCTION *********************/
int TopoHwloc::getCurrentIdFromThreadBinding(void) const
{
    hwloc_nodeset_t nodeset = hwloc_bitmap_alloc();
    hwloc_cpuset_t cpuset = hwloc_bitmap_alloc();
    int res = -1;
    int weight;
    #if defined(SCTK_ALLOC_DEBUG) && defined(hwloc_bitmap_list_snprintf)
    char buffer[4096];
    #endif

    //get current core binding
    //for windows use 0 instead of HWLOC_CPUBIND_THREAD
    int status = hwloc_get_cpubind(topology, cpuset, 0);
    assert(status == 0);
    if (status != 0)
        return -1;

    #if defined(SCTK_ALLOC_DEBUG) && defined(hwloc_bitmap_list_snprintf)
    status = hwloc_bitmap_list_snprintf(buffer, 4096, cpuset);
    fprintf(stderr, "Current cores : %s\n", buffer);
    #endif

    //nodes from cores
    hwloc_cpuset_to_nodeset(topology, cpuset, nodeset);

    #if defined(SCTK_ALLOC_DEBUG) && defined(hwloc_bitmap_list_snprintf)
    status = hwloc_bitmap_list_snprintf(buffer, 4096, nodeset);
    fprintf(stderr, "Current nodes from cores : %s\n", buffer);
    #endif

    //calc res
    weight = hwloc_bitmap_weight(nodeset);
    assert(weight != 0);
    if (weight == 1)
        res = getFirstBitInBitmap(nodeset);

    hwloc_bitmap_free(cpuset);
    hwloc_bitmap_free(nodeset);

    return res;
}
int main(void)
{
  hwloc_topology_t topology;
  hwloc_bitmap_t set;
  hwloc_const_bitmap_t cset;
  hwloc_membind_policy_t policy;
  const struct hwloc_topology_support *support;
  int nbnodes;
  hwloc_obj_t obj;
  char *buffer, *s;
  unsigned i;
  int err;

  /* create a topology */
  err = hwloc_topology_init(&topology);
  if (err < 0) {
    fprintf(stderr, "failed to initialize the topology\n");
    return EXIT_FAILURE;
  }
  err = hwloc_topology_load(topology);
  if (err < 0) {
    fprintf(stderr, "failed to load the topology\n");
    hwloc_topology_destroy(topology);
    return EXIT_FAILURE;
  }

  /* retrieve the entire set of NUMA nodes and count them */
  cset = hwloc_topology_get_topology_nodeset(topology);
  nbnodes = hwloc_bitmap_weight(cset);
  if (nbnodes <= 0) {
    /* nbnodes may be -1 when there's no NUMA information,
     * or 0 when the machine is known to be non-NUMA */
    printf("this machine is not NUMA, nothing to do\n");
    hwloc_topology_destroy(topology);
    return EXIT_SUCCESS;
  }
  printf("there are %d nodes in the machine\n", nbnodes);

  /* get the process memory binding as a nodeset */
  set = hwloc_bitmap_alloc();
  if (!set) {
    fprintf(stderr, "failed to allocate a bitmap\n");
    hwloc_topology_destroy(topology);
    return EXIT_FAILURE;
  }
  err = hwloc_get_membind_nodeset(topology, set, &policy, 0);
  if (err < 0) {
    fprintf(stderr, "failed to retrieve my memory binding and policy\n");
    hwloc_topology_destroy(topology);
    hwloc_bitmap_free(set);
    return EXIT_FAILURE;
  }

  /* print the corresponding NUMA nodes */
  hwloc_bitmap_asprintf(&s, set);
  printf("bound to nodeset %s which contains:\n", s);
  free(s);
  hwloc_bitmap_foreach_begin(i, set) {
    obj = hwloc_get_numanode_obj_by_os_index(topology, i);
    printf("  node #%u (OS index %u) with %lld bytes of memory\n",
           obj->logical_index, i, (unsigned long long) obj->memory.local_memory);
  } hwloc_bitmap_foreach_end();
void output_console(struct lstopo_output *loutput, const char *filename)
{
  hwloc_topology_t topology = loutput->topology;
  unsigned topodepth;
  int verbose_mode = loutput->verbose_mode;
  int logical = loutput->logical;
  FILE *output;

  output = open_output(filename, loutput->overwrite);
  if (!output) {
    fprintf(stderr, "Failed to open %s for writing (%s)\n", filename, strerror(errno));
    return;
  }

  topodepth = hwloc_topology_get_depth(topology);

  /*
   * if verbose_mode == 0, only print the summary.
   * if verbose_mode == 1, only print the topology tree.
   * if verbose_mode > 1, print both.
   */

  if (lstopo_show_only != (hwloc_obj_type_t)-1) {
    if (verbose_mode > 1)
      fprintf(output, "Only showing %s objects\n", hwloc_obj_type_string(lstopo_show_only));
    output_only(topology, hwloc_get_root_obj(topology), output, logical, verbose_mode);
  } else if (verbose_mode >= 1) {
    output_topology(topology, hwloc_get_root_obj(topology), NULL, output, 0, logical, verbose_mode);
    fprintf(output, "\n");
  }

  if ((verbose_mode > 1 || !verbose_mode) && lstopo_show_only == (hwloc_obj_type_t)-1) {
    hwloc_lstopo_show_summary(output, topology);
  }

  if (verbose_mode > 1 && lstopo_show_only == (hwloc_obj_type_t)-1) {
    const struct hwloc_distances_s *distances;
    unsigned depth;

    for (depth = 0; depth < topodepth; depth++) {
      distances = hwloc_get_whole_distance_matrix_by_depth(topology, depth);
      if (!distances || !distances->latency)
        continue;
      fprintf(output, "relative latency matrix between %ss (depth %u) by %s indexes:\n",
              hwloc_obj_type_string(hwloc_get_depth_type(topology, depth)),
              depth, logical ? "logical" : "physical");
      hwloc_utils_print_distance_matrix(output, topology, hwloc_get_root_obj(topology), distances->nbobjs, depth, distances->latency, logical);
    }
  }

  if (verbose_mode > 1 && lstopo_show_only == (hwloc_obj_type_t)-1) {
    hwloc_const_bitmap_t complete = hwloc_topology_get_complete_cpuset(topology);
    hwloc_const_bitmap_t topo = hwloc_topology_get_topology_cpuset(topology);
    hwloc_const_bitmap_t allowed = hwloc_topology_get_allowed_cpuset(topology);

    if (!hwloc_bitmap_isequal(topo, complete)) {
      hwloc_bitmap_t unknown = hwloc_bitmap_alloc();
      char *unknownstr;
      hwloc_bitmap_copy(unknown, complete);
      hwloc_bitmap_andnot(unknown, unknown, topo);
      hwloc_bitmap_asprintf(&unknownstr, unknown);
      fprintf(output, "%d processors not represented in topology: %s\n", hwloc_bitmap_weight(unknown), unknownstr);
      free(unknownstr);
      hwloc_bitmap_free(unknown);
    }
    if (!hwloc_bitmap_isequal(topo, allowed)) {
      hwloc_bitmap_t disallowed = hwloc_bitmap_alloc();
      char *disallowedstr;
      hwloc_bitmap_copy(disallowed, topo);
      hwloc_bitmap_andnot(disallowed, disallowed, allowed);
      hwloc_bitmap_asprintf(&disallowedstr, disallowed);
      fprintf(output, "%d processors represented but not allowed: %s\n", hwloc_bitmap_weight(disallowed), disallowedstr);
      free(disallowedstr);
      hwloc_bitmap_free(disallowed);
    }
    if (!hwloc_topology_is_thissystem(topology))
      fprintf(output, "Topology not from this system\n");
  }

  if (output != stdout)
    fclose(output);
}
void output_console(hwloc_topology_t topology, const char *filename, int logical, int legend __hwloc_attribute_unused, int verbose_mode)
{
  unsigned topodepth;
  FILE *output;

  if (!filename || !strcmp(filename, "-"))
    output = stdout;
  else {
    output = open_file(filename, "w");
    if (!output) {
      fprintf(stderr, "Failed to open %s for writing (%s)\n", filename, strerror(errno));
      return;
    }
  }

  topodepth = hwloc_topology_get_depth(topology);

  /*
   * if verbose_mode == 0, only print the summary.
   * if verbose_mode == 1, only print the topology tree.
   * if verbose_mode > 1, print both.
   */

  if (lstopo_show_only != (hwloc_obj_type_t)-1) {
    if (verbose_mode > 1)
      fprintf(output, "Only showing %s objects\n", hwloc_obj_type_string(lstopo_show_only));
    output_only(topology, hwloc_get_root_obj(topology), output, logical, verbose_mode);
  } else if (verbose_mode >= 1) {
    output_topology(topology, hwloc_get_root_obj(topology), NULL, output, 0, logical, verbose_mode);
    fprintf(output, "\n");
  }

  if ((verbose_mode > 1 || !verbose_mode) && lstopo_show_only == (hwloc_obj_type_t)-1) {
    hwloc_lstopo_show_summary(output, topology);
  }

  if (verbose_mode > 1 && lstopo_show_only == (hwloc_obj_type_t)-1) {
    const struct hwloc_distances_s *distances;
    unsigned depth;

    for (depth = 0; depth < topodepth; depth++) {
      distances = hwloc_get_whole_distance_matrix_by_depth(topology, depth);
      if (!distances || !distances->latency)
        continue;
      /* print to the selected output stream, not unconditionally to stdout */
      fprintf(output, "latency matrix between %ss (depth %u) by %s indexes:\n",
              hwloc_obj_type_string(hwloc_get_depth_type(topology, depth)),
              depth, logical ? "logical" : "physical");
      hwloc_utils_print_distance_matrix(topology, hwloc_get_root_obj(topology), distances->nbobjs, depth, distances->latency, logical);
    }
  }

  if (verbose_mode > 1 && lstopo_show_only == (hwloc_obj_type_t)-1) {
    hwloc_const_bitmap_t complete = hwloc_topology_get_complete_cpuset(topology);
    hwloc_const_bitmap_t topo = hwloc_topology_get_topology_cpuset(topology);
    hwloc_const_bitmap_t online = hwloc_topology_get_online_cpuset(topology);
    hwloc_const_bitmap_t allowed = hwloc_topology_get_allowed_cpuset(topology);

    if (complete && !hwloc_bitmap_isequal(topo, complete)) {
      hwloc_bitmap_t unknown = hwloc_bitmap_alloc();
      char *unknownstr;
      hwloc_bitmap_copy(unknown, complete);
      hwloc_bitmap_andnot(unknown, unknown, topo);
      hwloc_bitmap_asprintf(&unknownstr, unknown);
      fprintf(output, "%d processors not represented in topology: %s\n", hwloc_bitmap_weight(unknown), unknownstr);
      free(unknownstr);
      hwloc_bitmap_free(unknown);
    }
    if (complete && !hwloc_bitmap_isequal(online, complete)) {
      hwloc_bitmap_t offline = hwloc_bitmap_alloc();
      char *offlinestr;
      hwloc_bitmap_copy(offline, complete);
      hwloc_bitmap_andnot(offline, offline, online);
      hwloc_bitmap_asprintf(&offlinestr, offline);
      fprintf(output, "%d processors offline: %s\n", hwloc_bitmap_weight(offline), offlinestr);
      free(offlinestr);
      hwloc_bitmap_free(offline);
    }
    if (complete && !hwloc_bitmap_isequal(allowed, online)) {
      if (!hwloc_bitmap_isincluded(online, allowed)) {
        hwloc_bitmap_t forbidden = hwloc_bitmap_alloc();
        char *forbiddenstr;
        hwloc_bitmap_copy(forbidden, online);
        hwloc_bitmap_andnot(forbidden, forbidden, allowed);
        hwloc_bitmap_asprintf(&forbiddenstr, forbidden);
        fprintf(output, "%d processors online but not allowed: %s\n", hwloc_bitmap_weight(forbidden), forbiddenstr);
        free(forbiddenstr);
        hwloc_bitmap_free(forbidden);
      }
      if (!hwloc_bitmap_isincluded(allowed, online)) {
        hwloc_bitmap_t potential = hwloc_bitmap_alloc();
        char *potentialstr;
        hwloc_bitmap_copy(potential, allowed);
        hwloc_bitmap_andnot(potential, potential, online);
        hwloc_bitmap_asprintf(&potentialstr, potential);
        fprintf(output, "%d processors allowed but not online: %s\n", hwloc_bitmap_weight(potential), potentialstr);
        free(potentialstr);
        hwloc_bitmap_free(potential);
      }
    }
    if (!hwloc_topology_is_thissystem(topology))
      fprintf(output, "Topology not from this system\n");
  }

  if (output != stdout)
    fclose(output);
}
int main(void)
{
  hwloc_bitmap_t set;

  /* check an empty bitmap */
  set = hwloc_bitmap_alloc();
  assert(hwloc_bitmap_to_ulong(set) == 0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 0) == 0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 1) == 0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 23) == 0UL);

  /* check a non-empty bitmap */
  hwloc_bitmap_from_ith_ulong(set, 4, 0xff);
  assert(hwloc_bitmap_to_ith_ulong(set, 4) == 0xff);
  assert(hwloc_bitmap_to_ulong(set) == 0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 0) == 0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 1) == 0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 23) == 0UL);

  /* check a zeroed bitmap */
  hwloc_bitmap_zero(set);
  assert(hwloc_bitmap_to_ulong(set) == 0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 0) == 0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 1) == 0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 4) == 0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 23) == 0UL);

  hwloc_bitmap_free(set);

  /* check a full bitmap */
  set = hwloc_bitmap_alloc_full();
  assert(hwloc_bitmap_to_ulong(set) == ~0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 0) == ~0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 1) == ~0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 23) == ~0UL);

  /* check an almost full bitmap */
  hwloc_bitmap_set_ith_ulong(set, 4, 0xff);
  assert(hwloc_bitmap_to_ith_ulong(set, 4) == 0xff);
  assert(hwloc_bitmap_to_ulong(set) == ~0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 0) == ~0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 1) == ~0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 23) == ~0UL);

  /* check an almost empty bitmap */
  hwloc_bitmap_from_ith_ulong(set, 4, 0xff);
  assert(hwloc_bitmap_to_ith_ulong(set, 4) == 0xff);
  assert(hwloc_bitmap_to_ulong(set) == 0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 0) == 0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 1) == 0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 23) == 0UL);

  hwloc_bitmap_free(set);

  /* check ranges */
  set = hwloc_bitmap_alloc();
  assert(hwloc_bitmap_weight(set) == 0);
  /* 23-45 */
  hwloc_bitmap_set_range(set, 23, 45);
  assert(hwloc_bitmap_weight(set) == 23);
  /* 23-45,78- */
  hwloc_bitmap_set_range(set, 78, -1);
  assert(hwloc_bitmap_weight(set) == -1);
  /* 23- */
  hwloc_bitmap_set_range(set, 44, 79);
  assert(hwloc_bitmap_weight(set) == -1);
  assert(hwloc_bitmap_first(set) == 23);
  assert(!hwloc_bitmap_isfull(set));
  /* 0- */
  hwloc_bitmap_set_range(set, 0, 22);
  assert(hwloc_bitmap_weight(set) == -1);
  assert(hwloc_bitmap_isfull(set));
  /* 0-34,57- */
  hwloc_bitmap_clr_range(set, 35, 56);
  assert(hwloc_bitmap_weight(set) == -1);
  assert(!hwloc_bitmap_isfull(set));
  /* 0-34,57 */
  hwloc_bitmap_clr_range(set, 58, -1);
  assert(hwloc_bitmap_weight(set) == 36);
  assert(hwloc_bitmap_last(set) == 57);
  assert(hwloc_bitmap_next(set, 34) == 57);
  /* 0-34 */
  hwloc_bitmap_clr(set, 57);
  assert(hwloc_bitmap_weight(set) == 35);
  assert(hwloc_bitmap_last(set) == 34);
  /* empty */
  hwloc_bitmap_clr_range(set, 0, 34);
  assert(hwloc_bitmap_weight(set) == 0);
  assert(hwloc_bitmap_first(set) == -1);
  hwloc_bitmap_free(set);

  return 0;
}
static void create_hwloc_cpusets() {
#ifdef USE_HWLOC
    int i;

    int err = hwloc_topology_init(&topology);
    assert(err == 0);
    err = hwloc_topology_load(topology);
    assert(err == 0);

    hwloc_bitmap_t cpuset = hwloc_bitmap_alloc();
    assert(cpuset);
    err = hwloc_get_cpubind(topology, cpuset, HWLOC_CPUBIND_PROCESS);
    assert(err == 0);
    const int available_pus = hwloc_bitmap_weight(cpuset);
    const int last_set_index = hwloc_bitmap_last(cpuset);
    const int num_workers = hc_context->nworkers;

    hclib_affinity_t selected_affinity = HCLIB_AFFINITY_STRIDED;
    const char *user_selected_affinity = getenv("HCLIB_AFFINITY");
    if (user_selected_affinity) {
        if (strcmp(user_selected_affinity, "strided") == 0) {
            selected_affinity = HCLIB_AFFINITY_STRIDED;
        } else if (strcmp(user_selected_affinity, "chunked") == 0) {
            selected_affinity = HCLIB_AFFINITY_CHUNKED;
        } else {
            fprintf(stderr, "Unsupported thread affinity \"%s\" specified with "
                    "HCLIB_AFFINITY.\n", user_selected_affinity);
            exit(1);
        }
    }

    thread_cpusets = (hwloc_bitmap_t *)malloc(hc_context->nworkers *
            sizeof(*thread_cpusets));
    assert(thread_cpusets);
    for (i = 0; i < hc_context->nworkers; i++) {
        thread_cpusets[i] = hwloc_bitmap_alloc();
        assert(thread_cpusets[i]);
    }

    switch (selected_affinity) {
        case (HCLIB_AFFINITY_STRIDED): {
            if (available_pus < num_workers) {
                fprintf(stderr, "ERROR Available PUs (%d) was less than number "
                        "of workers (%d), don't currently support "
                        "oversubscription with strided thread pinning\n",
                        available_pus, num_workers);
                exit(1);
            }

            int count = 0;
            int index = 0;
            while (index <= last_set_index) {
                if (hwloc_bitmap_isset(cpuset, index)) {
                    hwloc_bitmap_set(thread_cpusets[count % num_workers], index);
                    count++;
                }
                index++;
            }
            break;
        }
        case (HCLIB_AFFINITY_CHUNKED): {
            const int chunk_size = (available_pus + num_workers - 1) /
                    num_workers;
            int count = 0;
            int index = 0;
            while (index <= last_set_index) {
                if (hwloc_bitmap_isset(cpuset, index)) {
                    hwloc_bitmap_set(thread_cpusets[count / chunk_size], index);
                    count++;
                }
                index++;
            }
            break;
        }
        default:
            assert(false);
    }

    hwloc_bitmap_t nodeset = hwloc_bitmap_alloc();
    hwloc_bitmap_t other_nodeset = hwloc_bitmap_alloc();
    assert(nodeset && other_nodeset);

    /*
     * Here, we look for contiguous ranges of worker threads that share any
     * NUMA nodes with us. In theory, this should be more hierarchical but
     * isn't yet. This is also super inefficient... O(T^2) where T is the
     * number of workers.
     */
    bool revert_to_naive_stealing = false;
    for (i = 0; i < hc_context->nworkers; i++) {
        // Get the NUMA nodes for this CPU set
        hwloc_cpuset_to_nodeset(topology, thread_cpusets[i], nodeset);

        int base = -1;
        int limit = -1;
        int j;
        for (j = 0; j < hc_context->nworkers; j++) {
            hwloc_cpuset_to_nodeset(topology, thread_cpusets[j], other_nodeset);
            // Take the intersection, see if there is any overlap
            hwloc_bitmap_and(other_nodeset, nodeset, other_nodeset);

            if (base < 0) {
                // Haven't found a contiguous chunk of workers yet.
                if (!hwloc_bitmap_iszero(other_nodeset)) {
                    base = j;
                }
            } else {
                /*
                 * Have a contiguous chunk of workers, either still inside it
                 * or after it.
                 */
                if (limit < 0) {
                    // Inside the contiguous chunk of workers
                    if (hwloc_bitmap_iszero(other_nodeset)) {
                        // Found the end
                        limit = j;
                    }
                } else {
                    // After the contiguous chunk of workers
                    if (!hwloc_bitmap_iszero(other_nodeset)) {
                        // No contiguous chunk to find, just do something naive.
                        revert_to_naive_stealing = true;
                        break;
                    }
                }
            }
        }

        if (revert_to_naive_stealing) {
            fprintf(stderr, "WARNING: Using naive work-stealing patterns.\n");
            base = 0;
            limit = hc_context->nworkers;
        } else {
            assert(base >= 0);
            if (limit < 0) {
                limit = hc_context->nworkers;
            }
        }

        hc_context->workers[i]->base_intra_socket_workers = base;
        hc_context->workers[i]->limit_intra_socket_workers = limit;

#ifdef VERBOSE
        char *nbuf;
        hwloc_bitmap_asprintf(&nbuf, nodeset);
        char *buffer;
        hwloc_bitmap_asprintf(&buffer, thread_cpusets[i]);
        fprintf(stderr, "Worker %d has access to %d PUs (%s), %d NUMA nodes "
                "(%s). Shared NUMA nodes with [%d, %d).\n", i,
                hwloc_bitmap_weight(thread_cpusets[i]), buffer,
                hwloc_bitmap_weight(nodeset), nbuf, base, limit);
        free(buffer);
#endif
    }
#endif
}
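/* A minimal sketch (an assumption, not from the HClib source above) of how a
 * worker thread might later apply its precomputed cpuset; `bind_worker_thread`
 * and its call site are hypothetical, while `topology` and `thread_cpusets`
 * are the file-scope variables used above. */
static void bind_worker_thread(int worker_id) {
#ifdef USE_HWLOC
    // HWLOC_CPUBIND_THREAD binds only the calling thread
    int err = hwloc_set_cpubind(topology, thread_cpusets[worker_id],
            HWLOC_CPUBIND_THREAD);
    if (err) {
        fprintf(stderr, "WARNING: failed to bind worker %d (errno=%d)\n",
                worker_id, errno);
    }
#endif
}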
static int
hwloc_solaris_set_sth_cpubind(hwloc_topology_t topology, idtype_t idtype, id_t id, hwloc_const_bitmap_t hwloc_set, int flags)
{
  unsigned target_cpu;

  /* The resulting binding is always strict */

  if (hwloc_bitmap_isequal(hwloc_set, hwloc_topology_get_complete_cpuset(topology))) {
    if (processor_bind(idtype, id, PBIND_NONE, NULL) != 0)
      return -1;
#ifdef HAVE_LIBLGRP
    if (!(flags & HWLOC_CPUBIND_NOMEMBIND)) {
      int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
      if (depth >= 0) {
        int n = hwloc_get_nbobjs_by_depth(topology, depth);
        int i;
        for (i = 0; i < n; i++) {
          hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i);
          lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_NONE);
        }
      }
    }
#endif /* HAVE_LIBLGRP */
    return 0;
  }

#ifdef HAVE_LIBLGRP
  if (!(flags & HWLOC_CPUBIND_NOMEMBIND)) {
    int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
    if (depth >= 0) {
      int n = hwloc_get_nbobjs_by_depth(topology, depth);
      int i;
      int ok;
      hwloc_bitmap_t target = hwloc_bitmap_alloc();

      for (i = 0; i < n; i++) {
        hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i);
        if (hwloc_bitmap_isincluded(obj->cpuset, hwloc_set))
          hwloc_bitmap_or(target, target, obj->cpuset);
      }

      ok = hwloc_bitmap_isequal(target, hwloc_set);
      hwloc_bitmap_free(target);

      if (ok) {
        /* Ok, managed to achieve hwloc_set by just combining NUMA nodes */
        for (i = 0; i < n; i++) {
          hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i);
          if (hwloc_bitmap_isincluded(obj->cpuset, hwloc_set)) {
            lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_STRONG);
          } else {
            if (flags & HWLOC_CPUBIND_STRICT)
              lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_NONE);
            else
              lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_WEAK);
          }
        }
        return 0;
      }
    }
  }
#endif /* HAVE_LIBLGRP */

  if (hwloc_bitmap_weight(hwloc_set) != 1) {
    errno = EXDEV;
    return -1;
  }

  target_cpu = hwloc_bitmap_first(hwloc_set);
  if (processor_bind(idtype, id, (processorid_t) (target_cpu), NULL) != 0)
    return -1;

  return 0;
}
int main(void)
{
  hwloc_topology_t topology;
  hwloc_bitmap_t set, set2;
  hwloc_const_bitmap_t cset_available, cset_all;
  hwloc_obj_t obj;
  char *buffer;
  char type[64];
  unsigned i;
  int err;

  /* create a topology */
  err = hwloc_topology_init(&topology);
  if (err < 0) {
    fprintf(stderr, "failed to initialize the topology\n");
    return EXIT_FAILURE;
  }
  err = hwloc_topology_load(topology);
  if (err < 0) {
    fprintf(stderr, "failed to load the topology\n");
    hwloc_topology_destroy(topology);
    return EXIT_FAILURE;
  }

  /* retrieve the entire set of available PUs */
  cset_available = hwloc_topology_get_topology_cpuset(topology);

  /* retrieve the CPU binding of the current entire process */
  set = hwloc_bitmap_alloc();
  if (!set) {
    fprintf(stderr, "failed to allocate a bitmap\n");
    hwloc_topology_destroy(topology);
    return EXIT_FAILURE;
  }
  err = hwloc_get_cpubind(topology, set, HWLOC_CPUBIND_PROCESS);
  if (err < 0) {
    fprintf(stderr, "failed to get cpu binding\n");
    hwloc_bitmap_free(set);
    hwloc_topology_destroy(topology);
    return EXIT_FAILURE;
  }

  /* display the processing units that cannot be used by this process */
  if (hwloc_bitmap_isequal(set, cset_available)) {
    printf("this process can use all available processing units in the system\n");
  } else {
    /* compute the set where we currently cannot run.
     * we can't modify cset_available because it's a system read-only one,
     * so we do set = available &~ set
     */
    hwloc_bitmap_andnot(set, cset_available, set);
    hwloc_bitmap_asprintf(&buffer, set);
    printf("process cannot use %d processing units (%s) among %u in the system\n",
           hwloc_bitmap_weight(set), buffer, hwloc_bitmap_weight(cset_available));
    free(buffer);
    /* restore set where it was before the &~ operation above */
    hwloc_bitmap_andnot(set, cset_available, set);
  }

  /* print the smallest object covering the current process binding */
  obj = hwloc_get_obj_covering_cpuset(topology, set);
  hwloc_obj_type_snprintf(type, sizeof(type), obj, 0);
  printf("process is bound within object %s logical index %u\n", type, obj->logical_index);

  /* retrieve the single PU where the current thread actually runs within this process binding */
  set2 = hwloc_bitmap_alloc();
  if (!set2) {
    fprintf(stderr, "failed to allocate a bitmap\n");
    hwloc_bitmap_free(set);
    hwloc_topology_destroy(topology);
    return EXIT_FAILURE;
  }
  err = hwloc_get_last_cpu_location(topology, set2, HWLOC_CPUBIND_THREAD);
  if (err < 0) {
    fprintf(stderr, "failed to get last cpu location\n");
    hwloc_bitmap_free(set);
    hwloc_bitmap_free(set2);
    hwloc_topology_destroy(topology);
    return EXIT_FAILURE;
  }

  /* sanity checks that are not actually needed but help the reader */
  /* this thread runs within the process binding */
  assert(hwloc_bitmap_isincluded(set2, set));
  /* this thread runs on a single PU at a time */
  assert(hwloc_bitmap_weight(set2) == 1);

  /* print the logical number of the PU where that thread runs */
  /* extract the PU OS index from the bitmap */
  i = hwloc_bitmap_first(set2);
  obj = hwloc_get_pu_obj_by_os_index(topology, i);
  printf("thread is now running on PU logical index %u (OS/physical index %u)\n",
         obj->logical_index, i);

  /* migrate this single thread to other PUs within the current binding */
  hwloc_bitmap_andnot(set2, set, set2);
  err = hwloc_set_cpubind(topology, set2, HWLOC_CPUBIND_THREAD);
  if (err < 0) {
    fprintf(stderr, "failed to set thread binding\n");
    hwloc_bitmap_free(set);
    hwloc_bitmap_free(set2);
    hwloc_topology_destroy(topology);
    return EXIT_FAILURE;
  }
  /* reprint the PU where that thread runs */
  err = hwloc_get_last_cpu_location(topology, set2, HWLOC_CPUBIND_THREAD);
  if (err < 0) {
    fprintf(stderr, "failed to get last cpu location\n");
    hwloc_bitmap_free(set);
    hwloc_bitmap_free(set2);
    hwloc_topology_destroy(topology);
    return EXIT_FAILURE;
  }

  /* print the logical number of the PU where that thread runs */
  /* extract the PU OS index from the bitmap */
  i = hwloc_bitmap_first(set2);
  obj = hwloc_get_pu_obj_by_os_index(topology, i);
  printf("thread is running on PU logical index %u (OS/physical index %u)\n",
         obj->logical_index, i);

  hwloc_bitmap_free(set);
  hwloc_bitmap_free(set2);

  /* retrieve the entire set of all PUs */
  cset_all = hwloc_topology_get_complete_cpuset(topology);
  if (hwloc_bitmap_isequal(cset_all, cset_available)) {
    printf("all hardware PUs are available\n");
  } else {
    printf("only %d hardware PUs are available in the machine among %d\n",
           hwloc_bitmap_weight(cset_available), hwloc_bitmap_weight(cset_all));
  }

  hwloc_topology_destroy(topology);
  return EXIT_SUCCESS;
}
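/* A possible way to build and run the example above (an assumption, not part
 * of the original source; the file name is hypothetical):
 *
 *   cc hwloc-cpubind-example.c $(pkg-config --cflags --libs hwloc)
 *   ./a.out
 *
 * It needs <hwloc.h>, <stdio.h>, <stdlib.h>, and <assert.h>. */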
int main(void)
{
  hwloc_bitmap_t set;
  int i, cpu, expected_cpu = 0;

  /* empty set */
  set = hwloc_bitmap_alloc();
  assert(hwloc_bitmap_first(set) == -1);
  assert(hwloc_bitmap_last(set) == -1);
  assert(hwloc_bitmap_next(set, 0) == -1);
  assert(hwloc_bitmap_next(set, -1) == -1);
  assert(hwloc_bitmap_weight(set) == 0);

  /* full set */
  hwloc_bitmap_fill(set);
  assert(hwloc_bitmap_first(set) == 0);
  assert(hwloc_bitmap_last(set) == -1);
  assert(hwloc_bitmap_next(set, -1) == 0);
  assert(hwloc_bitmap_next(set, 0) == 1);
  assert(hwloc_bitmap_next(set, 1) == 2);
  assert(hwloc_bitmap_next(set, 2) == 3);
  assert(hwloc_bitmap_next(set, 30) == 31);
  assert(hwloc_bitmap_next(set, 31) == 32);
  assert(hwloc_bitmap_next(set, 32) == 33);
  assert(hwloc_bitmap_next(set, 62) == 63);
  assert(hwloc_bitmap_next(set, 63) == 64);
  assert(hwloc_bitmap_next(set, 64) == 65);
  assert(hwloc_bitmap_next(set, 12345) == 12346);
  assert(hwloc_bitmap_weight(set) == -1);

  /* custom sets */
  hwloc_bitmap_zero(set);
  hwloc_bitmap_set_range(set, 36, 59);
  assert(hwloc_bitmap_first(set) == 36);
  assert(hwloc_bitmap_last(set) == 59);
  assert(hwloc_bitmap_next(set, -1) == 36);
  assert(hwloc_bitmap_next(set, 0) == 36);
  assert(hwloc_bitmap_next(set, 36) == 37);
  assert(hwloc_bitmap_next(set, 59) == -1);
  assert(hwloc_bitmap_weight(set) == 24);
  hwloc_bitmap_set_range(set, 136, 259);
  assert(hwloc_bitmap_first(set) == 36);
  assert(hwloc_bitmap_last(set) == 259);
  assert(hwloc_bitmap_next(set, 59) == 136);
  assert(hwloc_bitmap_next(set, 259) == -1);
  assert(hwloc_bitmap_weight(set) == 148);
  hwloc_bitmap_clr(set, 199);
  assert(hwloc_bitmap_first(set) == 36);
  assert(hwloc_bitmap_last(set) == 259);
  assert(hwloc_bitmap_next(set, 198) == 200);
  assert(hwloc_bitmap_next(set, 199) == 200);
  assert(hwloc_bitmap_weight(set) == 147);

  i = 0;
  hwloc_bitmap_foreach_begin(cpu, set) {
    if (0 <= i && i < 24)
      expected_cpu = i + 36;
    else if (24 <= i && i < 87)
      expected_cpu = i + 112;
    else if (87 <= i && i < 147)
      expected_cpu = i + 113;
    assert(expected_cpu == cpu);
    i++;
  } hwloc_bitmap_foreach_end();

  hwloc_bitmap_free(set);

  return 0;
}
static void
look_rset(int sdl, hwloc_obj_type_t type, struct hwloc_topology *topology, int level)
{
  rsethandle_t rset, rad;
  int i, maxcpus, j;
  int nbnodes;
  struct hwloc_obj *obj;

  if ((topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM))
    rset = rs_alloc(RS_ALL);
  else
    rset = rs_alloc(RS_PARTITION);
  rad = rs_alloc(RS_EMPTY);
  nbnodes = rs_numrads(rset, sdl, 0);
  if (nbnodes == -1) {
    perror("rs_numrads");
    return;
  }

  for (i = 0; i < nbnodes; i++) {
    hwloc_bitmap_t cpuset;
    unsigned os_index = (unsigned) -1; /* no os_index except for PU and NUMANODE below */

    if (rs_getrad(rset, rad, sdl, i, 0)) {
      fprintf(stderr, "rs_getrad(%d) failed: %s\n", i, strerror(errno));
      continue;
    }
    if (!rs_getinfo(rad, R_NUMPROCS, 0))
      continue;

    maxcpus = rs_getinfo(rad, R_MAXPROCS, 0);
    cpuset = hwloc_bitmap_alloc();
    for (j = 0; j < maxcpus; j++) {
      if (rs_op(RS_TESTRESOURCE, rad, NULL, R_PROCS, j))
        hwloc_bitmap_set(cpuset, j);
    }

    if (type == HWLOC_OBJ_PU) {
      os_index = hwloc_bitmap_first(cpuset);
      hwloc_debug("Found PU #%u inside node %d for sdl %d\n", os_index, i, sdl);
      assert(hwloc_bitmap_weight(cpuset) == 1);
    } else if (type == HWLOC_OBJ_NUMANODE) {
      /* NUMA node os_index isn't used for binding, just use the rad number to get unique values.
       * Note that we'll use that fact in hwloc_aix_prepare_membind(). */
      os_index = i;
      hwloc_debug("Using os_index #%u for NUMA node inside node %d for sdl %d\n", os_index, i, sdl);
    }

    obj = hwloc_alloc_setup_object(type, os_index);
    obj->cpuset = cpuset;
    obj->os_level = sdl;

    switch(type) {
      case HWLOC_OBJ_NUMANODE:
        obj->nodeset = hwloc_bitmap_alloc();
        hwloc_bitmap_set(obj->nodeset, i);
        obj->memory.local_memory = 0; /* TODO: odd, rs_getinfo(rad, R_MEMSIZE, 0) << 10 returns the total memory ... */
        obj->memory.page_types_len = 2;
        obj->memory.page_types = malloc(2*sizeof(*obj->memory.page_types));
        memset(obj->memory.page_types, 0, 2*sizeof(*obj->memory.page_types));
        obj->memory.page_types[0].size = hwloc_getpagesize();
#ifdef HAVE__SC_LARGE_PAGESIZE
        obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE);
#endif
        /* TODO: obj->memory.page_types[1].count = rs_getinfo(rset, R_LGPGFREE, 0) / hugepagesize */
        break;
      case HWLOC_OBJ_CACHE:
        obj->attr->cache.size = _system_configuration.L2_cache_size;
        obj->attr->cache.associativity = _system_configuration.L2_cache_asc;
        obj->attr->cache.linesize = 0; /* unknown by default */
        if (__power_pc())
          if (__power_4() || __power_5() || __power_6() || __power_7())
            obj->attr->cache.linesize = 128;
        obj->attr->cache.depth = 2;
        obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED; /* OK for power[4567], unknown for others */
        break;
      case HWLOC_OBJ_GROUP:
        obj->attr->group.depth = level;
        break;
      case HWLOC_OBJ_CORE:
      {
        hwloc_obj_t obj2, obj3;
        obj2 = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, i);
        obj2->cpuset = hwloc_bitmap_dup(obj->cpuset);
        obj2->attr->cache.size = _system_configuration.dcache_size;
        obj2->attr->cache.associativity = _system_configuration.dcache_asc;
        obj2->attr->cache.linesize = _system_configuration.dcache_line;
        obj2->attr->cache.depth = 1;
        if (_system_configuration.cache_attrib & (1<<30)) {
          /* Unified cache */
          obj2->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
          hwloc_debug("Adding an L1u cache for core %d\n", i);
          hwloc_insert_object_by_cpuset(topology, obj2);
        } else {
          /* Separate Instruction and Data caches */
          obj2->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
          hwloc_debug("Adding an L1d cache for core %d\n", i);
          hwloc_insert_object_by_cpuset(topology, obj2);

          obj3 = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, i);
          obj3->cpuset = hwloc_bitmap_dup(obj->cpuset);
          obj3->attr->cache.size = _system_configuration.icache_size;
          obj3->attr->cache.associativity = _system_configuration.icache_asc;
          obj3->attr->cache.linesize = _system_configuration.icache_line;
          obj3->attr->cache.depth = 1;
          obj3->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;
          hwloc_debug("Adding an L1i cache for core %d\n", i);
          hwloc_insert_object_by_cpuset(topology, obj3);
        }
        break;
      }
      default:
        break;
    }

    hwloc_debug_2args_bitmap("%s %d has cpuset %s\n", hwloc_obj_type_string(type), i, obj->cpuset);
    hwloc_insert_object_by_cpuset(topology, obj);
  }

  rs_free(rset);
  rs_free(rad);
}
static void getNumCPUs(void) {
  //
  // accessible cores
  //

  //
  // Hwloc can't tell us the number of accessible cores directly, so
  // get that by counting the parent cores of the accessible PUs.
  //

  //
  // We could seemingly use hwloc_topology_get_allowed_cpuset() to get
  // the set of accessible PUs here. But that seems not to reflect the
  // sched_setaffinity() settings, so use hwloc_get_proc_cpubind() instead.
  //
  hwloc_cpuset_t logAccSet;
  CHK_ERR_ERRNO((logAccSet = hwloc_bitmap_alloc()) != NULL);
  if (hwloc_get_proc_cpubind(topology, getpid(), logAccSet, 0) != 0) {
#ifdef __APPLE__
    const int errRecoverable = (errno == ENOSYS); // no cpubind on macOS
#else
    const int errRecoverable = 0;
#endif
    if (errRecoverable) {
      hwloc_bitmap_fill(logAccSet);
    } else {
      REPORT_ERR_ERRNO(hwloc_get_proc_cpubind(topology, getpid(), logAccSet, 0)
                       == 0);
    }
  }
  hwloc_bitmap_and(logAccSet, logAccSet,
                   hwloc_topology_get_online_cpuset(topology));

  hwloc_cpuset_t physAccSet;
  CHK_ERR_ERRNO((physAccSet = hwloc_bitmap_alloc()) != NULL);

#define NEXT_PU(pu)                                                     \
  hwloc_get_next_obj_inside_cpuset_by_type(topology, logAccSet,         \
                                           HWLOC_OBJ_PU, pu)

  for (hwloc_obj_t pu = NEXT_PU(NULL); pu != NULL; pu = NEXT_PU(pu)) {
    hwloc_obj_t core;
    CHK_ERR_ERRNO((core = hwloc_get_ancestor_obj_by_type(topology,
                                                         HWLOC_OBJ_CORE, pu))
                  != NULL);
    hwloc_bitmap_set(physAccSet, core->logical_index);
  }

#undef NEXT_PU

  numCPUsPhysAcc = hwloc_bitmap_weight(physAccSet);
  hwloc_bitmap_free(physAccSet);
  CHK_ERR(numCPUsPhysAcc > 0);

  //
  // all cores
  //
  numCPUsPhysAll = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_CORE);
  CHK_ERR(numCPUsPhysAll > 0);

  //
  // accessible PUs
  //
  numCPUsLogAcc = hwloc_bitmap_weight(logAccSet);
  CHK_ERR(numCPUsLogAcc > 0);
  hwloc_bitmap_free(logAccSet);

  //
  // all PUs
  //
  numCPUsLogAll = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU);
  CHK_ERR(numCPUsLogAll > 0);
}