static void
output_compute_pu_min_textwidth(struct lstopo_output *output)
{
  unsigned fontsize = output->fontsize;
  char text[64];
  int n;
  hwloc_topology_t topology = output->topology;
  hwloc_obj_t lastpu;

  if (!output->methods->textsize) {
    output->min_pu_textwidth = 0;
    return;
  }

  /* find the PU whose label is the longest: the one with the highest index,
   * either in logical order or in physical (OS) order */
  if (output->logical) {
    int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PU);
    lastpu = hwloc_get_obj_by_depth(topology, depth,
                                    hwloc_get_nbobjs_by_depth(topology, depth)-1);
  } else {
    unsigned lastidx = hwloc_bitmap_last(hwloc_topology_get_topology_cpuset(topology));
    lastpu = hwloc_get_pu_obj_by_os_index(topology, lastidx);
  }

  /* render that PU's label and measure it */
  n = lstopo_obj_snprintf(output, text, sizeof(text), lastpu);
  output->min_pu_textwidth = get_textwidth(output, text, n, fontsize);
}
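/*
 * Illustrative sketch (not part of lstopo): how the "last PU" above can be
 * located with the public hwloc API alone, both by logical index and by
 * physical (OS) index.  Assumes a standard hwloc installation; error
 * handling is reduced to asserts.
 */
#include <hwloc.h>
#include <assert.h>
#include <stdio.h>

static void print_last_pu(void)
{
  hwloc_topology_t topology;
  hwloc_obj_t lastpu_logical, lastpu_physical;
  int depth;
  unsigned nbpus, lastidx;

  assert(hwloc_topology_init(&topology) == 0);
  assert(hwloc_topology_load(topology) == 0);

  /* last PU in logical order: highest logical index at the PU depth */
  depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PU);
  nbpus = hwloc_get_nbobjs_by_depth(topology, depth);
  lastpu_logical = hwloc_get_obj_by_depth(topology, depth, nbpus - 1);

  /* last PU in physical order: highest OS index in the topology cpuset */
  lastidx = hwloc_bitmap_last(hwloc_topology_get_topology_cpuset(topology));
  lastpu_physical = hwloc_get_pu_obj_by_os_index(topology, lastidx);

  printf("logical last PU L#%u, physical last PU P#%u\n",
         lastpu_logical->logical_index, lastpu_physical->os_index);

  hwloc_topology_destroy(topology);
}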
/******************* FUNCTION *********************/
int TopoHwloc::getFirstBitInBitmap(hwloc_bitmap_t bitmap) const
{
	int last = hwloc_bitmap_last(bitmap);
	int current = hwloc_bitmap_first(bitmap);
	assert(current != -1);
	while (current != last)
	{
		if (hwloc_bitmap_isset(bitmap, current))
			break;
		current = hwloc_bitmap_next(bitmap, current);
	}
	return current;
}
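/*
 * Illustrative sketch (assumption, not part of the class above): for a
 * non-empty bitmap, hwloc_bitmap_first() already returns the lowest set bit,
 * so a direct call is expected to give the same result as the scanning loop
 * above.  Compiles stand-alone against plain hwloc; names are hypothetical.
 */
#include <hwloc.h>
#include <assert.h>

static int first_bit_direct(hwloc_bitmap_t bitmap)
{
  int first = hwloc_bitmap_first(bitmap);
  assert(first != -1);            /* caller must pass a non-empty bitmap */
  return first;
}

static void check_first_bit(void)
{
  hwloc_bitmap_t bitmap = hwloc_bitmap_alloc();
  hwloc_bitmap_set_range(bitmap, 12, 17);
  hwloc_bitmap_set(bitmap, 42);
  assert(first_bit_direct(bitmap) == 12);   /* same value the loop above returns */
  hwloc_bitmap_free(bitmap);
}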
/* convert set into index+mask if all set bits are in the same ULONG.
 * otherwise return -1.
 */
static int hwloc_bitmap_to_single_ULONG_PTR(hwloc_const_bitmap_t set, unsigned *index, ULONG_PTR *mask)
{
  unsigned first_ulp, last_ulp;
  if (hwloc_bitmap_weight(set) == -1)
    return -1;
  first_ulp = hwloc_bitmap_first(set) / (sizeof(ULONG_PTR)*8);
  last_ulp = hwloc_bitmap_last(set) / (sizeof(ULONG_PTR)*8);
  if (first_ulp != last_ulp)
    return -1;
  *mask = hwloc_bitmap_to_ith_ULONG_PTR(set, first_ulp);
  *index = first_ulp;
  return 0;
}
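/*
 * Illustrative sketch: a portable analogue of the helper above, using
 * unsigned long and the public hwloc_bitmap_to_ith_ulong() instead of the
 * Windows-only ULONG_PTR internals.  The function name and the empty-set
 * guard are assumptions for illustration, not hwloc API.
 */
#include <hwloc.h>

static int bitmap_to_single_ulong(hwloc_const_bitmap_t set, unsigned *index, unsigned long *mask)
{
  unsigned first_ul, last_ul;
  /* reject empty or infinite bitmaps before indexing */
  if (hwloc_bitmap_iszero(set) || hwloc_bitmap_weight(set) == -1)
    return -1;
  first_ul = hwloc_bitmap_first(set) / (sizeof(unsigned long)*8);
  last_ul = hwloc_bitmap_last(set) / (sizeof(unsigned long)*8);
  /* set bits spread across several words: give up */
  if (first_ul != last_ul)
    return -1;
  *mask = hwloc_bitmap_to_ith_ulong(set, first_ul);
  *index = first_ul;
  return 0;
}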
#include <hwloc.h>
#include <assert.h>

int main(void)
{
  hwloc_bitmap_t set;

  /* check an empty bitmap */
  set = hwloc_bitmap_alloc();
  assert(hwloc_bitmap_to_ulong(set) == 0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 0) == 0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 1) == 0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 23) == 0UL);

  /* check a non-empty bitmap */
  hwloc_bitmap_from_ith_ulong(set, 4, 0xff);
  assert(hwloc_bitmap_to_ith_ulong(set, 4) == 0xff);
  assert(hwloc_bitmap_to_ulong(set) == 0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 0) == 0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 1) == 0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 23) == 0UL);

  /* check a zeroed bitmap */
  hwloc_bitmap_zero(set);
  assert(hwloc_bitmap_to_ulong(set) == 0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 0) == 0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 1) == 0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 4) == 0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 23) == 0UL);

  hwloc_bitmap_free(set);

  /* check a full bitmap */
  set = hwloc_bitmap_alloc_full();
  assert(hwloc_bitmap_to_ulong(set) == ~0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 0) == ~0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 1) == ~0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 23) == ~0UL);

  /* check an almost full bitmap */
  hwloc_bitmap_set_ith_ulong(set, 4, 0xff);
  assert(hwloc_bitmap_to_ith_ulong(set, 4) == 0xff);
  assert(hwloc_bitmap_to_ulong(set) == ~0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 0) == ~0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 1) == ~0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 23) == ~0UL);

  /* check an almost empty bitmap */
  hwloc_bitmap_from_ith_ulong(set, 4, 0xff);
  assert(hwloc_bitmap_to_ith_ulong(set, 4) == 0xff);
  assert(hwloc_bitmap_to_ulong(set) == 0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 0) == 0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 1) == 0UL);
  assert(hwloc_bitmap_to_ith_ulong(set, 23) == 0UL);

  hwloc_bitmap_free(set);

  /* check ranges */
  set = hwloc_bitmap_alloc();
  assert(hwloc_bitmap_weight(set) == 0);
  /* 23-45 */
  hwloc_bitmap_set_range(set, 23, 45);
  assert(hwloc_bitmap_weight(set) == 23);
  /* 23-45,78- */
  hwloc_bitmap_set_range(set, 78, -1);
  assert(hwloc_bitmap_weight(set) == -1);
  /* 23- */
  hwloc_bitmap_set_range(set, 44, 79);
  assert(hwloc_bitmap_weight(set) == -1);
  assert(hwloc_bitmap_first(set) == 23);
  assert(!hwloc_bitmap_isfull(set));
  /* 0- */
  hwloc_bitmap_set_range(set, 0, 22);
  assert(hwloc_bitmap_weight(set) == -1);
  assert(hwloc_bitmap_isfull(set));
  /* 0-34,57- */
  hwloc_bitmap_clr_range(set, 35, 56);
  assert(hwloc_bitmap_weight(set) == -1);
  assert(!hwloc_bitmap_isfull(set));
  /* 0-34,57 */
  hwloc_bitmap_clr_range(set, 58, -1);
  assert(hwloc_bitmap_weight(set) == 36);
  assert(hwloc_bitmap_last(set) == 57);
  assert(hwloc_bitmap_next(set, 34) == 57);
  /* 0-34 */
  hwloc_bitmap_clr(set, 57);
  assert(hwloc_bitmap_weight(set) == 35);
  assert(hwloc_bitmap_last(set) == 34);
  /* empty */
  hwloc_bitmap_clr_range(set, 0, 34);
  assert(hwloc_bitmap_weight(set) == 0);
  assert(hwloc_bitmap_first(set) == -1);
  hwloc_bitmap_free(set);

  return 0;
}
static void create_hwloc_cpusets() {
#ifdef USE_HWLOC
    int i;

    int err = hwloc_topology_init(&topology);
    assert(err == 0);
    err = hwloc_topology_load(topology);
    assert(err == 0);

    hwloc_bitmap_t cpuset = hwloc_bitmap_alloc();
    assert(cpuset);
    err = hwloc_get_cpubind(topology, cpuset, HWLOC_CPUBIND_PROCESS);
    assert(err == 0);
    const int available_pus = hwloc_bitmap_weight(cpuset);
    const int last_set_index = hwloc_bitmap_last(cpuset);
    const int num_workers = hc_context->nworkers;

    hclib_affinity_t selected_affinity = HCLIB_AFFINITY_STRIDED;
    const char *user_selected_affinity = getenv("HCLIB_AFFINITY");
    if (user_selected_affinity) {
        if (strcmp(user_selected_affinity, "strided") == 0) {
            selected_affinity = HCLIB_AFFINITY_STRIDED;
        } else if (strcmp(user_selected_affinity, "chunked") == 0) {
            selected_affinity = HCLIB_AFFINITY_CHUNKED;
        } else {
            fprintf(stderr, "Unsupported thread affinity \"%s\" specified with "
                    "HCLIB_AFFINITY.\n", user_selected_affinity);
            exit(1);
        }
    }

    thread_cpusets = (hwloc_bitmap_t *)malloc(hc_context->nworkers *
            sizeof(*thread_cpusets));
    assert(thread_cpusets);
    for (i = 0; i < hc_context->nworkers; i++) {
        thread_cpusets[i] = hwloc_bitmap_alloc();
        assert(thread_cpusets[i]);
    }

    switch (selected_affinity) {
        case (HCLIB_AFFINITY_STRIDED): {
            if (available_pus < num_workers) {
                fprintf(stderr, "ERROR Available PUs (%d) was less than number "
                        "of workers (%d), don't currently support "
                        "oversubscription with strided thread pinning\n",
                        available_pus, num_workers);
                exit(1);
            }

            /* Round-robin: the k-th available PU goes to worker (k % num_workers). */
            int count = 0;
            int index = 0;
            while (index <= last_set_index) {
                if (hwloc_bitmap_isset(cpuset, index)) {
                    hwloc_bitmap_set(thread_cpusets[count % num_workers], index);
                    count++;
                }
                index++;
            }
            break;
        }

        case (HCLIB_AFFINITY_CHUNKED): {
            /* Each worker gets a contiguous block of ceil(available_pus / num_workers) PUs. */
            const int chunk_size = (available_pus + num_workers - 1) /
                    num_workers;
            int count = 0;
            int index = 0;
            while (index <= last_set_index) {
                if (hwloc_bitmap_isset(cpuset, index)) {
                    hwloc_bitmap_set(thread_cpusets[count / chunk_size], index);
                    count++;
                }
                index++;
            }
            break;
        }

        default:
            assert(false);
    }

    hwloc_bitmap_t nodeset = hwloc_bitmap_alloc();
    hwloc_bitmap_t other_nodeset = hwloc_bitmap_alloc();
    assert(nodeset && other_nodeset);

    /*
     * Here, we look for contiguous ranges of worker threads that share any
     * NUMA nodes with us. In theory, this should be more hierarchical but
     * isn't yet. This is also super inefficient... O(T^2) where T is the
     * number of workers.
     */
    bool revert_to_naive_stealing = false;
    for (i = 0; i < hc_context->nworkers; i++) {
        // Get the NUMA nodes for this CPU set
        hwloc_cpuset_to_nodeset(topology, thread_cpusets[i], nodeset);

        int base = -1;
        int limit = -1;
        int j;
        for (j = 0; j < hc_context->nworkers; j++) {
            hwloc_cpuset_to_nodeset(topology, thread_cpusets[j], other_nodeset);
            // Take the intersection, see if there is any overlap
            hwloc_bitmap_and(other_nodeset, nodeset, other_nodeset);

            if (base < 0) {
                // Haven't found a contiguous chunk of workers yet.
                if (!hwloc_bitmap_iszero(other_nodeset)) {
                    base = j;
                }
            } else {
                /*
                 * Have a contiguous chunk of workers, either still inside it
                 * or after it.
                 */
                if (limit < 0) {
                    // Inside the contiguous chunk of workers
                    if (hwloc_bitmap_iszero(other_nodeset)) {
                        // Found the end
                        limit = j;
                    }
                } else {
                    // After the contiguous chunk of workers
                    if (!hwloc_bitmap_iszero(other_nodeset)) {
                        // No contiguous chunk to find, just do something naive.
                        revert_to_naive_stealing = true;
                        break;
                    }
                }
            }
        }

        if (revert_to_naive_stealing) {
            fprintf(stderr, "WARNING: Using naive work-stealing patterns.\n");
            base = 0;
            limit = hc_context->nworkers;
        } else {
            assert(base >= 0);
            if (limit < 0) {
                limit = hc_context->nworkers;
            }
        }

        hc_context->workers[i]->base_intra_socket_workers = base;
        hc_context->workers[i]->limit_intra_socket_workers = limit;

#ifdef VERBOSE
        char *nbuf;
        hwloc_bitmap_asprintf(&nbuf, nodeset);

        char *buffer;
        hwloc_bitmap_asprintf(&buffer, thread_cpusets[i]);
        fprintf(stderr, "Worker %d has access to %d PUs (%s), %d NUMA nodes "
                "(%s). Shared NUMA nodes with [%d, %d).\n", i,
                hwloc_bitmap_weight(thread_cpusets[i]), buffer,
                hwloc_bitmap_weight(nodeset), nbuf, base, limit);
        free(nbuf);
        free(buffer);
#endif
    }
#endif
}
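/*
 * Illustrative sketch (assumption, not hclib code): once thread_cpusets[] has
 * been filled in above, each worker would typically pin itself to its own
 * cpuset with hwloc_set_cpubind().  The function name, the wid parameter and
 * the error handling below are hypothetical.
 */
static void bind_worker_to_cpuset(int wid)
{
#ifdef USE_HWLOC
    char *str = NULL;
    /* bind only the calling thread, not the whole process */
    if (hwloc_set_cpubind(topology, thread_cpusets[wid],
                HWLOC_CPUBIND_THREAD) != 0) {
        hwloc_bitmap_asprintf(&str, thread_cpusets[wid]);
        fprintf(stderr, "WARNING: failed to bind worker %d to cpuset %s\n",
                wid, str ? str : "(unknown)");
        free(str);
    }
#endif
}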
#include <hwloc.h>
#include <assert.h>

int main(void)
{
  hwloc_bitmap_t set;
  int i, cpu, expected_cpu = 0;

  /* empty set */
  set = hwloc_bitmap_alloc();
  assert(hwloc_bitmap_first(set) == -1);
  assert(hwloc_bitmap_last(set) == -1);
  assert(hwloc_bitmap_next(set, 0) == -1);
  assert(hwloc_bitmap_next(set, -1) == -1);
  assert(hwloc_bitmap_weight(set) == 0);

  /* full set */
  hwloc_bitmap_fill(set);
  assert(hwloc_bitmap_first(set) == 0);
  assert(hwloc_bitmap_last(set) == -1);
  assert(hwloc_bitmap_next(set, -1) == 0);
  assert(hwloc_bitmap_next(set, 0) == 1);
  assert(hwloc_bitmap_next(set, 1) == 2);
  assert(hwloc_bitmap_next(set, 2) == 3);
  assert(hwloc_bitmap_next(set, 30) == 31);
  assert(hwloc_bitmap_next(set, 31) == 32);
  assert(hwloc_bitmap_next(set, 32) == 33);
  assert(hwloc_bitmap_next(set, 62) == 63);
  assert(hwloc_bitmap_next(set, 63) == 64);
  assert(hwloc_bitmap_next(set, 64) == 65);
  assert(hwloc_bitmap_next(set, 12345) == 12346);
  assert(hwloc_bitmap_weight(set) == -1);

  /* custom sets */
  hwloc_bitmap_zero(set);
  hwloc_bitmap_set_range(set, 36, 59);
  assert(hwloc_bitmap_first(set) == 36);
  assert(hwloc_bitmap_last(set) == 59);
  assert(hwloc_bitmap_next(set, -1) == 36);
  assert(hwloc_bitmap_next(set, 0) == 36);
  assert(hwloc_bitmap_next(set, 36) == 37);
  assert(hwloc_bitmap_next(set, 59) == -1);
  assert(hwloc_bitmap_weight(set) == 24);
  hwloc_bitmap_set_range(set, 136, 259);
  assert(hwloc_bitmap_first(set) == 36);
  assert(hwloc_bitmap_last(set) == 259);
  assert(hwloc_bitmap_next(set, 59) == 136);
  assert(hwloc_bitmap_next(set, 259) == -1);
  assert(hwloc_bitmap_weight(set) == 148);
  hwloc_bitmap_clr(set, 199);
  assert(hwloc_bitmap_first(set) == 36);
  assert(hwloc_bitmap_last(set) == 259);
  assert(hwloc_bitmap_next(set, 198) == 200);
  assert(hwloc_bitmap_next(set, 199) == 200);
  assert(hwloc_bitmap_weight(set) == 147);

  i = 0;
  hwloc_bitmap_foreach_begin(cpu, set) {
    if (0 <= i && i < 24)
      expected_cpu = i + 36;
    else if (24 <= i && i < 87)
      expected_cpu = i + 112;
    else if (87 <= i && i < 147)
      expected_cpu = i + 113;

    assert(expected_cpu == cpu);
    i++;
  } hwloc_bitmap_foreach_end();

  hwloc_bitmap_free(set);

  return 0;
}