Example #1
static void
output_compute_pu_min_textwidth(struct lstopo_output *output)
{
  unsigned fontsize = output->fontsize;
  char text[64];
  int n;
  hwloc_topology_t topology = output->topology;
  hwloc_obj_t lastpu;

  if (!output->methods->textsize) {
    output->min_pu_textwidth = 0;
    return;
  }

  if (output->logical) {
    /* logical indexing: take the PU with the highest logical index */
    int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PU);
    lastpu = hwloc_get_obj_by_depth(topology, depth, hwloc_get_nbobjs_by_depth(topology, depth)-1);
  } else {
    /* physical indexing: take the PU with the highest OS index */
    unsigned lastidx = hwloc_bitmap_last(hwloc_topology_get_topology_cpuset(topology));
    lastpu = hwloc_get_pu_obj_by_os_index(topology, lastidx);
  }

  /* the highest-numbered PU yields the longest index string, hence the widest label */
  n = lstopo_obj_snprintf(output, text, sizeof(text), lastpu);
  output->min_pu_textwidth = get_textwidth(output, text, n, fontsize);
}
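
Both branches pick the highest-numbered PU, either by logical index or by OS index. The same lookups can be exercised standalone with public hwloc helpers; a minimal sketch, assuming only a default loaded topology:

#include <hwloc.h>
#include <stdio.h>

int main(void)
{
  hwloc_topology_t topology;
  hwloc_obj_t pu;
  hwloc_topology_init(&topology);
  hwloc_topology_load(topology);
  /* last PU by logical index */
  int n = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU);
  pu = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, n - 1);
  printf("last logical PU: L#%u (P#%u)\n", pu->logical_index, pu->os_index);
  /* last PU by OS index, via the topology cpuset */
  int lastidx = hwloc_bitmap_last(hwloc_topology_get_topology_cpuset(topology));
  pu = hwloc_get_pu_obj_by_os_index(topology, lastidx);
  printf("last physical PU: P#%u\n", pu->os_index);
  hwloc_topology_destroy(topology);
  return 0;
}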
Example #2
/*******************  FUNCTION  *********************/
int TopoHwloc::getFirstBitInBitmap(hwloc_bitmap_t bitmap) const
{
	int last = hwloc_bitmap_last(bitmap);
	int current = hwloc_bitmap_first(bitmap);
	/* the bitmap must contain at least one set bit */
	assert(current != -1);
	/* note: hwloc_bitmap_first already returns the first *set* index,
	 * so this loop exits on its first iteration */
	while (current != last)
	{
		if (hwloc_bitmap_isset(bitmap,current))
			break;
		current = hwloc_bitmap_next(bitmap,current);
	}
	return current;
}
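
As the comment above notes, hwloc_bitmap_first and hwloc_bitmap_last report the extreme set indexes directly; a minimal standalone check, illustrative and not part of the project above:

#include <hwloc.h>
#include <assert.h>

int main(void)
{
  hwloc_bitmap_t bitmap = hwloc_bitmap_alloc();
  hwloc_bitmap_set(bitmap, 7);
  hwloc_bitmap_set(bitmap, 42);
  assert(hwloc_bitmap_first(bitmap) == 7);  /* smallest set index */
  assert(hwloc_bitmap_last(bitmap) == 42);  /* largest set index */
  hwloc_bitmap_free(bitmap);
  return 0;
}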
Example #3
/* convert set into index+mask if all set bits are in the same ULONG_PTR.
 * otherwise return -1.
 */
static int hwloc_bitmap_to_single_ULONG_PTR(hwloc_const_bitmap_t set, unsigned *index, ULONG_PTR *mask)
{
  unsigned first_ulp, last_ulp;
  if (hwloc_bitmap_weight(set) == -1)
    return -1; /* infinitely-set bitmap cannot fit in one ULONG_PTR */
  first_ulp = hwloc_bitmap_first(set) / (sizeof(ULONG_PTR)*8);
  last_ulp = hwloc_bitmap_last(set) / (sizeof(ULONG_PTR)*8);
  if (first_ulp != last_ulp)
    return -1;
  *mask = hwloc_bitmap_to_ith_ULONG_PTR(set, first_ulp);
  *index = first_ulp;
  return 0;
}
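
The helper above belongs to hwloc's Windows backend; a portable variant of the same index+mask reduction can be sketched with the public hwloc_bitmap_to_ith_ulong. All names below are illustrative, not part of the snippet:

#include <hwloc.h>
#include <stdio.h>

/* Illustrative portable variant: reduce a bitmap to (index, mask) if all
 * set bits fall inside a single unsigned long, otherwise return -1. */
static int bitmap_to_single_ulong(hwloc_const_bitmap_t set,
                                  unsigned *index, unsigned long *mask)
{
  unsigned bits = sizeof(unsigned long) * 8;
  int first = hwloc_bitmap_first(set), last = hwloc_bitmap_last(set);
  if (first < 0 || last < 0)
    return -1; /* empty or infinitely-set bitmap */
  if ((unsigned)first / bits != (unsigned)last / bits)
    return -1; /* spans more than one unsigned long */
  *index = (unsigned)first / bits;
  *mask = hwloc_bitmap_to_ith_ulong(set, *index);
  return 0;
}

int main(void)
{
  hwloc_bitmap_t set = hwloc_bitmap_alloc();
  unsigned idx;
  unsigned long mask;
  hwloc_bitmap_set_range(set, 64, 66); /* all inside ulong #1 on LP64 */
  if (bitmap_to_single_ulong(set, &idx, &mask) == 0)
    printf("index %u mask 0x%lx\n", idx, mask);
  hwloc_bitmap_free(set);
  return 0;
}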
Example #4
int main(void)
{
    hwloc_bitmap_t set;

    /* check an empty bitmap */
    set = hwloc_bitmap_alloc();
    assert(hwloc_bitmap_to_ulong(set) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 0) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 1) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 23) == 0UL);
    /* check a non-empty bitmap */
    hwloc_bitmap_from_ith_ulong(set, 4, 0xff);
    assert(hwloc_bitmap_to_ith_ulong(set, 4) == 0xff);
    assert(hwloc_bitmap_to_ulong(set) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 0) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 1) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 23) == 0UL);
    /* check a zeroed bitmap */
    hwloc_bitmap_zero(set);
    assert(hwloc_bitmap_to_ulong(set) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 0) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 1) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 4) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 23) == 0UL);
    hwloc_bitmap_free(set);

    /* check a full bitmap */
    set = hwloc_bitmap_alloc_full();
    assert(hwloc_bitmap_to_ulong(set) == ~0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 0) == ~0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 1) == ~0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 23) == ~0UL);
    /* check an almost full bitmap */
    hwloc_bitmap_set_ith_ulong(set, 4, 0xff);
    assert(hwloc_bitmap_to_ith_ulong(set, 4) == 0xff);
    assert(hwloc_bitmap_to_ulong(set) == ~0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 0) == ~0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 1) == ~0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 23) == ~0UL);
    /* check an almost empty bitmap */
    hwloc_bitmap_from_ith_ulong(set, 4, 0xff);
    assert(hwloc_bitmap_to_ith_ulong(set, 4) == 0xff);
    assert(hwloc_bitmap_to_ulong(set) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 0) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 1) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 23) == 0UL);
    hwloc_bitmap_free(set);

    /* check ranges */
    set = hwloc_bitmap_alloc();
    assert(hwloc_bitmap_weight(set) == 0);
    /* 23-45 */
    hwloc_bitmap_set_range(set, 23, 45);
    assert(hwloc_bitmap_weight(set) == 23);
    /* 23-45,78- */
    hwloc_bitmap_set_range(set, 78, -1);
    assert(hwloc_bitmap_weight(set) == -1);
    /* 23- */
    hwloc_bitmap_set_range(set, 44, 79);
    assert(hwloc_bitmap_weight(set) == -1);
    assert(hwloc_bitmap_first(set) == 23);
    assert(!hwloc_bitmap_isfull(set));
    /* 0- */
    hwloc_bitmap_set_range(set, 0, 22);
    assert(hwloc_bitmap_weight(set) == -1);
    assert(hwloc_bitmap_isfull(set));
    /* 0-34,57- */
    hwloc_bitmap_clr_range(set, 35, 56);
    assert(hwloc_bitmap_weight(set) == -1);
    assert(!hwloc_bitmap_isfull(set));
    /* 0-34,57 */
    hwloc_bitmap_clr_range(set, 58, -1);
    assert(hwloc_bitmap_weight(set) == 36);
    assert(hwloc_bitmap_last(set) == 57);
    assert(hwloc_bitmap_next(set, 34) == 57);
    /* 0-34 */
    hwloc_bitmap_clr(set, 57);
    assert(hwloc_bitmap_weight(set) == 35);
    assert(hwloc_bitmap_last(set) == 34);
    /* empty */
    hwloc_bitmap_clr_range(set, 0, 34);
    assert(hwloc_bitmap_weight(set) == 0);
    assert(hwloc_bitmap_first(set) == -1);
    hwloc_bitmap_free(set);

    return 0;
}
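
The range comments above (23-45, 23-45,78-, and so on) follow hwloc's list syntax; hwloc_bitmap_list_asprintf prints a bitmap in that form, which is handy when checking such tests. A minimal sketch:

#include <hwloc.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
  hwloc_bitmap_t set = hwloc_bitmap_alloc();
  char *s;
  hwloc_bitmap_set_range(set, 23, 45);
  hwloc_bitmap_set_range(set, 78, -1); /* -1 extends the range to infinity */
  hwloc_bitmap_list_asprintf(&s, set);
  printf("%s\n", s); /* prints "23-45,78-" */
  free(s);
  hwloc_bitmap_free(set);
  return 0;
}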
Example #5
static void create_hwloc_cpusets() {
#ifdef USE_HWLOC
    int i;

    int err = hwloc_topology_init(&topology);
    assert(err == 0);

    err = hwloc_topology_load(topology);
    assert(err == 0);

    hwloc_bitmap_t cpuset = hwloc_bitmap_alloc();
    assert(cpuset);

    err = hwloc_get_cpubind(topology, cpuset, HWLOC_CPUBIND_PROCESS);
    assert(err == 0);
    const int available_pus = hwloc_bitmap_weight(cpuset);
    const int last_set_index = hwloc_bitmap_last(cpuset);
    const int num_workers = hc_context->nworkers;

    hclib_affinity_t selected_affinity = HCLIB_AFFINITY_STRIDED;
    const char *user_selected_affinity = getenv("HCLIB_AFFINITY");
    if (user_selected_affinity) {
        if (strcmp(user_selected_affinity, "strided") == 0) {
            selected_affinity = HCLIB_AFFINITY_STRIDED;
        } else if (strcmp(user_selected_affinity, "chunked") == 0) {
            selected_affinity = HCLIB_AFFINITY_CHUNKED;
        } else {
            fprintf(stderr, "Unsupported thread affinity \"%s\" specified with "
                    "HCLIB_AFFINITY.\n", user_selected_affinity);
            exit(1);
        }
    }

    thread_cpusets = (hwloc_bitmap_t *)malloc(hc_context->nworkers *
            sizeof(*thread_cpusets));
    assert(thread_cpusets);

    for (i = 0; i < hc_context->nworkers; i++) {
        thread_cpusets[i] = hwloc_bitmap_alloc();
        assert(thread_cpusets[i]);
    }

    switch (selected_affinity) {
        case (HCLIB_AFFINITY_STRIDED): {
            if (available_pus < num_workers) {
                fprintf(stderr, "ERROR Available PUs (%d) was less than number "
                        "of workers (%d), don't currently support "
                        "oversubscription with strided thread pinning\n",
                        available_pus, num_workers);
                exit(1);
            }

            int count = 0;
            int index = 0;
            while (index <= last_set_index) {
                if (hwloc_bitmap_isset(cpuset, index)) {
                    hwloc_bitmap_set(thread_cpusets[count % num_workers],
                            index);
                    count++;
                }
                index++;
            }
            break;
        }
        case (HCLIB_AFFINITY_CHUNKED): {
            const int chunk_size = (available_pus + num_workers - 1) /
                    num_workers;
            int count = 0;
            int index = 0;
            while (index <= last_set_index) {
                if (hwloc_bitmap_isset(cpuset, index)) {
                    hwloc_bitmap_set(thread_cpusets[count / chunk_size], index);
                    count++;
                }
                index++;
            }
            break;
        }
        default:
            assert(false);
    }

    hwloc_bitmap_t nodeset = hwloc_bitmap_alloc();
    hwloc_bitmap_t other_nodeset = hwloc_bitmap_alloc();
    assert(nodeset && other_nodeset);

    /*
     * Here, we look for contiguous ranges of worker threads that share any NUMA
     * nodes with us. In theory, this should be more hierarchical but isn't yet.
     * This is also super inefficient... O(T^2) where T is the number of
     * workers.
     */
    bool revert_to_naive_stealing = false;
    for (i = 0; i < hc_context->nworkers; i++) {
        // Get the NUMA nodes for this CPU set
        hwloc_cpuset_to_nodeset(topology, thread_cpusets[i], nodeset);

        int base = -1;
        int limit = -1;
        int j;
        for (j = 0; j < hc_context->nworkers; j++) {
            hwloc_cpuset_to_nodeset(topology, thread_cpusets[j], other_nodeset);
            // Take the intersection, see if there is any overlap
            hwloc_bitmap_and(other_nodeset, nodeset, other_nodeset);

            if (base < 0) {
                // Haven't found a contiguous chunk of workers yet.
                if (!hwloc_bitmap_iszero(other_nodeset)) {
                    base = j;
                }
            } else {
                /*
                 * Have a contiguous chunk of workers, either still inside it or
                 * after it.
                 */
                if (limit < 0) {
                    // Inside the contiguous chunk of workers
                    if (hwloc_bitmap_iszero(other_nodeset)) {
                        // Found the end
                        limit = j;
                    }
                } else {
                    // After the contiguous chunk of workers
                    if (!hwloc_bitmap_iszero(other_nodeset)) {
                        // No contiguous chunk to find, just do something naive.
                        revert_to_naive_stealing = true;
                        break;
                    }
                }
            }
        }

        if (revert_to_naive_stealing) {
            fprintf(stderr, "WARNING: Using naive work-stealing patterns.\n");
            base = 0;
            limit = hc_context->nworkers;
        } else {
            assert(base >= 0);
            if (limit < 0) {
                limit = hc_context->nworkers;
            }
        }

        hc_context->workers[i]->base_intra_socket_workers = base;
        hc_context->workers[i]->limit_intra_socket_workers = limit;

#ifdef VERBOSE
        char *nbuf;
        hwloc_bitmap_asprintf(&nbuf, nodeset);

        char *buffer;
        hwloc_bitmap_asprintf(&buffer, thread_cpusets[i]);
        fprintf(stderr, "Worker %d has access to %d PUs (%s), %d NUMA nodes "
                "(%s). Shared NUMA nodes with [%d, %d).\n", i,
                hwloc_bitmap_weight(thread_cpusets[i]), buffer,
                hwloc_bitmap_weight(nodeset), nbuf, base, limit);
        free(buffer);
        free(nbuf);
#endif
    }

#endif
}
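
The snippet computes per-worker cpusets but does not show how they are applied; a minimal sketch of binding the calling worker thread with hwloc_set_cpubind (bind_worker_thread is a hypothetical helper, assuming the topology and thread_cpusets globals from the snippet above):

#ifdef USE_HWLOC
/* Hypothetical helper, not part of the project code above: bind the
 * calling worker thread to the cpuset computed in create_hwloc_cpusets(). */
static void bind_worker_thread(int worker_id) {
    int err = hwloc_set_cpubind(topology, thread_cpusets[worker_id],
                                HWLOC_CPUBIND_THREAD);
    if (err) {
        fprintf(stderr, "WARNING: failed to bind worker %d\n", worker_id);
    }
}
#endif

Example #6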
int main(void)
{
  hwloc_bitmap_t set;
  int i, cpu, expected_cpu = 0;

  /* empty set */
  set = hwloc_bitmap_alloc();
  assert(hwloc_bitmap_first(set) == -1);
  assert(hwloc_bitmap_last(set) == -1);
  assert(hwloc_bitmap_next(set, 0) == -1);
  assert(hwloc_bitmap_next(set, -1) == -1);
  assert(hwloc_bitmap_weight(set) == 0);

  /* full set */
  hwloc_bitmap_fill(set);
  assert(hwloc_bitmap_first(set) == 0);
  assert(hwloc_bitmap_last(set) == -1);
  assert(hwloc_bitmap_next(set, -1) == 0);
  assert(hwloc_bitmap_next(set, 0) == 1);
  assert(hwloc_bitmap_next(set, 1) == 2);
  assert(hwloc_bitmap_next(set, 2) == 3);
  assert(hwloc_bitmap_next(set, 30) == 31);
  assert(hwloc_bitmap_next(set, 31) == 32);
  assert(hwloc_bitmap_next(set, 32) == 33);
  assert(hwloc_bitmap_next(set, 62) == 63);
  assert(hwloc_bitmap_next(set, 63) == 64);
  assert(hwloc_bitmap_next(set, 64) == 65);
  assert(hwloc_bitmap_next(set, 12345) == 12346);
  assert(hwloc_bitmap_weight(set) == -1);

  /* custom sets */
  hwloc_bitmap_zero(set);
  hwloc_bitmap_set_range(set, 36, 59);
  assert(hwloc_bitmap_first(set) == 36);
  assert(hwloc_bitmap_last(set) == 59);
  assert(hwloc_bitmap_next(set, -1) == 36);
  assert(hwloc_bitmap_next(set, 0) == 36);
  assert(hwloc_bitmap_next(set, 36) == 37);
  assert(hwloc_bitmap_next(set, 59) == -1);
  assert(hwloc_bitmap_weight(set) == 24);
  hwloc_bitmap_set_range(set, 136, 259);
  assert(hwloc_bitmap_first(set) == 36);
  assert(hwloc_bitmap_last(set) == 259);
  assert(hwloc_bitmap_next(set, 59) == 136);
  assert(hwloc_bitmap_next(set, 259) == -1);
  assert(hwloc_bitmap_weight(set) == 148);
  hwloc_bitmap_clr(set, 199);
  assert(hwloc_bitmap_first(set) == 36);
  assert(hwloc_bitmap_last(set) == 259);
  assert(hwloc_bitmap_next(set, 198) == 200);
  assert(hwloc_bitmap_next(set, 199) == 200);
  assert(hwloc_bitmap_weight(set) == 147);

  i = 0;
  hwloc_bitmap_foreach_begin(cpu, set) {
    /* set bits are 36-59, 136-198 and 200-259 (bit 199 was cleared) */
    if (0 <= i && i < 24)
      expected_cpu = i + 36;
    else if (24 <= i && i < 87)
      expected_cpu = i + 112;
    else if (87 <= i && i < 147)
      expected_cpu = i + 113;

    assert(expected_cpu == cpu);

    i++;
  } hwloc_bitmap_foreach_end();

  hwloc_bitmap_free(set);

  return 0;
}