Example #1
static void *
hwloc_win_alloc_membind(hwloc_topology_t topology __hwloc_attribute_unused, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) {
  int node;

  switch (policy) {
    case HWLOC_MEMBIND_DEFAULT:
    case HWLOC_MEMBIND_BIND:
      break;
    default:
      errno = ENOSYS;
      return hwloc_alloc_or_fail(topology, len, flags);
  }

  if (flags & HWLOC_MEMBIND_STRICT) {
    errno = ENOSYS;
    return NULL;
  }

  if (hwloc_bitmap_weight(nodeset) != 1) {
    /* Not a single node, can't do this */
    errno = EXDEV;
    return hwloc_alloc_or_fail(topology, len, flags);
  }

  node = hwloc_bitmap_first(nodeset);
  return VirtualAllocExNumaProc(GetCurrentProcess(), NULL, len, MEM_COMMIT|MEM_RESERVE, PAGE_EXECUTE_READWRITE, node);
}
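
A minimal caller sketch for the fast path above, assuming the hwloc 1.x nodeset API (hwloc_alloc_membind_nodeset) and a hypothetical alloc_on_node() helper; it builds a single-node nodeset so the backend does not need to fall back to hwloc_alloc_or_fail():

/* Allocate len bytes bound to one NUMA node (sketch; needs hwloc.h,
 * error handling elided). */
static void *
alloc_on_node(hwloc_topology_t topology, size_t len, unsigned node_os_index)
{
  hwloc_nodeset_t nodeset = hwloc_bitmap_alloc();
  void *ptr;

  hwloc_bitmap_only(nodeset, node_os_index); /* exactly one bit set */
  ptr = hwloc_alloc_membind_nodeset(topology, len, nodeset,
                                    HWLOC_MEMBIND_BIND, 0);
  hwloc_bitmap_free(nodeset);
  return ptr;
}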
Example #2
END_TEST

START_TEST(add_extra_memory_nodes_if_needed_test)
  {
  long long      mem_requested;
  long long      mem_reserved;
  std::set<int>  current_mem_ids;
  hwloc_bitmap_t job_mems = hwloc_bitmap_alloc();
  hwloc_bitmap_t torque_root_mems = hwloc_bitmap_alloc();
  char           buf[1024];
  hwloc_bitmap_set(job_mems, 0);
  current_mem_ids.insert(0);
  hwloc_bitmap_set(torque_root_mems, 0);
  hwloc_bitmap_set(torque_root_mems, 1);
  mem_requested = 16 * 1024;
  mem_requested *= 1024;
  mem_requested *= 1024;
  mem_reserved = 15 * 1024;
  mem_reserved *= 1024;
  mem_reserved *= 1024;

  add_extra_memory_nodes_if_needed(mem_requested, mem_reserved, job_mems, torque_root_mems, current_mem_ids);
  fail_unless(hwloc_bitmap_weight(job_mems) == 2);
  
  hwloc_bitmap_displaylist(buf, sizeof(buf), job_mems);
  fail_unless(strchr(buf, '0') != NULL);
  fail_unless(strchr(buf, '1') != NULL);
  }
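
hwloc_bitmap_displaylist() appears to be a project-local helper; with stock hwloc the same assertion can be written with the standard list formatter, hwloc_bitmap_list_snprintf() (a sketch, assuming the helper is simply a wrapper around it):

char buf2[1024];
hwloc_bitmap_list_snprintf(buf2, sizeof(buf2), job_mems); /* e.g. "0-1" */
fail_unless(strchr(buf2, '0') != NULL);
fail_unless(strchr(buf2, '1') != NULL);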
Example #3
/*******************  FUNCTION  *********************/
int TopoHwloc::getCurrentIdFromNUMABinding(void) const
{
	hwloc_nodeset_t nodeset = hwloc_bitmap_alloc();
	hwloc_cpuset_t cpuset = hwloc_bitmap_alloc();
	hwloc_membind_policy_t policy;
	int res = -1;
	int weight;
	int status;
	#if defined(SCTK_ALLOC_DEBUG) && defined(hwloc_bitmap_list_snprintf)
	char buffer[4096];
	#endif

	//if there is only one NUMA node (non-NUMA machine), return immediately
	if (getNbNumaEntities() == 1)
	{
		hwloc_bitmap_free(cpuset);
		hwloc_bitmap_free(nodeset);
		return -1;
	}

	//nodes
	// flags = 0: fall back to PROCESS if THREAD is not supported (e.g. on Windows).
	status = hwloc_get_membind_nodeset(topology,nodeset,&policy,0);
	assert(status == 0);
	if (status != 0)
	{
		hwloc_bitmap_free(cpuset);
		hwloc_bitmap_free(nodeset);
		return -1;
	}

	#if defined(SCTK_ALLOC_DEBUG) && defined(hwloc_bitmap_list_snprintf)
	status = hwloc_bitmap_list_snprintf(buffer,4096,nodeset);
	fprintf(stderr,"Current nodes : %s\n",buffer);
	#endif

	//cores
	// flags = 0: fall back to PROCESS if THREAD is not supported (e.g. on Windows).
	status = hwloc_get_membind(topology,cpuset,&policy,0);
	assert(status == 0);
	if (status != 0)
	{
		hwloc_bitmap_free(cpuset);
		hwloc_bitmap_free(nodeset);
		return -1;
	}

	#if defined(SCTK_ALLOC_DEBUG) && defined(hwloc_bitmap_list_snprintf)
	status = hwloc_bitmap_list_snprintf(buffer,4096,cpuset);
	fprintf(stderr,"Current cores : %s\n",buffer);
	#endif

	//nodes from cores
	hwloc_cpuset_to_nodeset(topology,cpuset,nodeset);

	#if defined(SCTK_ALLOC_DEBUG) && defined(hwloc_bitmap_list_snprintf)
	status = hwloc_bitmap_list_snprintf(buffer,4096,nodeset);
	fprintf(stderr,"Current nodes from cores : %s\n",buffer);
	#endif

	//calc res
	weight = hwloc_bitmap_weight(nodeset);
	assert(weight != 0);
	if (weight == 1)
		res = getFirstBitInBitmap(nodeset);

	hwloc_bitmap_free(cpuset);
	hwloc_bitmap_free(nodeset);

	return res;
}
Example #4
static spu_t
hwloc_hpux_find_spu(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_bitmap_t hwloc_set)
{
  spu_t cpu;

  cpu = hwloc_bitmap_first(hwloc_set);
  if (cpu != -1 && hwloc_bitmap_weight(hwloc_set) == 1)
    return cpu;
  return -1;
}
Example #5
/* convert set into index+mask if all set bits are in the same ULONG.
 * otherwise return -1.
 */
static int hwloc_bitmap_to_single_ULONG_PTR(hwloc_const_bitmap_t set, unsigned *index, ULONG_PTR *mask)
{
  unsigned first_ulp, last_ulp;
  if (hwloc_bitmap_weight(set) == -1)
    return -1;
  first_ulp = hwloc_bitmap_first(set) / (sizeof(ULONG_PTR)*8);
  last_ulp = hwloc_bitmap_last(set) / (sizeof(ULONG_PTR)*8);
  if (first_ulp != last_ulp)
    return -1;
  *mask = hwloc_bitmap_to_ith_ULONG_PTR(set, first_ulp);
  *index = first_ulp;
  return 0;
}
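
The index produced above is used as a Windows processor-group number by hwloc's Windows backend (one ULONG_PTR per group), so the pair maps directly onto the GROUP_AFFINITY structure. A sketch of how such a pair is typically consumed, assuming a hypothetical bind_current_thread() helper; GROUP_AFFINITY and SetThreadGroupAffinity() are the real Win32 API (requires windows.h):

/* Bind the calling thread to the given cpuset if it fits in one group. */
static int bind_current_thread(hwloc_const_bitmap_t set)
{
  unsigned group;
  ULONG_PTR mask;
  GROUP_AFFINITY aff;

  if (hwloc_bitmap_to_single_ULONG_PTR(set, &group, &mask) < 0)
    return -1; /* set spans several groups, not handled in this sketch */

  memset(&aff, 0, sizeof(aff));
  aff.Group = (WORD) group;
  aff.Mask = (KAFFINITY) mask;
  return SetThreadGroupAffinity(GetCurrentThread(), &aff, NULL) ? 0 : -1;
}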
Example #6
/*******************  FUNCTION  *********************/
int TopoHwloc::getCurrentIdFromThreadBinding(void) const
{
	hwloc_nodeset_t nodeset = hwloc_bitmap_alloc();
	hwloc_cpuset_t cpuset = hwloc_bitmap_alloc();
	int res = -1;
	int weight;
	#if defined(SCTK_ALLOC_DEBUG) && defined(hwloc_bitmap_list_snprintf)
	char buffer[4096];
	#endif
	
	//get current core binding
	//for windows use 0 instead of HWLOC_CPUBIND_THREAD
	int status = hwloc_get_cpubind (topology, cpuset, 0);
	assert(status == 0);
	if (status != 0)
	{
		hwloc_bitmap_free(cpuset);
		hwloc_bitmap_free(nodeset);
		return -1;
	}

	#if defined(SCTK_ALLOC_DEBUG) && defined(hwloc_bitmap_list_snprintf)
	status = hwloc_bitmap_list_snprintf(buffer,4096,cpuset);
	fprintf(stderr,"Current cores : %s\n",buffer);
	#endif

	//nodes from cores
	hwloc_cpuset_to_nodeset(topology,cpuset,nodeset);

	#if defined(SCTK_ALLOC_DEBUG) && defined(hwloc_bitmap_list_snprintf)
	status = hwloc_bitmap_list_snprintf(buffer,4096,nodeset);
	fprintf(stderr,"Current nodes from cores : %s\n",buffer);
	#endif

	//calc res
	weight = hwloc_bitmap_weight(nodeset);
	assert(weight != 0);
	if (weight == 1)
		res = getFirstBitInBitmap(nodeset);

	hwloc_bitmap_free(cpuset);
	hwloc_bitmap_free(nodeset);

	return res;
}
Example #7
int main(void)
{
    hwloc_topology_t topology;
    hwloc_bitmap_t set;
    hwloc_const_bitmap_t cset;
    hwloc_membind_policy_t policy;
    const struct hwloc_topology_support *support;
    int nbnodes;
    hwloc_obj_t obj;
    char *buffer, *s;
    unsigned i;
    int err;

    /* create a topology */
    err = hwloc_topology_init(&topology);
    if (err < 0) {
        fprintf(stderr, "failed to initialize the topology\n");
        return EXIT_FAILURE;
    }
    err = hwloc_topology_load(topology);
    if (err < 0) {
        fprintf(stderr, "failed to load the topology\n");
        hwloc_topology_destroy(topology);
        return EXIT_FAILURE;
    }

    /* retrieve the entire set of NUMA nodes and count them */
    cset = hwloc_topology_get_topology_nodeset(topology);
    nbnodes = hwloc_bitmap_weight(cset);
    if (nbnodes <= 0) {
        /* nbnodes may be -1 when there's no NUMA information,
         * or 0 when the machine is known to be non-NUMA */
        printf("this machine is not NUMA, nothing to do\n");
        hwloc_topology_destroy(topology);
        return EXIT_SUCCESS;
    }
    printf("there are %d nodes in the machine\n", nbnodes);

    /* get the process memory binding as a nodeset */
    set = hwloc_bitmap_alloc();
    if (!set) {
        fprintf(stderr, "failed to allocate a bitmap\n");
        hwloc_topology_destroy(topology);
        return EXIT_FAILURE;
    }
    err = hwloc_get_membind_nodeset(topology, set, &policy, 0);
    if (err < 0) {
        fprintf(stderr, "failed to retrieve my memory binding and policy\n");
        hwloc_topology_destroy(topology);
        hwloc_bitmap_free(set);
        return EXIT_FAILURE;
    }

    /* print the corresponding NUMA nodes */
    hwloc_bitmap_asprintf(&s, set);
    printf("bound to nodeset %s which contains:\n", s);
    free(s);
    hwloc_bitmap_foreach_begin(i, set) {
        obj = hwloc_get_numanode_obj_by_os_index(topology, i);
        printf("  node #%u (OS index %u) with %lld bytes of memory\n",
               obj->logical_index, i, (unsigned long long) obj->memory.local_memory);
    } hwloc_bitmap_foreach_end();

    /* cleanup */
    hwloc_bitmap_free(set);
    hwloc_topology_destroy(topology);
    return EXIT_SUCCESS;
}
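
In hwloc 2.x the *_nodeset variants of the binding calls were removed; the equivalent query passes a flag instead (a one-line sketch of the 2.x API):

err = hwloc_get_membind(topology, set, &policy, HWLOC_MEMBIND_BYNODESET);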
Example #8
void output_console(struct lstopo_output *loutput, const char *filename)
{
  hwloc_topology_t topology = loutput->topology;
  unsigned topodepth;
  int verbose_mode = loutput->verbose_mode;
  int logical = loutput->logical;
  FILE *output;

  output = open_output(filename, loutput->overwrite);
  if (!output) {
    fprintf(stderr, "Failed to open %s for writing (%s)\n", filename, strerror(errno));
    return;
  }

  topodepth = hwloc_topology_get_depth(topology);

  /*
   * if verbose_mode == 0, only print the summary.
   * if verbose_mode == 1, only print the topology tree.
   * if verbose_mode > 1, print both.
   */

  if (lstopo_show_only != (hwloc_obj_type_t)-1) {
    if (verbose_mode > 1)
      fprintf(output, "Only showing %s objects\n", hwloc_obj_type_string(lstopo_show_only));
    output_only (topology, hwloc_get_root_obj(topology), output, logical, verbose_mode);
  } else if (verbose_mode >= 1) {
    output_topology (topology, hwloc_get_root_obj(topology), NULL, output, 0, logical, verbose_mode);
    fprintf(output, "\n");
  }

  if ((verbose_mode > 1 || !verbose_mode) && lstopo_show_only == (hwloc_obj_type_t)-1) {
    hwloc_lstopo_show_summary(output, topology);
  }

  if (verbose_mode > 1 && lstopo_show_only == (hwloc_obj_type_t)-1) {
    const struct hwloc_distances_s * distances;
    unsigned depth;

    for (depth = 0; depth < topodepth; depth++) {
      distances = hwloc_get_whole_distance_matrix_by_depth(topology, depth);
      if (!distances || !distances->latency)
        continue;
      fprintf(output, "relative latency matrix between %ss (depth %u) by %s indexes:\n",
	      hwloc_obj_type_string(hwloc_get_depth_type(topology, depth)),
	      depth,
	      logical ? "logical" : "physical");
      hwloc_utils_print_distance_matrix(output, topology, hwloc_get_root_obj(topology), distances->nbobjs, depth, distances->latency, logical);
    }
  }

  if (verbose_mode > 1 && lstopo_show_only == (hwloc_obj_type_t)-1) {
    hwloc_const_bitmap_t complete = hwloc_topology_get_complete_cpuset(topology);
    hwloc_const_bitmap_t topo = hwloc_topology_get_topology_cpuset(topology);
    hwloc_const_bitmap_t allowed = hwloc_topology_get_allowed_cpuset(topology);

    if (!hwloc_bitmap_isequal(topo, complete)) {
      hwloc_bitmap_t unknown = hwloc_bitmap_alloc();
      char *unknownstr;
      hwloc_bitmap_copy(unknown, complete);
      hwloc_bitmap_andnot(unknown, unknown, topo);
      hwloc_bitmap_asprintf(&unknownstr, unknown);
      fprintf (output, "%d processors not represented in topology: %s\n", hwloc_bitmap_weight(unknown), unknownstr);
      free(unknownstr);
      hwloc_bitmap_free(unknown);
    }
    if (!hwloc_bitmap_isequal(topo, allowed)) {
      hwloc_bitmap_t disallowed = hwloc_bitmap_alloc();
      char *disallowedstr;
      hwloc_bitmap_copy(disallowed, topo);
      hwloc_bitmap_andnot(disallowed, disallowed, allowed);
      hwloc_bitmap_asprintf(&disallowedstr, disallowed);
      fprintf(output, "%d processors represented but not allowed: %s\n", hwloc_bitmap_weight(disallowed), disallowedstr);
      free(disallowedstr);
      hwloc_bitmap_free(disallowed);
    }
    if (!hwloc_topology_is_thissystem(topology))
      fprintf (output, "Topology not from this system\n");
  }

  if (output != stdout)
    fclose(output);
}
Example #9
void output_console(hwloc_topology_t topology, const char *filename, int logical, int legend __hwloc_attribute_unused, int verbose_mode)
{
  unsigned topodepth;
  FILE *output;

  if (!filename || !strcmp(filename, "-"))
    output = stdout;
  else {
    output = open_file(filename, "w"); 
    if (!output) {
      fprintf(stderr, "Failed to open %s for writing (%s)\n", filename, strerror(errno));
      return;
    }
  }

  topodepth = hwloc_topology_get_depth(topology);

  /*
   * if verbose_mode == 0, only print the summary.
   * if verbose_mode == 1, only print the topology tree.
   * if verbose_mode > 1, print both.
   */

  if (lstopo_show_only != (hwloc_obj_type_t)-1) {
    if (verbose_mode > 1)
      fprintf(output, "Only showing %s objects\n", hwloc_obj_type_string(lstopo_show_only));
    output_only (topology, hwloc_get_root_obj(topology), output, logical, verbose_mode);
  } else if (verbose_mode >= 1) {
    output_topology (topology, hwloc_get_root_obj(topology), NULL, output, 0, logical, verbose_mode);
    fprintf(output, "\n");
  }

  if ((verbose_mode > 1 || !verbose_mode) && lstopo_show_only == (hwloc_obj_type_t)-1) {
    hwloc_lstopo_show_summary(output, topology);
  }

  if (verbose_mode > 1 && lstopo_show_only == (hwloc_obj_type_t)-1) {
    const struct hwloc_distances_s * distances;
    unsigned depth;

    for (depth = 0; depth < topodepth; depth++) {
      distances = hwloc_get_whole_distance_matrix_by_depth(topology, depth);
      if (!distances || !distances->latency)
        continue;
      fprintf(output, "latency matrix between %ss (depth %u) by %s indexes:\n",
	      hwloc_obj_type_string(hwloc_get_depth_type(topology, depth)),
	      depth,
	      logical ? "logical" : "physical");
      hwloc_utils_print_distance_matrix(topology, hwloc_get_root_obj(topology), distances->nbobjs, depth, distances->latency, logical);
    }
  }

  if (verbose_mode > 1 && lstopo_show_only == (hwloc_obj_type_t)-1) {
    hwloc_const_bitmap_t complete = hwloc_topology_get_complete_cpuset(topology);
    hwloc_const_bitmap_t topo = hwloc_topology_get_topology_cpuset(topology);
    hwloc_const_bitmap_t online = hwloc_topology_get_online_cpuset(topology);
    hwloc_const_bitmap_t allowed = hwloc_topology_get_allowed_cpuset(topology);

    if (complete && !hwloc_bitmap_isequal(topo, complete)) {
      hwloc_bitmap_t unknown = hwloc_bitmap_alloc();
      char *unknownstr;
      hwloc_bitmap_copy(unknown, complete);
      hwloc_bitmap_andnot(unknown, unknown, topo);
      hwloc_bitmap_asprintf(&unknownstr, unknown);
      fprintf (output, "%d processors not represented in topology: %s\n", hwloc_bitmap_weight(unknown), unknownstr);
      free(unknownstr);
      hwloc_bitmap_free(unknown);
    }
    if (complete && !hwloc_bitmap_isequal(online, complete)) {
      hwloc_bitmap_t offline = hwloc_bitmap_alloc();
      char *offlinestr;
      hwloc_bitmap_copy(offline, complete);
      hwloc_bitmap_andnot(offline, offline, online);
      hwloc_bitmap_asprintf(&offlinestr, offline);
      fprintf (output, "%d processors offline: %s\n", hwloc_bitmap_weight(offline), offlinestr);
      free(offlinestr);
      hwloc_bitmap_free(offline);
    }
    if (complete && !hwloc_bitmap_isequal(allowed, online)) {
      if (!hwloc_bitmap_isincluded(online, allowed)) {
        hwloc_bitmap_t forbidden = hwloc_bitmap_alloc();
        char *forbiddenstr;
        hwloc_bitmap_copy(forbidden, online);
        hwloc_bitmap_andnot(forbidden, forbidden, allowed);
        hwloc_bitmap_asprintf(&forbiddenstr, forbidden);
        fprintf(output, "%d processors online but not allowed: %s\n", hwloc_bitmap_weight(forbidden), forbiddenstr);
        free(forbiddenstr);
        hwloc_bitmap_free(forbidden);
      }
      if (!hwloc_bitmap_isincluded(allowed, online)) {
        hwloc_bitmap_t potential = hwloc_bitmap_alloc();
        char *potentialstr;
        hwloc_bitmap_copy(potential, allowed);
        hwloc_bitmap_andnot(potential, potential, online);
        hwloc_bitmap_asprintf(&potentialstr, potential);
        fprintf(output, "%d processors allowed but not online: %s\n", hwloc_bitmap_weight(potential), potentialstr);
        free(potentialstr);
        hwloc_bitmap_free(potential);
      }
    }
    if (!hwloc_topology_is_thissystem(topology))
      fprintf (output, "Topology not from this system\n");
  }

  if (output != stdout)
    fclose(output);
}
Example #10
int main(void)
{
    hwloc_bitmap_t set;

    /* check an empty bitmap */
    set = hwloc_bitmap_alloc();
    assert(hwloc_bitmap_to_ulong(set) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 0) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 1) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 23) == 0UL);
    /* check a non-empty bitmap */
    hwloc_bitmap_from_ith_ulong(set, 4, 0xff);
    assert(hwloc_bitmap_to_ith_ulong(set, 4) == 0xff);
    assert(hwloc_bitmap_to_ulong(set) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 0) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 1) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 23) == 0UL);
    /* check a zeroed bitmap */
    hwloc_bitmap_zero(set);
    assert(hwloc_bitmap_to_ulong(set) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 0) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 1) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 4) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 23) == 0UL);
    hwloc_bitmap_free(set);

    /* check a full bitmap */
    set = hwloc_bitmap_alloc_full();
    assert(hwloc_bitmap_to_ulong(set) == ~0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 0) == ~0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 1) == ~0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 23) == ~0UL);
    /* check an almost full bitmap */
    hwloc_bitmap_set_ith_ulong(set, 4, 0xff);
    assert(hwloc_bitmap_to_ith_ulong(set, 4) == 0xff);
    assert(hwloc_bitmap_to_ulong(set) == ~0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 0) == ~0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 1) == ~0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 23) == ~0UL);
    /* check an almost empty bitmap */
    hwloc_bitmap_from_ith_ulong(set, 4, 0xff);
    assert(hwloc_bitmap_to_ith_ulong(set, 4) == 0xff);
    assert(hwloc_bitmap_to_ulong(set) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 0) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 1) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 23) == 0UL);
    hwloc_bitmap_free(set);

    /* check ranges */
    set = hwloc_bitmap_alloc();
    assert(hwloc_bitmap_weight(set) == 0);
    /* 23-45 */
    hwloc_bitmap_set_range(set, 23, 45);
    assert(hwloc_bitmap_weight(set) == 23);
    /* 23-45,78- */
    hwloc_bitmap_set_range(set, 78, -1);
    assert(hwloc_bitmap_weight(set) == -1);
    /* 23- */
    hwloc_bitmap_set_range(set, 44, 79);
    assert(hwloc_bitmap_weight(set) == -1);
    assert(hwloc_bitmap_first(set) == 23);
    assert(!hwloc_bitmap_isfull(set));
    /* 0- */
    hwloc_bitmap_set_range(set, 0, 22);
    assert(hwloc_bitmap_weight(set) == -1);
    assert(hwloc_bitmap_isfull(set));
    /* 0-34,57- */
    hwloc_bitmap_clr_range(set, 35, 56);
    assert(hwloc_bitmap_weight(set) == -1);
    assert(!hwloc_bitmap_isfull(set));
    /* 0-34,57 */
    hwloc_bitmap_clr_range(set, 58, -1);
    assert(hwloc_bitmap_weight(set) == 36);
    assert(hwloc_bitmap_last(set) == 57);
    assert(hwloc_bitmap_next(set, 34) == 57);
    /* 0-34 */
    hwloc_bitmap_clr(set, 57);
    assert(hwloc_bitmap_weight(set) == 35);
    assert(hwloc_bitmap_last(set) == 34);
    /* empty */
    hwloc_bitmap_clr_range(set, 0, 34);
    assert(hwloc_bitmap_weight(set) == 0);
    assert(hwloc_bitmap_first(set) == -1);
    hwloc_bitmap_free(set);

    return 0;
}
Example #11
static void create_hwloc_cpusets() {
#ifdef USE_HWLOC
    int i;

    int err = hwloc_topology_init(&topology);
    assert(err == 0);

    err = hwloc_topology_load(topology);
    assert(err == 0);

    hwloc_bitmap_t cpuset = hwloc_bitmap_alloc();
    assert(cpuset);

    err = hwloc_get_cpubind(topology, cpuset, HWLOC_CPUBIND_PROCESS);
    assert(err == 0);
    const int available_pus = hwloc_bitmap_weight(cpuset);
    const int last_set_index = hwloc_bitmap_last(cpuset);
    const int num_workers = hc_context->nworkers;

    hclib_affinity_t selected_affinity = HCLIB_AFFINITY_STRIDED;
    const char *user_selected_affinity = getenv("HCLIB_AFFINITY");
    if (user_selected_affinity) {
        if (strcmp(user_selected_affinity, "strided") == 0) {
            selected_affinity = HCLIB_AFFINITY_STRIDED;
        } else if (strcmp(user_selected_affinity, "chunked") == 0) {
            selected_affinity = HCLIB_AFFINITY_CHUNKED;
        } else {
            fprintf(stderr, "Unsupported thread affinity \"%s\" specified with "
                    "HCLIB_AFFINITY.\n", user_selected_affinity);
            exit(1);
        }
    }

    thread_cpusets = (hwloc_bitmap_t *)malloc(hc_context->nworkers *
            sizeof(*thread_cpusets));
    assert(thread_cpusets);

    for (i = 0; i < hc_context->nworkers; i++) {
        thread_cpusets[i] = hwloc_bitmap_alloc();
        assert(thread_cpusets[i]);
    }

    switch (selected_affinity) {
        case (HCLIB_AFFINITY_STRIDED): {
            if (available_pus < num_workers) {
                fprintf(stderr, "ERROR Available PUs (%d) was less than number "
                        "of workers (%d), don't currently support "
                        "oversubscription with strided thread pinning\n",
                        available_pus, num_workers);
                exit(1);
            }

            int count = 0;
            int index = 0;
            while (index <= last_set_index) {
                if (hwloc_bitmap_isset(cpuset, index)) {
                    hwloc_bitmap_set(thread_cpusets[count % num_workers],
                            index);
                    count++;
                }
                index++;
            }
            break;
        }
        case (HCLIB_AFFINITY_CHUNKED): {
            const int chunk_size = (available_pus + num_workers - 1) /
                    num_workers;
            int count = 0;
            int index = 0;
            while (index <= last_set_index) {
                if (hwloc_bitmap_isset(cpuset, index)) {
                    hwloc_bitmap_set(thread_cpusets[count / chunk_size], index);
                    count++;
                }
                index++;
            }
            break;
        }
        default:
            assert(false);
    }

    hwloc_bitmap_t nodeset = hwloc_bitmap_alloc();
    hwloc_bitmap_t other_nodeset = hwloc_bitmap_alloc();
    assert(nodeset && other_nodeset);

    /*
     * Here, we look for contiguous ranges of worker threads that share any NUMA
     * nodes with us. In theory, this should be more hierarchical but isn't yet.
     * This is also super inefficient... O(T^2) where T is the number of
     * workers.
     */
    bool revert_to_naive_stealing = false;
    for (i = 0; i < hc_context->nworkers; i++) {
        // Get the NUMA nodes for this CPU set
        hwloc_cpuset_to_nodeset(topology, thread_cpusets[i], nodeset);

        int base = -1;
        int limit = -1;
        int j;
        for (j = 0; j < hc_context->nworkers; j++) {
            hwloc_cpuset_to_nodeset(topology, thread_cpusets[j], other_nodeset);
            // Take the intersection, see if there is any overlap
            hwloc_bitmap_and(other_nodeset, nodeset, other_nodeset);

            if (base < 0) {
                // Haven't found a contiguous chunk of workers yet.
                if (!hwloc_bitmap_iszero(other_nodeset)) {
                    base = j;
                }
            } else {
                /*
                 * Have a contiguous chunk of workers, either still inside it or
                 * after it.
                 */
                if (limit < 0) {
                    // Inside the contiguous chunk of workers
                    if (hwloc_bitmap_iszero(other_nodeset)) {
                        // Found the end
                        limit = j;
                    }
                } else {
                    // After the contiguous chunk of workers
                    if (!hwloc_bitmap_iszero(other_nodeset)) {
                        // No contiguous chunk to find, just do something naive.
                        revert_to_naive_stealing = true;
                        break;
                    }
                }
            }
        }

        if (revert_to_naive_stealing) {
            fprintf(stderr, "WARNING: Using naive work-stealing patterns.\n");
            base = 0;
            limit = hc_context->nworkers;
        } else {
            assert(base >= 0);
            if (limit < 0) {
                limit = hc_context->nworkers;
            }
        }

        hc_context->workers[i]->base_intra_socket_workers = base;
        hc_context->workers[i]->limit_intra_socket_workers = limit;

#ifdef VERBOSE
        char *nbuf;
        hwloc_bitmap_asprintf(&nbuf, nodeset);

        char *buffer;
        hwloc_bitmap_asprintf(&buffer, thread_cpusets[i]);
        fprintf(stderr, "Worker %d has access to %d PUs (%s), %d NUMA nodes "
                "(%s). Shared NUMA nodes with [%d, %d).\n", i,
                hwloc_bitmap_weight(thread_cpusets[i]), buffer,
                hwloc_bitmap_weight(nodeset), nbuf, base, limit);
        free(buffer);
#endif
    }

    /* release scratch bitmaps */
    hwloc_bitmap_free(cpuset);
    hwloc_bitmap_free(nodeset);
    hwloc_bitmap_free(other_nodeset);

#endif
}
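
The per-worker cpusets built above are presumably applied later, when each worker thread starts, with the standard hwloc binding call (a sketch; "tid" is the worker index, and topology/thread_cpusets are the globals from above):

/* Pin the calling worker thread to its precomputed cpuset. */
static void bind_worker_thread(int tid) {
#ifdef USE_HWLOC
    const int err = hwloc_set_cpubind(topology, thread_cpusets[tid],
                                      HWLOC_CPUBIND_THREAD);
    assert(err == 0);
#endif
}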
Example #12
static int
hwloc_solaris_set_sth_cpubind(hwloc_topology_t topology, idtype_t idtype, id_t id, hwloc_const_bitmap_t hwloc_set, int flags)
{
  unsigned target_cpu;

  /* The resulting binding is always strict */

  if (hwloc_bitmap_isequal(hwloc_set, hwloc_topology_get_complete_cpuset(topology))) {
    if (processor_bind(idtype, id, PBIND_NONE, NULL) != 0)
      return -1;
#ifdef HAVE_LIBLGRP
    if (!(flags & HWLOC_CPUBIND_NOMEMBIND)) {
      int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
      if (depth >= 0) {
	int n = hwloc_get_nbobjs_by_depth(topology, depth);
	int i;

	for (i = 0; i < n; i++) {
	  hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i);
	  lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_NONE);
	}
      }
    }
#endif /* HAVE_LIBLGRP */
    return 0;
  }

#ifdef HAVE_LIBLGRP
  if (!(flags & HWLOC_CPUBIND_NOMEMBIND)) {
    int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
    if (depth >= 0) {
      int n = hwloc_get_nbobjs_by_depth(topology, depth);
      int i;
      int ok;
      hwloc_bitmap_t target = hwloc_bitmap_alloc();

      for (i = 0; i < n; i++) {
	hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i);
        if (hwloc_bitmap_isincluded(obj->cpuset, hwloc_set))
          hwloc_bitmap_or(target, target, obj->cpuset);
      }

      ok = hwloc_bitmap_isequal(target, hwloc_set);
      hwloc_bitmap_free(target);

      if (ok) {
        /* Ok, managed to achieve hwloc_set by just combining NUMA nodes */

        for (i = 0; i < n; i++) {
          hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i);

          if (hwloc_bitmap_isincluded(obj->cpuset, hwloc_set)) {
            lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_STRONG);
          } else {
            if (flags & HWLOC_CPUBIND_STRICT)
              lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_NONE);
            else
              lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_WEAK);
          }
        }

        return 0;
      }
    }
  }
#endif /* HAVE_LIBLGRP */

  if (hwloc_bitmap_weight(hwloc_set) != 1) {
    errno = EXDEV;
    return -1;
  }

  target_cpu = hwloc_bitmap_first(hwloc_set);

  if (processor_bind(idtype, id,
		     (processorid_t) (target_cpu), NULL) != 0)
    return -1;

  return 0;
}
Example #13
int main(void)
{
  hwloc_topology_t topology;
  hwloc_bitmap_t set, set2;
  hwloc_const_bitmap_t cset_available, cset_all;
  hwloc_obj_t obj;
  char *buffer;
  char type[64];
  unsigned i;
  int err;

  /* create a topology */
  err = hwloc_topology_init(&topology);
  if (err < 0) {
    fprintf(stderr, "failed to initialize the topology\n");
    return EXIT_FAILURE;
  }
  err = hwloc_topology_load(topology);
  if (err < 0) {
    fprintf(stderr, "failed to load the topology\n");
    hwloc_topology_destroy(topology);
    return EXIT_FAILURE;
  }

  /* retrieve the entire set of available PUs */
  cset_available = hwloc_topology_get_topology_cpuset(topology);

  /* retrieve the CPU binding of the current entire process */
  set = hwloc_bitmap_alloc();
  if (!set) {
    fprintf(stderr, "failed to allocate a bitmap\n");
    hwloc_topology_destroy(topology);
    return EXIT_FAILURE;
  }
  err = hwloc_get_cpubind(topology, set, HWLOC_CPUBIND_PROCESS);
  if (err < 0) {
    fprintf(stderr, "failed to get cpu binding\n");
    hwloc_bitmap_free(set);
    hwloc_topology_destroy(topology);
    return EXIT_FAILURE;
  }

  /* display the processing units that cannot be used by this process */
  if (hwloc_bitmap_isequal(set, cset_available)) {
    printf("this process can use all available processing units in the system\n");
  } else {
    /* compute the set where we currently cannot run.
     * we can't modify cset_available because it's a system read-only one,
     * so we do   set = available &~ set
     */
    hwloc_bitmap_andnot(set, cset_available, set);
    hwloc_bitmap_asprintf(&buffer, set);
    printf("process cannot use %d processing units (%s) among %d in the system\n",
	   hwloc_bitmap_weight(set), buffer, hwloc_bitmap_weight(cset_available));
    free(buffer);
    /* restore set where it was before the &~ operation above */
    hwloc_bitmap_andnot(set, cset_available, set);
  }
  /* print the smallest object covering the current process binding */
  obj = hwloc_get_obj_covering_cpuset(topology, set);
  hwloc_obj_type_snprintf(type, sizeof(type), obj, 0);
  printf("process is bound within object %s logical index %u\n", type, obj->logical_index);

  /* retrieve the single PU where the current thread actually runs within this process binding */
  set2 = hwloc_bitmap_alloc();
  if (!set2) {
    fprintf(stderr, "failed to allocate a bitmap\n");
    hwloc_bitmap_free(set);
    hwloc_topology_destroy(topology);
    return EXIT_FAILURE;
  }
  err = hwloc_get_last_cpu_location(topology, set2, HWLOC_CPUBIND_THREAD);
  if (err < 0) {
    fprintf(stderr, "failed to get last cpu location\n");
    hwloc_bitmap_free(set);
    hwloc_bitmap_free(set2);
    hwloc_topology_destroy(topology);
    return EXIT_FAILURE;
  }
  /* sanity checks that are not actually needed but help the reader */
  /* this thread runs within the process binding */
  assert(hwloc_bitmap_isincluded(set2, set));
  /* this thread runs on a single PU at a time */
  assert(hwloc_bitmap_weight(set2) == 1);

  /* print the logical number of the PU where that thread runs */
  /* extract the PU OS index from the bitmap */
  i = hwloc_bitmap_first(set2);
  obj = hwloc_get_pu_obj_by_os_index(topology, i);
  printf("thread is now running on PU logical index %u (OS/physical index %u)\n",
	 obj->logical_index, i);

  /* migrate this single thread to the other PUs within the current binding */
  hwloc_bitmap_andnot(set2, set, set2);
  err = hwloc_set_cpubind(topology, set2, HWLOC_CPUBIND_THREAD);
  if (err < 0) {
    fprintf(stderr, "failed to set thread binding\n");
    hwloc_bitmap_free(set);
    hwloc_bitmap_free(set2);
    hwloc_topology_destroy(topology);
    return EXIT_FAILURE;
  }
  /* reprint the PU where that thread runs */
  err = hwloc_get_last_cpu_location(topology, set2, HWLOC_CPUBIND_THREAD);
  if (err < 0) {
    fprintf(stderr, "failed to get last cpu location\n");
    hwloc_bitmap_free(set);
    hwloc_bitmap_free(set2);
    hwloc_topology_destroy(topology);
    return EXIT_FAILURE;
  }
  /* print the logical number of the PU where that thread runs */
  /* extract the PU OS index from the bitmap */
  i = hwloc_bitmap_first(set2);
  obj = hwloc_get_pu_obj_by_os_index(topology, i);
  printf("thread is running on PU logical index %u (OS/physical index %u)\n",
	 obj->logical_index, i);

  hwloc_bitmap_free(set);
  hwloc_bitmap_free(set2);

  /* retrieve the entire set of all PUs */
  cset_all = hwloc_topology_get_complete_cpuset(topology);
  if (hwloc_bitmap_isequal(cset_all, cset_available)) {
    printf("all hardware PUs are available\n");
  } else {
    printf("only %d hardware PUs are available in the machine among %d\n",
	   hwloc_bitmap_weight(cset_available), hwloc_bitmap_weight(cset_all));
  }

  hwloc_topology_destroy(topology);
  return EXIT_SUCCESS;
}
Example #14
int main(void)
{
  hwloc_bitmap_t set;
  int i, cpu, expected_cpu = 0;

  /* empty set */
  set = hwloc_bitmap_alloc();
  assert(hwloc_bitmap_first(set) == -1);
  assert(hwloc_bitmap_last(set) == -1);
  assert(hwloc_bitmap_next(set, 0) == -1);
  assert(hwloc_bitmap_next(set, -1) == -1);
  assert(hwloc_bitmap_weight(set) == 0);

  /* full set */
  hwloc_bitmap_fill(set);
  assert(hwloc_bitmap_first(set) == 0);
  assert(hwloc_bitmap_last(set) == -1);
  assert(hwloc_bitmap_next(set, -1) == 0);
  assert(hwloc_bitmap_next(set, 0) == 1);
  assert(hwloc_bitmap_next(set, 1) == 2);
  assert(hwloc_bitmap_next(set, 2) == 3);
  assert(hwloc_bitmap_next(set, 30) == 31);
  assert(hwloc_bitmap_next(set, 31) == 32);
  assert(hwloc_bitmap_next(set, 32) == 33);
  assert(hwloc_bitmap_next(set, 62) == 63);
  assert(hwloc_bitmap_next(set, 63) == 64);
  assert(hwloc_bitmap_next(set, 64) == 65);
  assert(hwloc_bitmap_next(set, 12345) == 12346);
  assert(hwloc_bitmap_weight(set) == -1);

  /* custom sets */
  hwloc_bitmap_zero(set);
  hwloc_bitmap_set_range(set, 36, 59);
  assert(hwloc_bitmap_first(set) == 36);
  assert(hwloc_bitmap_last(set) == 59);
  assert(hwloc_bitmap_next(set, -1) == 36);
  assert(hwloc_bitmap_next(set, 0) == 36);
  assert(hwloc_bitmap_next(set, 36) == 37);
  assert(hwloc_bitmap_next(set, 59) == -1);
  assert(hwloc_bitmap_weight(set) == 24);
  hwloc_bitmap_set_range(set, 136, 259);
  assert(hwloc_bitmap_first(set) == 36);
  assert(hwloc_bitmap_last(set) == 259);
  assert(hwloc_bitmap_next(set, 59) == 136);
  assert(hwloc_bitmap_next(set, 259) == -1);
  assert(hwloc_bitmap_weight(set) == 148);
  hwloc_bitmap_clr(set, 199);
  assert(hwloc_bitmap_first(set) == 36);
  assert(hwloc_bitmap_last(set) == 259);
  assert(hwloc_bitmap_next(set, 198) == 200);
  assert(hwloc_bitmap_next(set, 199) == 200);
  assert(hwloc_bitmap_weight(set) == 147);

  /* walk the set bits in order and check the index arithmetic:
   * i in [0,24) -> bits 36..59, i in [24,87) -> bits 136..198,
   * i in [87,147) -> bits 200..259 (bit 199 was cleared above) */
  i = 0;
  hwloc_bitmap_foreach_begin(cpu, set) {
    if (0 <= i && i < 24)
      expected_cpu = i + 36;
    else if (24 <= i && i < 87)
      expected_cpu = i + 112;
    else if (87 <= i && i < 147)
      expected_cpu = i + 113;

    assert(expected_cpu == cpu);

    i++;
  } hwloc_bitmap_foreach_end();

  hwloc_bitmap_free(set);

  return 0;
}
Example #15
static void
look_rset(int sdl, hwloc_obj_type_t type, struct hwloc_topology *topology, int level)
{
  rsethandle_t rset, rad;
  int i,maxcpus,j;
  int nbnodes;
  struct hwloc_obj *obj;

  if ((topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM))
    rset = rs_alloc(RS_ALL);
  else
    rset = rs_alloc(RS_PARTITION);
  rad = rs_alloc(RS_EMPTY);
  nbnodes = rs_numrads(rset, sdl, 0);
  if (nbnodes == -1) {
    perror("rs_numrads");
    return;
  }

  for (i = 0; i < nbnodes; i++) {
    hwloc_bitmap_t cpuset;
    unsigned os_index = (unsigned) -1; /* no os_index except for PU and NUMANODE below */

    if (rs_getrad(rset, rad, sdl, i, 0)) {
      fprintf(stderr,"rs_getrad(%d) failed: %s\n", i, strerror(errno));
      continue;
    }
    if (!rs_getinfo(rad, R_NUMPROCS, 0))
      continue;

    maxcpus = rs_getinfo(rad, R_MAXPROCS, 0);
    cpuset = hwloc_bitmap_alloc();
    for (j = 0; j < maxcpus; j++) {
      if (rs_op(RS_TESTRESOURCE, rad, NULL, R_PROCS, j))
	hwloc_bitmap_set(cpuset, j);
    }

    if (type == HWLOC_OBJ_PU) {
      os_index = hwloc_bitmap_first(cpuset);
      hwloc_debug("Found PU #%u inside node %d for sdl %d\n", os_index, i, sdl);
      assert(hwloc_bitmap_weight(cpuset) == 1);
    } else if (type == HWLOC_OBJ_NUMANODE) {
      /* NUMA node os_index isn't used for binding, just use the rad number to get unique values.
       * Note that we'll use that fact in hwloc_aix_prepare_membind(). */
      os_index = i;
      hwloc_debug("Using os_index #%u for NUMA node inside node %d for sdl %d\n", os_index, i, sdl);
    }

    obj = hwloc_alloc_setup_object(type, os_index);
    obj->cpuset = cpuset;
    obj->os_level = sdl;

    switch(type) {
      case HWLOC_OBJ_NUMANODE:
	obj->nodeset = hwloc_bitmap_alloc();
	hwloc_bitmap_set(obj->nodeset, i);
	obj->memory.local_memory = 0; /* TODO: odd, rs_getinfo(rad, R_MEMSIZE, 0) << 10 returns the total memory ... */
	obj->memory.page_types_len = 2;
	obj->memory.page_types = malloc(2*sizeof(*obj->memory.page_types));
	memset(obj->memory.page_types, 0, 2*sizeof(*obj->memory.page_types));
	obj->memory.page_types[0].size = hwloc_getpagesize();
#ifdef HAVE__SC_LARGE_PAGESIZE
	obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE);
#endif
	/* TODO: obj->memory.page_types[1].count = rs_getinfo(rset, R_LGPGFREE, 0) / hugepagesize */
	break;
      case HWLOC_OBJ_CACHE:
	obj->attr->cache.size = _system_configuration.L2_cache_size;
	obj->attr->cache.associativity = _system_configuration.L2_cache_asc;

	obj->attr->cache.linesize = 0; /* unknown by default */
	if (__power_pc())
	  if (__power_4() || __power_5() || __power_6() || __power_7())
	    obj->attr->cache.linesize = 128;

	obj->attr->cache.depth = 2;
	obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED; /* OK for power[4567], unknown for others */
	break;
      case HWLOC_OBJ_GROUP:
	obj->attr->group.depth = level;
	break;
      case HWLOC_OBJ_CORE:
      {
	hwloc_obj_t obj2, obj3;
	obj2 = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, i);
	obj2->cpuset = hwloc_bitmap_dup(obj->cpuset);
	obj2->attr->cache.size = _system_configuration.dcache_size;
	obj2->attr->cache.associativity = _system_configuration.dcache_asc;
	obj2->attr->cache.linesize = _system_configuration.dcache_line;
	obj2->attr->cache.depth = 1;
	if (_system_configuration.cache_attrib & (1<<30)) {
	  /* Unified cache */
	  obj2->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
	  hwloc_debug("Adding an L1u cache for core %d\n", i);
	  hwloc_insert_object_by_cpuset(topology, obj2);
	} else {
	  /* Separate Instruction and Data caches */
	  obj2->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
	  hwloc_debug("Adding an L1d cache for core %d\n", i);
	  hwloc_insert_object_by_cpuset(topology, obj2);

	  obj3 = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, i);
	  obj3->cpuset = hwloc_bitmap_dup(obj->cpuset);
	  obj3->attr->cache.size = _system_configuration.icache_size;
	  obj3->attr->cache.associativity = _system_configuration.icache_asc;
	  obj3->attr->cache.linesize = _system_configuration.icache_line;
	  obj3->attr->cache.depth = 1;
	  obj3->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;
	  hwloc_debug("Adding an L1i cache for core %d\n", i);
	  hwloc_insert_object_by_cpuset(topology, obj3);
	}
	break;
      }
      default:
	break;
    }
    hwloc_debug_2args_bitmap("%s %d has cpuset %s\n",
	       hwloc_obj_type_string(type),
	       i, obj->cpuset);
    hwloc_insert_object_by_cpuset(topology, obj);
  }

  rs_free(rset);
  rs_free(rad);
}
Example #16
static
void getNumCPUs(void) {
  //
  // accessible cores
  //

  //
  // Hwloc can't tell us the number of accessible cores directly, so
  // get that by counting the parent cores of the accessible PUs.
  //

  //
  // We could seemingly use hwloc_topology_get_allowed_cpuset() to get
  // the set of accessible PUs here.  But that seems not to reflect the
  // schedaffinity settings, so use hwloc_get_proc_cpubind() instead.
  //
  hwloc_cpuset_t logAccSet;
  CHK_ERR_ERRNO((logAccSet = hwloc_bitmap_alloc()) != NULL);
  if (hwloc_get_proc_cpubind(topology, getpid(), logAccSet, 0) != 0) {
#ifdef __APPLE__
    const int errRecoverable = (errno == ENOSYS); // no cpubind on macOS
#else
    const int errRecoverable = 0;
#endif
    if (errRecoverable) {
      hwloc_bitmap_fill(logAccSet);
    } else {
      REPORT_ERR_ERRNO(hwloc_get_proc_cpubind(topology, getpid(), logAccSet, 0)
                       == 0);
    }
  }
  hwloc_bitmap_and(logAccSet, logAccSet,
                   hwloc_topology_get_online_cpuset(topology));

  hwloc_cpuset_t physAccSet;
  CHK_ERR_ERRNO((physAccSet = hwloc_bitmap_alloc()) != NULL);

#define NEXT_PU(pu)                                                     \
  hwloc_get_next_obj_inside_cpuset_by_type(topology, logAccSet,         \
                                           HWLOC_OBJ_PU, pu)

  for (hwloc_obj_t pu = NEXT_PU(NULL); pu != NULL; pu = NEXT_PU(pu)) {
    hwloc_obj_t core;
    CHK_ERR_ERRNO((core = hwloc_get_ancestor_obj_by_type(topology,
                                                         HWLOC_OBJ_CORE,
                                                         pu))
                  != NULL);
    hwloc_bitmap_set(physAccSet, core->logical_index);
  }

#undef NEXT_PU

  numCPUsPhysAcc = hwloc_bitmap_weight(physAccSet);
  hwloc_bitmap_free(physAccSet);

  CHK_ERR(numCPUsPhysAcc > 0);

  //
  // all cores
  //
  numCPUsPhysAll = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_CORE);
  CHK_ERR(numCPUsPhysAll > 0);

  //
  // accessible PUs
  //
  numCPUsLogAcc = hwloc_bitmap_weight(logAccSet);
  CHK_ERR(numCPUsLogAcc > 0);

  hwloc_bitmap_free(logAccSet);

  //
  // all PUs
  //
  numCPUsLogAll = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU);
  CHK_ERR(numCPUsLogAll > 0);
}