Example #1
0
/*
 * Report the NUMA sublocale derived from the calling thread's CPU binding.
 *
 * Returns c_sublocid_any when no topology is available or when hwloc does
 * not support querying a thread's CPU binding. Otherwise the thread's
 * binding is converted to a nodeset and the index of the first node in
 * that set is returned (whatever hwloc_bitmap_first() yields).
 */
c_sublocid_t chpl_topo_getThreadLocality(void) {
  hwloc_cpuset_t cpuset;
  hwloc_nodeset_t nodeset;
  int flags;
  int node;

  if (!haveTopology) {
    return c_sublocid_any;
  }

  if (!topoSupport->cpubind->get_thread_cpubind) {
    return c_sublocid_any;
  }

  CHK_ERR_ERRNO((cpuset = hwloc_bitmap_alloc()) != NULL);
  CHK_ERR_ERRNO((nodeset = hwloc_bitmap_alloc()) != NULL);

  /*
   * Query the current binding.  This must be the "get" call: cpuset was
   * just allocated and is empty, so a "set" call would try to bind the
   * thread to an empty set instead of reading where it is bound.
   */
  flags = HWLOC_CPUBIND_THREAD;
  CHK_ERR_ERRNO(hwloc_get_cpubind(topology, cpuset, flags) == 0);

  hwloc_cpuset_to_nodeset(topology, cpuset, nodeset);

  node = hwloc_bitmap_first(nodeset);

  hwloc_bitmap_free(nodeset);
  hwloc_bitmap_free(cpuset);

  return node;
}
Example #2
0
/*******************  FUNCTION  *********************/
/**
 * Determine the current NUMA node from the memory binding.
 *
 * The memory binding is queried as a CPU set and converted to a node set.
 *
 * @return The id of the node when the resulting node set contains exactly
 * one node; -1 when there is a single NUMA entity, when a bitmap cannot be
 * allocated, when hwloc fails to report the binding, or when the binding
 * covers several nodes.
 */
int TopoHwloc::getCurrentIdFromNUMABinding(void) const
{
	//if no numa node, return immediately (before allocating anything)
	if (getNbNumaEntities() == 1)
		return -1;

	hwloc_nodeset_t nodeset = hwloc_bitmap_alloc();
	hwloc_cpuset_t cpuset = hwloc_bitmap_alloc();
	hwloc_membind_policy_t policy;
	int res = -1;
	int weight;
	int status = -1;
	#if defined(SCTK_ALLOC_DEBUG) && defined(hwloc_bitmap_list_snprintf)
	char buffer[4096];
	#endif

	if (nodeset != NULL && cpuset != NULL)
	{
		//nodes
		// flags = 0 fallback on PROCESS if THREAD is not supported (as for windows).
		status = hwloc_get_membind_nodeset(topology,nodeset,&policy,0);
		assert(status == 0);

		if (status == 0)
		{
			#if defined(SCTK_ALLOC_DEBUG) && defined(hwloc_bitmap_list_snprintf)
			hwloc_bitmap_list_snprintf(buffer,sizeof(buffer),nodeset);
			fprintf(stderr,"Current nodes : %s\n",buffer);
			#endif

			//cores
			// flags = 0 fallback on PROCESS if THREAD is not supported (as for windows).
			status = hwloc_get_membind(topology,cpuset,&policy,0);
			assert(status == 0);
		}

		//only go on when both queries succeeded (non-zero status means failure)
		if (status == 0)
		{
			#if defined(SCTK_ALLOC_DEBUG) && defined(hwloc_bitmap_list_snprintf)
			hwloc_bitmap_list_snprintf(buffer,sizeof(buffer),cpuset);
			fprintf(stderr,"Current cores : %s\n",buffer);
			#endif

			//nodes from cores
			hwloc_cpuset_to_nodeset(topology,cpuset,nodeset);

			#if defined(SCTK_ALLOC_DEBUG) && defined(hwloc_bitmap_list_snprintf)
			hwloc_bitmap_list_snprintf(buffer,sizeof(buffer),nodeset);
			fprintf(stderr,"Current nodes from cores : %s\n",buffer);
			#endif

			//calc res : only a binding to exactly one node identifies it
			weight = hwloc_bitmap_weight(nodeset);
			assert(weight != 0);
			if (weight == 1)
				res = getFirstBitInBitmap(nodeset);
		}
	}

	//single exit point so the bitmaps are released on every path
	hwloc_bitmap_free(cpuset);
	hwloc_bitmap_free(nodeset);

	return res;
}
Example #3
0
/*
 * Report the memory binding of the calling thread.
 *
 * The binding is derived from the thread's CPU binding: on success *policy
 * is set to HWLOC_MEMBIND_BIND and nodeset receives the nodes matching the
 * CPU binding.  Returns the result of hwloc_win_get_thread_cpubind(), or -1
 * when the temporary cpuset cannot be allocated.
 */
static int
hwloc_win_get_thisthread_membind(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags)
{
  int ret;
  hwloc_cpuset_t cpuset = hwloc_bitmap_alloc();
  if (!cpuset)
    return -1;
  /* flags holds HWLOC_MEMBIND_* bits; hand the cpubind call its own flag
   * namespace, as the per-process variant of this function does. */
  ret = hwloc_win_get_thread_cpubind(topology, GetCurrentThread(), cpuset,
				     (flags & HWLOC_MEMBIND_STRICT) ? HWLOC_CPUBIND_STRICT : 0);
  if (!ret) {
    *policy = HWLOC_MEMBIND_BIND;
    hwloc_cpuset_to_nodeset(topology, cpuset, nodeset);
  }
  hwloc_bitmap_free(cpuset);
  return ret;
}
Example #4
0
/*
 * Report the memory binding of process pid.
 *
 * The binding is derived from the process CPU binding: on success *policy
 * is set to HWLOC_MEMBIND_BIND and nodeset receives the nodes matching the
 * CPU binding.  Returns the result of hwloc_win_get_proc_cpubind(), or -1
 * when the temporary cpuset cannot be allocated.
 */
static int
hwloc_win_get_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags)
{
  int ret;
  hwloc_cpuset_t cpuset = hwloc_bitmap_alloc();
  if (!cpuset)
    return -1;
  /* Translate the membind STRICT flag into its cpubind counterpart. */
  ret = hwloc_win_get_proc_cpubind(topology, pid, cpuset,
				   (flags & HWLOC_MEMBIND_STRICT) ? HWLOC_CPUBIND_STRICT : 0);
  if (!ret) {
    *policy = HWLOC_MEMBIND_BIND;
    hwloc_cpuset_to_nodeset(topology, cpuset, nodeset);
  }
  hwloc_bitmap_free(cpuset);
  return ret;
}
    void THardwareLocalityHelper::BindThreadForDevice(int deviceId) {
        if (!HasContext) {
            return;
        }
        THwlocSet deviceCpu;
        THwlocSet numaNode;
        int errCode = hwloc_cudart_get_device_cpuset(Context, deviceId, deviceCpu.Set);
        hwloc_cpuset_to_nodeset(Context, deviceCpu.Set, numaNode.Set);

        errCode = hwloc_set_cpubind(Context, deviceCpu.Set, HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT);
        if (errCode == -1) {
            MATRIXNET_ERROR_LOG << "Can't bind thread for " << deviceId << " with err " << errno << Endl;
        }

        errCode = hwloc_set_membind_nodeset(Context, numaNode.Set, HWLOC_MEMBIND_BIND, HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT);
        if (errCode == -1) {
            MATRIXNET_ERROR_LOG << "Can't bind memory for " << deviceId << " with err " << errno << Endl;
        }
    }
Example #6
0
/*
 * Get the NUMA node matching the CPU binding of the current thread.
 *
 * Returns the index of the first node in the nodeset derived from the
 * thread's CPU binding, or -1 when the local topology has no nodes, when a
 * bitmap cannot be allocated, or when the binding cannot be queried.
 */
int hw_my_node()
{
  int node = -1;
  hwloc_cpuset_t set;
  hwloc_nodeset_t nset;

  if (local_topo->nnodes == 0)
    return -1;

  set = hwloc_bitmap_alloc();
  nset = hwloc_bitmap_alloc();

  /* Only trust the cpuset when both allocations and the query succeeded. */
  if (set != NULL && nset != NULL &&
      hwloc_get_cpubind(topology, set, HWLOC_CPUBIND_THREAD) == 0) {
    hwloc_cpuset_to_nodeset(topology, set, nset);
    node = hwloc_bitmap_first(nset);
  }

  hwloc_bitmap_free(set);
  hwloc_bitmap_free(nset);

  return node;
}
Example #7
0
/*******************  FUNCTION  *********************/
/**
 * Determine the current NUMA node from the CPU binding.
 *
 * The CPU binding is queried and converted to a node set.
 *
 * @return The id of the node when the resulting node set contains exactly
 * one node; -1 when a bitmap cannot be allocated, when hwloc fails to
 * report the binding, or when the binding covers several nodes.
 */
int TopoHwloc::getCurrentIdFromThreadBinding(void) const
{
	hwloc_nodeset_t nodeset = hwloc_bitmap_alloc();
	hwloc_cpuset_t cpuset = hwloc_bitmap_alloc();
	int res = -1;
	int weight;
	int status = -1;
	#if defined(SCTK_ALLOC_DEBUG) && defined(hwloc_bitmap_list_snprintf)
	char buffer[4096];
	#endif

	if (nodeset != NULL && cpuset != NULL)
	{
		//get current core binding
		//for windows use 0 instead of HWLOC_CPUBIND_THREAD
		status = hwloc_get_cpubind (topology, cpuset, 0);
		assert(status == 0);
	}

	//only go on when the query succeeded (non-zero status means failure)
	if (status == 0)
	{
		#if defined(SCTK_ALLOC_DEBUG) && defined(hwloc_bitmap_list_snprintf)
		hwloc_bitmap_list_snprintf(buffer,sizeof(buffer),cpuset);
		fprintf(stderr,"Current cores : %s\n",buffer);
		#endif

		//nodes from cores
		hwloc_cpuset_to_nodeset(topology,cpuset,nodeset);

		#if defined(SCTK_ALLOC_DEBUG) && defined(hwloc_bitmap_list_snprintf)
		hwloc_bitmap_list_snprintf(buffer,sizeof(buffer),nodeset);
		fprintf(stderr,"Current nodes from cores : %s\n",buffer);
		#endif

		//calc res : only a binding to exactly one node identifies it
		weight = hwloc_bitmap_weight(nodeset);
		assert(weight != 0);
		if (weight == 1)
			res = getFirstBitInBitmap(nodeset);
	}

	//single exit point so the bitmaps are released on every path
	hwloc_bitmap_free(cpuset);
	hwloc_bitmap_free(nodeset);

	return res;
}
Example #8
0
/*
 * Validate a CPU set used for memory binding and turn it into a node set.
 *
 * On success, returns 0 and fills nodeset: with the complete nodeset when
 * cpuset covers the whole topology cpuset, otherwise with the conversion of
 * cpuset.  On failure, returns -1 and sets errno:
 *   EXDEV  - the topology has no topology cpuset,
 *   ENODEV - the topology has no complete nodeset,
 *   EINVAL - cpuset is empty or not included in the complete cpuset.
 */
static int
hwloc_fix_membind_cpuset(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_const_cpuset_t cpuset)
{
  hwloc_const_bitmap_t topo_cpus = hwloc_topology_get_topology_cpuset(topology);
  hwloc_const_bitmap_t all_cpus = hwloc_topology_get_complete_cpuset(topology);
  hwloc_const_bitmap_t all_nodes = hwloc_topology_get_complete_nodeset(topology);
  int failure = 0;

  if (!topo_cpus)
    /* The topology is composed of several systems, the cpuset is thus
     * ambiguous. */
    failure = EXDEV;
  else if (!all_nodes)
    /* There is no NUMA node */
    failure = ENODEV;
  else if (hwloc_bitmap_iszero(cpuset)
	   || !hwloc_bitmap_isincluded(cpuset, all_cpus))
    /* Empty, or reaching outside of the complete cpuset */
    failure = EINVAL;

  if (failure) {
    errno = failure;
    return -1;
  }

  if (hwloc_bitmap_isincluded(topo_cpus, cpuset))
    /* The whole topology is requested: use every node */
    hwloc_bitmap_copy(nodeset, all_nodes);
  else
    hwloc_cpuset_to_nodeset(topology, cpuset, nodeset);

  return 0;
}
Example #9
0
/*
 * Report the NUMA sublocale derived from the calling thread's CPU binding.
 *
 * Returns c_sublocid_any when no topology is available or when hwloc does
 * not support querying a thread's CPU binding.  Otherwise the binding is
 * converted to a nodeset and the value of hwloc_bitmap_first() on that set
 * is returned.  Allocation and query failures go through report_error().
 */
c_sublocid_t chpl_topo_getThreadLocality(void) {
  hwloc_cpuset_t boundCpus;
  hwloc_nodeset_t boundNodes;
  int firstNode;

  /* Nothing to report without a topology or without thread-binding support. */
  if (!haveTopology || !topoSupport->cpubind->get_thread_cpubind) {
    return c_sublocid_any;
  }

  boundCpus = hwloc_bitmap_alloc();
  if (boundCpus == NULL) {
    report_error("hwloc_bitmap_alloc()", errno);
  }

  boundNodes = hwloc_bitmap_alloc();
  if (boundNodes == NULL) {
    report_error("hwloc_bitmap_alloc()", errno);
  }

  if (hwloc_get_cpubind(topology, boundCpus, HWLOC_CPUBIND_THREAD) != 0) {
    report_error("hwloc_get_cpubind()", errno);
  }

  /* Map the CPUs we are bound to onto the nodes that contain them. */
  hwloc_cpuset_to_nodeset(topology, boundCpus, boundNodes);
  firstNode = hwloc_bitmap_first(boundNodes);

  hwloc_bitmap_free(boundNodes);
  hwloc_bitmap_free(boundCpus);

  return firstNode;
}
Example #10
0
/*
 * hwloc-bind entry point.
 *
 * Parses options and locations from the command line, then either
 *  - displays the current CPU binding, last CPU location or memory binding
 *    (--get / --get-last-cpu-location), or
 *  - applies the requested memory and/or CPU binding to the current
 *    process, to --pid, or to --tid,
 * and finally executes the remaining arguments as a command when no pid or
 * tid was given.
 *
 * Returns EXIT_SUCCESS or EXIT_FAILURE; on a successful execvp() it does
 * not return.
 */
int main(int argc, char *argv[])
{
  hwloc_topology_t topology;
  int loaded = 0;
  int depth;
  hwloc_bitmap_t cpubind_set, membind_set;
  int got_cpubind = 0, got_membind = 0;
  int working_on_cpubind = 1; /* membind if 0 */
  int get_binding = 0;
  int use_nodeset = 0;
  int get_last_cpu_location = 0;
  unsigned long flags = 0;
  int force = 0;
  int single = 0;
  int verbose = 0;
  int only_hbm = -1;
  int logical = 1;
  int taskset = 0;
  unsigned cpubind_flags = 0;
  hwloc_membind_policy_t membind_policy = HWLOC_MEMBIND_BIND;
  int got_mempolicy = 0;
  unsigned membind_flags = 0;
  int opt;
  int ret;
  int pid_number = -1;
  int tid_number = -1;
  hwloc_pid_t pid = 0; /* only valid when pid_number > 0, but gcc-4.8 still reports uninitialized warnings */
  char *callname;
  struct hwloc_calc_location_context_s lcontext;
  struct hwloc_calc_set_context_s scontext;

  callname = argv[0];
  /* skip argv[0], handle options */
  argv++;
  argc--;

  hwloc_utils_check_api_version(callname);

  cpubind_set = hwloc_bitmap_alloc();
  membind_set = hwloc_bitmap_alloc();

  /* don't load now, in case some options change the config before the topology is actually used */
#define LOADED() (loaded)
#define ENSURE_LOADED() do { \
  if (!loaded) { \
    hwloc_topology_init(&topology); \
    hwloc_topology_set_all_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_ALL); \
    hwloc_topology_set_flags(topology, flags); \
    hwloc_topology_load(topology); \
    depth = hwloc_topology_get_depth(topology); \
    loaded = 1; \
  } \
} while (0)

  while (argc >= 1) {
    if (!strcmp(argv[0], "--")) {
      argc--;
      argv++;
      break;
    }

    /* number of extra arguments consumed by the current option */
    opt = 0;

    if (*argv[0] == '-') {
      if (!strcmp(argv[0], "-v") || !strcmp(argv[0], "--verbose")) {
        verbose++;
        goto next;
      }
      if (!strcmp(argv[0], "-q") || !strcmp(argv[0], "--quiet")) {
        verbose--;
        goto next;
      }
      if (!strcmp(argv[0], "--help")) {
        usage("hwloc-bind", stdout);
        return EXIT_SUCCESS;
      }
      if (!strcmp(argv[0], "--single")) {
        single = 1;
        goto next;
      }
      if (!strcmp(argv[0], "-f") || !strcmp(argv[0], "--force")) {
        force = 1;
        goto next;
      }
      if (!strcmp(argv[0], "--strict")) {
        cpubind_flags |= HWLOC_CPUBIND_STRICT;
        membind_flags |= HWLOC_MEMBIND_STRICT;
        goto next;
      }
      if (!strcmp(argv[0], "--pid")) {
        if (argc < 2) {
          usage ("hwloc-bind", stderr);
          exit(EXIT_FAILURE);
        }
        pid_number = atoi(argv[1]);
        opt = 1;
        goto next;
      }
#ifdef HWLOC_LINUX_SYS
      if (!strcmp(argv[0], "--tid")) {
        if (argc < 2) {
          usage ("hwloc-bind", stderr);
          exit(EXIT_FAILURE);
        }
        tid_number = atoi(argv[1]);
        opt = 1;
        goto next;
      }
#endif
      if (!strcmp (argv[0], "--version")) {
        printf("%s %s\n", callname, HWLOC_VERSION);
        exit(EXIT_SUCCESS);
      }
      if (!strcmp(argv[0], "-l") || !strcmp(argv[0], "--logical")) {
        logical = 1;
        goto next;
      }
      if (!strcmp(argv[0], "-p") || !strcmp(argv[0], "--physical")) {
        logical = 0;
        goto next;
      }
      if (!strcmp(argv[0], "--taskset")) {
        taskset = 1;
        goto next;
      }
      if (!strcmp (argv[0], "-e") || !strncmp (argv[0], "--get-last-cpu-location", 10)) {
        get_last_cpu_location = 1;
        goto next;
      }
      if (!strcmp (argv[0], "--get")) {
        get_binding = 1;
        goto next;
      }
      if (!strcmp (argv[0], "--nodeset")) {
        use_nodeset = 1;
        goto next;
      }
      if (!strcmp (argv[0], "--cpubind")) {
        working_on_cpubind = 1;
        goto next;
      }
      if (!strcmp (argv[0], "--membind")) {
        working_on_cpubind = 0;
        goto next;
      }
      if (!strcmp (argv[0], "--mempolicy")) {
        /* the policy name is mandatory; argv[1] is NULL when it is missing */
        if (argc < 2) {
          usage ("hwloc-bind", stderr);
          exit(EXIT_FAILURE);
        }
        if (!strncmp(argv[1], "default", 2))
          membind_policy = HWLOC_MEMBIND_DEFAULT;
        else if (!strncmp(argv[1], "firsttouch", 2))
          membind_policy = HWLOC_MEMBIND_FIRSTTOUCH;
        else if (!strncmp(argv[1], "bind", 2))
          membind_policy = HWLOC_MEMBIND_BIND;
        else if (!strncmp(argv[1], "interleave", 2))
          membind_policy = HWLOC_MEMBIND_INTERLEAVE;
        else if (!strncmp(argv[1], "nexttouch", 2))
          membind_policy = HWLOC_MEMBIND_NEXTTOUCH;
        else {
          fprintf(stderr, "Unrecognized memory binding policy %s\n", argv[1]);
          usage ("hwloc-bind", stderr);
          exit(EXIT_FAILURE);
        }
        got_mempolicy = 1;
        opt = 1;
        goto next;
      }
      if (!strcmp(argv[0], "--hbm")) {
        only_hbm = 1;
        goto next;
      }
      if (!strcmp(argv[0], "--no-hbm")) {
        only_hbm = 0;
        goto next;
      }
      if (!strcmp (argv[0], "--whole-system")) {
        if (loaded) {
          fprintf(stderr, "Input option %s disallowed after options using the topology\n", argv[0]);
          exit(EXIT_FAILURE);
        }
        flags |= HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM;
        goto next;
      }
      if (!strcmp (argv[0], "--restrict")) {
        hwloc_bitmap_t restrictset;
        int err;
        if (argc < 2) {
          /* usage error: report it on stderr like the other options do */
          usage (callname, stderr);
          exit(EXIT_FAILURE);
        }
        restrictset = hwloc_bitmap_alloc();
        hwloc_bitmap_sscanf(restrictset, argv[1]);
        ENSURE_LOADED();
        err = hwloc_topology_restrict (topology, restrictset, 0);
        if (err) {
          perror("Restricting the topology");
          /* FALLTHRU */
        }
        hwloc_bitmap_free(restrictset);
        /* skip the set here; "next" then skips the option itself (opt is 0) */
        argc--;
        argv++;
        goto next;
      }

      fprintf (stderr, "Unrecognized option: %s\n", argv[0]);
      usage("hwloc-bind", stderr);
      return EXIT_FAILURE;
    }

    /* not an option: try to parse it as a location */
    ENSURE_LOADED();

    lcontext.topology = topology;
    lcontext.topodepth = depth;
    lcontext.only_hbm = only_hbm;
    lcontext.logical = logical;
    lcontext.verbose = verbose;
    scontext.nodeset_input = use_nodeset;
    scontext.nodeset_output = working_on_cpubind ? 0 : 1;
    scontext.output_set = working_on_cpubind ? cpubind_set : membind_set;
    ret = hwloc_calc_process_location_as_set(&lcontext, &scontext, argv[0]);
    if (ret < 0) {
      if (verbose > 0)
        fprintf(stderr, "assuming the command starts at %s\n", argv[0]);
      break;
    }
    if (working_on_cpubind)
      got_cpubind = 1;
    else
      got_membind = 1;

  next:
    argc -= opt+1;
    argv += opt+1;
  }

  ENSURE_LOADED();

  if (pid_number > 0 && tid_number > 0) {
    fprintf(stderr, "cannot operate both on tid and pid\n");
    return EXIT_FAILURE;
  }

  if (pid_number > 0) {
    pid = hwloc_pid_from_number(pid_number, !(get_binding || get_last_cpu_location));
    /* no need to set_pid()
     * the doc just says we're operating on pid, not that we're retrieving the topo/cpuset as seen from inside pid
     */
  }

  if (get_last_cpu_location && !working_on_cpubind) {
    fprintf(stderr, "Options --membind and --get-last-cpu-location cannot be combined.\n");
    return EXIT_FAILURE;
  }
  if ((get_binding || get_last_cpu_location) && (got_cpubind || got_membind)) {
    /* doesn't work because get_binding/get_last_cpu_location overwrites cpubind_set */
    fprintf(stderr, "Cannot display and set binding at the same time.\n");
    return EXIT_FAILURE;
  }

  /* display mode */
  if (get_binding || get_last_cpu_location) {
    char *s;
    const char *policystr = NULL;
    int err;
    if (working_on_cpubind) {
      if (get_last_cpu_location) {
        if (pid_number > 0)
          err = hwloc_get_proc_last_cpu_location(topology, pid, cpubind_set, 0);
#ifdef HWLOC_LINUX_SYS
        else if (tid_number > 0)
          err = hwloc_linux_get_tid_last_cpu_location(topology, tid_number, cpubind_set);
#endif
        else
          err = hwloc_get_last_cpu_location(topology, cpubind_set, 0);
      } else {
        if (pid_number > 0)
          err = hwloc_get_proc_cpubind(topology, pid, cpubind_set, 0);
#ifdef HWLOC_LINUX_SYS
        else if (tid_number > 0)
          err = hwloc_linux_get_tid_cpubind(topology, tid_number, cpubind_set);
#endif
        else
          err = hwloc_get_cpubind(topology, cpubind_set, 0);
      }
      if (err) {
        const char *errmsg = strerror(errno);
        if (pid_number > 0)
          fprintf(stderr, "hwloc_get_proc_%s %d failed (errno %d %s)\n", get_last_cpu_location ? "last_cpu_location" : "cpubind", pid_number, errno, errmsg);
        else if (tid_number > 0)
          fprintf(stderr, "hwloc_get_tid_%s %d failed (errno %d %s)\n", get_last_cpu_location ? "last_cpu_location" : "cpubind", tid_number, errno, errmsg);
        else
          fprintf(stderr, "hwloc_get_%s failed (errno %d %s)\n", get_last_cpu_location ? "last_cpu_location" : "cpubind", errno, errmsg);
        return EXIT_FAILURE;
      }
      if (use_nodeset) {
        hwloc_bitmap_t nset = hwloc_bitmap_alloc();
        hwloc_cpuset_to_nodeset(topology, cpubind_set, nset);
        if (taskset)
          hwloc_bitmap_taskset_asprintf(&s, nset);
        else
          hwloc_bitmap_asprintf(&s, nset);
        hwloc_bitmap_free(nset);
      } else {
        if (taskset)
          hwloc_bitmap_taskset_asprintf(&s, cpubind_set);
        else
          hwloc_bitmap_asprintf(&s, cpubind_set);
      }

    } else {
      hwloc_membind_policy_t policy;
      if (pid_number > 0) {
        err = hwloc_get_proc_membind(topology, pid, membind_set, &policy, use_nodeset ? HWLOC_MEMBIND_BYNODESET : 0);
      } else if (tid_number > 0) {
        err = -1; errno = ENOSYS;
      } else {
        err = hwloc_get_membind(topology, membind_set, &policy, use_nodeset ? HWLOC_MEMBIND_BYNODESET : 0);
      }
      if (err) {
        const char *errmsg = strerror(errno);
        if (pid_number > 0)
          fprintf(stderr, "hwloc_get_proc_membind %d failed (errno %d %s)\n", pid_number, errno, errmsg);
        else
          fprintf(stderr, "hwloc_get_membind failed (errno %d %s)\n", errno, errmsg);
        return EXIT_FAILURE;
      }
      if (taskset)
        hwloc_bitmap_taskset_asprintf(&s, membind_set);
      else
        hwloc_bitmap_asprintf(&s, membind_set);
      switch (policy) {
      /* the default policy is a valid answer, not an unknown one */
      case HWLOC_MEMBIND_DEFAULT: policystr = "default"; break;
      case HWLOC_MEMBIND_FIRSTTOUCH: policystr = "firsttouch"; break;
      case HWLOC_MEMBIND_BIND: policystr = "bind"; break;
      case HWLOC_MEMBIND_INTERLEAVE: policystr = "interleave"; break;
      case HWLOC_MEMBIND_NEXTTOUCH: policystr = "nexttouch"; break;
      default: fprintf(stderr, "unknown memory policy %d\n", policy); assert(0); break;
      }
    }
    if (policystr)
      printf("%s (%s)\n", s, policystr);
    else
      printf("%s\n", s);
    free(s);
  }

  /* memory binding is applied before CPU binding */
  if (got_membind) {
    if (hwloc_bitmap_iszero(membind_set)) {
      if (verbose >= 0)
        fprintf(stderr, "cannot membind to empty set\n");
      if (!force)
        goto failed_binding;
    }
    if (verbose > 0) {
      char *s;
      hwloc_bitmap_asprintf(&s, membind_set);
      fprintf(stderr, "binding on memory set %s\n", s);
      free(s);
    }
    if (single)
      hwloc_bitmap_singlify(membind_set);
    if (pid_number > 0)
      ret = hwloc_set_proc_membind(topology, pid, membind_set, membind_policy, membind_flags | HWLOC_MEMBIND_BYNODESET);
    else if (tid_number > 0) {
      ret = -1; errno = ENOSYS;
    } else
      ret = hwloc_set_membind(topology, membind_set, membind_policy, membind_flags | HWLOC_MEMBIND_BYNODESET);
    if (ret && verbose >= 0) {
      int bind_errno = errno;
      const char *errmsg = strerror(bind_errno);
      char *s;
      hwloc_bitmap_asprintf(&s, membind_set);
      if (pid_number > 0)
        fprintf(stderr, "hwloc_set_proc_membind %s (policy %d flags %x) PID %d failed (errno %d %s)\n",
                s, membind_policy, membind_flags, pid_number, bind_errno, errmsg);
      else
        fprintf(stderr, "hwloc_set_membind %s (policy %d flags %x) failed (errno %d %s)\n",
                s, membind_policy, membind_flags, bind_errno, errmsg);
      free(s);
    }
    if (ret && !force)
      goto failed_binding;
  } else {
    if (got_mempolicy)
      fprintf(stderr, "--mempolicy ignored unless memory binding is also requested with --membind.\n");
  }

  if (got_cpubind) {
    if (hwloc_bitmap_iszero(cpubind_set)) {
      if (verbose >= 0)
        fprintf(stderr, "cannot cpubind to empty set\n");
      if (!force)
        goto failed_binding;
    }
    if (verbose > 0) {
      char *s;
      hwloc_bitmap_asprintf(&s, cpubind_set);
      fprintf(stderr, "binding on cpu set %s\n", s);
      free(s);
    }
    if (got_membind && !hwloc_bitmap_isequal(membind_set, cpubind_set)) {
      if (verbose)
        fprintf(stderr, "Conflicting CPU and memory binding requested, adding HWLOC_CPUBIND_NOMEMBIND flag.\n");
      cpubind_flags |= HWLOC_CPUBIND_NOMEMBIND;
    }
    if (single)
      hwloc_bitmap_singlify(cpubind_set);
    if (pid_number > 0)
      ret = hwloc_set_proc_cpubind(topology, pid, cpubind_set, cpubind_flags);
#ifdef HWLOC_LINUX_SYS
    else if (tid_number > 0)
      ret = hwloc_linux_set_tid_cpubind(topology, tid_number, cpubind_set);
#endif
    else
      ret = hwloc_set_cpubind(topology, cpubind_set, cpubind_flags);
    if (ret && verbose >= 0) {
      int bind_errno = errno;
      const char *errmsg = strerror(bind_errno);
      char *s;
      hwloc_bitmap_asprintf(&s, cpubind_set);
      if (pid_number > 0)
        fprintf(stderr, "hwloc_set_proc_cpubind %s (flags %x) PID %d failed (errno %d %s)\n",
                s, cpubind_flags, pid_number, bind_errno, errmsg);
      else if (tid_number > 0)
        fprintf(stderr, "hwloc_set_tid_cpubind %s (flags %x) TID %d failed (errno %d %s)\n",
                s, cpubind_flags, tid_number, bind_errno, errmsg);
      else
        fprintf(stderr, "hwloc_set_cpubind %s (flags %x) failed (errno %d %s)\n",
                s, cpubind_flags, bind_errno, errmsg);
      free(s);
    }
    if (ret && !force)
      goto failed_binding;
  }

  hwloc_bitmap_free(cpubind_set);
  hwloc_bitmap_free(membind_set);

  hwloc_topology_destroy(topology);

  /* when operating on another process or thread there is no command to run */
  if (pid_number > 0 || tid_number > 0)
    return EXIT_SUCCESS;

  if (0 == argc) {
    if (get_binding || get_last_cpu_location)
      return EXIT_SUCCESS;
    fprintf(stderr, "%s: nothing to do!\n", callname);
    return EXIT_FAILURE;
  }

  /* FIXME: check whether Windows execvp() passes INHERIT_PARENT_AFFINITY to CreateProcess()
   * because we need to propagate processor group affinity. However process-wide affinity
   * isn't supported with processor groups so far.
   */
  ret = execvp(argv[0], argv);
  if (ret) {
      fprintf(stderr, "%s: Failed to launch executable \"%s\"\n",
              callname, argv[0]);
      perror("execvp");
  }
  return EXIT_FAILURE;


failed_binding:
  hwloc_bitmap_free(cpubind_set);
  hwloc_bitmap_free(membind_set);
  hwloc_topology_destroy(topology);
  return EXIT_FAILURE;
}
Example #11
0
/*
 * Build one hwloc CPU set per worker thread and record, for every worker,
 * the contiguous range of workers it shares NUMA nodes with.
 *
 * The PUs of the process CPU binding are distributed across workers
 * according to the HCLIB_AFFINITY environment variable ("strided", the
 * default, or "chunked"); any other value terminates the program.  The
 * per-worker sets are stored in thread_cpusets, and the [base, limit)
 * range is stored in each worker's base_intra_socket_workers and
 * limit_intra_socket_workers fields.
 *
 * Compiles to an empty function when USE_HWLOC is not defined.
 */
static void create_hwloc_cpusets() {
#ifdef USE_HWLOC
    int i;

    int err = hwloc_topology_init(&topology);
    assert(err == 0);

    err = hwloc_topology_load(topology);
    assert(err == 0);

    hwloc_bitmap_t cpuset = hwloc_bitmap_alloc();
    assert(cpuset);

    err = hwloc_get_cpubind(topology, cpuset, HWLOC_CPUBIND_PROCESS);
    assert(err == 0);
    const int available_pus = hwloc_bitmap_weight(cpuset);
    const int last_set_index = hwloc_bitmap_last(cpuset);
    const int num_workers = hc_context->nworkers;

    hclib_affinity_t selected_affinity = HCLIB_AFFINITY_STRIDED;
    const char *user_selected_affinity = getenv("HCLIB_AFFINITY");
    if (user_selected_affinity) {
        if (strcmp(user_selected_affinity, "strided") == 0) {
            selected_affinity = HCLIB_AFFINITY_STRIDED;
        } else if (strcmp(user_selected_affinity, "chunked") == 0) {
            selected_affinity = HCLIB_AFFINITY_CHUNKED;
        } else {
            fprintf(stderr, "Unsupported thread affinity \"%s\" specified with "
                    "HCLIB_AFFINITY.\n", user_selected_affinity);
            exit(1);
        }
    }

    thread_cpusets = (hwloc_bitmap_t *)malloc(hc_context->nworkers *
            sizeof(*thread_cpusets));
    assert(thread_cpusets);

    for (i = 0; i < hc_context->nworkers; i++) {
        thread_cpusets[i] = hwloc_bitmap_alloc();
        assert(thread_cpusets[i]);
    }

    switch (selected_affinity) {
        case (HCLIB_AFFINITY_STRIDED): {
            if (available_pus < num_workers) {
                fprintf(stderr, "ERROR Available PUs (%d) was less than number "
                        "of workers (%d), don't currently support "
                        "oversubscription with strided thread pinning\n",
                        available_pus, num_workers);
                exit(1);
            }

            // Deal the available PUs out to the workers round-robin.
            int count = 0;
            int index = 0;
            while (index <= last_set_index) {
                if (hwloc_bitmap_isset(cpuset, index)) {
                    hwloc_bitmap_set(thread_cpusets[count % num_workers],
                            index);
                    count++;
                }
                index++;
            }
            break;
        }
        case (HCLIB_AFFINITY_CHUNKED): {
            // Give each worker a run of chunk_size consecutive available PUs.
            const int chunk_size = (available_pus + num_workers - 1) /
                    num_workers;
            int count = 0;
            int index = 0;
            while (index <= last_set_index) {
                if (hwloc_bitmap_isset(cpuset, index)) {
                    hwloc_bitmap_set(thread_cpusets[count / chunk_size], index);
                    count++;
                }
                index++;
            }
            break;
        }
        default:
            assert(false);
    }

    hwloc_bitmap_t nodeset = hwloc_bitmap_alloc();
    hwloc_bitmap_t other_nodeset = hwloc_bitmap_alloc();
    assert(nodeset && other_nodeset);

    /*
     * Here, we look for contiguous ranges of worker threads that share any NUMA
     * nodes with us. In theory, this should be more hierarchical but isn't yet.
     * This is also super inefficient... O(T^2) where T is the number of
     * workers.
     */
    bool revert_to_naive_stealing = false;
    for (i = 0; i < hc_context->nworkers; i++) {
        // Get the NUMA nodes for this CPU set
        hwloc_cpuset_to_nodeset(topology, thread_cpusets[i], nodeset);

        int base = -1;
        int limit = -1;
        int j;
        for (j = 0; j < hc_context->nworkers; j++) {
            hwloc_cpuset_to_nodeset(topology, thread_cpusets[j], other_nodeset);
            // Take the intersection, see if there is any overlap
            hwloc_bitmap_and(other_nodeset, nodeset, other_nodeset);

            if (base < 0) {
                // Haven't found a contiguous chunk of workers yet.
                if (!hwloc_bitmap_iszero(other_nodeset)) {
                    base = j;
                }
            } else {
                /*
                 * Have a contiguous chunk of workers, either still inside it or
                 * after it.
                 */
                if (limit < 0) {
                    // Inside the contiguous chunk of workers
                    if (hwloc_bitmap_iszero(other_nodeset)) {
                        // Found the end
                        limit = j;
                    }
                } else {
                    // After the contiguous chunk of workers
                    if (!hwloc_bitmap_iszero(other_nodeset)) {
                        // No contiguous chunk to find, just do something naive.
                        revert_to_naive_stealing = true;
                        break;
                    }
                }
            }
        }

        if (revert_to_naive_stealing) {
            fprintf(stderr, "WARNING: Using naive work-stealing patterns.\n");
            base = 0;
            limit = hc_context->nworkers;
        } else {
            assert(base >= 0);
            if (limit < 0) {
                limit = hc_context->nworkers;
            }
        }

        hc_context->workers[i]->base_intra_socket_workers = base;
        hc_context->workers[i]->limit_intra_socket_workers = limit;

#ifdef VERBOSE
        char *nbuf;
        hwloc_bitmap_asprintf(&nbuf, nodeset);

        char *buffer;
        hwloc_bitmap_asprintf(&buffer, thread_cpusets[i]);
        fprintf(stderr, "Worker %d has access to %d PUs (%s), %d NUMA nodes "
                "(%s). Shared NUMA nodes with [%d, %d).\n", i,
                hwloc_bitmap_weight(thread_cpusets[i]), buffer,
                hwloc_bitmap_weight(nodeset), nbuf, base, limit);
        free(buffer);
        free(nbuf);
#endif
    }

    // The scratch bitmaps are local to this function; only thread_cpusets
    // (and the topology) outlive it.
    hwloc_bitmap_free(other_nodeset);
    hwloc_bitmap_free(nodeset);
    hwloc_bitmap_free(cpuset);

#endif
}