Example #1
int hwloc_look_hardwired_fujitsu_fx10(struct hwloc_topology *topology)
{
  /* FIXME: what if a broken core is disabled? */
  unsigned i;
  hwloc_obj_t obj;
  hwloc_bitmap_t set;

  for(i=0; i<16; i++) {
    set = hwloc_bitmap_alloc();
    hwloc_bitmap_set(set, i);

    obj = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, -1);
    obj->cpuset = hwloc_bitmap_dup(set);
    obj->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;
    obj->attr->cache.depth = 1;
    obj->attr->cache.size = 32*1024;
    obj->attr->cache.linesize = 128;
    obj->attr->cache.associativity = 2;
    hwloc_insert_object_by_cpuset(topology, obj);

    obj = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, -1);
    obj->cpuset = hwloc_bitmap_dup(set);
    obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
    obj->attr->cache.depth = 1;
    obj->attr->cache.size = 32*1024;
    obj->attr->cache.linesize = 128;
    obj->attr->cache.associativity = 2;
    hwloc_insert_object_by_cpuset(topology, obj);

    obj = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, i);
    obj->cpuset = set;
    hwloc_insert_object_by_cpuset(topology, obj);
  }

  set = hwloc_bitmap_alloc();
  hwloc_bitmap_set_range(set, 0, 15);

  obj = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, -1);
  obj->cpuset = hwloc_bitmap_dup(set);
  obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
  obj->attr->cache.depth = 2;
  obj->attr->cache.size = 12*1024*1024;
  obj->attr->cache.linesize = 128;
  obj->attr->cache.associativity = 24;
  hwloc_insert_object_by_cpuset(topology, obj);

  obj = hwloc_alloc_setup_object(HWLOC_OBJ_PACKAGE, 0);
  obj->cpuset = set;
  hwloc_obj_add_info(obj, "CPUVendor", "Fujitsu");
  hwloc_obj_add_info(obj, "CPUModel", "SPARC64 IXfx");
  hwloc_insert_object_by_cpuset(topology, obj);

  hwloc_setup_pu_level(topology, 16);

  return 0;
}
Example #2
inline static void traverse(hwloc_obj_t object)
{
  static int index = 0, socket = -1, core = -1, pu = -1;

  assert(index < pu_count);
  switch (object->type) {
    case HWLOC_OBJ_SOCKET:
        socket++;
        core = -1;
        pu = -1;
        break;
    case HWLOC_OBJ_CORE:
        core++;
        pu = -1;
        break;
    case HWLOC_OBJ_PU:
        pu++;
        hw_places[index].socket = socket;
        hw_places[index].core = core;
        hw_places[index].pu = pu;
        cpu_sets[index] = hwloc_bitmap_dup(object->cpuset);
        index++;
        break;
    default:
        break;
  }

  for (int i = 0; i < object->arity; i++) {
    traverse(object->children[i]);
  }
}
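
The traverse() function above relies on globals defined elsewhere in its project (pu_count, hw_places, cpu_sets). A minimal driver sketch, assuming those globals exist and the topology has already been loaded (the function name here is illustrative):

static void build_hw_places(hwloc_topology_t topology)
{
  /* Depth-first walk from the root object; traverse() records the
   * socket/core/PU indices and duplicates each PU cpuset into the globals. */
  traverse(hwloc_get_root_obj(topology));
}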
Example #3
static hwloc_obj_t insert_task(hwloc_topology_t topology, hwloc_cpuset_t cpuset, const char * name)
{
  hwloc_obj_t group, obj;

  hwloc_bitmap_and(cpuset, cpuset, hwloc_topology_get_topology_cpuset(topology));
  if (hwloc_bitmap_iszero(cpuset))
    return NULL;

  /* try to insert a group at exact position */
  group = hwloc_topology_alloc_group_object(topology);
  if (!group)
    return NULL;
  group->cpuset = hwloc_bitmap_dup(cpuset);
  group = hwloc_topology_insert_group_object(topology, group);
  if (!group) {
    /* try to insert in a larger parent */
    char *s;
    hwloc_bitmap_asprintf(&s, cpuset);
    group = hwloc_get_obj_covering_cpuset(topology, cpuset);
    fprintf(stderr, "Inserting process `%s' below parent larger than cpuset %s\n", name, s);
    free(s);
  }
  obj = hwloc_topology_insert_misc_object(topology, group, name);
  if (!obj)
    fprintf(stderr, "Failed to insert process `%s'\n", name);
  else
    obj->subtype = strdup("Process");

  return obj;
}
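
A hypothetical caller for insert_task() above, using only public hwloc calls: it fetches the current process binding and inserts it as a Misc object. The function name and the minimal error handling are illustrative, not part of the original example:

static void insert_self(hwloc_topology_t topology, const char *name)
{
  hwloc_bitmap_t cpuset = hwloc_bitmap_alloc();
  /* Fetch the binding of the whole current process, then reuse insert_task(). */
  if (!hwloc_get_cpubind(topology, cpuset, HWLOC_CPUBIND_PROCESS))
    insert_task(topology, cpuset, name);
  hwloc_bitmap_free(cpuset);
}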
Example #4
void AbstractCoreBoundTaskQueue::launchThread(int core) {
    //get the number of cores on system
    int NUM_PROCS = getNumberOfCoresOnSystem();

    if (core < NUM_PROCS) {
        _thread = new std::thread(&AbstractTaskQueue::executeTask, this);
        hwloc_cpuset_t cpuset;
        hwloc_obj_t obj;
        hwloc_topology_t topology = getHWTopology();

        obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, core);
        // the bitmap to modify
        cpuset = hwloc_bitmap_dup(obj->cpuset);
        // remove hyperthreads
        hwloc_bitmap_singlify(cpuset);
        // bind
        if (hwloc_set_thread_cpubind(topology, _thread->native_handle(), cpuset, HWLOC_CPUBIND_STRICT | HWLOC_CPUBIND_NOMEMBIND)) {
            char *str;
            int error = errno;
            hwloc_bitmap_asprintf(&str, obj->cpuset);
            fprintf(stderr, "Couldn't bind to cpuset %s: %s\n", str, strerror(error));
            fprintf(stderr, "Continuing as normal, however, no guarantees\n");
            //throw std::runtime_error(strerror(error));
        }

        hwloc_bitmap_free(cpuset);

    } else {
        // this case should never happen, as TaskQueue is only initialized from SimpleTaskScheduler, which captures this case
        throw std::logic_error("CPU to run thread on is larger than number of total cores; seems that TaskQueue was initialized outside of SimpleTaskScheduler, which should not happen");
    }
}
Example #5
void initialize_hwloc(int nb_workers) {
#ifdef HAVE_HWLOC
  hwloc_topology_init(&topology);
  hwloc_topology_load(topology);
  bool numa_alloc_interleaved = (nb_workers == 0) ? false : true;
  numa_alloc_interleaved = cmdline::parse_or_default("numa_alloc_interleaved", numa_alloc_interleaved);
  if (numa_alloc_interleaved) {
    hwloc_cpuset_t all_cpus =
      hwloc_bitmap_dup(hwloc_topology_get_topology_cpuset(topology));
    int err = hwloc_set_membind(topology, all_cpus, HWLOC_MEMBIND_INTERLEAVE, 0);
    if (err < 0) {
      printf("Warning: failed to set NUMA round-robin allocation policy\n");
    }
  }
#endif
}
Example #6
int main(void)
{
  hwloc_topology_t topology;
  hwloc_bitmap_t cpuset;
  int err;

  /* check the OS topology */
  hwloc_topology_init(&topology);
  hwloc_topology_load(topology);
  assert(hwloc_topology_is_thissystem(topology));

  cpuset = hwloc_bitmap_dup(hwloc_topology_get_complete_cpuset(topology));
  result("Binding with OS backend", hwloc_set_cpubind(topology, cpuset, 0));

  hwloc_topology_destroy(topology);

  /* We assume there is a real processor numbered 0 */
  hwloc_bitmap_zero(cpuset);
  hwloc_bitmap_set(cpuset, 0);

  /* check a synthetic topology */
  hwloc_topology_init(&topology);
  hwloc_topology_set_synthetic(topology, "1");
  hwloc_topology_load(topology);
  assert(!hwloc_topology_is_thissystem(topology));

  err = hwloc_set_cpubind(topology, cpuset, 0);
  result("Binding with synthetic backend", err);
  assert(!err);

  hwloc_topology_destroy(topology);

  /* check a synthetic topology but assuming it's the system topology */
  hwloc_topology_init(&topology);
  hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM);
  hwloc_topology_set_synthetic(topology, "1");
  hwloc_topology_load(topology);
  assert(hwloc_topology_is_thissystem(topology));

  result("Binding with synthetic backend faking is_thissystem", hwloc_set_cpubind(topology, cpuset, 0));

  hwloc_topology_destroy(topology);

  hwloc_bitmap_free(cpuset);

  return 0;
}
Example #7
File: Core.cpp Project: asyr625/commonpp
bool Core::bind()
{
    auto cpuset = hwloc_bitmap_dup(core_->cpuset);
    hwloc_bitmap_singlify(cpuset);

    if (hwloc_set_cpubind(topology_, cpuset, 0))
    {
        auto error = errno;
        LOG(thread_logger, warning) << "Error setting thread affinity: "
                                    << strerror(error);
        hwloc_bitmap_free(cpuset);
        return false;
    }

    hwloc_bitmap_free(cpuset);
    return true;
}
Example #8
void bindCurrentThreadToNumaNode(int node) {
  hwloc_topology_t topology = getHWTopology();
  hwloc_cpuset_t cpuset;
  hwloc_obj_t obj;

  // The actual node
  obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NODE, node);

  // obj is nullptr on non NUMA machines
  if (obj == nullptr) {
    fprintf(stderr, "Couldn't get hwloc object, bindCurrentThreadToNumaNode failed!\n");
    return;
  }

  cpuset = hwloc_bitmap_dup(obj->cpuset);
  // hwloc_bitmap_singlify(cpuset);

  // bind
  if (hwloc_set_cpubind(topology, cpuset, HWLOC_CPUBIND_STRICT | HWLOC_CPUBIND_NOMEMBIND | HWLOC_CPUBIND_THREAD)) {
    char* str;
    int error = errno;
    hwloc_bitmap_asprintf(&str, obj->cpuset);
    printf("Couldn't bind to cpuset %s: %s\n", str, strerror(error));
    free(str);
    throw std::runtime_error(strerror(error));
  }

  // free duplicated cpuset
  hwloc_bitmap_free(cpuset);

  // assuming single machine system
  obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_MACHINE, 0);
  // set membind policy interleave for this thread
  if (hwloc_set_membind_nodeset(
          topology, obj->nodeset, HWLOC_MEMBIND_INTERLEAVE, HWLOC_MEMBIND_STRICT | HWLOC_MEMBIND_THREAD) && errno != ENOSYS) {
    char* str;
    int error = errno;
    hwloc_bitmap_asprintf(&str, obj->nodeset);
    fprintf(stderr, "Couldn't membind to nodeset  %s: %s\n", str, strerror(error));
    fprintf(stderr, "Continuing as normal, however, no guarantees\n");
    free(str);
  }
}
Example #9
static int
hwloc__get_largest_objs_inside_cpuset (struct hwloc_obj *current, hwloc_const_bitmap_t set,
				       struct hwloc_obj ***res, int *max)
{
  int gotten = 0;
  unsigned i;

  /* the caller must ensure this */
  if (*max <= 0)
    return 0;

  if (hwloc_bitmap_isequal(current->cpuset, set)) {
    **res = current;
    (*res)++;
    (*max)--;
    return 1;
  }

  for (i=0; i<current->arity; i++) {
    hwloc_bitmap_t subset = hwloc_bitmap_dup(set);
    int ret;

    /* split out the cpuset part corresponding to this child and see if there's anything to do */
    if (current->children[i]->cpuset) {
      hwloc_bitmap_and(subset, subset, current->children[i]->cpuset);
      if (hwloc_bitmap_iszero(subset)) {
        hwloc_bitmap_free(subset);
        continue;
      }
    }

    ret = hwloc__get_largest_objs_inside_cpuset (current->children[i], subset, res, max);
    gotten += ret;
    hwloc_bitmap_free(subset);

    /* if no more room to store remaining objects, return what we got so far */
    if (!*max)
      break;
  }

  return gotten;
}
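
The helper above appears to be the recursive worker behind the public hwloc_get_largest_objs_inside_cpuset() entry point. A small usage sketch of that public API (the array size and the printing are arbitrary choices here):

static void print_covering_objects(hwloc_topology_t topology, hwloc_const_bitmap_t set)
{
  hwloc_obj_t objs[16];
  /* Fill objs[] with the largest objects whose cpusets exactly tile `set`;
   * the return value is the number of objects stored (or -1 on error). */
  int n = hwloc_get_largest_objs_inside_cpuset(topology, set, objs, 16);
  for (int k = 0; k < n; k++)
    printf("%s\n", hwloc_obj_type_string(objs[k]->type));
}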
Example #10
static void print_task(hwloc_topology_t topology,
                       long pid_number, const char *name, hwloc_bitmap_t cpuset,
                       char *pidoutput,
                       int thread)
{
    printf("%s%ld\t", thread ? " " : "", pid_number);

    if (show_cpuset) {
        char *cpuset_str = NULL;
        hwloc_bitmap_asprintf(&cpuset_str, cpuset);
        printf("%s", cpuset_str);
        free(cpuset_str);
    } else {
        hwloc_bitmap_t remaining = hwloc_bitmap_dup(cpuset);
        int first = 1;
        while (!hwloc_bitmap_iszero(remaining)) {
            char type[64];
            unsigned idx;
            hwloc_obj_t obj = hwloc_get_first_largest_obj_inside_cpuset(topology, remaining);
            /* don't show a cache if there's something equivalent and nicer */
            while (hwloc_obj_type_is_cache(obj->type) && obj->arity == 1)
                obj = obj->first_child;
            hwloc_obj_type_snprintf(type, sizeof(type), obj, 1);
            idx = logical ? obj->logical_index : obj->os_index;
            if (idx == (unsigned) -1)
                printf("%s%s", first ? "" : " ", type);
            else
                printf("%s%s:%u", first ? "" : " ", type, idx);
            hwloc_bitmap_andnot(remaining, remaining, obj->cpuset);
            first = 0;
        }
        hwloc_bitmap_free(remaining);
    }

    printf("\t\t%s%s%s\n", name, pidoutput ? "\t" : "", pidoutput ? pidoutput : "");
}
Example #11
static void
look_rset(int sdl, hwloc_obj_type_t type, struct hwloc_topology *topology, int level)
{
  rsethandle_t rset, rad;
  int i,maxcpus,j;
  int nbnodes;
  struct hwloc_obj *obj;

  if ((topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM))
    rset = rs_alloc(RS_ALL);
  else
    rset = rs_alloc(RS_PARTITION);
  rad = rs_alloc(RS_EMPTY);
  nbnodes = rs_numrads(rset, sdl, 0);
  if (nbnodes == -1) {
    perror("rs_numrads");
    return;
  }

  for (i = 0; i < nbnodes; i++) {
    if (rs_getrad(rset, rad, sdl, i, 0)) {
      fprintf(stderr,"rs_getrad(%d) failed: %s\n", i, strerror(errno));
      continue;
    }
    if (!rs_getinfo(rad, R_NUMPROCS, 0))
      continue;

    /* It seems logical processors are numbered from 1 here, while the
     * bindprocessor functions number them from 0... */
    obj = hwloc_alloc_setup_object(type, i - (type == HWLOC_OBJ_PU));
    obj->cpuset = hwloc_bitmap_alloc();
    obj->os_level = sdl;
    maxcpus = rs_getinfo(rad, R_MAXPROCS, 0);
    for (j = 0; j < maxcpus; j++) {
      if (rs_op(RS_TESTRESOURCE, rad, NULL, R_PROCS, j))
	hwloc_bitmap_set(obj->cpuset, j);
    }
    switch(type) {
      case HWLOC_OBJ_NODE:
	obj->nodeset = hwloc_bitmap_alloc();
	hwloc_bitmap_set(obj->nodeset, i);
	obj->memory.local_memory = 0; /* TODO: odd, rs_getinfo(rad, R_MEMSIZE, 0) << 10 returns the total memory ... */
	obj->memory.page_types_len = 2;
	obj->memory.page_types = malloc(2*sizeof(*obj->memory.page_types));
	memset(obj->memory.page_types, 0, 2*sizeof(*obj->memory.page_types));
	obj->memory.page_types[0].size = hwloc_getpagesize();
#ifdef HAVE__SC_LARGE_PAGESIZE
	obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE);
#endif
	/* TODO: obj->memory.page_types[1].count = rs_getinfo(rset, R_LGPGFREE, 0) / hugepagesize */
	break;
      case HWLOC_OBJ_CACHE:
	obj->attr->cache.size = _system_configuration.L2_cache_size;
	obj->attr->cache.associativity = _system_configuration.L2_cache_asc;
	obj->attr->cache.linesize = 0; /* TODO: ? */
	obj->attr->cache.depth = 2;
	obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED; /* FIXME? */
	break;
      case HWLOC_OBJ_GROUP:
	obj->attr->group.depth = level;
	break;
      case HWLOC_OBJ_CORE:
      {
	hwloc_obj_t obj2, obj3;
	obj2 = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, i);
	obj2->cpuset = hwloc_bitmap_dup(obj->cpuset);
	obj2->attr->cache.size = _system_configuration.dcache_size;
	obj2->attr->cache.associativity = _system_configuration.dcache_asc;
	obj2->attr->cache.linesize = _system_configuration.dcache_line;
	obj2->attr->cache.depth = 1;
	if (_system_configuration.cache_attrib & (1<<30)) {
	  /* Unified cache */
	  obj2->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
	  hwloc_debug("Adding an L1u cache for core %d\n", i);
	  hwloc_insert_object_by_cpuset(topology, obj2);
	} else {
	  /* Separate Instruction and Data caches */
	  obj2->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
	  hwloc_debug("Adding an L1d cache for core %d\n", i);
	  hwloc_insert_object_by_cpuset(topology, obj2);

	  obj3 = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, i);
	  obj3->cpuset = hwloc_bitmap_dup(obj->cpuset);
	  obj3->attr->cache.size = _system_configuration.icache_size;
	  obj3->attr->cache.associativity = _system_configuration.icache_asc;
	  obj3->attr->cache.linesize = _system_configuration.icache_line;
	  obj3->attr->cache.depth = 1;
	  obj3->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;
	  hwloc_debug("Adding an L1i cache for core %d\n", i);
	  hwloc_insert_object_by_cpuset(topology, obj3);
	}
	break;
      }
      default:
	break;
    }
    hwloc_debug_2args_bitmap("%s %d has cpuset %s\n",
	       hwloc_obj_type_string(type),
	       i, obj->cpuset);
    hwloc_insert_object_by_cpuset(topology, obj);
  }

  rs_free(rset);
  rs_free(rad);
}
Example #12
PASTIX_INT sopalin_bindthread(PASTIX_INT cpu)
{
#ifdef MARCEL

  {
    marcel_vpset_t vpset = MARCEL_VPSET_ZERO;
    marcel_vpset_vp(&vpset, cpu);
    marcel_apply_vpset(&vpset);
  }

#else /* In all other cases, take the architecture into account */

#ifdef WITH_HWLOC
  {
    hwloc_topology_t topology; /* Topology object */
    hwloc_obj_t      obj;      /* Hwloc object    */
    hwloc_cpuset_t   cpuset;   /* HwLoc cpuset    */

    /* Allocate and initialize topology object.  */
    hwloc_topology_init(&topology);

    /* Perform the topology detection.  */
    hwloc_topology_load(topology);

    /* Get last one.  */
    obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, cpu);
    if (!obj)
      return 0;

    /* Get a copy of its cpuset that we may modify.  */
    /* Get only one logical processor (in case the core is SMT/hyperthreaded).  */
#if !defined(HWLOC_BITMAP_H)
    cpuset = hwloc_cpuset_dup(obj->cpuset);
    hwloc_cpuset_singlify(cpuset);
#else
    cpuset = hwloc_bitmap_dup(obj->cpuset);
    hwloc_bitmap_singlify(cpuset);
#endif

    /* And try to bind ourself there.  */
    if (hwloc_set_cpubind(topology, cpuset, HWLOC_CPUBIND_THREAD)) {
      char *str = NULL;
#if !defined(HWLOC_BITMAP_H)
      hwloc_cpuset_asprintf(&str, obj->cpuset);
#else
      hwloc_bitmap_asprintf(&str, obj->cpuset);
#endif
      printf("Couldn't bind to cpuset %s\n", str);
      free(str);
    }

    /* Get the number at Proc level */
    cpu = obj->children[0]->os_index;

    /* Free our cpuset copy */
#if !defined(HWLOC_BITMAP_H)
    hwloc_cpuset_free(cpuset);
#else
    hwloc_bitmap_free(cpuset);
#endif

    /* Destroy topology object.  */
    hwloc_topology_destroy(topology);
  }
#else /* WITH_HWLOC */
#ifdef X_ARCHpower_ibm_aix
  {
    tid_t self_ktid = thread_self ();

    bindprocessor(BINDTHREAD, self_ktid, cpu);
  }
#elif (defined X_ARCHalpha_compaq_osf1)
  {
    bind_to_cpu_id(getpid(), cpu, 0);
  }
#elif (defined X_ARCHi686_pc_linux)

#ifndef X_ARCHi686_mac
  {
    cpu_set_t mask;
    CPU_ZERO(&mask);
    CPU_SET(cpu, &mask);

#ifdef HAVE_OLD_SCHED_SETAFFINITY
    if(sched_setaffinity(0,&mask) < 0)
#else /* HAVE_OLD_SCHED_SETAFFINITY */
    if(sched_setaffinity(0,sizeof(mask),&mask) < 0)
#endif /* HAVE_OLD_SCHED_SETAFFINITY */
      {
        perror("sched_setaffinity");
        EXIT(MOD_SOPALIN, INTERNAL_ERR);
      }
  }
#else /* X_ARCHi686_mac */
  {
    thread_affinity_policy_data_t ap;
    int                           ret;

    ap.affinity_tag = 1; /* non-null affinity tag */
    ret = thread_policy_set(
          mach_thread_self(),
          THREAD_AFFINITY_POLICY,
          (integer_t*) &ap,
          THREAD_AFFINITY_POLICY_COUNT
          );
    if(ret != 0)
      {
        perror("thread_policy_set");
        EXIT(MOD_SOPALIN, INTERNAL_ERR);
      }
  }
#endif /* X_ARCHi686_mac */
#endif /* X_ARCHxxx      */
#endif /* WITH_HWLOC     */
#endif /* MARCEL         */

  return cpu;
}
Example #13
int bind_myself_to_core(hwloc_topology_t topology, int id){
  hwloc_cpuset_t cpuset;
  hwloc_obj_t obj;
  char *str;
  int binding_res;
  int depth = hwloc_topology_get_depth(topology);
  int nb_cores = hwloc_get_nbobjs_by_depth(topology, depth-1);
  int my_core;
  int nb_threads = get_nb_threads();
  /* printf("depth=%d\n",depth); */

  switch (mapping_policy){
  case SCATTER:
    my_core = id*(nb_cores/nb_threads);
    break;
  default:
    if(verbose_level>=WARNING){
      printf("Wrong scheduling policy. Using COMPACT\n");
    }
  case COMPACT:
    my_core = id%nb_cores;
  }

    if(verbose_level>=INFO){
      printf("Mapping thread %d on core %d\n",id,my_core);
    }

    /* Get my core. */
    obj = hwloc_get_obj_by_depth(topology, depth-1, my_core);
    if (obj) {
      /* Get a copy of its cpuset that we may modify. */
      cpuset = hwloc_bitmap_dup(obj->cpuset);

      /* Get only one logical processor (in case the core is
	 SMT/hyperthreaded). */
      hwloc_bitmap_singlify(cpuset);


      /*hwloc_bitmap_asprintf(&str, cpuset);
      printf("Binding thread %d to cpuset %s\n", my_core,str);
      FREE(str);
      */

      /* And try  to bind ourself there. */
      binding_res = hwloc_set_cpubind(topology, cpuset, HWLOC_CPUBIND_THREAD);
      if (binding_res == -1){
	int error = errno;
	hwloc_bitmap_asprintf(&str, obj->cpuset);
	if(verbose_level>=WARNING)
	  printf("Thread %d couldn't bind to cpuset %s: %s.\n This thread is not bound to any core...\n", my_core, str, strerror(error));
	free(str); /* str is allocated by hwloc, free it normally */
	return 0;
      }
      /* FREE our cpuset copy */
      hwloc_bitmap_free(cpuset);
      return 1;
    }else{
      if(verbose_level>=WARNING)
	printf("No valid object for core id %d!\n",my_core);
      return 0;
    }
}
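
A hypothetical pthread entry point using bind_myself_to_core() above; the struct, the worker name, and the error message are illustrative, and the topology handle is assumed to have been initialized and loaded by the caller:

struct worker_arg { hwloc_topology_t topology; int id; };

static void *worker(void *p)
{
  struct worker_arg *arg = (struct worker_arg *)p;
  /* Pin this thread before doing any work; bind_myself_to_core() returns 0 on failure. */
  if (!bind_myself_to_core(arg->topology, arg->id))
    fprintf(stderr, "thread %d runs unbound\n", arg->id);
  /* ... actual work ... */
  return NULL;
}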
Example #14
int main(void)
{
  hwloc_topology_t topology;
#ifdef HWLOC_HAVE_CPU_SET
  unsigned depth;
  hwloc_bitmap_t hwlocset;
  cpu_set_t schedset;
  hwloc_obj_t obj;
  int err;
#endif /* HWLOC_HAVE_CPU_SET */

  hwloc_topology_init(&topology);
  hwloc_topology_load(topology);

#ifdef HWLOC_HAVE_CPU_SET

  depth = hwloc_topology_get_depth(topology);

  hwlocset = hwloc_bitmap_dup(hwloc_topology_get_complete_cpuset(topology));
  hwloc_cpuset_to_glibc_sched_affinity(topology, hwlocset, &schedset, sizeof(schedset));
#ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY
  err = sched_setaffinity(0, sizeof(schedset));
#else
  err = sched_setaffinity(0, sizeof(schedset), &schedset);
#endif
  assert(!err);
  hwloc_bitmap_free(hwlocset);

#ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY
  err = sched_getaffinity(0, sizeof(schedset));
#else
  err = sched_getaffinity(0, sizeof(schedset), &schedset);
#endif
  assert(!err);
  hwlocset = hwloc_bitmap_alloc();
  hwloc_cpuset_from_glibc_sched_affinity(topology, hwlocset, &schedset, sizeof(schedset));
  assert(hwloc_bitmap_isincluded(hwlocset, hwloc_topology_get_complete_cpuset(topology)));
  hwloc_bitmap_andnot(hwlocset, hwlocset, hwloc_topology_get_online_cpuset(topology));
  hwloc_bitmap_andnot(hwlocset, hwlocset, hwloc_topology_get_allowed_cpuset(topology));
  assert(hwloc_bitmap_iszero(hwlocset));
  hwloc_bitmap_free(hwlocset);

  obj = hwloc_get_obj_by_depth(topology, depth-1, hwloc_get_nbobjs_by_depth(topology, depth-1) - 1);
  assert(obj);
  assert(obj->type == HWLOC_OBJ_PU);

  hwlocset = hwloc_bitmap_dup(obj->cpuset);
  hwloc_cpuset_to_glibc_sched_affinity(topology, hwlocset, &schedset, sizeof(schedset));
#ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY
  err = sched_setaffinity(0, sizeof(schedset));
#else
  err = sched_setaffinity(0, sizeof(schedset), &schedset);
#endif
  assert(!err);
  hwloc_bitmap_free(hwlocset);

#ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY
  err = sched_getaffinity(0, sizeof(schedset));
#else
  err = sched_getaffinity(0, sizeof(schedset), &schedset);
#endif
  assert(!err);
  hwlocset = hwloc_bitmap_alloc();
  hwloc_cpuset_from_glibc_sched_affinity(topology, hwlocset, &schedset, sizeof(schedset));
  assert(hwloc_bitmap_isequal(hwlocset, obj->cpuset));
  hwloc_bitmap_free(hwlocset);

#endif /* HWLOC_HAVE_CPU_SET */

  hwloc_topology_destroy(topology);
  return 0;
}
Example #15
int main(int argc, char *argv[])
{
    const struct hwloc_topology_support *support;
    hwloc_topology_t topology;
    hwloc_const_bitmap_t topocpuset;
    hwloc_bitmap_t cpuset;
    unsigned long flags = 0;
    DIR *dir;
    struct dirent *dirent;
    int show_all = 0;
    int show_threads = 0;
    int get_last_cpu_location = 0;
    char *callname;
    char *pidcmd = NULL;
    int err;
    int opt;

    callname = strrchr(argv[0], '/');
    if (!callname)
        callname = argv[0];
    else
        callname++;
    /* skip argv[0], handle options */
    argc--;
    argv++;

    hwloc_utils_check_api_version(callname);

    while (argc >= 1) {
        opt = 0;
        if (!strcmp(argv[0], "-a"))
            show_all = 1;
        else if (!strcmp(argv[0], "-l") || !strcmp(argv[0], "--logical")) {
            logical = 1;
        } else if (!strcmp(argv[0], "-p") || !strcmp(argv[0], "--physical")) {
            logical = 0;
        } else if (!strcmp(argv[0], "-c") || !strcmp(argv[0], "--cpuset")) {
            show_cpuset = 1;
        } else if (!strcmp(argv[0], "-e") || !strncmp(argv[0], "--get-last-cpu-location", 10)) {
            get_last_cpu_location = 1;
        } else if (!strcmp(argv[0], "-t") || !strcmp(argv[0], "--threads")) {
#ifdef HWLOC_LINUX_SYS
            show_threads = 1;
#else
            fprintf (stderr, "Listing threads is currently only supported on Linux\n");
#endif
        } else if (!strcmp (argv[0], "--whole-system")) {
            flags |= HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM;
        } else if (!strcmp (argv[0], "--pid-cmd")) {
            if (argc < 2) {
                usage(callname, stdout);
                exit(EXIT_FAILURE);
            }
            pidcmd = argv[1];
            opt = 1;
        } else {
            fprintf (stderr, "Unrecognized option: %s\n", argv[0]);
            usage (callname, stderr);
            exit(EXIT_FAILURE);
        }
        argc -= opt+1;
        argv += opt+1;
    }

    err = hwloc_topology_init(&topology);
    if (err)
        goto out;

    hwloc_topology_set_flags(topology, flags);

    err = hwloc_topology_load(topology);
    if (err)
        goto out_with_topology;

    support = hwloc_topology_get_support(topology);

    if (get_last_cpu_location) {
        if (!support->cpubind->get_proc_last_cpu_location)
            goto out_with_topology;
    } else {
        if (!support->cpubind->get_proc_cpubind)
            goto out_with_topology;
    }

    topocpuset = hwloc_topology_get_topology_cpuset(topology);

    dir  = opendir("/proc");
    if (!dir)
        goto out_with_topology;

    cpuset = hwloc_bitmap_alloc();
    if (!cpuset)
        goto out_with_dir;

    while ((dirent = readdir(dir))) {
        long pid_number;
        hwloc_pid_t pid;
        char pidoutput[1024];
        char *end;
        char name[64] = "";
        /* management of threads */
        unsigned boundthreads = 0, i;
        long *tids = NULL; /* NULL if process is not threaded */
        hwloc_bitmap_t *tidcpusets = NULL;

        pid_number = strtol(dirent->d_name, &end, 10);
        if (*end)
            /* Not a number */
            continue;

        pid = hwloc_pid_from_number(pid_number, 0);

#ifdef HWLOC_LINUX_SYS
        {
            unsigned pathlen = 6 + strlen(dirent->d_name) + 1 + 7 + 1;
            char *path;
            int file;
            ssize_t n;

            path = malloc(pathlen);
            snprintf(path, pathlen, "/proc/%s/cmdline", dirent->d_name);
            file = open(path, O_RDONLY);
            free(path);

            if (file >= 0) {
                n = read(file, name, sizeof(name) - 1);
                close(file);

                if (n <= 0)
                    /* Ignore kernel threads and errors */
                    continue;

                name[n] = 0;
            }
        }
#endif /* HWLOC_LINUX_SYS */

        if (show_threads) {
#ifdef HWLOC_LINUX_SYS
            /* check if some threads must be displayed */
            unsigned pathlen = 6 + strlen(dirent->d_name) + 1 + 4 + 1;
            char *path;
            DIR *taskdir;

            path = malloc(pathlen);
            snprintf(path, pathlen, "/proc/%s/task", dirent->d_name);
            taskdir = opendir(path);
            if (taskdir) {
                struct dirent *taskdirent;
                long tid;
                unsigned n = 0;
                /* count threads */
                while ((taskdirent = readdir(taskdir))) {
                    tid = strtol(taskdirent->d_name, &end, 10);
                    if (*end)
                        /* Not a number */
                        continue;
                    n++;
                }
                if (n > 1) {
                    /* if there's more than one thread, see if some are bound */
                    tids = malloc(n * sizeof(*tids));
                    tidcpusets = calloc(n+1, sizeof(*tidcpusets));
                    if (tids && tidcpusets) {
                        /* reread the directory but gather info now */
                        rewinddir(taskdir);
                        i = 0;
                        while ((taskdirent = readdir(taskdir))) {
                            tid = strtol(taskdirent->d_name, &end, 10);
                            if (*end)
                                /* Not a number */
                                continue;
                            if (get_last_cpu_location) {
                                if (hwloc_linux_get_tid_last_cpu_location(topology, tid, cpuset))
                                    continue;
                            } else {
                                if (hwloc_linux_get_tid_cpubind(topology, tid, cpuset))
                                    continue;
                            }
                            hwloc_bitmap_and(cpuset, cpuset, topocpuset);
                            tids[i] = tid;
                            tidcpusets[i] = hwloc_bitmap_dup(cpuset);
                            i++;
                            if (hwloc_bitmap_iszero(cpuset))
                                continue;
                            if (hwloc_bitmap_isequal(cpuset, topocpuset) && !show_all)
                                continue;
                            boundthreads++;
                        }
                    } else {
                        /* failed to alloc, behave as if there were no threads */
                        free(tids);
                        tids = NULL;
                        free(tidcpusets);
                        tidcpusets = NULL;
                    }
                }
                closedir(taskdir);
            }
#endif /* HWLOC_LINUX_SYS */
        }

        if (get_last_cpu_location) {
            if (hwloc_get_proc_last_cpu_location(topology, pid, cpuset, 0))
                continue;
        } else {
            if (hwloc_get_proc_cpubind(topology, pid, cpuset, 0))
                continue;
        }

        hwloc_bitmap_and(cpuset, cpuset, topocpuset);
        if (hwloc_bitmap_iszero(cpuset))
            continue;

        /* don't print anything if the process isn't bound and if no threads are bound and if not showing all */
        if (hwloc_bitmap_isequal(cpuset, topocpuset) && (!tids || !boundthreads) && !show_all)
            continue;

        pidoutput[0] = '\0';
        if (pidcmd) {
            char *cmd;
            FILE *file;
            cmd = malloc(strlen(pidcmd)+1+5+2+1);
            sprintf(cmd, "%s %u", pidcmd, pid);
            file = popen(cmd, "r");
            if (file) {
                if (fgets(pidoutput, sizeof(pidoutput), file)) {
                    end = strchr(pidoutput, '\n');
                    if (end)
                        *end = '\0';
                }
                pclose(file);
            }
            free(cmd);
        }

        /* print the process */
        print_task(topology, pid_number, name, cpuset, pidoutput[0] == '\0' ? NULL : pidoutput, 0);
        if (tids)
            /* print each tid we found (its tidcpuset isn't NULL anymore) */
            for(i=0; tidcpusets[i] != NULL; i++) {
                print_task(topology, tids[i], "", tidcpusets[i], NULL, 1);
                hwloc_bitmap_free(tidcpusets[i]);
            }

        /* free threads stuff */
        free(tidcpusets);
        free(tids);
    }

    err = 0;
    hwloc_bitmap_free(cpuset);

out_with_dir:
    closedir(dir);
out_with_topology:
    hwloc_topology_destroy(topology);
out:
    return err;
}
Example #16
/* user to have to play with the cgroup hierarchy to modify it */
extern int task_cgroup_cpuset_set_task_affinity(slurmd_job_t *job)
{
	int fstatus = SLURM_ERROR;

#ifndef HAVE_HWLOC

	error("task/cgroup: plugin not compiled with hwloc support, "
	      "skipping affinity.");
	return fstatus;

#else
	uint32_t i;
	uint32_t nldoms;
	uint32_t nsockets;
	uint32_t ncores;
	uint32_t npus;
	uint32_t nobj;

	uint32_t pfirst,plast;
	uint32_t taskid = job->envtp->localid;
	uint32_t jntasks = job->node_tasks;
	uint32_t jnpus = jntasks * job->cpus_per_task;
	pid_t    pid = job->envtp->task_pid;

	cpu_bind_type_t bind_type;
	int verbose = 0;

	hwloc_topology_t topology;
#if HWLOC_API_VERSION <= 0x00010000
	hwloc_cpuset_t cpuset,ct;
#else
	hwloc_bitmap_t cpuset,ct;
#endif
	hwloc_obj_t obj;
	struct hwloc_obj *pobj;
	hwloc_obj_type_t hwtype;
	hwloc_obj_type_t req_hwtype;
	int hwdepth;

	size_t tssize;
	cpu_set_t ts;

	bind_type = job->cpu_bind_type ;
	if (conf->task_plugin_param & CPU_BIND_VERBOSE ||
	    bind_type & CPU_BIND_VERBOSE)
		verbose = 1 ;

	if (bind_type & CPU_BIND_NONE) {
		if (verbose)
			info("task/cgroup: task[%u] is requesting no affinity",
			     taskid);
		return 0;
	} else if (bind_type & CPU_BIND_TO_THREADS) {
		if (verbose)
			info("task/cgroup: task[%u] is requesting "
			     "thread level binding",taskid);
		req_hwtype = HWLOC_OBJ_PU;
	} else if (bind_type & CPU_BIND_TO_CORES) {
		if (verbose)
			info("task/cgroup: task[%u] is requesting "
			     "core level binding",taskid);
		req_hwtype = HWLOC_OBJ_CORE;
	} else if (bind_type & CPU_BIND_TO_SOCKETS) {
		if (verbose)
			info("task/cgroup: task[%u] is requesting "
			     "socket level binding",taskid);
		req_hwtype = HWLOC_OBJ_SOCKET;
	} else if (bind_type & CPU_BIND_TO_LDOMS) {
		if (verbose)
			info("task/cgroup: task[%u] is requesting "
			     "ldom level binding",taskid);
		req_hwtype = HWLOC_OBJ_NODE;
	} else {
		if (verbose)
			info("task/cgroup: task[%u] using core level binding"
			     " by default",taskid);
		req_hwtype = HWLOC_OBJ_CORE;
	}

	/* Allocate and initialize hwloc objects */
	hwloc_topology_init(&topology);
#if HWLOC_API_VERSION <= 0x00010000
	cpuset = hwloc_cpuset_alloc() ;
#else
	cpuset = hwloc_bitmap_alloc() ;
#endif

	/*
	 * Perform the topology detection. It will only get allowed PUs.
	 * Detect in the same time the granularity to use for binding.
	 * The granularity can be relaxed from threads to cores if enough
	 * cores are available as with hyperthread support, ntasks-per-core
	 * param can let us have access to more threads per core for each
	 * task
	 * Revert back to machine granularity if no finer-grained granularity
	 * matching the request is found. This will result in no affinity
	 * applied.
	 * The detected granularity will be used to find where to best place
	 * the task, then the cpu_bind option will be used to relax the
	 * affinity constraint and use more PUs. (i.e. use a core granularity
	 * to dispatch the tasks across the sockets and then provide access
	 * to each task to the cores of its socket.)
	 */
	hwloc_topology_load(topology);
	npus = (uint32_t) hwloc_get_nbobjs_by_type(topology,
						   HWLOC_OBJ_PU);
	ncores = (uint32_t) hwloc_get_nbobjs_by_type(topology,
						     HWLOC_OBJ_CORE);
	nsockets = (uint32_t) hwloc_get_nbobjs_by_type(topology,
						       HWLOC_OBJ_SOCKET);
	nldoms = (uint32_t) hwloc_get_nbobjs_by_type(topology,
						     HWLOC_OBJ_NODE);
	hwtype = HWLOC_OBJ_MACHINE;
	nobj = 1;
	if (npus >= jnpus || bind_type & CPU_BIND_TO_THREADS) {
		hwtype = HWLOC_OBJ_PU;
		nobj = npus;
	}
	if (ncores >= jnpus || bind_type & CPU_BIND_TO_CORES) {
		hwtype = HWLOC_OBJ_CORE;
		nobj = ncores;
	}
	if (nsockets >= jntasks &&
	     bind_type & CPU_BIND_TO_SOCKETS) {
		hwtype = HWLOC_OBJ_SOCKET;
		nobj = nsockets;
	}
	/*
	 * HWLOC returns all the NUMA nodes available regardless of the
	 * number of underlying sockets available (regardless of the allowed
	 * resources). So there is no guarantee that each ldom will be populated
	 * with usable sockets. So add a simple check that at least ensure that
	 * we have as many sockets as ldoms before moving to ldoms granularity
	 */
	if (nldoms >= jntasks &&
	     nsockets >= nldoms &&
	     bind_type & CPU_BIND_TO_LDOMS) {
		hwtype = HWLOC_OBJ_NODE;
		nobj = nldoms;
	}

	/*
	 * Perform a block binding on the detected object respecting the
	 * granularity.
	 * If not enough objects to do the job, revert to no affinity mode
	 */
	if (hwloc_compare_types(hwtype,HWLOC_OBJ_MACHINE) == 0) {

		info("task/cgroup: task[%u] disabling affinity because of %s "
		     "granularity",taskid,hwloc_obj_type_string(hwtype));

	} else if (hwloc_compare_types(hwtype,HWLOC_OBJ_CORE) >= 0 &&
		    jnpus > nobj) {

		info("task/cgroup: task[%u] not enough %s objects, disabling "
		     "affinity",taskid,hwloc_obj_type_string(hwtype));

	} else {

		if (verbose) {
			info("task/cgroup: task[%u] using %s granularity",
			     taskid,hwloc_obj_type_string(hwtype));
		}
		if (hwloc_compare_types(hwtype,HWLOC_OBJ_CORE) >= 0) {
			/* cores or threads granularity */
			pfirst = taskid *  job->cpus_per_task ;
			plast = pfirst + job->cpus_per_task - 1;
		} else {
			/* sockets or ldoms granularity */
			pfirst = taskid;
			plast = pfirst;
		}

		hwdepth = hwloc_get_type_depth(topology,hwtype);
		for (i = pfirst; i <= plast && i < nobj ; i++) {
			obj = hwloc_get_obj_by_depth(topology,hwdepth,(int)i);

			/* if requested binding overlap the granularity */
			/* use the ancestor cpuset instead of the object one */
			if (hwloc_compare_types(hwtype,req_hwtype) > 0) {

				/* Get the parent object of req_hwtype or the */
				/* one just above if not found (meaning of >0)*/
				/* (useful for ldoms binding with !NUMA nodes)*/
				pobj = obj->parent;
				while (pobj != NULL &&
					hwloc_compare_types(pobj->type,
							    req_hwtype) > 0)
					pobj = pobj->parent;

				if (pobj != NULL) {
					if (verbose)
						info("task/cgroup: task[%u] "
						     "higher level %s found",
						     taskid,
						     hwloc_obj_type_string(
							     pobj->type));
#if HWLOC_API_VERSION <= 0x00010000
					ct = hwloc_cpuset_dup(pobj->
							      allowed_cpuset);
					hwloc_cpuset_or(cpuset,cpuset,ct);
					hwloc_cpuset_free(ct);
#else
					ct = hwloc_bitmap_dup(pobj->
							      allowed_cpuset);
					hwloc_bitmap_or(cpuset,cpuset,ct);
					hwloc_bitmap_free(ct);
#endif
				} else {
					/* should not be executed */
					if (verbose)
						info("task/cgroup: task[%u] "
						     "no higher level found",
						     taskid);
#if HWLOC_API_VERSION <= 0x00010000
					ct = hwloc_cpuset_dup(obj->
							      allowed_cpuset);
					hwloc_cpuset_or(cpuset,cpuset,ct);
					hwloc_cpuset_free(ct);
#else
					ct = hwloc_bitmap_dup(obj->
							      allowed_cpuset);
					hwloc_bitmap_or(cpuset,cpuset,ct);
					hwloc_bitmap_free(ct);
#endif
				}

			} else {
#if HWLOC_API_VERSION <= 0x00010000
				ct = hwloc_cpuset_dup(obj->allowed_cpuset);
				hwloc_cpuset_or(cpuset,cpuset,ct);
				hwloc_cpuset_free(ct);
#else
				ct = hwloc_bitmap_dup(obj->allowed_cpuset);
				hwloc_bitmap_or(cpuset,cpuset,ct);
				hwloc_bitmap_free(ct);
#endif
			}
		}

		char *str;
#if HWLOC_API_VERSION <= 0x00010000
		hwloc_cpuset_asprintf(&str,cpuset);
#else
		hwloc_bitmap_asprintf(&str,cpuset);
#endif
		tssize = sizeof(cpu_set_t);
		if (hwloc_cpuset_to_glibc_sched_affinity(topology,cpuset,
							  &ts,tssize) == 0) {
			fstatus = SLURM_SUCCESS;
			if (sched_setaffinity(pid,tssize,&ts)) {
				error("task/cgroup: task[%u] unable to set "
				      "taskset '%s'",taskid,str);
				fstatus = SLURM_ERROR;
			} else if (verbose) {
				info("task/cgroup: task[%u] taskset '%s' is set"
				     ,taskid,str);
			}
		} else {
			error("task/cgroup: task[%u] unable to build "
			      "taskset '%s'",taskid,str);
			fstatus = SLURM_ERROR;
		}
		free(str);

	}

	/* Destroy hwloc objects */
#if HWLOC_API_VERSION <= 0x00010000
	hwloc_cpuset_free(cpuset);
#else
	hwloc_bitmap_free(cpuset);
#endif
	hwloc_topology_destroy(topology);

	return fstatus;
#endif

}
Example #17
int main(void)
{
  hwloc_bitmap_t set;
  hwloc_obj_t obj;
  char *str = NULL;

  hwloc_topology_init(&topology);
  hwloc_topology_load(topology);

  support = hwloc_topology_get_support(topology);

  obj = hwloc_get_root_obj(topology);
  set = hwloc_bitmap_dup(obj->cpuset);

  while (hwloc_bitmap_isequal(obj->cpuset, set)) {
    if (!obj->arity)
      break;
    obj = obj->children[0];
  }

  hwloc_bitmap_asprintf(&str, set);
  printf("system set is %s\n", str);
  free(str);

  test(set, 0);
  printf("now strict\n");
  test(set, HWLOC_CPUBIND_STRICT);

  hwloc_bitmap_free(set);
  set = hwloc_bitmap_dup(obj->cpuset);
  hwloc_bitmap_asprintf(&str, set);
  printf("obj set is %s\n", str);
  free(str);

  test(set, 0);
  printf("now strict\n");
  test(set, HWLOC_CPUBIND_STRICT);

  hwloc_bitmap_singlify(set);
  hwloc_bitmap_asprintf(&str, set);
  printf("singlified to %s\n", str);
  free(str);

  test(set, 0);
  printf("now strict\n");
  test(set, HWLOC_CPUBIND_STRICT);
  hwloc_bitmap_free(set);

  printf("\n\nmemory tests\n\n");
  printf("complete node set\n");
  set = hwloc_bitmap_dup(hwloc_get_root_obj(topology)->cpuset);
  hwloc_bitmap_asprintf(&str, set);
  printf("i.e. cpuset %s\n", str);
  free(str);
  testmem3(set);
  hwloc_bitmap_free(set);

  obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NODE, 0);
  if (obj) {
    set = hwloc_bitmap_dup(obj->cpuset);
    hwloc_bitmap_asprintf(&str, set);
    printf("cpuset set is %s\n", str);
    free(str);

    testmem3(set);

    obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NODE, 1);
    if (obj) {
      hwloc_bitmap_or(set, set, obj->cpuset);
      hwloc_bitmap_asprintf(&str, set);
      printf("cpuset set is %s\n", str);
      free(str);

      testmem3(set);
    }
    hwloc_bitmap_free(set);
  }

  hwloc_topology_destroy(topology);
  return 0;
}
Example #18
int hwloc_look_hardwired_fujitsu_fx100(struct hwloc_topology *topology)
{
    /* FIXME: what if a broken core is disabled? */
    unsigned i;
    hwloc_obj_t obj;
    hwloc_bitmap_t set;

    for(i=0; i<34; i++) {
        set = hwloc_bitmap_alloc();
        hwloc_bitmap_set(set, i);

        if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1ICACHE)) {
            obj = hwloc_alloc_setup_object(HWLOC_OBJ_L1ICACHE, -1);
            obj->cpuset = hwloc_bitmap_dup(set);
            obj->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;
            obj->attr->cache.depth = 1;
            obj->attr->cache.size = 64*1024;
            obj->attr->cache.linesize = 256;
            obj->attr->cache.associativity = 4;
            hwloc_insert_object_by_cpuset(topology, obj);
        }
        if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1CACHE)) {
            obj = hwloc_alloc_setup_object(HWLOC_OBJ_L1CACHE, -1);
            obj->cpuset = hwloc_bitmap_dup(set);
            obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
            obj->attr->cache.depth = 1;
            obj->attr->cache.size = 64*1024;
            obj->attr->cache.linesize = 256;
            obj->attr->cache.associativity = 4;
            hwloc_insert_object_by_cpuset(topology, obj);
        }
        if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_CORE)) {
            obj = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, i);
            obj->cpuset = set;
            hwloc_insert_object_by_cpuset(topology, obj);
        } else
            hwloc_bitmap_free(set);
    }

    if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L2CACHE)) {
        obj = hwloc_alloc_setup_object(HWLOC_OBJ_L2CACHE, -1);
        obj->cpuset = hwloc_bitmap_alloc();
        hwloc_bitmap_set_range(obj->cpuset, 0, 15);
        hwloc_bitmap_set(obj->cpuset, 32);
        obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
        obj->attr->cache.depth = 2;
        obj->attr->cache.size = 12*1024*1024;
        obj->attr->cache.linesize = 256;
        obj->attr->cache.associativity = 24;
        hwloc_insert_object_by_cpuset(topology, obj);

        obj = hwloc_alloc_setup_object(HWLOC_OBJ_L2CACHE, -1);
        obj->cpuset = hwloc_bitmap_alloc();
        hwloc_bitmap_set_range(obj->cpuset, 16, 31);
        hwloc_bitmap_set(obj->cpuset, 33);
        obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
        obj->attr->cache.depth = 2;
        obj->attr->cache.size = 12*1024*1024;
        obj->attr->cache.linesize = 256;
        obj->attr->cache.associativity = 24;
        hwloc_insert_object_by_cpuset(topology, obj);
    }
    if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_PACKAGE)) {
        obj = hwloc_alloc_setup_object(HWLOC_OBJ_PACKAGE, 0);
        obj->cpuset = hwloc_bitmap_alloc();
        hwloc_bitmap_set_range(obj->cpuset, 0, 33);
        hwloc_obj_add_info(obj, "CPUVendor", "Fujitsu");
        hwloc_obj_add_info(obj, "CPUModel", "SPARC64 XIfx");
        hwloc_insert_object_by_cpuset(topology, obj);
    }

    hwloc_setup_pu_level(topology, 34);

    return 0;
}
Example #19
static int
hwloc_look_darwin(struct hwloc_backend *backend)
{
  struct hwloc_topology *topology = backend->topology;
  int64_t _nprocs;
  unsigned nprocs;
  int64_t _npackages;
  unsigned i, j, cpu;
  struct hwloc_obj *obj;
  size_t size;
  int64_t l1dcachesize, l1icachesize;
  int64_t cacheways[2];
  int64_t l2cachesize;
  int64_t l3cachesize;
  int64_t cachelinesize;
  int64_t memsize;
  int64_t _tmp;
  char cpumodel[64];
  char cpuvendor[64];
  char cpufamilynumber[20], cpumodelnumber[20], cpustepping[20];
  int gotnuma = 0;
  int gotnumamemory = 0;

  if (topology->levels[0][0]->cpuset)
    /* somebody discovered things */
    return -1;

  hwloc_alloc_root_sets(topology->levels[0][0]);

  /* Don't use hwloc_fallback_nbprocessors() because it would return online cpus only,
   * while we need all cpus when computing logical_per_package, etc below.
   * We don't know which CPUs are offline, but Darwin doesn't support binding anyway.
   *
   * TODO: try hw.logicalcpu_max
   */

  if (hwloc_get_sysctlbyname("hw.logicalcpu", &_nprocs) || _nprocs <= 0)
    /* fallback to deprecated way */
    if (hwloc_get_sysctlbyname("hw.ncpu", &_nprocs) || _nprocs <= 0)
      return -1;

  nprocs = _nprocs;
  topology->support.discovery->pu = 1;

  hwloc_debug("%u procs\n", nprocs);

  size = sizeof(cpuvendor);
  if (sysctlbyname("machdep.cpu.vendor", cpuvendor, &size, NULL, 0))
    cpuvendor[0] = '\0';

  size = sizeof(cpumodel);
  if (sysctlbyname("machdep.cpu.brand_string", cpumodel, &size, NULL, 0))
    cpumodel[0] = '\0';

  if (hwloc_get_sysctlbyname("machdep.cpu.family", &_tmp))
    cpufamilynumber[0] = '\0';
  else
    snprintf(cpufamilynumber, sizeof(cpufamilynumber), "%lld", (long long) _tmp);
  if (hwloc_get_sysctlbyname("machdep.cpu.model", &_tmp))
    cpumodelnumber[0] = '\0';
  else
    snprintf(cpumodelnumber, sizeof(cpumodelnumber), "%lld", (long long) _tmp);
  /* .extfamily and .extmodel are already added to .family and .model */
  if (hwloc_get_sysctlbyname("machdep.cpu.stepping", &_tmp))
    cpustepping[0] = '\0';
  else
    snprintf(cpustepping, sizeof(cpustepping), "%lld", (long long) _tmp);

  if (!hwloc_get_sysctlbyname("hw.packages", &_npackages) && _npackages > 0) {
    unsigned npackages = _npackages;
    int64_t _cores_per_package;
    unsigned cores_per_package;
    int64_t _logical_per_package;
    unsigned logical_per_package;

    hwloc_debug("%u packages\n", npackages);

    if (!hwloc_get_sysctlbyname("machdep.cpu.thread_count", &_logical_per_package) && _logical_per_package > 0)
      /* official/modern way */
      logical_per_package = _logical_per_package;
    else if (!hwloc_get_sysctlbyname("machdep.cpu.logical_per_package", &_logical_per_package) && _logical_per_package > 0)
      /* old way, gives the max supported by this "kind" of processor,
       * can be larger than the actual number for this model.
       */
      logical_per_package = _logical_per_package;
    else
      /* Assume the trivia.  */
      logical_per_package = nprocs / npackages;

    hwloc_debug("%u threads per package\n", logical_per_package);

    if (nprocs == npackages * logical_per_package
	&& hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_PACKAGE))
      for (i = 0; i < npackages; i++) {
        obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PACKAGE, i);
        obj->cpuset = hwloc_bitmap_alloc();
        for (cpu = i*logical_per_package; cpu < (i+1)*logical_per_package; cpu++)
          hwloc_bitmap_set(obj->cpuset, cpu);

        hwloc_debug_1arg_bitmap("package %u has cpuset %s\n",
                   i, obj->cpuset);

        if (cpuvendor[0] != '\0')
          hwloc_obj_add_info(obj, "CPUVendor", cpuvendor);
        if (cpumodel[0] != '\0')
          hwloc_obj_add_info(obj, "CPUModel", cpumodel);
        if (cpufamilynumber[0] != '\0')
          hwloc_obj_add_info(obj, "CPUFamilyNumber", cpufamilynumber);
        if (cpumodelnumber[0] != '\0')
          hwloc_obj_add_info(obj, "CPUModelNumber", cpumodelnumber);
        if (cpustepping[0] != '\0')
          hwloc_obj_add_info(obj, "CPUStepping", cpustepping);

        hwloc_insert_object_by_cpuset(topology, obj);
      }
    else {
      if (cpuvendor[0] != '\0')
        hwloc_obj_add_info(topology->levels[0][0], "CPUVendor", cpuvendor);
      if (cpumodel[0] != '\0')
        hwloc_obj_add_info(topology->levels[0][0], "CPUModel", cpumodel);
      if (cpufamilynumber[0] != '\0')
        hwloc_obj_add_info(topology->levels[0][0], "CPUFamilyNumber", cpufamilynumber);
      if (cpumodelnumber[0] != '\0')
        hwloc_obj_add_info(topology->levels[0][0], "CPUModelNumber", cpumodelnumber);
      if (cpustepping[0] != '\0')
        hwloc_obj_add_info(topology->levels[0][0], "CPUStepping", cpustepping);
    }

    if (!hwloc_get_sysctlbyname("machdep.cpu.core_count", &_cores_per_package) && _cores_per_package > 0)
      /* official/modern way */
      cores_per_package = _cores_per_package;
    else if (!hwloc_get_sysctlbyname("machdep.cpu.cores_per_package", &_cores_per_package) && _cores_per_package > 0)
      /* old way, gives the max supported by this "kind" of processor,
       * can be larger than the actual number for this model.
       */
      cores_per_package = _cores_per_package;
    else
      /* no idea */
      cores_per_package = 0;

    if (cores_per_package > 0
	&& hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_CORE)) {
      hwloc_debug("%u cores per package\n", cores_per_package);

      if (!(logical_per_package % cores_per_package))
        for (i = 0; i < npackages * cores_per_package; i++) {
          obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_CORE, i);
          obj->cpuset = hwloc_bitmap_alloc();
          for (cpu = i*(logical_per_package/cores_per_package);
               cpu < (i+1)*(logical_per_package/cores_per_package);
               cpu++)
            hwloc_bitmap_set(obj->cpuset, cpu);

          hwloc_debug_1arg_bitmap("core %u has cpuset %s\n",
                     i, obj->cpuset);
          hwloc_insert_object_by_cpuset(topology, obj);
        }
    }
  } else {
    if (cpuvendor[0] != '\0')
      hwloc_obj_add_info(topology->levels[0][0], "CPUVendor", cpuvendor);
    if (cpumodel[0] != '\0')
      hwloc_obj_add_info(topology->levels[0][0], "CPUModel", cpumodel);
    if (cpufamilynumber[0] != '\0')
      hwloc_obj_add_info(topology->levels[0][0], "CPUFamilyNumber", cpufamilynumber);
    if (cpumodelnumber[0] != '\0')
      hwloc_obj_add_info(topology->levels[0][0], "CPUModelNumber", cpumodelnumber);
    if (cpustepping[0] != '\0')
      hwloc_obj_add_info(topology->levels[0][0], "CPUStepping", cpustepping);
  }

  if (hwloc_get_sysctlbyname("hw.l1dcachesize", &l1dcachesize))
    l1dcachesize = 0;

  if (hwloc_get_sysctlbyname("hw.l1icachesize", &l1icachesize))
    l1icachesize = 0;

  if (hwloc_get_sysctlbyname("hw.l2cachesize", &l2cachesize))
    l2cachesize = 0;

  if (hwloc_get_sysctlbyname("hw.l3cachesize", &l3cachesize))
    l3cachesize = 0;

  if (hwloc_get_sysctlbyname("machdep.cpu.cache.L1_associativity", &cacheways[0]))
    cacheways[0] = 0;
  else if (cacheways[0] == 0xff)
    cacheways[0] = -1;

  if (hwloc_get_sysctlbyname("machdep.cpu.cache.L2_associativity", &cacheways[1]))
    cacheways[1] = 0;
  else if (cacheways[1] == 0xff)
    cacheways[1] = -1;

  if (hwloc_get_sysctlbyname("hw.cachelinesize", &cachelinesize))
    cachelinesize = 0;

  if (hwloc_get_sysctlbyname("hw.memsize", &memsize))
    memsize = 0;

  if (!sysctlbyname("hw.cacheconfig", NULL, &size, NULL, 0)) {
    unsigned n = size / sizeof(uint32_t);
    uint64_t cacheconfig[n];
    uint64_t cachesize[n];
    uint32_t cacheconfig32[n];

    if ((!sysctlbyname("hw.cacheconfig", cacheconfig, &size, NULL, 0))) {
      /* Yeech. Darwin seemingly has changed from 32bit to 64bit integers for
       * cacheconfig, with apparently no way for detection. Assume the machine
       * won't have more than 4 billion cpus */
      if (cacheconfig[0] > 0xFFFFFFFFUL) {
        memcpy(cacheconfig32, cacheconfig, size);
        for (i = 0 ; i < size / sizeof(uint32_t); i++)
          cacheconfig[i] = cacheconfig32[i];
      }

      memset(cachesize, 0, sizeof(uint64_t) * n);
      size = sizeof(uint64_t) * n;
      if (sysctlbyname("hw.cachesize", cachesize, &size, NULL, 0)) {
        if (n > 0)
          cachesize[0] = memsize;
        if (n > 1)
          cachesize[1] = l1dcachesize;
        if (n > 2)
          cachesize[2] = l2cachesize;
        if (n > 3)
          cachesize[3] = l3cachesize;
      }

      hwloc_debug("%s", "caches");
      for (i = 0; i < n && cacheconfig[i]; i++)
        hwloc_debug(" %"PRIu64"(%"PRIu64"kB)", cacheconfig[i], cachesize[i] / 1024);

      /* Now we know how many caches there are */
      n = i;
      hwloc_debug("\n%u cache levels\n", n - 1);

      /* For each cache level (0 is memory) */
      for (i = 0; i < n; i++) {
        /* cacheconfig tells us how many cpus share it, let's iterate on each cache */
        for (j = 0; j < (nprocs / cacheconfig[i]); j++) {
	  if (!i) {
	    obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_NUMANODE, j);
            obj->nodeset = hwloc_bitmap_alloc();
            hwloc_bitmap_set(obj->nodeset, j);
	    gotnuma++;
          } else {
	    obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L1CACHE+i-1, HWLOC_UNKNOWN_INDEX);
	  }
          obj->cpuset = hwloc_bitmap_alloc();
          for (cpu = j*cacheconfig[i];
               cpu < ((j+1)*cacheconfig[i]);
               cpu++)
            hwloc_bitmap_set(obj->cpuset, cpu);

          if (i == 1 && l1icachesize
	      && hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1ICACHE)) {
            /* FIXME assuming that L1i and L1d are shared the same way. Darwin
             * does not yet provide a way to know.  */
            hwloc_obj_t l1i = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L1ICACHE, HWLOC_UNKNOWN_INDEX);
            l1i->cpuset = hwloc_bitmap_dup(obj->cpuset);
            hwloc_debug_1arg_bitmap("L1icache %u has cpuset %s\n",
                j, l1i->cpuset);
            l1i->attr->cache.depth = i;
            l1i->attr->cache.size = l1icachesize;
            l1i->attr->cache.linesize = cachelinesize;
            l1i->attr->cache.associativity = 0;
            l1i->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;

            hwloc_insert_object_by_cpuset(topology, l1i);
          }
          if (i) {
            hwloc_debug_2args_bitmap("L%ucache %u has cpuset %s\n",
                i, j, obj->cpuset);
            obj->attr->cache.depth = i;
            obj->attr->cache.size = cachesize[i];
            obj->attr->cache.linesize = cachelinesize;
            if (i <= sizeof(cacheways) / sizeof(cacheways[0]))
              obj->attr->cache.associativity = cacheways[i-1];
            else
              obj->attr->cache.associativity = 0;
            if (i == 1 && l1icachesize)
              obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
            else
              obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
          } else {
            hwloc_debug_1arg_bitmap("node %u has cpuset %s\n",
                j, obj->cpuset);
	    if (cachesize[i]) {
	      obj->attr->numanode.local_memory = cachesize[i];
	      gotnumamemory++;
	    }
	    obj->attr->numanode.page_types_len = 2;
	    obj->attr->numanode.page_types = malloc(2*sizeof(*obj->attr->numanode.page_types));
	    memset(obj->attr->numanode.page_types, 0, 2*sizeof(*obj->attr->numanode.page_types));
	    obj->attr->numanode.page_types[0].size = hwloc_getpagesize();
#if HAVE_DECL__SC_LARGE_PAGESIZE
	    obj->attr->numanode.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE);
#endif
          }

	  if (hwloc_filter_check_keep_object_type(topology, obj->type))
	    hwloc_insert_object_by_cpuset(topology, obj);
	  else
	    hwloc_free_unlinked_object(obj); /* FIXME: don't build it at all, just build the cpuset in case l1i needs it */
        }
      }
    }
  }

  if (gotnuma)
    topology->support.discovery->numa = 1;
  if (gotnumamemory)
    topology->support.discovery->numa_memory = 1;

  /* add PU objects */
  hwloc_setup_pu_level(topology, nprocs);

  hwloc_obj_add_info(topology->levels[0][0], "Backend", "Darwin");
  hwloc_add_uname_info(topology, NULL);
  return 0;
}
Example #20
int hwloc_look_hardwired_fujitsu_fx10(struct hwloc_topology *topology)
{
  /* If a broken core gets disabled, its bit disappears and other core bits are NOT shifted towards 0.
   * Node is not given to user jobs, so there is no need to handle that case properly.
   */
  unsigned i;
  hwloc_obj_t obj;
  hwloc_bitmap_t set;

  for(i=0; i<16; i++) {
    set = hwloc_bitmap_alloc();
    hwloc_bitmap_set(set, i);

    if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1ICACHE)) {
      obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L1ICACHE, -1);
      obj->cpuset = hwloc_bitmap_dup(set);
      obj->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;
      obj->attr->cache.depth = 1;
      obj->attr->cache.size = 32*1024;
      obj->attr->cache.linesize = 128;
      obj->attr->cache.associativity = 2;
      hwloc_insert_object_by_cpuset(topology, obj);
    }
    if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1CACHE)) {
      obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L1CACHE, -1);
      obj->cpuset = hwloc_bitmap_dup(set);
      obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
      obj->attr->cache.depth = 1;
      obj->attr->cache.size = 32*1024;
      obj->attr->cache.linesize = 128;
      obj->attr->cache.associativity = 2;
      hwloc_insert_object_by_cpuset(topology, obj);
    }
    if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_CORE)) {
      obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_CORE, i);
      obj->cpuset = set;
      hwloc_insert_object_by_cpuset(topology, obj);
    } else
      hwloc_bitmap_free(set);
  }

  set = hwloc_bitmap_alloc();
  hwloc_bitmap_set_range(set, 0, 15);

  if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L2CACHE)) {
    obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L2CACHE, -1);
    obj->cpuset = hwloc_bitmap_dup(set);
    obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
    obj->attr->cache.depth = 2;
    obj->attr->cache.size = 12*1024*1024;
    obj->attr->cache.linesize = 128;
    obj->attr->cache.associativity = 24;
    hwloc_insert_object_by_cpuset(topology, obj);
  }
  if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_PACKAGE)) {
    obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PACKAGE, 0);
    obj->cpuset = set;
    hwloc_obj_add_info(obj, "CPUVendor", "Fujitsu");
    hwloc_obj_add_info(obj, "CPUModel", "SPARC64 IXfx");
    hwloc_insert_object_by_cpuset(topology, obj);
  } else
    hwloc_bitmap_free(set);

  hwloc_setup_pu_level(topology, 16);

  return 0;
}
Example #21
int main(void)
{
    int depth;
    unsigned i, n;
    unsigned long size;
    int levels;
    char string[128];
    int topodepth;
    void *m;
    hwloc_topology_t topology;
    hwloc_cpuset_t cpuset;
    hwloc_obj_t obj;

    /* Allocate and initialize topology object. */
    hwloc_topology_init(&topology);

    /* ... Optionally, put detection configuration here to ignore
       some object types, define a synthetic topology, etc.

       The default is to detect all the objects of the machine that
       the caller is allowed to access.  See Configure Topology
       Detection. */
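
    /* A hedged sketch (not part of the original example): before calling
       hwloc_topology_load(), the detection source could for instance be
       replaced by a synthetic topology or a previously exported XML file:

         hwloc_topology_set_synthetic(topology, "core:4 pu:2");
         hwloc_topology_set_xml(topology, "/path/to/topology.xml");

       The synthetic description and the XML path above are only
       illustrative. */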

    /* Perform the topology detection. */
    hwloc_topology_load(topology);

    /* Optionally, get some additional topology information
       in case we need the topology depth later. */
    topodepth = hwloc_topology_get_depth(topology);

    /*****************************************************************
     * First example:
     * Walk the topology with an array style, from level 0 (always
     * the system level) to the lowest level (always the proc level).
     *****************************************************************/
    for (depth = 0; depth < topodepth; depth++) {
        printf("*** Objects at level %d\n", depth);
        for (i = 0; i < hwloc_get_nbobjs_by_depth(topology, depth); 
             i++) {
            hwloc_obj_type_snprintf(string, sizeof(string),
				    hwloc_get_obj_by_depth(topology, depth, i), 0);
            printf("Index %u: %s\n", i, string);
        }
    }

    /*****************************************************************
     * Second example:
     * Walk the topology with a tree style.
     *****************************************************************/
    printf("*** Printing overall tree\n");
    print_children(topology, hwloc_get_root_obj(topology), 0);

    /*****************************************************************
     * Third example:
     * Print the number of packages.
     *****************************************************************/
    depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PACKAGE);
    if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) {
        printf("*** The number of packages is unknown\n");
    } else {
        printf("*** %u package(s)\n",
               hwloc_get_nbobjs_by_depth(topology, depth));
    }

    /*****************************************************************
     * Fourth example:
     * Compute the amount of cache that the first logical processor
     * has above it.
     *****************************************************************/
    levels = 0;
    size = 0;
    for (obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 0);
         obj;
         obj = obj->parent)
      if (obj->type == HWLOC_OBJ_CACHE) {
        levels++;
        size += obj->attr->cache.size;
      }
    printf("*** Logical processor 0 has %d caches totaling %luKB\n", 
           levels, size / 1024);

    /*****************************************************************
     * Fifth example:
     * Bind to only one thread of the last core of the machine.
     *
     * First find out where cores are, or else smaller sets of CPUs if
     * the OS doesn't have the notion of a "core".
     *****************************************************************/
    depth = hwloc_get_type_or_below_depth(topology, HWLOC_OBJ_CORE);

    /* Get last core. */
    obj = hwloc_get_obj_by_depth(topology, depth,
                   hwloc_get_nbobjs_by_depth(topology, depth) - 1);
    if (obj) {
        /* Get a copy of its cpuset that we may modify. */
        cpuset = hwloc_bitmap_dup(obj->cpuset);

        /* Get only one logical processor (in case the core is
           SMT/hyper-threaded). */
        hwloc_bitmap_singlify(cpuset);

        /* And try to bind ourself there. */
        if (hwloc_set_cpubind(topology, cpuset, 0)) {
            char *str;
            int error = errno;
            hwloc_bitmap_asprintf(&str, obj->cpuset);
            printf("Couldn't bind to cpuset %s: %s\n", str, strerror(error));
            free(str);
        }

        /* Free our cpuset copy */
        hwloc_bitmap_free(cpuset);
    }

    /*****************************************************************
     * Sixth example:
     * Allocate some memory on the last NUMA node, bind some existing
     * memory to the last NUMA node.
     *****************************************************************/
    /* Get last node. There's always at least one. */
    n = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_NUMANODE);
    obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, n - 1);

    size = 1024*1024;
    m = hwloc_alloc_membind_nodeset(topology, size, obj->nodeset,
                                    HWLOC_MEMBIND_BIND, 0);
    hwloc_free(topology, m, size);

    m = malloc(size);
    hwloc_set_area_membind_nodeset(topology, m, size, obj->nodeset,
                                   HWLOC_MEMBIND_BIND, 0);
    free(m);

    /* Destroy topology object. */
    hwloc_topology_destroy(topology);

    return 0;
}
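Example #21 calls print_children(), which is not defined in the listing above, and the usual headers are omitted. A minimal recursive helper in the spirit of the hwloc hello-world documentation might look like the following sketch (the exact output format is illustrative):

#include <hwloc.h>
#include <stdio.h>

/* Recursively print one object per line, indented by its depth in the tree. */
static void print_children(hwloc_topology_t topology, hwloc_obj_t obj, int depth)
{
    char type[32], attr[1024];
    unsigned i;

    /* Type name, optional OS index, and a summary of type-specific attributes. */
    hwloc_obj_type_snprintf(type, sizeof(type), obj, 0);
    printf("%*s%s", 2*depth, "", type);
    if (obj->os_index != (unsigned) -1)
        printf("#%u", obj->os_index);
    hwloc_obj_attr_snprintf(attr, sizeof(attr), obj, " ", 0);
    if (*attr)
        printf(" (%s)", attr);
    printf("\n");

    for (i = 0; i < obj->arity; i++)
        print_children(topology, obj->children[i], depth + 1);
}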
Example #22
static void
look_rset(int sdl, hwloc_obj_type_t type, struct hwloc_topology *topology, int level)
{
  rsethandle_t rset, rad;
  int i,maxcpus,j;
  int nbnodes;
  struct hwloc_obj *obj;

  if ((topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM))
    rset = rs_alloc(RS_ALL);
  else
    rset = rs_alloc(RS_PARTITION);
  rad = rs_alloc(RS_EMPTY);
  nbnodes = rs_numrads(rset, sdl, 0);
  if (nbnodes == -1) {
    perror("rs_numrads");
    return;
  }

  for (i = 0; i < nbnodes; i++) {
    hwloc_bitmap_t cpuset;
    unsigned os_index = (unsigned) -1; /* no os_index except for PU and NUMANODE below */

    if (rs_getrad(rset, rad, sdl, i, 0)) {
      fprintf(stderr,"rs_getrad(%d) failed: %s\n", i, strerror(errno));
      continue;
    }
    if (!rs_getinfo(rad, R_NUMPROCS, 0))
      continue;

    maxcpus = rs_getinfo(rad, R_MAXPROCS, 0);
    cpuset = hwloc_bitmap_alloc();
    for (j = 0; j < maxcpus; j++) {
      if (rs_op(RS_TESTRESOURCE, rad, NULL, R_PROCS, j))
	hwloc_bitmap_set(cpuset, j);
    }

    if (type == HWLOC_OBJ_PU) {
      os_index = hwloc_bitmap_first(cpuset);
      hwloc_debug("Found PU #%u inside node %d for sdl %d\n", os_index, i, sdl);
      assert(hwloc_bitmap_weight(cpuset) == 1);
    } else if (type == HWLOC_OBJ_NUMANODE) {
      /* NUMA node os_index isn't used for binding, just use the rad number to get unique values.
       * Note that we'll use that fact in hwloc_aix_prepare_membind(). */
      os_index = i;
      hwloc_debug("Using os_index #%u for NUMA node inside node %d for sdl %d\n", os_index, i, sdl);
    }

    obj = hwloc_alloc_setup_object(type, os_index);
    obj->cpuset = cpuset;
    obj->os_level = sdl;

    switch(type) {
      case HWLOC_OBJ_NUMANODE:
	obj->nodeset = hwloc_bitmap_alloc();
	hwloc_bitmap_set(obj->nodeset, i);
	obj->memory.local_memory = 0; /* TODO: odd, rs_getinfo(rad, R_MEMSIZE, 0) << 10 returns the total memory ... */
	obj->memory.page_types_len = 2;
	obj->memory.page_types = malloc(2*sizeof(*obj->memory.page_types));
	memset(obj->memory.page_types, 0, 2*sizeof(*obj->memory.page_types));
	obj->memory.page_types[0].size = hwloc_getpagesize();
#ifdef HAVE__SC_LARGE_PAGESIZE
	obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE);
#endif
	/* TODO: obj->memory.page_types[1].count = rs_getinfo(rset, R_LGPGFREE, 0) / hugepagesize */
	break;
      case HWLOC_OBJ_CACHE:
	obj->attr->cache.size = _system_configuration.L2_cache_size;
	obj->attr->cache.associativity = _system_configuration.L2_cache_asc;

	obj->attr->cache.linesize = 0; /* unknown by default */
	if (__power_pc())
	  if (__power_4() || __power_5() || __power_6() || __power_7())
	    obj->attr->cache.linesize = 128;

	obj->attr->cache.depth = 2;
	obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED; /* OK for power[4567], unknown for others */
	break;
      case HWLOC_OBJ_GROUP:
	obj->attr->group.depth = level;
	break;
      case HWLOC_OBJ_CORE:
      {
	hwloc_obj_t obj2, obj3;
	obj2 = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, i);
	obj2->cpuset = hwloc_bitmap_dup(obj->cpuset);
	obj2->attr->cache.size = _system_configuration.dcache_size;
	obj2->attr->cache.associativity = _system_configuration.dcache_asc;
	obj2->attr->cache.linesize = _system_configuration.dcache_line;
	obj2->attr->cache.depth = 1;
	if (_system_configuration.cache_attrib & (1<<30)) {
	  /* Unified cache */
	  obj2->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
	  hwloc_debug("Adding an L1u cache for core %d\n", i);
	  hwloc_insert_object_by_cpuset(topology, obj2);
	} else {
	  /* Separate Instruction and Data caches */
	  obj2->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
	  hwloc_debug("Adding an L1d cache for core %d\n", i);
	  hwloc_insert_object_by_cpuset(topology, obj2);

	  obj3 = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, i);
	  obj3->cpuset = hwloc_bitmap_dup(obj->cpuset);
	  obj3->attr->cache.size = _system_configuration.icache_size;
	  obj3->attr->cache.associativity = _system_configuration.icache_asc;
	  obj3->attr->cache.linesize = _system_configuration.icache_line;
	  obj3->attr->cache.depth = 1;
	  obj3->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;
	  hwloc_debug("Adding an L1i cache for core %d\n", i);
	  hwloc_insert_object_by_cpuset(topology, obj3);
	}
	break;
      }
      default:
	break;
    }
    hwloc_debug_2args_bitmap("%s %d has cpuset %s\n",
	       hwloc_obj_type_string(type),
	       i, obj->cpuset);
    hwloc_insert_object_by_cpuset(topology, obj);
  }

  rs_free(rset);
  rs_free(rad);
}
Example #23
static int
hwloc_look_darwin(struct hwloc_backend *backend)
{
  struct hwloc_topology *topology = backend->topology;
  int64_t _nprocs;
  unsigned nprocs;
  int64_t _npackages;
  unsigned i, j, cpu;
  struct hwloc_obj *obj;
  size_t size;
  int64_t l1dcachesize, l1icachesize;
  int64_t cacheways[2];
  int64_t l2cachesize;
  int64_t cachelinesize;
  int64_t memsize;
  char cpumodel[64];

  if (topology->levels[0][0]->cpuset)
    /* somebody discovered things */
    return -1;

  hwloc_alloc_obj_cpusets(topology->levels[0][0]);

  if (hwloc_get_sysctlbyname("hw.ncpu", &_nprocs) || _nprocs <= 0)
    return -1;
  nprocs = _nprocs;
  topology->support.discovery->pu = 1;

  hwloc_debug("%u procs\n", nprocs);

  size = sizeof(cpumodel);
  if (sysctlbyname("machdep.cpu.brand_string", cpumodel, &size, NULL, 0))
    cpumodel[0] = '\0';

  if (!hwloc_get_sysctlbyname("hw.packages", &_npackages) && _npackages > 0) {
    unsigned npackages = _npackages;
    int64_t _cores_per_package;
    int64_t _logical_per_package;
    unsigned logical_per_package;

    hwloc_debug("%u packages\n", npackages);

    if (!hwloc_get_sysctlbyname("machdep.cpu.logical_per_package", &_logical_per_package) && _logical_per_package > 0)
      logical_per_package = _logical_per_package;
    else
      /* Assume the trivial case.  */
      logical_per_package = nprocs / npackages;

    hwloc_debug("%u threads per package\n", logical_per_package);

    if (nprocs == npackages * logical_per_package
	&& hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_PACKAGE))
      for (i = 0; i < npackages; i++) {
        obj = hwloc_alloc_setup_object(HWLOC_OBJ_PACKAGE, i);
        obj->cpuset = hwloc_bitmap_alloc();
        for (cpu = i*logical_per_package; cpu < (i+1)*logical_per_package; cpu++)
          hwloc_bitmap_set(obj->cpuset, cpu);

        hwloc_debug_1arg_bitmap("package %u has cpuset %s\n",
                   i, obj->cpuset);

        if (cpumodel[0] != '\0')
          hwloc_obj_add_info(obj, "CPUModel", cpumodel);
        hwloc_insert_object_by_cpuset(topology, obj);
      }
    else
      if (cpumodel[0] != '\0')
        hwloc_obj_add_info(topology->levels[0][0], "CPUModel", cpumodel);

    if (!hwloc_get_sysctlbyname("machdep.cpu.cores_per_package", &_cores_per_package) && _cores_per_package > 0
	&& hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_CORE)) {
      unsigned cores_per_package = _cores_per_package;
      hwloc_debug("%u cores per package\n", cores_per_package);

      if (!(logical_per_package % cores_per_package))
        for (i = 0; i < npackages * cores_per_package; i++) {
          obj = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, i);
          obj->cpuset = hwloc_bitmap_alloc();
          for (cpu = i*(logical_per_package/cores_per_package);
               cpu < (i+1)*(logical_per_package/cores_per_package);
               cpu++)
            hwloc_bitmap_set(obj->cpuset, cpu);

          hwloc_debug_1arg_bitmap("core %u has cpuset %s\n",
                     i, obj->cpuset);
          hwloc_insert_object_by_cpuset(topology, obj);
        }
    }
  } else
    if (cpumodel[0] != '\0')
      hwloc_obj_add_info(topology->levels[0][0], "CPUModel", cpumodel);

  if (hwloc_get_sysctlbyname("hw.l1dcachesize", &l1dcachesize))
    l1dcachesize = 0;

  if (hwloc_get_sysctlbyname("hw.l1icachesize", &l1icachesize))
    l1icachesize = 0;

  if (hwloc_get_sysctlbyname("hw.l2cachesize", &l2cachesize))
    l2cachesize = 0;

  if (hwloc_get_sysctlbyname("machdep.cpu.cache.L1_associativity", &cacheways[0]))
    cacheways[0] = 0;
  else if (cacheways[0] == 0xff)
    cacheways[0] = -1;

  if (hwloc_get_sysctlbyname("machdep.cpu.cache.L2_associativity", &cacheways[1]))
    cacheways[1] = 0;
  else if (cacheways[1] == 0xff)
    cacheways[1] = -1;

  if (hwloc_get_sysctlbyname("hw.cachelinesize", &cachelinesize))
    cachelinesize = 0;

  if (hwloc_get_sysctlbyname("hw.memsize", &memsize))
    memsize = 0;

  if (!sysctlbyname("hw.cacheconfig", NULL, &size, NULL, 0)) {
    unsigned n = size / sizeof(uint32_t);
    uint64_t *cacheconfig = NULL;
    uint64_t *cachesize = NULL;
    uint32_t *cacheconfig32 = NULL;

    cacheconfig = malloc(sizeof(uint64_t) * n);
    if (NULL == cacheconfig) {
        goto out;
    }
    cachesize = malloc(sizeof(uint64_t) * n);
    if (NULL == cachesize) {
        goto out;
    }
    cacheconfig32 = malloc(sizeof(uint32_t) * n);
    if (NULL == cacheconfig32) {
        goto out;
    }

    if ((!sysctlbyname("hw.cacheconfig", cacheconfig, &size, NULL, 0))) {
      /* Yeech. Darwin seemingly changed from 32-bit to 64-bit integers for
       * cacheconfig, with apparently no way to detect which one is used.
       * Assume the machine won't have more than 4 billion CPUs. */
      if (cacheconfig[0] > 0xFFFFFFFFUL) {
        memcpy(cacheconfig32, cacheconfig, size);
        for (i = 0 ; i < size / sizeof(uint32_t); i++)
          cacheconfig[i] = cacheconfig32[i];
      }

      memset(cachesize, 0, sizeof(uint64_t) * n);
      size = sizeof(uint64_t) * n;
      if (sysctlbyname("hw.cachesize", cachesize, &size, NULL, 0)) {
        if (n > 0)
          cachesize[0] = memsize;
        if (n > 1)
          cachesize[1] = l1dcachesize;
        if (n > 2)
          cachesize[2] = l2cachesize;
      }

      hwloc_debug("%s", "caches");
      for (i = 0; i < n && cacheconfig[i]; i++)
        hwloc_debug(" %"PRIu64"(%"PRIu64"kB)", cacheconfig[i], cachesize[i] / 1024);

      /* Now we know how many caches there are */
      n = i;
      hwloc_debug("\n%u cache levels\n", n - 1);

      /* For each cache level (0 is memory) */
      for (i = 0; i < n; i++) {
        /* cacheconfig tells us how many cpus share it, let's iterate on each cache */
        for (j = 0; j < (nprocs / cacheconfig[i]); j++) {
	  if (!i) {
	    obj = hwloc_alloc_setup_object(HWLOC_OBJ_NUMANODE, j);
            obj->nodeset = hwloc_bitmap_alloc();
            hwloc_bitmap_set(obj->nodeset, j);
          } else {
	    obj = hwloc_alloc_setup_object(HWLOC_OBJ_L1CACHE+i-1, -1);
	  }
          obj->cpuset = hwloc_bitmap_alloc();
          for (cpu = j*cacheconfig[i];
               cpu < ((j+1)*cacheconfig[i]);
               cpu++)
            hwloc_bitmap_set(obj->cpuset, cpu);

          if (i == 1 && l1icachesize
	      && hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1ICACHE)) {
            /* FIXME assuming that L1i and L1d are shared the same way. Darwin
             * does not yet provide a way to know.  */
            hwloc_obj_t l1i = hwloc_alloc_setup_object(HWLOC_OBJ_L1ICACHE, -1);
            l1i->cpuset = hwloc_bitmap_dup(obj->cpuset);
            hwloc_debug_1arg_bitmap("L1icache %u has cpuset %s\n",
                j, l1i->cpuset);
            l1i->attr->cache.depth = i;
            l1i->attr->cache.size = l1icachesize;
            l1i->attr->cache.linesize = cachelinesize;
            l1i->attr->cache.associativity = 0;
            l1i->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;

            hwloc_insert_object_by_cpuset(topology, l1i);
          }
          if (i) {
            hwloc_debug_2args_bitmap("L%ucache %u has cpuset %s\n",
                i, j, obj->cpuset);
            obj->attr->cache.depth = i;
            obj->attr->cache.size = cachesize[i];
            obj->attr->cache.linesize = cachelinesize;
            if (i <= sizeof(cacheways) / sizeof(cacheways[0]))
              obj->attr->cache.associativity = cacheways[i-1];
            else
              obj->attr->cache.associativity = 0;
            if (i == 1 && l1icachesize)
              obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
            else
              obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
          } else {
            hwloc_debug_1arg_bitmap("node %u has cpuset %s\n",
                j, obj->cpuset);
	    obj->memory.local_memory = cachesize[i];
	    obj->memory.page_types_len = 2;
	    obj->memory.page_types = malloc(2*sizeof(*obj->memory.page_types));
	    memset(obj->memory.page_types, 0, 2*sizeof(*obj->memory.page_types));
	    obj->memory.page_types[0].size = hwloc_getpagesize();
#ifdef HAVE__SC_LARGE_PAGESIZE
	    obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE);
#endif
          }

	  if (hwloc_filter_check_keep_object_type(topology, obj->type))
	    hwloc_insert_object_by_cpuset(topology, obj);
	  else
	    hwloc_free_unlinked_object(obj); /* FIXME: don't build the object at all, just build the cpuset in case l1i needs it */
        }
      }
    }
  out:
    free(cacheconfig);
    free(cachesize);
    free(cacheconfig32);
  }


  /* add PU objects */
  hwloc_setup_pu_level(topology, nprocs);

  hwloc_obj_add_info(topology->levels[0][0], "Backend", "Darwin");
  hwloc_add_uname_info(topology, NULL);
  return 0;
}