int hwloc_look_hardwired_fujitsu_fx10(struct hwloc_topology *topology)
{
  /* FIXME: what if a broken core is disabled? */
  unsigned i;
  hwloc_obj_t obj;
  hwloc_bitmap_t set;

  for(i=0; i<16; i++) {
    set = hwloc_bitmap_alloc();
    hwloc_bitmap_set(set, i);

    obj = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, -1);
    obj->cpuset = hwloc_bitmap_dup(set);
    obj->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;
    obj->attr->cache.depth = 1;
    obj->attr->cache.size = 32*1024;
    obj->attr->cache.linesize = 128;
    obj->attr->cache.associativity = 2;
    hwloc_insert_object_by_cpuset(topology, obj);

    obj = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, -1);
    obj->cpuset = hwloc_bitmap_dup(set);
    obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
    obj->attr->cache.depth = 1;
    obj->attr->cache.size = 32*1024;
    obj->attr->cache.linesize = 128;
    obj->attr->cache.associativity = 2;
    hwloc_insert_object_by_cpuset(topology, obj);

    obj = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, i);
    obj->cpuset = set;
    hwloc_insert_object_by_cpuset(topology, obj);
  }

  set = hwloc_bitmap_alloc();
  hwloc_bitmap_set_range(set, 0, 15);

  obj = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, -1);
  obj->cpuset = hwloc_bitmap_dup(set);
  obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
  obj->attr->cache.depth = 2;
  obj->attr->cache.size = 12*1024*1024;
  obj->attr->cache.linesize = 128;
  obj->attr->cache.associativity = 24;
  hwloc_insert_object_by_cpuset(topology, obj);

  obj = hwloc_alloc_setup_object(HWLOC_OBJ_PACKAGE, 0);
  obj->cpuset = set;
  hwloc_obj_add_info(obj, "CPUVendor", "Fujitsu");
  hwloc_obj_add_info(obj, "CPUModel", "SPARC64 IXfx");
  hwloc_insert_object_by_cpuset(topology, obj);

  hwloc_setup_pu_level(topology, 16);

  return 0;
}
/* Walk the topology depth-first and record, for each PU, its
 * (socket, core, pu) logical coordinates plus a copy of its cpuset.
 * The counters are static, so this must be called exactly once,
 * on the root object. */
inline static void traverse(hwloc_obj_t object)
{
  static int index = 0, socket = -1, core = -1, pu = -1;
  assert(index < pu_count);
  switch (object->type) {
  case HWLOC_OBJ_SOCKET:
    socket++;
    core = -1;
    pu = -1;
    break;
  case HWLOC_OBJ_CORE:
    core++;
    pu = -1;
    break;
  case HWLOC_OBJ_PU:
    pu++;
    hw_places[index].socket = socket;
    hw_places[index].core = core;
    hw_places[index].pu = pu;
    cpu_sets[index] = hwloc_bitmap_dup(object->cpuset);
    index++;
    break;
  default:
    break;
  }
  for (unsigned i = 0; i < object->arity; i++) {
    traverse(object->children[i]);
  }
}
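For context, a driver for this traversal might look like the following sketch. pu_count, hw_places, and cpu_sets are the file-scope state used above (their exact types are assumed here), build_hw_places is a hypothetical name, and the static counters mean traverse() can only run once per process.

/* Hypothetical driver (names and types assumed): size the per-PU arrays,
 * then walk the whole tree once starting from the root object. */
static void build_hw_places(hwloc_topology_t topology)
{
  pu_count = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU);
  hw_places = malloc(pu_count * sizeof(*hw_places));
  cpu_sets = malloc(pu_count * sizeof(*cpu_sets));
  traverse(hwloc_get_root_obj(topology)); /* fills one entry per PU */
}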
static hwloc_obj_t insert_task(hwloc_topology_t topology, hwloc_cpuset_t cpuset, const char * name)
{
  hwloc_obj_t group, obj;

  hwloc_bitmap_and(cpuset, cpuset, hwloc_topology_get_topology_cpuset(topology));
  if (hwloc_bitmap_iszero(cpuset))
    return NULL;

  /* try to insert a group at exact position */
  group = hwloc_topology_alloc_group_object(topology);
  if (!group)
    return NULL;
  group->cpuset = hwloc_bitmap_dup(cpuset);
  group = hwloc_topology_insert_group_object(topology, group);
  if (!group) {
    /* try to insert in a larger parent */
    char *s;
    hwloc_bitmap_asprintf(&s, cpuset);
    group = hwloc_get_obj_covering_cpuset(topology, cpuset);
    fprintf(stderr, "Inserting process `%s' below parent larger than cpuset %s\n", name, s);
    free(s);
  }
  obj = hwloc_topology_insert_misc_object(topology, group, name);
  if (!obj)
    fprintf(stderr, "Failed to insert process `%s'\n", name);
  else
    obj->subtype = strdup("Process");

  return obj;
}
void AbstractCoreBoundTaskQueue::launchThread(int core) {
  // get the number of cores on the system
  int NUM_PROCS = getNumberOfCoresOnSystem();

  if (core < NUM_PROCS) {
    _thread = new std::thread(&AbstractTaskQueue::executeTask, this);
    hwloc_cpuset_t cpuset;
    hwloc_obj_t obj;
    hwloc_topology_t topology = getHWTopology();

    obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, core);
    // the bitmap to modify
    cpuset = hwloc_bitmap_dup(obj->cpuset);
    // remove hyperthreads
    hwloc_bitmap_singlify(cpuset);
    // bind
    if (hwloc_set_thread_cpubind(topology, _thread->native_handle(), cpuset,
                                 HWLOC_CPUBIND_STRICT | HWLOC_CPUBIND_NOMEMBIND)) {
      char *str;
      int error = errno;
      hwloc_bitmap_asprintf(&str, obj->cpuset);
      fprintf(stderr, "Couldn't bind to cpuset %s: %s\n", str, strerror(error));
      fprintf(stderr, "Continuing as normal, however, no guarantees\n");
      free(str); // hwloc_bitmap_asprintf allocates the string; release it
      //throw std::runtime_error(strerror(error));
    }
    hwloc_bitmap_free(cpuset);
  } else {
    // this case should never happen, as TaskQueue is only initialized from
    // SimpleTaskScheduler, which captures this case
    throw std::logic_error("CPU to run thread on is larger than number of total cores; seems that TaskQueue was initialized outside of SimpleTaskScheduler, which should not happen");
  }
}
void initialize_hwloc(int nb_workers) {
#ifdef HAVE_HWLOC
  hwloc_topology_init(&topology);
  hwloc_topology_load(topology);
  bool numa_alloc_interleaved = (nb_workers != 0);
  numa_alloc_interleaved =
    cmdline::parse_or_default("numa_alloc_interleaved", numa_alloc_interleaved);
  if (numa_alloc_interleaved) {
    hwloc_cpuset_t all_cpus =
      hwloc_bitmap_dup(hwloc_topology_get_topology_cpuset(topology));
    int err = hwloc_set_membind(topology, all_cpus, HWLOC_MEMBIND_INTERLEAVE, 0);
    if (err < 0) {
      printf("Warning: failed to set NUMA round-robin allocation policy\n");
    }
    hwloc_bitmap_free(all_cpus); // release our duplicated cpuset
  }
#endif
}
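Passing a cpuset with no flags as above works on both hwloc 1.x and 2.x; an equivalent hwloc 2.x formulation expresses the interleave policy with a nodeset and HWLOC_MEMBIND_BYNODESET. A minimal sketch under that assumption (hwloc >= 2.0):

#if HWLOC_API_VERSION >= 0x00020000
/* hwloc 2.x: interleave allocations across all NUMA nodes of the topology. */
hwloc_bitmap_t all_nodes = hwloc_bitmap_dup(hwloc_topology_get_topology_nodeset(topology));
if (hwloc_set_membind(topology, all_nodes, HWLOC_MEMBIND_INTERLEAVE,
                      HWLOC_MEMBIND_BYNODESET) < 0)
  printf("Warning: failed to set NUMA round-robin allocation policy\n");
hwloc_bitmap_free(all_nodes);
#endif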
int main(void)
{
  hwloc_topology_t topology;
  hwloc_bitmap_t cpuset;
  int err;

  /* check the OS topology */
  hwloc_topology_init(&topology);
  hwloc_topology_load(topology);
  assert(hwloc_topology_is_thissystem(topology));
  cpuset = hwloc_bitmap_dup(hwloc_topology_get_complete_cpuset(topology));
  result("Binding with OS backend", hwloc_set_cpubind(topology, cpuset, 0));
  hwloc_topology_destroy(topology);

  /* We assume there is a real processor numbered 0 */
  hwloc_bitmap_zero(cpuset);
  hwloc_bitmap_set(cpuset, 0);

  /* check a synthetic topology */
  hwloc_topology_init(&topology);
  hwloc_topology_set_synthetic(topology, "1");
  hwloc_topology_load(topology);
  assert(!hwloc_topology_is_thissystem(topology));
  err = hwloc_set_cpubind(topology, cpuset, 0);
  result("Binding with synthetic backend", err);
  assert(!err);
  hwloc_topology_destroy(topology);

  /* check a synthetic topology but assuming it's the system topology */
  hwloc_topology_init(&topology);
  hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM);
  hwloc_topology_set_synthetic(topology, "1");
  hwloc_topology_load(topology);
  assert(hwloc_topology_is_thissystem(topology));
  result("Binding with synthetic backend faking is_thissystem",
         hwloc_set_cpubind(topology, cpuset, 0));
  hwloc_topology_destroy(topology);

  hwloc_bitmap_free(cpuset);
  return 0;
}
bool Core::bind() {
  auto cpuset = hwloc_bitmap_dup(core_->cpuset);
  hwloc_bitmap_singlify(cpuset);
  if (hwloc_set_cpubind(topology_, cpuset, 0)) {
    auto error = errno;
    LOG(thread_logger, warning) << "Error setting thread affinity: " << strerror(error);
    hwloc_bitmap_free(cpuset);
    return false;
  }
  hwloc_bitmap_free(cpuset);
  return true;
}
void bindCurrentThreadToNumaNode(int node) {
  hwloc_topology_t topology = getHWTopology();
  hwloc_cpuset_t cpuset;
  hwloc_obj_t obj;

  // The actual node
  obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NODE, node);
  // obj is nullptr on non-NUMA machines
  if (obj == nullptr) {
    fprintf(stderr, "Couldn't get hwloc object, bindCurrentThreadToNumaNode failed!\n");
    return;
  }

  cpuset = hwloc_bitmap_dup(obj->cpuset);
  // hwloc_bitmap_singlify(cpuset);

  // bind
  if (hwloc_set_cpubind(topology, cpuset,
                        HWLOC_CPUBIND_STRICT | HWLOC_CPUBIND_NOMEMBIND | HWLOC_CPUBIND_THREAD)) {
    char* str;
    int error = errno;
    hwloc_bitmap_asprintf(&str, obj->cpuset);
    fprintf(stderr, "Couldn't bind to cpuset %s: %s\n", str, strerror(error));
    free(str);
    throw std::runtime_error(strerror(error));
  }

  // free duplicated cpuset
  hwloc_bitmap_free(cpuset);

  // assuming single machine system
  obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_MACHINE, 0);
  // set membind policy interleave for this thread
  if (hwloc_set_membind_nodeset(topology, obj->nodeset, HWLOC_MEMBIND_INTERLEAVE,
                                HWLOC_MEMBIND_STRICT | HWLOC_MEMBIND_THREAD) &&
      errno != ENOSYS) {
    char* str;
    int error = errno;
    hwloc_bitmap_asprintf(&str, obj->nodeset);
    fprintf(stderr, "Couldn't membind to nodeset %s: %s\n", str, strerror(error));
    fprintf(stderr, "Continuing as normal, however, no guarantees\n");
    free(str);
  }
}
static int
hwloc__get_largest_objs_inside_cpuset (struct hwloc_obj *current, hwloc_const_bitmap_t set,
                                       struct hwloc_obj ***res, int *max)
{
  int gotten = 0;
  unsigned i;

  /* the caller must ensure this */
  if (*max <= 0)
    return 0;

  if (hwloc_bitmap_isequal(current->cpuset, set)) {
    **res = current;
    (*res)++;
    (*max)--;
    return 1;
  }

  for (i=0; i<current->arity; i++) {
    hwloc_bitmap_t subset = hwloc_bitmap_dup(set);
    int ret;

    /* split out the cpuset part corresponding to this child and see if there's anything to do */
    if (current->children[i]->cpuset) {
      hwloc_bitmap_and(subset, subset, current->children[i]->cpuset);
      if (hwloc_bitmap_iszero(subset)) {
        hwloc_bitmap_free(subset);
        continue;
      }
    }

    ret = hwloc__get_largest_objs_inside_cpuset (current->children[i], subset, res, max);
    gotten += ret;
    hwloc_bitmap_free(subset);

    /* if no more room to store remaining objects, return what we got so far */
    if (!*max)
      break;
  }

  return gotten;
}
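For reference, the public wrapper around this recursion is hwloc_get_largest_objs_inside_cpuset(). A minimal usage sketch, assuming a loaded topology and an input cpuset named set (the array capacity of 16 is an arbitrary choice):

/* Decompose `set` into the largest objects whose cpusets exactly cover it. */
hwloc_obj_t objs[16]; /* arbitrary capacity for this sketch */
int n = hwloc_get_largest_objs_inside_cpuset(topology, set, objs, 16);
for (int k = 0; k < n; k++) {
  char type[32];
  hwloc_obj_type_snprintf(type, sizeof(type), objs[k], 0);
  printf("covered by %s (logical index %u)\n", type, objs[k]->logical_index);
}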
static void print_task(hwloc_topology_t topology,
                       long pid_number, const char *name, hwloc_bitmap_t cpuset,
                       char *pidoutput,
                       int thread)
{
  printf("%s%ld\t", thread ? " " : "", pid_number);

  if (show_cpuset) {
    char *cpuset_str = NULL;
    hwloc_bitmap_asprintf(&cpuset_str, cpuset);
    printf("%s", cpuset_str);
    free(cpuset_str);
  } else {
    hwloc_bitmap_t remaining = hwloc_bitmap_dup(cpuset);
    int first = 1;
    while (!hwloc_bitmap_iszero(remaining)) {
      char type[64];
      unsigned idx;
      hwloc_obj_t obj = hwloc_get_first_largest_obj_inside_cpuset(topology, remaining);
      /* don't show a cache if there's something equivalent and nicer */
      while (hwloc_obj_type_is_cache(obj->type) && obj->arity == 1)
        obj = obj->first_child;
      hwloc_obj_type_snprintf(type, sizeof(type), obj, 1);
      idx = logical ? obj->logical_index : obj->os_index;
      if (idx == (unsigned) -1)
        printf("%s%s", first ? "" : " ", type);
      else
        printf("%s%s:%u", first ? "" : " ", type, idx);
      hwloc_bitmap_andnot(remaining, remaining, obj->cpuset);
      first = 0;
    }
    hwloc_bitmap_free(remaining);
  }

  printf("\t\t%s%s%s\n", name, pidoutput ? "\t" : "", pidoutput ? pidoutput : "");
}
static void
look_rset(int sdl, hwloc_obj_type_t type, struct hwloc_topology *topology, int level)
{
  rsethandle_t rset, rad;
  int i,maxcpus,j;
  int nbnodes;
  struct hwloc_obj *obj;

  if ((topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM))
    rset = rs_alloc(RS_ALL);
  else
    rset = rs_alloc(RS_PARTITION);
  rad = rs_alloc(RS_EMPTY);
  nbnodes = rs_numrads(rset, sdl, 0);
  if (nbnodes == -1) {
    perror("rs_numrads");
    return;
  }

  for (i = 0; i < nbnodes; i++) {
    if (rs_getrad(rset, rad, sdl, i, 0)) {
      fprintf(stderr,"rs_getrad(%d) failed: %s\n", i, strerror(errno));
      continue;
    }
    if (!rs_getinfo(rad, R_NUMPROCS, 0))
      continue;

    /* It seems logical processors are numbered from 1 here, while the
     * bindprocessor functions number them from 0... */
    obj = hwloc_alloc_setup_object(type, i - (type == HWLOC_OBJ_PU));
    obj->cpuset = hwloc_bitmap_alloc();
    obj->os_level = sdl;
    maxcpus = rs_getinfo(rad, R_MAXPROCS, 0);
    for (j = 0; j < maxcpus; j++) {
      if (rs_op(RS_TESTRESOURCE, rad, NULL, R_PROCS, j))
        hwloc_bitmap_set(obj->cpuset, j);
    }
    switch(type) {
      case HWLOC_OBJ_NODE:
        obj->nodeset = hwloc_bitmap_alloc();
        hwloc_bitmap_set(obj->nodeset, i);
        obj->memory.local_memory = 0; /* TODO: odd, rs_getinfo(rad, R_MEMSIZE, 0) << 10 returns the total memory ... */
        obj->memory.page_types_len = 2;
        obj->memory.page_types = malloc(2*sizeof(*obj->memory.page_types));
        memset(obj->memory.page_types, 0, 2*sizeof(*obj->memory.page_types));
        obj->memory.page_types[0].size = hwloc_getpagesize();
#ifdef HAVE__SC_LARGE_PAGESIZE
        obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE);
#endif
        /* TODO: obj->memory.page_types[1].count = rs_getinfo(rset, R_LGPGFREE, 0) / hugepagesize */
        break;
      case HWLOC_OBJ_CACHE:
        obj->attr->cache.size = _system_configuration.L2_cache_size;
        obj->attr->cache.associativity = _system_configuration.L2_cache_asc;
        obj->attr->cache.linesize = 0; /* TODO: ? */
        obj->attr->cache.depth = 2;
        obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED; /* FIXME? */
        break;
      case HWLOC_OBJ_GROUP:
        obj->attr->group.depth = level;
        break;
      case HWLOC_OBJ_CORE:
      {
        hwloc_obj_t obj2, obj3;
        obj2 = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, i);
        obj2->cpuset = hwloc_bitmap_dup(obj->cpuset);
        obj2->attr->cache.size = _system_configuration.dcache_size;
        obj2->attr->cache.associativity = _system_configuration.dcache_asc;
        obj2->attr->cache.linesize = _system_configuration.dcache_line;
        obj2->attr->cache.depth = 1;
        if (_system_configuration.cache_attrib & (1<<30)) {
          /* Unified cache */
          obj2->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
          hwloc_debug("Adding an L1u cache for core %d\n", i);
          hwloc_insert_object_by_cpuset(topology, obj2);
        } else {
          /* Separate Instruction and Data caches */
          obj2->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
          hwloc_debug("Adding an L1d cache for core %d\n", i);
          hwloc_insert_object_by_cpuset(topology, obj2);

          obj3 = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, i);
          obj3->cpuset = hwloc_bitmap_dup(obj->cpuset);
          obj3->attr->cache.size = _system_configuration.icache_size;
          obj3->attr->cache.associativity = _system_configuration.icache_asc;
          obj3->attr->cache.linesize = _system_configuration.icache_line;
          obj3->attr->cache.depth = 1;
          obj3->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;
          hwloc_debug("Adding an L1i cache for core %d\n", i);
          hwloc_insert_object_by_cpuset(topology, obj3);
        }
        break;
      }
      default:
        break;
    }
    hwloc_debug_2args_bitmap("%s %d has cpuset %s\n",
                             hwloc_obj_type_string(type), i, obj->cpuset);
    hwloc_insert_object_by_cpuset(topology, obj);
  }

  rs_free(rset);
  rs_free(rad);
}
PASTIX_INT sopalin_bindthread(PASTIX_INT cpu)
{
#ifdef MARCEL
  {
    marcel_vpset_t vpset = MARCEL_VPSET_ZERO;
    marcel_vpset_vp(&vpset, cpu);
    marcel_apply_vpset(&vpset);
  }
#else /* In the other cases, take the architecture into account */
#ifdef WITH_HWLOC
  {
    hwloc_topology_t topology; /* Topology object */
    hwloc_obj_t      obj;      /* Hwloc object    */
    hwloc_cpuset_t   cpuset;   /* HwLoc cpuset    */

    /* Allocate and initialize topology object. */
    hwloc_topology_init(&topology);

    /* Perform the topology detection. */
    hwloc_topology_load(topology);

    /* Get the requested core. */
    obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, cpu);
    if (!obj)
      return 0;

    /* Get a copy of its cpuset that we may modify. */
    /* Get only one logical processor (in case the core is SMT/hyperthreaded). */
#if !defined(HWLOC_BITMAP_H)
    cpuset = hwloc_cpuset_dup(obj->cpuset);
    hwloc_cpuset_singlify(cpuset);
#else
    cpuset = hwloc_bitmap_dup(obj->cpuset);
    hwloc_bitmap_singlify(cpuset);
#endif

    /* And try to bind ourself there. */
    if (hwloc_set_cpubind(topology, cpuset, HWLOC_CPUBIND_THREAD)) {
      char *str = NULL;
#if !defined(HWLOC_BITMAP_H)
      hwloc_cpuset_asprintf(&str, obj->cpuset);
#else
      hwloc_bitmap_asprintf(&str, obj->cpuset);
#endif
      printf("Couldn't bind to cpuset %s\n", str);
      free(str);
    }

    /* Get the number at Proc level */
    cpu = obj->children[0]->os_index;

    /* Free our cpuset copy */
#if !defined(HWLOC_BITMAP_H)
    hwloc_cpuset_free(cpuset);
#else
    hwloc_bitmap_free(cpuset);
#endif

    /* Destroy topology object. */
    hwloc_topology_destroy(topology);
  }
#else /* WITH_HWLOC */
#ifdef X_ARCHpower_ibm_aix
  {
    tid_t self_ktid = thread_self ();
    bindprocessor(BINDTHREAD, self_ktid, cpu);
  }
#elif (defined X_ARCHalpha_compaq_osf1)
  {
    bind_to_cpu_id(getpid(), cpu, 0);
  }
#elif (defined X_ARCHi686_pc_linux)
#ifndef X_ARCHi686_mac
  {
    cpu_set_t mask;
    CPU_ZERO(&mask);
    CPU_SET(cpu, &mask);
#ifdef HAVE_OLD_SCHED_SETAFFINITY
    if(sched_setaffinity(0,&mask) < 0)
#else /* HAVE_OLD_SCHED_SETAFFINITY */
    if(sched_setaffinity(0,sizeof(mask),&mask) < 0)
#endif /* HAVE_OLD_SCHED_SETAFFINITY */
      {
        perror("sched_setaffinity");
        EXIT(MOD_SOPALIN, INTERNAL_ERR);
      }
  }
#else /* X_ARCHi686_mac */
  {
    thread_affinity_policy_data_t ap;
    int                           ret;

    ap.affinity_tag = 1; /* non-null affinity tag */
    ret = thread_policy_set(mach_thread_self(),
                            THREAD_AFFINITY_POLICY,
                            (integer_t*) &ap,
                            THREAD_AFFINITY_POLICY_COUNT);
    if(ret != 0)
      {
        perror("thread_policy_set");
        EXIT(MOD_SOPALIN, INTERNAL_ERR);
      }
  }
#endif /* X_ARCHi686_mac */
#endif /* X_ACHIxxx */
#endif /* WITH_HWLOC */
#endif /* MARCEL */
  return cpu;
}
int bind_myself_to_core(hwloc_topology_t topology, int id){
  hwloc_cpuset_t cpuset;
  hwloc_obj_t obj;
  char *str;
  int binding_res;
  int depth = hwloc_topology_get_depth(topology);
  int nb_cores = hwloc_get_nbobjs_by_depth(topology, depth-1);
  int my_core;
  int nb_threads = get_nb_threads();
  /* printf("depth=%d\n",depth); */

  switch (mapping_policy){
  case SCATTER:
    /* e.g. 8 cores, 4 threads: threads 0..3 land on cores 0,2,4,6 */
    my_core = id*(nb_cores/nb_threads);
    break;
  default:
    if(verbose_level>=WARNING){
      printf("Wrong scheduling policy. Using COMPACT\n");
    }
    /* fall through */
  case COMPACT:
    /* e.g. 8 cores, 4 threads: threads 0..3 land on cores 0,1,2,3 */
    my_core = id%nb_cores;
  }

  if(verbose_level>=INFO){
    printf("Mapping thread %d on core %d\n",id,my_core);
  }

  /* Get my core. */
  obj = hwloc_get_obj_by_depth(topology, depth-1, my_core);
  if (obj) {
    /* Get a copy of its cpuset that we may modify. */
    cpuset = hwloc_bitmap_dup(obj->cpuset);

    /* Get only one logical processor (in case the core is SMT/hyperthreaded). */
    hwloc_bitmap_singlify(cpuset);

    /*hwloc_bitmap_asprintf(&str, cpuset);
      printf("Binding thread %d to cpuset %s\n", my_core,str);
      FREE(str);
    */

    /* And try to bind ourself there. */
    binding_res = hwloc_set_cpubind(topology, cpuset, HWLOC_CPUBIND_THREAD);
    if (binding_res == -1){
      int error = errno;
      hwloc_bitmap_asprintf(&str, obj->cpuset);
      if(verbose_level>=WARNING)
        printf("Thread %d couldn't bind to cpuset %s: %s.\n This thread is not bound to any core...\n", my_core, str, strerror(error));
      free(str); /* str is allocated by hwloc, free it normally */
      hwloc_bitmap_free(cpuset); /* also release our cpuset copy */
      return 0;
    }
    /* FREE our cpuset copy */
    hwloc_bitmap_free(cpuset);
    return 1;
  }else{
    if(verbose_level>=WARNING)
      printf("No valid object for core id %d!\n",my_core);
    return 0;
  }
}
int main(void)
{
  hwloc_topology_t topology;
#ifdef HWLOC_HAVE_CPU_SET
  unsigned depth;
  hwloc_bitmap_t hwlocset;
  cpu_set_t schedset;
  hwloc_obj_t obj;
  int err;
#endif /* HWLOC_HAVE_CPU_SET */

  hwloc_topology_init(&topology);
  hwloc_topology_load(topology);

#ifdef HWLOC_HAVE_CPU_SET
  depth = hwloc_topology_get_depth(topology);

  hwlocset = hwloc_bitmap_dup(hwloc_topology_get_complete_cpuset(topology));
  hwloc_cpuset_to_glibc_sched_affinity(topology, hwlocset, &schedset, sizeof(schedset));
#ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY
  err = sched_setaffinity(0, sizeof(schedset));
#else
  err = sched_setaffinity(0, sizeof(schedset), &schedset);
#endif
  assert(!err);
  hwloc_bitmap_free(hwlocset);

#ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY
  err = sched_getaffinity(0, sizeof(schedset));
#else
  err = sched_getaffinity(0, sizeof(schedset), &schedset);
#endif
  assert(!err);
  hwlocset = hwloc_bitmap_alloc();
  hwloc_cpuset_from_glibc_sched_affinity(topology, hwlocset, &schedset, sizeof(schedset));
  assert(hwloc_bitmap_isincluded(hwlocset, hwloc_topology_get_complete_cpuset(topology)));
  hwloc_bitmap_andnot(hwlocset, hwlocset, hwloc_topology_get_online_cpuset(topology));
  hwloc_bitmap_andnot(hwlocset, hwlocset, hwloc_topology_get_allowed_cpuset(topology));
  assert(hwloc_bitmap_iszero(hwlocset));
  hwloc_bitmap_free(hwlocset);

  obj = hwloc_get_obj_by_depth(topology, depth-1, hwloc_get_nbobjs_by_depth(topology, depth-1) - 1);
  assert(obj);
  assert(obj->type == HWLOC_OBJ_PU);

  hwlocset = hwloc_bitmap_dup(obj->cpuset);
  hwloc_cpuset_to_glibc_sched_affinity(topology, hwlocset, &schedset, sizeof(schedset));
#ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY
  err = sched_setaffinity(0, sizeof(schedset));
#else
  err = sched_setaffinity(0, sizeof(schedset), &schedset);
#endif
  assert(!err);
  hwloc_bitmap_free(hwlocset);

#ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY
  err = sched_getaffinity(0, sizeof(schedset));
#else
  err = sched_getaffinity(0, sizeof(schedset), &schedset);
#endif
  assert(!err);
  hwlocset = hwloc_bitmap_alloc();
  hwloc_cpuset_from_glibc_sched_affinity(topology, hwlocset, &schedset, sizeof(schedset));
  assert(hwloc_bitmap_isequal(hwlocset, obj->cpuset));
  hwloc_bitmap_free(hwlocset);
#endif /* HWLOC_HAVE_CPU_SET */

  hwloc_topology_destroy(topology);
  return 0;
}
int main(int argc, char *argv[])
{
  const struct hwloc_topology_support *support;
  hwloc_topology_t topology;
  hwloc_const_bitmap_t topocpuset;
  hwloc_bitmap_t cpuset;
  unsigned long flags = 0;
  DIR *dir;
  struct dirent *dirent;
  int show_all = 0;
  int show_threads = 0;
  int get_last_cpu_location = 0;
  char *callname;
  char *pidcmd = NULL;
  int err;
  int opt;

  callname = strrchr(argv[0], '/');
  if (!callname)
    callname = argv[0];
  else
    callname++;
  /* skip argv[0], handle options */
  argc--;
  argv++;

  hwloc_utils_check_api_version(callname);

  while (argc >= 1) {
    opt = 0;
    if (!strcmp(argv[0], "-a"))
      show_all = 1;
    else if (!strcmp(argv[0], "-l") || !strcmp(argv[0], "--logical")) {
      logical = 1;
    } else if (!strcmp(argv[0], "-p") || !strcmp(argv[0], "--physical")) {
      logical = 0;
    } else if (!strcmp(argv[0], "-c") || !strcmp(argv[0], "--cpuset")) {
      show_cpuset = 1;
    } else if (!strcmp(argv[0], "-e") || !strncmp(argv[0], "--get-last-cpu-location", 10)) {
      get_last_cpu_location = 1;
    } else if (!strcmp(argv[0], "-t") || !strcmp(argv[0], "--threads")) {
#ifdef HWLOC_LINUX_SYS
      show_threads = 1;
#else
      fprintf (stderr, "Listing threads is currently only supported on Linux\n");
#endif
    } else if (!strcmp (argv[0], "--whole-system")) {
      flags |= HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM;
    } else if (!strcmp (argv[0], "--pid-cmd")) {
      if (argc < 2) {
        usage(callname, stdout);
        exit(EXIT_FAILURE);
      }
      pidcmd = argv[1];
      opt = 1;
    } else {
      fprintf (stderr, "Unrecognized option: %s\n", argv[0]);
      usage (callname, stderr);
      exit(EXIT_FAILURE);
    }
    argc -= opt+1;
    argv += opt+1;
  }

  err = hwloc_topology_init(&topology);
  if (err)
    goto out;

  hwloc_topology_set_flags(topology, flags);

  err = hwloc_topology_load(topology);
  if (err)
    goto out_with_topology;

  support = hwloc_topology_get_support(topology);
  if (get_last_cpu_location) {
    if (!support->cpubind->get_proc_last_cpu_location)
      goto out_with_topology;
  } else {
    if (!support->cpubind->get_proc_cpubind)
      goto out_with_topology;
  }

  topocpuset = hwloc_topology_get_topology_cpuset(topology);

  dir = opendir("/proc");
  if (!dir)
    goto out_with_topology;

  cpuset = hwloc_bitmap_alloc();
  if (!cpuset)
    goto out_with_dir;

  while ((dirent = readdir(dir))) {
    long pid_number;
    hwloc_pid_t pid;
    char pidoutput[1024];
    char *end;
    char name[64] = "";
    /* management of threads */
    unsigned boundthreads = 0, i;
    long *tids = NULL; /* NULL if process is not threaded */
    hwloc_bitmap_t *tidcpusets = NULL;

    pid_number = strtol(dirent->d_name, &end, 10);
    if (*end)
      /* Not a number */
      continue;

    pid = hwloc_pid_from_number(pid_number, 0);

#ifdef HWLOC_LINUX_SYS
    {
      unsigned pathlen = 6 + strlen(dirent->d_name) + 1 + 7 + 1;
      char *path;
      int file;
      ssize_t n;

      path = malloc(pathlen);
      snprintf(path, pathlen, "/proc/%s/cmdline", dirent->d_name);
      file = open(path, O_RDONLY);
      free(path);

      if (file >= 0) {
        n = read(file, name, sizeof(name) - 1);
        close(file);

        if (n <= 0)
          /* Ignore kernel threads and errors */
          continue;

        name[n] = 0;
      }
    }
#endif /* HWLOC_LINUX_SYS */

    if (show_threads) {
#ifdef HWLOC_LINUX_SYS
      /* check if some threads must be displayed */
      unsigned pathlen = 6 + strlen(dirent->d_name) + 1 + 4 + 1;
      char *path;
      DIR *taskdir;

      path = malloc(pathlen);
      snprintf(path, pathlen, "/proc/%s/task", dirent->d_name);
      taskdir = opendir(path);
      if (taskdir) {
        struct dirent *taskdirent;
        long tid;
        unsigned n = 0;
        /* count threads */
        while ((taskdirent = readdir(taskdir))) {
          tid = strtol(taskdirent->d_name, &end, 10);
          if (*end)
            /* Not a number */
            continue;
          n++;
        }
        if (n > 1) {
          /* if there's more than one thread, see if some are bound */
          tids = malloc(n * sizeof(*tids));
          tidcpusets = calloc(n+1, sizeof(*tidcpusets));
          if (tids && tidcpusets) {
            /* reread the directory but gather info now */
            rewinddir(taskdir);
            i = 0;
            while ((taskdirent = readdir(taskdir))) {
              tid = strtol(taskdirent->d_name, &end, 10);
              if (*end)
                /* Not a number */
                continue;
              if (get_last_cpu_location) {
                if (hwloc_linux_get_tid_last_cpu_location(topology, tid, cpuset))
                  continue;
              } else {
                if (hwloc_linux_get_tid_cpubind(topology, tid, cpuset))
                  continue;
              }
              hwloc_bitmap_and(cpuset, cpuset, topocpuset);
              tids[i] = tid;
              tidcpusets[i] = hwloc_bitmap_dup(cpuset);
              i++;
              if (hwloc_bitmap_iszero(cpuset))
                continue;
              if (hwloc_bitmap_isequal(cpuset, topocpuset) && !show_all)
                continue;
              boundthreads++;
            }
          } else {
            /* failed to alloc, behave as if there were no threads */
            free(tids); tids = NULL;
            free(tidcpusets); tidcpusets = NULL;
          }
        }
        closedir(taskdir);
      }
#endif /* HWLOC_LINUX_SYS */
    }

    if (get_last_cpu_location) {
      if (hwloc_get_proc_last_cpu_location(topology, pid, cpuset, 0))
        continue;
    } else {
      if (hwloc_get_proc_cpubind(topology, pid, cpuset, 0))
        continue;
    }

    hwloc_bitmap_and(cpuset, cpuset, topocpuset);
    if (hwloc_bitmap_iszero(cpuset))
      continue;

    /* don't print anything if the process isn't bound and if no threads
     * are bound and if not showing all */
    if (hwloc_bitmap_isequal(cpuset, topocpuset) && (!tids || !boundthreads) && !show_all)
      continue;

    pidoutput[0] = '\0';
    if (pidcmd) {
      char *cmd;
      FILE *file;
      cmd = malloc(strlen(pidcmd)+1+5+2+1);
      sprintf(cmd, "%s %u", pidcmd, pid);
      file = popen(cmd, "r");
      if (file) {
        if (fgets(pidoutput, sizeof(pidoutput), file)) {
          end = strchr(pidoutput, '\n');
          if (end)
            *end = '\0';
        }
        pclose(file);
      }
      free(cmd);
    }

    /* print the process */
    print_task(topology, pid_number, name, cpuset,
               pidoutput[0] == '\0' ? NULL : pidoutput, 0);
    if (tids)
      /* print each tid we found (its tidcpuset isn't NULL anymore) */
      for(i=0; tidcpusets[i] != NULL; i++) {
        print_task(topology, tids[i], "", tidcpusets[i], NULL, 1);
        hwloc_bitmap_free(tidcpusets[i]);
      }

    /* free threads stuff */
    free(tidcpusets);
    free(tids);
  }

  err = 0;
  hwloc_bitmap_free(cpuset);

 out_with_dir:
  closedir(dir);
 out_with_topology:
  hwloc_topology_destroy(topology);
 out:
  return err;
}
/* user to have to play with the cgroup hierarchy to modify it */
extern int task_cgroup_cpuset_set_task_affinity(slurmd_job_t *job)
{
  int fstatus = SLURM_ERROR;

#ifndef HAVE_HWLOC

  error("task/cgroup: plugin not compiled with hwloc support, "
        "skipping affinity.");
  return fstatus;

#else
  uint32_t i;
  uint32_t nldoms;
  uint32_t nsockets;
  uint32_t ncores;
  uint32_t npus;
  uint32_t nobj;

  uint32_t pfirst,plast;
  uint32_t taskid = job->envtp->localid;
  uint32_t jntasks = job->node_tasks;
  uint32_t jnpus = jntasks * job->cpus_per_task;
  pid_t pid = job->envtp->task_pid;

  cpu_bind_type_t bind_type;
  int verbose = 0; /* initialize: previously read uninitialized when not verbose */

  hwloc_topology_t topology;
#if HWLOC_API_VERSION <= 0x00010000
  hwloc_cpuset_t cpuset,ct;
#else
  hwloc_bitmap_t cpuset,ct;
#endif
  hwloc_obj_t obj;
  struct hwloc_obj *pobj;
  hwloc_obj_type_t hwtype;
  hwloc_obj_type_t req_hwtype;
  int hwdepth;

  size_t tssize;
  cpu_set_t ts;

  bind_type = job->cpu_bind_type ;
  if (conf->task_plugin_param & CPU_BIND_VERBOSE ||
      bind_type & CPU_BIND_VERBOSE)
    verbose = 1 ;

  if (bind_type & CPU_BIND_NONE) {
    if (verbose)
      info("task/cgroup: task[%u] is requesting no affinity", taskid);
    return 0;
  } else if (bind_type & CPU_BIND_TO_THREADS) {
    if (verbose)
      info("task/cgroup: task[%u] is requesting "
           "thread level binding",taskid);
    req_hwtype = HWLOC_OBJ_PU;
  } else if (bind_type & CPU_BIND_TO_CORES) {
    if (verbose)
      info("task/cgroup: task[%u] is requesting "
           "core level binding",taskid);
    req_hwtype = HWLOC_OBJ_CORE;
  } else if (bind_type & CPU_BIND_TO_SOCKETS) {
    if (verbose)
      info("task/cgroup: task[%u] is requesting "
           "socket level binding",taskid);
    req_hwtype = HWLOC_OBJ_SOCKET;
  } else if (bind_type & CPU_BIND_TO_LDOMS) {
    if (verbose)
      info("task/cgroup: task[%u] is requesting "
           "ldom level binding",taskid);
    req_hwtype = HWLOC_OBJ_NODE;
  } else {
    if (verbose)
      info("task/cgroup: task[%u] using core level binding"
           " by default",taskid);
    req_hwtype = HWLOC_OBJ_CORE;
  }

  /* Allocate and initialize hwloc objects */
  hwloc_topology_init(&topology);
#if HWLOC_API_VERSION <= 0x00010000
  cpuset = hwloc_cpuset_alloc() ;
#else
  cpuset = hwloc_bitmap_alloc() ;
#endif

  /*
   * Perform the topology detection. It will only get allowed PUs.
   * At the same time, detect the granularity to use for binding.
   * The granularity can be relaxed from threads to cores if enough
   * cores are available: with hyperthread support, the ntasks-per-core
   * param can let each task access more threads per core.
   * Revert back to machine granularity if no finer-grained granularity
   * matching the request is found. This will result in no affinity
   * applied.
   * The detected granularity will be used to find where to best place
   * the task, then the cpu_bind option will be used to relax the
   * affinity constraint and use more PUs. (i.e. use a core granularity
   * to dispatch the tasks across the sockets and then provide access
   * to each task to the cores of its socket.)
   */
  hwloc_topology_load(topology);

  npus = (uint32_t) hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU);
  ncores = (uint32_t) hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_CORE);
  nsockets = (uint32_t) hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_SOCKET);
  nldoms = (uint32_t) hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_NODE);

  hwtype = HWLOC_OBJ_MACHINE;
  nobj = 1;
  if (npus >= jnpus || bind_type & CPU_BIND_TO_THREADS) {
    hwtype = HWLOC_OBJ_PU;
    nobj = npus;
  }
  if (ncores >= jnpus || bind_type & CPU_BIND_TO_CORES) {
    hwtype = HWLOC_OBJ_CORE;
    nobj = ncores;
  }
  if (nsockets >= jntasks && bind_type & CPU_BIND_TO_SOCKETS) {
    hwtype = HWLOC_OBJ_SOCKET;
    nobj = nsockets;
  }
  /*
   * HWLOC returns all the NUMA nodes available regardless of the
   * number of underlying sockets available (regardless of the allowed
   * resources). So there is no guarantee that each ldom will be populated
   * with usable sockets. So add a simple check that at least ensures that
   * we have as many sockets as ldoms before moving to ldoms granularity.
   */
  if (nldoms >= jntasks &&
      nsockets >= nldoms &&
      bind_type & CPU_BIND_TO_LDOMS) {
    hwtype = HWLOC_OBJ_NODE;
    nobj = nldoms;
  }

  /*
   * Perform a block binding on the detected object respecting the
   * granularity.
   * If there are not enough objects to do the job, revert to no affinity mode.
   */
  if (hwloc_compare_types(hwtype,HWLOC_OBJ_MACHINE) == 0) {
    info("task/cgroup: task[%u] disabling affinity because of %s "
         "granularity",taskid,hwloc_obj_type_string(hwtype));
  } else if (hwloc_compare_types(hwtype,HWLOC_OBJ_CORE) >= 0 &&
             jnpus > nobj) {
    info("task/cgroup: task[%u] not enough %s objects, disabling "
         "affinity",taskid,hwloc_obj_type_string(hwtype));
  } else {
    if (verbose) {
      info("task/cgroup: task[%u] using %s granularity",
           taskid,hwloc_obj_type_string(hwtype));
    }
    if (hwloc_compare_types(hwtype,HWLOC_OBJ_CORE) >= 0) {
      /* cores or threads granularity */
      pfirst = taskid * job->cpus_per_task ;
      plast = pfirst + job->cpus_per_task - 1;
    } else {
      /* sockets or ldoms granularity */
      pfirst = taskid;
      plast = pfirst;
    }
    hwdepth = hwloc_get_type_depth(topology,hwtype);
    for (i = pfirst; i <= plast && i < nobj ; i++) {
      obj = hwloc_get_obj_by_depth(topology,hwdepth,(int)i);
      /* if the requested binding overlaps the granularity,
       * use the ancestor cpuset instead of the object one */
      if (hwloc_compare_types(hwtype,req_hwtype) > 0) {
        /* Get the parent object of req_hwtype or the  */
        /* one just above if not found (meaning of >0) */
        /* (useful for ldoms binding with !NUMA nodes) */
        pobj = obj->parent;
        while (pobj != NULL &&
               hwloc_compare_types(pobj->type, req_hwtype) > 0)
          pobj = pobj->parent;
        if (pobj != NULL) {
          if (verbose)
            info("task/cgroup: task[%u] higher level %s found",
                 taskid, hwloc_obj_type_string(pobj->type));
#if HWLOC_API_VERSION <= 0x00010000
          ct = hwloc_cpuset_dup(pobj->allowed_cpuset);
          hwloc_cpuset_or(cpuset,cpuset,ct);
          hwloc_cpuset_free(ct);
#else
          ct = hwloc_bitmap_dup(pobj->allowed_cpuset);
          hwloc_bitmap_or(cpuset,cpuset,ct);
          hwloc_bitmap_free(ct);
#endif
        } else {
          /* should not be executed */
          if (verbose)
            info("task/cgroup: task[%u] no higher level found",
                 taskid);
#if HWLOC_API_VERSION <= 0x00010000
          ct = hwloc_cpuset_dup(obj->allowed_cpuset);
          hwloc_cpuset_or(cpuset,cpuset,ct);
          hwloc_cpuset_free(ct);
#else
          ct = hwloc_bitmap_dup(obj->allowed_cpuset);
          hwloc_bitmap_or(cpuset,cpuset,ct);
          hwloc_bitmap_free(ct);
#endif
        }
      } else {
#if HWLOC_API_VERSION <= 0x00010000
        ct = hwloc_cpuset_dup(obj->allowed_cpuset);
        hwloc_cpuset_or(cpuset,cpuset,ct);
        hwloc_cpuset_free(ct);
#else
        ct = hwloc_bitmap_dup(obj->allowed_cpuset);
        hwloc_bitmap_or(cpuset,cpuset,ct);
        hwloc_bitmap_free(ct);
#endif
      }
    }

    char *str;
#if HWLOC_API_VERSION <= 0x00010000
    hwloc_cpuset_asprintf(&str,cpuset);
#else
    hwloc_bitmap_asprintf(&str,cpuset);
#endif
    tssize = sizeof(cpu_set_t);
    if (hwloc_cpuset_to_glibc_sched_affinity(topology,cpuset,
                                             &ts,tssize) == 0) {
      fstatus = SLURM_SUCCESS;
      if (sched_setaffinity(pid,tssize,&ts)) {
        error("task/cgroup: task[%u] unable to set "
              "taskset '%s'",taskid,str);
        fstatus = SLURM_ERROR;
      } else if (verbose) {
        info("task/cgroup: task[%u] taskset '%s' is set",
             taskid,str);
      }
    } else {
      error("task/cgroup: task[%u] unable to build "
            "taskset '%s'",taskid,str);
      fstatus = SLURM_ERROR;
    }
    free(str);
  }

  /* Destroy hwloc objects */
#if HWLOC_API_VERSION <= 0x00010000
  hwloc_cpuset_free(cpuset);
#else
  hwloc_bitmap_free(cpuset);
#endif
  hwloc_topology_destroy(topology);

  return fstatus;
#endif
}
int main(void)
{
  hwloc_bitmap_t set;
  hwloc_obj_t obj;
  char *str = NULL;

  hwloc_topology_init(&topology);
  hwloc_topology_load(topology);
  support = hwloc_topology_get_support(topology);

  obj = hwloc_get_root_obj(topology);
  set = hwloc_bitmap_dup(obj->cpuset);

  while (hwloc_bitmap_isequal(obj->cpuset, set)) {
    if (!obj->arity)
      break;
    obj = obj->children[0];
  }

  hwloc_bitmap_asprintf(&str, set);
  printf("system set is %s\n", str);
  free(str);

  test(set, 0);
  printf("now strict\n");
  test(set, HWLOC_CPUBIND_STRICT);
  hwloc_bitmap_free(set);

  set = hwloc_bitmap_dup(obj->cpuset);
  hwloc_bitmap_asprintf(&str, set);
  printf("obj set is %s\n", str);
  free(str);
  test(set, 0);
  printf("now strict\n");
  test(set, HWLOC_CPUBIND_STRICT);

  hwloc_bitmap_singlify(set);
  hwloc_bitmap_asprintf(&str, set);
  printf("singlified to %s\n", str);
  free(str);
  test(set, 0);
  printf("now strict\n");
  test(set, HWLOC_CPUBIND_STRICT);
  hwloc_bitmap_free(set);

  printf("\n\nmemory tests\n\n");
  printf("complete node set\n");
  set = hwloc_bitmap_dup(hwloc_get_root_obj(topology)->cpuset);
  hwloc_bitmap_asprintf(&str, set);
  printf("i.e. cpuset %s\n", str);
  free(str);
  testmem3(set);
  hwloc_bitmap_free(set);

  obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NODE, 0);
  if (obj) {
    set = hwloc_bitmap_dup(obj->cpuset);
    hwloc_bitmap_asprintf(&str, set);
    printf("cpuset set is %s\n", str);
    free(str);
    testmem3(set);

    obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NODE, 1);
    if (obj) {
      hwloc_bitmap_or(set, set, obj->cpuset);
      hwloc_bitmap_asprintf(&str, set);
      printf("cpuset set is %s\n", str);
      free(str);
      testmem3(set);
    }
    hwloc_bitmap_free(set);
  }

  hwloc_topology_destroy(topology);
  return 0;
}
int hwloc_look_hardwired_fujitsu_fx100(struct hwloc_topology *topology)
{
  /* FIXME: what if a broken core is disabled? */
  unsigned i;
  hwloc_obj_t obj;
  hwloc_bitmap_t set;

  for(i=0; i<34; i++) {
    set = hwloc_bitmap_alloc();
    hwloc_bitmap_set(set, i);

    if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1ICACHE)) {
      obj = hwloc_alloc_setup_object(HWLOC_OBJ_L1ICACHE, -1);
      obj->cpuset = hwloc_bitmap_dup(set);
      obj->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;
      obj->attr->cache.depth = 1;
      obj->attr->cache.size = 64*1024;
      obj->attr->cache.linesize = 256;
      obj->attr->cache.associativity = 4;
      hwloc_insert_object_by_cpuset(topology, obj);
    }
    if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1CACHE)) {
      obj = hwloc_alloc_setup_object(HWLOC_OBJ_L1CACHE, -1);
      obj->cpuset = hwloc_bitmap_dup(set);
      obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
      obj->attr->cache.depth = 1;
      obj->attr->cache.size = 64*1024;
      obj->attr->cache.linesize = 256;
      obj->attr->cache.associativity = 4;
      hwloc_insert_object_by_cpuset(topology, obj);
    }
    if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_CORE)) {
      obj = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, i);
      obj->cpuset = set;
      hwloc_insert_object_by_cpuset(topology, obj);
    } else
      hwloc_bitmap_free(set);
  }

  if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L2CACHE)) {
    obj = hwloc_alloc_setup_object(HWLOC_OBJ_L2CACHE, -1);
    obj->cpuset = hwloc_bitmap_alloc();
    hwloc_bitmap_set_range(obj->cpuset, 0, 15);
    hwloc_bitmap_set(obj->cpuset, 32);
    obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
    obj->attr->cache.depth = 2;
    obj->attr->cache.size = 12*1024*1024;
    obj->attr->cache.linesize = 256;
    obj->attr->cache.associativity = 24;
    hwloc_insert_object_by_cpuset(topology, obj);

    obj = hwloc_alloc_setup_object(HWLOC_OBJ_L2CACHE, -1);
    obj->cpuset = hwloc_bitmap_alloc();
    hwloc_bitmap_set_range(obj->cpuset, 16, 31);
    hwloc_bitmap_set(obj->cpuset, 33);
    obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
    obj->attr->cache.depth = 2;
    obj->attr->cache.size = 12*1024*1024;
    obj->attr->cache.linesize = 256;
    obj->attr->cache.associativity = 24;
    hwloc_insert_object_by_cpuset(topology, obj);
  }
  if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_PACKAGE)) {
    obj = hwloc_alloc_setup_object(HWLOC_OBJ_PACKAGE, 0);
    obj->cpuset = hwloc_bitmap_alloc();
    hwloc_bitmap_set_range(obj->cpuset, 0, 33);
    hwloc_obj_add_info(obj, "CPUVendor", "Fujitsu");
    hwloc_obj_add_info(obj, "CPUModel", "SPARC64 XIfx");
    hwloc_insert_object_by_cpuset(topology, obj);
  }

  hwloc_setup_pu_level(topology, 34);

  return 0;
}
static int
hwloc_look_darwin(struct hwloc_backend *backend)
{
  struct hwloc_topology *topology = backend->topology;
  int64_t _nprocs;
  unsigned nprocs;
  int64_t _npackages;
  unsigned i, j, cpu;
  struct hwloc_obj *obj;
  size_t size;
  int64_t l1dcachesize, l1icachesize;
  int64_t cacheways[2];
  int64_t l2cachesize;
  int64_t l3cachesize;
  int64_t cachelinesize;
  int64_t memsize;
  int64_t _tmp;
  char cpumodel[64];
  char cpuvendor[64];
  char cpufamilynumber[20], cpumodelnumber[20], cpustepping[20];
  int gotnuma = 0;
  int gotnumamemory = 0;

  if (topology->levels[0][0]->cpuset)
    /* somebody discovered things */
    return -1;

  hwloc_alloc_root_sets(topology->levels[0][0]);

  /* Don't use hwloc_fallback_nbprocessors() because it would return online cpus only,
   * while we need all cpus when computing logical_per_package, etc below.
   * We don't know which CPUs are offline, but Darwin doesn't support binding anyway.
   *
   * TODO: try hw.logicalcpu_max
   */
  if (hwloc_get_sysctlbyname("hw.logicalcpu", &_nprocs) || _nprocs <= 0)
    /* fallback to deprecated way */
    if (hwloc_get_sysctlbyname("hw.ncpu", &_nprocs) || _nprocs <= 0)
      return -1;

  nprocs = _nprocs;
  topology->support.discovery->pu = 1;

  hwloc_debug("%u procs\n", nprocs);

  size = sizeof(cpuvendor);
  if (sysctlbyname("machdep.cpu.vendor", cpuvendor, &size, NULL, 0))
    cpuvendor[0] = '\0';

  size = sizeof(cpumodel);
  if (sysctlbyname("machdep.cpu.brand_string", cpumodel, &size, NULL, 0))
    cpumodel[0] = '\0';

  if (hwloc_get_sysctlbyname("machdep.cpu.family", &_tmp))
    cpufamilynumber[0] = '\0';
  else
    snprintf(cpufamilynumber, sizeof(cpufamilynumber), "%lld", (long long) _tmp);
  if (hwloc_get_sysctlbyname("machdep.cpu.model", &_tmp))
    cpumodelnumber[0] = '\0';
  else
    snprintf(cpumodelnumber, sizeof(cpumodelnumber), "%lld", (long long) _tmp);
  /* .extfamily and .extmodel are already added to .family and .model */
  if (hwloc_get_sysctlbyname("machdep.cpu.stepping", &_tmp))
    cpustepping[0] = '\0';
  else
    snprintf(cpustepping, sizeof(cpustepping), "%lld", (long long) _tmp);

  if (!hwloc_get_sysctlbyname("hw.packages", &_npackages) && _npackages > 0) {
    unsigned npackages = _npackages;
    int64_t _cores_per_package;
    unsigned cores_per_package;
    int64_t _logical_per_package;
    unsigned logical_per_package;

    hwloc_debug("%u packages\n", npackages);

    if (!hwloc_get_sysctlbyname("machdep.cpu.thread_count", &_logical_per_package)
        && _logical_per_package > 0)
      /* official/modern way */
      logical_per_package = _logical_per_package;
    else if (!hwloc_get_sysctlbyname("machdep.cpu.logical_per_package", &_logical_per_package)
             && _logical_per_package > 0)
      /* old way, gives the max supported by this "kind" of processor,
       * can be larger than the actual number for this model. */
      logical_per_package = _logical_per_package;
    else
      /* Assume the trivia. */
      logical_per_package = nprocs / npackages;

    hwloc_debug("%u threads per package\n", logical_per_package);

    if (nprocs == npackages * logical_per_package
        && hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_PACKAGE))
      for (i = 0; i < npackages; i++) {
        obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PACKAGE, i);
        obj->cpuset = hwloc_bitmap_alloc();
        for (cpu = i*logical_per_package; cpu < (i+1)*logical_per_package; cpu++)
          hwloc_bitmap_set(obj->cpuset, cpu);

        hwloc_debug_1arg_bitmap("package %u has cpuset %s\n", i, obj->cpuset);

        if (cpuvendor[0] != '\0')
          hwloc_obj_add_info(obj, "CPUVendor", cpuvendor);
        if (cpumodel[0] != '\0')
          hwloc_obj_add_info(obj, "CPUModel", cpumodel);
        if (cpufamilynumber[0] != '\0')
          hwloc_obj_add_info(obj, "CPUFamilyNumber", cpufamilynumber);
        if (cpumodelnumber[0] != '\0')
          hwloc_obj_add_info(obj, "CPUModelNumber", cpumodelnumber);
        if (cpustepping[0] != '\0')
          hwloc_obj_add_info(obj, "CPUStepping", cpustepping);

        hwloc_insert_object_by_cpuset(topology, obj);
      }
    else {
      if (cpuvendor[0] != '\0')
        hwloc_obj_add_info(topology->levels[0][0], "CPUVendor", cpuvendor);
      if (cpumodel[0] != '\0')
        hwloc_obj_add_info(topology->levels[0][0], "CPUModel", cpumodel);
      if (cpufamilynumber[0] != '\0')
        hwloc_obj_add_info(topology->levels[0][0], "CPUFamilyNumber", cpufamilynumber);
      if (cpumodelnumber[0] != '\0')
        hwloc_obj_add_info(topology->levels[0][0], "CPUModelNumber", cpumodelnumber);
      if (cpustepping[0] != '\0')
        hwloc_obj_add_info(topology->levels[0][0], "CPUStepping", cpustepping);
    }

    if (!hwloc_get_sysctlbyname("machdep.cpu.core_count", &_cores_per_package)
        && _cores_per_package > 0)
      /* official/modern way */
      cores_per_package = _cores_per_package;
    else if (!hwloc_get_sysctlbyname("machdep.cpu.cores_per_package", &_cores_per_package)
             && _cores_per_package > 0)
      /* old way, gives the max supported by this "kind" of processor,
       * can be larger than the actual number for this model. */
      cores_per_package = _cores_per_package;
    else
      /* no idea */
      cores_per_package = 0;

    if (cores_per_package > 0
        && hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_CORE)) {
      hwloc_debug("%u cores per package\n", cores_per_package);

      if (!(logical_per_package % cores_per_package))
        for (i = 0; i < npackages * cores_per_package; i++) {
          obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_CORE, i);
          obj->cpuset = hwloc_bitmap_alloc();
          for (cpu = i*(logical_per_package/cores_per_package);
               cpu < (i+1)*(logical_per_package/cores_per_package);
               cpu++)
            hwloc_bitmap_set(obj->cpuset, cpu);

          hwloc_debug_1arg_bitmap("core %u has cpuset %s\n", i, obj->cpuset);
          hwloc_insert_object_by_cpuset(topology, obj);
        }
    }
  } else {
    if (cpuvendor[0] != '\0')
      hwloc_obj_add_info(topology->levels[0][0], "CPUVendor", cpuvendor);
    if (cpumodel[0] != '\0')
      hwloc_obj_add_info(topology->levels[0][0], "CPUModel", cpumodel);
    if (cpufamilynumber[0] != '\0')
      hwloc_obj_add_info(topology->levels[0][0], "CPUFamilyNumber", cpufamilynumber);
    if (cpumodelnumber[0] != '\0')
      hwloc_obj_add_info(topology->levels[0][0], "CPUModelNumber", cpumodelnumber);
    if (cpustepping[0] != '\0')
      hwloc_obj_add_info(topology->levels[0][0], "CPUStepping", cpustepping);
  }

  if (hwloc_get_sysctlbyname("hw.l1dcachesize", &l1dcachesize))
    l1dcachesize = 0;
  if (hwloc_get_sysctlbyname("hw.l1icachesize", &l1icachesize))
    l1icachesize = 0;
  if (hwloc_get_sysctlbyname("hw.l2cachesize", &l2cachesize))
    l2cachesize = 0;
  if (hwloc_get_sysctlbyname("hw.l3cachesize", &l3cachesize))
    l3cachesize = 0;
  if (hwloc_get_sysctlbyname("machdep.cpu.cache.L1_associativity", &cacheways[0]))
    cacheways[0] = 0;
  else if (cacheways[0] == 0xff)
    cacheways[0] = -1;
  if (hwloc_get_sysctlbyname("machdep.cpu.cache.L2_associativity", &cacheways[1]))
    cacheways[1] = 0;
  else if (cacheways[1] == 0xff)
    cacheways[1] = -1;
  if (hwloc_get_sysctlbyname("hw.cachelinesize", &cachelinesize))
    cachelinesize = 0;
  if (hwloc_get_sysctlbyname("hw.memsize", &memsize))
    memsize = 0;

  if (!sysctlbyname("hw.cacheconfig", NULL, &size, NULL, 0)) {
    unsigned n = size / sizeof(uint32_t);
    uint64_t cacheconfig[n];
    uint64_t cachesize[n];
    uint32_t cacheconfig32[n];

    if ((!sysctlbyname("hw.cacheconfig", cacheconfig, &size, NULL, 0))) {
      /* Yeech. Darwin seemingly has changed from 32bit to 64bit integers for
       * cacheconfig, with apparently no way for detection. Assume the machine
       * won't have more than 4 billion cpus */
      if (cacheconfig[0] > 0xFFFFFFFFUL) {
        memcpy(cacheconfig32, cacheconfig, size);
        for (i = 0 ; i < size / sizeof(uint32_t); i++)
          cacheconfig[i] = cacheconfig32[i];
      }

      memset(cachesize, 0, sizeof(uint64_t) * n);
      size = sizeof(uint64_t) * n;
      if (sysctlbyname("hw.cachesize", cachesize, &size, NULL, 0)) {
        if (n > 0)
          cachesize[0] = memsize;
        if (n > 1)
          cachesize[1] = l1dcachesize;
        if (n > 2)
          cachesize[2] = l2cachesize;
        if (n > 3)
          cachesize[3] = l3cachesize;
      }

      hwloc_debug("%s", "caches");
      for (i = 0; i < n && cacheconfig[i]; i++)
        hwloc_debug(" %"PRIu64"(%"PRIu64"kB)", cacheconfig[i], cachesize[i] / 1024);

      /* Now we know how many caches there are */
      n = i;
      hwloc_debug("\n%u cache levels\n", n - 1);

      /* For each cache level (0 is memory) */
      for (i = 0; i < n; i++) {
        /* cacheconfig tells us how many cpus share it, let's iterate on each cache */
        for (j = 0; j < (nprocs / cacheconfig[i]); j++) {
          if (!i) {
            obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_NUMANODE, j);
            obj->nodeset = hwloc_bitmap_alloc();
            hwloc_bitmap_set(obj->nodeset, j);
            gotnuma++;
          } else {
            obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L1CACHE+i-1, HWLOC_UNKNOWN_INDEX);
          }
          obj->cpuset = hwloc_bitmap_alloc();
          for (cpu = j*cacheconfig[i]; cpu < ((j+1)*cacheconfig[i]); cpu++)
            hwloc_bitmap_set(obj->cpuset, cpu);

          if (i == 1 && l1icachesize
              && hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1ICACHE)) {
            /* FIXME assuming that L1i and L1d are shared the same way. Darwin
             * does not yet provide a way to know. */
            hwloc_obj_t l1i = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L1ICACHE, HWLOC_UNKNOWN_INDEX);
            l1i->cpuset = hwloc_bitmap_dup(obj->cpuset);
            hwloc_debug_1arg_bitmap("L1icache %u has cpuset %s\n", j, l1i->cpuset);
            l1i->attr->cache.depth = i;
            l1i->attr->cache.size = l1icachesize;
            l1i->attr->cache.linesize = cachelinesize;
            l1i->attr->cache.associativity = 0;
            l1i->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;

            hwloc_insert_object_by_cpuset(topology, l1i);
          }
          if (i) {
            hwloc_debug_2args_bitmap("L%ucache %u has cpuset %s\n", i, j, obj->cpuset);
            obj->attr->cache.depth = i;
            obj->attr->cache.size = cachesize[i];
            obj->attr->cache.linesize = cachelinesize;
            if (i <= sizeof(cacheways) / sizeof(cacheways[0]))
              obj->attr->cache.associativity = cacheways[i-1];
            else
              obj->attr->cache.associativity = 0;
            if (i == 1 && l1icachesize)
              obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
            else
              obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
          } else {
            hwloc_debug_1arg_bitmap("node %u has cpuset %s\n", j, obj->cpuset);
            if (cachesize[i]) {
              obj->attr->numanode.local_memory = cachesize[i];
              gotnumamemory++;
            }
            obj->attr->numanode.page_types_len = 2;
            obj->attr->numanode.page_types = malloc(2*sizeof(*obj->attr->numanode.page_types));
            memset(obj->attr->numanode.page_types, 0, 2*sizeof(*obj->attr->numanode.page_types));
            obj->attr->numanode.page_types[0].size = hwloc_getpagesize();
#if HAVE_DECL__SC_LARGE_PAGESIZE
            obj->attr->numanode.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE);
#endif
          }

          if (hwloc_filter_check_keep_object_type(topology, obj->type))
            hwloc_insert_object_by_cpuset(topology, obj);
          else
            hwloc_free_unlinked_object(obj); /* FIXME: don't build it at all, just build the cpuset in case l1i needs it */
        }
      }
    }
  }

  if (gotnuma)
    topology->support.discovery->numa = 1;
  if (gotnumamemory)
    topology->support.discovery->numa_memory = 1;

  /* add PU objects */
  hwloc_setup_pu_level(topology, nprocs);

  hwloc_obj_add_info(topology->levels[0][0], "Backend", "Darwin");
  hwloc_add_uname_info(topology, NULL);
  return 0;
}
int hwloc_look_hardwired_fujitsu_fx10(struct hwloc_topology *topology)
{
  /* If a broken core gets disabled, its bit disappears and other core bits are NOT shifted towards 0.
   * The node is not given to user jobs, so there is no need to handle that case properly.
   */
  unsigned i;
  hwloc_obj_t obj;
  hwloc_bitmap_t set;

  for(i=0; i<16; i++) {
    set = hwloc_bitmap_alloc();
    hwloc_bitmap_set(set, i);

    if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1ICACHE)) {
      obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L1ICACHE, -1);
      obj->cpuset = hwloc_bitmap_dup(set);
      obj->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;
      obj->attr->cache.depth = 1;
      obj->attr->cache.size = 32*1024;
      obj->attr->cache.linesize = 128;
      obj->attr->cache.associativity = 2;
      hwloc_insert_object_by_cpuset(topology, obj);
    }
    if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1CACHE)) {
      obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L1CACHE, -1);
      obj->cpuset = hwloc_bitmap_dup(set);
      obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
      obj->attr->cache.depth = 1;
      obj->attr->cache.size = 32*1024;
      obj->attr->cache.linesize = 128;
      obj->attr->cache.associativity = 2;
      hwloc_insert_object_by_cpuset(topology, obj);
    }
    if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_CORE)) {
      obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_CORE, i);
      obj->cpuset = set;
      hwloc_insert_object_by_cpuset(topology, obj);
    } else
      hwloc_bitmap_free(set);
  }

  set = hwloc_bitmap_alloc();
  hwloc_bitmap_set_range(set, 0, 15);

  if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L2CACHE)) {
    obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L2CACHE, -1);
    obj->cpuset = hwloc_bitmap_dup(set);
    obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
    obj->attr->cache.depth = 2;
    obj->attr->cache.size = 12*1024*1024;
    obj->attr->cache.linesize = 128;
    obj->attr->cache.associativity = 24;
    hwloc_insert_object_by_cpuset(topology, obj);
  }
  if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_PACKAGE)) {
    obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PACKAGE, 0);
    obj->cpuset = set;
    hwloc_obj_add_info(obj, "CPUVendor", "Fujitsu");
    hwloc_obj_add_info(obj, "CPUModel", "SPARC64 IXfx");
    hwloc_insert_object_by_cpuset(topology, obj);
  } else
    hwloc_bitmap_free(set);

  hwloc_setup_pu_level(topology, 16);

  return 0;
}
int main(void)
{
  int depth;
  unsigned i, n;
  unsigned long size;
  int levels;
  char string[128];
  int topodepth;
  void *m;
  hwloc_topology_t topology;
  hwloc_cpuset_t cpuset;
  hwloc_obj_t obj;

  /* Allocate and initialize topology object. */
  hwloc_topology_init(&topology);

  /* ... Optionally, put detection configuration here to ignore
     some object types, define a synthetic topology, etc....

     The default is to detect all the objects of the machine that
     the caller is allowed to access.  See Configure Topology
     Detection. */

  /* Perform the topology detection. */
  hwloc_topology_load(topology);

  /* Optionally, get some additional topology information
     in case we need the topology depth later. */
  topodepth = hwloc_topology_get_depth(topology);

  /*****************************************************************
   * First example:
   * Walk the topology with an array style, from level 0 (always
   * the system level) to the lowest level (always the proc level).
   *****************************************************************/
  for (depth = 0; depth < topodepth; depth++) {
    printf("*** Objects at level %d\n", depth);
    for (i = 0; i < hwloc_get_nbobjs_by_depth(topology, depth); i++) {
      hwloc_obj_type_snprintf(string, sizeof(string),
                              hwloc_get_obj_by_depth(topology, depth, i), 0);
      printf("Index %u: %s\n", i, string);
    }
  }

  /*****************************************************************
   * Second example:
   * Walk the topology with a tree style.
   *****************************************************************/
  printf("*** Printing overall tree\n");
  print_children(topology, hwloc_get_root_obj(topology), 0);

  /*****************************************************************
   * Third example:
   * Print the number of packages.
   *****************************************************************/
  depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PACKAGE);
  if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) {
    printf("*** The number of packages is unknown\n");
  } else {
    printf("*** %u package(s)\n",
           hwloc_get_nbobjs_by_depth(topology, depth));
  }

  /*****************************************************************
   * Fourth example:
   * Compute the amount of cache that the first logical processor
   * has above it.
   *****************************************************************/
  levels = 0;
  size = 0;
  for (obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 0);
       obj;
       obj = obj->parent)
    if (obj->type == HWLOC_OBJ_CACHE) {
      levels++;
      size += obj->attr->cache.size;
    }
  printf("*** Logical processor 0 has %d caches totaling %luKB\n",
         levels, size / 1024);

  /*****************************************************************
   * Fifth example:
   * Bind to only one thread of the last core of the machine.
   *
   * First find out where cores are, or else smaller sets of CPUs if
   * the OS doesn't have the notion of a "core".
   *****************************************************************/
  depth = hwloc_get_type_or_below_depth(topology, HWLOC_OBJ_CORE);

  /* Get last core. */
  obj = hwloc_get_obj_by_depth(topology, depth,
                               hwloc_get_nbobjs_by_depth(topology, depth) - 1);
  if (obj) {
    /* Get a copy of its cpuset that we may modify. */
    cpuset = hwloc_bitmap_dup(obj->cpuset);

    /* Get only one logical processor (in case the core is
       SMT/hyper-threaded). */
    hwloc_bitmap_singlify(cpuset);

    /* And try to bind ourself there. */
    if (hwloc_set_cpubind(topology, cpuset, 0)) {
      char *str;
      int error = errno;
      hwloc_bitmap_asprintf(&str, obj->cpuset);
      printf("Couldn't bind to cpuset %s: %s\n", str, strerror(error));
      free(str);
    }

    /* Free our cpuset copy */
    hwloc_bitmap_free(cpuset);
  }

  /*****************************************************************
   * Sixth example:
   * Allocate some memory on the last NUMA node, bind some existing
   * memory to the last NUMA node.
   *****************************************************************/
  /* Get last node. There's always at least one. */
  n = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_NUMANODE);
  obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, n - 1);

  size = 1024*1024;
  m = hwloc_alloc_membind_nodeset(topology, size, obj->nodeset,
                                  HWLOC_MEMBIND_BIND, 0);
  hwloc_free(topology, m, size);

  m = malloc(size);
  hwloc_set_area_membind_nodeset(topology, m, size, obj->nodeset,
                                 HWLOC_MEMBIND_BIND, 0);
  free(m);

  /* Destroy topology object. */
  hwloc_topology_destroy(topology);

  return 0;
}
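The nodeset-flavored calls in the sixth example are the hwloc 1.x API. On hwloc 2.x they were removed in favor of the HWLOC_MEMBIND_BYNODESET flag; a minimal sketch of the same last-node allocation and binding under that assumption (hwloc >= 2.0, reusing m, size, obj, and topology from the example above):

/* hwloc 2.x equivalent of the sixth example (assumes hwloc >= 2.0). */
m = hwloc_alloc_membind(topology, size, obj->nodeset,
                        HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET);
hwloc_free(topology, m, size);
m = malloc(size);
hwloc_set_area_membind(topology, m, size, obj->nodeset,
                       HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET);
free(m);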
static void look_rset(int sdl, hwloc_obj_type_t type, struct hwloc_topology *topology, int level) { rsethandle_t rset, rad; int i,maxcpus,j; int nbnodes; struct hwloc_obj *obj; if ((topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM)) rset = rs_alloc(RS_ALL); else rset = rs_alloc(RS_PARTITION); rad = rs_alloc(RS_EMPTY); nbnodes = rs_numrads(rset, sdl, 0); if (nbnodes == -1) { perror("rs_numrads"); return; } for (i = 0; i < nbnodes; i++) { hwloc_bitmap_t cpuset; unsigned os_index = (unsigned) -1; /* no os_index except for PU and NUMANODE below */ if (rs_getrad(rset, rad, sdl, i, 0)) { fprintf(stderr,"rs_getrad(%d) failed: %s\n", i, strerror(errno)); continue; } if (!rs_getinfo(rad, R_NUMPROCS, 0)) continue; maxcpus = rs_getinfo(rad, R_MAXPROCS, 0); cpuset = hwloc_bitmap_alloc(); for (j = 0; j < maxcpus; j++) { if (rs_op(RS_TESTRESOURCE, rad, NULL, R_PROCS, j)) hwloc_bitmap_set(cpuset, j); } if (type == HWLOC_OBJ_PU) { os_index = hwloc_bitmap_first(cpuset); hwloc_debug("Found PU #%u inside node %d for sdl %d\n", os_index, i, sdl); assert(hwloc_bitmap_weight(cpuset) == 1); } else if (type == HWLOC_OBJ_NUMANODE) { /* NUMA node os_index isn't used for binding, just use the rad number to get unique values. * Note that we'll use that fact in hwloc_aix_prepare_membind(). */ os_index = i; hwloc_debug("Using os_index #%u for NUMA node inside node %d for sdl %d\n", os_index, i, sdl); } obj = hwloc_alloc_setup_object(type, os_index); obj->cpuset = cpuset; obj->os_level = sdl; switch(type) { case HWLOC_OBJ_NUMANODE: obj->nodeset = hwloc_bitmap_alloc(); hwloc_bitmap_set(obj->nodeset, i); obj->memory.local_memory = 0; /* TODO: odd, rs_getinfo(rad, R_MEMSIZE, 0) << 10 returns the total memory ... */ obj->memory.page_types_len = 2; obj->memory.page_types = malloc(2*sizeof(*obj->memory.page_types)); memset(obj->memory.page_types, 0, 2*sizeof(*obj->memory.page_types)); obj->memory.page_types[0].size = hwloc_getpagesize(); #ifdef HAVE__SC_LARGE_PAGESIZE obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE); #endif /* TODO: obj->memory.page_types[1].count = rs_getinfo(rset, R_LGPGFREE, 0) / hugepagesize */ break; case HWLOC_OBJ_CACHE: obj->attr->cache.size = _system_configuration.L2_cache_size; obj->attr->cache.associativity = _system_configuration.L2_cache_asc; obj->attr->cache.linesize = 0; /* unknown by default */ if (__power_pc()) if (__power_4() || __power_5() || __power_6() || __power_7()) obj->attr->cache.linesize = 128; obj->attr->cache.depth = 2; obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED; /* OK for power[4567], unknown for others */ break; case HWLOC_OBJ_GROUP: obj->attr->group.depth = level; break; case HWLOC_OBJ_CORE: { hwloc_obj_t obj2, obj3; obj2 = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, i); obj2->cpuset = hwloc_bitmap_dup(obj->cpuset); obj2->attr->cache.size = _system_configuration.dcache_size; obj2->attr->cache.associativity = _system_configuration.dcache_asc; obj2->attr->cache.linesize = _system_configuration.dcache_line; obj2->attr->cache.depth = 1; if (_system_configuration.cache_attrib & (1<<30)) { /* Unified cache */ obj2->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED; hwloc_debug("Adding an L1u cache for core %d\n", i); hwloc_insert_object_by_cpuset(topology, obj2); } else { /* Separate Instruction and Data caches */ obj2->attr->cache.type = HWLOC_OBJ_CACHE_DATA; hwloc_debug("Adding an L1d cache for core %d\n", i); hwloc_insert_object_by_cpuset(topology, obj2); obj3 = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, i); obj3->cpuset = hwloc_bitmap_dup(obj->cpuset); 
        obj3->attr->cache.size = _system_configuration.icache_size;
        obj3->attr->cache.associativity = _system_configuration.icache_asc;
        obj3->attr->cache.linesize = _system_configuration.icache_line;
        obj3->attr->cache.depth = 1;
        obj3->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;
        hwloc_debug("Adding an L1i cache for core %d\n", i);
        hwloc_insert_object_by_cpuset(topology, obj3);
      }
      break;
    }
    default:
      break;
    }

    hwloc_debug_2args_bitmap("%s %d has cpuset %s\n",
                             hwloc_obj_type_string(type), i, obj->cpuset);
    hwloc_insert_object_by_cpuset(topology, obj);
  }

  rs_free(rset);
  rs_free(rad);
}
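look_rset() is parameterized by an AIX "system detail level" (sdl). For reference, here is a hedged sketch of how a caller might walk every detail level and hand each one to look_rset(), using the documented rs_getinfo(NULL, R_MAXSDL, 0) query. Mapping every level to HWLOC_OBJ_GROUP is a simplification of my own; the real backend distinguishes specific levels (e.g. R_SMPSDL, R_MCMSDL) and maps them to the matching hwloc object types.

/* Sketch only: drive look_rset() across all system detail levels.
 * The uniform HWLOC_OBJ_GROUP mapping is a placeholder; a real
 * backend maps known sdl values to NUMA node, cache, core, etc. */
static void look_all_sdls(struct hwloc_topology *topology)
{
  int sdl;
  int maxsdl = rs_getinfo(NULL, R_MAXSDL, 0); /* deepest detail level */
  for (sdl = 0; sdl <= maxsdl; sdl++)
    look_rset(sdl, HWLOC_OBJ_GROUP, topology, sdl);
}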
static int
hwloc_look_darwin(struct hwloc_backend *backend)
{
  struct hwloc_topology *topology = backend->topology;
  int64_t _nprocs;
  unsigned nprocs;
  int64_t _npackages;
  unsigned i, j, cpu;
  struct hwloc_obj *obj;
  size_t size;
  int64_t l1dcachesize, l1icachesize;
  int64_t cacheways[2];
  int64_t l2cachesize;
  int64_t cachelinesize;
  int64_t memsize;
  char cpumodel[64];

  if (topology->levels[0][0]->cpuset)
    /* somebody discovered things */
    return -1;

  hwloc_alloc_obj_cpusets(topology->levels[0][0]);

  if (hwloc_get_sysctlbyname("hw.ncpu", &_nprocs) || _nprocs <= 0)
    return -1;
  nprocs = _nprocs;
  topology->support.discovery->pu = 1;
  hwloc_debug("%u procs\n", nprocs);

  size = sizeof(cpumodel);
  if (sysctlbyname("machdep.cpu.brand_string", cpumodel, &size, NULL, 0))
    cpumodel[0] = '\0';

  if (!hwloc_get_sysctlbyname("hw.packages", &_npackages) && _npackages > 0) {
    unsigned npackages = _npackages;
    int64_t _cores_per_package;
    int64_t _logical_per_package;
    unsigned logical_per_package;

    hwloc_debug("%u packages\n", npackages);

    if (!hwloc_get_sysctlbyname("machdep.cpu.logical_per_package", &_logical_per_package)
        && _logical_per_package > 0)
      logical_per_package = _logical_per_package;
    else
      /* Assume the trivial mapping. */
      logical_per_package = nprocs / npackages;

    hwloc_debug("%u threads per package\n", logical_per_package);

    if (nprocs == npackages * logical_per_package
        && hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_PACKAGE))
      for (i = 0; i < npackages; i++) {
        obj = hwloc_alloc_setup_object(HWLOC_OBJ_PACKAGE, i);
        obj->cpuset = hwloc_bitmap_alloc();
        for (cpu = i*logical_per_package; cpu < (i+1)*logical_per_package; cpu++)
          hwloc_bitmap_set(obj->cpuset, cpu);
        hwloc_debug_1arg_bitmap("package %u has cpuset %s\n", i, obj->cpuset);
        if (cpumodel[0] != '\0')
          hwloc_obj_add_info(obj, "CPUModel", cpumodel);
        hwloc_insert_object_by_cpuset(topology, obj);
      }
    else if (cpumodel[0] != '\0')
      hwloc_obj_add_info(topology->levels[0][0], "CPUModel", cpumodel);

    if (!hwloc_get_sysctlbyname("machdep.cpu.cores_per_package", &_cores_per_package)
        && _cores_per_package > 0
        && hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_CORE)) {
      unsigned cores_per_package = _cores_per_package;
      hwloc_debug("%u cores per package\n", cores_per_package);

      if (!(logical_per_package % cores_per_package))
        for (i = 0; i < npackages * cores_per_package; i++) {
          obj = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, i);
          obj->cpuset = hwloc_bitmap_alloc();
          for (cpu = i*(logical_per_package/cores_per_package);
               cpu < (i+1)*(logical_per_package/cores_per_package);
               cpu++)
            hwloc_bitmap_set(obj->cpuset, cpu);
          hwloc_debug_1arg_bitmap("core %u has cpuset %s\n", i, obj->cpuset);
          hwloc_insert_object_by_cpuset(topology, obj);
        }
    }
  } else if (cpumodel[0] != '\0')
    hwloc_obj_add_info(topology->levels[0][0], "CPUModel", cpumodel);

  if (hwloc_get_sysctlbyname("hw.l1dcachesize", &l1dcachesize))
    l1dcachesize = 0;
  if (hwloc_get_sysctlbyname("hw.l1icachesize", &l1icachesize))
    l1icachesize = 0;
  if (hwloc_get_sysctlbyname("hw.l2cachesize", &l2cachesize))
    l2cachesize = 0;
  if (hwloc_get_sysctlbyname("machdep.cpu.cache.L1_associativity", &cacheways[0]))
    cacheways[0] = 0;
  else if (cacheways[0] == 0xff)
    cacheways[0] = -1;
  if (hwloc_get_sysctlbyname("machdep.cpu.cache.L2_associativity", &cacheways[1]))
    cacheways[1] = 0;
  else if (cacheways[1] == 0xff)
    cacheways[1] = -1;
  if (hwloc_get_sysctlbyname("hw.cachelinesize", &cachelinesize))
    cachelinesize = 0;
  if (hwloc_get_sysctlbyname("hw.memsize", &memsize))
    memsize = 0;

  if (!sysctlbyname("hw.cacheconfig", NULL, &size, NULL, 0)) {
    unsigned n = size / sizeof(uint32_t);
    uint64_t *cacheconfig = NULL;
    uint64_t *cachesize = NULL;
    uint32_t *cacheconfig32 = NULL;

    cacheconfig = malloc(sizeof(uint64_t) * n);
    if (NULL == cacheconfig)
      goto out;
    cachesize = malloc(sizeof(uint64_t) * n);
    if (NULL == cachesize)
      goto out;
    cacheconfig32 = malloc(sizeof(uint32_t) * n);
    if (NULL == cacheconfig32)
      goto out;

    if ((!sysctlbyname("hw.cacheconfig", cacheconfig, &size, NULL, 0))) {
      /* Yeech. Darwin seemingly has changed from 32bit to 64bit integers for
       * cacheconfig, with apparently no way for detection. Assume the machine
       * won't have more than 4 billion cpus */
      if (cacheconfig[0] > 0xFFFFFFFFUL) {
        memcpy(cacheconfig32, cacheconfig, size);
        for (i = 0 ; i < size / sizeof(uint32_t); i++)
          cacheconfig[i] = cacheconfig32[i];
      }

      memset(cachesize, 0, sizeof(uint64_t) * n);
      size = sizeof(uint64_t) * n;
      if (sysctlbyname("hw.cachesize", cachesize, &size, NULL, 0)) {
        if (n > 0)
          cachesize[0] = memsize;
        if (n > 1)
          cachesize[1] = l1dcachesize;
        if (n > 2)
          cachesize[2] = l2cachesize;
      }

      hwloc_debug("%s", "caches");
      for (i = 0; i < n && cacheconfig[i]; i++)
        hwloc_debug(" %"PRIu64"(%"PRIu64"kB)", cacheconfig[i], cachesize[i] / 1024);

      /* Now we know how many caches there are */
      n = i;
      hwloc_debug("\n%u cache levels\n", n - 1);

      /* For each cache level (0 is memory) */
      for (i = 0; i < n; i++) {
        /* cacheconfig tells us how many cpus share it, let's iterate on each cache */
        for (j = 0; j < (nprocs / cacheconfig[i]); j++) {
          if (!i) {
            obj = hwloc_alloc_setup_object(HWLOC_OBJ_NUMANODE, j);
            obj->nodeset = hwloc_bitmap_alloc();
            hwloc_bitmap_set(obj->nodeset, j);
          } else {
            obj = hwloc_alloc_setup_object(HWLOC_OBJ_L1CACHE+i-1, -1);
          }
          obj->cpuset = hwloc_bitmap_alloc();
          for (cpu = j*cacheconfig[i]; cpu < ((j+1)*cacheconfig[i]); cpu++)
            hwloc_bitmap_set(obj->cpuset, cpu);

          if (i == 1 && l1icachesize
              && hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1ICACHE)) {
            /* FIXME assuming that L1i and L1d are shared the same way. Darwin
             * does not yet provide a way to know. */
            hwloc_obj_t l1i = hwloc_alloc_setup_object(HWLOC_OBJ_L1ICACHE, -1);
            l1i->cpuset = hwloc_bitmap_dup(obj->cpuset);
            hwloc_debug_1arg_bitmap("L1icache %u has cpuset %s\n", j, l1i->cpuset);
            l1i->attr->cache.depth = i;
            l1i->attr->cache.size = l1icachesize;
            l1i->attr->cache.linesize = cachelinesize;
            l1i->attr->cache.associativity = 0;
            l1i->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;
            hwloc_insert_object_by_cpuset(topology, l1i);
          }
          if (i) {
            hwloc_debug_2args_bitmap("L%ucache %u has cpuset %s\n", i, j, obj->cpuset);
            obj->attr->cache.depth = i;
            obj->attr->cache.size = cachesize[i];
            obj->attr->cache.linesize = cachelinesize;
            if (i <= sizeof(cacheways) / sizeof(cacheways[0]))
              obj->attr->cache.associativity = cacheways[i-1];
            else
              obj->attr->cache.associativity = 0;
            if (i == 1 && l1icachesize)
              obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
            else
              obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
          } else {
            hwloc_debug_1arg_bitmap("node %u has cpuset %s\n", j, obj->cpuset);
            obj->memory.local_memory = cachesize[i];
            obj->memory.page_types_len = 2;
            obj->memory.page_types = malloc(2*sizeof(*obj->memory.page_types));
            memset(obj->memory.page_types, 0, 2*sizeof(*obj->memory.page_types));
            obj->memory.page_types[0].size = hwloc_getpagesize();
#ifdef HAVE__SC_LARGE_PAGESIZE
            obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE);
#endif
          }

          if (hwloc_filter_check_keep_object_type(topology, obj->type))
            hwloc_insert_object_by_cpuset(topology, obj);
          else
            hwloc_free_unlinked_object(obj); /* FIXME: don't build at all, just build the cpuset in case l1i needs it */
        }
      }
    }

  out:
    free(cacheconfig);
    free(cachesize);
    free(cacheconfig32);
  }

  /* add PU objects */
  hwloc_setup_pu_level(topology, nprocs);

  hwloc_obj_add_info(topology->levels[0][0], "Backend", "Darwin");
  hwloc_add_uname_info(topology, NULL);
  return 0;
}
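The Darwin probe above leans on a hwloc_get_sysctlbyname() wrapper that is not part of this excerpt. Below is a minimal sketch consistent with how it is used here: it returns 0 on success and normalizes a sysctl integer that Darwin may export as either 32 or 64 bits into an int64_t. The exact implementation in hwloc may differ.

#include <stdint.h>
#include <sys/types.h>
#include <sys/sysctl.h>

/* Sketch only: read an integer sysctl of unknown width (32 or 64 bit)
 * and widen the result to int64_t. Returns 0 on success, -1 on error
 * or on an unexpected result size. */
static int hwloc_get_sysctlbyname(const char *name, int64_t *ret)
{
  union { int32_t i32; int64_t i64; } n;
  size_t size = sizeof(n);
  if (sysctlbyname(name, &n, &size, NULL, 0))
    return -1;
  switch (size) {
  case sizeof(n.i32): *ret = n.i32; break;
  case sizeof(n.i64): *ret = n.i64; break;
  default: return -1;
  }
  return 0;
}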