static int hwloc_look_darwin(struct hwloc_backend *backend) { struct hwloc_topology *topology = backend->topology; int64_t _nprocs; unsigned nprocs; int64_t _npackages; unsigned i, j, cpu; struct hwloc_obj *obj; size_t size; int64_t l1dcachesize, l1icachesize; int64_t cacheways[2]; int64_t l2cachesize; int64_t cachelinesize; int64_t memsize; char cpumodel[64]; if (topology->levels[0][0]->cpuset) /* somebody discovered things */ return -1; hwloc_alloc_obj_cpusets(topology->levels[0][0]); if (hwloc_get_sysctlbyname("hw.ncpu", &_nprocs) || _nprocs <= 0) return -1; nprocs = _nprocs; topology->support.discovery->pu = 1; hwloc_debug("%u procs\n", nprocs); size = sizeof(cpumodel); if (sysctlbyname("machdep.cpu.brand_string", cpumodel, &size, NULL, 0)) cpumodel[0] = '\0'; if (!hwloc_get_sysctlbyname("hw.packages", &_npackages) && _npackages > 0) { unsigned npackages = _npackages; int64_t _cores_per_package; int64_t _logical_per_package; unsigned logical_per_package; hwloc_debug("%u packages\n", npackages); if (!hwloc_get_sysctlbyname("machdep.cpu.logical_per_package", &_logical_per_package) && _logical_per_package > 0) logical_per_package = _logical_per_package; else /* Assume the trivia. */ logical_per_package = nprocs / npackages; hwloc_debug("%u threads per package\n", logical_per_package); if (nprocs == npackages * logical_per_package && hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_PACKAGE)) for (i = 0; i < npackages; i++) { obj = hwloc_alloc_setup_object(HWLOC_OBJ_PACKAGE, i); obj->cpuset = hwloc_bitmap_alloc(); for (cpu = i*logical_per_package; cpu < (i+1)*logical_per_package; cpu++) hwloc_bitmap_set(obj->cpuset, cpu); hwloc_debug_1arg_bitmap("package %u has cpuset %s\n", i, obj->cpuset); if (cpumodel[0] != '\0') hwloc_obj_add_info(obj, "CPUModel", cpumodel); hwloc_insert_object_by_cpuset(topology, obj); } else if (cpumodel[0] != '\0') hwloc_obj_add_info(topology->levels[0][0], "CPUModel", cpumodel); if (!hwloc_get_sysctlbyname("machdep.cpu.cores_per_package", &_cores_per_package) && _cores_per_package > 0 && hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_CORE)) { unsigned cores_per_package = _cores_per_package; hwloc_debug("%u cores per package\n", cores_per_package); if (!(logical_per_package % cores_per_package)) for (i = 0; i < npackages * cores_per_package; i++) { obj = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, i); obj->cpuset = hwloc_bitmap_alloc(); for (cpu = i*(logical_per_package/cores_per_package); cpu < (i+1)*(logical_per_package/cores_per_package); cpu++) hwloc_bitmap_set(obj->cpuset, cpu); hwloc_debug_1arg_bitmap("core %u has cpuset %s\n", i, obj->cpuset); hwloc_insert_object_by_cpuset(topology, obj); } } } else if (cpumodel[0] != '\0') hwloc_obj_add_info(topology->levels[0][0], "CPUModel", cpumodel); if (hwloc_get_sysctlbyname("hw.l1dcachesize", &l1dcachesize)) l1dcachesize = 0; if (hwloc_get_sysctlbyname("hw.l1icachesize", &l1icachesize)) l1icachesize = 0; if (hwloc_get_sysctlbyname("hw.l2cachesize", &l2cachesize)) l2cachesize = 0; if (hwloc_get_sysctlbyname("machdep.cpu.cache.L1_associativity", &cacheways[0])) cacheways[0] = 0; else if (cacheways[0] == 0xff) cacheways[0] = -1; if (hwloc_get_sysctlbyname("machdep.cpu.cache.L2_associativity", &cacheways[1])) cacheways[1] = 0; else if (cacheways[1] == 0xff) cacheways[1] = -1; if (hwloc_get_sysctlbyname("hw.cachelinesize", &cachelinesize)) cachelinesize = 0; if (hwloc_get_sysctlbyname("hw.memsize", &memsize)) memsize = 0; if (!sysctlbyname("hw.cacheconfig", NULL, &size, NULL, 0)) { unsigned n = size / sizeof(uint32_t); uint64_t *cacheconfig = NULL; uint64_t *cachesize = NULL; uint32_t *cacheconfig32 = NULL; cacheconfig = malloc(sizeof(uint64_t) * n); if (NULL == cacheconfig) { goto out; } cachesize = malloc(sizeof(uint64_t) * n); if (NULL == cachesize) { goto out; } cacheconfig32 = malloc(sizeof(uint32_t) * n); if (NULL == cacheconfig32) { goto out; } if ((!sysctlbyname("hw.cacheconfig", cacheconfig, &size, NULL, 0))) { /* Yeech. Darwin seemingly has changed from 32bit to 64bit integers for * cacheconfig, with apparently no way for detection. Assume the machine * won't have more than 4 billion cpus */ if (cacheconfig[0] > 0xFFFFFFFFUL) { memcpy(cacheconfig32, cacheconfig, size); for (i = 0 ; i < size / sizeof(uint32_t); i++) cacheconfig[i] = cacheconfig32[i]; } memset(cachesize, 0, sizeof(uint64_t) * n); size = sizeof(uint64_t) * n; if (sysctlbyname("hw.cachesize", cachesize, &size, NULL, 0)) { if (n > 0) cachesize[0] = memsize; if (n > 1) cachesize[1] = l1dcachesize; if (n > 2) cachesize[2] = l2cachesize; } hwloc_debug("%s", "caches"); for (i = 0; i < n && cacheconfig[i]; i++) hwloc_debug(" %"PRIu64"(%"PRIu64"kB)", cacheconfig[i], cachesize[i] / 1024); /* Now we know how many caches there are */ n = i; hwloc_debug("\n%u cache levels\n", n - 1); /* For each cache level (0 is memory) */ for (i = 0; i < n; i++) { /* cacheconfig tells us how many cpus share it, let's iterate on each cache */ for (j = 0; j < (nprocs / cacheconfig[i]); j++) { if (!i) { obj = hwloc_alloc_setup_object(HWLOC_OBJ_NUMANODE, j); obj->nodeset = hwloc_bitmap_alloc(); hwloc_bitmap_set(obj->nodeset, j); } else { obj = hwloc_alloc_setup_object(HWLOC_OBJ_L1CACHE+i-1, -1); } obj->cpuset = hwloc_bitmap_alloc(); for (cpu = j*cacheconfig[i]; cpu < ((j+1)*cacheconfig[i]); cpu++) hwloc_bitmap_set(obj->cpuset, cpu); if (i == 1 && l1icachesize && hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1ICACHE)) { /* FIXME assuming that L1i and L1d are shared the same way. Darwin * does not yet provide a way to know. */ hwloc_obj_t l1i = hwloc_alloc_setup_object(HWLOC_OBJ_L1ICACHE, -1); l1i->cpuset = hwloc_bitmap_dup(obj->cpuset); hwloc_debug_1arg_bitmap("L1icache %u has cpuset %s\n", j, l1i->cpuset); l1i->attr->cache.depth = i; l1i->attr->cache.size = l1icachesize; l1i->attr->cache.linesize = cachelinesize; l1i->attr->cache.associativity = 0; l1i->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION; hwloc_insert_object_by_cpuset(topology, l1i); } if (i) { hwloc_debug_2args_bitmap("L%ucache %u has cpuset %s\n", i, j, obj->cpuset); obj->attr->cache.depth = i; obj->attr->cache.size = cachesize[i]; obj->attr->cache.linesize = cachelinesize; if (i <= sizeof(cacheways) / sizeof(cacheways[0])) obj->attr->cache.associativity = cacheways[i-1]; else obj->attr->cache.associativity = 0; if (i == 1 && l1icachesize) obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA; else obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED; } else { hwloc_debug_1arg_bitmap("node %u has cpuset %s\n", j, obj->cpuset); obj->memory.local_memory = cachesize[i]; obj->memory.page_types_len = 2; obj->memory.page_types = malloc(2*sizeof(*obj->memory.page_types)); memset(obj->memory.page_types, 0, 2*sizeof(*obj->memory.page_types)); obj->memory.page_types[0].size = hwloc_getpagesize(); #ifdef HAVE__SC_LARGE_PAGESIZE obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE); #endif } if (hwloc_filter_check_keep_object_type(topology, obj->type)) hwloc_insert_object_by_cpuset(topology, obj); else hwloc_free_unlinked_object(obj); /* FIXME: don't built at all, just build the cpuset in case l1i needs it */ } } } out: free(cacheconfig); free(cachesize); free(cacheconfig32); } /* add PU objects */ hwloc_setup_pu_level(topology, nprocs); hwloc_obj_add_info(topology->levels[0][0], "Backend", "Darwin"); hwloc_add_uname_info(topology, NULL); return 0; }
static int hwloc_look_osf(struct hwloc_backend *backend) { struct hwloc_topology *topology = backend->topology; cpu_cursor_t cursor; unsigned nbnodes; radid_t radid, radid2; radset_t radset, radset2; cpuid_t cpuid; cpuset_t cpuset; struct hwloc_obj *obj; unsigned distance; if (topology->levels[0][0]->cpuset) /* somebody discovered things */ return 0; hwloc_alloc_obj_cpusets(topology->levels[0][0]); nbnodes = rad_get_num(); cpusetcreate(&cpuset); radsetcreate(&radset); radsetcreate(&radset2); { hwloc_obj_t *nodes = calloc(nbnodes, sizeof(hwloc_obj_t)); unsigned *indexes = calloc(nbnodes, sizeof(unsigned)); float *distances = calloc(nbnodes*nbnodes, sizeof(float)); unsigned nfound; numa_attr_t attr; attr.nattr_type = R_RAD; attr.nattr_descr.rd_radset = radset; attr.nattr_flags = 0; for (radid = 0; radid < (radid_t) nbnodes; radid++) { rademptyset(radset); radaddset(radset, radid); cpuemptyset(cpuset); if (rad_get_cpus(radid, cpuset)==-1) { fprintf(stderr,"rad_get_cpus(%d) failed: %s\n",radid,strerror(errno)); continue; } indexes[radid] = radid; nodes[radid] = obj = hwloc_alloc_setup_object(HWLOC_OBJ_NODE, radid); obj->cpuset = hwloc_bitmap_alloc(); obj->memory.local_memory = rad_get_physmem(radid) * hwloc_getpagesize(); obj->memory.page_types_len = 2; obj->memory.page_types = malloc(2*sizeof(*obj->memory.page_types)); memset(obj->memory.page_types, 0, 2*sizeof(*obj->memory.page_types)); obj->memory.page_types[0].size = hwloc_getpagesize(); #ifdef HAVE__SC_LARGE_PAGESIZE obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE); #endif cursor = SET_CURSOR_INIT; while((cpuid = cpu_foreach(cpuset, 0, &cursor)) != CPU_NONE) hwloc_bitmap_set(obj->cpuset, cpuid); hwloc_debug_1arg_bitmap("node %d has cpuset %s\n", radid, obj->cpuset); hwloc_insert_object_by_cpuset(topology, obj); nfound = 0; for (radid2 = 0; radid2 < (radid_t) nbnodes; radid2++) distances[radid*nbnodes+radid2] = RAD_DIST_REMOTE; for (distance = RAD_DIST_LOCAL; distance < RAD_DIST_REMOTE; distance++) { attr.nattr_distance = distance; /* get set of NUMA nodes at distance <= DISTANCE */ if (nloc(&attr, radset2)) { fprintf(stderr,"nloc failed: %s\n", strerror(errno)); continue; } cursor = SET_CURSOR_INIT; while ((radid2 = rad_foreach(radset2, 0, &cursor)) != RAD_NONE) { if (distances[radid*nbnodes+radid2] == RAD_DIST_REMOTE) { distances[radid*nbnodes+radid2] = (float) distance; nfound++; } } if (nfound == nbnodes) /* Finished finding distances, no need to go up to RAD_DIST_REMOTE */ break; } } hwloc_distances_set(topology, HWLOC_OBJ_NODE, nbnodes, indexes, nodes, distances, 0 /* OS cannot force */); } radsetdestroy(&radset2); radsetdestroy(&radset); cpusetdestroy(&cpuset); /* add PU objects */ hwloc_setup_pu_level(topology, hwloc_fallback_nbprocessors(topology)); hwloc_obj_add_info(topology->levels[0][0], "Backend", "OSF"); if (topology->is_thissystem) hwloc_add_uname_info(topology); return 1; }
static int hwloc_look_kstat(struct hwloc_topology *topology) { /* FIXME this assumes that all packages are identical */ char *CPUType = hwloc_solaris_get_chip_type(); char *CPUModel = hwloc_solaris_get_chip_model(); kstat_ctl_t *kc = kstat_open(); kstat_t *ksp; kstat_named_t *stat; unsigned look_cores = 1, look_chips = 1; unsigned Pproc_max = 0; unsigned Pproc_alloc = 256; struct hwloc_solaris_Pproc { unsigned Lpkg, Ppkg, Lcore, Lproc; } * Pproc = malloc(Pproc_alloc * sizeof(*Pproc)); unsigned Lproc_num = 0; unsigned Lproc_alloc = 256; struct hwloc_solaris_Lproc { unsigned Pproc; } * Lproc = malloc(Lproc_alloc * sizeof(*Lproc)); unsigned Lcore_num = 0; unsigned Lcore_alloc = 256; struct hwloc_solaris_Lcore { unsigned Pcore, Ppkg; } * Lcore = malloc(Lcore_alloc * sizeof(*Lcore)); unsigned Lpkg_num = 0; unsigned Lpkg_alloc = 256; struct hwloc_solaris_Lpkg { unsigned Ppkg; } * Lpkg = malloc(Lpkg_alloc * sizeof(*Lpkg)); unsigned pkgid, coreid, cpuid; unsigned i; for (i = 0; i < Pproc_alloc; i++) { Pproc[i].Lproc = -1; Pproc[i].Lpkg = -1; Pproc[i].Ppkg = -1; Pproc[i].Lcore = -1; } if (!kc) { hwloc_debug("kstat_open failed: %s\n", strerror(errno)); free(Pproc); free(Lproc); free(Lcore); free(Lpkg); return 0; } for (ksp = kc->kc_chain; ksp; ksp = ksp->ks_next) { if (strncmp("cpu_info", ksp->ks_module, 8)) continue; cpuid = ksp->ks_instance; if (kstat_read(kc, ksp, NULL) == -1) { fprintf(stderr, "kstat_read failed for CPU%u: %s\n", cpuid, strerror(errno)); continue; } hwloc_debug("cpu%u\n", cpuid); if (cpuid >= Pproc_alloc) { struct hwloc_solaris_Pproc *tmp = realloc(Pproc, 2*Pproc_alloc * sizeof(*Pproc)); if (!tmp) goto err; Pproc = tmp; Pproc_alloc *= 2; for(i = Pproc_alloc/2; i < Pproc_alloc; i++) { Pproc[i].Lproc = -1; Pproc[i].Lpkg = -1; Pproc[i].Ppkg = -1; Pproc[i].Lcore = -1; } } Pproc[cpuid].Lproc = Lproc_num; if (Lproc_num >= Lproc_alloc) { struct hwloc_solaris_Lproc *tmp = realloc(Lproc, 2*Lproc_alloc * sizeof(*Lproc)); if (!tmp) goto err; Lproc = tmp; Lproc_alloc *= 2; } Lproc[Lproc_num].Pproc = cpuid; Lproc_num++; if (cpuid >= Pproc_max) Pproc_max = cpuid + 1; stat = (kstat_named_t *) kstat_data_lookup(ksp, "state"); if (!stat) hwloc_debug("could not read state for CPU%u: %s\n", cpuid, strerror(errno)); else if (stat->data_type != KSTAT_DATA_CHAR) hwloc_debug("unknown kstat type %d for cpu state\n", stat->data_type); else { hwloc_debug("cpu%u's state is %s\n", cpuid, stat->value.c); if (strcmp(stat->value.c, "on-line")) /* not online */ hwloc_bitmap_clr(topology->levels[0][0]->online_cpuset, cpuid); } if (look_chips) do { /* Get Chip ID */ stat = (kstat_named_t *) kstat_data_lookup(ksp, "chip_id"); if (!stat) { if (Lpkg_num) fprintf(stderr, "could not read package id for CPU%u: %s\n", cpuid, strerror(errno)); else hwloc_debug("could not read package id for CPU%u: %s\n", cpuid, strerror(errno)); look_chips = 0; continue; } switch (stat->data_type) { case KSTAT_DATA_INT32: pkgid = stat->value.i32; break; case KSTAT_DATA_UINT32: pkgid = stat->value.ui32; break; #ifdef _INT64_TYPE case KSTAT_DATA_UINT64: pkgid = stat->value.ui64; break; case KSTAT_DATA_INT64: pkgid = stat->value.i64; break; #endif default: fprintf(stderr, "chip_id type %d unknown\n", stat->data_type); look_chips = 0; continue; } Pproc[cpuid].Ppkg = pkgid; for (i = 0; i < Lpkg_num; i++) if (pkgid == Lpkg[i].Ppkg) break; Pproc[cpuid].Lpkg = i; hwloc_debug("%u on package %u (%u)\n", cpuid, i, pkgid); if (i == Lpkg_num) { if (Lpkg_num == Lpkg_alloc) { struct hwloc_solaris_Lpkg *tmp = realloc(Lpkg, 2*Lpkg_alloc * sizeof(*Lpkg)); if (!tmp) goto err; Lpkg = tmp; Lpkg_alloc *= 2; } Lpkg[Lpkg_num++].Ppkg = pkgid; } } while(0); if (look_cores) do { /* Get Core ID */ stat = (kstat_named_t *) kstat_data_lookup(ksp, "core_id"); if (!stat) { if (Lcore_num) fprintf(stderr, "could not read core id for CPU%u: %s\n", cpuid, strerror(errno)); else hwloc_debug("could not read core id for CPU%u: %s\n", cpuid, strerror(errno)); look_cores = 0; continue; } switch (stat->data_type) { case KSTAT_DATA_INT32: coreid = stat->value.i32; break; case KSTAT_DATA_UINT32: coreid = stat->value.ui32; break; #ifdef _INT64_TYPE case KSTAT_DATA_UINT64: coreid = stat->value.ui64; break; case KSTAT_DATA_INT64: coreid = stat->value.i64; break; #endif default: fprintf(stderr, "core_id type %d unknown\n", stat->data_type); look_cores = 0; continue; } for (i = 0; i < Lcore_num; i++) if (coreid == Lcore[i].Pcore && Pproc[cpuid].Ppkg == Lcore[i].Ppkg) break; Pproc[cpuid].Lcore = i; hwloc_debug("%u on core %u (%u)\n", cpuid, i, coreid); if (i == Lcore_num) { if (Lcore_num == Lcore_alloc) { struct hwloc_solaris_Lcore *tmp = realloc(Lcore, 2*Lcore_alloc * sizeof(*Lcore)); if (!tmp) goto err; Lcore = tmp; Lcore_alloc *= 2; } Lcore[Lcore_num].Ppkg = Pproc[cpuid].Ppkg; Lcore[Lcore_num++].Pcore = coreid; } } while(0); /* Note: there is also clog_id for the Thread ID (not unique) and * pkg_core_id for the core ID (not unique). They are not useful to us * however. */ } if (look_chips) { struct hwloc_obj *obj; unsigned j,k; hwloc_debug("%d Packages\n", Lpkg_num); for (j = 0; j < Lpkg_num; j++) { obj = hwloc_alloc_setup_object(HWLOC_OBJ_PACKAGE, Lpkg[j].Ppkg); if (CPUType) hwloc_obj_add_info(obj, "CPUType", CPUType); if (CPUModel) hwloc_obj_add_info(obj, "CPUModel", CPUModel); obj->cpuset = hwloc_bitmap_alloc(); for(k=0; k<Pproc_max; k++) if (Pproc[k].Lpkg == j) hwloc_bitmap_set(obj->cpuset, k); hwloc_debug_1arg_bitmap("Package %d has cpuset %s\n", j, obj->cpuset); hwloc_insert_object_by_cpuset(topology, obj); } hwloc_debug("%s", "\n"); } if (look_cores) { struct hwloc_obj *obj; unsigned j,k; hwloc_debug("%d Cores\n", Lcore_num); for (j = 0; j < Lcore_num; j++) { obj = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, Lcore[j].Pcore); obj->cpuset = hwloc_bitmap_alloc(); for(k=0; k<Pproc_max; k++) if (Pproc[k].Lcore == j) hwloc_bitmap_set(obj->cpuset, k); hwloc_debug_1arg_bitmap("Core %d has cpuset %s\n", j, obj->cpuset); hwloc_insert_object_by_cpuset(topology, obj); } hwloc_debug("%s", "\n"); } if (Lproc_num) { struct hwloc_obj *obj; unsigned j,k; hwloc_debug("%d PUs\n", Lproc_num); for (j = 0; j < Lproc_num; j++) { obj = hwloc_alloc_setup_object(HWLOC_OBJ_PU, Lproc[j].Pproc); obj->cpuset = hwloc_bitmap_alloc(); for(k=0; k<Pproc_max; k++) if (Pproc[k].Lproc == j) hwloc_bitmap_set(obj->cpuset, k); hwloc_debug_1arg_bitmap("PU %d has cpuset %s\n", j, obj->cpuset); hwloc_insert_object_by_cpuset(topology, obj); } hwloc_debug("%s", "\n"); } kstat_close(kc); free(Pproc); free(Lproc); free(Lcore); free(Lpkg); return Lproc_num > 0; err: kstat_close(kc); free(Pproc); free(Lproc); free(Lcore); free(Lpkg); return 0; }
static void browse(struct hwloc_topology *topology, lgrp_cookie_t cookie, lgrp_id_t lgrp, hwloc_obj_t *glob_lgrps, unsigned *curlgrp) { int n; hwloc_obj_t obj; lgrp_mem_size_t mem_size; n = lgrp_cpus(cookie, lgrp, NULL, 0, LGRP_CONTENT_HIERARCHY); if (n == -1) return; /* Is this lgrp a NUMA node? */ if ((mem_size = lgrp_mem_size(cookie, lgrp, LGRP_MEM_SZ_INSTALLED, LGRP_CONTENT_DIRECT)) > 0) { int i; processorid_t *cpuids; cpuids = malloc(sizeof(processorid_t) * n); assert(cpuids != NULL); obj = hwloc_alloc_setup_object(HWLOC_OBJ_NUMANODE, lgrp); obj->nodeset = hwloc_bitmap_alloc(); hwloc_bitmap_set(obj->nodeset, lgrp); obj->cpuset = hwloc_bitmap_alloc(); glob_lgrps[(*curlgrp)++] = obj; lgrp_cpus(cookie, lgrp, cpuids, n, LGRP_CONTENT_HIERARCHY); for (i = 0; i < n ; i++) { hwloc_debug("node %ld's cpu %d is %d\n", lgrp, i, cpuids[i]); hwloc_bitmap_set(obj->cpuset, cpuids[i]); } hwloc_debug_1arg_bitmap("node %ld has cpuset %s\n", lgrp, obj->cpuset); /* or LGRP_MEM_SZ_FREE */ hwloc_debug("node %ld has %lldkB\n", lgrp, mem_size/1024); obj->memory.local_memory = mem_size; obj->memory.page_types_len = 2; obj->memory.page_types = malloc(2*sizeof(*obj->memory.page_types)); memset(obj->memory.page_types, 0, 2*sizeof(*obj->memory.page_types)); obj->memory.page_types[0].size = hwloc_getpagesize(); #if HAVE_DECL__SC_LARGE_PAGESIZE obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE); #endif hwloc_insert_object_by_cpuset(topology, obj); free(cpuids); } n = lgrp_children(cookie, lgrp, NULL, 0); { lgrp_id_t *lgrps; int i; lgrps = malloc(sizeof(lgrp_id_t) * n); assert(lgrps != NULL); lgrp_children(cookie, lgrp, lgrps, n); hwloc_debug("lgrp %ld has %d children\n", lgrp, n); for (i = 0; i < n ; i++) { browse(topology, cookie, lgrps[i], glob_lgrps, curlgrp); } hwloc_debug("lgrp %ld's children done\n", lgrp); free(lgrps); } }
void hwloc_look_darwin(struct hwloc_topology *topology) { int64_t _nprocs; unsigned nprocs; int64_t _npackages; unsigned i, j, cpu; struct hwloc_obj *obj; size_t size; int64_t l1cachesize; int64_t l2cachesize; int64_t cachelinesize; int64_t memsize; if (hwloc_get_sysctlbyname("hw.ncpu", &_nprocs) || _nprocs <= 0) return; nprocs = _nprocs; topology->support.discovery->pu = 1; hwloc_debug("%u procs\n", nprocs); if (!hwloc_get_sysctlbyname("hw.packages", &_npackages) && _npackages > 0) { unsigned npackages = _npackages; int64_t _cores_per_package; int64_t _logical_per_package; unsigned logical_per_package; hwloc_debug("%u packages\n", npackages); if (!hwloc_get_sysctlbyname("machdep.cpu.logical_per_package", &_logical_per_package) && _logical_per_package > 0) logical_per_package = _logical_per_package; else /* Assume the trivia. */ logical_per_package = nprocs / npackages; hwloc_debug("%u threads per package\n", logical_per_package); if (nprocs == npackages * logical_per_package) for (i = 0; i < npackages; i++) { obj = hwloc_alloc_setup_object(HWLOC_OBJ_SOCKET, i); obj->cpuset = hwloc_bitmap_alloc(); for (cpu = i*logical_per_package; cpu < (i+1)*logical_per_package; cpu++) hwloc_bitmap_set(obj->cpuset, cpu); hwloc_debug_1arg_bitmap("package %u has cpuset %s\n", i, obj->cpuset); hwloc_insert_object_by_cpuset(topology, obj); } if (!hwloc_get_sysctlbyname("machdep.cpu.cores_per_package", &_cores_per_package) && _cores_per_package > 0) { unsigned cores_per_package = _cores_per_package; hwloc_debug("%u cores per package\n", cores_per_package); if (!(logical_per_package % cores_per_package)) for (i = 0; i < npackages * cores_per_package; i++) { obj = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, i); obj->cpuset = hwloc_bitmap_alloc(); for (cpu = i*(logical_per_package/cores_per_package); cpu < (i+1)*(logical_per_package/cores_per_package); cpu++) hwloc_bitmap_set(obj->cpuset, cpu); hwloc_debug_1arg_bitmap("core %u has cpuset %s\n", i, obj->cpuset); hwloc_insert_object_by_cpuset(topology, obj); } } } if (hwloc_get_sysctlbyname("hw.l1dcachesize", &l1cachesize)) l1cachesize = 0; if (hwloc_get_sysctlbyname("hw.l2cachesize", &l2cachesize)) l2cachesize = 0; if (hwloc_get_sysctlbyname("hw.cachelinesize", &cachelinesize)) cachelinesize = 0; if (hwloc_get_sysctlbyname("hw.memsize", &memsize)) memsize = 0; if (!sysctlbyname("hw.cacheconfig", NULL, &size, NULL, 0)) { unsigned n = size / sizeof(uint32_t); uint64_t *cacheconfig = NULL; uint64_t *cachesize = NULL; uint32_t *cacheconfig32 = NULL; cacheconfig = malloc(sizeof(uint64_t) * n); if (NULL == cacheconfig) { goto out; } cachesize = malloc(sizeof(uint64_t) * n); if (NULL == cachesize) { goto out; } cacheconfig32 = malloc(sizeof(uint32_t) * n); if (NULL == cacheconfig32) { goto out; } if ((!sysctlbyname("hw.cacheconfig", cacheconfig, &size, NULL, 0))) { /* Yeech. Darwin seemingly has changed from 32bit to 64bit integers for * cacheconfig, with apparently no way for detection. Assume the machine * won't have more than 4 billion cpus */ if (cacheconfig[0] > 0xFFFFFFFFUL) { memcpy(cacheconfig32, cacheconfig, size); for (i = 0 ; i < size / sizeof(uint32_t); i++) cacheconfig[i] = cacheconfig32[i]; } memset(cachesize, 0, sizeof(uint64_t) * n); size = sizeof(uint64_t) * n; if (sysctlbyname("hw.cachesize", cachesize, &size, NULL, 0)) { if (n > 0) cachesize[0] = memsize; if (n > 1) cachesize[1] = l1cachesize; if (n > 2) cachesize[2] = l2cachesize; } hwloc_debug("%s", "caches"); for (i = 0; i < n && cacheconfig[i]; i++) hwloc_debug(" %"PRIu64"(%"PRIu64"kB)", cacheconfig[i], cachesize[i] / 1024); cacheconfig[i] = cacheconfig32[i]; /* Now we know how many caches there are */ n = i; hwloc_debug("\n%u cache levels\n", n - 1); /* For each cache level (0 is memory) */ for (i = 0; i < n; i++) { /* cacheconfig tells us how many cpus share it, let's iterate on each cache */ for (j = 0; j < (nprocs / cacheconfig[i]); j++) { obj = hwloc_alloc_setup_object(i?HWLOC_OBJ_CACHE:HWLOC_OBJ_NODE, j); if (!i) { obj->nodeset = hwloc_bitmap_alloc(); hwloc_bitmap_set(obj->nodeset, j); } obj->cpuset = hwloc_bitmap_alloc(); for (cpu = j*cacheconfig[i]; cpu < ((j+1)*cacheconfig[i]); cpu++) hwloc_bitmap_set(obj->cpuset, cpu); if (i) { hwloc_debug_2args_bitmap("L%ucache %u has cpuset %s\n", i, j, obj->cpuset); obj->attr->cache.depth = i; obj->attr->cache.size = cachesize[i]; obj->attr->cache.linesize = cachelinesize; } else { hwloc_debug_1arg_bitmap("node %u has cpuset %s\n", j, obj->cpuset); obj->memory.local_memory = cachesize[i]; obj->memory.page_types_len = 2; obj->memory.page_types = malloc(2*sizeof(*obj->memory.page_types)); memset(obj->memory.page_types, 0, 2*sizeof(*obj->memory.page_types)); obj->memory.page_types[0].size = getpagesize(); #ifdef HAVE__SC_LARGE_PAGESIZE obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE); #endif } hwloc_insert_object_by_cpuset(topology, obj); } } } out: if (NULL != cacheconfig) { free(cacheconfig); } if (NULL != cachesize) { free(cachesize); } if (NULL != cacheconfig32) { free(cacheconfig32); } } /* add PU objects */ hwloc_setup_pu_level(topology, nprocs); hwloc_add_object_info(topology->levels[0][0], "Backend", "Darwin"); }
static int hwloc_look_windows(struct hwloc_backend *backend) { struct hwloc_topology *topology = backend->topology; hwloc_bitmap_t groups_pu_set = NULL; SYSTEM_INFO SystemInfo; DWORD length; if (topology->levels[0][0]->cpuset) /* somebody discovered things */ return -1; hwloc_alloc_obj_cpusets(topology->levels[0][0]); GetSystemInfo(&SystemInfo); if (!GetLogicalProcessorInformationExProc && GetLogicalProcessorInformationProc) { PSYSTEM_LOGICAL_PROCESSOR_INFORMATION procInfo, tmpprocInfo; unsigned id; unsigned i; struct hwloc_obj *obj; hwloc_obj_type_t type; length = 0; procInfo = NULL; while (1) { if (GetLogicalProcessorInformationProc(procInfo, &length)) break; if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) return -1; tmpprocInfo = realloc(procInfo, length); if (!tmpprocInfo) { free(procInfo); goto out; } procInfo = tmpprocInfo; } assert(!length || procInfo); for (i = 0; i < length / sizeof(*procInfo); i++) { /* Ignore unknown caches */ if (procInfo->Relationship == RelationCache && procInfo->Cache.Type != CacheUnified && procInfo->Cache.Type != CacheData && procInfo->Cache.Type != CacheInstruction) continue; id = -1; switch (procInfo[i].Relationship) { case RelationNumaNode: type = HWLOC_OBJ_NUMANODE; id = procInfo[i].NumaNode.NodeNumber; break; case RelationProcessorPackage: type = HWLOC_OBJ_PACKAGE; break; case RelationCache: type = (procInfo[i].Cache.Type == CacheInstruction ? HWLOC_OBJ_L1ICACHE : HWLOC_OBJ_L1CACHE) + procInfo[i].Cache.Level - 1; break; case RelationProcessorCore: type = HWLOC_OBJ_CORE; break; case RelationGroup: default: type = HWLOC_OBJ_GROUP; break; } if (!hwloc_filter_check_keep_object_type(topology, type)) continue; obj = hwloc_alloc_setup_object(topology, type, id); obj->cpuset = hwloc_bitmap_alloc(); hwloc_debug("%s#%u mask %lx\n", hwloc_type_name(type), id, procInfo[i].ProcessorMask); /* ProcessorMask is a ULONG_PTR */ hwloc_bitmap_set_ith_ULONG_PTR(obj->cpuset, 0, procInfo[i].ProcessorMask); hwloc_debug_2args_bitmap("%s#%u bitmap %s\n", hwloc_type_name(type), id, obj->cpuset); switch (type) { case HWLOC_OBJ_NUMANODE: { ULONGLONG avail; obj->nodeset = hwloc_bitmap_alloc(); hwloc_bitmap_set(obj->nodeset, id); if ((GetNumaAvailableMemoryNodeExProc && GetNumaAvailableMemoryNodeExProc(id, &avail)) || (GetNumaAvailableMemoryNodeProc && GetNumaAvailableMemoryNodeProc(id, &avail))) obj->memory.local_memory = avail; obj->memory.page_types_len = 2; obj->memory.page_types = malloc(2 * sizeof(*obj->memory.page_types)); memset(obj->memory.page_types, 0, 2 * sizeof(*obj->memory.page_types)); obj->memory.page_types_len = 1; obj->memory.page_types[0].size = SystemInfo.dwPageSize; #if HAVE_DECL__SC_LARGE_PAGESIZE obj->memory.page_types_len++; obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE); #endif break; } case HWLOC_OBJ_L1CACHE: case HWLOC_OBJ_L2CACHE: case HWLOC_OBJ_L3CACHE: case HWLOC_OBJ_L4CACHE: case HWLOC_OBJ_L5CACHE: case HWLOC_OBJ_L1ICACHE: case HWLOC_OBJ_L2ICACHE: case HWLOC_OBJ_L3ICACHE: obj->attr->cache.size = procInfo[i].Cache.Size; obj->attr->cache.associativity = procInfo[i].Cache.Associativity == CACHE_FULLY_ASSOCIATIVE ? -1 : procInfo[i].Cache.Associativity ; obj->attr->cache.linesize = procInfo[i].Cache.LineSize; obj->attr->cache.depth = procInfo[i].Cache.Level; switch (procInfo->Cache.Type) { case CacheUnified: obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED; break; case CacheData: obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA; break; case CacheInstruction: obj->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION; break; default: hwloc_free_unlinked_object(obj); continue; } break; case HWLOC_OBJ_GROUP: obj->attr->group.kind = procInfo[i].Relationship == RelationGroup ? HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP : HWLOC_GROUP_KIND_WINDOWS_RELATIONSHIP_UNKNOWN; break; default: break; } hwloc_insert_object_by_cpuset(topology, obj); } free(procInfo); } if (GetLogicalProcessorInformationExProc) { PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX procInfoTotal, tmpprocInfoTotal, procInfo; unsigned id; struct hwloc_obj *obj; hwloc_obj_type_t type; length = 0; procInfoTotal = NULL; while (1) { if (GetLogicalProcessorInformationExProc(RelationAll, procInfoTotal, &length)) break; if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) return -1; tmpprocInfoTotal = realloc(procInfoTotal, length); if (!tmpprocInfoTotal) { free(procInfoTotal); goto out; } procInfoTotal = tmpprocInfoTotal; } for (procInfo = procInfoTotal; (void*) procInfo < (void*) ((uintptr_t) procInfoTotal + length); procInfo = (void*) ((uintptr_t) procInfo + procInfo->Size)) { unsigned num, i; GROUP_AFFINITY *GroupMask; /* Ignore unknown caches */ if (procInfo->Relationship == RelationCache && procInfo->Cache.Type != CacheUnified && procInfo->Cache.Type != CacheData && procInfo->Cache.Type != CacheInstruction) continue; id = -1; switch (procInfo->Relationship) { case RelationNumaNode: type = HWLOC_OBJ_NUMANODE; num = 1; GroupMask = &procInfo->NumaNode.GroupMask; id = procInfo->NumaNode.NodeNumber; break; case RelationProcessorPackage: type = HWLOC_OBJ_PACKAGE; num = procInfo->Processor.GroupCount; GroupMask = procInfo->Processor.GroupMask; break; case RelationCache: type = (procInfo->Cache.Type == CacheInstruction ? HWLOC_OBJ_L1ICACHE : HWLOC_OBJ_L1CACHE) + procInfo->Cache.Level - 1; num = 1; GroupMask = &procInfo->Cache.GroupMask; break; case RelationProcessorCore: type = HWLOC_OBJ_CORE; num = procInfo->Processor.GroupCount; GroupMask = procInfo->Processor.GroupMask; break; case RelationGroup: /* So strange an interface... */ for (id = 0; id < procInfo->Group.ActiveGroupCount; id++) { KAFFINITY mask; hwloc_bitmap_t set; set = hwloc_bitmap_alloc(); mask = procInfo->Group.GroupInfo[id].ActiveProcessorMask; hwloc_debug("group %u %d cpus mask %lx\n", id, procInfo->Group.GroupInfo[id].ActiveProcessorCount, mask); /* KAFFINITY is ULONG_PTR */ hwloc_bitmap_set_ith_ULONG_PTR(set, id, mask); /* FIXME: what if running 32bits on a 64bits windows with 64-processor groups? * ULONG_PTR is 32bits, so half the group is invisible? * maybe scale id to id*8/sizeof(ULONG_PTR) so that groups are 64-PU aligned? */ hwloc_debug_2args_bitmap("group %u %d bitmap %s\n", id, procInfo->Group.GroupInfo[id].ActiveProcessorCount, set); /* save the set of PUs so that we can create them at the end */ if (!groups_pu_set) groups_pu_set = hwloc_bitmap_alloc(); hwloc_bitmap_or(groups_pu_set, groups_pu_set, set); if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_GROUP)) { obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, id); obj->cpuset = set; obj->attr->group.kind = HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP; hwloc_insert_object_by_cpuset(topology, obj); } else hwloc_bitmap_free(set); } continue; default: /* Don't know how to get the mask. */ hwloc_debug("unknown relation %d\n", procInfo->Relationship); continue; } if (!hwloc_filter_check_keep_object_type(topology, type)) continue; obj = hwloc_alloc_setup_object(topology, type, id); obj->cpuset = hwloc_bitmap_alloc(); for (i = 0; i < num; i++) { hwloc_debug("%s#%u %d: mask %d:%lx\n", hwloc_type_name(type), id, i, GroupMask[i].Group, GroupMask[i].Mask); /* GROUP_AFFINITY.Mask is KAFFINITY, which is ULONG_PTR */ hwloc_bitmap_set_ith_ULONG_PTR(obj->cpuset, GroupMask[i].Group, GroupMask[i].Mask); /* FIXME: scale id to id*8/sizeof(ULONG_PTR) as above? */ } hwloc_debug_2args_bitmap("%s#%u bitmap %s\n", hwloc_type_name(type), id, obj->cpuset); switch (type) { case HWLOC_OBJ_NUMANODE: { ULONGLONG avail; obj->nodeset = hwloc_bitmap_alloc(); hwloc_bitmap_set(obj->nodeset, id); if ((GetNumaAvailableMemoryNodeExProc && GetNumaAvailableMemoryNodeExProc(id, &avail)) || (GetNumaAvailableMemoryNodeProc && GetNumaAvailableMemoryNodeProc(id, &avail))) obj->memory.local_memory = avail; obj->memory.page_types = malloc(2 * sizeof(*obj->memory.page_types)); memset(obj->memory.page_types, 0, 2 * sizeof(*obj->memory.page_types)); obj->memory.page_types_len = 1; obj->memory.page_types[0].size = SystemInfo.dwPageSize; #if HAVE_DECL__SC_LARGE_PAGESIZE obj->memory.page_types_len++; obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE); #endif break; } case HWLOC_OBJ_L1CACHE: case HWLOC_OBJ_L2CACHE: case HWLOC_OBJ_L3CACHE: case HWLOC_OBJ_L4CACHE: case HWLOC_OBJ_L5CACHE: case HWLOC_OBJ_L1ICACHE: case HWLOC_OBJ_L2ICACHE: case HWLOC_OBJ_L3ICACHE: obj->attr->cache.size = procInfo->Cache.CacheSize; obj->attr->cache.associativity = procInfo->Cache.Associativity == CACHE_FULLY_ASSOCIATIVE ? -1 : procInfo->Cache.Associativity ; obj->attr->cache.linesize = procInfo->Cache.LineSize; obj->attr->cache.depth = procInfo->Cache.Level; switch (procInfo->Cache.Type) { case CacheUnified: obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED; break; case CacheData: obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA; break; case CacheInstruction: obj->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION; break; default: hwloc_free_unlinked_object(obj); continue; } break; default: break; } hwloc_insert_object_by_cpuset(topology, obj); } free(procInfoTotal); } if (groups_pu_set) { /* the system supports multiple Groups. * PU indexes may be discontiguous, especially if Groups contain less than 64 procs. */ hwloc_obj_t obj; unsigned idx; hwloc_bitmap_foreach_begin(idx, groups_pu_set) { obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PU, idx); obj->cpuset = hwloc_bitmap_alloc(); hwloc_bitmap_only(obj->cpuset, idx); hwloc_debug_1arg_bitmap("cpu %u has cpuset %s\n", idx, obj->cpuset); hwloc_insert_object_by_cpuset(topology, obj); } hwloc_bitmap_foreach_end(); hwloc_bitmap_free(groups_pu_set); } else {
static int hwloc_look_darwin(struct hwloc_backend *backend) { struct hwloc_topology *topology = backend->topology; int64_t _nprocs; unsigned nprocs; int64_t _npackages; unsigned i, j, cpu; struct hwloc_obj *obj; size_t size; int64_t l1dcachesize, l1icachesize; int64_t cacheways[2]; int64_t l2cachesize; int64_t l3cachesize; int64_t cachelinesize; int64_t memsize; int64_t _tmp; char cpumodel[64]; char cpuvendor[64]; char cpufamilynumber[20], cpumodelnumber[20], cpustepping[20]; int gotnuma = 0; int gotnumamemory = 0; if (topology->levels[0][0]->cpuset) /* somebody discovered things */ return -1; hwloc_alloc_root_sets(topology->levels[0][0]); /* Don't use hwloc_fallback_nbprocessors() because it would return online cpus only, * while we need all cpus when computing logical_per_package, etc below. * We don't know which CPUs are offline, but Darwin doesn't support binding anyway. * * TODO: try hw.logicalcpu_max */ if (hwloc_get_sysctlbyname("hw.logicalcpu", &_nprocs) || _nprocs <= 0) /* fallback to deprecated way */ if (hwloc_get_sysctlbyname("hw.ncpu", &_nprocs) || _nprocs <= 0) return -1; nprocs = _nprocs; topology->support.discovery->pu = 1; hwloc_debug("%u procs\n", nprocs); size = sizeof(cpuvendor); if (sysctlbyname("machdep.cpu.vendor", cpuvendor, &size, NULL, 0)) cpuvendor[0] = '\0'; size = sizeof(cpumodel); if (sysctlbyname("machdep.cpu.brand_string", cpumodel, &size, NULL, 0)) cpumodel[0] = '\0'; if (hwloc_get_sysctlbyname("machdep.cpu.family", &_tmp)) cpufamilynumber[0] = '\0'; else snprintf(cpufamilynumber, sizeof(cpufamilynumber), "%lld", (long long) _tmp); if (hwloc_get_sysctlbyname("machdep.cpu.model", &_tmp)) cpumodelnumber[0] = '\0'; else snprintf(cpumodelnumber, sizeof(cpumodelnumber), "%lld", (long long) _tmp); /* .extfamily and .extmodel are already added to .family and .model */ if (hwloc_get_sysctlbyname("machdep.cpu.stepping", &_tmp)) cpustepping[0] = '\0'; else snprintf(cpustepping, sizeof(cpustepping), "%lld", (long long) _tmp); if (!hwloc_get_sysctlbyname("hw.packages", &_npackages) && _npackages > 0) { unsigned npackages = _npackages; int64_t _cores_per_package; unsigned cores_per_package; int64_t _logical_per_package; unsigned logical_per_package; hwloc_debug("%u packages\n", npackages); if (!hwloc_get_sysctlbyname("machdep.cpu.thread_count", &_logical_per_package) && _logical_per_package > 0) /* official/modern way */ logical_per_package = _logical_per_package; else if (!hwloc_get_sysctlbyname("machdep.cpu.logical_per_package", &_logical_per_package) && _logical_per_package > 0) /* old way, gives the max supported by this "kind" of processor, * can be larger than the actual number for this model. */ logical_per_package = _logical_per_package; else /* Assume the trivia. */ logical_per_package = nprocs / npackages; hwloc_debug("%u threads per package\n", logical_per_package); if (nprocs == npackages * logical_per_package && hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_PACKAGE)) for (i = 0; i < npackages; i++) { obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PACKAGE, i); obj->cpuset = hwloc_bitmap_alloc(); for (cpu = i*logical_per_package; cpu < (i+1)*logical_per_package; cpu++) hwloc_bitmap_set(obj->cpuset, cpu); hwloc_debug_1arg_bitmap("package %u has cpuset %s\n", i, obj->cpuset); if (cpuvendor[0] != '\0') hwloc_obj_add_info(obj, "CPUVendor", cpuvendor); if (cpumodel[0] != '\0') hwloc_obj_add_info(obj, "CPUModel", cpumodel); if (cpufamilynumber[0] != '\0') hwloc_obj_add_info(obj, "CPUFamilyNumber", cpufamilynumber); if (cpumodelnumber[0] != '\0') hwloc_obj_add_info(obj, "CPUModelNumber", cpumodelnumber); if (cpustepping[0] != '\0') hwloc_obj_add_info(obj, "CPUStepping", cpustepping); hwloc_insert_object_by_cpuset(topology, obj); } else { if (cpuvendor[0] != '\0') hwloc_obj_add_info(topology->levels[0][0], "CPUVendor", cpuvendor); if (cpumodel[0] != '\0') hwloc_obj_add_info(topology->levels[0][0], "CPUModel", cpumodel); if (cpufamilynumber[0] != '\0') hwloc_obj_add_info(topology->levels[0][0], "CPUFamilyNumber", cpufamilynumber); if (cpumodelnumber[0] != '\0') hwloc_obj_add_info(topology->levels[0][0], "CPUModelNumber", cpumodelnumber); if (cpustepping[0] != '\0') hwloc_obj_add_info(topology->levels[0][0], "CPUStepping", cpustepping); } if (!hwloc_get_sysctlbyname("machdep.cpu.core_count", &_cores_per_package) && _cores_per_package > 0) /* official/modern way */ cores_per_package = _cores_per_package; else if (!hwloc_get_sysctlbyname("machdep.cpu.cores_per_package", &_cores_per_package) && _cores_per_package > 0) /* old way, gives the max supported by this "kind" of processor, * can be larger than the actual number for this model. */ cores_per_package = _cores_per_package; else /* no idea */ cores_per_package = 0; if (cores_per_package > 0 && hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_CORE)) { hwloc_debug("%u cores per package\n", cores_per_package); if (!(logical_per_package % cores_per_package)) for (i = 0; i < npackages * cores_per_package; i++) { obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_CORE, i); obj->cpuset = hwloc_bitmap_alloc(); for (cpu = i*(logical_per_package/cores_per_package); cpu < (i+1)*(logical_per_package/cores_per_package); cpu++) hwloc_bitmap_set(obj->cpuset, cpu); hwloc_debug_1arg_bitmap("core %u has cpuset %s\n", i, obj->cpuset); hwloc_insert_object_by_cpuset(topology, obj); } } } else { if (cpuvendor[0] != '\0') hwloc_obj_add_info(topology->levels[0][0], "CPUVendor", cpuvendor); if (cpumodel[0] != '\0') hwloc_obj_add_info(topology->levels[0][0], "CPUModel", cpumodel); if (cpufamilynumber[0] != '\0') hwloc_obj_add_info(topology->levels[0][0], "CPUFamilyNumber", cpufamilynumber); if (cpumodelnumber[0] != '\0') hwloc_obj_add_info(topology->levels[0][0], "CPUModelNumber", cpumodelnumber); if (cpustepping[0] != '\0') hwloc_obj_add_info(topology->levels[0][0], "CPUStepping", cpustepping); } if (hwloc_get_sysctlbyname("hw.l1dcachesize", &l1dcachesize)) l1dcachesize = 0; if (hwloc_get_sysctlbyname("hw.l1icachesize", &l1icachesize)) l1icachesize = 0; if (hwloc_get_sysctlbyname("hw.l2cachesize", &l2cachesize)) l2cachesize = 0; if (hwloc_get_sysctlbyname("hw.l3cachesize", &l3cachesize)) l3cachesize = 0; if (hwloc_get_sysctlbyname("machdep.cpu.cache.L1_associativity", &cacheways[0])) cacheways[0] = 0; else if (cacheways[0] == 0xff) cacheways[0] = -1; if (hwloc_get_sysctlbyname("machdep.cpu.cache.L2_associativity", &cacheways[1])) cacheways[1] = 0; else if (cacheways[1] == 0xff) cacheways[1] = -1; if (hwloc_get_sysctlbyname("hw.cachelinesize", &cachelinesize)) cachelinesize = 0; if (hwloc_get_sysctlbyname("hw.memsize", &memsize)) memsize = 0; if (!sysctlbyname("hw.cacheconfig", NULL, &size, NULL, 0)) { unsigned n = size / sizeof(uint32_t); uint64_t cacheconfig[n]; uint64_t cachesize[n]; uint32_t cacheconfig32[n]; if ((!sysctlbyname("hw.cacheconfig", cacheconfig, &size, NULL, 0))) { /* Yeech. Darwin seemingly has changed from 32bit to 64bit integers for * cacheconfig, with apparently no way for detection. Assume the machine * won't have more than 4 billion cpus */ if (cacheconfig[0] > 0xFFFFFFFFUL) { memcpy(cacheconfig32, cacheconfig, size); for (i = 0 ; i < size / sizeof(uint32_t); i++) cacheconfig[i] = cacheconfig32[i]; } memset(cachesize, 0, sizeof(uint64_t) * n); size = sizeof(uint64_t) * n; if (sysctlbyname("hw.cachesize", cachesize, &size, NULL, 0)) { if (n > 0) cachesize[0] = memsize; if (n > 1) cachesize[1] = l1dcachesize; if (n > 2) cachesize[2] = l2cachesize; if (n > 3) cachesize[3] = l3cachesize; } hwloc_debug("%s", "caches"); for (i = 0; i < n && cacheconfig[i]; i++) hwloc_debug(" %"PRIu64"(%"PRIu64"kB)", cacheconfig[i], cachesize[i] / 1024); /* Now we know how many caches there are */ n = i; hwloc_debug("\n%u cache levels\n", n - 1); /* For each cache level (0 is memory) */ for (i = 0; i < n; i++) { /* cacheconfig tells us how many cpus share it, let's iterate on each cache */ for (j = 0; j < (nprocs / cacheconfig[i]); j++) { if (!i) { obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_NUMANODE, j); obj->nodeset = hwloc_bitmap_alloc(); hwloc_bitmap_set(obj->nodeset, j); gotnuma++; } else { obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L1CACHE+i-1, HWLOC_UNKNOWN_INDEX); } obj->cpuset = hwloc_bitmap_alloc(); for (cpu = j*cacheconfig[i]; cpu < ((j+1)*cacheconfig[i]); cpu++) hwloc_bitmap_set(obj->cpuset, cpu); if (i == 1 && l1icachesize && hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1ICACHE)) { /* FIXME assuming that L1i and L1d are shared the same way. Darwin * does not yet provide a way to know. */ hwloc_obj_t l1i = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L1ICACHE, HWLOC_UNKNOWN_INDEX); l1i->cpuset = hwloc_bitmap_dup(obj->cpuset); hwloc_debug_1arg_bitmap("L1icache %u has cpuset %s\n", j, l1i->cpuset); l1i->attr->cache.depth = i; l1i->attr->cache.size = l1icachesize; l1i->attr->cache.linesize = cachelinesize; l1i->attr->cache.associativity = 0; l1i->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION; hwloc_insert_object_by_cpuset(topology, l1i); } if (i) { hwloc_debug_2args_bitmap("L%ucache %u has cpuset %s\n", i, j, obj->cpuset); obj->attr->cache.depth = i; obj->attr->cache.size = cachesize[i]; obj->attr->cache.linesize = cachelinesize; if (i <= sizeof(cacheways) / sizeof(cacheways[0])) obj->attr->cache.associativity = cacheways[i-1]; else obj->attr->cache.associativity = 0; if (i == 1 && l1icachesize) obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA; else obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED; } else { hwloc_debug_1arg_bitmap("node %u has cpuset %s\n", j, obj->cpuset); if (cachesize[i]) { obj->attr->numanode.local_memory = cachesize[i]; gotnumamemory++; } obj->attr->numanode.page_types_len = 2; obj->attr->numanode.page_types = malloc(2*sizeof(*obj->attr->numanode.page_types)); memset(obj->attr->numanode.page_types, 0, 2*sizeof(*obj->attr->numanode.page_types)); obj->attr->numanode.page_types[0].size = hwloc_getpagesize(); #if HAVE_DECL__SC_LARGE_PAGESIZE obj->attr->numanode.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE); #endif } if (hwloc_filter_check_keep_object_type(topology, obj->type)) hwloc_insert_object_by_cpuset(topology, obj); else hwloc_free_unlinked_object(obj); /* FIXME: don't built at all, just build the cpuset in case l1i needs it */ } } } } if (gotnuma) topology->support.discovery->numa = 1; if (gotnumamemory) topology->support.discovery->numa = 1; /* add PU objects */ hwloc_setup_pu_level(topology, nprocs); hwloc_obj_add_info(topology->levels[0][0], "Backend", "Darwin"); hwloc_add_uname_info(topology, NULL); return 0; }