int main(void)
{
  hwloc_bitmap_t orig, expected;

  orig = hwloc_bitmap_alloc();
  expected = hwloc_bitmap_alloc();

  /* empty set gives empty set */
  hwloc_bitmap_singlify(orig);
  assert(hwloc_bitmap_iszero(orig));

  /* full set gives first bit only */
  hwloc_bitmap_fill(orig);
  hwloc_bitmap_singlify(orig);
  hwloc_bitmap_zero(expected);
  hwloc_bitmap_set(expected, 0);
  assert(hwloc_bitmap_isequal(orig, expected));
  assert(!hwloc_bitmap_compare(orig, expected));

  /* actual non-trivial set */
  hwloc_bitmap_zero(orig);
  hwloc_bitmap_set(orig, 45);
  hwloc_bitmap_set(orig, 46);
  hwloc_bitmap_set(orig, 517);
  hwloc_bitmap_singlify(orig);
  hwloc_bitmap_zero(expected);
  hwloc_bitmap_set(expected, 45);
  assert(hwloc_bitmap_isequal(orig, expected));
  assert(!hwloc_bitmap_compare(orig, expected));

  hwloc_bitmap_free(orig);
  hwloc_bitmap_free(expected);

  return 0;
}
/*
 * Bind the current process (proc != 0) or the current thread (proc == 0)
 * to the first core of the given NUMA node
 */
void hw_set_first_core_node(int node, int proc)
{
  hwloc_nodeset_t nset;
  hwloc_cpuset_t set, newset;

  if (local_topo->nnodes != 0) {
    nset = hwloc_bitmap_alloc();
    set = hwloc_bitmap_alloc();
    newset = hwloc_bitmap_alloc();
    hwloc_bitmap_zero(set);
    hwloc_bitmap_zero(newset);
    hwloc_bitmap_zero(nset);
    hwloc_bitmap_set(nset, node);
    hwloc_cpuset_from_nodeset(topology, set, nset);
    int core = hwloc_bitmap_first(set);
    hwloc_bitmap_set(newset, core);
    if (proc)
      hwloc_set_proc_cpubind(topology, 0, newset, HWLOC_CPUBIND_PROCESS);
    else
      hwloc_set_proc_cpubind(topology, 0, newset, HWLOC_CPUBIND_THREAD);
    hwloc_bitmap_free(newset);
    hwloc_bitmap_free(set);
    hwloc_bitmap_free(nset);
  }
}
END_TEST

START_TEST(add_extra_memory_nodes_if_needed_test)
{
  long long mem_requested;
  long long mem_reserved;
  std::set<int> current_mem_ids;
  hwloc_bitmap_t job_mems = hwloc_bitmap_alloc();
  hwloc_bitmap_t torque_root_mems = hwloc_bitmap_alloc();
  char buf[1024];

  hwloc_bitmap_set(job_mems, 0);
  current_mem_ids.insert(0);
  hwloc_bitmap_set(torque_root_mems, 0);
  hwloc_bitmap_set(torque_root_mems, 1);

  /* request 16 GiB while only 15 GiB are reserved, so a second memory
   * node must be added to the job */
  mem_requested = 16 * 1024;
  mem_requested *= 1024;
  mem_requested *= 1024;
  mem_reserved = 15 * 1024;
  mem_reserved *= 1024;
  mem_reserved *= 1024;

  add_extra_memory_nodes_if_needed(mem_requested, mem_reserved, job_mems,
                                   torque_root_mems, current_mem_ids);
  fail_unless(hwloc_bitmap_weight(job_mems) == 2);
  hwloc_bitmap_list_snprintf(buf, sizeof(buf), job_mems);
  fail_unless(strchr(buf, '0') != NULL);
  fail_unless(strchr(buf, '1') != NULL);
}
int main(void)
{
  hwloc_topology_t topology;
  hwloc_bitmap_t set, set2, nocpunomemnodeset, nocpubutmemnodeset, nomembutcpunodeset, nomembutcpucpuset;
  hwloc_obj_t node;
  struct bitmask *bitmask, *bitmask2;
  unsigned long mask;
  unsigned long maxnode;
  int i;

  if (numa_available() < 0)
    /* libnuma has inconsistent behavior when the kernel isn't NUMA-aware.
     * don't try to check everything precisely. */
    exit(77);

  hwloc_topology_init(&topology);
  hwloc_topology_load(topology);

  /* convert full stuff between cpuset and libnuma */
  set = hwloc_bitmap_alloc();
  nocpunomemnodeset = hwloc_bitmap_alloc();
  nocpubutmemnodeset = hwloc_bitmap_alloc();
  nomembutcpunodeset = hwloc_bitmap_alloc();
  nomembutcpucpuset = hwloc_bitmap_alloc();
  /* gather all nodes if any, or the whole system if no nodes */
  if (hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_NUMANODE)) {
    node = NULL;
    while ((node = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, node)) != NULL) {
      hwloc_bitmap_or(set, set, node->cpuset);
      if (hwloc_bitmap_iszero(node->cpuset)) {
        if (node->memory.local_memory)
          hwloc_bitmap_set(nocpubutmemnodeset, node->os_index);
        else
          hwloc_bitmap_set(nocpunomemnodeset, node->os_index);
      } else if (!node->memory.local_memory) {
        hwloc_bitmap_set(nomembutcpunodeset, node->os_index);
        hwloc_bitmap_or(nomembutcpucpuset, nomembutcpucpuset, node->cpuset);
      }
    }
  } else {
    hwloc_bitmap_or(set, set, hwloc_topology_get_complete_cpuset(topology));
  }

  set2 = hwloc_bitmap_alloc();
  hwloc_cpuset_from_linux_libnuma_bitmask(topology, set2, numa_all_nodes_ptr);
  /* numa_all_nodes_ptr doesn't contain NODES with CPU but no memory */
  hwloc_bitmap_or(set2, set2, nomembutcpucpuset);
  assert(hwloc_bitmap_isequal(set, set2));
  hwloc_bitmap_free(set2);

  bitmask = hwloc_cpuset_to_linux_libnuma_bitmask(topology, set);
  /* numa_all_nodes_ptr contains NODES with no CPU but with memory */
  hwloc_bitmap_foreach_begin(i, nocpubutmemnodeset) {
    numa_bitmask_setbit(bitmask, i);
  } hwloc_bitmap_foreach_end();
static int
hwloc_aix_get_sth_rset_cpubind(hwloc_topology_t topology, rstype_t what, rsid_t who, hwloc_bitmap_t hwloc_set, int flags __hwloc_attribute_unused, int *boundp)
{
  rsethandle_t rset;
  unsigned cpu, maxcpus;
  int res = -1;
  int bound = 0;

  rset = rs_alloc(RS_EMPTY);

  if (ra_getrset(what, who, 0, rset) == -1)
    goto out;

  hwloc_bitmap_zero(hwloc_set);
  maxcpus = rs_getinfo(rset, R_MAXPROCS, 0);
  for (cpu = 0; cpu < maxcpus; cpu++)
    if (rs_op(RS_TESTRESOURCE, rset, NULL, R_PROCS, cpu) == 1)
      hwloc_bitmap_set(hwloc_set, cpu);
    else
      bound = 1;
  hwloc_bitmap_and(hwloc_set, hwloc_set, hwloc_topology_get_complete_cpuset(topology));
  res = 0;
  *boundp = bound;

 out:
  rs_free(rset);
  return res;
}
static void
switch_set_index(hwloc_bitmap_t set, unsigned old_index, unsigned new_index)
{
  if (hwloc_bitmap_isset(set, old_index)) {
    hwloc_bitmap_clr(set, old_index);
    hwloc_bitmap_set(set, new_index);
  }
}
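/* A minimal usage sketch for switch_set_index() (indices are hypothetical,
 * not from the original source): renumbering a PU from OS index 4 to 12.
 * The helper is a no-op when old_index is not set, so callers may apply it
 * unconditionally while renumbering. */
static void
switch_set_index_example(void)
{
  hwloc_bitmap_t set = hwloc_bitmap_alloc();
  hwloc_bitmap_set(set, 4);
  switch_set_index(set, 4, 12);          /* bit 4 moves to bit 12 */
  assert(!hwloc_bitmap_isset(set, 4));
  assert(hwloc_bitmap_isset(set, 12));
  switch_set_index(set, 5, 20);          /* bit 5 not set: set is unchanged */
  assert(!hwloc_bitmap_isset(set, 20));
  hwloc_bitmap_free(set);
}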
static int
hwloc_solaris_get_sth_membind(hwloc_topology_t topology, idtype_t idtype, id_t id, hwloc_nodeset_t nodeset, hwloc_membind_policy_t *policy, int flags __hwloc_attribute_unused)
{
  int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
  int n;
  int i;

  if (depth < 0) {
    errno = ENOSYS;
    return -1;
  }

  hwloc_bitmap_zero(nodeset);
  n = hwloc_get_nbobjs_by_depth(topology, depth);

  for (i = 0; i < n; i++) {
    hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i);
    lgrp_affinity_t aff = lgrp_affinity_get(idtype, id, obj->os_index);
    if (aff == LGRP_AFF_STRONG)
      hwloc_bitmap_set(nodeset, obj->os_index);
  }

  if (hwloc_bitmap_iszero(nodeset))
    hwloc_bitmap_copy(nodeset, hwloc_topology_get_complete_nodeset(topology));

  *policy = HWLOC_MEMBIND_BIND;
  return 0;
}
static int
hwloc_aix_get_sth_membind(hwloc_topology_t topology, rstype_t what, rsid_t who, hwloc_bitmap_t nodeset, hwloc_membind_policy_t *policy, int flags __hwloc_attribute_unused)
{
  hwloc_bitmap_t hwloc_set;
  rsethandle_t rset;
  unsigned cpu, maxcpus;
  int res = -1;
  int depth, n, i;

  depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
  if (depth < 0) {
    errno = EXDEV;
    return -1;
  }
  n = hwloc_get_nbobjs_by_depth(topology, depth);

  rset = rs_alloc(RS_EMPTY);

  if (ra_getrset(what, who, 0, rset) == -1)
    goto out;

  hwloc_set = hwloc_bitmap_alloc();

  maxcpus = rs_getinfo(rset, R_MAXPROCS, 0);
  for (cpu = 0; cpu < maxcpus; cpu++)
    if (rs_op(RS_TESTRESOURCE, rset, NULL, R_PROCS, cpu) == 1)
      hwloc_bitmap_set(hwloc_set, cpu);
  hwloc_bitmap_and(hwloc_set, hwloc_set, hwloc_topology_get_complete_cpuset(topology));

  hwloc_bitmap_zero(nodeset);
  for (i = 0; i < n; i++) {
    hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i);
    if (hwloc_bitmap_isincluded(obj->cpuset, hwloc_set))
      hwloc_bitmap_set(nodeset, obj->os_index);
  }

  hwloc_bitmap_free(hwloc_set);

  *policy = HWLOC_MEMBIND_BIND;
  res = 0;

 out:
  rs_free(rset);
  return res;
}
int
hwloc_look_hardwired_fujitsu_fx10(struct hwloc_topology *topology)
{
  /* FIXME: what if a broken core is disabled? */
  unsigned i;
  hwloc_obj_t obj;
  hwloc_bitmap_t set;

  for (i = 0; i < 16; i++) {
    set = hwloc_bitmap_alloc();
    hwloc_bitmap_set(set, i);

    obj = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, -1);
    obj->cpuset = hwloc_bitmap_dup(set);
    obj->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;
    obj->attr->cache.depth = 1;
    obj->attr->cache.size = 32*1024;
    obj->attr->cache.linesize = 128;
    obj->attr->cache.associativity = 2;
    hwloc_insert_object_by_cpuset(topology, obj);

    obj = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, -1);
    obj->cpuset = hwloc_bitmap_dup(set);
    obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
    obj->attr->cache.depth = 1;
    obj->attr->cache.size = 32*1024;
    obj->attr->cache.linesize = 128;
    obj->attr->cache.associativity = 2;
    hwloc_insert_object_by_cpuset(topology, obj);

    obj = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, i);
    obj->cpuset = set;
    hwloc_insert_object_by_cpuset(topology, obj);
  }

  set = hwloc_bitmap_alloc();
  hwloc_bitmap_set_range(set, 0, 15);

  obj = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, -1);
  obj->cpuset = hwloc_bitmap_dup(set);
  obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
  obj->attr->cache.depth = 2;
  obj->attr->cache.size = 12*1024*1024;
  obj->attr->cache.linesize = 128;
  obj->attr->cache.associativity = 24;
  hwloc_insert_object_by_cpuset(topology, obj);

  obj = hwloc_alloc_setup_object(HWLOC_OBJ_PACKAGE, 0);
  obj->cpuset = set;
  hwloc_obj_add_info(obj, "CPUVendor", "Fujitsu");
  hwloc_obj_add_info(obj, "CPUModel", "SPARC64 IXfx");
  hwloc_insert_object_by_cpuset(topology, obj);

  hwloc_setup_pu_level(topology, 16);

  return 0;
}
/*
 * Bind the current process to the given core
 */
void hw_set_proc_core(int core)
{
  hwloc_cpuset_t set;

  core = phys_cpus[core];
  set = hwloc_bitmap_alloc();
  hwloc_bitmap_zero(set);
  hwloc_bitmap_set(set, core);
  hwloc_set_proc_cpubind(topology, 0, set, HWLOC_CPUBIND_PROCESS);
  hwloc_bitmap_free(set);
}
/*
 * Bind the current thread to the given core
 */
void hw_set_my_core(int cpu)
{
  hwloc_cpuset_t set;

  cpu = phys_cpus[cpu];
  set = hwloc_bitmap_alloc();
  hwloc_bitmap_zero(set);
  hwloc_bitmap_set(set, cpu);
  hwloc_set_cpubind(topology, set, HWLOC_CPUBIND_THREAD);
  hwloc_bitmap_free(set);
}
/*
 * Bind the current thread to the range of cores [core, core + distance)
 */
void hw_set_thread_cores(int distance, int core)
{
  int i;
  hwloc_cpuset_t set;

  set = hwloc_bitmap_alloc();
  hwloc_bitmap_zero(set);
  for (i = core; i < core + distance; i++)
    hwloc_bitmap_set(set, i);
  hwloc_set_proc_cpubind(topology, 0, set, HWLOC_CPUBIND_THREAD);
  hwloc_bitmap_free(set);
}
static int
hwloc_win_get_area_membind(hwloc_topology_t topology __hwloc_attribute_unused, const void *addr, size_t len, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags)
{
  SYSTEM_INFO SystemInfo;
  DWORD page_size;
  uintptr_t start;
  unsigned nb;

  GetSystemInfo(&SystemInfo);
  page_size = SystemInfo.dwPageSize;

  start = (((uintptr_t) addr) / page_size) * page_size;
  nb = (unsigned)((((uintptr_t) addr + len - start) + page_size - 1) / page_size);

  if (!nb)
    nb = 1;

  {
    PSAPI_WORKING_SET_EX_INFORMATION *pv;
    unsigned i;

    pv = calloc(nb, sizeof(*pv));

    for (i = 0; i < nb; i++)
      pv[i].VirtualAddress = (void*) (start + i * page_size);
    if (!QueryWorkingSetExProc(GetCurrentProcess(), pv, nb * sizeof(*pv))) {
      free(pv);
      return -1;
    }
    *policy = HWLOC_MEMBIND_BIND;
    if (flags & HWLOC_MEMBIND_STRICT) {
      unsigned node = pv[0].VirtualAttributes.Node;
      for (i = 1; i < nb; i++) {
        if (pv[i].VirtualAttributes.Node != node) {
          errno = EXDEV;
          free(pv);
          return -1;
        }
      }
      hwloc_bitmap_only(nodeset, node);
      free(pv);
      return 0;
    }
    hwloc_bitmap_zero(nodeset);
    for (i = 0; i < nb; i++)
      hwloc_bitmap_set(nodeset, pv[i].VirtualAttributes.Node);
    free(pv);
    return 0;
  }
}
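/* Worked example of the page rounding above (values hypothetical, 4 KiB
 * pages): for addr = 0x12FF0 and len = 0x20, start = (0x12FF0 / 0x1000) *
 * 0x1000 = 0x12000, and nb = ((0x12FF0 + 0x20 - 0x12000) + 0xFFF) / 0x1000
 * = (0x1010 + 0xFFF) / 0x1000 = 2, so QueryWorkingSetEx is queried for both
 * pages that the [addr, addr+len) range straddles. */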
/*
 * Set the node where the current thread will run
 */
void hw_set_thread_node(int node)
{
  hwloc_nodeset_t nset;
  hwloc_cpuset_t set;

  if (local_topo->nnodes != 0) {
    nset = hwloc_bitmap_alloc();
    set = hwloc_bitmap_alloc();
    hwloc_bitmap_zero(nset);
    hwloc_bitmap_set(nset, node);
    hwloc_cpuset_from_nodeset(topology, set, nset);
    hwloc_set_proc_cpubind(topology, 0, set, HWLOC_CPUBIND_THREAD);
    hwloc_bitmap_free(set);
    hwloc_bitmap_free(nset);
  }
}
static void
hwloc_netbsd_bsd2hwloc(hwloc_bitmap_t hwloc_cpuset, const cpuset_t *cpuset)
{
  unsigned cpu, cpulimit;
  int found = 0;

  hwloc_bitmap_zero(hwloc_cpuset);
  cpulimit = cpuset_size(cpuset) * CHAR_BIT;
  for (cpu = 0; cpu < cpulimit; cpu++)
    if (cpuset_isset(cpu, cpuset)) {
      hwloc_bitmap_set(hwloc_cpuset, cpu);
      found++;
    }

  /* when never bound, it returns an empty set, fill it instead */
  if (!found)
    hwloc_bitmap_fill(hwloc_cpuset);
}
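/* For reference, a sketch of the inverse conversion (assumed, not part of
 * the excerpt above): copying an hwloc bitmap into a NetBSD cpuset_t using
 * the cpuset(3) primitives. */
static void
hwloc_netbsd_hwloc2bsd(hwloc_const_bitmap_t hwloc_cpuset, cpuset_t *cpuset)
{
  unsigned cpu, cpulimit;

  cpuset_zero(cpuset);
  cpulimit = cpuset_size(cpuset) * CHAR_BIT;
  for (cpu = 0; cpu < cpulimit; cpu++)
    if (hwloc_bitmap_isset(hwloc_cpuset, cpu))
      cpuset_set(cpu, cpuset);
}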
int main(void)
{
  hwloc_topology_t topology;
  hwloc_bitmap_t cpuset;
  int err;

  /* check the OS topology */
  hwloc_topology_init(&topology);
  hwloc_topology_load(topology);
  assert(hwloc_topology_is_thissystem(topology));
  cpuset = hwloc_bitmap_dup(hwloc_topology_get_complete_cpuset(topology));
  result("Binding with OS backend", hwloc_set_cpubind(topology, cpuset, 0));
  hwloc_topology_destroy(topology);

  /* We assume there is a real processor numbered 0 */
  hwloc_bitmap_zero(cpuset);
  hwloc_bitmap_set(cpuset, 0);

  /* check a synthetic topology */
  hwloc_topology_init(&topology);
  hwloc_topology_set_synthetic(topology, "1");
  hwloc_topology_load(topology);
  assert(!hwloc_topology_is_thissystem(topology));
  err = hwloc_set_cpubind(topology, cpuset, 0);
  result("Binding with synthetic backend", err);
  assert(!err);
  hwloc_topology_destroy(topology);

  /* check a synthetic topology but assuming it's the system topology */
  hwloc_topology_init(&topology);
  hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM);
  hwloc_topology_set_synthetic(topology, "1");
  hwloc_topology_load(topology);
  assert(hwloc_topology_is_thissystem(topology));
  result("Binding with synthetic backend faking is_thissystem",
         hwloc_set_cpubind(topology, cpuset, 0));
  hwloc_topology_destroy(topology);

  hwloc_bitmap_free(cpuset);
  return 0;
}
void hwloc_topology::set_thread_affinity_mask(
    mask_type mask
  , error_code& ec
    ) const
{ // {{{
    hwloc_cpuset_t cpuset = hwloc_bitmap_alloc();

    for (std::size_t i = 0; i < sizeof(std::size_t) * CHAR_BIT; ++i)
    {
        if (mask & (static_cast<std::size_t>(1) << i))
        {
            hwloc_bitmap_set(cpuset, static_cast<unsigned int>(i));
        }
    }

    {
        scoped_lock lk(topo_mtx);
        if (hwloc_set_cpubind(topo, cpuset,
              HWLOC_CPUBIND_STRICT | HWLOC_CPUBIND_THREAD))
        {
            // Strict binding not supported or failed, try weak binding.
            if (hwloc_set_cpubind(topo, cpuset, HWLOC_CPUBIND_THREAD))
            {
                hwloc_bitmap_free(cpuset);

                HPX_THROWS_IF(ec, kernel_error
                  , "hpx::threads::hwloc_topology::set_thread_affinity_mask"
                  , boost::str(boost::format(
                        "failed to set thread %x affinity mask") % mask));

                if (ec)
                    return;
            }
        }
    }

#if defined(__linux) || defined(linux) || defined(__linux__) || defined(__FreeBSD__)
    sleep(0);   // Allow the OS to pick up the change.
#endif

    hwloc_bitmap_free(cpuset);

    if (&ec != &throws)
        ec = make_success_code();
} // }}}
int opal_hwloc_base_membind(opal_hwloc_base_memory_segment_t *segs,
                            size_t count, int node_id)
{
    size_t i;
    int rc = OPAL_SUCCESS;
    char *msg = NULL;
    hwloc_cpuset_t cpuset = NULL;

    /* bozo check */
    if (OPAL_SUCCESS != opal_hwloc_base_get_topology()) {
        msg = "hwloc_set_area_membind() failure - topology not available";
        return opal_hwloc_base_report_bind_failure(__FILE__, __LINE__, msg, rc);
    }

    cpuset = hwloc_bitmap_alloc();
    if (NULL == cpuset) {
        rc = OPAL_ERR_OUT_OF_RESOURCE;
        msg = "hwloc_bitmap_alloc() failure";
        goto out;
    }
    hwloc_bitmap_set(cpuset, node_id);
    for (i = 0; i < count; i++) {
        if (0 != hwloc_set_area_membind(opal_hwloc_topology,
                                        segs[i].mbs_start_addr,
                                        segs[i].mbs_len, cpuset,
                                        HWLOC_MEMBIND_BIND,
                                        HWLOC_MEMBIND_STRICT)) {
            rc = OPAL_ERROR;
            msg = "hwloc_set_area_membind() failure";
            goto out;
        }
    }

 out:
    if (NULL != cpuset) {
        hwloc_bitmap_free(cpuset);
    }
    if (OPAL_SUCCESS != rc) {
        return opal_hwloc_base_report_bind_failure(__FILE__, __LINE__, msg, rc);
    }
    return OPAL_SUCCESS;
}
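/* A minimal sketch of the same binding outside of OPAL's wrappers, assuming
 * hwloc 2.x where the HWLOC_MEMBIND_BYNODESET flag makes the set argument a
 * nodeset (the function name and parameters here are illustrative only): */
static int bind_range_to_node(hwloc_topology_t topology, void *addr,
                              size_t len, unsigned node_id)
{
    hwloc_bitmap_t nodeset = hwloc_bitmap_alloc();
    int rc;

    if (NULL == nodeset)
        return -1;
    hwloc_bitmap_set(nodeset, node_id);
    rc = hwloc_set_area_membind(topology, addr, len, nodeset,
                                HWLOC_MEMBIND_BIND,
                                HWLOC_MEMBIND_STRICT | HWLOC_MEMBIND_BYNODESET);
    hwloc_bitmap_free(nodeset);
    return rc;
}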
static int
hwloc_look_kstat(struct hwloc_topology *topology)
{
  /* FIXME this assumes that all packages are identical */
  char *CPUType = hwloc_solaris_get_chip_type();
  char *CPUModel = hwloc_solaris_get_chip_model();

  kstat_ctl_t *kc = kstat_open();
  kstat_t *ksp;
  kstat_named_t *stat;
  unsigned look_cores = 1, look_chips = 1;

  unsigned Pproc_max = 0;
  unsigned Pproc_alloc = 256;
  struct hwloc_solaris_Pproc {
    unsigned Lpkg, Ppkg, Lcore, Lproc;
  } * Pproc = malloc(Pproc_alloc * sizeof(*Pproc));

  unsigned Lproc_num = 0;
  unsigned Lproc_alloc = 256;
  struct hwloc_solaris_Lproc {
    unsigned Pproc;
  } * Lproc = malloc(Lproc_alloc * sizeof(*Lproc));

  unsigned Lcore_num = 0;
  unsigned Lcore_alloc = 256;
  struct hwloc_solaris_Lcore {
    unsigned Pcore, Ppkg;
  } * Lcore = malloc(Lcore_alloc * sizeof(*Lcore));

  unsigned Lpkg_num = 0;
  unsigned Lpkg_alloc = 256;
  struct hwloc_solaris_Lpkg {
    unsigned Ppkg;
  } * Lpkg = malloc(Lpkg_alloc * sizeof(*Lpkg));

  unsigned pkgid, coreid, cpuid;
  unsigned i;

  for (i = 0; i < Pproc_alloc; i++) {
    Pproc[i].Lproc = -1;
    Pproc[i].Lpkg = -1;
    Pproc[i].Ppkg = -1;
    Pproc[i].Lcore = -1;
  }

  if (!kc) {
    hwloc_debug("kstat_open failed: %s\n", strerror(errno));
    free(Pproc);
    free(Lproc);
    free(Lcore);
    free(Lpkg);
    return 0;
  }

  for (ksp = kc->kc_chain; ksp; ksp = ksp->ks_next) {
    if (strncmp("cpu_info", ksp->ks_module, 8))
      continue;

    cpuid = ksp->ks_instance;

    if (kstat_read(kc, ksp, NULL) == -1) {
      fprintf(stderr, "kstat_read failed for CPU%u: %s\n", cpuid, strerror(errno));
      continue;
    }

    hwloc_debug("cpu%u\n", cpuid);

    if (cpuid >= Pproc_alloc) {
      struct hwloc_solaris_Pproc *tmp = realloc(Pproc, 2*Pproc_alloc * sizeof(*Pproc));
      if (!tmp)
        goto err;
      Pproc = tmp;
      Pproc_alloc *= 2;
      for (i = Pproc_alloc/2; i < Pproc_alloc; i++) {
        Pproc[i].Lproc = -1;
        Pproc[i].Lpkg = -1;
        Pproc[i].Ppkg = -1;
        Pproc[i].Lcore = -1;
      }
    }
    Pproc[cpuid].Lproc = Lproc_num;

    if (Lproc_num >= Lproc_alloc) {
      struct hwloc_solaris_Lproc *tmp = realloc(Lproc, 2*Lproc_alloc * sizeof(*Lproc));
      if (!tmp)
        goto err;
      Lproc = tmp;
      Lproc_alloc *= 2;
    }
    Lproc[Lproc_num].Pproc = cpuid;
    Lproc_num++;

    if (cpuid >= Pproc_max)
      Pproc_max = cpuid + 1;

    stat = (kstat_named_t *) kstat_data_lookup(ksp, "state");
    if (!stat)
      hwloc_debug("could not read state for CPU%u: %s\n", cpuid, strerror(errno));
    else if (stat->data_type != KSTAT_DATA_CHAR)
      hwloc_debug("unknown kstat type %d for cpu state\n", stat->data_type);
    else {
      hwloc_debug("cpu%u's state is %s\n", cpuid, stat->value.c);
      if (strcmp(stat->value.c, "on-line"))
        /* not online */
        hwloc_bitmap_clr(topology->levels[0][0]->online_cpuset, cpuid);
    }

    if (look_chips) do {
      /* Get Chip ID */
      stat = (kstat_named_t *) kstat_data_lookup(ksp, "chip_id");
      if (!stat) {
        if (Lpkg_num)
          fprintf(stderr, "could not read package id for CPU%u: %s\n", cpuid, strerror(errno));
        else
          hwloc_debug("could not read package id for CPU%u: %s\n", cpuid, strerror(errno));
        look_chips = 0;
        continue;
      }
      switch (stat->data_type) {
      case KSTAT_DATA_INT32:
        pkgid = stat->value.i32;
        break;
      case KSTAT_DATA_UINT32:
        pkgid = stat->value.ui32;
        break;
#ifdef _INT64_TYPE
      case KSTAT_DATA_UINT64:
        pkgid = stat->value.ui64;
        break;
      case KSTAT_DATA_INT64:
        pkgid = stat->value.i64;
        break;
#endif
      default:
        fprintf(stderr, "chip_id type %d unknown\n", stat->data_type);
        look_chips = 0;
        continue;
      }
      Pproc[cpuid].Ppkg = pkgid;
      for (i = 0; i < Lpkg_num; i++)
        if (pkgid == Lpkg[i].Ppkg)
          break;
      Pproc[cpuid].Lpkg = i;
      hwloc_debug("%u on package %u (%u)\n", cpuid, i, pkgid);
      if (i == Lpkg_num) {
        if (Lpkg_num == Lpkg_alloc) {
          struct hwloc_solaris_Lpkg *tmp = realloc(Lpkg, 2*Lpkg_alloc * sizeof(*Lpkg));
          if (!tmp)
            goto err;
          Lpkg = tmp;
          Lpkg_alloc *= 2;
        }
        Lpkg[Lpkg_num++].Ppkg = pkgid;
      }
    } while (0);

    if (look_cores) do {
      /* Get Core ID */
      stat = (kstat_named_t *) kstat_data_lookup(ksp, "core_id");
      if (!stat) {
        if (Lcore_num)
          fprintf(stderr, "could not read core id for CPU%u: %s\n", cpuid, strerror(errno));
        else
          hwloc_debug("could not read core id for CPU%u: %s\n", cpuid, strerror(errno));
        look_cores = 0;
        continue;
      }
      switch (stat->data_type) {
      case KSTAT_DATA_INT32:
        coreid = stat->value.i32;
        break;
      case KSTAT_DATA_UINT32:
        coreid = stat->value.ui32;
        break;
#ifdef _INT64_TYPE
      case KSTAT_DATA_UINT64:
        coreid = stat->value.ui64;
        break;
      case KSTAT_DATA_INT64:
        coreid = stat->value.i64;
        break;
#endif
      default:
        fprintf(stderr, "core_id type %d unknown\n", stat->data_type);
        look_cores = 0;
        continue;
      }
      for (i = 0; i < Lcore_num; i++)
        if (coreid == Lcore[i].Pcore && Pproc[cpuid].Ppkg == Lcore[i].Ppkg)
          break;
      Pproc[cpuid].Lcore = i;
      hwloc_debug("%u on core %u (%u)\n", cpuid, i, coreid);
      if (i == Lcore_num) {
        if (Lcore_num == Lcore_alloc) {
          struct hwloc_solaris_Lcore *tmp = realloc(Lcore, 2*Lcore_alloc * sizeof(*Lcore));
          if (!tmp)
            goto err;
          Lcore = tmp;
          Lcore_alloc *= 2;
        }
        Lcore[Lcore_num].Ppkg = Pproc[cpuid].Ppkg;
        Lcore[Lcore_num++].Pcore = coreid;
      }
    } while (0);

    /* Note: there is also clog_id for the Thread ID (not unique) and
     * pkg_core_id for the core ID (not unique). They are not useful to us
     * however. */
  }

  if (look_chips) {
    struct hwloc_obj *obj;
    unsigned j, k;
    hwloc_debug("%d Packages\n", Lpkg_num);
    for (j = 0; j < Lpkg_num; j++) {
      obj = hwloc_alloc_setup_object(HWLOC_OBJ_PACKAGE, Lpkg[j].Ppkg);
      if (CPUType)
        hwloc_obj_add_info(obj, "CPUType", CPUType);
      if (CPUModel)
        hwloc_obj_add_info(obj, "CPUModel", CPUModel);
      obj->cpuset = hwloc_bitmap_alloc();
      for (k = 0; k < Pproc_max; k++)
        if (Pproc[k].Lpkg == j)
          hwloc_bitmap_set(obj->cpuset, k);
      hwloc_debug_1arg_bitmap("Package %d has cpuset %s\n", j, obj->cpuset);
      hwloc_insert_object_by_cpuset(topology, obj);
    }
    hwloc_debug("%s", "\n");
  }

  if (look_cores) {
    struct hwloc_obj *obj;
    unsigned j, k;
    hwloc_debug("%d Cores\n", Lcore_num);
    for (j = 0; j < Lcore_num; j++) {
      obj = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, Lcore[j].Pcore);
      obj->cpuset = hwloc_bitmap_alloc();
      for (k = 0; k < Pproc_max; k++)
        if (Pproc[k].Lcore == j)
          hwloc_bitmap_set(obj->cpuset, k);
      hwloc_debug_1arg_bitmap("Core %d has cpuset %s\n", j, obj->cpuset);
      hwloc_insert_object_by_cpuset(topology, obj);
    }
    hwloc_debug("%s", "\n");
  }

  if (Lproc_num) {
    struct hwloc_obj *obj;
    unsigned j, k;
    hwloc_debug("%d PUs\n", Lproc_num);
    for (j = 0; j < Lproc_num; j++) {
      obj = hwloc_alloc_setup_object(HWLOC_OBJ_PU, Lproc[j].Pproc);
      obj->cpuset = hwloc_bitmap_alloc();
      for (k = 0; k < Pproc_max; k++)
        if (Pproc[k].Lproc == j)
          hwloc_bitmap_set(obj->cpuset, k);
      hwloc_debug_1arg_bitmap("PU %d has cpuset %s\n", j, obj->cpuset);
      hwloc_insert_object_by_cpuset(topology, obj);
    }
    hwloc_debug("%s", "\n");
  }

  kstat_close(kc);

  free(Pproc);
  free(Lproc);
  free(Lcore);
  free(Lpkg);
  return Lproc_num > 0;

 err:
  kstat_close(kc);

  free(Pproc);
  free(Lproc);
  free(Lcore);
  free(Lpkg);
  return 0;
}
static void
look_rset(int sdl, hwloc_obj_type_t type, struct hwloc_topology *topology, int level)
{
  rsethandle_t rset, rad;
  int i, maxcpus, j;
  int nbnodes;
  struct hwloc_obj *obj;

  if ((topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM))
    rset = rs_alloc(RS_ALL);
  else
    rset = rs_alloc(RS_PARTITION);
  rad = rs_alloc(RS_EMPTY);
  nbnodes = rs_numrads(rset, sdl, 0);
  if (nbnodes == -1) {
    perror("rs_numrads");
    return;
  }

  for (i = 0; i < nbnodes; i++) {
    if (rs_getrad(rset, rad, sdl, i, 0)) {
      fprintf(stderr, "rs_getrad(%d) failed: %s\n", i, strerror(errno));
      continue;
    }
    if (!rs_getinfo(rad, R_NUMPROCS, 0))
      continue;

    /* It seems logical processors are numbered from 1 here, while the
     * bindprocessor functions numbers them from 0... */
    obj = hwloc_alloc_setup_object(type, i - (type == HWLOC_OBJ_PU));
    obj->cpuset = hwloc_bitmap_alloc();
    obj->os_level = sdl;
    maxcpus = rs_getinfo(rad, R_MAXPROCS, 0);
    for (j = 0; j < maxcpus; j++) {
      if (rs_op(RS_TESTRESOURCE, rad, NULL, R_PROCS, j))
        hwloc_bitmap_set(obj->cpuset, j);
    }
    switch (type) {
    case HWLOC_OBJ_NODE:
      obj->nodeset = hwloc_bitmap_alloc();
      hwloc_bitmap_set(obj->nodeset, i);
      obj->memory.local_memory = 0; /* TODO: odd, rs_getinfo(rad, R_MEMSIZE, 0) << 10 returns the total memory ... */
      obj->memory.page_types_len = 2;
      obj->memory.page_types = malloc(2*sizeof(*obj->memory.page_types));
      memset(obj->memory.page_types, 0, 2*sizeof(*obj->memory.page_types));
      obj->memory.page_types[0].size = hwloc_getpagesize();
#ifdef HAVE__SC_LARGE_PAGESIZE
      obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE);
#endif
      /* TODO: obj->memory.page_types[1].count = rs_getinfo(rset, R_LGPGFREE, 0) / hugepagesize */
      break;
    case HWLOC_OBJ_CACHE:
      obj->attr->cache.size = _system_configuration.L2_cache_size;
      obj->attr->cache.associativity = _system_configuration.L2_cache_asc;
      obj->attr->cache.linesize = 0; /* TODO: ? */
      obj->attr->cache.depth = 2;
      obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED; /* FIXME? */
      break;
    case HWLOC_OBJ_GROUP:
      obj->attr->group.depth = level;
      break;
    case HWLOC_OBJ_CORE:
    {
      hwloc_obj_t obj2, obj3;
      obj2 = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, i);
      obj2->cpuset = hwloc_bitmap_dup(obj->cpuset);
      obj2->attr->cache.size = _system_configuration.dcache_size;
      obj2->attr->cache.associativity = _system_configuration.dcache_asc;
      obj2->attr->cache.linesize = _system_configuration.dcache_line;
      obj2->attr->cache.depth = 1;
      if (_system_configuration.cache_attrib & (1 << 30)) {
        /* Unified cache */
        obj2->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
        hwloc_debug("Adding an L1u cache for core %d\n", i);
        hwloc_insert_object_by_cpuset(topology, obj2);
      } else {
        /* Separate Instruction and Data caches */
        obj2->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
        hwloc_debug("Adding an L1d cache for core %d\n", i);
        hwloc_insert_object_by_cpuset(topology, obj2);

        obj3 = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, i);
        obj3->cpuset = hwloc_bitmap_dup(obj->cpuset);
        obj3->attr->cache.size = _system_configuration.icache_size;
        obj3->attr->cache.associativity = _system_configuration.icache_asc;
        obj3->attr->cache.linesize = _system_configuration.icache_line;
        obj3->attr->cache.depth = 1;
        obj3->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;
        hwloc_debug("Adding an L1i cache for core %d\n", i);
        hwloc_insert_object_by_cpuset(topology, obj3);
      }
      break;
    }
    default:
      break;
    }
    hwloc_debug_2args_bitmap("%s %d has cpuset %s\n",
                             hwloc_obj_type_string(type), i, obj->cpuset);
    hwloc_insert_object_by_cpuset(topology, obj);
  }

  rs_free(rset);
  rs_free(rad);
}
static int
hwloc_look_hpux(struct hwloc_backend *backend)
{
  struct hwloc_topology *topology = backend->topology;
  int has_numa = sysconf(_SC_CCNUMA_SUPPORT) == 1;
  hwloc_obj_t *nodes = NULL, obj;
  spu_t currentcpu;
  ldom_t currentnode;
  int i, nbnodes = 0;

  if (topology->levels[0][0]->cpuset)
    /* somebody discovered things */
    return -1;

  hwloc_alloc_obj_cpusets(topology->levels[0][0]);

  if (has_numa) {
    nbnodes = mpctl((topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM) ?
                    MPC_GETNUMLDOMS_SYS : MPC_GETNUMLDOMS, 0, 0);

    hwloc_debug("%d nodes\n", nbnodes);

    nodes = malloc(nbnodes * sizeof(*nodes));

    i = 0;
    currentnode = mpctl((topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM) ?
                        MPC_GETFIRSTLDOM_SYS : MPC_GETFIRSTLDOM, 0, 0);
    while (currentnode != -1 && i < nbnodes) {
      hwloc_debug("node %d is %d\n", i, currentnode);
      nodes[i] = obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_NUMANODE, currentnode);
      obj->cpuset = hwloc_bitmap_alloc();
      obj->nodeset = hwloc_bitmap_alloc();
      hwloc_bitmap_set(obj->nodeset, currentnode);
      /* TODO: obj->attr->node.memory_kB */
      /* TODO: obj->attr->node.huge_page_free */

      currentnode = mpctl((topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM) ?
                          MPC_GETNEXTLDOM_SYS : MPC_GETNEXTLDOM, currentnode, 0);
      i++;
    }
  }

  i = 0;
  currentcpu = mpctl((topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM) ?
                     MPC_GETFIRSTSPU_SYS : MPC_GETFIRSTSPU, 0, 0);
  while (currentcpu != -1) {
    obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PU, currentcpu);
    obj->cpuset = hwloc_bitmap_alloc();
    hwloc_bitmap_set(obj->cpuset, currentcpu);

    hwloc_debug("cpu %d\n", currentcpu);

    if (nodes) {
      /* Add this cpu to its node */
      currentnode = mpctl(MPC_SPUTOLDOM, currentcpu, 0);
      /* Hopefully it's just the same as previous cpu */
      if (i >= nbnodes || (ldom_t) nodes[i]->os_index != currentnode)
        for (i = 0; i < nbnodes; i++)
          if ((ldom_t) nodes[i]->os_index == currentnode)
            break;
      if (i < nbnodes) {
        hwloc_bitmap_set(nodes[i]->cpuset, currentcpu);
        hwloc_debug("is in node %d\n", i);
      } else {
        hwloc_debug("%s", "is in no node?!\n");
      }
    }

    /* Add cpu */
    hwloc_insert_object_by_cpuset(topology, obj);

    currentcpu = mpctl((topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM) ?
                       MPC_GETNEXTSPU_SYS : MPC_GETNEXTSPU, currentcpu, 0);
  }

  if (nodes) {
    /* Add nodes */
    for (i = 0; i < nbnodes; i++)
      hwloc_insert_object_by_cpuset(topology, nodes[i]);
    free(nodes);
  }

  topology->support.discovery->pu = 1;

  hwloc_obj_add_info(topology->levels[0][0], "Backend", "HP-UX");
  hwloc_add_uname_info(topology, NULL);
  return 0;
}
static void create_hwloc_cpusets() {
#ifdef USE_HWLOC
    int i;

    int err = hwloc_topology_init(&topology);
    assert(err == 0);
    err = hwloc_topology_load(topology);
    assert(err == 0);

    hwloc_bitmap_t cpuset = hwloc_bitmap_alloc();
    assert(cpuset);
    err = hwloc_get_cpubind(topology, cpuset, HWLOC_CPUBIND_PROCESS);
    assert(err == 0);
    const int available_pus = hwloc_bitmap_weight(cpuset);
    const int last_set_index = hwloc_bitmap_last(cpuset);
    const int num_workers = hc_context->nworkers;

    hclib_affinity_t selected_affinity = HCLIB_AFFINITY_STRIDED;
    const char *user_selected_affinity = getenv("HCLIB_AFFINITY");
    if (user_selected_affinity) {
        if (strcmp(user_selected_affinity, "strided") == 0) {
            selected_affinity = HCLIB_AFFINITY_STRIDED;
        } else if (strcmp(user_selected_affinity, "chunked") == 0) {
            selected_affinity = HCLIB_AFFINITY_CHUNKED;
        } else {
            fprintf(stderr, "Unsupported thread affinity \"%s\" specified with "
                    "HCLIB_AFFINITY.\n", user_selected_affinity);
            exit(1);
        }
    }

    thread_cpusets = (hwloc_bitmap_t *)malloc(hc_context->nworkers *
            sizeof(*thread_cpusets));
    assert(thread_cpusets);

    for (i = 0; i < hc_context->nworkers; i++) {
        thread_cpusets[i] = hwloc_bitmap_alloc();
        assert(thread_cpusets[i]);
    }

    switch (selected_affinity) {
        case (HCLIB_AFFINITY_STRIDED): {
            if (available_pus < num_workers) {
                fprintf(stderr, "ERROR Available PUs (%d) was less than number "
                        "of workers (%d), don't currently support "
                        "oversubscription with strided thread pinning\n",
                        available_pus, num_workers);
                exit(1);
            }

            int count = 0;
            int index = 0;
            while (index <= last_set_index) {
                if (hwloc_bitmap_isset(cpuset, index)) {
                    hwloc_bitmap_set(thread_cpusets[count % num_workers],
                            index);
                    count++;
                }
                index++;
            }
            break;
        }
        case (HCLIB_AFFINITY_CHUNKED): {
            const int chunk_size = (available_pus + num_workers - 1) /
                    num_workers;
            int count = 0;
            int index = 0;
            while (index <= last_set_index) {
                if (hwloc_bitmap_isset(cpuset, index)) {
                    hwloc_bitmap_set(thread_cpusets[count / chunk_size], index);
                    count++;
                }
                index++;
            }
            break;
        }
        default:
            assert(false);
    }

    hwloc_bitmap_t nodeset = hwloc_bitmap_alloc();
    hwloc_bitmap_t other_nodeset = hwloc_bitmap_alloc();
    assert(nodeset && other_nodeset);

    /*
     * Here, we look for contiguous ranges of worker threads that share any
     * NUMA nodes with us. In theory, this should be more hierarchical but
     * isn't yet. This is also super inefficient... O(T^2) where T is the
     * number of workers.
     */
    bool revert_to_naive_stealing = false;
    for (i = 0; i < hc_context->nworkers; i++) {
        // Get the NUMA nodes for this CPU set
        hwloc_cpuset_to_nodeset(topology, thread_cpusets[i], nodeset);

        int base = -1;
        int limit = -1;
        int j;
        for (j = 0; j < hc_context->nworkers; j++) {
            hwloc_cpuset_to_nodeset(topology, thread_cpusets[j], other_nodeset);
            // Take the intersection, see if there is any overlap
            hwloc_bitmap_and(other_nodeset, nodeset, other_nodeset);

            if (base < 0) {
                // Haven't found a contiguous chunk of workers yet.
                if (!hwloc_bitmap_iszero(other_nodeset)) {
                    base = j;
                }
            } else {
                /*
                 * Have a contiguous chunk of workers, either still inside it
                 * or after it.
                 */
                if (limit < 0) {
                    // Inside the contiguous chunk of workers
                    if (hwloc_bitmap_iszero(other_nodeset)) {
                        // Found the end
                        limit = j;
                    }
                } else {
                    // After the contiguous chunk of workers
                    if (!hwloc_bitmap_iszero(other_nodeset)) {
                        // No contiguous chunk to find, just do something naive.
                        revert_to_naive_stealing = true;
                        break;
                    }
                }
            }
        }

        if (revert_to_naive_stealing) {
            fprintf(stderr, "WARNING: Using naive work-stealing patterns.\n");
            base = 0;
            limit = hc_context->nworkers;
        } else {
            assert(base >= 0);
            if (limit < 0) {
                limit = hc_context->nworkers;
            }
        }

        hc_context->workers[i]->base_intra_socket_workers = base;
        hc_context->workers[i]->limit_intra_socket_workers = limit;

#ifdef VERBOSE
        char *nbuf;
        hwloc_bitmap_asprintf(&nbuf, nodeset);
        char *buffer;
        hwloc_bitmap_asprintf(&buffer, thread_cpusets[i]);
        fprintf(stderr, "Worker %d has access to %d PUs (%s), %d NUMA nodes "
                "(%s). Shared NUMA nodes with [%d, %d).\n", i,
                hwloc_bitmap_weight(thread_cpusets[i]), buffer,
                hwloc_bitmap_weight(nodeset), nbuf, base, limit);
        free(buffer);
#endif
    }
#endif
}
static int
hwloc_look_darwin(struct hwloc_backend *backend)
{
  struct hwloc_topology *topology = backend->topology;
  int64_t _nprocs;
  unsigned nprocs;
  int64_t _npackages;
  unsigned i, j, cpu;
  struct hwloc_obj *obj;
  size_t size;
  int64_t l1dcachesize, l1icachesize;
  int64_t cacheways[2];
  int64_t l2cachesize;
  int64_t cachelinesize;
  int64_t memsize;
  char cpumodel[64];

  if (topology->levels[0][0]->cpuset)
    /* somebody discovered things */
    return -1;

  hwloc_alloc_obj_cpusets(topology->levels[0][0]);

  if (hwloc_get_sysctlbyname("hw.ncpu", &_nprocs) || _nprocs <= 0)
    return -1;
  nprocs = _nprocs;
  topology->support.discovery->pu = 1;

  hwloc_debug("%u procs\n", nprocs);

  size = sizeof(cpumodel);
  if (sysctlbyname("machdep.cpu.brand_string", cpumodel, &size, NULL, 0))
    cpumodel[0] = '\0';

  if (!hwloc_get_sysctlbyname("hw.packages", &_npackages) && _npackages > 0) {
    unsigned npackages = _npackages;
    int64_t _cores_per_package;
    int64_t _logical_per_package;
    unsigned logical_per_package;

    hwloc_debug("%u packages\n", npackages);

    if (!hwloc_get_sysctlbyname("machdep.cpu.logical_per_package", &_logical_per_package)
        && _logical_per_package > 0)
      logical_per_package = _logical_per_package;
    else
      /* Assume the trivia. */
      logical_per_package = nprocs / npackages;

    hwloc_debug("%u threads per package\n", logical_per_package);

    if (nprocs == npackages * logical_per_package
        && hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_PACKAGE))
      for (i = 0; i < npackages; i++) {
        obj = hwloc_alloc_setup_object(HWLOC_OBJ_PACKAGE, i);
        obj->cpuset = hwloc_bitmap_alloc();
        for (cpu = i*logical_per_package; cpu < (i+1)*logical_per_package; cpu++)
          hwloc_bitmap_set(obj->cpuset, cpu);

        hwloc_debug_1arg_bitmap("package %u has cpuset %s\n", i, obj->cpuset);

        if (cpumodel[0] != '\0')
          hwloc_obj_add_info(obj, "CPUModel", cpumodel);
        hwloc_insert_object_by_cpuset(topology, obj);
      }
    else if (cpumodel[0] != '\0')
      hwloc_obj_add_info(topology->levels[0][0], "CPUModel", cpumodel);

    if (!hwloc_get_sysctlbyname("machdep.cpu.cores_per_package", &_cores_per_package)
        && _cores_per_package > 0
        && hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_CORE)) {
      unsigned cores_per_package = _cores_per_package;
      hwloc_debug("%u cores per package\n", cores_per_package);

      if (!(logical_per_package % cores_per_package))
        for (i = 0; i < npackages * cores_per_package; i++) {
          obj = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, i);
          obj->cpuset = hwloc_bitmap_alloc();
          for (cpu = i*(logical_per_package/cores_per_package);
               cpu < (i+1)*(logical_per_package/cores_per_package);
               cpu++)
            hwloc_bitmap_set(obj->cpuset, cpu);

          hwloc_debug_1arg_bitmap("core %u has cpuset %s\n", i, obj->cpuset);
          hwloc_insert_object_by_cpuset(topology, obj);
        }
    }
  } else if (cpumodel[0] != '\0')
    hwloc_obj_add_info(topology->levels[0][0], "CPUModel", cpumodel);

  if (hwloc_get_sysctlbyname("hw.l1dcachesize", &l1dcachesize))
    l1dcachesize = 0;

  if (hwloc_get_sysctlbyname("hw.l1icachesize", &l1icachesize))
    l1icachesize = 0;

  if (hwloc_get_sysctlbyname("hw.l2cachesize", &l2cachesize))
    l2cachesize = 0;

  if (hwloc_get_sysctlbyname("machdep.cpu.cache.L1_associativity", &cacheways[0]))
    cacheways[0] = 0;
  else if (cacheways[0] == 0xff)
    cacheways[0] = -1;

  if (hwloc_get_sysctlbyname("machdep.cpu.cache.L2_associativity", &cacheways[1]))
    cacheways[1] = 0;
  else if (cacheways[1] == 0xff)
    cacheways[1] = -1;

  if (hwloc_get_sysctlbyname("hw.cachelinesize", &cachelinesize))
    cachelinesize = 0;

  if (hwloc_get_sysctlbyname("hw.memsize", &memsize))
    memsize = 0;

  if (!sysctlbyname("hw.cacheconfig", NULL, &size, NULL, 0)) {
    unsigned n = size / sizeof(uint32_t);
    uint64_t *cacheconfig = NULL;
    uint64_t *cachesize = NULL;
    uint32_t *cacheconfig32 = NULL;

    cacheconfig = malloc(sizeof(uint64_t) * n);
    if (NULL == cacheconfig) {
      goto out;
    }
    cachesize = malloc(sizeof(uint64_t) * n);
    if (NULL == cachesize) {
      goto out;
    }
    cacheconfig32 = malloc(sizeof(uint32_t) * n);
    if (NULL == cacheconfig32) {
      goto out;
    }

    if ((!sysctlbyname("hw.cacheconfig", cacheconfig, &size, NULL, 0))) {
      /* Yeech. Darwin seemingly has changed from 32bit to 64bit integers for
       * cacheconfig, with apparently no way for detection. Assume the machine
       * won't have more than 4 billion cpus */
      if (cacheconfig[0] > 0xFFFFFFFFUL) {
        memcpy(cacheconfig32, cacheconfig, size);
        for (i = 0; i < size / sizeof(uint32_t); i++)
          cacheconfig[i] = cacheconfig32[i];
      }

      memset(cachesize, 0, sizeof(uint64_t) * n);
      size = sizeof(uint64_t) * n;
      if (sysctlbyname("hw.cachesize", cachesize, &size, NULL, 0)) {
        if (n > 0)
          cachesize[0] = memsize;
        if (n > 1)
          cachesize[1] = l1dcachesize;
        if (n > 2)
          cachesize[2] = l2cachesize;
      }

      hwloc_debug("%s", "caches");
      for (i = 0; i < n && cacheconfig[i]; i++)
        hwloc_debug(" %"PRIu64"(%"PRIu64"kB)", cacheconfig[i], cachesize[i] / 1024);

      /* Now we know how many caches there are */
      n = i;
      hwloc_debug("\n%u cache levels\n", n - 1);

      /* For each cache level (0 is memory) */
      for (i = 0; i < n; i++) {
        /* cacheconfig tells us how many cpus share it, let's iterate on each cache */
        for (j = 0; j < (nprocs / cacheconfig[i]); j++) {
          if (!i) {
            obj = hwloc_alloc_setup_object(HWLOC_OBJ_NUMANODE, j);
            obj->nodeset = hwloc_bitmap_alloc();
            hwloc_bitmap_set(obj->nodeset, j);
          } else {
            obj = hwloc_alloc_setup_object(HWLOC_OBJ_L1CACHE+i-1, -1);
          }
          obj->cpuset = hwloc_bitmap_alloc();
          for (cpu = j*cacheconfig[i]; cpu < ((j+1)*cacheconfig[i]); cpu++)
            hwloc_bitmap_set(obj->cpuset, cpu);

          if (i == 1 && l1icachesize
              && hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1ICACHE)) {
            /* FIXME assuming that L1i and L1d are shared the same way. Darwin
             * does not yet provide a way to know. */
            hwloc_obj_t l1i = hwloc_alloc_setup_object(HWLOC_OBJ_L1ICACHE, -1);
            l1i->cpuset = hwloc_bitmap_dup(obj->cpuset);
            hwloc_debug_1arg_bitmap("L1icache %u has cpuset %s\n", j, l1i->cpuset);
            l1i->attr->cache.depth = i;
            l1i->attr->cache.size = l1icachesize;
            l1i->attr->cache.linesize = cachelinesize;
            l1i->attr->cache.associativity = 0;
            l1i->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;

            hwloc_insert_object_by_cpuset(topology, l1i);
          }
          if (i) {
            hwloc_debug_2args_bitmap("L%ucache %u has cpuset %s\n", i, j, obj->cpuset);
            obj->attr->cache.depth = i;
            obj->attr->cache.size = cachesize[i];
            obj->attr->cache.linesize = cachelinesize;
            if (i <= sizeof(cacheways) / sizeof(cacheways[0]))
              obj->attr->cache.associativity = cacheways[i-1];
            else
              obj->attr->cache.associativity = 0;
            if (i == 1 && l1icachesize)
              obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
            else
              obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
          } else {
            hwloc_debug_1arg_bitmap("node %u has cpuset %s\n", j, obj->cpuset);
            obj->memory.local_memory = cachesize[i];
            obj->memory.page_types_len = 2;
            obj->memory.page_types = malloc(2*sizeof(*obj->memory.page_types));
            memset(obj->memory.page_types, 0, 2*sizeof(*obj->memory.page_types));
            obj->memory.page_types[0].size = hwloc_getpagesize();
#ifdef HAVE__SC_LARGE_PAGESIZE
            obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE);
#endif
          }

          if (hwloc_filter_check_keep_object_type(topology, obj->type))
            hwloc_insert_object_by_cpuset(topology, obj);
          else
            hwloc_free_unlinked_object(obj); /* FIXME: don't built at all, just build the cpuset in case l1i needs it */
        }
      }
    }
  out:
    free(cacheconfig);
    free(cachesize);
    free(cacheconfig32);
  }

  /* add PU objects */
  hwloc_setup_pu_level(topology, nprocs);

  hwloc_obj_add_info(topology->levels[0][0], "Backend", "Darwin");
  hwloc_add_uname_info(topology, NULL);
  return 0;
}
/*
 * Recursively build objects whose cpu start at first_cpu
 * - level gives where to look in the type, arity and id arrays
 * - the id array is used as a variable to get unique IDs for a given level.
 * - generated memory should be added to *memory_kB.
 * - generated cpus should be added to parent_cpuset.
 * - next cpu number to be used should be returned.
 */
static void
hwloc__look_synthetic(struct hwloc_topology *topology,
                      struct hwloc_synthetic_backend_data_s *data,
                      int level,
                      hwloc_bitmap_t parent_cpuset)
{
  hwloc_obj_t obj;
  unsigned i;
  struct hwloc_synthetic_level_data_s *curlevel = &data->level[level];
  hwloc_obj_type_t type = curlevel->type;
  unsigned os_index;

  /* pre-hooks */
  switch (type) {
  case HWLOC_OBJ_GROUP:
    break;
  case HWLOC_OBJ_MACHINE:
    break;
  case HWLOC_OBJ_NUMANODE:
    break;
  case HWLOC_OBJ_PACKAGE:
    break;
  case HWLOC_OBJ_CACHE:
    break;
  case HWLOC_OBJ_CORE:
    break;
  case HWLOC_OBJ_PU:
    break;
  case HWLOC_OBJ_SYSTEM:
  case HWLOC_OBJ_BRIDGE:
  case HWLOC_OBJ_PCI_DEVICE:
  case HWLOC_OBJ_OS_DEVICE:
  case HWLOC_OBJ_MISC:
  case HWLOC_OBJ_TYPE_MAX:
    /* Should never happen */
    assert(0);
    break;
  }

  os_index = curlevel->next_os_index++;
  if (curlevel->index_array)
    os_index = curlevel->index_array[os_index];
  obj = hwloc_alloc_setup_object(type, os_index);
  obj->cpuset = hwloc_bitmap_alloc();

  if (!curlevel->arity) {
    hwloc_bitmap_set(obj->cpuset, os_index);
  } else {
    for (i = 0; i < curlevel->arity; i++)
      hwloc__look_synthetic(topology, data, level + 1, obj->cpuset);
  }

  if (type == HWLOC_OBJ_NUMANODE) {
    obj->nodeset = hwloc_bitmap_alloc();
    hwloc_bitmap_set(obj->nodeset, os_index);
  }

  hwloc_bitmap_or(parent_cpuset, parent_cpuset, obj->cpuset);

  hwloc_synthetic__post_look_hooks(curlevel, obj);

  hwloc_insert_object_by_cpuset(topology, obj);
}
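/* For context, a sketch of how this backend is reached through the public
 * API (the hwloc calls are real; the description string is just an example):
 * each "core:2 pu:2" level makes hwloc__look_synthetic() recurse once per
 * child, so the load below produces 2 cores with 2 PUs each. */
static int
synthetic_example(void)
{
  hwloc_topology_t topology;
  hwloc_topology_init(&topology);
  hwloc_topology_set_synthetic(topology, "core:2 pu:2");
  hwloc_topology_load(topology);
  assert(hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU) == 4);
  hwloc_topology_destroy(topology);
  return 0;
}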
static void
browse(struct hwloc_topology *topology, lgrp_cookie_t cookie, lgrp_id_t lgrp, hwloc_obj_t *glob_lgrps, unsigned *curlgrp)
{
  int n;
  hwloc_obj_t obj;
  lgrp_mem_size_t mem_size;

  n = lgrp_cpus(cookie, lgrp, NULL, 0, LGRP_CONTENT_HIERARCHY);
  if (n == -1)
    return;

  /* Is this lgrp a NUMA node? */
  if ((mem_size = lgrp_mem_size(cookie, lgrp, LGRP_MEM_SZ_INSTALLED, LGRP_CONTENT_DIRECT)) > 0) {
    int i;
    processorid_t *cpuids;
    cpuids = malloc(sizeof(processorid_t) * n);
    assert(cpuids != NULL);

    obj = hwloc_alloc_setup_object(HWLOC_OBJ_NUMANODE, lgrp);
    obj->nodeset = hwloc_bitmap_alloc();
    hwloc_bitmap_set(obj->nodeset, lgrp);
    obj->cpuset = hwloc_bitmap_alloc();
    glob_lgrps[(*curlgrp)++] = obj;

    lgrp_cpus(cookie, lgrp, cpuids, n, LGRP_CONTENT_HIERARCHY);
    for (i = 0; i < n; i++) {
      hwloc_debug("node %ld's cpu %d is %d\n", lgrp, i, cpuids[i]);
      hwloc_bitmap_set(obj->cpuset, cpuids[i]);
    }
    hwloc_debug_1arg_bitmap("node %ld has cpuset %s\n", lgrp, obj->cpuset);

    /* or LGRP_MEM_SZ_FREE */
    hwloc_debug("node %ld has %lldkB\n", lgrp, mem_size/1024);
    obj->memory.local_memory = mem_size;
    obj->memory.page_types_len = 2;
    obj->memory.page_types = malloc(2*sizeof(*obj->memory.page_types));
    memset(obj->memory.page_types, 0, 2*sizeof(*obj->memory.page_types));
    obj->memory.page_types[0].size = hwloc_getpagesize();
#if HAVE_DECL__SC_LARGE_PAGESIZE
    obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE);
#endif
    hwloc_insert_object_by_cpuset(topology, obj);
    free(cpuids);
  }

  n = lgrp_children(cookie, lgrp, NULL, 0);
  {
    lgrp_id_t *lgrps;
    int i;

    lgrps = malloc(sizeof(lgrp_id_t) * n);
    assert(lgrps != NULL);
    lgrp_children(cookie, lgrp, lgrps, n);
    hwloc_debug("lgrp %ld has %d children\n", lgrp, n);
    for (i = 0; i < n; i++) {
      browse(topology, cookie, lgrps[i], glob_lgrps, curlgrp);
    }
    hwloc_debug("lgrp %ld's children done\n", lgrp);
    free(lgrps);
  }
}
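/* A sketch of how browse() is typically kicked off (the Solaris liblgrp
 * calls are real; the wrapper itself is illustrative): start the recursion
 * from the root lgroup. */
static void
browse_all(struct hwloc_topology *topology, hwloc_obj_t *glob_lgrps)
{
  unsigned curlgrp = 0;
  lgrp_cookie_t cookie = lgrp_init(LGRP_VIEW_OS);
  if (cookie != LGRP_COOKIE_NONE) {
    browse(topology, cookie, lgrp_root(cookie), glob_lgrps, &curlgrp);
    lgrp_fini(cookie);
  }
}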
/*
 * Recursively build objects whose cpu start at first_cpu
 * - level gives where to look in the type, arity and id arrays
 * - the id array is used as a variable to get unique IDs for a given level.
 * - generated memory should be added to *memory_kB.
 * - generated cpus should be added to parent_cpuset.
 * - next cpu number to be used should be returned.
 */
static unsigned
hwloc__look_synthetic(struct hwloc_topology *topology, int level, unsigned first_cpu, hwloc_bitmap_t parent_cpuset)
{
  hwloc_obj_t obj;
  unsigned i;
  hwloc_obj_type_t type = topology->backend_params.synthetic.type[level];

  /* pre-hooks */
  switch (type) {
  case HWLOC_OBJ_MISC:
    break;
  case HWLOC_OBJ_GROUP:
    break;
  case HWLOC_OBJ_SYSTEM:
  case HWLOC_OBJ_BRIDGE:
  case HWLOC_OBJ_PCI_DEVICE:
  case HWLOC_OBJ_OS_DEVICE:
    /* Shouldn't happen. */
    abort();
    break;
  case HWLOC_OBJ_MACHINE:
    break;
  case HWLOC_OBJ_NODE:
    break;
  case HWLOC_OBJ_SOCKET:
    break;
  case HWLOC_OBJ_CACHE:
    break;
  case HWLOC_OBJ_CORE:
    break;
  case HWLOC_OBJ_PU:
    break;
  case HWLOC_OBJ_TYPE_MAX:
    /* Should never happen */
    assert(0);
    break;
  }

  obj = hwloc_alloc_setup_object(type, topology->backend_params.synthetic.id[level]++);
  obj->cpuset = hwloc_bitmap_alloc();

  if (!topology->backend_params.synthetic.arity[level]) {
    hwloc_bitmap_set(obj->cpuset, first_cpu++);
  } else {
    for (i = 0; i < topology->backend_params.synthetic.arity[level]; i++)
      first_cpu = hwloc__look_synthetic(topology, level + 1, first_cpu, obj->cpuset);
  }

  if (type == HWLOC_OBJ_NODE) {
    obj->nodeset = hwloc_bitmap_alloc();
    hwloc_bitmap_set(obj->nodeset, obj->os_index);
  }

  hwloc_bitmap_or(parent_cpuset, parent_cpuset, obj->cpuset);

  /* post-hooks */
  switch (type) {
  case HWLOC_OBJ_MISC:
    break;
  case HWLOC_OBJ_GROUP:
    obj->attr->group.depth = topology->backend_params.synthetic.depth[level];
    break;
  case HWLOC_OBJ_SYSTEM:
  case HWLOC_OBJ_BRIDGE:
  case HWLOC_OBJ_PCI_DEVICE:
  case HWLOC_OBJ_OS_DEVICE:
    abort();
    break;
  case HWLOC_OBJ_MACHINE:
    break;
  case HWLOC_OBJ_NODE:
    /* 1GB in memory nodes, 256k 4k-pages. */
    obj->memory.local_memory = 1024*1024*1024;
    obj->memory.page_types_len = 1;
    obj->memory.page_types = malloc(sizeof(*obj->memory.page_types));
    memset(obj->memory.page_types, 0, sizeof(*obj->memory.page_types));
    obj->memory.page_types[0].size = 4096;
    obj->memory.page_types[0].count = 256*1024;
    break;
  case HWLOC_OBJ_SOCKET:
    break;
  case HWLOC_OBJ_CACHE:
    obj->attr->cache.depth = topology->backend_params.synthetic.depth[level];
    obj->attr->cache.linesize = 64;
    if (obj->attr->cache.depth == 1)
      /* 32Kb in L1 */
      obj->attr->cache.size = 32*1024;
    else
      /* *4 at each level, starting from 1MB for L2 */
      obj->attr->cache.size = 256*1024 << (2*obj->attr->cache.depth);
    break;
  case HWLOC_OBJ_CORE:
    break;
  case HWLOC_OBJ_PU:
    break;
  case HWLOC_OBJ_TYPE_MAX:
    /* Should never happen */
    assert(0);
    break;
  }

  hwloc_insert_object_by_cpuset(topology, obj);

  return first_cpu;
}
static int
hwloc_aix_get_pid_getthrds_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, pid_t pid, hwloc_bitmap_t hwloc_set, int flags __hwloc_attribute_unused)
{
#if HWLOC_BITS_PER_LONG == 64
  struct thrdentry64 thread_info;
  tid64_t next_thread;
#else
  struct thrdsinfo thread_info;
  tid_t next_thread;
#endif

  next_thread = 0;
  /* TODO: get multiple at once */
#if HWLOC_BITS_PER_LONG == 64
  while (getthrds64(pid, &thread_info, sizeof(thread_info), &next_thread, 1) == 1) {
#else
  while (getthrds(pid, &thread_info, sizeof(thread_info), &next_thread, 1) == 1) {
#endif
    if (PROCESSOR_CLASS_ANY != thread_info.ti_cpuid)
      hwloc_bitmap_set(hwloc_set, thread_info.ti_cpuid);
    else
      hwloc_bitmap_fill(hwloc_set);
  }
  /* TODO: what if the thread list changes and we get nothing? */

  return 0;
}

static int
hwloc_aix_get_tid_getthrds_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, tid_t tid, hwloc_bitmap_t hwloc_set, int flags __hwloc_attribute_unused)
{
#if HWLOC_BITS_PER_LONG == 64
  struct thrdentry64 thread_info;
  tid64_t next_thread;
#else
  struct thrdsinfo thread_info;
  tid_t next_thread;
#endif
  pid_t pid = getpid();

  next_thread = 0;
  /* TODO: get multiple at once */
#if HWLOC_BITS_PER_LONG == 64
  while (getthrds64(pid, &thread_info, sizeof(thread_info), &next_thread, 1) == 1) {
#else
  while (getthrds(pid, &thread_info, sizeof(thread_info), &next_thread, 1) == 1) {
#endif
    if (thread_info.ti_tid == tid) {
      if (PROCESSOR_CLASS_ANY != thread_info.ti_cpuid)
        hwloc_bitmap_set(hwloc_set, thread_info.ti_cpuid);
      else
        hwloc_bitmap_fill(hwloc_set);
      break;
    }
  }
  /* TODO: what if the thread goes away in the meantime? */

  return 0;
}

static int
hwloc_aix_set_thisproc_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t hwloc_set, int flags)
{
  rsid_t who;
  who.at_pid = getpid();
  return hwloc_aix_set_sth_cpubind(topology, R_PROCESS, who, who.at_pid, hwloc_set, flags);
}

static int
hwloc_aix_get_thisproc_cpubind(hwloc_topology_t topology, hwloc_bitmap_t hwloc_set, int flags)
{
  int ret, bound;
  rsid_t who;
  who.at_pid = getpid();
  ret = hwloc_aix_get_sth_rset_cpubind(topology, R_PROCESS, who, hwloc_set, flags, &bound);
  if (!ret && !bound) {
    hwloc_bitmap_zero(hwloc_set);
    ret = hwloc_aix_get_pid_getthrds_cpubind(topology, who.at_pid, hwloc_set, flags);
  }
  return ret;
}
void
hwloc_look_hpux(struct hwloc_topology *topology)
{
  int has_numa = sysconf(_SC_CCNUMA_SUPPORT) == 1;
  hwloc_obj_t *nodes = NULL, obj;
  spu_t currentcpu;
  ldom_t currentnode;
  int i, nbnodes = 0;

#ifdef HAVE__SC_LARGE_PAGESIZE
  topology->levels[0][0]->attr->machine.huge_page_size_kB = sysconf(_SC_LARGE_PAGESIZE);
#endif

  if (has_numa) {
    nbnodes = mpctl(topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM ?
                    MPC_GETNUMLDOMS_SYS : MPC_GETNUMLDOMS, 0, 0);

    hwloc_debug("%d nodes\n", nbnodes);

    nodes = malloc(nbnodes * sizeof(*nodes));

    i = 0;
    currentnode = mpctl(topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM ?
                        MPC_GETFIRSTLDOM_SYS : MPC_GETFIRSTLDOM, 0, 0);
    while (currentnode != -1 && i < nbnodes) {
      hwloc_debug("node %d is %d\n", i, currentnode);
      nodes[i] = obj = hwloc_alloc_setup_object(HWLOC_OBJ_NODE, currentnode);
      obj->cpuset = hwloc_bitmap_alloc();
      obj->nodeset = hwloc_bitmap_alloc();
      hwloc_bitmap_set(obj->nodeset, currentnode);
      /* TODO: obj->attr->node.memory_kB */
      /* TODO: obj->attr->node.huge_page_free */

      currentnode = mpctl(topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM ?
                          MPC_GETNEXTLDOM_SYS : MPC_GETNEXTLDOM, currentnode, 0);
      i++;
    }
  }

  i = 0;
  currentcpu = mpctl(topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM ?
                     MPC_GETFIRSTSPU_SYS : MPC_GETFIRSTSPU, 0, 0);
  while (currentcpu != -1) {
    obj = hwloc_alloc_setup_object(HWLOC_OBJ_PU, currentcpu);
    obj->cpuset = hwloc_bitmap_alloc();
    hwloc_bitmap_set(obj->cpuset, currentcpu);

    hwloc_debug("cpu %d\n", currentcpu);

    if (nodes) {
      /* Add this cpu to its node */
      currentnode = mpctl(MPC_SPUTOLDOM, currentcpu, 0);
      if ((ldom_t) nodes[i]->os_index != currentnode)
        for (i = 0; i < nbnodes; i++)
          if ((ldom_t) nodes[i]->os_index == currentnode)
            break;
      if (i < nbnodes) {
        hwloc_bitmap_set(nodes[i]->cpuset, currentcpu);
        hwloc_debug("is in node %d\n", i);
      } else {
        hwloc_debug("%s", "is in no node?!\n");
      }
    }

    /* Add cpu */
    hwloc_insert_object_by_cpuset(topology, obj);

    currentcpu = mpctl(topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM ?
                       MPC_GETNEXTSPU_SYS : MPC_GETNEXTSPU, currentcpu, 0);
  }

  if (nodes) {
    /* Add nodes */
    for (i = 0; i < nbnodes; i++)
      hwloc_insert_object_by_cpuset(topology, nodes[i]);
    free(nodes);
  }

  topology->support.discovery->pu = 1;

  hwloc_obj_add_info(topology->levels[0][0], "Backend", "HP-UX");
}
static int
hwloc_look_osf(struct hwloc_backend *backend)
{
  struct hwloc_topology *topology = backend->topology;
  cpu_cursor_t cursor;
  unsigned nbnodes;
  radid_t radid, radid2;
  radset_t radset, radset2;
  cpuid_t cpuid;
  cpuset_t cpuset;
  struct hwloc_obj *obj;
  unsigned distance;

  if (topology->levels[0][0]->cpuset)
    /* somebody discovered things */
    return 0;

  hwloc_alloc_obj_cpusets(topology->levels[0][0]);

  nbnodes = rad_get_num();

  cpusetcreate(&cpuset);
  radsetcreate(&radset);
  radsetcreate(&radset2);
  {
    hwloc_obj_t *nodes = calloc(nbnodes, sizeof(hwloc_obj_t));
    unsigned *indexes = calloc(nbnodes, sizeof(unsigned));
    float *distances = calloc(nbnodes*nbnodes, sizeof(float));
    unsigned nfound;
    numa_attr_t attr;

    attr.nattr_type = R_RAD;
    attr.nattr_descr.rd_radset = radset;
    attr.nattr_flags = 0;

    for (radid = 0; radid < (radid_t) nbnodes; radid++) {
      rademptyset(radset);
      radaddset(radset, radid);
      cpuemptyset(cpuset);
      if (rad_get_cpus(radid, cpuset) == -1) {
        fprintf(stderr, "rad_get_cpus(%d) failed: %s\n", radid, strerror(errno));
        continue;
      }

      indexes[radid] = radid;
      nodes[radid] = obj = hwloc_alloc_setup_object(HWLOC_OBJ_NODE, radid);
      obj->cpuset = hwloc_bitmap_alloc();
      obj->memory.local_memory = rad_get_physmem(radid) * hwloc_getpagesize();
      obj->memory.page_types_len = 2;
      obj->memory.page_types = malloc(2*sizeof(*obj->memory.page_types));
      memset(obj->memory.page_types, 0, 2*sizeof(*obj->memory.page_types));
      obj->memory.page_types[0].size = hwloc_getpagesize();
#ifdef HAVE__SC_LARGE_PAGESIZE
      obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE);
#endif

      cursor = SET_CURSOR_INIT;
      while ((cpuid = cpu_foreach(cpuset, 0, &cursor)) != CPU_NONE)
        hwloc_bitmap_set(obj->cpuset, cpuid);

      hwloc_debug_1arg_bitmap("node %d has cpuset %s\n", radid, obj->cpuset);

      hwloc_insert_object_by_cpuset(topology, obj);

      nfound = 0;
      for (radid2 = 0; radid2 < (radid_t) nbnodes; radid2++)
        distances[radid*nbnodes+radid2] = RAD_DIST_REMOTE;
      for (distance = RAD_DIST_LOCAL; distance < RAD_DIST_REMOTE; distance++) {
        attr.nattr_distance = distance;
        /* get set of NUMA nodes at distance <= DISTANCE */
        if (nloc(&attr, radset2)) {
          fprintf(stderr, "nloc failed: %s\n", strerror(errno));
          continue;
        }
        cursor = SET_CURSOR_INIT;
        while ((radid2 = rad_foreach(radset2, 0, &cursor)) != RAD_NONE) {
          if (distances[radid*nbnodes+radid2] == RAD_DIST_REMOTE) {
            distances[radid*nbnodes+radid2] = (float) distance;
            nfound++;
          }
        }
        if (nfound == nbnodes)
          /* Finished finding distances, no need to go up to RAD_DIST_REMOTE */
          break;
      }
    }

    hwloc_distances_set(topology, HWLOC_OBJ_NODE, nbnodes, indexes, nodes, distances, 0 /* OS cannot force */);
  }
  radsetdestroy(&radset2);
  radsetdestroy(&radset);
  cpusetdestroy(&cpuset);

  /* add PU objects */
  hwloc_setup_pu_level(topology, hwloc_fallback_nbprocessors(topology));

  hwloc_obj_add_info(topology->levels[0][0], "Backend", "OSF");
  if (topology->is_thissystem)
    hwloc_add_uname_info(topology);
  return 1;
}
static HYD_status handle_user_binding(const char *binding)
{
    int i, j, k, num_bind_entries, *bind_entry_lengths;
    char *bindstr, **bind_entries;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    HYDU_ASSERT(hwloc_initialized, status);

    num_bind_entries = 1;
    for (i = 0; binding[i]; i++)
        if (binding[i] == ',')
            num_bind_entries++;

    HYDU_MALLOC_OR_JUMP(bind_entries, char **, num_bind_entries * sizeof(char *), status);
    HYDU_MALLOC_OR_JUMP(bind_entry_lengths, int *, num_bind_entries * sizeof(int), status);

    for (i = 0; i < num_bind_entries; i++)
        bind_entry_lengths[i] = 0;

    j = 0;
    for (i = 0; binding[i]; i++) {
        if (binding[i] != ',')
            bind_entry_lengths[j]++;
        else
            j++;
    }

    for (i = 0; i < num_bind_entries; i++) {
        /* +1 for the terminating NUL written below */
        HYDU_MALLOC_OR_JUMP(bind_entries[i], char *,
                            (bind_entry_lengths[i] + 1) * sizeof(char), status);
    }

    j = 0;
    k = 0;
    for (i = 0; binding[i]; i++) {
        if (binding[i] != ',')
            bind_entries[j][k++] = binding[i];
        else {
            bind_entries[j][k] = 0;
            j++;
            k = 0;
        }
    }
    bind_entries[j][k++] = 0;

    /* initialize bitmaps */
    HYDU_MALLOC_OR_JUMP(HYDT_topo_hwloc_info.bitmap, hwloc_bitmap_t *,
                        num_bind_entries * sizeof(hwloc_bitmap_t), status);

    for (i = 0; i < num_bind_entries; i++) {
        HYDT_topo_hwloc_info.bitmap[i] = hwloc_bitmap_alloc();
        hwloc_bitmap_zero(HYDT_topo_hwloc_info.bitmap[i]);

        bindstr = strtok(bind_entries[i], "+");
        while (bindstr) {
            hwloc_bitmap_set(HYDT_topo_hwloc_info.bitmap[i], atoi(bindstr));
            bindstr = strtok(NULL, "+");
        }
    }

    HYDT_topo_hwloc_info.num_bitmaps = num_bind_entries;
    HYDT_topo_hwloc_info.user_binding = 1;

    /* free temporary memory */
    for (i = 0; i < num_bind_entries; i++) {
        MPL_free(bind_entries[i]);
    }
    MPL_free(bind_entries);
    MPL_free(bind_entry_lengths);

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
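/* Example of the syntax handled above (values hypothetical): the user string
 * "0+8,1+9,2+10,3+11" splits on ',' into four entries and each entry on '+',
 * so bitmap[i] contains PUs {i, i+8}; process i is then bound to both
 * hardware threads of one core on a typical 2-way SMT machine. */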