// Round-trip check: every core that getCoresForNode() lists for a NUMA node
// must be mapped back to that same node by getNodeForCore().
TEST_F(HwLocHelperTest, get_node_for_core) {
  const unsigned nodeCount = hwloc_get_nbobjs_by_type(getHWTopology(), HWLOC_OBJ_NODE);

  std::vector<unsigned> coresOfNode;
  for (unsigned node = 0; node < nodeCount; ++node) {
    coresOfNode = getCoresForNode(getHWTopology(), node);
    for (const unsigned core : coresOfNode) {
      EXPECT_EQ(node, getNodeForCore(core));
    }
  }
}
// get number of cores on system and compare to sum of cores fopr each node TEST_F(HwLocHelperTest, number_of_nodes){ int cores = getNumberOfCoresOnSystem(); int cores2 = 0; unsigned nodes = hwloc_get_nbobjs_by_type(getHWTopology(), HWLOC_OBJ_NODE); if (nodes) { for(unsigned i = 0; i < nodes; i++){ cores2 += getCoresForNode(getHWTopology(), i).size(); } EXPECT_EQ(cores2, cores); } }
void AbstractCoreBoundTaskQueue::launchThread(int core) { //get the number of cores on system int NUM_PROCS = getNumberOfCoresOnSystem(); if (core < NUM_PROCS) { _thread = new std::thread(&AbstractTaskQueue::executeTask, this); hwloc_cpuset_t cpuset; hwloc_obj_t obj; hwloc_topology_t topology = getHWTopology(); obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, core); // the bitmap to modify cpuset = hwloc_bitmap_dup(obj->cpuset); // remove hyperthreads hwloc_bitmap_singlify(cpuset); // bind if (hwloc_set_thread_cpubind(topology, _thread->native_handle(), cpuset, HWLOC_CPUBIND_STRICT | HWLOC_CPUBIND_NOMEMBIND)) { char *str; int error = errno; hwloc_bitmap_asprintf(&str, obj->cpuset); fprintf(stderr, "Couldn't bind to cpuset %s: %s\n", str, strerror(error)); fprintf(stderr, "Continuing as normal, however, no guarantees\n"); //throw std::runtime_error(strerror(error)); } hwloc_bitmap_free(cpuset); } else { // this case should never happen, as TaskQueue is only initialized from SimpleTaskScheduler, which captures this case throw std::logic_error("CPU to run thread on is larger than number of total cores; seems that TaskQueue was initialized outside of SimpleTaskScheduler, which should not happen"); } }
//assumes equal number of cores per node unsigned getNumberOfCoresPerNumaNode(){ hwloc_topology_t topology = getHWTopology(); unsigned number_of_cores, number_of_nodes; number_of_cores = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_CORE); number_of_nodes = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_NODE); return number_of_cores/number_of_nodes; };
unsigned getNumberOfNodesOnSystem() { static int NUM_NODES = []() { hwloc_topology_t topology = getHWTopology(); return std::max(hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_NODE), 1); }(); assert(NUM_NODES >= 1); return NUM_NODES; }
int getNumberOfCoresOnSystem() { static int NUM_PROCS = []() { hwloc_topology_t topology = getHWTopology(); return hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_CORE); }(); assert(NUM_PROCS >= 1); return NUM_PROCS; }
/**
 * Returns the logical index of the core on which the calling thread last ran.
 *
 * @return the core's logical index, or -1 if the location could not be
 *         queried or no core object covers the reported CPU set.
 */
signed getCurrentCore() {
  hwloc_topology_t topology = getHWTopology();

  hwloc_cpuset_t cpu_set = hwloc_bitmap_alloc();
  if (cpu_set == nullptr) {
    return -1;
  }

  signed core_index = -1;
  if (hwloc_get_last_cpu_location(topology, cpu_set, HWLOC_CPUBIND_THREAD) >= 0) {
    hwloc_obj_t current_core =
        hwloc_get_next_obj_covering_cpuset_by_type(topology, cpu_set, HWLOC_OBJ_CORE, nullptr);
    // The lookup may find no covering core; report that as "unknown"
    // instead of dereferencing a null object.
    if (current_core != nullptr) {
      core_index = current_core->logical_index;
    }
  }

  // Single exit point so the bitmap is released on every path, including
  // the failure path that previously returned early and leaked it.
  hwloc_bitmap_free(cpu_set);
  return core_index;
}
// assumes equal number of cores per node unsigned getNumberOfCoresPerNumaNode() { hwloc_topology_t topology = getHWTopology(); int number_of_cores, number_of_nodes; number_of_cores = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_CORE); number_of_nodes = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_NODE); if (number_of_nodes > 0) return number_of_cores / number_of_nodes; if (number_of_nodes == 0) return number_of_cores; throw std::runtime_error("Multi-Level NUMA handling not implemented"); };
signed getNodeForCore(unsigned core) { hwloc_topology_t topology = getHWTopology(); unsigned nodes = getNumberOfNodes(topology); hwloc_obj_t core_obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, core); for (unsigned i = 0; i < nodes; i++) { hwloc_obj_t obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NODE, i); if (hwloc_obj_is_in_subtree(topology, core_obj, obj)) { return i; } } return -1; }
unsigned getNodeForCore(unsigned core){ hwloc_topology_t topology = getHWTopology(); unsigned nodes = getNumberOfNodes(topology); hwloc_obj_t obj; hwloc_obj_t core_obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, core); for(unsigned i = 0; i < nodes; i++){ obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NODE, i); if (hwloc_obj_is_in_subtree(topology, core_obj, obj)){ return i; } } throw std::runtime_error("expected to find node for core"); }
void bindCurrentThreadToNumaNode(int node) { hwloc_topology_t topology = getHWTopology(); hwloc_cpuset_t cpuset; hwloc_obj_t obj; // The actual node obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NODE, node); // obj is nullptr on non NUMA machines if (obj == nullptr) { fprintf(stderr, "Couldn't get hwloc object, bindCurrentThreadToNumaNode failed!\n"); return; } cpuset = hwloc_bitmap_dup(obj->cpuset); // hwloc_bitmap_singlify(cpuset); // bind if (hwloc_set_cpubind(topology, cpuset, HWLOC_CPUBIND_STRICT | HWLOC_CPUBIND_NOMEMBIND | HWLOC_CPUBIND_THREAD)) { char* str; int error = errno; hwloc_bitmap_asprintf(&str, obj->cpuset); printf("Couldn't bind to cpuset %s: %s\n", str, strerror(error)); free(str); throw std::runtime_error(strerror(error)); } // free duplicated cpuset hwloc_bitmap_free(cpuset); // assuming single machine system obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_MACHINE, 0); // set membind policy interleave for this thread if (hwloc_set_membind_nodeset( topology, obj->nodeset, HWLOC_MEMBIND_INTERLEAVE, HWLOC_MEMBIND_STRICT | HWLOC_MEMBIND_THREAD) && errno != ENOSYS) { char* str; int error = errno; hwloc_bitmap_asprintf(&str, obj->nodeset); fprintf(stderr, "Couldn't membind to nodeset %s: %s\n", str, strerror(error)); fprintf(stderr, "Continuing as normal, however, no guarantees\n"); free(str); } }