#include <numa.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    int i, k, w, ncpus, maxnode;
    struct bitmask *cpus;

    if (numa_available() < 0) {
        printf("no numa\n");
        exit(1);
    }
    maxnode = numa_num_configured_nodes() - 1;
    cpus = numa_allocate_cpumask();
    ncpus = cpus->size;

    for (i = 0; i <= maxnode; i++) {
        if (numa_node_to_cpus(i, cpus) < 0) {
            printf("node %d failed to convert\n", i);
        }
        printf("%d: ", i);
        w = 0;
        for (k = 0; k < ncpus; k++)
            if (numa_bitmask_isbitset(cpus, k))
                printf("%s%d", w++ == 0 ? " " : ",", k);
        putchar('\n');
    }
    return 0;
}
void backend_set_numa(unsigned id)
{
    struct bitmask *bm = numa_allocate_cpumask();

    numa_bitmask_setbit(bm, id);
    numa_sched_setaffinity(0, bm);
    numa_free_cpumask(bm);
}
CPU_Set_t *CPU_GetPossible(void)
{
    CPU_Set_t *cs = numa_allocate_cpumask();
    copy_bitmask_to_bitmask(numa_all_cpus_ptr, cs);
    return cs;
}
int pthread_create(pthread_t *thread, const pthread_attr_t *attr,
                   void *(*start_routine)(void *), void *arg)
{
    int core;
    int ret;
    cpu_set_t mask;

    CPU_ZERO(&mask);
    ret = old_pthread_create(thread, attr, start_routine, arg);
    if (!get_shm()->active)
        return ret;

    core = get_next_core();
    if (!get_shm()->per_node) {
        CPU_SET(core, &mask);
    } else {
        int i, node = numa_node_of_cpu(core);
        struct bitmask *bmp = numa_allocate_cpumask();

        numa_node_to_cpus(node, bmp);
        for (i = 0; i < numa_num_configured_cpus(); i++) {
            if (numa_bitmask_isbitset(bmp, i))
                CPU_SET(i, &mask);
        }
        numa_free_cpumask(bmp);
    }

    old_pthread_setaffinity_np(*thread, sizeof(mask), &mask);
    VERBOSE("-> Set affinity to %d\n", core);
    return ret;
}
/*---------------------------------------------------------------------------*/
static int numa_node_to_cpusmask(int node, uint64_t *cpusmask, int *nr)
{
    struct bitmask *mask;
    uint64_t bmask = 0;
    int retval = -1;
    unsigned int i;

    mask = numa_allocate_cpumask();
    retval = numa_node_to_cpus(node, mask);
    if (retval < 0)
        goto cleanup;

    *nr = 0;
    for (i = 0; i < mask->size && i < 64; i++) {
        if (numa_bitmask_isbitset(mask, i)) {
            cpusmask_set_bit(i, &bmask);
            (*nr)++;
        }
    }

    retval = 0;
cleanup:
    *cpusmask = bmask;
    numa_free_cpumask(mask);
    return retval;
}
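A hypothetical caller of numa_node_to_cpusmask() might look like the sketch below; the helper itself is the function above, while the choice of node 0, the error message, and the PRIx64 formatting are illustrative assumptions rather than part of the original code.

#include <inttypes.h>
#include <stdio.h>

/* Illustrative only: query node 0 and print the resulting 64-bit CPU mask. */
void print_node0_cpusmask(void)
{
    uint64_t cpusmask;
    int nr_cpus;

    if (numa_node_to_cpusmask(0, &cpusmask, &nr_cpus) == 0)
        printf("node 0: %d CPUs, mask 0x%" PRIx64 "\n", nr_cpus, cpusmask);
    else
        fprintf(stderr, "could not resolve CPUs of node 0\n");
}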
int bind_cpu(int cpu)
{
    struct bitmask *nodemask = numa_parse_nodestring("0");
    struct bitmask *cpumask = numa_allocate_cpumask();
    int ret;

    numa_bind(nodemask);                    /* memory and CPUs of node 0 */
    numa_bitmask_clearall(cpumask);
    numa_bitmask_setbit(cpumask, cpu - 1);  /* cpu - 1: caller passes 1-based CPU numbers */
    ret = numa_sched_setaffinity(getpid(), cpumask);

    numa_bitmask_free(nodemask);
    numa_bitmask_free(cpumask);
    return ret;
}
CPU_Set_t *CPU_GetAffinity(void)
{
    struct bitmask *cs = numa_allocate_cpumask();
    int res = numa_sched_getaffinity(0, cs);

    if (res < 0)
        epanic("numa_sched_getaffinity");
    return cs;
}
void CPU_Bind(int cpu)
{
    CPU_Set_t *cs = numa_allocate_cpumask();

    numa_bitmask_setbit(cs, cpu);
    int res = numa_sched_setaffinity(0, cs);
    if (res < 0)
        epanic("bindToCPU(%d)", cpu);
    CPU_FreeSet(cs);
}
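Taken together, CPU_GetAffinity() and CPU_Bind() above suggest a pin-then-verify pattern. The sketch below is illustrative only: it assumes CPU 3 exists on the machine and that CPU_FreeSet() releases masks obtained from these helpers.

#include <assert.h>

/* Sketch only: pin the calling thread to CPU 3, then confirm the kernel
 * reports that CPU in the thread's affinity mask. */
void pin_and_verify(void)
{
    CPU_Bind(3);
    CPU_Set_t *cur = CPU_GetAffinity();
    assert(numa_bitmask_isbitset(cur, 3));
    CPU_FreeSet(cur);
}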
/**
 * \brief Get an array of cores with a certain placement
 */
static coreid_t* placement(uint32_t n, bool do_fill)
{
    coreid_t* result = malloc(sizeof(coreid_t)*n);
    uint32_t numa_nodes = numa_max_node()+1;
    uint32_t num_cores = numa_num_configured_cpus();
    struct bitmask* nodes[numa_nodes];

    for (int i = 0; i < numa_nodes; i++) {
        nodes[i] = numa_allocate_cpumask();
        numa_node_to_cpus(i, nodes[i]);
    }

    int num_taken = 0;
    if (numa_available() == 0) {
        if (do_fill) {
            // fill strategy: take all cores of a node before moving on
            for (int i = 0; i < numa_nodes; i++) {
                for (int j = 0; j < num_cores; j++) {
                    if (numa_bitmask_isbitset(nodes[i], j)) {
                        result[num_taken] = j;
                        num_taken++;
                    }
                    if (num_taken == n) {
                        return result;
                    }
                }
            }
        } else {
            uint8_t ith_of_node = 0;
            // go through numa nodes
            for (int i = 0; i < numa_nodes; i++) {
                // go through cores and see if part of numa node
                for (int j = 0; j < num_cores; j++) {
                    // take the ith core of the node
                    if (numa_bitmask_isbitset(nodes[i], j)) {
                        int index = i+ith_of_node*numa_nodes;
                        if (index < n) {
                            result[i+ith_of_node*numa_nodes] = j;
                            num_taken++;
                            ith_of_node++;
                        }
                    }
                    if (num_taken == n) {
                        return result;
                    }
                }
                ith_of_node = 0;
            }
        }
    } else {
        printf("Libnuma not available\n");
        return NULL;
    }
    return NULL;
}
static void regular_nodes_init(void)
{
    int i, nodes_num = numa_num_configured_nodes();
    struct bitmask *node_cpus = numa_allocate_cpumask();

    regular_nodes_mask = numa_allocate_nodemask();
    for (i = 0; i < nodes_num; i++) {
        /* A node is "regular" if it has at least one CPU attached. */
        numa_node_to_cpus(i, node_cpus);
        if (numa_bitmask_weight(node_cpus))
            numa_bitmask_setbit(regular_nodes_mask, i);
    }
    numa_bitmask_free(node_cpus);
}
void print_node_cpus(int node)
{
    int i, err;
    struct bitmask *cpus;

    cpus = numa_allocate_cpumask();
    err = numa_node_to_cpus(node, cpus);
    if (err >= 0) {
        for (i = 0; i < cpus->size; i++)
            if (numa_bitmask_isbitset(cpus, i))
                printf(" %d", i);
    }
    putchar('\n');
}
/// This function tries to fill the bandwidth array based on knowledge about known CPU models
static int fill_bandwidth_values_heuristically(int* bandwidth, int bandwidth_len)
{
    int ret = MEMKIND_ERROR_UNAVAILABLE; // Default error returned if the heuristic approach fails
    int i, nodes_num, memory_only_nodes_num = 0;
    struct bitmask *memory_only_nodes, *node_cpus;

    if (is_cpu_xeon_phi_x200() == 0) {
        log_info("Known CPU model detected: Intel(R) Xeon Phi(TM) x200.");
        nodes_num = numa_num_configured_nodes();

        // Check if the number of NUMA nodes matches a supported
        // configuration of Intel Xeon Phi x200
        if (nodes_num != 2 && nodes_num != 4 && nodes_num != 8) {
            return ret;
        }

        memory_only_nodes = numa_allocate_nodemask();
        node_cpus = numa_allocate_cpumask();

        for (i = 0; i < nodes_num; i++) {
            numa_node_to_cpus(i, node_cpus);
            if (numa_bitmask_weight(node_cpus) == 0) {
                memory_only_nodes_num++;
                numa_bitmask_setbit(memory_only_nodes, i);
            }
        }

        // Check if the number of memory-only nodes equals the number of memory+CPU nodes.
        // If so, set ret to 0 (success) and fill the bandwidth table.
        if (memory_only_nodes_num == (nodes_num - memory_only_nodes_num)) {
            ret = 0;
            assign_arbitrary_bandwidth_values(bandwidth, bandwidth_len, memory_only_nodes);
        }

        numa_bitmask_free(memory_only_nodes);
        numa_bitmask_free(node_cpus);
    }
    return ret;
}
/**
 * @brief Returns an array of cores of size req_cores, chosen
 * round-robin from the NUMA nodes in batches of req_step.
 *
 * @param req_step The step width - how many cores should be picked
 * from each NUMA node in each iteration. Use a negative value
 * for a "fill" strategy, where NUMA nodes are completely filled
 * before moving on to the next one.
 */
void placement(size_t req_cores, size_t req_step, coreid_t *cores)
{
    // Treat a step width of 0 as 1.
    if (req_step == 0)
        req_step = 1;

    size_t max_node = numa_max_node();
    size_t num_cores = numa_num_configured_cpus();
    size_t cores_per_node = num_cores/(max_node+1);

    printf("req_cores: %zu\n", req_cores);
    printf("req_step: %zu\n", req_step);
    printf("cores / NUMA node: %zu\n", cores_per_node);
    printf("max_node: %zu\n", max_node);

    size_t num_selected = 0;
    size_t curr_numa_idx = 0;

    // How many cores to choose from each NUMA node
    size_t choose_per_node[max_node+1];
    memset(choose_per_node, 0, sizeof(size_t)*(max_node+1));

    // Step 1:
    // Figure out how many cores to choose from each node
    while (num_selected<req_cores) {
        // How many cores should be chosen from this node in this step?
        // At most req_step, bounded by what is still needed and what the node can still provide.
        size_t num_choose = min(min(req_step, req_cores-num_selected),
                                cores_per_node-choose_per_node[curr_numa_idx]);

        // Increment counter indicating how many to choose from this node
        choose_per_node[curr_numa_idx] += num_choose;
        num_selected += num_choose;

        // Move on to the next NUMA node
        curr_numa_idx = (curr_numa_idx + 1) % (max_node+1);
    }

    // Step 2:
    // Get the cores from each NUMA node
    //
    // Hyperthreads? They should have higher core IDs, and hence get picked last.
    struct bitmask *mask = numa_allocate_cpumask();
    size_t idx = 0;
    for (size_t i=0; i<=max_node; i++) {
        dbg_printf("node %2zu choosing %2zu\n", i, choose_per_node[i]);

        // Determine which cores are on node i
        numa_node_to_cpus(i, mask);

        size_t choosen = 0;
        for (coreid_t p=0; p<num_cores && choosen<choose_per_node[i]; p++) {
            // Is processor p on node i?
            if (numa_bitmask_isbitset(mask, p)) {
                cores[idx++] = p;
                choosen++;
                dbg_printf("Choosing %" PRIuCOREID " on node %zu\n", p, i);
            }
        }
    }

    assert (idx == req_cores);
}
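A possible call site for placement() could look like the following sketch; the request for 8 cores with a step width of 2 and the printf reporting are illustrative assumptions, not part of the original routine.

/* Illustrative only: request 8 cores, picked 2 per NUMA node per pass. */
void example_placement(void)
{
    coreid_t cores[8];
    placement(8, 2, cores);

    for (size_t i = 0; i < 8; i++)
        printf("worker %zu -> core %u\n", i, (unsigned)cores[i]);
}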
static int _get_cpu_masks(int num_numa_nodes, int32_t *numa_array,
                          cpu_set_t **cpuMasks)
{
    struct bitmask **remaining_numa_node_cpus = NULL, *collective;
    unsigned long **numa_node_cpus = NULL;
    int i, j, at_least_one_cpu = 0, rc = 0;
    cpu_set_t *cpusetptr;
    char *bitmask_str = NULL;

    if (numa_available()) {
        CRAY_ERR("Libnuma not available");
        return -1;
    }

    /*
     * numa_node_cpus: The CPUs available to the NUMA node.
     * numa_all_cpus_ptr: all CPUs on which the calling task may execute.
     * remaining_numa_node_cpus: Bitwise-AND of the above two to get all of
     *                           the CPUs that the task can run on in this
     *                           NUMA node.
     * collective: Collects all of the CPUs as a precaution.
     */
    remaining_numa_node_cpus = xmalloc(num_numa_nodes *
                                       sizeof(struct bitmask *));
    collective = numa_allocate_cpumask();
    numa_node_cpus = xmalloc(num_numa_nodes * sizeof(unsigned long *));
    for (i = 0; i < num_numa_nodes; i++) {
        remaining_numa_node_cpus[i] = numa_allocate_cpumask();
        numa_node_cpus[i] = xmalloc(sizeof(unsigned long) *
                                    NUM_INTS_TO_HOLD_ALL_CPUS);
        rc = numa_node_to_cpus(numa_array[i], numa_node_cpus[i],
                               NUM_INTS_TO_HOLD_ALL_CPUS);
        if (rc) {
            CRAY_ERR("numa_node_to_cpus failed: Return code %d", rc);
        }
        for (j = 0; j < NUM_INTS_TO_HOLD_ALL_CPUS; j++) {
            (remaining_numa_node_cpus[i]->maskp[j]) =
                (numa_node_cpus[i][j]) & (numa_all_cpus_ptr->maskp[j]);
            collective->maskp[j] |= (remaining_numa_node_cpus[i]->maskp[j]);
        }
    }

    /*
     * Ensure that we have not masked off all of the CPUs.
     * If we have, just re-enable them all. Better to clear them all than
     * none of them.
     */
    for (j = 0; j < collective->size; j++) {
        if (numa_bitmask_isbitset(collective, j)) {
            at_least_one_cpu = 1;
        }
    }
    if (!at_least_one_cpu) {
        for (i = 0; i < num_numa_nodes; i++) {
            for (j = 0; j < (remaining_numa_node_cpus[i]->size /
                             (sizeof(unsigned long) * 8)); j++) {
                (remaining_numa_node_cpus[i]->maskp[j]) =
                    (numa_all_cpus_ptr->maskp[j]);
            }
        }
    }

    if (debug_flags & DEBUG_FLAG_TASK) {
        bitmask_str = NULL;
        for (i = 0; i < num_numa_nodes; i++) {
            for (j = 0; j < NUM_INTS_TO_HOLD_ALL_CPUS; j++) {
                xstrfmtcat(bitmask_str, "%6lx ", numa_node_cpus[i][j]);
            }
        }
        info("%sBitmask: Allowed CPUs for NUMA Node", bitmask_str);
        xfree(bitmask_str);
        bitmask_str = NULL;

        for (i = 0; i < num_numa_nodes; i++) {
            for (j = 0; j < NUM_INTS_TO_HOLD_ALL_CPUS; j++) {
                xstrfmtcat(bitmask_str, "%6lx ",
                           numa_all_cpus_ptr->maskp[j]);
            }
        }
        info("%sBitmask: Allowed CPUs for cpuset", bitmask_str);
        xfree(bitmask_str);
        bitmask_str = NULL;

        for (i = 0; i < num_numa_nodes; i++) {
            for (j = 0; j < NUM_INTS_TO_HOLD_ALL_CPUS; j++) {
                xstrfmtcat(bitmask_str, "%6lx ",
                           remaining_numa_node_cpus[i]->maskp[j]);
            }
        }
        info("%sBitmask: Allowed CPUs between cpuset and NUMA Node",
             bitmask_str);
        xfree(bitmask_str);
    }

    // Convert bitmasks to cpu_set_t types
    cpusetptr = xmalloc(num_numa_nodes * sizeof(cpu_set_t));
    for (i = 0; i < num_numa_nodes; i++) {
        CPU_ZERO(&cpusetptr[i]);
        for (j = 0; j < remaining_numa_node_cpus[i]->size; j++) {
            if (numa_bitmask_isbitset(remaining_numa_node_cpus[i], j)) {
                CPU_SET(j, &cpusetptr[i]);
            }
        }
        if (debug_flags & DEBUG_FLAG_TASK) {
            info("CPU_COUNT() of set: %d", CPU_COUNT(&cpusetptr[i]));
        }
    }
    *cpuMasks = cpusetptr;

    // Freeing Everything
    numa_free_cpumask(collective);
    for (i = 0; i < num_numa_nodes; i++) {
        xfree(numa_node_cpus[i]);
        numa_free_cpumask(remaining_numa_node_cpus[i]);
    }
    xfree(numa_node_cpus);
    xfree(remaining_numa_node_cpus);

    return 0;
}
static uint32_t* placement(uint32_t n, bool do_fill, bool hyper)
{
    uint32_t* result = (uint32_t*) malloc(sizeof(uint32_t)*n);
    uint32_t numa_nodes = numa_max_node()+1;
    uint32_t num_cores = 0;

    if (hyper) {
        num_cores = numa_num_configured_cpus()/2;
    } else {
        num_cores = numa_num_configured_cpus();
    }

    struct bitmask* nodes[numa_nodes];
    for (int i = 0; i < numa_nodes; i++) {
        nodes[i] = numa_allocate_cpumask();
        numa_node_to_cpus(i, nodes[i]);
    }

    int num_taken = 0;
    if (numa_available() == 0) {
        if (do_fill) {
            for (int i = 0; i < numa_nodes; i++) {
                for (int j = 0; j < num_cores; j++) {
                    if (numa_bitmask_isbitset(nodes[i], j)) {
                        result[num_taken] = j;
                        num_taken++;
                    }
                    if (num_taken == n) {
                        return result;
                    }
                }
            }
        } else {
            int cores_per_node = n/numa_nodes;
            int rest = n - (cores_per_node*numa_nodes);
            int taken_per_node = 0;

            for (int i = 0; i < numa_nodes; i++) {
                for (int j = 0; j < num_cores; j++) {
                    if (numa_bitmask_isbitset(nodes[i], j)) {
                        if (taken_per_node == cores_per_node) {
                            if (rest > 0) {
                                result[num_taken] = j;
                                num_taken++;
                                rest--;
                                if (num_taken == n) {
                                    return result;
                                }
                            }
                            break;
                        }
                        result[num_taken] = j;
                        num_taken++;
                        taken_per_node++;
                        if (num_taken == n) {
                            return result;
                        }
                    }
                }
                taken_per_node = 0;
            }
        }
    } else {
        printf("Libnuma not available\n");
        return NULL;
    }
    return NULL;
}
int init_virtual_topology(config_t* cfg, cpu_model_t* cpu_model,
                          virtual_topology_t** virtual_topologyp)
{
    char* mc_pci_file;
    char* str;
    char* saveptr;
    char* token = "NULL";
    int* physical_node_ids;
    physical_node_t** physical_nodes = NULL;
    int num_physical_nodes;
    int n, v, i, j, sibling_idx, node_i_idx;
    int node_id;
    physical_node_t* node_i, *node_j, *sibling_node;
    int ret;
    int min_distance;
    int hyperthreading;
    struct bitmask* mem_nodes;
    virtual_topology_t* virtual_topology;

    __cconfig_lookup_string(cfg, "topology.physical_nodes", &str);

    // parse the physical nodes string
    physical_node_ids = calloc(numa_num_possible_nodes(), sizeof(*physical_node_ids));
    num_physical_nodes = 0;
    while ((token = strtok_r(str, ",", &saveptr)) != NULL) {
        physical_node_ids[num_physical_nodes] = atoi(token);
        str = NULL;
        if (++num_physical_nodes > numa_num_possible_nodes()) {
            // we're being asked to run on more nodes than available
            free(physical_node_ids);
            ret = E_ERROR;
            goto done;
        }
    }
    physical_nodes = calloc(num_physical_nodes, sizeof(*physical_nodes));

    // select those nodes we can run on (e.g. not constrained by any numactl)
    mem_nodes = numa_get_mems_allowed();
    for (i=0, n=0; i<num_physical_nodes; i++) {
        node_id = physical_node_ids[i];
        if (numa_bitmask_isbitset(mem_nodes, node_id)) {
            physical_nodes[n] = malloc(sizeof(**physical_nodes));
            physical_nodes[n]->node_id = node_id;
            // TODO: what if we want to avoid using only a single hardware context of a hyperthreaded core?
            physical_nodes[n]->cpu_bitmask = numa_allocate_cpumask();
            numa_node_to_cpus(node_id, physical_nodes[n]->cpu_bitmask);
            __cconfig_lookup_bool(cfg, "topology.hyperthreading", &hyperthreading);
            if (hyperthreading) {
                physical_nodes[n]->num_cpus = num_cpus(physical_nodes[n]->cpu_bitmask);
            } else {
                DBG_LOG(INFO, "Not using hyperthreading.\n");
                // disable the upper half of the processors in the bitmask
                physical_nodes[n]->num_cpus = num_cpus(physical_nodes[n]->cpu_bitmask) / 2;
                int fc = first_cpu(physical_nodes[n]->cpu_bitmask);
                for (j=fc+system_num_cpus()/2; j<fc+system_num_cpus()/2+physical_nodes[n]->num_cpus; j++) {
                    if (numa_bitmask_isbitset(physical_nodes[n]->cpu_bitmask, j)) {
                        numa_bitmask_clearbit(physical_nodes[n]->cpu_bitmask, j);
                    }
                }
            }
            n++;
        }
    }
    free(physical_node_ids);
    num_physical_nodes = n;

    // if the pci bus topology of each physical node is not provided then discover it
    if (__cconfig_lookup_string(cfg, "topology.mc_pci", &mc_pci_file) == CONFIG_FALSE ||
        (__cconfig_lookup_string(cfg, "topology.mc_pci", &mc_pci_file) == CONFIG_TRUE &&
         load_mc_pci_topology(mc_pci_file, physical_nodes, num_physical_nodes) != E_SUCCESS))
    {
        discover_mc_pci_topology(cpu_model, physical_nodes, num_physical_nodes);
        save_mc_pci_topology(mc_pci_file, physical_nodes, num_physical_nodes);
    }

    // form virtual nodes by grouping physical nodes that are close to each other
    virtual_topology = malloc(sizeof(*virtual_topology));
    virtual_topology->num_virtual_nodes = num_physical_nodes / 2 + num_physical_nodes % 2;
    virtual_topology->virtual_nodes = calloc(virtual_topology->num_virtual_nodes,
                                             sizeof(*(virtual_topology->virtual_nodes)));

    for (i=0, v=0; i<num_physical_nodes; i++) {
        min_distance = INT_MAX;
        sibling_node = NULL;
        sibling_idx = -1;
        if ((node_i = physical_nodes[i]) == NULL) {
            continue;
        }
        for (j=i+1; j<num_physical_nodes; j++) {
            if ((node_j = physical_nodes[j]) == NULL) {
                continue;
            }
            if (numa_distance(node_i->node_id, node_j->node_id) < min_distance) {
                min_distance = numa_distance(node_i->node_id, node_j->node_id);
                sibling_node = node_j;
                sibling_idx = j;
            }
        }
        if (sibling_node) {
            physical_nodes[i] = physical_nodes[sibling_idx] = NULL;
            virtual_node_t* virtual_node = &virtual_topology->virtual_nodes[v];
            virtual_node->dram_node = node_i;
            virtual_node->nvram_node = sibling_node;
            virtual_node->node_id = v;
            virtual_node->cpu_model = cpu_model;
            DBG_LOG(INFO, "Fusing physical nodes %d %d into virtual node %d\n",
                    node_i->node_id, sibling_node->node_id, virtual_node->node_id);
            v++;
        }
    }

    // any physical node that is not paired with another physical node is
    // formed into a virtual node on its own
    if (2*v < num_physical_nodes) {
        for (i=0; i<num_physical_nodes; i++) {
            if ((node_i = physical_nodes[i]) == NULL) {
                continue;
            }
            virtual_node_t* virtual_node = &virtual_topology->virtual_nodes[v];
            virtual_node->dram_node = virtual_node->nvram_node = node_i;
            virtual_node->node_id = v;
            DBG_LOG(WARNING,
                    "Forming physical node %d into virtual node %d without a sibling node.\n",
                    node_i->node_id, virtual_node->node_id);
            v++;
        }
    }

    *virtual_topologyp = virtual_topology;
    ret = E_SUCCESS;

done:
    free(physical_nodes);
    return ret;
}
unique_bitmask_ptr make_cpumask_ptr()
{
    // Pair the cpumask allocator with its matching deallocator.
    return unique_bitmask_ptr(numa_allocate_cpumask(), numa_free_cpumask);
}
char * build_default_affinity_string (int shuffle)
{
    int nr_nodes = numa_num_configured_nodes();
    int nr_cores = numa_num_configured_cpus();

    char * str;
    int str_size = 512;
    int str_written = 0;

    int i;
    struct bitmask ** bm = (struct bitmask**) malloc(sizeof(struct bitmask*) * nr_nodes);
    for (i = 0; i < nr_nodes; i++) {
        bm[i] = numa_allocate_cpumask();
        numa_node_to_cpus(i, bm[i]);
    }

    str = (char*) malloc(str_size * sizeof(char));
    assert(str);

    if (!shuffle) {
        for (i = 0; i < nr_nodes; i++) {
            int j;
            for (j = 0; j < nr_cores; j++) {
                if (numa_bitmask_isbitset(bm[i], j)) {
                    add_core_to_str(&str, &str_size, &str_written, j);
                }
            }
        }
    } else {
        int next_node = 0;
        for (i = 0; i < nr_cores; i++) {
            int idx = (i / nr_nodes) + 1;
            int found = 0;
            int j = 0;

            do {
                if (numa_bitmask_isbitset(bm[next_node], j)) {
                    found++;
                }
                if (found == idx) {
                    add_core_to_str(&str, &str_size, &str_written, j);
                    break;
                }
                j = (j + 1) % nr_cores;
            } while (found != idx);

            next_node = (next_node + 1) % nr_nodes;
        }
    }

    if (str_written) {
        str[str_written - 1] = 0;
    }

    return str;
}