/*
 * Print the NUMA distance matrix (SLIT) for all configured nodes.
 *
 * maxnode: highest node id to consider (inclusive).
 *
 * A probe of numa_distance(maxnode, 0) returning 0 means the kernel
 * exposes no distance table, in which case a notice is printed and we
 * bail out.  Otherwise a header row of node ids is printed, followed by
 * one row per configured node with its distance to every other
 * configured node.  Nodes absent from numa_nodes_ptr are skipped.
 */
static void print_distances(int maxnode)
{
    int i, k;

    if (numa_distance(maxnode, 0) == 0) {
        printf("No distance information available.\n");
        return;
    }
    printf("node distances:\n");
    printf("node ");
    for (i = 0; i <= maxnode; i++)
        if (numa_bitmask_isbitset(numa_nodes_ptr, i))
            printf("% 3d ", i);
    printf("\n");
    for (i = 0; i <= maxnode; i++) {
        if (!numa_bitmask_isbitset(numa_nodes_ptr, i))
            continue;
        printf("% 3d: ", i);
        for (k = 0; k <= maxnode; k++)
            /* Node i is already known to be configured (checked above),
             * so only column node k needs testing here; the original
             * redundantly re-tested i on every inner iteration. */
            if (numa_bitmask_isbitset(numa_nodes_ptr, k))
                printf("% 3d ", numa_distance(i, k));
        printf("\n");
    }
}
/**
 * virNumaGetDistances:
 * @node: identifier of the requested NUMA node
 * @distances: array of distances to sibling nodes
 * @ndistances: size of @distances
 *
 * Get array of distances to sibling nodes from @node. If a
 * distances[x] equals to zero, the node x is not enabled or
 * doesn't exist. As a special case, if @node itself refers to
 * disabled or nonexistent NUMA node, then @distances and
 * @ndistances are set to NULL and zero respectively.
 *
 * The distances are a bit of magic. For a local node the value
 * is 10, for remote it's typically 20 meaning that time penalty
 * for accessing a remote node is two time bigger than when
 * accessing a local node.
 *
 * Returns 0 on success, -1 otherwise.
 */
int
virNumaGetDistances(int node,
                    int **distances,
                    int *ndistances)
{
    int ret = -1;
    int max_node;
    size_t i;

    if (!virNumaNodeIsAvailable(node)) {
        VIR_DEBUG("Node %d does not exist", node);
        *distances = NULL;
        *ndistances = 0;
        return 0;
    }

    if ((max_node = virNumaGetMaxNode()) < 0)
        goto cleanup;

    /* VIR_ALLOC_N zero-fills, so skipped siblings report distance 0 as
     * documented above. */
    if (VIR_ALLOC_N(*distances, max_node + 1) < 0)
        goto cleanup;

    *ndistances = max_node + 1;

    for (i = 0; i <= max_node; i++) {
        /* BUG FIX: the original re-tested @node (the already-validated
         * argument) here, so the availability of sibling node i was
         * never checked and numa_distance() was called even for
         * disabled/nonexistent siblings.  Test the sibling instead,
         * leaving its slot at the documented 0. */
        if (!virNumaNodeIsAvailable(i))
            continue;

        (*distances)[i] = numa_distance(node, i);
    }

    ret = 0;
 cleanup:
    return ret;
}
static int set_closest_numanode(int num_unique, const struct bandwidth_nodes_t *bandwidth_nodes, int target_bandwidth, int num_cpunode, int *closest_numanode) { /*************************************************************************** * num_unique (IN): * * Length of bandwidth_nodes vector. * * bandwidth_nodes (IN): * * Output vector from create_bandwitdth_nodes(). * * target_bandwidth (IN): * * The bandwidth to select for comparison. * * num_cpunode (IN): * * Number of cpu's and length of closest_numanode. * * closest_numanode (OUT): * * Vector that maps cpu index to closest numa node of the specified * * bandwidth. * * RETURNS zero on success, error code on failure * ***************************************************************************/ int err = 0; int min_distance, distance, i, j, old_errno, min_unique; struct bandwidth_nodes_t match; match.bandwidth = -1; for (i = 0; i < num_cpunode; ++i) { closest_numanode[i] = -1; } for (i = 0; i < num_unique; ++i) { if (bandwidth_nodes[i].bandwidth == target_bandwidth) { match = bandwidth_nodes[i]; break; } } if (match.bandwidth == -1) { err = MEMKIND_ERROR_UNAVAILABLE; } else { for (i = 0; i < num_cpunode; ++i) { min_distance = INT_MAX; min_unique = 1; for (j = 0; j < match.num_numanodes; ++j) { old_errno = errno; distance = numa_distance(numa_node_of_cpu(i), match.numanodes[j]); errno = old_errno; if (distance < min_distance) { min_distance = distance; closest_numanode[i] = match.numanodes[j]; min_unique = 1; } else if (distance == min_distance) { min_unique = 0; } } if (!min_unique) { err = MEMKIND_ERROR_RUNTIME; } } } return err; }
/*
 * Class:     xerial_jnuma_NumaNative
 * Method:    distance
 * Signature: (II)I
 *
 * JNI bridge to libnuma's numa_distance(): returns the ACPI SLIT
 * distance between two NUMA nodes.  env and obj are unused.
 */
JNIEXPORT jint JNICALL Java_xerial_jnuma_NumaNative_distance
  (JNIEnv *env, jobject obj, jint node1, jint node2)
{
    const int dist = numa_distance((int)node1, (int)node2);
    return (jint)dist;
}
/*
 * Distribute shepherds round-robin across the machine's NUMA nodes,
 * install an interleave mask covering the used nodes, and build each
 * shepherd's inter-shepherd distance table plus a distance-sorted list
 * of the other shepherds.
 *
 * sheps:      array of nshepherds shepherd structures to initialize.
 * nshepherds: number of shepherds in the array.
 *
 * Returns QTHREAD_SUCCESS, or QTHREAD_THIRD_PARTY_ERROR when libnuma
 * reports NUMA support unavailable.
 */
int INTERNAL qt_affinity_gendists(qthread_shepherd_t *sheps,
                                  qthread_shepherd_id_t nshepherds)
{   /*{{{ */
    /* numa_max_node() is the highest node id, so +1 is the node count.
     * NOTE(review): called before the numa_available() check below —
     * presumably safe on this platform, but worth confirming. */
    const size_t num_extant_nodes = numa_max_node() + 1;
    nodemask_t bmask;

    qthread_debug(AFFINITY_FUNCTIONS, "sheps(%p), nshepherds(%u), num_extant_nodes:%u\n", sheps, nshepherds, (unsigned)num_extant_nodes);
    if (numa_available() == -1) {
        return QTHREAD_THIRD_PARTY_ERROR;
    }
    nodemask_zero(&bmask);
    /* assign nodes */
    qthread_debug(AFFINITY_DETAILS, "assign nodes...\n");
    for (size_t i = 0; i < nshepherds; ++i) {
        /* Round-robin: shepherd i lives on node i mod node-count. */
        sheps[i].node = i % num_extant_nodes;
        qthread_debug(AFFINITY_DETAILS, "set bit %u in bmask\n", i % num_extant_nodes);
        nodemask_set(&bmask, i % num_extant_nodes);
    }
    /* Interleave allocations across every node that hosts a shepherd. */
    qthread_debug(AFFINITY_DETAILS, "numa_set_interleave_mask\n");
    numa_set_interleave_mask(&bmask);
    qthread_debug(AFFINITY_DETAILS, "querying distances...\n");
    /* truly ancient versions of libnuma (in the changelog, this is
     * considered "pre-history") do not have numa_distance() */
    for (qthread_shepherd_id_t i = 0; i < nshepherds; i++) {
        qthread_debug(AFFINITY_DETAILS, "i = %u < %u...\n", i, nshepherds);
        const unsigned int node_i = sheps[i].node;
        size_t j, k;
        /* NOTE(review): allocation failures are only assert()-checked,
         * so under NDEBUG a NULL from calloc goes undetected. */
        sheps[i].shep_dists = calloc(nshepherds, sizeof(unsigned int));
        sheps[i].sorted_sheplist = calloc(nshepherds - 1, sizeof(qthread_shepherd_id_t));
        qthread_debug(AFFINITY_DETAILS, "allocs %p %p\n", sheps[i].shep_dists, sheps[i].sorted_sheplist);
        assert(sheps[i].shep_dists);
        assert(sheps[i].sorted_sheplist);
        for (j = 0; j < nshepherds; j++) {
            const unsigned int node_j = sheps[j].node;
#if QTHREAD_NUMA_DISTANCE_WORKING
            if ((node_i != QTHREAD_NO_NODE) && (node_j != QTHREAD_NO_NODE) && (node_i != node_j)) {
                /* Real SLIT distance between the two hosting nodes. */
                sheps[i].shep_dists[j] = numa_distance(node_i, node_j);
            } else {
#endif
                /* XXX too arbitrary */
                /* Fallback when numa_distance() is unusable: 0 for self,
                 * the conventional "remote" value 20 for everyone else. */
                if (i == j) {
                    sheps[i].shep_dists[j] = 0;
                } else {
                    sheps[i].shep_dists[j] = 20;
                }
#if QTHREAD_NUMA_DISTANCE_WORKING
            }
#endif
            qthread_debug(AFFINITY_DETAILS, "shep %u to shep %u distance: %u\n", i, j, sheps[i].shep_dists[j]);
        }
        /* sorted_sheplist holds every shepherd id except i itself... */
        k = 0;
        for (j = 0; j < nshepherds; j++) {
            if (j != i) {
                sheps[i].sorted_sheplist[k++] = j;
            }
        }
        /* ...then gets ordered by distance from shepherd i. */
        if (nshepherds > 1) {
            sort_sheps(sheps[i].shep_dists, sheps[i].sorted_sheplist, nshepherds);
        }
    }
    return QTHREAD_SUCCESS;
}   /*}}} */
/*
 * Build the virtual NUMA topology from the physical nodes listed in the
 * configuration.
 *
 * cfg:               configuration providing "topology.physical_nodes"
 *                    (comma-separated node ids), "topology.hyperthreading"
 *                    and optionally "topology.mc_pci".
 * cpu_model:         cpu model descriptor attached to each virtual node.
 * virtual_topologyp: (OUT) receives the allocated virtual topology.
 *
 * Physical nodes permitted by the numactl memory mask are collected, then
 * greedily paired by smallest numa_distance(); each pair becomes one
 * virtual node (DRAM node + NVRAM node).  Any leftover unpaired node forms
 * a virtual node on its own.
 *
 * Returns E_SUCCESS on success, E_ERROR if more nodes are requested than
 * libnuma supports.
 */
int init_virtual_topology(config_t* cfg, cpu_model_t* cpu_model, virtual_topology_t** virtual_topologyp)
{
    char* mc_pci_file = NULL;           /* FIX: init — used only if lookup succeeds */
    char* str;
    char* saveptr;
    char* token;
    int* physical_node_ids;
    physical_node_t** physical_nodes = NULL; /* FIX: init — early goto done freed garbage */
    int num_physical_nodes;
    int n, v, i, j, sibling_idx;
    int node_id;
    physical_node_t* node_i, *node_j, *sibling_node;
    int ret;
    int distance, min_distance;
    int hyperthreading;
    struct bitmask* mem_nodes;
    virtual_topology_t* virtual_topology;

    __cconfig_lookup_string(cfg, "topology.physical_nodes", &str);

    // parse the physical nodes string
    physical_node_ids = calloc(numa_num_possible_nodes(), sizeof(*physical_node_ids));
    num_physical_nodes = 0;
    while ((token = strtok_r(str, ",", &saveptr)) != NULL) {
        str = NULL;
        // FIX: bound-check BEFORE storing; the original wrote the element
        // at index numa_num_possible_nodes() (one past the end) before its
        // overflow test fired.
        if (num_physical_nodes >= numa_num_possible_nodes()) {
            // we're being asked to run on more nodes than available
            free(physical_node_ids);
            ret = E_ERROR;
            goto done;
        }
        physical_node_ids[num_physical_nodes++] = atoi(token);
    }
    physical_nodes = calloc(num_physical_nodes, sizeof(*physical_nodes));

    // select those nodes we can run on (e.g. not constrained by any numactl)
    mem_nodes = numa_get_mems_allowed();
    for (i = 0, n = 0; i < num_physical_nodes; i++) {
        node_id = physical_node_ids[i];
        if (numa_bitmask_isbitset(mem_nodes, node_id)) {
            physical_nodes[n] = malloc(sizeof(**physical_nodes));
            physical_nodes[n]->node_id = node_id;
            // TODO: what if we want to avoid using only a single hardware contexts of a hyperthreaded core?
            physical_nodes[n]->cpu_bitmask = numa_allocate_cpumask();
            numa_node_to_cpus(node_id, physical_nodes[n]->cpu_bitmask);
            __cconfig_lookup_bool(cfg, "topology.hyperthreading", &hyperthreading);
            if (hyperthreading) {
                physical_nodes[n]->num_cpus = num_cpus(physical_nodes[n]->cpu_bitmask);
            } else {
                DBG_LOG(INFO, "Not using hyperthreading.\n");
                // disable the upper half of the processors in the bitmask
                physical_nodes[n]->num_cpus = num_cpus(physical_nodes[n]->cpu_bitmask) / 2;
                int fc = first_cpu(physical_nodes[n]->cpu_bitmask);
                for (j = fc + system_num_cpus()/2; j < fc + system_num_cpus()/2 + physical_nodes[n]->num_cpus; j++) {
                    if (numa_bitmask_isbitset(physical_nodes[n]->cpu_bitmask, j)) {
                        numa_bitmask_clearbit(physical_nodes[n]->cpu_bitmask, j);
                    }
                }
            }
            n++;
        }
    }
    free(physical_node_ids);
    num_physical_nodes = n;

    // if pci bus topology of each physical node is not provided then discover it
    // (FIX: single config lookup instead of the original's duplicated call)
    if (__cconfig_lookup_string(cfg, "topology.mc_pci", &mc_pci_file) == CONFIG_FALSE ||
        load_mc_pci_topology(mc_pci_file, physical_nodes, num_physical_nodes) != E_SUCCESS)
    {
        discover_mc_pci_topology(cpu_model, physical_nodes, num_physical_nodes);
        // FIX: only persist when a path was actually configured; the original
        // passed mc_pci_file uninitialized when the lookup failed.
        if (mc_pci_file) {
            save_mc_pci_topology(mc_pci_file, physical_nodes, num_physical_nodes);
        }
    }

    // form virtual nodes by grouping physical nodes that are close to each other
    virtual_topology = malloc(sizeof(*virtual_topology));
    virtual_topology->num_virtual_nodes = num_physical_nodes / 2 + num_physical_nodes % 2;
    virtual_topology->virtual_nodes = calloc(virtual_topology->num_virtual_nodes,
                                             sizeof(*(virtual_topology->virtual_nodes)));

    for (i = 0, v = 0; i < num_physical_nodes; i++) {
        min_distance = INT_MAX;
        sibling_node = NULL;
        sibling_idx = -1;
        if ((node_i = physical_nodes[i]) == NULL) {
            continue;
        }
        for (j = i + 1; j < num_physical_nodes; j++) {
            if ((node_j = physical_nodes[j]) == NULL) {
                continue;
            }
            distance = numa_distance(node_i->node_id, node_j->node_id);
            if (distance < min_distance) {
                min_distance = distance;   // FIX: original never updated the
                                           // minimum, so it paired with the
                                           // LAST candidate, not the closest
                sibling_node = node_j;
                sibling_idx = j;
            }
        }
        if (sibling_node) {
            // consume both nodes so neither can be paired again
            physical_nodes[i] = physical_nodes[sibling_idx] = NULL;
            virtual_node_t* virtual_node = &virtual_topology->virtual_nodes[v];
            virtual_node->dram_node = node_i;
            virtual_node->nvram_node = sibling_node;
            virtual_node->node_id = v;
            virtual_node->cpu_model = cpu_model;
            DBG_LOG(INFO, "Fusing physical nodes %d %d into virtual node %d\n",
                    node_i->node_id, sibling_node->node_id, virtual_node->node_id);
            v++;
        }
    }

    // any physical node that is not paired with another physical node is
    // formed into a virtual node on its own
    if (2*v < num_physical_nodes) {
        for (i = 0; i < num_physical_nodes; i++) {
            // FIX: skip slots consumed by the pairing pass above; the original
            // dereferenced the NULLed-out entries.
            if ((node_i = physical_nodes[i]) == NULL) {
                continue;
            }
            virtual_node_t* virtual_node = &virtual_topology->virtual_nodes[v];
            virtual_node->dram_node = virtual_node->nvram_node = node_i;
            virtual_node->node_id = v;
            virtual_node->cpu_model = cpu_model; // FIX: consistent with paired branch
            DBG_LOG(WARNING, "Forming physical node %d into virtual node %d without a sibling node.\n",
                    node_i->node_id, virtual_node->node_id);
            v++; // FIX: original never advanced, overwriting virtual node v
        }
    }
    *virtual_topologyp = virtual_topology;
    ret = E_SUCCESS;

done:
    free(physical_nodes);
    return ret;
}