/*
 * Print, for every configured NUMA node, the list of CPUs belonging to
 * that node, one node per line ("node: cpu0, cpu1, ...").
 * Exits with status 1 when the kernel has no NUMA support.
 */
int main(void)
{
	int i, k, w, ncpus;
	struct bitmask *cpus;
	int maxnode;

	/* BUG FIX: numa_available() must be checked before any other
	 * libnuma call; maxnode was previously computed first. */
	if (numa_available() < 0) {
		printf("no numa\n");
		exit(1);
	}
	maxnode = numa_num_configured_nodes() - 1;

	cpus = numa_allocate_cpumask();
	ncpus = cpus->size;

	for (i = 0; i <= maxnode; i++) {
		if (numa_node_to_cpus(i, cpus) < 0)
			printf("node %d failed to convert\n", i);
		printf("%d: ", i);
		w = 0;
		for (k = 0; k < ncpus; k++) {
			if (numa_bitmask_isbitset(cpus, k)) {
				/* BUG FIX: w was never incremented, so the
				 * comma separator between CPUs never fired. */
				printf(" %s%d", w > 0 ? "," : "", k);
				w++;
			}
		}
		putchar('\n');
	}
	numa_free_cpumask(cpus);	/* BUG FIX: mask was leaked */
	return 0;
}
/* Exercise the per-node receive test over every family/type combination. */
int main(void)
{
	static const struct {
		const char *label;
		int family;
		int type;
	} cases[] = {
		{ "---- IPv4 UDP ----\n", AF_INET,  SOCK_DGRAM  },
		{ "---- IPv6 UDP ----\n", AF_INET6, SOCK_DGRAM  },
		{ "---- IPv4 TCP ----\n", AF_INET,  SOCK_STREAM },
		{ "---- IPv6 TCP ----\n", AF_INET6, SOCK_STREAM },
	};
	int *rcv_fd, nodes;
	size_t t;

	if (numa_available() < 0)
		ksft_exit_skip("no numa api support\n");

	nodes = numa_max_node() + 1;

	rcv_fd = calloc(nodes, sizeof(int));
	if (!rcv_fd)
		error(1, 0, "failed to allocate array");

	for (t = 0; t < sizeof(cases) / sizeof(cases[0]); t++) {
		fprintf(stderr, "%s", cases[t].label);
		test(rcv_fd, nodes, cases[t].family, cases[t].type);
	}

	free(rcv_fd);
	fprintf(stderr, "SUCCESS\n");
	return 0;
}
/*
 * setup() - LTP test setup for migrate_pages().
 *
 * Verifies root privileges, probes the syscall and NUMA availability,
 * then builds a pair of nodemasks (old/new) that both contain a single
 * allowed memory node.
 */
static void setup(void)
{
	int node, ret;

	tst_require_root();

	/* Probe the syscall; on kernels lacking it this records ENOSYS. */
	TEST(ltp_syscall(__NR_migrate_pages, 0, 0, NULL, NULL));

	if (numa_available() == -1)
		tst_brkm(TCONF, NULL, "NUMA not available");

	ret = get_allowed_nodes(NH_MEMS, 1, &node);
	if (ret < 0)
		tst_brkm(TBROK | TERRNO, NULL, "get_allowed_nodes_arr: %d",
			 ret);

	/* Round the node count up to a whole number of unsigned longs so
	 * the nodemask buffer handed to the kernel is word-aligned. */
	sane_max_node = LTP_ALIGN(get_max_node(), sizeof(unsigned long)*8);
	sane_nodemask_size = sane_max_node / 8;
	sane_old_nodes = SAFE_MALLOC(NULL, sane_nodemask_size);
	sane_new_nodes = SAFE_MALLOC(NULL, sane_nodemask_size);
	memset(sane_old_nodes, 0, sane_nodemask_size);
	memset(sane_new_nodes, 0, sane_nodemask_size);

	/* Migrate from and to the same allowed node. */
	set_bit(sane_old_nodes, node, 1);
	set_bit(sane_new_nodes, node, 1);

	TEST_PAUSE;
}
//----------------------------------------------------------------------
//-- a little cross platform numa allocator
//-- use the existing theron defines for convenience
//----------------------------------------------------------------------
inline void *AllocOnNode(const long node, const size_t size)
{
#if THERON_NUMA
#if THERON_WINDOWS
#if _WIN32_WINNT >= 0x0600
    // Vista and later: ask the kernel for pages placed on the given node.
    return VirtualAllocExNuma(
        GetCurrentProcess(),
        NULL,
        size,
        MEM_RESERVE | MEM_COMMIT,
        PAGE_READWRITE,
        node);
#else
    // Pre-Vista Windows has no NUMA-aware allocation API.
    return NULL;
#endif
#elif THERON_GCC
    // libnuma path: bail out when the kernel lacks NUMA support.
    if (numa_available() < 0)
    {
        return NULL;
    }
    return numa_alloc_onnode(size, node);
#endif
#endif // THERON_NUMA

    return NULL;
}
/* before any other calls in this library can be used numa_available() must be called. */ int hbw_check_available(void) { int rc = numa_available(); if (rc != 0) { fprintf(stderr, "hbwmalloc: libnuma error = %d\n", rc); #ifdef HAVE_ERRNO_H return ENODEV; /* ENODEV if high-bandwidth memory is unavailable. */ #else return -1; #endif } #ifdef HAVE_PTHREAD /* this ensures that initializing within hbw_check_available() * can be thread-safe. */ pthread_once( &myhbwmalloc_once_control, myhbwmalloc_init ); /* FIXME need thread barrier here to be thread-safe */ #else myhbwmalloc_init(); #endif if (myhbwmalloc_mspace == NULL) { fprintf(stderr, "hbwmalloc: mspace creation failed\n"); #ifdef HAVE_ERRNO_H return ENODEV; /* ENODEV if high-bandwidth memory is unavailable. */ #else return -1; #endif } return 0; }
/*
 * nodeGetFreeMemory:
 * Sum the free memory of every NUMA node on the host.
 *
 * Returns the total free memory in bytes, or 0 on error (no NUMA
 * support, or a per-node query failed).
 */
unsigned long long nodeGetFreeMemory(virConnectPtr conn ATTRIBUTE_UNUSED)
{
    unsigned long long freeMem = 0;
    int n;

    if (numa_available() < 0) {
        nodeReportError(VIR_ERR_NO_SUPPORT,
                        "%s", _("NUMA not supported on this host"));
        goto cleanup;
    }

    for (n = 0 ; n <= numa_max_node() ; n++) {
        long long mem;

        /* numa_node_size64() stores the node's free bytes into 'mem'
         * via its second argument (per numa(3); the return value is
         * the node's total size). */
        if (numa_node_size64(n, &mem) < 0) {
            nodeReportError(VIR_ERR_INTERNAL_ERROR,
                            "%s", _("Failed to query NUMA free memory"));
            goto cleanup;
        }
        freeMem += mem;
    }

cleanup:
    return freeMem;
}
/**
 * @brief construct global 'cohort' lock.
 *
 * This lock performs handovers in three levels: First within
 * the same NUMA node, then within the same ArgoDSM node, and
 * finally over ArgoDSM nodes.
 */
cohort_lock() :
	has_global_lock(false),
	numanodes(1),		// sane default when libnuma is absent
	numahandover(0),
	nodelockowner(NO_OWNER),
	tas_flag(argo::conew_<bool>(false)),
	global_lock(new argo::globallock::global_tas_lock(tas_flag)),
	node_lock(new argo::locallock::ticket_lock())
{
	int num_cpus = sysconf(_SC_NPROCESSORS_CONF);
	// sane default: every CPU maps to NUMA node 0
	numa_mapping.resize(num_cpus, 0);
#ifdef ARGO_USE_LIBNUMA
	/* use libnuma only if it is actually available */
	if(numa_available() != -1) {
		numanodes = numa_num_configured_nodes();
		/* Initialize the NUMA map */
		for (int i = 0; i < num_cpus; ++i) {
			numa_mapping[i] = numa_node_of_cpu(i);
		}
	}
#endif
	/* initialize hierarchy components: per-NUMA-node handover
	 * counters (value-initialized to 0) and one local MCS lock
	 * per NUMA node */
	handovers = new int[numanodes]();
	local_lock = new argo::locallock::mcs_lock[numanodes];
}
/* Verify once per process that the kernel supports NUMA policy;
 * complain() otherwise.  Subsequent calls are no-ops. */
void checknuma(void)
{
	static int numa = -1;

	if (numa < 0 && numa_available() < 0)
		complain("This system does not support NUMA policy");
	numa = 0;
}
/**
 * \brief get a array of cores with a ceartain placement
 *
 * \param n        number of core ids to return
 * \param do_fill  true: fill each NUMA node before moving to the next;
 *                 false: round-robin cores over the NUMA nodes
 *
 * \returns malloc'ed array of n core ids (caller frees), or NULL when
 *          libnuma is unavailable or n cores could not be selected.
 */
static coreid_t* placement(uint32_t n, bool do_fill)
{
    /* BUG FIX: check availability before any other numa_* call
     * (their behavior is undefined when NUMA is unsupported). */
    if (numa_available() != 0) {
        printf("Libnuma not available \n");
        return NULL;
    }

    uint32_t numa_nodes = numa_max_node()+1;
    uint32_t num_cores = numa_num_configured_cpus();
    struct bitmask* nodes[numa_nodes];
    coreid_t* result = malloc(sizeof(coreid_t)*n);
    uint32_t num_taken = 0;
    uint32_t i, j;

    if (result == NULL) {
        return NULL;
    }

    for (i = 0; i < numa_nodes; i++) {
        nodes[i] = numa_allocate_cpumask();
        numa_node_to_cpus(i, nodes[i]);
    }

    if (do_fill) {
        /* take cores node after node until n cores are collected */
        for (i = 0; i < numa_nodes && num_taken < n; i++) {
            for (j = 0; j < num_cores && num_taken < n; j++) {
                if (numa_bitmask_isbitset(nodes[i], j)) {
                    result[num_taken++] = j;
                }
            }
        }
    } else {
        /* round-robin: the k-th core of node i lands at slot
         * i + k*numa_nodes */
        for (i = 0; i < numa_nodes && num_taken < n; i++) {
            uint8_t ith_of_node = 0;
            for (j = 0; j < num_cores && num_taken < n; j++) {
                if (numa_bitmask_isbitset(nodes[i], j)) {
                    uint32_t index = i + ith_of_node*numa_nodes;
                    if (index < n) {
                        result[index] = j;
                        num_taken++;
                        ith_of_node++;
                    }
                }
            }
        }
    }

    /* BUG FIX: free the per-node cpumasks (previously leaked). */
    for (i = 0; i < numa_nodes; i++) {
        numa_free_cpumask(nodes[i]);
    }

    if (num_taken == n) {
        return result;
    }

    /* BUG FIX: do not leak the result array on failure. */
    free(result);
    return NULL;
}
/* Determine the cpu-to-node mapping, bind the first thread.
 *
 * Walks NODE_MAP_PATH for "node<N>" directories, then each node
 * directory for "cpu<M>" entries, filling cpu_to_node[].  Exits on
 * any failure. */
void numa_start()
{
	DIR *root_dir;
	struct dirent *root_entry;
	char path[NAME_MAX];
	size_t root_path_length;

	if (numa_available() < 0) {
		fprintf(stderr, "No NUMA support. Exiting.\n");
		exit(1);
	}

	strcpy(path, NODE_MAP_PATH);
	root_path_length = strlen(path);
	root_dir = opendir(path);
	if (!root_dir) {
		perror("numa_start");
		exit(1);
	}

	while ((root_entry = readdir(root_dir)) != NULL) {
		DIR *node_dir;
		struct dirent *node_entry;
		unsigned long node;

		if (strncmp(root_entry->d_name, "node", 4)) {
			continue;
		}
		node = strtoul(root_entry->d_name + 4, NULL, 0);
		strcpy(path + root_path_length, root_entry->d_name);
		node_dir = opendir(path);
		/* BUG FIX: the original tested root_dir (always non-NULL
		 * here) instead of the directory just opened. */
		if (!node_dir) {
			perror("numa_start");
			exit(1);
		}
		while ((node_entry = readdir(node_dir)) != NULL) {
			unsigned long cpu;

			/* accept "cpu<digits>" only; skips cpufreq/cpuidle */
			if (strncmp(node_entry->d_name, "cpu", 3) ||
			    isalpha(node_entry->d_name[3])) {
				continue;
			}
			cpu = strtoul(node_entry->d_name + 3, NULL, 0);
			cpu_to_node[cpu] = node;
		}
		/* BUG FIX: close each per-node directory (was leaked). */
		closedir(node_dir);
	}
	/* BUG FIX: close the root directory (was leaked). */
	closedir(root_dir);

	discover_cpu();
}
/*
 * Bind the calling thread and its memory allocations to the NUMA node
 * that owns core_id.  Returns 0 on success, -1 on failure.
 */
int bind2node(int core_id)
{
	struct bitmask *mask;
	char node_str[8];
	int node;

	if (core_id < 0 || numa_available() == -1)
		return -1;

	/* BUG FIX: numa_node_of_cpu() returns -1 on error; previously
	 * that value was printed via %u and fed to numa_bind(). */
	node = numa_node_of_cpu(core_id);
	if (node < 0)
		return -1;

	snprintf(node_str, sizeof(node_str), "%d", node);

	/* BUG FIX: numa_parse_nodestring() may return NULL, and the
	 * bitmask it allocates must be released after use. */
	mask = numa_parse_nodestring(node_str);
	if (!mask)
		return -1;
	numa_bind(mask);
	numa_bitmask_free(mask);
	return 0;
}
int main(void) { hwloc_topology_t topology; hwloc_bitmap_t set, set2, nocpunomemnodeset, nocpubutmemnodeset, nomembutcpunodeset, nomembutcpucpuset; hwloc_obj_t node; struct bitmask *bitmask, *bitmask2; unsigned long mask; unsigned long maxnode; int i; if (numa_available() < 0) /* libnuma has inconsistent behavior when the kernel isn't NUMA-aware. * don't try to check everything precisely. */ exit(77); hwloc_topology_init(&topology); hwloc_topology_load(topology); /* convert full stuff between cpuset and libnuma */ set = hwloc_bitmap_alloc(); nocpunomemnodeset = hwloc_bitmap_alloc(); nocpubutmemnodeset = hwloc_bitmap_alloc(); nomembutcpunodeset = hwloc_bitmap_alloc(); nomembutcpucpuset = hwloc_bitmap_alloc(); /* gather all nodes if any, or the whole system if no nodes */ if (hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_NUMANODE)) { node = NULL; while ((node = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, node)) != NULL) { hwloc_bitmap_or(set, set, node->cpuset); if (hwloc_bitmap_iszero(node->cpuset)) { if (node->memory.local_memory) hwloc_bitmap_set(nocpubutmemnodeset, node->os_index); else hwloc_bitmap_set(nocpunomemnodeset, node->os_index); } else if (!node->memory.local_memory) { hwloc_bitmap_set(nomembutcpunodeset, node->os_index); hwloc_bitmap_or(nomembutcpucpuset, nomembutcpucpuset, node->cpuset); } } } else { hwloc_bitmap_or(set, set, hwloc_topology_get_complete_cpuset(topology)); } set2 = hwloc_bitmap_alloc(); hwloc_cpuset_from_linux_libnuma_bitmask(topology, set2, numa_all_nodes_ptr); /* numa_all_nodes_ptr doesn't contain NODES with CPU but no memory */ hwloc_bitmap_or(set2, set2, nomembutcpucpuset); assert(hwloc_bitmap_isequal(set, set2)); hwloc_bitmap_free(set2); bitmask = hwloc_cpuset_to_linux_libnuma_bitmask(topology, set); /* numa_all_nodes_ptr contains NODES with no CPU but with memory */ hwloc_bitmap_foreach_begin(i, nocpubutmemnodeset) { numa_bitmask_setbit(bitmask, i); } hwloc_bitmap_foreach_end();
/*
 * show() - print the calling process's current NUMA policy state:
 * policy name, preferred node, interleave mask/node, CPU binding and
 * memory binding.  Exits with status 1 when NUMA is unsupported.
 */
void show(void)
{
	unsigned long prefnode;
	struct bitmask *membind, *interleave, *cpubind;
	unsigned long cur;
	int policy;
	int numa_num_nodes = numa_num_possible_nodes();

	if (numa_available() < 0) {
		show_physcpubind();
		printf("No NUMA support available on this system.\n");
		exit(1);
	}

	cpubind = numa_get_run_node_mask();

	prefnode = numa_preferred();
	interleave = numa_get_interleave_mask();
	membind = numa_get_membind();
	cur = numa_get_interleave_node();

	policy = 0;
	if (get_mempolicy(&policy, NULL, 0, 0, 0) < 0)
		perror("get_mempolicy");

	printf("policy: %s\n", policy_name(policy));

	printf("preferred node: ");
	switch (policy) {
	case MPOL_PREFERRED:
		if (prefnode != -1) {
			printf("%ld\n", prefnode);
			break;
		}
		/*FALL THROUGH*/
	case MPOL_DEFAULT:
		printf("current\n");
		break;
	case MPOL_INTERLEAVE:
		printf("%ld (interleave next)\n",cur);
		break;
	case MPOL_BIND:
		/* NOTE(review): membind is already a struct bitmask *;
		 * passing &membind hands find_first_bit() the address of
		 * the pointer, not the mask bits — verify the expected
		 * argument type of find_first_bit(). */
		printf("%d\n", find_first_bit(&membind, numa_num_nodes));
		break;
	}
	if (policy == MPOL_INTERLEAVE) {
		printmask("interleavemask", interleave);
		printf("interleavenode: %ld\n", cur);
	}
	show_physcpubind();
	printmask("cpubind", cpubind);
	// for compatibility
	printmask("nodebind", cpubind);
	printmask("membind", membind);
}
/* Smoke test: report node and core counts when the NUMA library is usable. */
int main (void)
{
    debug_printf("numa test started...\n");

    if (numa_available() != SYS_ERR_OK) {
        debug_printf("numa not available\n");
        return 0;
    }

    debug_printf("num nodes=%u\n", numa_max_node());
    debug_printf("num cores: %u\n", numa_max_core());

    return 0;
}
int main(int argc, char **argv) { int i; cpu_set_t cpuset; mem_bench_info_t mbinfo; if (numa_available() == -1){ fprintf(stderr, "numa functions aren't available\n"); exit(EXIT_FAILURE); } parsearg(argc, argv); mbinfo.working_size = option.access_size; // set affinity CPU_ZERO(&cpuset); CPU_SET(option.usecore, &cpuset); sched_setaffinity(getpid(), sizeof(cpu_set_t), &cpuset); // read benchmark printf("===========================================\n" "memory benchmark\n" "===========================================\n"); for (i = 0; i <= numa_max_node(); i++) { mbinfo.destnode = i; mbinfo.pc.ops = 0; mbinfo.pc.clk = 0; numa_membench(&mbinfo); printf("memory_alloc_node\t%d\n" "access_size\t%ld\n" "total_ops\t%ld\n" "total_clk\t%ld\n" "exec_time_sec\t%.9f\n" "ops_per_sec\t%e\n" "clk_per_op\t%e\n" "usec_per_op\t%f\n", i, mbinfo.working_size, mbinfo.pc.ops, mbinfo.pc.clk, mbinfo.pc.wallclocktime, mbinfo.pc.ops / mbinfo.pc.wallclocktime, ((double)mbinfo.pc.clk) / mbinfo.pc.ops, mbinfo.pc.wallclocktime * 1000000 / mbinfo.pc.ops ); } return 0; }
/* static */
int ThreadPool::getNumaNodeCount()
{
#if defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7
    ULONG num = 1;

    /* On success 'num' holds the highest node number; count is one more. */
    if (GetNumaHighestNodeNumber(&num))
        num++;
    return (int)num;
#elif HAVE_LIBNUMA
    /* Fall back to one node when libnuma reports no NUMA support. */
    return (numa_available() >= 0) ? numa_max_node() + 1 : 1;
#else
    return 1;
#endif
}
/* Guess a shepherd count from the NUMA topology: one shepherd per
 * NUMA node, falling back to 1 when NUMA is unavailable. */
static qthread_shepherd_id_t guess_num_shepherds(void)
{                                      /*{{{ */
    qthread_shepherd_id_t nshepherds = 1;

    /* BUG FIX: numa_available() returns -1 when NUMA is unsupported
     * and a non-negative value otherwise; the old test (!= 1)
     * misclassified both cases. */
    if (numa_available() != -1) {
        /* this is (probably) correct if/when we have multithreaded shepherds,
         * ... BUT ONLY IF ALL NODES HAVE CPUS!!!!!! */
        nshepherds = numa_max_node() + 1;
        qthread_debug(AFFINITY_DETAILS,
                      "numa_max_node() returned %i\n",
                      nshepherds);
    }
    if (nshepherds <= 0) {
        nshepherds = 1;
    }
    return nshepherds;
}                                      /*}}} */
// TaskManager constructor: sizes the worker pool, discovers NUMA nodes
// (when compiled with USE_NUMA) and places each node's bookkeeping data
// in that node's local memory, then initializes scheduling and tracing
// state.
TaskManager :: TaskManager()
{
	num_threads = GetMaxThreads();
	// if (MyMPI_GetNTasks() > 1) num_threads = 1;

#ifdef USE_NUMA
	// NOTE(review): the numa_available() result is ignored; when NUMA
	// is unsupported the numa_* calls below are undefined — confirm
	// callers guarantee a NUMA-capable kernel.
	numa_available();
	num_nodes = numa_max_node() + 1;
	// never use more nodes than worker threads
	if (num_nodes > num_threads) num_nodes = num_threads;

	for (int j = 0; j < num_nodes; j++)
	{
		// placement-new the per-node data into node-local memory
		void * mem = numa_alloc_onnode (sizeof(NodeData), j);
		nodedata[j] = new (mem) NodeData;
		complete[j] = -1;
		workers_on_node[j] = 0;
	}
#else
	num_nodes = 1;
	nodedata[0] = new NodeData;
	complete[0] = -1;
	workers_on_node[0] = 0;
#endif

	jobnr = 0;
	done = 0;
	sleep = false;
	sleep_usecs = 1000;
	active_workers = 0;

	// pick a unique trace file name per TaskManager instance
	static int cnt = 0;
	char buf[100];
	if (use_paje_trace)
	{
#ifdef PARALLEL
		sprintf(buf, "ng%d_rank%d.trace", cnt++, MyMPI_GetId());
#else
		sprintf(buf, "ng%d.trace", cnt++);
#endif
	}
	else
		buf[0] = 0;
	//sprintf(buf, "");
	trace = new PajeTrace(num_threads, buf);
}
/*
 * Bind the process memory policy to NUMA node 1 and dump the hardware
 * topology; exercises numa_bind() and the library's error path.
 */
int main(void)
{
#if HAVE_NUMA_H
	nodemask_t nodemask;
	void hardware();	/* forward declaration of the dump helper */

	if (numa_available() < 0) {
		printf("This system does not support NUMA policy\n");
		numa_error("numa_available");
		numa_exit_on_error = 1;
		exit(numa_exit_on_error);
	}
	nodemask_zero(&nodemask);
	/* NOTE(review): binds unconditionally to node 1 — on a
	 * single-node host that node does not exist; confirm this test
	 * only runs on multi-node systems. */
	nodemask_set(&nodemask, 1);
	numa_bind(&nodemask);
	hardware();
	return numa_exit_on_error;
#else
	printf("NUMA is not available\n");
	return 1;
#endif
}
/*
 * nodeGetCellsFreeMemory:
 * Fill freeMems[] with the free memory of up to maxCells NUMA cells,
 * starting at startCell.  Returns the number of cells reported, or -1
 * on error.
 */
int nodeGetCellsFreeMemory(virConnectPtr conn ATTRIBUTE_UNUSED,
                           unsigned long long *freeMems,
                           int startCell,
                           int maxCells)
{
    int n, lastCell, numCells;
    int maxCell;

    if (numa_available() < 0) {
        nodeReportError(VIR_ERR_NO_SUPPORT,
                        "%s", _("NUMA not supported on this host"));
        return -1;
    }

    maxCell = numa_max_node();
    if (startCell > maxCell) {
        nodeReportError(VIR_ERR_INTERNAL_ERROR,
                        _("start cell %d out of range (0-%d)"),
                        startCell, maxCell);
        return -1;
    }

    /* Clamp the requested window to the last existing cell. */
    lastCell = startCell + maxCells - 1;
    if (lastCell > maxCell)
        lastCell = maxCell;

    for (numCells = 0, n = startCell; n <= lastCell; n++) {
        long long mem;

        if (numa_node_size64(n, &mem) < 0) {
            nodeReportError(VIR_ERR_INTERNAL_ERROR,
                            "%s", _("Failed to query NUMA free memory"));
            return -1;
        }
        freeMems[numCells++] = mem;
    }

    return numCells;
}
/*
 * check_config() - check for required configuration
 * @min_nodes: the minimum required NUMA nodes
 *
 * Checks if numa support is availabe, kernel is >= 2.6.18, arch is
 * one of the supported architectures.  Breaks the test with TCONF
 * when any requirement is missing.
 */
void check_config(unsigned int min_nodes)
{
#if HAVE_NUMA_H && HAVE_NUMAIF_H
	int num_allowed_nodes;
	int ret;

	/* NOTE(review): this runs before the numa_available() check —
	 * confirm get_allowed_nodes_arr() is safe without NUMA support. */
	ret = get_allowed_nodes_arr(NH_MEMS, &num_allowed_nodes, NULL);
	if (ret < 0)
		tst_brkm(TBROK | TERRNO, NULL, "get_allowed_nodes(): %d",
			 ret);

	if (numa_available() < 0) {
		tst_brkm(TCONF, NULL, "NUMA support is not available");
	} else if (num_allowed_nodes < min_nodes) {
		tst_brkm(TCONF, NULL, "at least %d allowed NUMA nodes"
			 " are required", min_nodes);
	} else if (tst_kvercmp(2, 6, 18) < 0) {
		tst_brkm(TCONF, NULL, "2.6.18 or greater kernel required");
	}
#else
	tst_brkm(TCONF, NULL, "NUMA support not provided");
#endif
}
/* static */
void ThreadPool::setThreadNodeAffinity(int numaNode)
{
#if defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7
    GROUP_AFFINITY groupAffinity;

    if (GetNumaNodeProcessorMaskEx((USHORT)numaNode, &groupAffinity) &&
        SetThreadAffinityMask(GetCurrentThread(), (DWORD_PTR)groupAffinity.Mask))
        return;
    x265_log(NULL, X265_LOG_ERROR, "unable to set thread affinity to NUMA node %d\n", numaNode);
#elif HAVE_LIBNUMA
    if (numa_available() >= 0)
    {
        /* Pin execution and future allocations to the requested node. */
        numa_run_on_node(numaNode);
        numa_set_preferred(numaNode);
        numa_set_localalloc();
        return;
    }
    x265_log(NULL, X265_LOG_ERROR, "unable to set thread affinity to NUMA node %d\n", numaNode);
#else
    (void)numaNode;
#endif
}
/*
 * Report this task's SLURM id together with its CPU and memory-binding
 * masks, for verifying srun affinity options.
 *
 * BUG FIX: the function previously relied on the implicit-int return
 * type, which is invalid since C99.
 */
int main (int argc, char **argv)
{
	char *task_str;
	MY_MASK cpu_mask;
	MY_MASK mem_mask;
	int task_id;

	/* exit 0 (not a failure) so the harness treats "no NUMA" as a skip */
	if (numa_available() < 0) {
		fprintf(stderr, "ERROR: numa support not available\n");
		exit(0);
	}
	if ((task_str = getenv("SLURM_PROCID")) == NULL) {
		fprintf(stderr, "ERROR: getenv(SLURM_PROCID) failed\n");
		exit(1);
	}
	task_id = atoi(task_str);
	_load_cpu_mask(&cpu_mask);
	_load_mem_mask(&mem_mask);
	printf("TASK_ID:%d,CPU_MASK:%lu,MEM_MASK:%lu\n",
	       task_id, _mask_to_int(&cpu_mask), _mask_to_int(&mem_mask));
	exit(0);
}
static int parse_pmtt_bandwidth(int num_bandwidth, int *bandwidth,
                                const char *pmtt_path)
{
    /***************************************************************************
    *   num_bandwidth (IN):                                                   *
    *       Length of bandwidth vector and maximum number of numa nodes.      *
    *   bandwidth (OUT):                                                      *
    *       Vector giving bandwidth for all numa nodes.  If numa bandwidth    *
    *       is not discovered the value is set to zero.                       *
    *   pmtt_path (IN):                                                       *
    *       Path to PMTT table to be parsed.                                  *
    *   RETURNS zero on success, error code on failure                        *
    ***************************************************************************/
    const size_t PMTT_BUF_SIZE = 2000;
    int err = 0;
    FILE *mfp = NULL;
    ACPI_TABLE_PMTT hdr;
    /* NOTE: sized by a const local, so this is technically a VLA */
    unsigned char buf[PMTT_BUF_SIZE];
    ACPI_PMTT_HEADER *pbuf = (ACPI_PMTT_HEADER *)&buf;
    size_t size;
    size_t nread;
    size_t pmtt_socket_size;

    memset(bandwidth, 0, sizeof(int)*num_bandwidth);
    if (numa_available() == -1) {
        err = MEMKIND_ERROR_PMTT;
        goto exit;
    }
    mfp = fopen(pmtt_path, "r");
    if (mfp == NULL) {
        err = MEMKIND_ERROR_PMTT;
        goto exit;
    }
    /* read and validate the fixed-size ACPI table header */
    nread = fread(&hdr, sizeof(ACPI_TABLE_PMTT), 1, mfp);
    if (nread != 1 || memcmp(hdr.Header.Signature, "PMTT", 4) != 0) {
        /* PMTT signature failure */
        err = MEMKIND_ERROR_PMTT;
        goto exit;
    }
    size = hdr.Header.Length - sizeof(ACPI_TABLE_PMTT);
    if (size > PMTT_BUF_SIZE) {
        /* PMTT byte count failure */
        err = MEMKIND_ERROR_PMTT;
        goto exit;
    }
    /* slurp the rest of the table and require exact EOF afterwards */
    nread = fread(buf, size, 1, mfp);
    if (nread != 1 || fgetc(mfp) != EOF) {
        /* PMTT incorrect number of bytes read */
        err = MEMKIND_ERROR_PMTT;
        goto exit;
    }
    if (pbuf->Type != ACPI_PMTT_TYPE_SOCKET) { /* SOCKET */
        /* PMTT did not find socket record first */
        err = MEMKIND_ERROR_PMTT;
        goto exit;
    }
    pmtt_socket_size = pbuf->Length;
    if (pmtt_socket_size != size) {
        /* PMTT extra bytes after socket record */
        err = MEMKIND_ERROR_PMTT;
        goto exit;
    }
    /* hand the memory-controller records (after the socket header)
     * to the sub-parser that fills in the bandwidth vector */
    if (parse_pmtt_memory_controllers(num_bandwidth, bandwidth,
                                      (ACPI_PMTT_HEADER *)&buf[sizeof(ACPI_PMTT_SOCKET)],
                                      pmtt_socket_size - sizeof(ACPI_PMTT_SOCKET))) {
        err = MEMKIND_ERROR_PMTT;
        goto exit;
    }

exit:
    if (mfp != NULL) {
        fclose(mfp);
    }
    return err;
}
/*
 * Class:     xerial_jnuma_NumaNative
 * Method:    numaAvailable
 * Signature: ()Z
 *
 * Returns JNI_TRUE when the kernel exposes the NUMA policy API.
 */
JNIEXPORT jboolean JNICALL Java_xerial_jnuma_NumaNative_isAvailable
  (JNIEnv *env, jobject obj)
{
    return (numa_available() == -1) ? JNI_FALSE : JNI_TRUE;
}
/* Report whether the host kernel has NUMA support enabled. */
bool
virNumaIsAvailable(void)
{
    return numa_available() >= 0;
}
/*
 * task_pre_launch() is called prior to exec of application task.
 * It is followed by TaskProlog program (from slurm.conf) and
 * --task-prolog (from srun command line).
 *
 * Builds the cpuset paths for the task (when CPU_BIND_CPUSETS is in
 * effect), applies the requested CPU affinity, then applies any NUMA
 * memory binding.  Returns SLURM_SUCCESS or SLURM_ERROR.
 */
extern int task_pre_launch (slurmd_job_t *job)
{
	char base[PATH_MAX], path[PATH_MAX];
	int rc = SLURM_SUCCESS;

	debug("affinity task_pre_launch:%u.%u, task:%u bind:%u",
	      job->jobid, job->stepid, job->envtp->procid,
	      job->cpu_bind_type);

	if (conf->task_plugin_param & CPU_BIND_CPUSETS) {
		info("Using cpuset affinity for tasks");
#ifdef MULTIPLE_SLURMD
		/* NOTE(review): snprintf() returns the would-be length, so
		 * output truncated at exactly PATH_MAX chars passes this
		 * ">" test; ">=" would be the strict check — confirm. */
		if (snprintf(base, PATH_MAX, "%s/slurm_%s_%u",
			     CPUSET_DIR,
			     (conf->node_name != NULL)?conf->node_name:"",
			     job->jobid) > PATH_MAX) {
			error("cpuset path too long");
			return SLURM_ERROR;
		}
#else
		if (snprintf(base, PATH_MAX, "%s/slurm%u",
			     CPUSET_DIR, job->jobid) > PATH_MAX) {
			error("cpuset path too long");
			return SLURM_ERROR;
		}
#endif
		if (snprintf(path, PATH_MAX, "%s/slurm%u.%u_%d",
			     base, job->jobid, job->stepid,
			     job->envtp->localid) > PATH_MAX) {
			error("cpuset path too long");
			return SLURM_ERROR;
		}
	} else
		info("Using sched_affinity for tasks");

	/*** CPU binding support ***/
	if (job->cpu_bind_type) {
		cpu_set_t new_mask, cur_mask;
		pid_t mypid = job->envtp->task_pid;

		slurm_getaffinity(mypid, sizeof(cur_mask), &cur_mask);
		if (get_cpuset(&new_mask, job) &&
		    (!(job->cpu_bind_type & CPU_BIND_NONE))) {
			if (conf->task_plugin_param & CPU_BIND_CPUSETS) {
				rc = slurm_set_cpuset(base, path, mypid,
						      sizeof(new_mask),
						      &new_mask);
				slurm_get_cpuset(path, mypid,
						 sizeof(cur_mask),
						 &cur_mask);
			} else {
				rc = slurm_setaffinity(mypid,
						       sizeof(new_mask),
						       &new_mask);
				slurm_getaffinity(mypid,
						  sizeof(cur_mask),
						  &cur_mask);
			}
		}
		/* report whichever mask is actually in effect */
		slurm_chkaffinity(rc ? &cur_mask : &new_mask, job, rc);
	} else if (job->mem_bind_type &&
		   (conf->task_plugin_param & CPU_BIND_CPUSETS)) {
		cpu_set_t cur_mask;
		pid_t mypid = job->envtp->task_pid;

		/* Establish cpuset just for the memory binding */
		slurm_getaffinity(mypid, sizeof(cur_mask), &cur_mask);
		rc = slurm_set_cpuset(base, path,
				      (pid_t) job->envtp->task_pid,
				      sizeof(cur_mask), &cur_mask);
	}

#ifdef HAVE_NUMA
	if ((conf->task_plugin_param & CPU_BIND_CPUSETS) &&
	    (slurm_memset_available() >= 0)) {
		nodemask_t new_mask, cur_mask;

		cur_mask = numa_get_membind();
		if (get_memset(&new_mask, job) &&
		    (!(job->mem_bind_type & MEM_BIND_NONE))) {
			slurm_set_memset(path, &new_mask);
			if (numa_available() >= 0)
				numa_set_membind(&new_mask);
			cur_mask = new_mask;
		}
		slurm_chk_memset(&cur_mask, job);
	} else if (job->mem_bind_type && (numa_available() >= 0)) {
		nodemask_t new_mask, cur_mask;

		cur_mask = numa_get_membind();
		if (get_memset(&new_mask, job) &&
		    (!(job->mem_bind_type & MEM_BIND_NONE))) {
			numa_set_membind(&new_mask);
			cur_mask = new_mask;
		}
		slurm_chk_memset(&cur_mask, job);
	}
#endif
	return rc;
}
/*
 * linuxNodeInfoCPUPopulate:
 * Fill in 'nodeinfo' (cpus, mhz, cores, sockets, threads, nodes) by
 * combining /proc/cpuinfo (clock speed) with the per-cpu topology
 * entries under 'sysfs_cpudir'.  Returns 0 on success, -1 on error.
 *
 * NOTE(review): nodeinfo->sockets and nodeinfo->threads are read and
 * updated below but are not zeroed here (unlike cpus/mhz/cores/nodes)
 * — confirm the caller clears the whole structure first.
 */
int linuxNodeInfoCPUPopulate(FILE *cpuinfo,
                             const char *sysfs_cpudir,
                             virNodeInfoPtr nodeinfo)
{
    char line[1024];
    DIR *cpudir = NULL;
    struct dirent *cpudirent = NULL;
    unsigned int cpu;
    unsigned long core, sock, cur_threads;
    cpu_set_t core_mask;
    cpu_set_t socket_mask;
    int online;

    nodeinfo->cpus = 0;
    nodeinfo->mhz = 0;
    nodeinfo->cores = 0;
    nodeinfo->nodes = 1;
# if HAVE_NUMACTL
    if (numa_available() >= 0)
        nodeinfo->nodes = numa_max_node() + 1;
# endif

    /* NOTE(review): sysfs_path appears to be a file-scope buffer —
     * verify it is large enough for sysfs_cpudir. */
    if (!virStrcpyStatic(sysfs_path, sysfs_cpudir)) {
        virReportSystemError(errno, _("cannot copy %s"), sysfs_cpudir);
        return -1;
    }
    /* NB: It is impossible to fill our nodes, since cpuinfo
     * has no knowledge of NUMA nodes */

    /* NOTE: hyperthreads are ignored here; they are parsed out of /sys */
    while (fgets(line, sizeof(line), cpuinfo) != NULL) {
# if defined(__x86_64__) || \
     defined(__amd64__)  || \
     defined(__i386__)
        char *buf = line;
        if (STRPREFIX(buf, "cpu MHz")) {
            char *p;
            unsigned int ui;
            buf += 9;   /* advance past "cpu MHz" */
            while (*buf && c_isspace(*buf))
                buf++;
            if (*buf != ':' || !buf[1]) {
                nodeReportError(VIR_ERR_INTERNAL_ERROR,
                                "%s", _("parsing cpuinfo cpu MHz"));
                return -1;
            }
            if (virStrToLong_ui(buf+1, &p, 10, &ui) == 0
                /* Accept trailing fractional part. */
                && (*p == '\0' || *p == '.' || c_isspace(*p)))
                nodeinfo->mhz = ui;
        }
# elif defined(__powerpc__) || \
       defined(__powerpc64__)
        char *buf = line;
        if (STRPREFIX(buf, "clock")) {
            char *p;
            unsigned int ui;
            buf += 5;   /* advance past "clock" */
            while (*buf && c_isspace(*buf))
                buf++;
            if (*buf != ':' || !buf[1]) {
                nodeReportError(VIR_ERR_INTERNAL_ERROR,
                                "%s", _("parsing cpuinfo cpu MHz"));
                return -1;
            }
            if (virStrToLong_ui(buf+1, &p, 10, &ui) == 0
                /* Accept trailing fractional part. */
                && (*p == '\0' || *p == '.' || c_isspace(*p)))
                nodeinfo->mhz = ui;
            /* No other interesting infos are available in /proc/cpuinfo.
             * However, there is a line identifying processor's version,
             * identification and machine, but we don't want it to be caught
             * and parsed in next iteration, because it is not in expected
             * format and thus lead to error. */
        }
# else
#  warning Parser for /proc/cpuinfo needs to be adapted for your architecture
# endif
    }

    /* OK, we've parsed clock speed out of /proc/cpuinfo. Get the core, socket
     * thread and topology information from /sys
     */
    cpudir = opendir(sysfs_cpudir);
    if (cpudir == NULL) {
        virReportSystemError(errno, _("cannot opendir %s"), sysfs_cpudir);
        return -1;
    }

    CPU_ZERO(&core_mask);
    CPU_ZERO(&socket_mask);

    while ((cpudirent = readdir(cpudir))) {
        if (sscanf(cpudirent->d_name, "cpu%u", &cpu) != 1)
            continue;

        online = cpu_online(cpu);
        if (online < 0) {
            closedir(cpudir);
            return -1;
        }
        if (!online)
            continue;
        nodeinfo->cpus++;

        /* Parse core */
        core = parse_core(cpu);
        if (!CPU_ISSET(core, &core_mask)) {
            CPU_SET(core, &core_mask);
            nodeinfo->cores++;
        }

        /* Parse socket */
        sock = parse_socket(cpu);
        if (!CPU_ISSET(sock, &socket_mask)) {
            CPU_SET(sock, &socket_mask);
            nodeinfo->sockets++;
        }

        /* track the largest sibling count seen as the thread count */
        cur_threads = count_thread_siblings(cpu);
        if (cur_threads == 0) {
            closedir(cpudir);
            return -1;
        }
        if (cur_threads > nodeinfo->threads)
            nodeinfo->threads = cur_threads;
    }
    if (errno) {
        virReportSystemError(errno, _("problem reading %s"), sysfs_path);
        closedir(cpudir);
        return -1;
    }

    closedir(cpudir);

    /* there should always be at least one cpu, socket and one thread */
    if (nodeinfo->cpus == 0) {
        nodeReportError(VIR_ERR_INTERNAL_ERROR,
                        "%s", _("no CPUs found"));
        return -1;
    }
    if (nodeinfo->sockets == 0) {
        nodeReportError(VIR_ERR_INTERNAL_ERROR,
                        "%s", _("no sockets found"));
        return -1;
    }
    if (nodeinfo->threads == 0) {
        nodeReportError(VIR_ERR_INTERNAL_ERROR,
                        "%s", _("no threads found"));
        return -1;
    }

    /* nodeinfo->sockets is supposed to be a number of sockets per NUMA node,
     * however if NUMA nodes are not composed of whole sockets, we just lie
     * about the number of NUMA nodes and force apps to check capabilities XML
     * for the actual NUMA topology.
     */
    if (nodeinfo->sockets % nodeinfo->nodes == 0)
        nodeinfo->sockets /= nodeinfo->nodes;
    else
        nodeinfo->nodes = 1;

    return 0;
}
/*
 * virNumaSetupMemoryPolicy:
 * Apply the memory tuning from 'numatune' (strict/preferred/interleave)
 * to the current process via libnuma, using either the statically
 * configured nodeset or the advisory 'nodemask' from numad.
 *
 * Returns 0 on success (or when there is nothing to do), -1 on error.
 */
int virNumaSetupMemoryPolicy(virNumaTuneDef numatune,
                             virBitmapPtr nodemask)
{
    nodemask_t mask;
    int mode = -1;
    int node = -1;
    int ret = -1;
    int i = 0;
    int maxnode = 0;
    virBitmapPtr tmp_nodemask = NULL;

    if (numatune.memory.placement_mode ==
        VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_STATIC) {
        if (!numatune.memory.nodemask)
            return 0;
        VIR_DEBUG("Set NUMA memory policy with specified nodeset");
        tmp_nodemask = numatune.memory.nodemask;
    } else if (numatune.memory.placement_mode ==
               VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_AUTO) {
        VIR_DEBUG("Set NUMA memory policy with advisory nodeset from numad");
        tmp_nodemask = nodemask;
    } else {
        return 0;
    }

    if (numa_available() < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       "%s", _("Host kernel is not aware of NUMA."));
        return -1;
    }

    /* BUG FIX: the highest valid node id is numa_max_node() itself.
     * The old code used numa_max_node()+1 with '>', which let an
     * out-of-range node id pass, and 'i > NUMA_NUM_NODES' permitted
     * i == NUMA_NUM_NODES, one past the last bit of nodemask_t. */
    maxnode = numa_max_node();

    /* Convert nodemask to NUMA bitmask. */
    nodemask_zero(&mask);
    i = -1;
    while ((i = virBitmapNextSetBit(tmp_nodemask, i)) >= 0) {
        if (i > maxnode || i >= NUMA_NUM_NODES) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Nodeset is out of range, host cannot support "
                             "NUMA node bigger than %d"), i);
            return -1;
        }
        nodemask_set(&mask, i);
    }

    mode = numatune.memory.mode;
    if (mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT) {
        numa_set_bind_policy(1);
        numa_set_membind(&mask);
        numa_set_bind_policy(0);
    } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_PREFERRED) {
        int nnodes = 0;
        for (i = 0; i < NUMA_NUM_NODES; i++) {
            if (nodemask_isset(&mask, i)) {
                node = i;
                nnodes++;
            }
        }

        if (nnodes != 1) {
            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                           _("NUMA memory tuning in 'preferred' mode "
                             "only supports single node"));
            goto cleanup;
        }

        numa_set_bind_policy(0);
        numa_set_preferred(node);
    } else if (mode == VIR_DOMAIN_NUMATUNE_MEM_INTERLEAVE) {
        numa_set_interleave_mask(&mask);
    } else {
        /* XXX: Shouldn't go here, as we already do checking when
         * parsing domain XML. */
        virReportError(VIR_ERR_XML_ERROR,
                       "%s", _("Invalid mode for memory NUMA tuning."));
        goto cleanup;
    }

    ret = 0;

cleanup:
    return ret;
}
/*
 * setup() - find two NUMA nodes suitable for the migrate_pages() test.
 *
 * Requires root, the migrate_pages syscall, NUMA support, and at least
 * two allowed memory nodes that can each actually hold NODE_MIN_FREEMEM
 * bytes (verified by allocating, touching, and checking the placement
 * of a trial buffer).  The chosen nodes are stored in nodeA / nodeB.
 */
static void setup(void)
{
	int ret, i, j;
	int pagesize = getpagesize();
	void *p;

	tst_require_root(NULL);
	TEST(ltp_syscall(__NR_migrate_pages, 0, 0, NULL, NULL));

	if (numa_available() == -1)
		tst_brkm(TCONF, NULL, "NUMA not available");

	ret = get_allowed_nodes_arr(NH_MEMS, &num_nodes, &nodes);
	if (ret < 0)
		tst_brkm(TBROK | TERRNO, NULL, "get_allowed_nodes(): %d",
			 ret);

	if (num_nodes < 2)
		tst_brkm(TCONF, NULL, "at least 2 allowed NUMA nodes"
			 " are required");
	else if (tst_kvercmp(2, 6, 18) < 0)
		tst_brkm(TCONF, NULL, "2.6.18 or greater kernel required");

	/*
	 * find 2 nodes, which can hold NODE_MIN_FREEMEM bytes
	 * The reason is that:
	 * 1. migrate_pages() is expected to succeed
	 * 2. this test avoids hitting:
	 *    Bug 870326 - migrate_pages() reports success, but pages are
	 *                 not moved to desired node
	 *    https://bugzilla.redhat.com/show_bug.cgi?id=870326
	 */
	nodeA = nodeB = -1;
	for (i = 0; i < num_nodes; i++) {
		/* allocate on node i, touch every byte, then verify each
		 * page really landed on that node */
		p = numa_alloc_onnode(NODE_MIN_FREEMEM, nodes[i]);
		if (p == NULL)
			break;
		memset(p, 0xff, NODE_MIN_FREEMEM);

		j = 0;
		while (j < NODE_MIN_FREEMEM) {
			if (addr_on_node(p + j) != nodes[i])
				break;
			j += pagesize;
		}
		numa_free(p, NODE_MIN_FREEMEM);

		/* node passed the placement check: record it */
		if (j >= NODE_MIN_FREEMEM) {
			if (nodeA == -1)
				nodeA = nodes[i];
			else if (nodeB == -1)
				nodeB = nodes[i];
			else
				break;
		}
	}

	if (nodeA == -1 || nodeB == -1)
		tst_brkm(TCONF, NULL, "at least 2 NUMA nodes with "
			 "free mem > %d are needed", NODE_MIN_FREEMEM);
	tst_resm(TINFO, "Using nodes: %d %d", nodeA, nodeB);

	ltpuser = getpwnam(nobody_uid);
	if (ltpuser == NULL)
		tst_brkm(TBROK | TERRNO, NULL, "getpwnam failed");

	TEST_PAUSE;
}