void* mmap_1g(void* addr /* = nullptr */, int node /* = -1 */) {
#ifdef __linux__
  if (s_num1GPages >= kMaxNum1GPages) return nullptr;
  if (get_huge1g_info(node).free_hugepages <= 0) return nullptr;
  if (node >= 0 && !numa_node_allowed(node)) return nullptr;
#ifdef HAVE_NUMA
  bitmask* memMask = nullptr;
  bitmask* interleaveMask = nullptr;
  if (node >= 0 && numa_num_nodes > 1) {
    memMask = numa_get_membind();
    interleaveMask = numa_get_interleave_mask();
    bitmask* mask = numa_allocate_nodemask();
    numa_bitmask_setbit(mask, node);
    numa_set_membind(mask);
    numa_bitmask_free(mask);
  }
#endif
  void* ret = mmap_1g_impl(addr);
  if (ret != nullptr) {
    s_1GPages[s_num1GPages++] = ret;
  }
#ifdef HAVE_NUMA
  if (memMask) {
    assert(interleaveMask);
    numa_set_membind(memMask);
    numa_set_interleave_mask(interleaveMask);
    numa_bitmask_free(memMask);
    numa_bitmask_free(interleaveMask);
  }
#endif
  return ret;
#else
  return nullptr;
#endif
}

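The save/bind/restore dance around numa_get_membind() above is the pattern most snippets in this section share: snapshot the current membind and interleave masks, restrict allocation to one node, allocate, then restore the caller's policy. The following is a minimal standalone sketch of that pattern, assuming the libnuma v2 bitmask API; the helper name bind_alloc_to_node() and the plain malloc() stand-in for mmap_1g_impl() are illustrative only.

#include <numa.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical helper: allocate `bytes` with the memory policy temporarily
 * restricted to `node`, then restore the caller's original policy. */
static void *bind_alloc_to_node(size_t bytes, int node)
{
    struct bitmask *saved_mem, *saved_ilv, *mask;
    void *p;

    if (numa_available() < 0 || node > numa_max_node())
        return NULL;

    /* Save the current membind and interleave masks. */
    saved_mem = numa_get_membind();
    saved_ilv = numa_get_interleave_mask();

    /* Restrict allocations to the requested node. */
    mask = numa_allocate_nodemask();
    numa_bitmask_setbit(mask, node);
    numa_set_membind(mask);
    numa_bitmask_free(mask);

    p = malloc(bytes);           /* stand-in for mmap_1g_impl() and friends */
    if (p)
        memset(p, 0, bytes);     /* touch pages so placement happens now */

    /* Restore the caller's policy. */
    numa_set_membind(saved_mem);
    numa_set_interleave_mask(saved_ilv);
    numa_bitmask_free(saved_mem);
    numa_bitmask_free(saved_ilv);
    return p;
}

Compile and link with -lnuma. The same structure reappears in mmap_2m() and remap_interleaved_2m_pages() further down.
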
/*
 * get_current_nodeid_list() - fill arg array with nodes from
 * current thread's allowed node mask.  return # of nodes in
 * mask.
 */
static int get_current_nodeid_list(unsigned int *fromids)
{
    glctx_t *gcp = &glctx;    /* must be initialized before it is read below */
    nodemask_t my_allowed_nodes;
    int nr_nodes = 0, max_node = gcp->numa_max_node;
    int node;

#if defined(LIBNUMA_API_VERSION) && LIBNUMA_API_VERSION == 2
    my_allowed_nodes = numa_get_membind_compat();
#else
    my_allowed_nodes = numa_get_membind();
#endif
    for (node = 0; node <= max_node; ++node) {
        if (nodemask_isset(&my_allowed_nodes, node))
            *(fromids + nr_nodes++) = node;
    }

    /*
     * shouldn't happen, but let 'em know if it does
     */
    if (nr_nodes == 0)
        fprintf(stderr, "%s: my allowed node mask is empty !!???\n",
            gcp->program_name);

    return nr_nodes;
}

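Under the v2 bitmask API the same walk can skip the nodemask_t compatibility layer entirely. A hedged sketch (the function name and the caller-supplied fromids array are illustrative, not part of the original test):

#include <numa.h>

/* v2-only sketch: collect the allowed node ids straight from the
 * bitmask returned by numa_get_membind(). */
static int current_nodeid_list_v2(unsigned int *fromids)
{
    struct bitmask *allowed = numa_get_membind();
    int node, nr_nodes = 0;

    for (node = 0; node <= numa_max_node(); node++)
        if (numa_bitmask_isbitset(allowed, node))
            fromids[nr_nodes++] = node;

    numa_bitmask_free(allowed);
    return nr_nodes;
}
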
static char *printMemMask(void)
{
#ifdef HAVE_LIBNUMA
    struct bitmask *memmask;
    int i, j, p, max, s;
    static char ret[PSCPU_MAX/4+10];

    memmask = numa_get_membind();
    strcpy(ret, "0x");
    i = 0;
    p = 2;
    max = numa_max_node();
    while (i <= max) {
        s = 0;
        for (j = 0; j < 4 && i <= max; j++) {
            s += (numa_bitmask_isbitset(memmask, i++) ? 1 : 0) * pow(2, j);
        }
        snprintf(ret+(p++), 2, "%X", s);
    }
    return ret;
#else
    return "(no numa support)";
#endif
}

void show(void)
{
    unsigned long prefnode;
    struct bitmask *membind, *interleave, *cpubind;
    unsigned long cur;
    int policy;
    int numa_num_nodes = numa_num_possible_nodes();

    if (numa_available() < 0) {
        show_physcpubind();
        printf("No NUMA support available on this system.\n");
        exit(1);
    }

    cpubind = numa_get_run_node_mask();

    prefnode = numa_preferred();
    interleave = numa_get_interleave_mask();
    membind = numa_get_membind();
    cur = numa_get_interleave_node();

    policy = 0;
    if (get_mempolicy(&policy, NULL, 0, 0, 0) < 0)
        perror("get_mempolicy");

    printf("policy: %s\n", policy_name(policy));

    printf("preferred node: ");
    switch (policy) {
    case MPOL_PREFERRED:
        if (prefnode != -1) {
            printf("%ld\n", prefnode);
            break;
        }
        /*FALL THROUGH*/
    case MPOL_DEFAULT:
        printf("current\n");
        break;
    case MPOL_INTERLEAVE:
        printf("%ld (interleave next)\n", cur);
        break;
    case MPOL_BIND:
        printf("%d\n", find_first_bit(&membind, numa_num_nodes));
        break;
    }
    if (policy == MPOL_INTERLEAVE) {
        printmask("interleavemask", interleave);
        printf("interleavenode: %ld\n", cur);
    }
    show_physcpubind();
    printmask("cpubind", cpubind);   // for compatibility
    printmask("nodebind", cpubind);
    printmask("membind", membind);
}

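show() pairs the libnuma masks with a raw get_mempolicy(2) query to learn which policy is actually in force before deciding how to present the membind mask. Below is a self-contained sketch of just that policy query, assuming <numaif.h> and linking with -lnuma for numa_max_node(); the fixed-size nodemask array is an illustrative simplification.

#include <numa.h>
#include <numaif.h>
#include <stdio.h>

int main(void)
{
    int policy = 0;
    unsigned long nodemask[16] = { 0 };  /* 1024 bits, plenty for most systems */
    unsigned long maxnode = sizeof(nodemask) * 8;
    int node;

    /* With flags == 0 and addr == NULL this returns the calling thread's
     * policy and the nodemask associated with that policy. */
    if (get_mempolicy(&policy, nodemask, maxnode, NULL, 0) < 0) {
        perror("get_mempolicy");
        return 1;
    }

    printf("policy: %d (%s)\n", policy,
           policy == MPOL_DEFAULT    ? "default" :
           policy == MPOL_PREFERRED  ? "preferred" :
           policy == MPOL_BIND       ? "bind" :
           policy == MPOL_INTERLEAVE ? "interleave" : "other");

    for (node = 0; node <= numa_max_node(); node++)
        if (nodemask[node / (8 * sizeof(unsigned long))] &
            (1UL << (node % (8 * sizeof(unsigned long)))))
            printf("node %d is in the policy mask\n", node);
    return 0;
}
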
/*
 * get_arg_nodeid_list() -- get list [array] of node ids from comma-separated list.
 *
 * on success, returns count of id's in list; on error -1
 */
static int get_arg_nodeid_list(char *args, unsigned int *list)
{
    glctx_t *gcp;
    char *next;
    nodemask_t my_allowed_nodes;
    int node, count = 0;

    gcp = &glctx;

#if defined(LIBNUMA_API_VERSION) && LIBNUMA_API_VERSION == 2
    my_allowed_nodes = numa_get_membind_compat();
#else
    my_allowed_nodes = numa_get_membind();
#endif
    while (*args != '\0') {
        if (!isdigit(*args)) {
            fprintf(stderr, "%s: expected digit for <node/list>\n",
                gcp->program_name);
            return -1;
        }
        node = strtoul(args, &next, 10);
        if (node > gcp->numa_max_node) {
            fprintf(stderr, "%s: node ids must be <= %d\n",
                gcp->program_name, gcp->numa_max_node);
            return -1;
        }
        if (!nodemask_isset(&my_allowed_nodes, node)) {
            fprintf(stderr, "%s: node %d is not in my allowed node mask\n",
                gcp->program_name, node);
            return -1;
        }
        *(list + count++) = node;
        if (*next == '\0')
            return count;
        if (*next != ',')
            break;
        if (count >= gcp->numa_max_node) {
            fprintf(stderr, "%s: too many node ids in list\n",
                gcp->program_name);
            return -1;    /* avoid overrunning the caller's list */
        }
        args = next + 1;
    }
    return -1;
}

int numa_get_mem_node()
{
    struct bitmask *bmp = numa_get_membind();
    int nbytes = numa_bitmask_nbytes(bmp);
    int num_nodes = 0;
    int node_id = -1;
    int i;

    for (i = 0; i < nbytes * 8; i++)
        if (numa_bitmask_isbitset(bmp, i)) {
            num_nodes++;
            node_id = i;
        }
    assert(num_nodes == 1);
    return node_id;
}

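numa_get_mem_node() scans every bit of the membind mask to verify that exactly one node is bound. libnuma v2 also provides numa_bitmask_weight() for the set-bit count, so an equivalent can be written as the following hedged sketch (same single-node assumption; the helper name is illustrative):

#include <assert.h>
#include <numa.h>

/* Sketch: use numa_bitmask_weight() for the popcount instead of scanning
 * the whole mask, then find the single bound node. */
static int mem_node_from_membind(void)
{
    struct bitmask *bmp = numa_get_membind();
    int node = -1;
    int i;

    assert(numa_bitmask_weight(bmp) == 1);   /* exactly one node bound */
    for (i = 0; i <= numa_max_node(); i++)
        if (numa_bitmask_isbitset(bmp, i)) {
            node = i;
            break;
        }
    numa_bitmask_free(bmp);
    return node;
}
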
void* mmap_2m(void* addr, int prot, int node /* = -1 */,
              bool map_shared /* = false */, bool map_fixed /* = false */) {
#ifdef __linux__
  if (get_huge2m_info(node).free_hugepages <= 0) return nullptr;
#ifdef HAVE_NUMA
  bitmask* memMask = nullptr;
  bitmask* interleaveMask = nullptr;
  if (node >= 0 && numa_num_nodes > 1) {
    assert(numa_node_set != 0);
    if ((numa_node_set & (1u << node)) == 0) {
      // Numa policy forbids allocation on the node.
      return nullptr;
    }
    memMask = numa_get_membind();
    interleaveMask = numa_get_interleave_mask();
    bitmask* mask = numa_allocate_nodemask();
    numa_bitmask_setbit(mask, node);
    numa_set_membind(mask);
    numa_bitmask_free(mask);
  }
#endif
  void* ret = mmap_2m_impl(addr, prot, map_shared, map_fixed);
  s_num2MPages += !!ret;
#ifdef HAVE_NUMA
  if (memMask) {
    numa_set_membind(memMask);
    numa_set_interleave_mask(interleaveMask);
    numa_bitmask_free(memMask);
    numa_bitmask_free(interleaveMask);
  }
#endif
  return ret;
#else  // not linux
  return nullptr;
#endif
}

/*
 * task_pre_launch() is called prior to exec of application task.
 * It is followed by TaskProlog program (from slurm.conf) and
 * --task-prolog (from srun command line).
 */
extern int task_pre_launch (slurmd_job_t *job)
{
    char base[PATH_MAX], path[PATH_MAX];
    int rc = SLURM_SUCCESS;

    debug("affinity task_pre_launch:%u.%u, task:%u bind:%u",
          job->jobid, job->stepid, job->envtp->procid, job->cpu_bind_type);

    if (conf->task_plugin_param & CPU_BIND_CPUSETS) {
        info("Using cpuset affinity for tasks");
#ifdef MULTIPLE_SLURMD
        if (snprintf(base, PATH_MAX, "%s/slurm_%s_%u", CPUSET_DIR,
                     (conf->node_name != NULL) ? conf->node_name : "",
                     job->jobid) > PATH_MAX) {
            error("cpuset path too long");
            return SLURM_ERROR;
        }
#else
        if (snprintf(base, PATH_MAX, "%s/slurm%u", CPUSET_DIR,
                     job->jobid) > PATH_MAX) {
            error("cpuset path too long");
            return SLURM_ERROR;
        }
#endif
        if (snprintf(path, PATH_MAX, "%s/slurm%u.%u_%d", base,
                     job->jobid, job->stepid,
                     job->envtp->localid) > PATH_MAX) {
            error("cpuset path too long");
            return SLURM_ERROR;
        }
    } else
        info("Using sched_affinity for tasks");

    /*** CPU binding support ***/
    if (job->cpu_bind_type) {
        cpu_set_t new_mask, cur_mask;
        pid_t mypid = job->envtp->task_pid;

        slurm_getaffinity(mypid, sizeof(cur_mask), &cur_mask);
        if (get_cpuset(&new_mask, job) &&
            (!(job->cpu_bind_type & CPU_BIND_NONE))) {
            if (conf->task_plugin_param & CPU_BIND_CPUSETS) {
                rc = slurm_set_cpuset(base, path, mypid,
                                      sizeof(new_mask), &new_mask);
                slurm_get_cpuset(path, mypid,
                                 sizeof(cur_mask), &cur_mask);
            } else {
                rc = slurm_setaffinity(mypid, sizeof(new_mask),
                                       &new_mask);
                slurm_getaffinity(mypid, sizeof(cur_mask),
                                  &cur_mask);
            }
        }
        slurm_chkaffinity(rc ? &cur_mask : &new_mask, job, rc);
    } else if (job->mem_bind_type &&
               (conf->task_plugin_param & CPU_BIND_CPUSETS)) {
        cpu_set_t cur_mask;
        pid_t mypid = job->envtp->task_pid;

        /* Establish cpuset just for the memory binding */
        slurm_getaffinity(mypid, sizeof(cur_mask), &cur_mask);
        rc = slurm_set_cpuset(base, path, (pid_t) job->envtp->task_pid,
                              sizeof(cur_mask), &cur_mask);
    }

#ifdef HAVE_NUMA
    if ((conf->task_plugin_param & CPU_BIND_CPUSETS) &&
        (slurm_memset_available() >= 0)) {
        nodemask_t new_mask, cur_mask;

        cur_mask = numa_get_membind();
        if (get_memset(&new_mask, job) &&
            (!(job->mem_bind_type & MEM_BIND_NONE))) {
            slurm_set_memset(path, &new_mask);
            if (numa_available() >= 0)
                numa_set_membind(&new_mask);
            cur_mask = new_mask;
        }
        slurm_chk_memset(&cur_mask, job);
    } else if (job->mem_bind_type && (numa_available() >= 0)) {
        nodemask_t new_mask, cur_mask;

        cur_mask = numa_get_membind();
        if (get_memset(&new_mask, job) &&
            (!(job->mem_bind_type & MEM_BIND_NONE))) {
            numa_set_membind(&new_mask);
            cur_mask = new_mask;
        }
        slurm_chk_memset(&cur_mask, job);
    }
#endif
    return rc;
}

size_t remap_interleaved_2m_pages(void* addr, size_t pages, int prot,
                                  bool shared /* = false */) {
#ifdef __linux__
  assert(reinterpret_cast<uintptr_t>(addr) % size2m == 0);
  assert(addr != nullptr);

  if (pages == 0) return 0;

#ifdef HAVE_NUMA
  const int maxNode = numa_max_node();
  bitmask* memMask = nullptr;
  bitmask* interleaveMask = nullptr;
  bitmask* mask = nullptr;
  if (maxNode > 0) {
    memMask = numa_get_membind();
    interleaveMask = numa_get_interleave_mask();
    mask = numa_allocate_nodemask();
  }
#else
  constexpr int maxNode = 0;
#endif
  int node = -1;
  int failed = 0;                       // consecutive failure count
  int mapped_count = 0;
  do {
#ifdef HAVE_NUMA
    if (maxNode > 0) {
      if (++node > maxNode) node = 0;
      if (!numa_node_allowed(node)) {
        // Numa policy forbids allocation on node
        if (++failed > maxNode) break;
        continue;
      }
      numa_bitmask_setbit(mask, node);
      numa_set_membind(mask);
      numa_bitmask_clearbit(mask, node);
    }
#endif
    // Fail early if we don't have huge pages reserved.
    if (get_huge2m_info(node).free_hugepages > 0 &&
        mmap_2m_impl(addr, prot, shared, true /* MAP_FIXED */)) {
      addr = (char*)addr + size2m;
      ++mapped_count;
      failed = 0;
      continue;
    }
    // We failed on node, give up if we have failed on all nodes
    if (++failed > maxNode) break;
  } while (mapped_count < pages);

#ifdef HAVE_NUMA
  if (mask) {
    numa_set_membind(memMask);
    numa_set_interleave_mask(interleaveMask);
    numa_bitmask_free(mask);
    numa_bitmask_free(interleaveMask);
    numa_bitmask_free(memMask);
  }
#endif
  return mapped_count;
#else  // not linux
  return 0;
#endif
}

static void _load_mem_mask(MY_MASK *mem_mask)
{
    *mem_mask = numa_get_membind();
}

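_load_mem_mask() simply snapshots the current membind mask into caller-provided storage; MY_MASK is the project's own typedef for whichever mask type its libnuma version returns. For reference, a minimal v2-API harness that prints the nodes currently allowed by the membind mask makes a handy sanity check next to any of the helpers above (a hedged, standalone sketch; compile with -lnuma):

#include <numa.h>
#include <stdio.h>

int main(void)
{
    struct bitmask *bmp;
    int node;

    if (numa_available() < 0) {
        fprintf(stderr, "no NUMA support on this system\n");
        return 1;
    }

    bmp = numa_get_membind();
    printf("membind mask:");
    for (node = 0; node <= numa_max_node(); node++)
        if (numa_bitmask_isbitset(bmp, node))
            printf(" %d", node);
    printf("\n");
    numa_bitmask_free(bmp);
    return 0;
}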