void backend_set_numa(unsigned id)
{
	struct bitmask *bm = numa_allocate_cpumask();

	numa_bitmask_setbit(bm, id);
	numa_sched_setaffinity(0, bm);
	numa_free_cpumask(bm);
}
void bind2node_id(int node_id)
{
	struct bitmask *bmp = numa_allocate_nodemask();

	numa_bitmask_setbit(bmp, node_id);
	numa_bind(bmp);
	numa_free_nodemask(bmp);
}
void* mmap_1g(void* addr /* = nullptr */, int node /* = -1 */) {
#ifdef __linux__
  if (s_num1GPages >= kMaxNum1GPages) return nullptr;
  if (get_huge1g_info(node).free_hugepages <= 0) return nullptr;
  if (node >= 0 && !numa_node_allowed(node)) return nullptr;
#ifdef HAVE_NUMA
  bitmask* memMask = nullptr;
  bitmask* interleaveMask = nullptr;
  if (node >= 0 && numa_num_nodes > 1) {
    memMask = numa_get_membind();
    interleaveMask = numa_get_interleave_mask();
    bitmask* mask = numa_allocate_nodemask();
    numa_bitmask_setbit(mask, node);
    numa_set_membind(mask);
    numa_bitmask_free(mask);
  }
#endif
  void* ret = mmap_1g_impl(addr);
  if (ret != nullptr) {
    s_1GPages[s_num1GPages++] = ret;
  }
#ifdef HAVE_NUMA
  if (memMask) {
    assert(interleaveMask);
    numa_set_membind(memMask);
    numa_set_interleave_mask(interleaveMask);
    numa_bitmask_free(memMask);
    numa_bitmask_free(interleaveMask);
  }
#endif
  return ret;
#else
  return nullptr;
#endif
}
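The allocator above, and the mmap_2m/remap_interleaved_2m_pages variants further down, all share one idiom: save the current membind policy, bind to a single node for the allocation, then restore. A minimal standalone sketch of that idiom, using only documented libnuma calls (the helper name alloc_on_node is invented for illustration):

#include <numa.h>

/* Hypothetical helper sketching the save/bind/restore idiom. */
static void *alloc_on_node(size_t sz, int node)
{
	struct bitmask *saved = numa_get_membind();    /* current policy */
	struct bitmask *one = numa_allocate_nodemask();

	numa_bitmask_setbit(one, node);
	numa_set_membind(one);                         /* bind to one node */
	void *p = numa_alloc(sz);                      /* allocation lands on node */
	numa_set_membind(saved);                       /* restore old policy */

	numa_bitmask_free(one);
	numa_bitmask_free(saved);
	return p;                                      /* release with numa_free(p, sz) */
}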
int bind_cpu(int cpu)
{
	struct bitmask *nodemask = numa_parse_nodestring("0");
	struct bitmask *cpumask = numa_allocate_cpumask();
	int ret;

	/* Bind memory to node 0, then pin the process to a single CPU.
	 * The caller appears to pass 1-based CPU numbers, hence cpu - 1. */
	numa_bind(nodemask);
	numa_bitmask_clearall(cpumask);
	numa_bitmask_setbit(cpumask, cpu - 1);
	ret = numa_sched_setaffinity(getpid(), cpumask);

	numa_bitmask_free(nodemask);
	numa_bitmask_free(cpumask);
	return ret;
}
/*
 * Parse the string @a maskStr containing a hex number (with or without
 * leading "0x") and set nodemask accordingly.
 *
 * If the string is not a valid hex number, each bit in nodemask becomes set.
 */
static void parseNUMAmask(struct bitmask *nodemask, char *maskStr, int32_t rank)
{
    char *mask, *curchar, *endptr;
    size_t len;
    uint32_t curbit;
    uint16_t i, j, digit;

    mask = maskStr;
    if (strncmp(maskStr, "0x", 2) == 0) {
	/* skip "0x", treat always as hex */
	mask += 2;
    }

    mask = ustrdup(mask); /* gets destroyed */

    len = strlen(mask);
    curchar = mask + (len - 1);
    curbit = 0;
    for (i = len; i > 0; i--) {
	digit = strtol(curchar, &endptr, 16);
	if (*endptr != '\0') {
	    mlog("%s: error parsing memory mask '%s'\n", __func__, maskStr);
	    goto error;
	}

	for (j = 0; j < 4; j++) {
	    if (digit & (1 << j)) {
		if ((long int)(curbit + j) > numa_max_node()) {
		    mlog("%s: invalid memory mask entry '%s' for rank %d\n",
			 __func__, maskStr, rank);
		    fprintf(stderr, "Invalid memory mask entry '%s' for rank"
			    " %d\n", maskStr, rank);
		    goto error;
		}
		if (numa_bitmask_isbitset(numa_get_mems_allowed(),
					  curbit + j)) {
		    numa_bitmask_setbit(nodemask, curbit + j);
		} else {
		    mlog("%s: setting bit %u in memory mask not allowed in"
			 " rank %d\n", __func__, curbit + j, rank);
		    fprintf(stderr, "Not allowed to set bit %u in memory mask"
			    " of rank %d\n", curbit + j, rank);
		}
	    }
	}
	curbit += 4;
	*curchar = '\0';
	curchar--;
    }

    ufree(mask);
    return;

error:
    ufree(mask);
    numa_bitmask_setall(nodemask);
}
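A sketch of a possible caller, assuming the same codebase's numa_set_membind usage (the helper name apply_mask is invented):

/* Hypothetical caller: turn a user-supplied hex string into a memory
 * binding. On a parse error parseNUMAmask() sets every bit, so the
 * bind degenerates to "all nodes" rather than failing. */
static void apply_mask(char *maskStr, int32_t rank)
{
    struct bitmask *nodemask = numa_allocate_nodemask();

    parseNUMAmask(nodemask, maskStr, rank);
    numa_set_membind(nodemask);
    numa_free_nodemask(nodemask);
}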
void CPU_Bind(int cpu)
{
	CPU_Set_t *cs = numa_allocate_cpumask();

	numa_bitmask_setbit(cs, cpu);
	int res = numa_sched_setaffinity(0, cs);
	if (res < 0)
		epanic("bindToCPU(%d)", cpu);
	CPU_FreeSet(cs);
}
void ConfigureTableThread() {
  int32_t node_id = GlobalContext::get_numa_index();

  struct bitmask *mask = numa_allocate_nodemask();
  mask = numa_bitmask_setbit(mask, node_id);

  // set the NUMA memory binding to "preferred" rather than strict
  numa_set_bind_policy(0);
  numa_set_membind(mask);
  numa_free_nodemask(mask);
}
int main(void)
{
  hwloc_topology_t topology;
  hwloc_bitmap_t set, set2, nocpunomemnodeset, nocpubutmemnodeset,
                 nomembutcpunodeset, nomembutcpucpuset;
  hwloc_obj_t node;
  struct bitmask *bitmask, *bitmask2;
  unsigned long mask;
  unsigned long maxnode;
  int i;

  if (numa_available() < 0)
    /* libnuma has inconsistent behavior when the kernel isn't NUMA-aware.
     * don't try to check everything precisely. */
    exit(77);

  hwloc_topology_init(&topology);
  hwloc_topology_load(topology);

  /* convert full stuff between cpuset and libnuma */
  set = hwloc_bitmap_alloc();
  nocpunomemnodeset = hwloc_bitmap_alloc();
  nocpubutmemnodeset = hwloc_bitmap_alloc();
  nomembutcpunodeset = hwloc_bitmap_alloc();
  nomembutcpucpuset = hwloc_bitmap_alloc();
  /* gather all nodes if any, or the whole system if no nodes */
  if (hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_NUMANODE)) {
    node = NULL;
    while ((node = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, node)) != NULL) {
      hwloc_bitmap_or(set, set, node->cpuset);
      if (hwloc_bitmap_iszero(node->cpuset)) {
	if (node->memory.local_memory)
	  hwloc_bitmap_set(nocpubutmemnodeset, node->os_index);
	else
	  hwloc_bitmap_set(nocpunomemnodeset, node->os_index);
      } else if (!node->memory.local_memory) {
	hwloc_bitmap_set(nomembutcpunodeset, node->os_index);
	hwloc_bitmap_or(nomembutcpucpuset, nomembutcpucpuset, node->cpuset);
      }
    }
  } else {
    hwloc_bitmap_or(set, set, hwloc_topology_get_complete_cpuset(topology));
  }
  set2 = hwloc_bitmap_alloc();
  hwloc_cpuset_from_linux_libnuma_bitmask(topology, set2, numa_all_nodes_ptr);
  /* numa_all_nodes_ptr doesn't contain NODES with CPU but no memory */
  hwloc_bitmap_or(set2, set2, nomembutcpucpuset);
  assert(hwloc_bitmap_isequal(set, set2));
  hwloc_bitmap_free(set2);

  bitmask = hwloc_cpuset_to_linux_libnuma_bitmask(topology, set);
  /* numa_all_nodes_ptr contains NODES with no CPU but with memory */
  hwloc_bitmap_foreach_begin(i, nocpubutmemnodeset) {
    numa_bitmask_setbit(bitmask, i);
  } hwloc_bitmap_foreach_end();
/**
 * mem_alloc_pages_onnode - allocates pages on a given numa node
 * @nr: the number of pages
 * @size: the page size (4KB, 2MB, or 1GB)
 * @node: the numa node to allocate the pages from
 * @numa_policy: how strictly to take @node
 *
 * Returns a pointer (virtual address) to a page, or NULL on failure.
 */
void *mem_alloc_pages_onnode(int nr, int size, int node, int numa_policy)
{
	void *vaddr;
	struct bitmask *mask = numa_allocate_nodemask();

	numa_bitmask_setbit(mask, node);
	vaddr = mem_alloc_pages(nr, size, mask, numa_policy);
	numa_bitmask_free(mask);

	return vaddr;
}
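A sketch of a call site, assuming the surrounding codebase defines a 2MB page-size constant and accepts the standard MPOL_BIND value for @numa_policy (both assumptions, not shown in this snippet):

/* Hypothetical call site: four 2MB pages strictly bound to node 1.
 * PGSIZE_2MB is an assumed constant of the surrounding codebase. */
void *p = mem_alloc_pages_onnode(4, PGSIZE_2MB, 1, MPOL_BIND);
if (p == NULL)
	fprintf(stderr, "huge page allocation on node 1 failed\n");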
void *__mem_alloc_pages_onnode(void *base, int nr, int size, int node)
{
	void *vaddr;
	struct bitmask *mask = numa_allocate_nodemask();

	numa_bitmask_setbit(mask, node);
	vaddr = __mem_alloc_pages(base, nr, size, mask, MPOL_BIND);
	numa_bitmask_free(mask);

	return vaddr;
}
void check_all_numa_nodes(int policy, void *ptr, size_t size)
{
    if (policy != MPOL_INTERLEAVE && policy != MPOL_DEFAULT)
        return;

    unique_bitmask_ptr expected_bitmask = make_nodemask_ptr();

    for (int i = 0; i < numa_num_configured_nodes(); i++) {
        numa_bitmask_setbit(expected_bitmask.get(), i);
    }
    check_numa_nodes(expected_bitmask, policy, ptr, size);
}
void ConfigureTableThread() {
  int32_t idx = ThreadContext::get_id()
      - GlobalContext::get_head_table_thread_id();
  int32_t node_id = idx % num_mem_nodes_;
  CHECK_EQ(numa_run_on_node(node_id), 0);

  struct bitmask *mask = numa_allocate_nodemask();
  mask = numa_bitmask_setbit(mask, node_id);

  // set the NUMA memory binding to "preferred" rather than strict
  numa_set_bind_policy(0);
  numa_set_membind(mask);
  numa_free_nodemask(mask);
}
static void verify_mempolicy(unsigned int node, int mode)
{
	struct bitmask *bm = numa_allocate_nodemask();
	unsigned int i;

	numa_bitmask_setbit(bm, node);

	TEST(set_mempolicy(mode, bm->maskp, bm->size + 1));
	/* the kernel copies the mask, so it can be freed right away
	 * (the original leaked it on the early-return path below) */
	numa_free_nodemask(bm);

	if (TST_RET) {
		tst_res(TFAIL | TTERRNO, "set_mempolicy(%s) node %u",
			tst_numa_mode_name(mode), node);
		return;
	}

	tst_res(TPASS, "set_mempolicy(%s) node %u",
		tst_numa_mode_name(mode), node);

	const char *prefix = "child: ";

	if (SAFE_FORK()) {
		prefix = "parent: ";
		tst_reap_children();
	}

	tst_nodemap_reset_counters(nodes);
	alloc_fault_count(nodes, NULL, PAGES_ALLOCATED * page_size);
	tst_nodemap_print_counters(nodes);

	for (i = 0; i < nodes->cnt; i++) {
		if (nodes->map[i] == node) {
			if (nodes->counters[i] == PAGES_ALLOCATED) {
				tst_res(TPASS, "%sNode %u allocated %u",
					prefix, node, PAGES_ALLOCATED);
			} else {
				tst_res(TFAIL, "%sNode %u allocated %u, expected %u",
					prefix, node, nodes->counters[i],
					PAGES_ALLOCATED);
			}
			continue;
		}

		if (nodes->counters[i]) {
			tst_res(TFAIL, "%sNode %u allocated %u, expected 0",
				prefix, i, nodes->counters[i]);
		}
	}
}
int main(void)
{
	int max = numa_max_node();
	int maxmask = numa_num_possible_nodes();
	struct bitmask *nodes, *mask;
	int pagesize = getpagesize();
	int i;
	int pol;
	int node;
	int err = 0;

	nodes = numa_bitmask_alloc(maxmask);
	mask = numa_bitmask_alloc(maxmask);

	for (i = max; i >= 0; --i) {
		char *mem = mmap(NULL, pagesize * (max + 1),
				 PROT_READ|PROT_WRITE,
				 MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
		char *adr = mem;

		if (mem == MAP_FAILED)
			err("mmap");

		printf("%d offset %lx\n", i, (long)(adr - mem));

		numa_bitmask_clearall(nodes);
		numa_bitmask_clearall(mask);
		numa_bitmask_setbit(nodes, i);

		if (mbind(adr, pagesize, MPOL_PREFERRED, nodes->maskp,
			  nodes->size, 0) < 0)
			err("mbind");

		++*adr;

		if (get_mempolicy(&pol, mask->maskp, mask->size, adr,
				  MPOL_F_ADDR) < 0)
			err("get_mempolicy");

		assert(pol == MPOL_PREFERRED);
		assert(numa_bitmask_isbitset(mask, i));

		node = 0x123;
		if (get_mempolicy(&node, NULL, 0, adr,
				  MPOL_F_ADDR|MPOL_F_NODE) < 0)
			err("get_mempolicy2");

		printf("got node %d expected %d\n", node, i);

		if (node != i)
			err = 1;
	}
	return err;
}
static void regular_nodes_init(void)
{
    int i, nodes_num = numa_num_configured_nodes();
    struct bitmask *node_cpus = numa_allocate_cpumask();

    regular_nodes_mask = numa_allocate_nodemask();

    for (i = 0; i < nodes_num; i++) {
        /* mark node i "regular" if it has at least one CPU */
        numa_node_to_cpus(i, node_cpus);
        if (numa_bitmask_weight(node_cpus))
            numa_bitmask_setbit(regular_nodes_mask, i);
    }
    numa_bitmask_free(node_cpus);
}
MEMKIND_EXPORT int memkind_hbw_all_get_mbind_nodemask(struct memkind *kind,
                                                      unsigned long *nodemask,
                                                      unsigned long maxnode)
{
    int cpu;
    struct bitmask nodemask_bm = {maxnode, nodemask};
    struct memkind_hbw_closest_numanode_t *g =
        &memkind_hbw_closest_numanode_g;

    pthread_once(&memkind_hbw_closest_numanode_once_g,
                 memkind_hbw_closest_numanode_init);

    if (MEMKIND_LIKELY(!g->init_err && nodemask)) {
        numa_bitmask_clearall(&nodemask_bm);
        for (cpu = 0; cpu < g->num_cpu; ++cpu) {
            numa_bitmask_setbit(&nodemask_bm, g->closest_numanode[cpu]);
        }
    }
    return g->init_err;
}
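The struct bitmask built on the stack here wraps the caller's raw unsigned long array so the numa_bitmask_* helpers can edit it in place, with no allocation. A minimal sketch of the same trick in isolation (field order, size in bits then pointer, follows numa.h and the initializer above):

/* Sketch: edit a caller-provided raw nodemask in place via a
 * stack-constructed struct bitmask. */
unsigned long raw[2] = {0, 0};
struct bitmask bm = { .size = 128, .maskp = raw };
numa_bitmask_setbit(&bm, 1);   /* sets bit 1 of raw[0] */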
/// This function tries to fill the bandwidth array based on knowledge about known CPU models
static int fill_bandwidth_values_heuristically(int *bandwidth, int bandwidth_len)
{
    int ret = MEMKIND_ERROR_UNAVAILABLE; // default error returned if the heuristic approach fails
    int i, nodes_num, memory_only_nodes_num = 0;
    struct bitmask *memory_only_nodes, *node_cpus;

    if (is_cpu_xeon_phi_x200() == 0) {
        log_info("Known CPU model detected: Intel(R) Xeon Phi(TM) x200.");
        nodes_num = numa_num_configured_nodes();

        // Check if the number of NUMA nodes meets expectations for
        // supported configurations of Intel Xeon Phi x200
        if (nodes_num != 2 && nodes_num != 4 && nodes_num != 8) {
            return ret;
        }

        memory_only_nodes = numa_allocate_nodemask();
        node_cpus = numa_allocate_cpumask();

        for (i = 0; i < nodes_num; i++) {
            numa_node_to_cpus(i, node_cpus);
            if (numa_bitmask_weight(node_cpus) == 0) {
                memory_only_nodes_num++;
                numa_bitmask_setbit(memory_only_nodes, i);
            }
        }

        // Check if the number of memory-only nodes equals the number of
        // memory+cpu nodes; if so, set ret to 0 (success) and fill the bw table
        if (memory_only_nodes_num == (nodes_num - memory_only_nodes_num)) {
            ret = 0;
            assign_arbitrary_bandwidth_values(bandwidth, bandwidth_len,
                                              memory_only_nodes);
        }

        numa_bitmask_free(memory_only_nodes);
        numa_bitmask_free(node_cpus);
    }

    return ret;
}
int bind_cpu(int cpu)
{
	cpu_set_t *cmask;
	struct bitmask *bmask;
	size_t ncpu, setsize;
	int ret;

	ncpu = get_num_cpus();

	if (cpu < 0 || cpu >= (int)ncpu) {
		errno = EINVAL;
		return -1;
	}

	cmask = CPU_ALLOC(ncpu);
	if (cmask == NULL)
		return -1;

	setsize = CPU_ALLOC_SIZE(ncpu);
	CPU_ZERO_S(setsize, cmask);
	CPU_SET_S(cpu, setsize, cmask);

	/* pass the set's byte size (the original passed the CPU count) */
	ret = sched_setaffinity(0, setsize, cmask);

	CPU_FREE(cmask);

	/* skip NUMA stuff for UMA systems */
	if (numa_max_node() == 0)
		return ret;

	/* bind memory near the CPU; this assumes a two-node layout with
	 * even CPUs on node 0 and odd CPUs on node 1 */
	bmask = numa_bitmask_alloc(16);
	assert(bmask);

	numa_bitmask_setbit(bmask, cpu % 2);
	numa_set_membind(bmask);
	numa_bitmask_free(bmask);

	return ret;
}
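The cpu % 2 mapping only holds on two-node machines with that exact CPU numbering. A topology-independent sketch would derive the node from the CPU with numa_node_of_cpu(3):

/* Sketch: bind memory to whatever node the given CPU actually lives
 * on, instead of assuming the even/odd layout above. */
int node = numa_node_of_cpu(cpu);
if (node >= 0) {
	struct bitmask *nmask = numa_allocate_nodemask();
	numa_bitmask_setbit(nmask, node);
	numa_set_membind(nmask);
	numa_bitmask_free(nmask);
}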
int memkind_hbw_get_mbind_nodemask(struct memkind *kind,
                                   unsigned long *nodemask,
                                   unsigned long maxnode)
{
    int cpu;
    struct bitmask nodemask_bm = {maxnode, nodemask};
    struct memkind_hbw_closest_numanode_t *g =
        &memkind_hbw_closest_numanode_g;

    pthread_once(&memkind_hbw_closest_numanode_once_g,
                 memkind_hbw_closest_numanode_init);

    if (!g->init_err && nodemask) {
        numa_bitmask_clearall(&nodemask_bm);
        cpu = sched_getcpu();
        if (cpu < g->num_cpu) {
            numa_bitmask_setbit(&nodemask_bm, g->closest_numanode[cpu]);
        } else {
            return MEMKIND_ERROR_GETCPU;
        }
    }
    return g->init_err;
}
int fire_worker_init(fire_worker_context_t *context)
{
	buffer[context->queue_id] = (char *)malloc(buf_size);

	/* init worker struct */
	fire_worker_t *cc = &(workers[context->queue_id]);
	cc->total_packets = 0;
	cc->total_bytes = 0;

	/* nids init */
	nids_init(context->core_id);

#if !defined(AFFINITY_NO)
	/* set schedule affinity */
	unsigned long mask = 1 << context->core_id;
	if (sched_setaffinity(0, sizeof(unsigned long), (cpu_set_t *)&mask) < 0) {
		assert(0);
	}

	/* set schedule policy */
	struct sched_param param;
	param.sched_priority = 99;
	pthread_setschedparam(pthread_self(), SCHED_FIFO, &param);
#endif

	if (numa_max_node() == 0)
		return 0;

	/* bind memory near the worker's core, assuming even cores on
	 * node 0 and odd cores on node 1 */
	struct bitmask *bmask = numa_bitmask_alloc(16);
	assert(bmask);

	numa_bitmask_setbit(bmask, context->core_id % 2);
	numa_set_membind(bmask);
	numa_bitmask_free(bmask);

	return 0;
}
int main(void)
{
	void *ptr;
	struct bitmask *nmask;
	int err;

	nmask = numa_allocate_nodemask();
	numa_bitmask_setbit(nmask, 0);

	ptr = shmem_open();

	err = mbind(ptr, 4096 * 3, MPOL_INTERLEAVE,
		    nmask->maskp, nmask->size, 0);
	if (err < 0)
		perror("mbind1"), exit(1);

	err = mbind(ptr + 4096, 4096, MPOL_BIND,
		    nmask->maskp, nmask->size, 0);
	if (err < 0)
		perror("mbind2"), exit(1);

	return 0;
}
void* mmap_2m(void* addr, int prot, int node /* = -1 */,
              bool map_shared /* = false */, bool map_fixed /* = false */) {
#ifdef __linux__
  if (get_huge2m_info(node).free_hugepages <= 0) return nullptr;
#ifdef HAVE_NUMA
  bitmask* memMask = nullptr;
  bitmask* interleaveMask = nullptr;
  if (node >= 0 && numa_num_nodes > 1) {
    assert(numa_node_set != 0);
    if ((numa_node_set & (1u << node)) == 0) {
      // Numa policy forbids allocation on the node.
      return nullptr;
    }
    memMask = numa_get_membind();
    interleaveMask = numa_get_interleave_mask();
    bitmask* mask = numa_allocate_nodemask();
    numa_bitmask_setbit(mask, node);
    numa_set_membind(mask);
    numa_bitmask_free(mask);
  }
#endif
  void* ret = mmap_2m_impl(addr, prot, map_shared, map_fixed);
  s_num2MPages += !!ret;
#ifdef HAVE_NUMA
  if (memMask) {
    numa_set_membind(memMask);
    numa_set_interleave_mask(interleaveMask);
    numa_bitmask_free(memMask);
    numa_bitmask_free(interleaveMask);
  }
#endif
  return ret;
#else  // not linux
  return nullptr;
#endif
}
size_t remap_interleaved_2m_pages(void* addr, size_t pages, int prot,
                                  bool shared /* = false */) {
#ifdef __linux__
  assert(reinterpret_cast<uintptr_t>(addr) % size2m == 0);
  assert(addr != nullptr);

  if (pages == 0) return 0;

#ifdef HAVE_NUMA
  const int maxNode = numa_max_node();
  bitmask* memMask = nullptr;
  bitmask* interleaveMask = nullptr;
  bitmask* mask = nullptr;
  if (maxNode > 0) {
    memMask = numa_get_membind();
    interleaveMask = numa_get_interleave_mask();
    mask = numa_allocate_nodemask();
  }
#else
  constexpr int maxNode = 0;
#endif
  int node = -1;
  int failed = 0;                       // consecutive failure count
  size_t mapped_count = 0;              // size_t to match pages
  do {
#ifdef HAVE_NUMA
    if (maxNode > 0) {
      if (++node > maxNode) node = 0;
      if (!numa_node_allowed(node)) {
        // Numa policy forbids allocation on node
        if (++failed > maxNode) break;
        continue;
      }
      numa_bitmask_setbit(mask, node);
      numa_set_membind(mask);
      numa_bitmask_clearbit(mask, node);
    }
#endif
    // Fail early if we don't have huge pages reserved.
    if (get_huge2m_info(node).free_hugepages > 0 &&
        mmap_2m_impl(addr, prot, shared, true /* MAP_FIXED */)) {
      addr = (char*)addr + size2m;
      ++mapped_count;
      failed = 0;
      continue;
    }
    // We failed on node, give up if we have failed on all nodes
    if (++failed > maxNode) break;
  } while (mapped_count < pages);

#ifdef HAVE_NUMA
  if (mask) {
    numa_set_membind(memMask);
    numa_set_interleave_mask(interleaveMask);
    numa_bitmask_free(mask);
    numa_bitmask_free(interleaveMask);
    numa_bitmask_free(memMask);
  }
#endif
  return mapped_count;
#else  // not linux
  return 0;
#endif
}
int main(int argc, char **argv)
{
	FILE *fp;
	void *addr, *start, *end, *lastend;
	int node, err, lc;
	char buf[BUFSIZ];
	struct bitmask *nmask = numa_allocate_nodemask();

	pagesize = getpagesize();
	tst_parse_opts(argc, argv, options, usage);

	if (opt_node) {
		node = SAFE_STRTOL(NULL, opt_nodestr, 1, LONG_MAX);
	} else {
		err = get_allowed_nodes(NH_MEMS, 1, &node);
		if (err == -3)
			tst_brkm(TCONF, NULL, "requires at least one node.");
		else if (err < 0)
			tst_brkm(TBROK | TERRNO, NULL, "get_allowed_nodes");
	}
	numa_bitmask_setbit(nmask, node);

	for (lc = 0; TEST_LOOPING(lc); lc++) {
		tst_count = 0;
		addr = mmap(NULL, pagesize * 3, PROT_WRITE,
			    MAP_ANON | MAP_PRIVATE, -1, 0);
		if (addr == MAP_FAILED)
			tst_brkm(TBROK | TERRNO, NULL, "mmap");
		tst_resm(TINFO, "pid = %d addr = %p", getpid(), addr);

		/* make page populate */
		memset(addr, 0, pagesize * 3);

		/* first mbind */
		err = mbind(addr + pagesize, pagesize, MPOL_BIND,
			    nmask->maskp, nmask->size, MPOL_MF_MOVE_ALL);
		if (err != 0) {
			if (errno != ENOSYS)
				tst_brkm(TBROK | TERRNO, NULL, "mbind1");
			else
				tst_brkm(TCONF, NULL,
					 "mbind syscall not implemented on this system.");
		}

		/* second mbind */
		err = mbind(addr, pagesize * 3, MPOL_DEFAULT, NULL, 0, 0);
		if (err != 0)
			tst_brkm(TBROK | TERRNO, NULL, "mbind2");

		/* /proc/self/maps in the form of "00400000-00406000 r-xp 00000000". */
		fp = fopen("/proc/self/maps", "r");
		if (fp == NULL)
			tst_brkm(TBROK | TERRNO, NULL, "fopen");

		while (fgets(buf, BUFSIZ, fp) != NULL) {
			if (sscanf(buf, "%p-%p ", &start, &end) != 2)
				continue;

			if (start == addr) {
				tst_resm(TINFO, "start = %p, end = %p", start, end);
				if (end == addr + pagesize * 3) {
					tst_resm(TPASS, "only 1 VMA.");
					break;
				}

				lastend = end;
				while (fgets(buf, BUFSIZ, fp) != NULL) {
					/* No more VMAs, break */
					if (sscanf(buf, "%p-%p ", &start, &end) != 2)
						break;
					tst_resm(TINFO, "start = %p, end = %p", start, end);

					/* more VMAs found */
					if (start == lastend)
						lastend = end;
					if (end == addr + pagesize * 3) {
						tst_resm(TFAIL, ">1 unmerged VMAs.");
						break;
					}
				}
				if (end != addr + pagesize * 3)
					tst_resm(TFAIL, "no matched VMAs.");
				break;
			}
		}
		fclose(fp);
		if (munmap(addr, pagesize * 3) == -1)
			tst_brkm(TWARN | TERRNO, NULL, "munmap");
	}
	tst_exit();
}
int main(int argc, char *argv[])
{
	int i;
	int ret;
	int nr = 2;
	int c;	/* int, not char: getopt() returns -1 at end of options */
	char *p;
	int mapflag = MAP_ANONYMOUS;
	int protflag = PROT_READ|PROT_WRITE;
	unsigned long nr_nodes = numa_max_node() + 1;
	struct bitmask *new_nodes;
	unsigned long nodemask;
	int do_unpoison = 0;
	int loop = 3;

	while ((c = getopt(argc, argv, "vp:m:n:ul:h:")) != -1) {
		switch (c) {
		case 'v':
			verbose = 1;
			break;
		case 'p':
			testpipe = optarg;
			{
				struct stat stat;
				lstat(testpipe, &stat);
				if (!S_ISFIFO(stat.st_mode))
					errmsg("Given file is not fifo.\n");
			}
			break;
		case 'm':
			if (!strcmp(optarg, "private"))
				mapflag |= MAP_PRIVATE;
			else if (!strcmp(optarg, "shared"))
				mapflag |= MAP_SHARED;
			else
				errmsg("invalid optarg for -m\n");
			break;
		case 'n':
			nr = strtoul(optarg, NULL, 10);
			break;
		case 'u':
			do_unpoison = 1;
			break;
		case 'l':
			loop = strtoul(optarg, NULL, 10);
			break;
		case 'h':
			HPS = strtoul(optarg, NULL, 10) * 1024;
			mapflag |= MAP_HUGETLB;
			/* todo: arch independent */
			if (HPS != 2097152 && HPS != 1073741824)
				errmsg("Invalid hugepage size\n");
			break;
		default:
			errmsg("invalid option\n");
			break;
		}
	}

	if (nr_nodes < 2)
		errmsg("A minimum of 2 nodes is required for this test.\n");

	new_nodes = numa_bitmask_alloc(nr_nodes);
	numa_bitmask_setbit(new_nodes, 1);

	nodemask = 1; /* only node 0 allowed */
	if (set_mempolicy(MPOL_BIND, &nodemask, nr_nodes) == -1)
		err("set_mempolicy");

	signal(SIGUSR2, sig_handle);
	pprintf("start background migration\n");
	pause();

	signal(SIGUSR1, sig_handle_flag);
	pprintf("hugepages prepared\n");

	while (flag) {
		p = checked_mmap((void *)ADDR_INPUT, nr * HPS, protflag,
				 mapflag, -1, 0);
		/* fault in */
		memset(p, 'a', nr * HPS);
		for (i = 0; i < nr; i++) {
			ret = madvise(p + i * HPS, 4096, MADV_HWPOISON);
			if (ret) {
				perror("madvise");
				pprintf("madvise returned %d\n", ret);
			}
		}
		if (do_unpoison) {
			pprintf("need unpoison\n");
			pause();
		}
		checked_munmap(p, nr * HPS);
		if (loop-- <= 0)
			break;
	}
	pprintf("exit\n");
	pause();
	return 0;
}
CPU_Set_t *CPU_ParseSet(const char *s)
{
	// XXX numa_parse_cpustring will only accept CPU's that are
	// less than the highest CPU we're affinitized to and it
	// always masks it to the CPU's the program started with.
	// return numa_parse_cpustring((char*)s);

	struct bitmask *bm = NULL;
	int *cpus = NULL;
	int curCpu = 0, maxCpus = 0, maxCpuVal = 0;

	while (*s) {
		while (isspace(*s)) ++s;
		if (!*s) break;
		if (!isdigit(*s)) {
			fprintf(stderr, "CPU set expected number: %s", s);
			goto fail;
		}

		char *end;
		int lo = strtol(s, &end, 10);
		int hi = lo;
		s = end;
		while (isspace(*s)) ++s;

		if (*s == '-') {
			s++;
			while (isspace(*s)) ++s;
			if (!isdigit(*s)) {
				fprintf(stderr, "CPU set expected number: %s", s);
				goto fail;
			}
			hi = strtol(s, &end, 10);
			s = end;
			while (isspace(*s)) ++s;
		}

		for (int cpu = lo; cpu <= hi; ++cpu) {
			if (curCpu == maxCpus) {
				maxCpus = maxCpus ? 2 * maxCpus : 16;
				cpus = realloc(cpus, maxCpus * sizeof *cpus);
				if (!cpus)
					panic("Failed to grow CPU list");
			}
			cpus[curCpu++] = cpu;
			if (cpu > maxCpuVal)
				maxCpuVal = cpu;
		}

		if (*s == ',') {
			s++;
			continue;
		}
		if (*s) {
			fprintf(stderr, "CPU set expected ',': %s", s);
			goto fail;
		}
	}

	bm = numa_bitmask_alloc(maxCpuVal + 1);
	if (!bm)
		panic("Failed to allocate CPU bitmask");
	for (int i = 0; i < curCpu; ++i)
		numa_bitmask_setbit(bm, cpus[i]);

fail:
	free(cpus);
	return bm;
}
int main(int argc, char *argv[])
{
	int i;
	int nr = 2;
	int ret;
	int c;	/* int, not char: getopt() returns -1 at end of options */
	char *p;
	int mapflag = MAP_ANONYMOUS;
	int protflag = PROT_READ|PROT_WRITE;
	struct bitmask *all_nodes;
	struct bitmask *old_nodes;
	struct bitmask *new_nodes;
	unsigned long nr_nodes = numa_max_node() + 1;

	while ((c = getopt(argc, argv, "vp:m:n:h:")) != -1) {
		switch (c) {
		case 'v':
			verbose = 1;
			break;
		case 'p':
			testpipe = optarg;
			{
				struct stat stat;
				lstat(testpipe, &stat);
				if (!S_ISFIFO(stat.st_mode))
					errmsg("Given file is not fifo.\n");
			}
			break;
		case 'm':
			if (!strcmp(optarg, "private"))
				mapflag |= MAP_PRIVATE;
			else if (!strcmp(optarg, "shared"))
				mapflag |= MAP_SHARED;
			else
				errmsg("invalid optarg for -m\n");
			break;
		case 'n':
			nr = strtoul(optarg, NULL, 10);
			break;
		case 'h':
			mapflag |= MAP_HUGETLB;
			HPS = strtoul(optarg, NULL, 10) * 1024;
			/* todo: arch independent */
			if (HPS != 2097152 && HPS != 1073741824)
				errmsg("Invalid hugepage size\n");
			break;
		default:
			errmsg("invalid option\n");
			break;
		}
	}

	if (nr_nodes < 2)
		errmsg("A minimum of 2 nodes is required for this test.\n");

	all_nodes = numa_bitmask_alloc(nr_nodes);
	old_nodes = numa_bitmask_alloc(nr_nodes);
	new_nodes = numa_bitmask_alloc(nr_nodes);
	numa_bitmask_setbit(all_nodes, 0);
	numa_bitmask_setbit(all_nodes, 1);
	numa_bitmask_setbit(old_nodes, 0);
	numa_bitmask_setbit(new_nodes, 1);

	numa_sched_setaffinity(0, old_nodes);

	signal(SIGUSR1, sig_handle);

	p = mmap((void *)ADDR_INPUT, nr * HPS, protflag, mapflag, -1, 0);
	if (p == MAP_FAILED)
		err("mmap");

	/* fault in */
	memset(p, 'a', nr * HPS);

	pprintf("before memory_hotremove\n");
	pause();

	numa_sched_setaffinity(0, all_nodes);
	signal(SIGUSR1, sig_handle_flag);
	memset(p, 'a', nr * HPS);
	pprintf("entering busy loop\n");
	while (flag) {
		memset(p, 'a', nr * HPS);
		/* important to control race b/w migration and fault */
		sleep(1);
	}
	pprintf("exited busy loop\n");
	pause();
	return 0;
}
void* StorageManager::allocateSlots(const std::size_t num_slots,
                                    const int numa_node) {
#if defined(QUICKSTEP_HAVE_MMAP_LINUX_HUGETLB)
  static constexpr int kLargePageMmapFlags
      = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
#elif defined(QUICKSTEP_HAVE_MMAP_BSD_SUPERPAGE)
  static constexpr int kLargePageMmapFlags
      = MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER;
#endif

  makeRoomForBlockOrBlob(num_slots);
  void *slots = nullptr;

#if defined(QUICKSTEP_HAVE_MMAP_LINUX_HUGETLB) || defined(QUICKSTEP_HAVE_MMAP_BSD_SUPERPAGE)
  slots = mmap(nullptr,
               num_slots * kSlotSizeBytes,
               PROT_READ | PROT_WRITE,
               kLargePageMmapFlags,
               -1, 0);

  // Fallback to regular mmap() if large page allocation failed. Even on
  // systems with large page support, large page allocation may fail if the
  // user running the executable is not a member of hugetlb_shm_group on
  // Linux, or if all the reserved hugepages are already in use.
  if (slots == MAP_FAILED) {
    slots = mmap(nullptr,
                 num_slots * kSlotSizeBytes,
                 PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS,
                 -1, 0);
  }
  if (slots == MAP_FAILED) {
    slots = nullptr;
  }
#elif defined(QUICKSTEP_HAVE_MMAP_PLAIN)
  slots = mmap(nullptr,
               num_slots * kSlotSizeBytes,
               PROT_READ | PROT_WRITE,
               MAP_PRIVATE | MAP_ANONYMOUS,
               -1, 0);
  if (slots == MAP_FAILED) {
    slots = nullptr;
  }
#else
  slots = malloc_with_alignment(num_slots * kSlotSizeBytes,
                                kCacheLineBytes);
  if (slots != nullptr) {
    memset(slots, 0x0, num_slots * kSlotSizeBytes);
  }
#endif

  if (slots == nullptr) {
    throw OutOfMemory();
  }

#if defined(QUICKSTEP_HAVE_LIBNUMA)
  if (numa_node != -1) {
    DEBUG_ASSERT(numa_node < numa_num_configured_nodes());
    struct bitmask *numa_node_bitmask = numa_allocate_nodemask();
    // numa_node can be 0 through n-1, where n is the num of NUMA nodes.
    numa_bitmask_setbit(numa_node_bitmask, numa_node);
    long mbind_status = mbind(slots,  // NOLINT(runtime/int)
                              num_slots * kSlotSizeBytes,
                              MPOL_PREFERRED,
                              numa_node_bitmask->maskp,
                              numa_node_bitmask->size,
                              0);
    numa_free_nodemask(numa_node_bitmask);
    if (mbind_status == -1) {
      LOG(WARNING) << "mbind() failed with errno " << errno
                   << " (" << std::strerror(errno) << ")";
    }
  }
#endif  // QUICKSTEP_HAVE_LIBNUMA

  total_memory_usage_ += num_slots;
  return slots;
}
/*----------------------------------------------------------------------------*/
int mtcp_core_affinitize(int cpu)
{
#ifndef DISABLE_NUMA
	struct bitmask *bmask;
#endif /* DISABLE_NUMA */
	cpu_set_t cpus;
	FILE *fp;
	char sysfname[MAX_FILE_NAME];
	int phy_id;
	size_t n;
	int ret;

	n = GetNumCPUs();

	if (cpu < 0 || cpu >= (int)n) {
		errno = EINVAL;
		return -1;
	}

	CPU_ZERO(&cpus);
	CPU_SET((unsigned)cpu, &cpus);

	ret = sched_setaffinity(Gettid(), sizeof(cpus), &cpus);

#ifndef DISABLE_NUMA
	/* skip the membind step on UMA systems */
	if (numa_max_node() == 0)
		return ret;
#endif /* DISABLE_NUMA */

	/* read physical id of the core from sys information */
	snprintf(sysfname, MAX_FILE_NAME - 1,
		 "/sys/devices/system/cpu/cpu%d/topology/physical_package_id",
		 cpu);
	fp = fopen(sysfname, "r");
	if (!fp) {
		perror(sysfname);
		errno = EFAULT;
		return -1;
	}
	if (fscanf(fp, "%d", &phy_id) != 1) {
		fclose(fp);
		perror("Fail to read core id");
		errno = EFAULT;
		return -1;
	}

#ifndef DISABLE_NUMA
	/* use the physical package id as the NUMA node to bind memory to */
	bmask = numa_bitmask_alloc(n);
	assert(bmask);
	numa_bitmask_setbit(bmask, phy_id);
	numa_set_membind(bmask);
	numa_bitmask_free(bmask);
#endif /* DISABLE_NUMA */

	fclose(fp);
	return ret;
}
static void *
s_numa_alloc(size_t sz, int cpu)
{
  void *ret = NULL;

  if (likely(sz > 0)) {
    if (likely(cpu >= 0)) {
      if (likely(s_numa_nodes != NULL && s_n_cpus > 0)) {
        unsigned int node = s_numa_nodes[cpu];
        unsigned int allocd_node = UINT_MAX;
        struct bitmask *bmp;
        int r;

        bmp = numa_allocate_nodemask();
        numa_bitmask_setbit(bmp, node);

        errno = 0;
        r = (int)set_mempolicy(MPOL_BIND, bmp->maskp, bmp->size + 1);
        if (likely(r == 0)) {
          errno = 0;
          ret = numa_alloc_onnode(sz, (int)node);
          if (likely(ret != NULL)) {
            lagopus_result_t rl;

            /*
             * We need this "first touch" even using the
             * numa_alloc_onnode().
             */
            (void)memset(ret, 0, sz);

            errno = 0;
            r = (int)get_mempolicy((int *)&allocd_node, NULL, 0, ret,
                                   MPOL_F_NODE|MPOL_F_ADDR);
            if (likely(r == 0)) {
              if (unlikely(node != allocd_node)) {
                /*
                 * The memory is not allocated on the node, but it is
                 * still usable. Just return it.
                 */
                lagopus_msg_warning("can't allocate " PFSZ(u) " bytes memory "
                                    "for CPU %d (NUMA node %d).\n",
                                    sz, cpu, node);
              }
            } else {
              lagopus_perror(LAGOPUS_RESULT_POSIX_API_ERROR);
              lagopus_msg_error("get_mempolicy() returned %d.\n", r);
            }

            rl = s_add_addr(ret, sz);
            if (unlikely(rl != LAGOPUS_RESULT_OK)) {
              lagopus_perror(rl);
              lagopus_msg_error("can't register the allocated address.\n");
              numa_free(ret, sz);
              ret = NULL;
            }
          }
        } else {	/* r != 0 */
          lagopus_perror(LAGOPUS_RESULT_POSIX_API_ERROR);
          lagopus_msg_error("set_mempolicy() returned %d.\n", r);
        }

        numa_free_nodemask(bmp);
        set_mempolicy(MPOL_DEFAULT, NULL, 0);
      } else {	/* s_numa_nodes == NULL || s_n_cpus <= 0 */
        /*
         * Not initialized or initialization failure.
         */
        lagopus_msg_warning("The NUMA related information is not initialized. "
                            "Use malloc(3) instead.\n");
        ret = malloc(sz);
      }
    } else {	/* cpu < 0 */
      /*
       * Use pure malloc(3).
       */
      ret = malloc(sz);
    }
  }

  return ret;
}