/* This test is run with an overridden MEMKIND_HBW_NODES environment variable
 * and tries to perform an allocation from DRAM using hbw_malloc() with the
 * default HBW_POLICY_PREFERRED policy.
 */
int main()
{
    struct bitmask *expected_nodemask = NULL;
    struct bitmask *returned_nodemask = NULL;
    void *ptr = NULL;
    int ret = -1; /* assume failure until the final nodemask comparison */
    int status = 0;

    ptr = hbw_malloc(KB);
    if (ptr == NULL) {
        printf("Error: allocation failed\n");
        goto exit;
    }

    expected_nodemask = numa_allocate_nodemask();
    status = memkind_hbw_all_get_mbind_nodemask(NULL, expected_nodemask->maskp,
                                                expected_nodemask->size);
    if (status != MEMKIND_ERROR_ENVIRON) {
        printf("Error: wrong return value from memkind_hbw_all_get_mbind_nodemask()\n");
        printf("Expected: %d\n", MEMKIND_ERROR_ENVIRON);
        printf("Actual: %d\n", status);
        goto exit;
    }

    returned_nodemask = numa_allocate_nodemask();
    status = get_mempolicy(NULL, returned_nodemask->maskp,
                           returned_nodemask->size, ptr, MPOL_F_ADDR);
    if (status) {
        printf("Error: get_mempolicy() returned %d\n", status);
        goto exit;
    }

    if (numa_bitmask_equal(returned_nodemask, expected_nodemask)) {
        ret = 0;
    } else {
        printf("Error: Memkind hbw and allocated pointer nodemasks are not equal\n");
    }

exit:
    if (expected_nodemask) {
        numa_free_nodemask(expected_nodemask);
    }
    if (returned_nodemask) {
        numa_free_nodemask(returned_nodemask);
    }
    if (ptr) {
        hbw_free(ptr);
    }
    return ret;
}
void* mmap_1g(void* addr /* = nullptr */, int node /* = -1 */) {
#ifdef __linux__
  if (s_num1GPages >= kMaxNum1GPages) return nullptr;
  if (get_huge1g_info(node).free_hugepages <= 0) return nullptr;
  if (node >= 0 && !numa_node_allowed(node)) return nullptr;
#ifdef HAVE_NUMA
  bitmask* memMask = nullptr;
  bitmask* interleaveMask = nullptr;
  if (node >= 0 && numa_num_nodes > 1) {
    // Remember the current binding, then bind to the requested node only.
    memMask = numa_get_membind();
    interleaveMask = numa_get_interleave_mask();
    bitmask* mask = numa_allocate_nodemask();
    numa_bitmask_setbit(mask, node);
    numa_set_membind(mask);
    numa_bitmask_free(mask);
  }
#endif
  void* ret = mmap_1g_impl(addr);
  if (ret != nullptr) {
    s_1GPages[s_num1GPages++] = ret;
  }
#ifdef HAVE_NUMA
  if (memMask) {
    // Restore the saved memory policy.
    assert(interleaveMask);
    numa_set_membind(memMask);
    numa_set_interleave_mask(interleaveMask);
    numa_bitmask_free(memMask);
    numa_bitmask_free(interleaveMask);
  }
#endif
  return ret;
#else
  return nullptr;
#endif
}
void bind2node_id(int node_id)
{
	struct bitmask *bmp = numa_allocate_nodemask();
	numa_bitmask_setbit(bmp, node_id);
	numa_bind(bmp);
	numa_free_nodemask(bmp);
}
/**
 * \brief Returns the mask of nodes from which memory can currently be allocated.
 *
 * \return bitmap of nodes from which allocation is allowed
 */
struct bitmap *numa_get_membind(void)
{
	assert(numa_alloc_bind_mask);

	struct bitmap *im = numa_allocate_nodemask();
	if (im == NULL) {
		return NULL;
	}
	bitmap_copy(im, numa_alloc_bind_mask);
	return im;
}
/**
 * \brief Returns the current interleave mask.
 *
 * \return bitmask representing the current interleave state
 *
 * Returns the current interleave mask if the task's memory allocation
 * policy is page interleaved. Otherwise, this function returns an empty
 * mask.
 */
struct bitmap *numa_get_interleave_mask(void)
{
	assert(numa_alloc_interleave_mask);

	struct bitmap *im = numa_allocate_nodemask();
	if (im == NULL) {
		return NULL;
	}
	bitmap_copy(im, numa_alloc_interleave_mask);
	return im;
}
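/* Usage note (not from the original sources): the two getters above return
 * freshly allocated copies that the caller owns and must release. A minimal
 * sketch of the save/modify/restore pattern they enable (the same pattern
 * mmap_1g() above uses); bind_and_touch() is a hypothetical helper:
 */
#include <numa.h>
#include <stdlib.h>
#include <string.h>

static void *bind_and_touch(size_t len, int node)
{
    struct bitmask *saved = numa_get_membind();  /* caller-owned copy */
    struct bitmask *mask  = numa_allocate_nodemask();

    numa_bitmask_setbit(mask, node);
    numa_set_membind(mask);       /* pages faulted from here on go to `node` */

    void *p = malloc(len);
    if (p)
        memset(p, 0, len);        /* first touch places the pages */

    numa_set_membind(saved);      /* put the original policy back */
    numa_bitmask_free(mask);
    numa_bitmask_free(saved);
    return p;
}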
void ConfigureTableThread() {
  int32_t node_id = GlobalContext::get_numa_index();

  struct bitmask *mask = numa_allocate_nodemask();
  mask = numa_bitmask_setbit(mask, node_id);

  // Bind with the preferred (non-strict) NUMA policy.
  numa_set_bind_policy(0);
  numa_set_membind(mask);
  numa_free_nodemask(mask);
}
/**
 * mem_alloc_pages_onnode - allocates pages on a given numa node
 * @nr: the number of pages
 * @size: the page size (4KB, 2MB, or 1GB)
 * @node: the numa node to allocate the pages from
 * @numa_policy: how strictly to take @node
 *
 * Returns a pointer (virtual address) to a page, or NULL on failure.
 */
void *mem_alloc_pages_onnode(int nr, int size, int node, int numa_policy)
{
	void *vaddr;
	struct bitmask *mask = numa_allocate_nodemask();

	numa_bitmask_setbit(mask, node);
	vaddr = mem_alloc_pages(nr, size, mask, numa_policy);
	numa_bitmask_free(mask);

	return vaddr;
}
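/* A hedged usage sketch for the wrapper above: MPOL_BIND and MPOL_PREFERRED
 * come from <numaif.h>; PGSIZE_2MB and alloc_2m_near() are assumed names for
 * illustration only.
 */
#include <numaif.h>	/* MPOL_BIND, MPOL_PREFERRED */

/* Try a strict binding first, then fall back to a preferred policy that
 * may spill to other nodes under memory pressure. */
void *alloc_2m_near(int nr, int node)
{
	void *p = mem_alloc_pages_onnode(nr, PGSIZE_2MB, node, MPOL_BIND);
	if (p == NULL)
		p = mem_alloc_pages_onnode(nr, PGSIZE_2MB, node, MPOL_PREFERRED);
	return p;
}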
void *__mem_alloc_pages_onnode(void *base, int nr, int size, int node)
{
	void *vaddr;
	struct bitmask *mask = numa_allocate_nodemask();

	numa_bitmask_setbit(mask, node);
	vaddr = __mem_alloc_pages(base, nr, size, mask, MPOL_BIND);
	numa_bitmask_free(mask);

	return vaddr;
}
void ConfigureTableThread() {
  int32_t idx = ThreadContext::get_id() - GlobalContext::get_head_table_thread_id();
  int32_t node_id = idx % num_mem_nodes_;
  CHECK_EQ(numa_run_on_node(node_id), 0);

  struct bitmask *mask = numa_allocate_nodemask();
  mask = numa_bitmask_setbit(mask, node_id);

  // Bind with the preferred (non-strict) NUMA policy.
  numa_set_bind_policy(0);
  numa_set_membind(mask);
  numa_free_nodemask(mask);
}
void open_mmap() {
    int ret = posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL);
    assert(ret == 0);

    data = (T *)mmap(NULL, sizeof(T) * length, PROT_READ | PROT_WRITE,
                     MAP_SHARED, fd, 0);
    assert(data != MAP_FAILED);  // check before handing the mapping to libnuma

    struct bitmask *bmask = numa_allocate_nodemask();
    numa_bitmask_setall(bmask);
    numa_tonodemask_memory(data, sizeof(T) * length, bmask);
    numa_free_nodemask(bmask);

    is_open = true;
}
static void verify_mempolicy(unsigned int node, int mode)
{
	struct bitmask *bm = numa_allocate_nodemask();
	unsigned int i;

	numa_bitmask_setbit(bm, node);

	TEST(set_mempolicy(mode, bm->maskp, bm->size + 1));

	numa_free_nodemask(bm);

	if (TST_RET) {
		tst_res(TFAIL | TTERRNO, "set_mempolicy(%s) node %u",
		        tst_numa_mode_name(mode), node);
		return;
	}

	tst_res(TPASS, "set_mempolicy(%s) node %u",
	        tst_numa_mode_name(mode), node);

	const char *prefix = "child: ";

	if (SAFE_FORK()) {
		prefix = "parent: ";
		tst_reap_children();
	}

	tst_nodemap_reset_counters(nodes);
	alloc_fault_count(nodes, NULL, PAGES_ALLOCATED * page_size);
	tst_nodemap_print_counters(nodes);

	for (i = 0; i < nodes->cnt; i++) {
		if (nodes->map[i] == node) {
			if (nodes->counters[i] == PAGES_ALLOCATED) {
				tst_res(TPASS, "%sNode %u allocated %u",
				        prefix, node, PAGES_ALLOCATED);
			} else {
				tst_res(TFAIL, "%sNode %u allocated %u, expected %u",
				        prefix, node, nodes->counters[i],
				        PAGES_ALLOCATED);
			}
			continue;
		}

		if (nodes->counters[i]) {
			tst_res(TFAIL, "%sNode %u allocated %u, expected 0",
			        prefix, i, nodes->counters[i]);
		}
	}
}
static void regular_nodes_init(void)
{
    int i, nodes_num = numa_num_configured_nodes();
    struct bitmask *node_cpus = numa_allocate_cpumask();

    regular_nodes_mask = numa_allocate_nodemask();

    for (i = 0; i < nodes_num; i++) {
        // Nodes with CPUs attached are "regular"; memory-only nodes
        // stay out of the mask.
        numa_node_to_cpus(i, node_cpus);
        if (numa_bitmask_weight(node_cpus))
            numa_bitmask_setbit(regular_nodes_mask, i);
    }
    numa_bitmask_free(node_cpus);
}
/// This function tries to fill the bandwidth array based on knowledge about known CPU models
static int fill_bandwidth_values_heuristically(int *bandwidth, int bandwidth_len)
{
    int ret = MEMKIND_ERROR_UNAVAILABLE; // default error if the heuristic approach fails
    int i, nodes_num, memory_only_nodes_num = 0;
    struct bitmask *memory_only_nodes, *node_cpus;

    if (is_cpu_xeon_phi_x200() == 0) {
        log_info("Known CPU model detected: Intel(R) Xeon Phi(TM) x200.");
        nodes_num = numa_num_configured_nodes();

        // Check if the number of NUMA nodes matches a supported
        // configuration of Intel Xeon Phi x200
        if (nodes_num != 2 && nodes_num != 4 && nodes_num != 8) {
            return ret;
        }

        memory_only_nodes = numa_allocate_nodemask();
        node_cpus = numa_allocate_cpumask();

        for (i = 0; i < nodes_num; i++) {
            numa_node_to_cpus(i, node_cpus);
            if (numa_bitmask_weight(node_cpus) == 0) {
                memory_only_nodes_num++;
                numa_bitmask_setbit(memory_only_nodes, i);
            }
        }

        // Check if the number of memory-only nodes equals the number of
        // memory+cpu nodes; if so, set ret to 0 (success) and fill the
        // bandwidth table
        if (memory_only_nodes_num == (nodes_num - memory_only_nodes_num)) {
            ret = 0;
            assign_arbitrary_bandwidth_values(bandwidth, bandwidth_len,
                                              memory_only_nodes);
        }

        numa_bitmask_free(memory_only_nodes);
        numa_bitmask_free(node_cpus);
    }
    return ret;
}
int main(void)
{
	void *ptr;
	struct bitmask *nmask;
	int err;

	nmask = numa_allocate_nodemask();
	numa_bitmask_setbit(nmask, 0);
	ptr = shmem_open();

	err = mbind(ptr, 4096 * 3, MPOL_INTERLEAVE, nmask->maskp, nmask->size, 0);
	if (err < 0)
		perror("mbind1"), exit(1);

	err = mbind(ptr + 4096, 4096, MPOL_BIND, nmask->maskp, nmask->size, 0);
	if (err < 0)
		perror("mbind2"), exit(1);

	return 0;
}
void* mmap_2m(void* addr, int prot, int node /* = -1 */,
              bool map_shared /* = false */, bool map_fixed /* = false */) {
#ifdef __linux__
  if (get_huge2m_info(node).free_hugepages <= 0) return nullptr;
#ifdef HAVE_NUMA
  bitmask* memMask = nullptr;
  bitmask* interleaveMask = nullptr;
  if (node >= 0 && numa_num_nodes > 1) {
    assert(numa_node_set != 0);
    if ((numa_node_set & (1u << node)) == 0) {
      // Numa policy forbids allocation on the node.
      return nullptr;
    }
    memMask = numa_get_membind();
    interleaveMask = numa_get_interleave_mask();
    bitmask* mask = numa_allocate_nodemask();
    numa_bitmask_setbit(mask, node);
    numa_set_membind(mask);
    numa_bitmask_free(mask);
  }
#endif
  void* ret = mmap_2m_impl(addr, prot, map_shared, map_fixed);
  s_num2MPages += !!ret;
#ifdef HAVE_NUMA
  if (memMask) {
    numa_set_membind(memMask);
    numa_set_interleave_mask(interleaveMask);
    numa_bitmask_free(memMask);
    numa_bitmask_free(interleaveMask);
  }
#endif
  return ret;
#else  // not linux
  return nullptr;
#endif
}
int main(int argc, char **argv)
{
	FILE *fp;
	void *addr, *start, *end, *lastend;
	int node, err, lc;
	char buf[BUFSIZ];
	struct bitmask *nmask = numa_allocate_nodemask();

	pagesize = getpagesize();
	tst_parse_opts(argc, argv, options, usage);

	if (opt_node) {
		node = SAFE_STRTOL(NULL, opt_nodestr, 1, LONG_MAX);
	} else {
		err = get_allowed_nodes(NH_MEMS, 1, &node);
		if (err == -3)
			tst_brkm(TCONF, NULL, "requires at least one node.");
		else if (err < 0)
			tst_brkm(TBROK | TERRNO, NULL, "get_allowed_nodes");
	}
	numa_bitmask_setbit(nmask, node);

	for (lc = 0; TEST_LOOPING(lc); lc++) {
		tst_count = 0;
		addr = mmap(NULL, pagesize * 3, PROT_WRITE,
			    MAP_ANON | MAP_PRIVATE, 0, 0);
		if (addr == MAP_FAILED)
			tst_brkm(TBROK | TERRNO, NULL, "mmap");
		tst_resm(TINFO, "pid = %d addr = %p", getpid(), addr);

		/* fault the pages in */
		memset(addr, 0, pagesize * 3);

		/* first mbind */
		err = mbind(addr + pagesize, pagesize, MPOL_BIND,
			    nmask->maskp, nmask->size, MPOL_MF_MOVE_ALL);
		if (err != 0) {
			if (errno != ENOSYS)
				tst_brkm(TBROK | TERRNO, NULL, "mbind1");
			else
				tst_brkm(TCONF, NULL,
					 "mbind syscall not implemented on this system.");
		}

		/* second mbind */
		err = mbind(addr, pagesize * 3, MPOL_DEFAULT, NULL, 0, 0);
		if (err != 0)
			tst_brkm(TBROK | TERRNO, NULL, "mbind2");

		/* /proc/self/maps in the form of "00400000-00406000 r-xp 00000000". */
		fp = fopen("/proc/self/maps", "r");
		if (fp == NULL)
			tst_brkm(TBROK | TERRNO, NULL, "fopen");

		while (fgets(buf, BUFSIZ, fp) != NULL) {
			if (sscanf(buf, "%p-%p ", &start, &end) != 2)
				continue;
			if (start == addr) {
				tst_resm(TINFO, "start = %p, end = %p", start, end);
				if (end == addr + pagesize * 3) {
					tst_resm(TPASS, "only 1 VMA.");
					break;
				}

				lastend = end;
				while (fgets(buf, BUFSIZ, fp) != NULL) {
					/* no more VMAs, break */
					if (sscanf(buf, "%p-%p ", &start, &end) != 2)
						break;
					tst_resm(TINFO, "start = %p, end = %p", start, end);

					/* more VMAs found */
					if (start == lastend)
						lastend = end;
					if (end == addr + pagesize * 3) {
						tst_resm(TFAIL, ">1 unmerged VMAs.");
						break;
					}
				}
				if (end != addr + pagesize * 3)
					tst_resm(TFAIL, "no matched VMAs.");
				break;
			}
		}
		fclose(fp);
		if (munmap(addr, pagesize * 3) == -1)
			tst_brkm(TWARN | TERRNO, NULL, "munmap");
	}
	tst_exit();
}
int main(int argc, char **argv)
{
	int ret, c;
	int i, repeat = 5;
	int cpu = 2;
	static int errortype = 1;
	static int verbose = 1;
	static int disableHuge = 0;
	static int madvisePoison = 0;
	static int poll_exit = 0;
	static long length;
	struct bitmask *nodes, *gnodes;
	int gpolicy;
	unsigned long error_opt;
	void *vaddrmin = (void *)-1UL, *vaddrmax = NULL;
	static size_t pdcount = 0;
	unsigned long mattr, addrend, pages, count, nodeid, paddr = 0;
	unsigned long addr_start = 0, nodeid_start = -1, mattr_start = -1;
	unsigned int pagesize = getpagesize();
	char pte_str[20];
	struct dlook_get_map_info req;
	static page_desc_t *pdbegin = NULL;
	page_desc_t *pd, *pdend;

	length = memsize("100k");
	nodes = numa_allocate_nodemask();
	gnodes = numa_allocate_nodemask();
	progname = argv[0];

	while (1) {
		static struct option long_options[] = {
			{"verbose", no_argument, &verbose, 1},
			{"delay", no_argument, &delay, 1},
			{"disableHuge", no_argument, &disableHuge, 1},
			{"poll", no_argument, &poll_exit, 1},
			{"madvisePoison", no_argument, &madvisePoison, 1},
			{"manual", no_argument, &manual, 1},
			{"cpu", required_argument, 0, 'c'},
			{"errortype", required_argument, 0, 'e'},
			{"help", no_argument, 0, 'h'},
			{"length", required_argument, 0, 'l'}
		};
		/* getopt_long stores the option index here. */
		int option_index = 0;

		c = getopt_long(argc, argv, "hc:e:l:", long_options, &option_index);

		/* Detect the end of the options. */
		if (c == -1)
			break;

		switch (c) {
		case 'c':
			cpu = atoi(optarg);
			break;
		case 'e':
			errortype = atoi(optarg);
			break;
		case 'h':
			help();
		case 'l':
			/* Not exposed */
			printf("option -l with value `%s'\n", optarg);
			length = memsize(optarg);
			break;
		case '?':
			/* getopt_long already printed an error message. */
			exit(-1);
		}
	}

	cpu_process_setaffinity(getpid(), cpu);
	error_opt = get_etype(errortype);

	buf = mmap(NULL, length, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);

	if (mbind((void *)buf, length, MPOL_DEFAULT, nodes->maskp,
		  nodes->size, 0) < 0) {
		perror("mbind");
	}

	/* Disable hugepages */
	if (disableHuge)
		madvise((void *)buf, length, MADV_NOHUGEPAGE);

	if (madvisePoison)
		madvise((void *)buf, length, MADV_HWPOISON);

	gpolicy = -1;
	if (get_mempolicy(&gpolicy, gnodes->maskp, gnodes->size,
			  (void *)buf, MPOL_F_ADDR) < 0)
		perror("get_mempolicy");
	if (!numa_bitmask_equal(gnodes, nodes)) {
		printf("nodes differ %lx, %lx!\n",
		       gnodes->maskp[0], nodes->maskp[0]);
	}

	strcpy(pte_str, "");
	addrend = ((unsigned long)buf) + length;
	pages = (addrend - ((unsigned long)buf)) / pagesize;

	if (pages > pdcount) {
		pdbegin = realloc(pdbegin, sizeof(page_desc_t) * pages);
		pdcount = pages;
	}

	req.pid = getpid();
	req.start_vaddr = (unsigned long)buf;
	req.end_vaddr = addrend;
	req.pd = pdbegin;

	sigaction(SIGBUS, &recover_act, NULL);

	/* Fault in pages */
	if (!poll_exit)
		hog((void *)buf, length);

	/* Get mmap phys_addrs */
	if ((fd = open(UVMCE_DEVICE, O_RDWR)) < 0) {
		printf("Failed to open: %s\n", UVMCE_DEVICE);
		exit(1);
	}

	if (ioctl(fd, UVMCE_DLOOK, &req) < 0) {
		printf("Failed to INJECT_UCE\n");
		exit(1);
	}

	process_map(pd, pdbegin, pdend, pages, buf, addrend, pagesize, mattr,
		    nodeid, paddr, pte_str, nodeid_start, mattr_start,
		    addr_start);

	printf("\n\tstart_vaddr\t 0x%016lx length\t 0x%lx\n"
	       "\tend_vaddr\t 0x%016lx pages\t %ld\n",
	       (unsigned long)buf, length, addrend, pages);

	uv_inject(pd, pdbegin, pdend, pages, (unsigned long)buf, addrend,
		  pagesize, mattr, nodeid, paddr, pte_str, nodeid_start,
		  mattr_start, addr_start, error_opt);

	if (delay) {
		printf("Enter char to consume bad memory..");
		getchar();
	}

	if (error_opt != UVMCE_PATROL_SCRUB_UCE) {
		consume_it((void *)buf, length);
	}
out:
	close(fd);
	return 0;
}
/**
 * @brief Do memory binding.
 *
 * This handles the binding types map_mem, mask_mem and rank.
 * The types local (default) and none are handled directly by the daemon.
 *
 * When using libnuma with API v1, this is a noop and only emits a warning.
 *
 * @param step Step structure
 * @param task Task structure
 *
 * @return No return value.
 */
void doMemBind(Step_t *step, PStask_t *task)
{
# ifndef HAVE_NUMA_ALLOCATE_NODEMASK
    mlog("%s: psslurm does not support memory binding types map_mem, mask_mem"
	 " and rank with libnuma v1\n", __func__);
    fprintf(stderr, "Memory binding type not supported with the libnuma"
	    " version in use");
    return;
# else
    const char delimiters[] = ",";
    uint32_t lTID;
    char *next, *saveptr, *ents, *myent, *endptr;
    char **entarray;
    unsigned int numents;
    uint16_t mynode;
    struct bitmask *nodemask = NULL;

    if (!(step->memBindType & MEM_BIND_MAP)
	&& !(step->memBindType & MEM_BIND_MASK)
	&& !(step->memBindType & MEM_BIND_RANK)) {
	/* things are handled elsewhere */
	return;
    }

    if (!PSIDnodes_bindMem(PSC_getMyID()) || getenv("__PSI_NO_MEMBIND")) {
	// info messages already printed in doClamps()
	return;
    }

    if (numa_available() == -1) {
	fprintf(stderr, "NUMA not available:");
	return;
    }

    nodemask = numa_allocate_nodemask();
    if (!nodemask) {
	fprintf(stderr, "Allocation of nodemask failed:");
	return;
    }

    lTID = getLocalRankID(task->rank, step, step->localNodeId);

    if (step->memBindType & MEM_BIND_RANK) {
	if (lTID > (unsigned int)numa_max_node()) {
	    mlog("%s: memory binding to ranks not possible for rank %d."
		 " (local rank %d > #numa_nodes %d)\n", __func__,
		 task->rank, lTID, numa_max_node());
	    fprintf(stderr, "Memory binding to ranks not possible for rank %d,"
		    " local rank %u larger than max numa node %d.",
		    task->rank, lTID, numa_max_node());
	    if (nodemask) numa_free_nodemask(nodemask);
	    return;
	}
	if (numa_bitmask_isbitset(numa_get_mems_allowed(), lTID)) {
	    numa_bitmask_setbit(nodemask, lTID);
	} else {
	    mlog("%s: setting bit %d in memory mask not allowed in rank"
		 " %d\n", __func__, lTID, task->rank);
	    fprintf(stderr, "Not allowed to set bit %u in memory mask"
		    " of rank %d\n", lTID, task->rank);
	}
	numa_set_membind(nodemask);
	if (nodemask) numa_free_nodemask(nodemask);
	return;
    }

    ents = ustrdup(step->memBind);
    entarray = umalloc(step->tasksToLaunch[step->localNodeId] * sizeof(char *));
    numents = 0;
    myent = NULL;
    entarray[0] = NULL;

    next = strtok_r(ents, delimiters, &saveptr);
    while (next && (numents < step->tasksToLaunch[step->localNodeId])) {
	entarray[numents++] = next;
	if (numents == lTID + 1) {
	    myent = next;
	    break;
	}
	next = strtok_r(NULL, delimiters, &saveptr);
    }

    if (!myent && numents) {
	myent = entarray[lTID % numents];
    }

    if (!myent) {
	numa_set_membind(numa_all_nodes_ptr);
	if (step->memBindType & MEM_BIND_MASK) {
	    mlog("%s: invalid mem mask string '%s'\n", __func__, ents);
	} else if (step->memBindType & MEM_BIND_MAP) {
	    mlog("%s: invalid mem map string '%s'\n", __func__, ents);
	}
	goto cleanup;
    }

    if (step->memBindType & MEM_BIND_MAP) {
	if (strncmp(myent, "0x", 2) == 0) {
	    mynode = strtoul(myent + 2, &endptr, 16);
	} else {
	    mynode = strtoul(myent, &endptr, 10);
	}

	if (*endptr == '\0' && mynode <= numa_max_node()) {
	    if (numa_bitmask_isbitset(numa_get_mems_allowed(), mynode)) {
		numa_bitmask_setbit(nodemask, mynode);
	    } else {
		mlog("%s: setting bit %d in memory mask not allowed in rank"
		     " %d\n", __func__, mynode, task->rank);
		fprintf(stderr, "Not allowed to set bit %d in memory mask"
			" of rank %d\n", mynode, task->rank);
	    }
	} else {
	    mlog("%s: invalid memory map entry '%s' (%d) for rank %d\n",
		 __func__, myent, mynode, task->rank);
	    fprintf(stderr, "Invalid memory map entry '%s' for rank %d\n",
		    myent, task->rank);
	    numa_set_membind(numa_all_nodes_ptr);
	    goto cleanup;
	}
	mdbg(PSSLURM_LOG_PART, "%s: (bind_map) node %i local task %i"
	     " memstr '%s'\n", __func__, step->localNodeId, lTID, myent);
    } else if (step->memBindType & MEM_BIND_MASK) {
	parseNUMAmask(nodemask, myent, task->rank);
    }

    numa_set_membind(nodemask);

cleanup:
    ufree(ents);
    ufree(entarray);
    if (nodemask) numa_free_nodemask(nodemask);
# endif
    return;
}
size_t remap_interleaved_2m_pages(void* addr, size_t pages, int prot,
                                  bool shared /* = false */) {
#ifdef __linux__
  assert(reinterpret_cast<uintptr_t>(addr) % size2m == 0);
  assert(addr != nullptr);
  if (pages == 0) return 0;

#ifdef HAVE_NUMA
  const int maxNode = numa_max_node();
  bitmask* memMask = nullptr;
  bitmask* interleaveMask = nullptr;
  bitmask* mask = nullptr;
  if (maxNode > 0) {
    memMask = numa_get_membind();
    interleaveMask = numa_get_interleave_mask();
    mask = numa_allocate_nodemask();
  }
#else
  constexpr int maxNode = 0;
#endif
  int node = -1;
  int failed = 0;                       // consecutive failure count
  size_t mapped_count = 0;
  do {
#ifdef HAVE_NUMA
    if (maxNode > 0) {
      if (++node > maxNode) node = 0;
      if (!numa_node_allowed(node)) {
        // Numa policy forbids allocation on node.
        if (++failed > maxNode) break;
        continue;
      }
      numa_bitmask_setbit(mask, node);
      numa_set_membind(mask);
      numa_bitmask_clearbit(mask, node);
    }
#endif
    // Fail early if we don't have huge pages reserved.
    if (get_huge2m_info(node).free_hugepages > 0 &&
        mmap_2m_impl(addr, prot, shared, true /* MAP_FIXED */)) {
      addr = (char*)addr + size2m;
      ++mapped_count;
      failed = 0;
      continue;
    }
    // We failed on node; give up if we have failed on all nodes.
    if (++failed > maxNode) break;
  } while (mapped_count < pages);

#ifdef HAVE_NUMA
  if (mask) {
    numa_set_membind(memMask);
    numa_set_interleave_mask(interleaveMask);
    numa_bitmask_free(mask);
    numa_bitmask_free(interleaveMask);
    numa_bitmask_free(memMask);
  }
#endif
  return mapped_count;
#else  // not linux
  return 0;
#endif
}
void* StorageManager::allocateSlots(const std::size_t num_slots,
                                    const int numa_node) {
#if defined(QUICKSTEP_HAVE_MMAP_LINUX_HUGETLB)
  static constexpr int kLargePageMmapFlags
      = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
#elif defined(QUICKSTEP_HAVE_MMAP_BSD_SUPERPAGE)
  static constexpr int kLargePageMmapFlags
      = MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER;
#endif

  makeRoomForBlockOrBlob(num_slots);
  void *slots = nullptr;

#if defined(QUICKSTEP_HAVE_MMAP_LINUX_HUGETLB) || defined(QUICKSTEP_HAVE_MMAP_BSD_SUPERPAGE)
  slots = mmap(nullptr,
               num_slots * kSlotSizeBytes,
               PROT_READ | PROT_WRITE,
               kLargePageMmapFlags,
               -1, 0);

  // Fallback to regular mmap() if large page allocation failed. Even on
  // systems with large page support, large page allocation may fail if the
  // user running the executable is not a member of hugetlb_shm_group on
  // Linux, or if all the reserved hugepages are already in use.
  if (slots == MAP_FAILED) {
    slots = mmap(nullptr,
                 num_slots * kSlotSizeBytes,
                 PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS,
                 -1, 0);
  }
  if (slots == MAP_FAILED) {
    slots = nullptr;
  }
#elif defined(QUICKSTEP_HAVE_MMAP_PLAIN)
  slots = mmap(nullptr,
               num_slots * kSlotSizeBytes,
               PROT_READ | PROT_WRITE,
               MAP_PRIVATE | MAP_ANONYMOUS,
               -1, 0);
  if (slots == MAP_FAILED) {
    slots = nullptr;
  }
#else
  slots = malloc_with_alignment(num_slots * kSlotSizeBytes, kCacheLineBytes);
  if (slots != nullptr) {
    memset(slots, 0x0, num_slots * kSlotSizeBytes);
  }
#endif

  if (slots == nullptr) {
    throw OutOfMemory();
  }

#if defined(QUICKSTEP_HAVE_LIBNUMA)
  if (numa_node != -1) {
    DEBUG_ASSERT(numa_node < numa_num_configured_nodes());
    struct bitmask *numa_node_bitmask = numa_allocate_nodemask();
    // numa_node can be 0 through n-1, where n is the number of NUMA nodes.
    numa_bitmask_setbit(numa_node_bitmask, numa_node);
    long mbind_status = mbind(slots,  // NOLINT(runtime/int)
                              num_slots * kSlotSizeBytes,
                              MPOL_PREFERRED,
                              numa_node_bitmask->maskp,
                              numa_node_bitmask->size,
                              0);
    numa_free_nodemask(numa_node_bitmask);
    if (mbind_status == -1) {
      LOG(WARNING) << "mbind() failed with errno " << errno
                   << " (" << std::strerror(errno) << ")";
    }
  }
#endif  // QUICKSTEP_HAVE_LIBNUMA

  total_memory_usage_ += num_slots;
  return slots;
}
unique_bitmask_ptr make_nodemask_ptr() {
  return unique_bitmask_ptr(numa_allocate_nodemask(), numa_free_nodemask);
}
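/* make_nodemask_ptr() presumes an RAII alias for libnuma bitmasks. A minimal
 * sketch of what that alias and its use might look like; the alias name
 * matches the snippet above, while bind_to_node() is purely illustrative:
 */
#include <memory>
#include <numa.h>

using unique_bitmask_ptr =
    std::unique_ptr<struct bitmask, decltype(&numa_free_nodemask)>;

void bind_to_node(int node) {
  unique_bitmask_ptr mask = make_nodemask_ptr();
  numa_bitmask_setbit(mask.get(), node);
  numa_set_membind(mask.get());  // mask is freed automatically on scope exit
}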
static void *
s_numa_alloc(size_t sz, int cpu) {
  void *ret = NULL;

  if (likely(sz > 0)) {
    if (likely(cpu >= 0)) {
      if (likely(s_numa_nodes != NULL && s_n_cpus > 0)) {
        unsigned int node = s_numa_nodes[cpu];
        unsigned int allocd_node = UINT_MAX;
        struct bitmask *bmp;
        int r;

        bmp = numa_allocate_nodemask();
        numa_bitmask_setbit(bmp, node);

        errno = 0;
        r = (int)set_mempolicy(MPOL_BIND, bmp->maskp, bmp->size + 1);
        if (likely(r == 0)) {
          errno = 0;
          ret = numa_alloc_onnode(sz, (int)node);
          if (likely(ret != NULL)) {
            lagopus_result_t rl;

            /*
             * We need this "first touch" even when using
             * numa_alloc_onnode().
             */
            (void)memset(ret, 0, sz);

            errno = 0;
            r = (int)get_mempolicy((int *)&allocd_node, NULL, 0, ret,
                                   MPOL_F_NODE | MPOL_F_ADDR);
            if (likely(r == 0)) {
              if (unlikely(node != allocd_node)) {
                /*
                 * The memory is not allocated on the node, but it is
                 * still usable. Just return it.
                 */
                lagopus_msg_warning("can't allocate " PFSZ(u) " bytes memory "
                                    "for CPU %d (NUMA node %d).\n",
                                    sz, cpu, node);
              }
            } else {
              lagopus_perror(LAGOPUS_RESULT_POSIX_API_ERROR);
              lagopus_msg_error("get_mempolicy() returned %d.\n", r);
            }

            rl = s_add_addr(ret, sz);
            if (unlikely(rl != LAGOPUS_RESULT_OK)) {
              lagopus_perror(rl);
              lagopus_msg_error("can't register the allocated address.\n");
              numa_free(ret, sz);
              ret = NULL;
            }
          }
        } else {	/* r == 0 */
          lagopus_perror(LAGOPUS_RESULT_POSIX_API_ERROR);
          lagopus_msg_error("set_mempolicy() returned %d.\n", r);
        }

        numa_free_nodemask(bmp);
        set_mempolicy(MPOL_DEFAULT, NULL, 0);

      } else {	/* s_numa_nodes != NULL && s_n_cpus > 0 */
        /*
         * Not initialized or initialization failure.
         */
        lagopus_msg_warning("The NUMA related information is not initialized. "
                            "Use malloc(3) instead.\n");
        ret = malloc(sz);
      }
    } else {	/* cpu >= 0 */
      /*
       * Use pure malloc(3).
       */
      ret = malloc(sz);
    }
  }

  return ret;
}