long recommended_minfreekbytes(void)
{
	FILE *f;
	char buf[ZONEINFO_LINEBUF];
	int nr_zones = 0;
	long recommended_min;
	long pageblock_kbytes = kernel_default_hugepage_size() / 1024;

	/* Detect the number of zones in the system */
	f = fopen(PROCZONEINFO, "r");
	if (f == NULL) {
		WARNING("Unable to open " PROCZONEINFO "\n");
		return 0;
	}
	while (fgets(buf, ZONEINFO_LINEBUF, f) != NULL) {
		if (strncmp(buf, "Node ", 5) == 0)
			nr_zones++;
	}
	fclose(f);

	/* Make sure at least 2 pageblocks are free for MIGRATE_RESERVE */
	recommended_min = pageblock_kbytes * nr_zones * 2;

	/*
	 * Make sure that on average at least two pageblocks are almost free
	 * of another type: one for a migratetype to fall back to and a
	 * second to avoid subsequent fallbacks of other types.  There are 3
	 * MIGRATE_TYPES we care about.
	 */
	recommended_min += pageblock_kbytes * nr_zones * 3 * 3;
	return recommended_min;
}
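/*
 * Illustrative sketch, not part of the original source: one way a caller
 * might compare the recommendation above against the live sysctl.  The
 * example_* name is hypothetical; /proc/sys/vm/min_free_kbytes is the
 * standard sysctl path; assumes <stdio.h>.
 */
static void example_check_minfreekbytes(void)
{
	long current = 0;
	long recommended = recommended_minfreekbytes();
	FILE *f = fopen("/proc/sys/vm/min_free_kbytes", "r");

	if (f == NULL)
		return;
	if (fscanf(f, "%ld", &current) == 1 && current < recommended)
		printf("min_free_kbytes is %ld, but at least %ld is "
			"recommended for huge page availability\n",
			current, recommended);
	fclose(f);
}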
/**
 * get_huge_pages - Allocate an amount of memory backed by huge pages
 * len: Size of the region to allocate, must be hugepage-aligned
 * flags: Flags specifying the behaviour of the function
 *
 * This function allocates a region of memory that is backed by huge pages
 * and hugepage-aligned. This is not a suitable drop-in replacement for
 * malloc(), but a malloc library could use this function to create a new
 * fixed-size heap, similar in principle to what morecore does for glibc
 * malloc.
 */
void *get_huge_pages(size_t len, ghp_t flags)
{
	void *buf;
	int buf_fd = -1;
	int mmap_reserve = __hugetlb_opts.no_reserve ? MAP_NORESERVE : 0;
	int mmap_hugetlb = 0;
	int ret;

	/* Catch an altogether-too-easy typo */
	if (flags & GHR_MASK)
		ERROR("Improper use of GHR_* in get_huge_pages()\n");

#ifdef MAP_HUGETLB
	mmap_hugetlb = MAP_HUGETLB;
#endif

	if (__hugetlb_opts.map_hugetlb &&
			gethugepagesize() == kernel_default_hugepage_size()) {
		/* Because we can use MAP_HUGETLB, we simply mmap the region */
		buf = mmap(NULL, len, PROT_READ|PROT_WRITE,
			MAP_PRIVATE|MAP_ANONYMOUS|mmap_hugetlb|mmap_reserve,
			0, 0);
	} else {
		/* Create a file descriptor for the new region */
		buf_fd = hugetlbfs_unlinked_fd();
		if (buf_fd < 0) {
			WARNING("Couldn't open hugetlbfs file for %zd-sized buffer\n",
				len);
			return NULL;
		}

		/* Map the requested region */
		buf = mmap(NULL, len, PROT_READ|PROT_WRITE,
			MAP_PRIVATE|mmap_reserve, buf_fd, 0);
	}

	if (buf == MAP_FAILED) {
		if (buf_fd >= 0)
			close(buf_fd);

		WARNING("get_huge_pages: New region mapping failed (flags: 0x%lX): %s\n",
			flags, strerror(errno));
		return NULL;
	}

	/* Fault the region to ensure accesses succeed */
	ret = hugetlbfs_prefault(buf, len);
	if (ret != 0) {
		munmap(buf, len);
		if (buf_fd >= 0)
			close(buf_fd);

		WARNING("get_huge_pages: Prefaulting failed (flags: 0x%lX): %s\n",
			flags, strerror(ret));
		return NULL;
	}

	/* Close the file so we do not have to track the descriptor */
	if (buf_fd >= 0 && close(buf_fd) != 0) {
		WARNING("Failed to close new buffer fd: %s\n", strerror(errno));
		munmap(buf, len);
		return NULL;
	}

	/* woo, new buffer of shiny */
	return buf;
}
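/*
 * Usage sketch, not part of the original source: allocate one
 * hugepage-aligned buffer with get_huge_pages() and release it again.
 * Assumes the public libhugetlbfs API from <hugetlbfs.h> (GHP_DEFAULT,
 * free_huge_pages()) and <string.h> for memset(); the example_* name is
 * hypothetical.
 */
static void example_get_huge_pages_usage(void)
{
	long hpage = gethugepagesize();
	void *buf;

	if (hpage <= 0)
		return;

	/* len must be hugepage-aligned, so request exactly one huge page */
	buf = get_huge_pages((size_t)hpage, GHP_DEFAULT);
	if (buf == NULL)
		return;

	memset(buf, 0, (size_t)hpage);	/* touch the new region */
	free_huge_pages(buf);
}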
/*
 * Our plan is to ask for pages 'roughly' at the BASE.  We expect and
 * require the kernel to offer us sequential pages from wherever it
 * first gave us a page.  If it does not do so, we return the page and
 * pretend there are none; this covers us for the case where another
 * map is in the way.  This is required because 'morecore' must have
 * 'sbrk' semantics, i.e. return sequential, contiguous memory blocks.
 * Luckily, if it does not do so and we error out, malloc will happily
 * go back to small pages and use mmap to get them.  Hurrah.
 */
static void *hugetlbfs_morecore(ptrdiff_t increment)
{
	int ret;
	void *p;
	long delta;
	int mmap_reserve = __hugetlb_opts.no_reserve ? MAP_NORESERVE : 0;
	int mmap_hugetlb = 0;
	int using_default_pagesize =
		(hpage_size == kernel_default_hugepage_size());

	INFO("hugetlbfs_morecore(%ld) = ...\n", (long)increment);

	/*
	 * how much to grow the heap by =
	 *	(size of heap) + malloc request - mmap'd space
	 */
	delta = (heaptop - heapbase) + increment - mapsize;

	INFO("heapbase = %p, heaptop = %p, mapsize = %lx, delta=%ld\n",
		heapbase, heaptop, mapsize, delta);

	/* align to multiple of hugepagesize. */
	delta = ALIGN(delta, hpage_size);

#ifdef MAP_HUGETLB
	mmap_hugetlb = MAP_HUGETLB;
#endif

	if (delta > 0) {
		/* growing the heap */
		INFO("Attempting to map %ld bytes\n", delta);

		/* map in (extend) more of the file at the end of our last map */
		if (__hugetlb_opts.map_hugetlb && using_default_pagesize)
			p = mmap(heapbase + mapsize, delta,
				PROT_READ|PROT_WRITE,
				mmap_hugetlb|MAP_ANONYMOUS|MAP_PRIVATE|mmap_reserve,
				heap_fd, mapsize);
		else
			p = mmap(heapbase + mapsize, delta,
				PROT_READ|PROT_WRITE,
				MAP_PRIVATE|mmap_reserve, heap_fd, mapsize);

		if (p == MAP_FAILED) {
			WARNING("New heap segment map at %p failed: %s\n",
				heapbase + mapsize, strerror(errno));
			return NULL;
		}

		/* if this is the first map */
		if (!mapsize) {
			if (heapbase && (heapbase != p)) {
				WARNING("Heap originates at %p instead of %p\n",
					p, heapbase);
				if (__hugetlbfs_debug)
					dump_proc_pid_maps();
			}
			/* then setup the heap variables */
			heapbase = heaptop = p;
		} else if (p != (heapbase + mapsize)) {
			/* Couldn't get the mapping where we wanted */
			munmap(p, delta);
			WARNING("New heap segment mapped at %p instead of %p\n",
				p, heapbase + mapsize);
			if (__hugetlbfs_debug)
				dump_proc_pid_maps();
			return NULL;
		}

		/* Fault the region to ensure accesses succeed */
		if (hugetlbfs_prefault(p, delta) != 0) {
			munmap(p, delta);
			return NULL;
		}

		/* we now have mmap'd further */
		mapsize += delta;
	} else if (delta < 0) {
		/* shrinking the heap */
		if (!__hugetlb_opts.shrink_ok) {
			/* shouldn't ever get here */
			WARNING("Heap shrinking is turned off\n");
			return NULL;
		}

		if (!mapsize) {
			WARNING("Can't shrink empty heap!\n");
			return NULL;
		}

		/*
		 * If we are forced to change the heapaddr from the
		 * original brk() value we have violated brk semantics
		 * (which we are not supposed to do).  This shouldn't
		 * pose a problem until glibc tries to trim the heap to an
		 * address lower than what we aligned heapaddr to.  At that
		 * point the alignment "gap" causes heap corruption.
		 * So we don't allow the heap to shrink below heapbase.
		 */
		if (mapsize + delta < 0) {	/* remember: delta is negative */
			WARNING("Unable to shrink heap below %p\n", heapbase);
			/* unmap just what is currently mapped */
			delta = -mapsize;
			/* we need heaptop + increment == heapbase, so: */
			increment = heapbase - heaptop;
		}

		INFO("Attempting to unmap %ld bytes @ %p\n", -delta,
			heapbase + mapsize + delta);
		ret = munmap(heapbase + mapsize + delta, -delta);
		if (ret) {
			WARNING("Unmapping failed while shrinking heap: %s\n",
				strerror(errno));
		} else if (!__hugetlb_opts.map_hugetlb &&
				!using_default_pagesize) {
			/* Now shrink the hugetlbfs file. */
			mapsize += delta;
			ret = ftruncate(heap_fd, mapsize);
			if (ret) {
				WARNING("Could not truncate hugetlbfs file to "
					"shrink heap: %s\n", strerror(errno));
			}
		}
	}

	/* heap is continuous */
	p = heaptop;
	/* and we now have added this much more space to the heap */
	heaptop = heaptop + increment;
	INFO("... = %p\n", p);
	return p;
}
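/*
 * Worked example, not part of the original source: assume a 2MiB huge
 * page size, an empty heap (heapbase == heaptop, mapsize == 0), and the
 * usual libhugetlbfs ALIGN() that rounds up to the next multiple.  If
 * glibc calls __morecore(132096):
 *
 *	delta = (heaptop - heapbase) + 132096 - mapsize = 132096
 *	delta = ALIGN(132096, 2097152)                  = 2097152
 *
 * so one whole huge page is mapped although only ~129KiB was requested.
 * heaptop still advances by exactly the 132096-byte increment; the
 * surplus stays mapped, and later positive increments that fit within
 * it align to delta == 0 and need no new mapping at all.
 */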
void hugetlbfs_setup_morecore(void)
{
	char *ep;
	unsigned long heapaddr;

	if (!__hugetlb_opts.morecore)
		return;
	if (strcasecmp(__hugetlb_opts.morecore, "no") == 0) {
		INFO("HUGETLB_MORECORE=%s, not setting up morecore\n",
			__hugetlb_opts.morecore);
		return;
	}

	/*
	 * Determine the page size that will be used for the heap.
	 * This can be set explicitly by setting HUGETLB_MORECORE to a valid
	 * page size string or by setting HUGETLB_DEFAULT_PAGE_SIZE.
	 */
	if (strncasecmp(__hugetlb_opts.morecore, "y", 1) == 0)
		hpage_size = gethugepagesize();
	else if (__hugetlb_opts.thp_morecore)
		hpage_size = kernel_default_hugepage_size();
	else
		hpage_size = parse_page_size(__hugetlb_opts.morecore);

	if (hpage_size <= 0) {
		if (errno == ENOSYS)
			WARNING("Hugepages unavailable\n");
		else if (errno == EOVERFLOW || errno == ERANGE)
			WARNING("Hugepage size too large\n");
		else if (errno == EINVAL)
			WARNING("Invalid huge page size\n");
		else
			WARNING("Hugepage size (%s)\n", strerror(errno));
		return;
	}

	/*
	 * We won't need an fd for the heap mmaps if we are using MAP_HUGETLB
	 * or we are depending on transparent huge pages.
	 */
	if (__hugetlb_opts.thp_morecore || (__hugetlb_opts.map_hugetlb &&
			hpage_size == kernel_default_hugepage_size())) {
		heap_fd = -1;
	} else {
		if (!hugetlbfs_find_path_for_size(hpage_size)) {
			WARNING("Hugepage size %li unavailable\n", hpage_size);
			return;
		}

		heap_fd = hugetlbfs_unlinked_fd_for_size(hpage_size);
		if (heap_fd < 0) {
			WARNING("Couldn't open hugetlbfs file for morecore\n");
			return;
		}
	}

	/*
	 * THP morecore uses sbrk to allocate more heap space, counting on
	 * the kernel to back the area with THP.  So setting heapbase is
	 * meaningless if thp_morecore is used.
	 */
	if (!__hugetlb_opts.thp_morecore && __hugetlb_opts.heapbase) {
		heapaddr = strtoul(__hugetlb_opts.heapbase, &ep, 16);
		if (*ep != '\0') {
			WARNING("Can't parse HUGETLB_MORECORE_HEAPBASE: %s\n",
				__hugetlb_opts.heapbase);
			return;
		}
	} else {
		heapaddr = (unsigned long)sbrk(0);
		if (!__hugetlb_opts.thp_morecore)
			heapaddr = hugetlbfs_next_addr(heapaddr);
	}

	INFO("setup_morecore(): heapaddr = 0x%lx\n", heapaddr);

	heaptop = heapbase = (void *)heapaddr;
	if (__hugetlb_opts.thp_morecore)
		__morecore = &thp_morecore;
	else
		__morecore = &hugetlbfs_morecore;

	/* Set some allocator options more appropriate for hugepages */
	if (__hugetlb_opts.shrink_ok)
		mallopt(M_TRIM_THRESHOLD, hpage_size / 2);
	else
		mallopt(M_TRIM_THRESHOLD, -1);
	mallopt(M_TOP_PAD, hpage_size / 2);

	/*
	 * We always want to use our morecore, not ordinary mmap().
	 * This doesn't appear to prohibit malloc() from falling back
	 * to mmap() if we run out of hugepages.
	 */
	mallopt(M_MMAP_MAX, 0);
}
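/*
 * Usage sketch, not part of the original source: this setup runs off
 * environment variables, so applications normally do not call
 * hugetlbfs_setup_morecore() themselves.  Typical invocations (the
 * preload library name and heap base address are illustrative):
 *
 *	HUGETLB_MORECORE=yes LD_PRELOAD=libhugetlbfs.so ./app
 *	HUGETLB_MORECORE=2M HUGETLB_MORECORE_HEAPBASE=0x60000000 \
 *		LD_PRELOAD=libhugetlbfs.so ./app
 *	HUGETLB_MORECORE=no ./app	# explicitly disabled, see above
 *
 * "yes" selects the default huge page size, a size string such as "2M"
 * selects a specific one, and HUGETLB_MORECORE_HEAPBASE places the heap
 * at a fixed address (ignored with THP morecore, as noted above).
 */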