/* * util_map_hint -- determine hint address for mmap() * * If PMEM_MMAP_HINT environment variable is not set, we let the system to pick * the randomized mapping address. Otherwise, a user-defined hint address * is used. * * ALSR in 64-bit Linux kernel uses 28-bit of randomness for mmap * (bit positions 12-39), which means the base mapping address is randomized * within [0..1024GB] range, with 4KB granularity. Assuming additional * 1GB alignment, it results in 1024 possible locations. * * Configuring the hint address via PMEM_MMAP_HINT environment variable * disables address randomization. In such case, the function will search for * the first unused, properly aligned region of given size, above the specified * address. */ char * util_map_hint(size_t len, size_t req_align) { LOG(3, "len %zu req_align %zu", len, req_align); char *addr; /* choose the desired alignment based on the requested length */ size_t align = util_map_hint_align(len, req_align); if (Mmap_no_random) { LOG(4, "user-defined hint %p", (void *)Mmap_hint); addr = util_map_hint_unused((void *)Mmap_hint, len, align); } else { /* * Create dummy mapping to find an unused region of given size. * Request for increased size for later address alignment. * Use MAP_PRIVATE with read-only access to simulate * zero cost for overcommit accounting. Note: MAP_NORESERVE * flag is ignored if overcommit is disabled (mode 2). */ addr = mmap(NULL, len + align, PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); if (addr != MAP_FAILED) { LOG(4, "system choice %p", addr); munmap(addr, len + align); addr = (char *)roundup((uintptr_t)addr, align); } } LOG(4, "hint %p", addr); return addr; }
int main(int argc, char *argv[]) { START(argc, argv, "util_map_proc"); util_init(); if (argc < 3) UT_FATAL("usage: %s maps_file len [len]...", argv[0]); Sfile = argv[1]; for (int arg = 2; arg < argc; arg++) { size_t len = (size_t)strtoull(argv[arg], NULL, 0); size_t align = Ut_pagesize; if (len >= 2 * GIGABYTE) align = GIGABYTE; else if (len >= 4 * MEGABYTE) align = 2 * MEGABYTE; void *h1 = util_map_hint_unused((void *)TERABYTE, len, GIGABYTE); void *h2 = util_map_hint(len, 0); if (h1 != MAP_FAILED && h1 != NULL) UT_ASSERTeq((uintptr_t)h1 & (GIGABYTE - 1), 0); if (h2 != MAP_FAILED && h2 != NULL) UT_ASSERTeq((uintptr_t)h2 & (align - 1), 0); UT_OUT("len %zu: %p %p", len, h1, h2); } DONE(NULL); }
/* * util_map_hint -- determine hint address for mmap() * * If PMEM_MMAP_HINT environment variable is not set, we let the system to pick * the randomized mapping address. Otherwise, a user-defined hint address * is used. * * Windows Environment: * XXX - Windows doesn't support large DAX pages yet, so there is * no point in aligning for the same. * * Except for Windows Environment: * ALSR in 64-bit Linux kernel uses 28-bit of randomness for mmap * (bit positions 12-39), which means the base mapping address is randomized * within [0..1024GB] range, with 4KB granularity. Assuming additional * 1GB alignment, it results in 1024 possible locations. * * Configuring the hint address via PMEM_MMAP_HINT environment variable * disables address randomization. In such case, the function will search for * the first unused, properly aligned region of given size, above the * specified address. */ static char *util_map_hint(size_t len, size_t req_align) { char *addr; size_t align = 0; char *e = NULL; dprint(FD_IO, "DEBUG util_map_hint\n"); dprint(FD_IO, "len %zu req_align %zu\n", len, req_align); /* choose the desired alignment based on the requested length */ align = util_map_hint_align(len, req_align); e = getenv("PMEM_MMAP_HINT"); if (e) { char *endp; unsigned long long val = 0; errno = 0; val = strtoull(e, &endp, 16); if (errno || endp == e) { dprint(FD_IO, "Invalid PMEM_MMAP_HINT\n"); } else { Mmap_hint = (void *)val; Mmap_no_random = true; dprint(FD_IO, "PMEM_MMAP_HINT set to %p\n", Mmap_hint); } } if (Mmap_no_random) { dprint(FD_IO, "user-defined hint %p\n", (void *)Mmap_hint); addr = util_map_hint_unused((void *)Mmap_hint, len, align); } else { /* * Create dummy mapping to find an unused region of given size. * * Request for increased size for later address alignment. * * Windows Environment: * Use MAP_NORESERVE flag to only reserve the range of pages * rather than commit. We don't want the pages to be actually * backed by the operating system paging file, as the swap * file is usually too small to handle terabyte pools. * * Except for Windows Environment: * Use MAP_PRIVATE with read-only access to simulate * zero cost for overcommit accounting. Note: MAP_NORESERVE * flag is ignored if overcommit is disabled (mode 2). */ #ifndef WIN32 addr = mmap(NULL, len + align, PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); #else addr = mmap(NULL, len + align, PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0); #endif if (addr != MAP_FAILED) { dprint(FD_IO, "system choice %p\n", addr); munmap(addr, len + align); addr = (char *)roundup((uintptr_t)addr, align); } } dprint(FD_IO, "hint %p\n", addr); return addr; }