bool
pages_boot(void) {
	os_page = os_page_detect();
	if (os_page > PAGE) {
		malloc_write("<jemalloc>: Unsupported system page size\n");
		if (opt_abort) {
			abort();
		}
		return true;
	}

#ifndef _WIN32
	mmap_flags = MAP_PRIVATE | MAP_ANON;
#endif
#ifdef __CHERI_PURE_CAPABILITY__
	mmap_flags |= MAP_ALIGNED(LG_PAGE);
#endif

#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
	os_overcommits = os_overcommits_sysctl();
#elif defined(JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY)
	os_overcommits = os_overcommits_proc();
# ifdef MAP_NORESERVE
	if (os_overcommits) {
		mmap_flags |= MAP_NORESERVE;
	}
# endif
#else
	os_overcommits = false;
#endif

	init_thp_state();

#ifdef __FreeBSD__
	/*
	 * FreeBSD doesn't need the check; madvise(2) is known to work.
	 */
#else
	/* Detect lazy purge runtime support. */
	if (pages_can_purge_lazy) {
		bool committed = false;
		void *madv_free_page = os_pages_map(NULL, PAGE, PAGE,
		    &committed);
		if (madv_free_page == NULL) {
			return true;
		}
		assert(pages_can_purge_lazy_runtime);
		if (pages_purge_lazy(madv_free_page, PAGE)) {
			pages_can_purge_lazy_runtime = false;
		}
		os_pages_unmap(madv_free_page, PAGE);
	}
#endif

	return false;
}
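/*
 * Illustrative sketch of the runtime probe above: on the platforms this
 * non-FreeBSD branch targets, pages_purge_lazy() boils down to
 * madvise(MADV_FREE), and pages_boot() discovers whether the running kernel
 * honors it by trying it on a scratch page.  The helper below is
 * hypothetical (not part of jemalloc) and shows only the bare pattern.
 */
#include <stdbool.h>
#include <stddef.h>
#include <sys/mman.h>

static bool
probe_madv_free(size_t page_size) {
	void *p = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANON, -1, 0);
	if (p == MAP_FAILED) {
		return false;	/* Cannot probe; assume unsupported. */
	}
#ifdef MADV_FREE
	/* A zero return means the kernel accepted MADV_FREE. */
	bool supported = (madvise(p, page_size, MADV_FREE) == 0);
#else
	bool supported = false;
#endif
	munmap(p, page_size);
	return supported;
}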
/*
 * Map a shared object into memory.  The argument is a file descriptor,
 * which must be open on the object and positioned at its beginning.
 *
 * The return value is a pointer to a newly-allocated Obj_Entry structure
 * for the shared object.  Returns NULL on failure.
 */
Obj_Entry *
_rtld_map_object(const char *path, int fd, const struct stat *sb)
{
	Obj_Entry *obj;
	Elf_Ehdr *ehdr;
	Elf_Phdr *phdr;
	size_t phsize;
	Elf_Phdr *phlimit;
	Elf_Phdr *segs[2];
	int nsegs;
	caddr_t mapbase = MAP_FAILED;
	size_t mapsize = 0;
	size_t bsssize = 0;
	int mapflags;
	Elf_Off base_offset;
#ifdef MAP_ALIGNED
	Elf_Addr base_alignment;
#endif
	Elf_Addr base_vaddr;
	Elf_Addr base_vlimit;
	Elf_Addr text_vlimit;
	int text_flags;
	caddr_t base_addr;
	Elf_Off data_offset;
	Elf_Addr data_vaddr;
	Elf_Addr data_vlimit;
	int data_flags;
	caddr_t data_addr;
	Elf_Addr phdr_vaddr;
	size_t phdr_memsz;
	caddr_t gap_addr;
	size_t gap_size;
	int i;
#ifdef RTLD_LOADER
	Elf_Addr clear_vaddr;
	caddr_t clear_addr;
	size_t nclear;
#endif

	if (sb != NULL && sb->st_size < (off_t)sizeof (Elf_Ehdr)) {
		_rtld_error("%s: unrecognized file format1", path);
		return NULL;
	}

	obj = _rtld_obj_new();
	obj->path = xstrdup(path);
	obj->pathlen = strlen(path);
	if (sb != NULL) {
		obj->dev = sb->st_dev;
		obj->ino = sb->st_ino;
	}

	ehdr = mmap(NULL, _rtld_pagesz, PROT_READ, MAP_FILE | MAP_SHARED, fd,
	    (off_t)0);
	obj->ehdr = ehdr;
	if (ehdr == MAP_FAILED) {
		_rtld_error("%s: read error: %s", path, xstrerror(errno));
		goto bad;
	}
	/* Make sure the file is valid */
	if (memcmp(ELFMAG, ehdr->e_ident, SELFMAG) != 0 ||
	    ehdr->e_ident[EI_CLASS] != ELFCLASS) {
		_rtld_error("%s: unrecognized file format2 [%x != %x]", path,
		    ehdr->e_ident[EI_CLASS], ELFCLASS);
		goto bad;
	}
	/* Elf_e_ident includes class */
	if (ehdr->e_ident[EI_VERSION] != EV_CURRENT ||
	    ehdr->e_version != EV_CURRENT ||
	    ehdr->e_ident[EI_DATA] != ELFDEFNNAME(MACHDEP_ENDIANNESS)) {
		_rtld_error("%s: unsupported file version", path);
		goto bad;
	}
	if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) {
		_rtld_error("%s: unsupported file type", path);
		goto bad;
	}
	switch (ehdr->e_machine) {
	ELFDEFNNAME(MACHDEP_ID_CASES)
	default:
		_rtld_error("%s: unsupported machine", path);
		goto bad;
	}

	/*
	 * We rely on the program header being in the first page.  This is
	 * not strictly required by the ABI specification, but it seems to
	 * always be true in practice.  And, it simplifies things
	 * considerably.
	 */
	assert(ehdr->e_phentsize == sizeof(Elf_Phdr));
	assert(ehdr->e_phoff + ehdr->e_phnum * sizeof(Elf_Phdr) <=
	    _rtld_pagesz);

	/*
	 * Scan the program header entries, and save key information.
	 *
	 * We rely on there being exactly two load segments, text and data,
	 * in that order.
	 */
	phdr = (Elf_Phdr *)((caddr_t)ehdr + ehdr->e_phoff);
	phsize = ehdr->e_phnum * sizeof(phdr[0]);
	obj->phdr = NULL;
	phdr_vaddr = EA_UNDEF;
	phdr_memsz = 0;
	phlimit = phdr + ehdr->e_phnum;
	nsegs = 0;

	while (phdr < phlimit) {
		switch (phdr->p_type) {
		case PT_INTERP:
			obj->interp = (void *)(uintptr_t)phdr->p_vaddr;
			dbg(("%s: PT_INTERP %p", obj->path, obj->interp));
			break;

		case PT_LOAD:
			if (nsegs < 2)
				segs[nsegs] = phdr;
			++nsegs;
			dbg(("%s: PT_LOAD %p", obj->path, phdr));
			break;

		case PT_PHDR:
			phdr_vaddr = phdr->p_vaddr;
			phdr_memsz = phdr->p_memsz;
			dbg(("%s: PT_PHDR %p phsize %zu", obj->path,
			    (void *)(uintptr_t)phdr_vaddr, phdr_memsz));
			break;

		case PT_DYNAMIC:
			obj->dynamic = (void *)(uintptr_t)phdr->p_vaddr;
			dbg(("%s: PT_DYNAMIC %p", obj->path, obj->dynamic));
			break;
		}

		++phdr;
	}
	phdr = (Elf_Phdr *)((caddr_t)ehdr + ehdr->e_phoff);
	obj->entry = (void *)(uintptr_t)ehdr->e_entry;
	if (!obj->dynamic) {
		_rtld_error("%s: not dynamically linked", path);
		goto bad;
	}
	if (nsegs != 2) {
		_rtld_error("%s: wrong number of segments (%d != 2)", path,
		    nsegs);
		goto bad;
	}

	/*
	 * Map the entire address space of the object as a file
	 * region to stake out our contiguous region and establish a
	 * base for relocation.  We use a file mapping so that
	 * the kernel will give us whatever alignment is appropriate
	 * for the platform we're running on.
	 *
	 * We map it using the text protection, map the data segment
	 * into the right place, then map an anon segment for the bss
	 * and unmap the gaps left by padding to alignment.
	 */
#ifdef MAP_ALIGNED
	base_alignment = segs[0]->p_align;
#endif
	base_offset = round_down(segs[0]->p_offset);
	base_vaddr = round_down(segs[0]->p_vaddr);
	base_vlimit = round_up(segs[1]->p_vaddr + segs[1]->p_memsz);
	text_vlimit = round_up(segs[0]->p_vaddr + segs[0]->p_memsz);
	text_flags = protflags(segs[0]->p_flags);
	data_offset = round_down(segs[1]->p_offset);
	data_vaddr = round_down(segs[1]->p_vaddr);
	data_vlimit = round_up(segs[1]->p_vaddr + segs[1]->p_filesz);
	data_flags = protflags(segs[1]->p_flags);
#ifdef RTLD_LOADER
	clear_vaddr = segs[1]->p_vaddr + segs[1]->p_filesz;
#endif

	obj->textsize = text_vlimit - base_vaddr;
	obj->vaddrbase = base_vaddr;
	obj->isdynamic = ehdr->e_type == ET_DYN;

	obj->phdr_loaded = false;
	for (i = 0; i < nsegs; i++) {
		if (phdr_vaddr != EA_UNDEF &&
		    segs[i]->p_vaddr <= phdr_vaddr &&
		    segs[i]->p_memsz >= phdr_memsz) {
			obj->phdr_loaded = true;
			break;
		}
		if (segs[i]->p_offset <= ehdr->e_phoff &&
		    segs[i]->p_memsz >= phsize) {
			phdr_vaddr = segs[i]->p_vaddr + ehdr->e_phoff;
			phdr_memsz = phsize;
			obj->phdr_loaded = true;
			break;
		}
	}
	if (obj->phdr_loaded) {
		obj->phdr = (void *)(uintptr_t)phdr_vaddr;
		obj->phsize = phdr_memsz;
	} else {
		Elf_Phdr *buf;
		buf = xmalloc(phsize);
		if (buf == NULL) {
			_rtld_error("%s: cannot allocate program header",
			    path);
			goto bad;
		}
		memcpy(buf, phdr, phsize);
		obj->phdr = buf;
		obj->phsize = phsize;
	}
	dbg(("%s: phdr %p phsize %zu (%s)", obj->path, obj->phdr, obj->phsize,
	    obj->phdr_loaded ? "loaded" : "allocated"));

	/* Unmap header if it overlaps the first load section. */
	if (base_offset < _rtld_pagesz) {
		munmap(ehdr, _rtld_pagesz);
		obj->ehdr = MAP_FAILED;
	}

	/*
	 * Calculate log2 of the base section alignment.
	 */
	mapflags = 0;
#ifdef MAP_ALIGNED
	if (base_alignment > _rtld_pagesz) {
		unsigned int log2 = 0;
		for (; base_alignment > 1; base_alignment >>= 1)
			log2++;
		mapflags = MAP_ALIGNED(log2);
	}
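/*
 * The loader excerpt above rounds segment boundaries with round_down() and
 * round_up(), which are defined elsewhere in rtld.  A sketch of such
 * page-rounding helpers, assuming _rtld_pagesz is a power of two (which the
 * loader requires):
 */
#define round_down(x)	((x) & ~(_rtld_pagesz - 1))
#define round_up(x)	round_down((x) + _rtld_pagesz - 1)
/*
 * Example: with a 4 KiB page size, round_down(0x1234) yields 0x1000 and
 * round_up(0x1234) yields 0x2000.  The log2 loop at the end of the excerpt
 * works similarly on p_align: a 2 MiB (0x200000) alignment produces log2 = 21,
 * so the subsequent mmap() is asked for MAP_ALIGNED(21).
 */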
void *
pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
	assert(alignment >= PAGE);
	assert(ALIGNMENT_ADDR2BASE(addr, alignment) == (vaddr_t)addr);

#if defined(__FreeBSD__) && defined(MAP_EXCL)
	/*
	 * FreeBSD has mechanisms both to mmap at specific address without
	 * touching existing mappings, and to mmap with specific alignment.
	 */
	{
#ifdef __CHERI_PURE_CAPABILITY__
		if (size & CHERI_ALIGN_MASK(size))
			abort();
#endif
		if (os_overcommits) {
			*commit = true;
		}

		int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
		int flags = mmap_flags;

		if (addr != NULL) {
			flags |= MAP_FIXED | MAP_EXCL;
		} else {
			unsigned alignment_bits = ffs_zu(alignment);
			assert(alignment_bits > 1);
			flags |= MAP_ALIGNED(alignment_bits - 1);
		}

		void *ret = mmap(addr, size, prot, flags, -1, 0);
		if (ret == MAP_FAILED) {
			ret = NULL;
		}

		return ret;
	}
#endif
	/*
	 * Ideally, there would be a way to specify alignment to mmap() (like
	 * NetBSD has), but in the absence of such a feature, we have to work
	 * hard to efficiently create aligned mappings.  The reliable, but
	 * slow method is to create a mapping that is over-sized, then trim
	 * the excess.  However, that always results in one or two calls to
	 * os_pages_unmap(), and it can leave holes in the process's virtual
	 * memory map if memory grows downward.
	 *
	 * Optimistically try mapping precisely the right amount before
	 * falling back to the slow method, with the expectation that the
	 * optimistic approach works most of the time.
	 */

	void *ret = os_pages_map(addr, size, os_page, commit);
	if (ret == NULL || ret == addr) {
		return ret;
	}
	assert(addr == NULL);
	if (ALIGNMENT_ADDR2OFFSET((vaddr_t)ret, alignment) != 0) {
		os_pages_unmap(ret, size);
		return pages_map_slow(size, alignment, commit);
	}

	assert(PAGE_ADDR2BASE(ret) == ret);
	return ret;
}
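/*
 * pages_map_slow() is not shown above.  The following is a simplified,
 * self-contained sketch of the "over-size, then trim" fallback that the
 * comment describes, using raw mmap()/munmap(); jemalloc's real
 * implementation layers the same idea over os_pages_map()/os_pages_trim()
 * and retries until it succeeds.  The helper name map_aligned_slow is
 * hypothetical.
 */
#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>

static void *
map_aligned_slow(size_t size, size_t alignment, size_t page) {
	/* Over-allocate so some aligned address must fall inside. */
	size_t alloc_size = size + alignment - page;
	if (alloc_size < size) {	/* size_t overflow */
		return NULL;
	}
	void *base = mmap(NULL, alloc_size, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANON, -1, 0);
	if (base == MAP_FAILED) {
		return NULL;
	}

	/* Round up to the first aligned address within the mapping. */
	uintptr_t addr = (uintptr_t)base;
	uintptr_t aligned = (addr + alignment - 1) &
	    ~(uintptr_t)(alignment - 1);
	size_t leadsize = aligned - addr;
	size_t trailsize = alloc_size - leadsize - size;

	/* Trim the misaligned head and the excess tail. */
	if (leadsize != 0) {
		munmap(base, leadsize);
	}
	if (trailsize != 0) {
		munmap((void *)(aligned + size), trailsize);
	}
	return (void *)aligned;
}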