/*
 * vsunlock(addr, len, dirtied)
 *
 * Unwire (unlock) the user address range [addr, addr+len) in the current
 * task's map, undoing a previous vslock().  The range is expanded to page
 * boundaries using the map's page mask before vm_map_unwire() is called.
 *
 * The "dirtied" argument is currently ignored: the code that would mark
 * the underlying physical pages as modified is compiled out under FIXME
 * (and the "#ifdef lint" increment exists only to silence lint about the
 * unused parameter).
 *
 * Returns: 0 on success; ENOMEM for KERN_INVALID_ADDRESS / KERN_NO_SPACE;
 * EACCES for KERN_PROTECTION_FAILURE; EINVAL for any other Mach error.
 *
 * NOTE(review): this line was collapsed from multi-line source; the inline
 * "#if FIXME" / "#endif" directives only preprocess correctly once the
 * original line breaks are restored.
 */
int vsunlock( user_addr_t addr, user_size_t len, __unused int dirtied) { #if FIXME /* [ */ pmap_t pmap; vm_page_t pg; vm_map_offset_t vaddr; ppnum_t paddr; #endif /* FIXME ] */ kern_return_t kret; vm_map_t map; map = current_map(); #if FIXME /* [ */ if (dirtied) { pmap = get_task_pmap(current_task()); for (vaddr = vm_map_trunc_page(addr, PAGE_MASK); vaddr < vm_map_round_page(addr+len, PAGE_MASK); vaddr += PAGE_SIZE) { paddr = pmap_extract(pmap, vaddr); pg = PHYS_TO_VM_PAGE(paddr); vm_page_set_modified(pg); } } #endif /* FIXME ] */ #ifdef lint dirtied++; #endif /* lint */ kret = vm_map_unwire(map, vm_map_trunc_page(addr, vm_map_page_mask(map)), vm_map_round_page(addr+len, vm_map_page_mask(map)), FALSE); switch (kret) { case KERN_SUCCESS: return (0); case KERN_INVALID_ADDRESS: case KERN_NO_SPACE: return (ENOMEM); case KERN_PROTECTION_FAILURE: return (EACCES); default: return (EINVAL); } }
/*
 * vslock(addr, len)
 *
 * Wire down the user address range [addr, addr+len) in the current task's
 * map with read/write protection so the pages cannot be paged out.  The
 * range is expanded to page boundaries using the map's page mask.
 *
 * Returns: 0 on success; ENOMEM for KERN_INVALID_ADDRESS / KERN_NO_SPACE;
 * EACCES for KERN_PROTECTION_FAILURE; EINVAL for any other Mach error.
 */
int
vslock(user_addr_t addr, user_size_t len)
{
	vm_map_t	map = current_map();
	vm_map_offset_t	pgmask = vm_map_page_mask(map);
	kern_return_t	kret;

	kret = vm_map_wire(map,
	    vm_map_trunc_page(addr, pgmask),
	    vm_map_round_page(addr + len, pgmask),
	    VM_PROT_READ | VM_PROT_WRITE,
	    FALSE);

	/* Translate the Mach result into a BSD errno. */
	if (kret == KERN_SUCCESS)
		return (0);
	if (kret == KERN_INVALID_ADDRESS || kret == KERN_NO_SPACE)
		return (ENOMEM);
	if (kret == KERN_PROTECTION_FAILURE)
		return (EACCES);
	return (EINVAL);
}
/*
 * useracc(addr, len, prot)
 *
 * Check whether the current task's map permits access to the user range
 * [addr, addr+len): read access when prot == B_READ, write access
 * otherwise.  The range is expanded to page boundaries.
 *
 * Returns the boolean result of vm_map_check_protection().
 */
int
useracc(user_addr_t addr, user_size_t len, int prot)
{
	vm_prot_t	vmprot;

	/* Map the BSD direction flag onto a VM protection bit. */
	vmprot = (prot == B_READ) ? VM_PROT_READ : VM_PROT_WRITE;

	return (vm_map_check_protection(current_map(),
	    vm_map_trunc_page(addr),
	    vm_map_round_page(addr + len),
	    vmprot));
}
/*
 * kext_alloc_init
 *
 * One-time setup of the kext allocation submap (g_kext_map).  On x86_64
 * the submap is carved out of the kernel map adjacent to the kernel's
 * __TEXT segment (via KEXT_ALLOC_BASE/KEXT_ALLOC_SIZE) so that kext text
 * stays within 2GB of kernel text; the kernel map's min_offset is lowered
 * if needed to cover the new base.  Panics if kmem_suballoc() cannot place
 * the submap or the reservation does not reach kext_alloc_max.
 *
 * On other architectures the kernel map itself is used and the bounds are
 * simply VM_MIN_KERNEL_ADDRESS .. VM_MAX_KERNEL_ADDRESS.
 *
 * NOTE(review): collapsed from multi-line source; the inline "#if
 * __x86_64__" / "#else" / "#endif" only preprocess correctly once the
 * original line breaks are restored.
 */
/* * On x86_64 systems, kernel extension text must remain within 2GB of the * kernel's text segment. To ensure this happens, we snag 2GB of kernel VM * as early as possible for kext allocations. */ void kext_alloc_init(void) { #if __x86_64__ kern_return_t rval = 0; kernel_segment_command_t *text = NULL; mach_vm_offset_t text_end, text_start; mach_vm_size_t text_size; mach_vm_size_t kext_alloc_size; /* Determine the start of the kernel's __TEXT segment and determine the * lower bound of the allocated submap for kext allocations. */ text = getsegbyname(SEG_TEXT); text_start = vm_map_trunc_page(text->vmaddr); text_start &= ~((512ULL * 1024 * 1024 * 1024) - 1); text_end = vm_map_round_page(text->vmaddr + text->vmsize); text_size = text_end - text_start; kext_alloc_base = KEXT_ALLOC_BASE(text_end); kext_alloc_size = KEXT_ALLOC_SIZE(text_size); kext_alloc_max = kext_alloc_base + kext_alloc_size; /* Allocate the subblock of the kernel map */ rval = kmem_suballoc(kernel_map, (vm_offset_t *) &kext_alloc_base, kext_alloc_size, /* pageable */ TRUE, VM_FLAGS_FIXED|VM_FLAGS_OVERWRITE, &g_kext_map); if (rval != KERN_SUCCESS) { panic("kext_alloc_init: kmem_suballoc failed 0x%x\n", rval); } if ((kext_alloc_base + kext_alloc_size) > kext_alloc_max) { panic("kext_alloc_init: failed to get first 2GB\n"); } if (kernel_map->min_offset > kext_alloc_base) { kernel_map->min_offset = kext_alloc_base; } printf("kext submap [0x%llx - 0x%llx], kernel text [0x%llx - 0x%llx]\n", kext_alloc_base, kext_alloc_max, text->vmaddr, text->vmaddr + text->vmsize); #else g_kext_map = kernel_map; kext_alloc_base = VM_MIN_KERNEL_ADDRESS; kext_alloc_max = VM_MAX_KERNEL_ADDRESS; #endif /* __x86_64__ */ }
/*
 * useracc(addr, len, prot)
 *
 * Check whether the current task's map permits access to the user range
 * [addr, addr+len): read access when prot == B_READ, write access
 * otherwise.  The range is expanded to page boundaries using the map's
 * own page mask (supports maps with non-native page sizes).
 *
 * Returns the boolean result of vm_map_check_protection().
 */
int
useracc(user_addr_t addr, user_size_t len, int prot)
{
	vm_map_t	map = current_map();
	vm_map_offset_t	pgmask = vm_map_page_mask(map);

	return (vm_map_check_protection(map,
	    vm_map_trunc_page(addr, pgmask),
	    vm_map_round_page(addr + len, pgmask),
	    (prot == B_READ) ? VM_PROT_READ : VM_PROT_WRITE));
}
/*
 * mlock(2) --- wire the user address range [uap->addr, uap->addr+uap->len)
 * into physical memory for the current task.
 *
 * The range is aligned down to a page boundary and its length rounded up;
 * vm_map_wire() is called directly (rather than through mach_vm_wire) so
 * that VM_PROT_NONE can express "wire with whatever access already
 * exists", tagged for accounting with VM_KERN_MEMORY_MLOCK.
 *
 * Returns: 0 on success, EINVAL if the range wraps the address space,
 * EAGAIN on resource shortage, EACCES on protection failure, ENOMEM on
 * any other wiring failure.
 *
 * Fix: the original only rejected wrap-around *before* page rounding.
 * vm_map_round_page(size + pageoff) can itself overflow (to 0, or so that
 * addr + size wraps) for a request ending inside the last page of the
 * address space; we now re-check after alignment.
 */
int
mlock(__unused proc_t p, struct mlock_args *uap, __unused int32_t *retvalval)
{
	vm_map_t	user_map;
	vm_map_offset_t	addr;
	vm_map_size_t	size, pageoff;
	kern_return_t	result;

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);

	addr = (vm_map_offset_t) uap->addr;
	size = (vm_map_size_t)uap->len;

	/* disable wrap around */
	if (addr + size < addr)
		return (EINVAL);

	if (size == 0)
		return (0);

	user_map = current_map();

	/* Align the start down and round the length up to page boundaries. */
	pageoff = (addr & vm_map_page_mask(user_map));
	addr -= pageoff;
	size = vm_map_round_page(size + pageoff, vm_map_page_mask(user_map));

	/*
	 * Re-check wrap-around: the rounding above can overflow for ranges
	 * ending in the last page of the address space, which the
	 * pre-rounding check cannot catch.
	 */
	if (size == 0 || addr + size < addr)
		return (EINVAL);

	/* have to call vm_map_wire directly to pass "I don't know" protections */
	result = vm_map_wire(user_map, addr, addr + size,
	    VM_PROT_NONE | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_MLOCK),
	    TRUE);

	if (result == KERN_RESOURCE_SHORTAGE)
		return (EAGAIN);
	else if (result == KERN_PROTECTION_FAILURE)
		return (EACCES);
	else if (result != KERN_SUCCESS)
		return (ENOMEM);

	return (0);	/* KERN_SUCCESS */
}
/*
 * load_dylinker
 *
 * Load the dynamic linker (dyld) named by an LC_LOAD_DYLINKER command
 * into "map".  The pathname embedded in the command is first validated to
 * be NUL-terminated within lcp->cmdsize, then resolved to a vnode with
 * get_macho_vnode() and parsed with parse_machfile() directly into the
 * target map.
 *
 * If the direct load fails with LOAD_NOSPACE (something already occupies
 * dyld's preferred address range), the image is instead staged into a
 * temporary map (copy_map), a fresh range is allocated in "map" with
 * mach_vm_allocate(VM_FLAGS_ANYWHERE), and the staged contents are copied
 * over with vm_map_copyin()/vm_map_copy_overwrite(); the recorded entry
 * point is slid by (map_addr - dyl_start).  All failure paths tear down
 * the partially established mapping and the temporary map.
 *
 * On LOAD_SUCCESS sets result->dynlinker and result->entry_point, and
 * maps the vnode PROT_READ|PROT_EXEC via ubc_map().  The vnode reference
 * from get_macho_vnode() is always released via vnode_put().
 *
 * Returns a LOAD_* status code.
 */
static load_return_t load_dylinker( struct dylinker_command *lcp, integer_t archbits, vm_map_t map, thread_t thread, int depth, load_result_t *result, boolean_t is_64bit ) { char *name; char *p; struct vnode *vp = NULLVP; /* set by get_macho_vnode() */ struct mach_header header; off_t file_offset = 0; /* set by get_macho_vnode() */ off_t macho_size = 0; /* set by get_macho_vnode() */ vm_map_t copy_map; load_result_t myresult; kern_return_t ret; vm_map_copy_t tmp; mach_vm_offset_t dyl_start, map_addr; mach_vm_size_t dyl_length; name = (char *)lcp + lcp->name.offset; /* * Check for a proper null terminated string. */ p = name; do { if (p >= (char *)lcp + lcp->cmdsize) return(LOAD_BADMACHO); } while (*p++); ret = get_macho_vnode(name, archbits, &header, &file_offset, &macho_size, &vp); if (ret) return (ret); myresult = load_result_null; /* * First try to map dyld in directly. This should work most of * the time since there shouldn't normally be something already * mapped to its address. */ ret = parse_machfile(vp, map, thread, &header, file_offset, macho_size, depth, &myresult); /* * If it turned out something was in the way, then we'll take * take this longer path to map dyld into a temporary map and * copy it into destination map at a different address. */ if (ret == LOAD_NOSPACE) { /* * Load the Mach-O. * Use a temporary map to do the work. 
*/ copy_map = vm_map_create(pmap_create(vm_map_round_page(macho_size), is_64bit), get_map_min(map), get_map_max(map), TRUE); if (VM_MAP_NULL == copy_map) { ret = LOAD_RESOURCE; goto out; } myresult = load_result_null; ret = parse_machfile(vp, copy_map, thread, &header, file_offset, macho_size, depth, &myresult); if (ret) { vm_map_deallocate(copy_map); goto out; } if (get_map_nentries(copy_map) > 0) { dyl_start = mach_get_vm_start(copy_map); dyl_length = mach_get_vm_end(copy_map) - dyl_start; map_addr = dyl_start; ret = mach_vm_allocate(map, &map_addr, dyl_length, VM_FLAGS_ANYWHERE); if (ret != KERN_SUCCESS) { vm_map_deallocate(copy_map); ret = LOAD_NOSPACE; goto out; } ret = vm_map_copyin(copy_map, (vm_map_address_t)dyl_start, (vm_map_size_t)dyl_length, TRUE, &tmp); if (ret != KERN_SUCCESS) { (void) vm_map_remove(map, vm_map_trunc_page(map_addr), vm_map_round_page(map_addr + dyl_length), VM_MAP_NO_FLAGS); vm_map_deallocate(copy_map); goto out; } ret = vm_map_copy_overwrite(map, (vm_map_address_t)map_addr, tmp, FALSE); if (ret != KERN_SUCCESS) { vm_map_copy_discard(tmp); (void) vm_map_remove(map, vm_map_trunc_page(map_addr), vm_map_round_page(map_addr + dyl_length), VM_MAP_NO_FLAGS); vm_map_deallocate(copy_map); goto out; } if (map_addr != dyl_start) myresult.entry_point += (map_addr - dyl_start); } else { ret = LOAD_FAILURE; } vm_map_deallocate(copy_map); } if (ret == LOAD_SUCCESS) { result->dynlinker = TRUE; result->entry_point = myresult.entry_point; (void)ubc_map(vp, PROT_READ | PROT_EXEC); } out: vnode_put(vp); return (ret); }
/*
 * mmap(2) system call entry.
 *
 * Validates and translates the BSD mmap arguments, then establishes the
 * mapping via the Mach VM layer:
 *
 *  - Aligns file_pos down to a page boundary, carries the remainder in
 *    "pageoff", and rounds user_size up by the target map's page mask.
 *  - Rejects invalid flag combinations (MAP_JIT with FIXED/SHARED/
 *    RESILIENT_* or without MAP_ANON; RESILIENT_* with ANON/JIT or any
 *    write/execute protection).
 *  - MAP_ANON: optional MACF entitlement check; positive "fd" values are
 *    reinterpreted as Mach VM allocation flags/alias tags.
 *  - File-backed: looks up the fileproc, dispatches POSIX shm objects to
 *    pshm_mmap(), otherwise requires a VREG vnode, updates atime (unless
 *    MNT_NOATIME), and derives maxprot from the open file's FREAD/FWRITE
 *    flags (with an immutability check for shared writable mappings and
 *    a ban on writable mappings of swap files); swap files are mapped
 *    through a special obfuscating pager.
 *  - MAP_FIXED uses VM_FLAGS_FIXED|VM_FLAGS_OVERWRITE so replacement of
 *    any existing mapping is atomic; otherwise VM_FLAGS_ANYWHERE.
 *  - Under "#if 3777787" (radar): hardware cannot write-without-read or
 *    execute-without-read, so read access is implied for write/execute.
 *  - A single retry with a near-zero hint address is attempted if a
 *    non-fixed mapping fails with KERN_NO_SPACE.
 *  - Mach results are translated: SUCCESS->0 (*retval = addr + pageoff),
 *    INVALID_ADDRESS/NO_SPACE->ENOMEM, PROTECTION_FAILURE->EACCES,
 *    otherwise EINVAL; the swapfile pager reference (if any) is dropped
 *    and the fileproc released on all paths.
 *
 * NOTE(review): collapsed from multi-line source; the inline "#if"/"#ifdef"
 * directives only preprocess correctly once line breaks are restored.
 */
/* * XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct * XXX usage is PROT_* from an interface perspective. Thus the values of * XXX VM_PROT_* and PROT_* need to correspond. */ int mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval) { /* * Map in special device (must be SHARED) or file */ struct fileproc *fp; struct vnode *vp; int flags; int prot; int err=0; vm_map_t user_map; kern_return_t result; vm_map_offset_t user_addr; vm_map_size_t user_size; vm_object_offset_t pageoff; vm_object_offset_t file_pos; int alloc_flags = 0; vm_tag_t tag = VM_KERN_MEMORY_NONE; vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE; boolean_t docow; vm_prot_t maxprot; void *handle; memory_object_t pager = MEMORY_OBJECT_NULL; memory_object_control_t control; int mapanon=0; int fpref=0; int error =0; int fd = uap->fd; int num_retries = 0; /* * Note that for UNIX03 conformance, there is additional parameter checking for * mmap() system call in libsyscall prior to entering the kernel. The sanity * checks and argument validation done in this function are not the only places * one can get returned errnos. */ user_map = current_map(); user_addr = (vm_map_offset_t)uap->addr; user_size = (vm_map_size_t) uap->len; AUDIT_ARG(addr, user_addr); AUDIT_ARG(len, user_size); AUDIT_ARG(fd, uap->fd); prot = (uap->prot & VM_PROT_ALL); #if 3777787 /* * Since the hardware currently does not support writing without * read-before-write, or execution-without-read, if the request is * for write or execute access, we must imply read access as well; * otherwise programs expecting this to work will fail to operate. */ if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) prot |= VM_PROT_READ; #endif /* radar 3777787 */ flags = uap->flags; vp = NULLVP; /* * The vm code does not have prototypes & compiler doesn't do the' * the right thing when you cast 64bit value and pass it in function * call. So here it is. 
*/ file_pos = (vm_object_offset_t)uap->pos; /* make sure mapping fits into numeric range etc */ if (file_pos + user_size > (vm_object_offset_t)-PAGE_SIZE_64) return (EINVAL); /* * Align the file position to a page boundary, * and save its page offset component. */ pageoff = (file_pos & vm_map_page_mask(user_map)); file_pos -= (vm_object_offset_t)pageoff; /* Adjust size for rounding (on both ends). */ user_size += pageoff; /* low end... */ user_size = vm_map_round_page(user_size, vm_map_page_mask(user_map)); /* hi end */ if (flags & MAP_JIT) { if ((flags & MAP_FIXED) || (flags & MAP_SHARED) || !(flags & MAP_ANON) || (flags & MAP_RESILIENT_CODESIGN) || (flags & MAP_RESILIENT_MEDIA)) { return EINVAL; } } if ((flags & MAP_RESILIENT_CODESIGN) || (flags & MAP_RESILIENT_MEDIA)) { if ((flags & MAP_ANON) || (flags & MAP_JIT)) { return EINVAL; } if (prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) { return EPERM; } } /* * Check for illegal addresses. Watch out for address wrap... Note * that VM_*_ADDRESS are not constants due to casts (argh). */ if (flags & MAP_FIXED) { /* * The specified address must have the same remainder * as the file offset taken modulo PAGE_SIZE, so it * should be aligned after adjustment by pageoff. */ user_addr -= pageoff; if (user_addr & vm_map_page_mask(user_map)) return (EINVAL); } #ifdef notyet /* DO not have apis to get this info, need to wait till then*/ /* * XXX for non-fixed mappings where no hint is provided or * the hint would fall in the potential heap space, * place it after the end of the largest possible heap. * * There should really be a pmap call to determine a reasonable * location. */ else if (addr < vm_map_round_page(p->p_vmspace->vm_daddr + MAXDSIZ, vm_map_page_mask(user_map))) addr = vm_map_round_page(p->p_vmspace->vm_daddr + MAXDSIZ, vm_map_page_mask(user_map)); #endif alloc_flags = 0; if (flags & MAP_ANON) { maxprot = VM_PROT_ALL; #if CONFIG_MACF /* * Entitlement check. 
*/ error = mac_proc_check_map_anon(p, user_addr, user_size, prot, flags, &maxprot); if (error) { return EINVAL; } #endif /* MAC */ /* * Mapping blank space is trivial. Use positive fds as the alias * value for memory tracking. */ if (fd != -1) { /* * Use "fd" to pass (some) Mach VM allocation flags, * (see the VM_FLAGS_* definitions). */ alloc_flags = fd & (VM_FLAGS_ALIAS_MASK | VM_FLAGS_SUPERPAGE_MASK | VM_FLAGS_PURGABLE | VM_FLAGS_4GB_CHUNK); if (alloc_flags != fd) { /* reject if there are any extra flags */ return EINVAL; } VM_GET_FLAGS_ALIAS(alloc_flags, tag); alloc_flags &= ~VM_FLAGS_ALIAS_MASK; } handle = NULL; file_pos = 0; mapanon = 1; } else { struct vnode_attr va; vfs_context_t ctx = vfs_context_current(); if (flags & MAP_JIT) return EINVAL; /* * Mapping file, get fp for validation. Obtain vnode and make * sure it is of appropriate type. */ err = fp_lookup(p, fd, &fp, 0); if (err) return(err); fpref = 1; switch (FILEGLOB_DTYPE(fp->f_fglob)) { case DTYPE_PSXSHM: uap->addr = (user_addr_t)user_addr; uap->len = (user_size_t)user_size; uap->prot = prot; uap->flags = flags; uap->pos = file_pos; error = pshm_mmap(p, uap, retval, fp, (off_t)pageoff); goto bad; case DTYPE_VNODE: break; default: error = EINVAL; goto bad; } vp = (struct vnode *)fp->f_fglob->fg_data; error = vnode_getwithref(vp); if(error != 0) goto bad; if (vp->v_type != VREG && vp->v_type != VCHR) { (void)vnode_put(vp); error = EINVAL; goto bad; } AUDIT_ARG(vnpath, vp, ARG_VNODE1); /* * POSIX: mmap needs to update access time for mapped files */ if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) { VATTR_INIT(&va); nanotime(&va.va_access_time); VATTR_SET_ACTIVE(&va, va_access_time); vnode_setattr(vp, &va, ctx); } /* * XXX hack to handle use of /dev/zero to map anon memory (ala * SunOS). */ if (vp->v_type == VCHR || vp->v_type == VSTR) { (void)vnode_put(vp); error = ENODEV; goto bad; } else { /* * Ensure that file and memory protections are * compatible. 
Note that we only worry about * writability if mapping is shared; in this case, * current and max prot are dictated by the open file. * XXX use the vnode instead? Problem is: what * credentials do we use for determination? What if * proc does a setuid? */ maxprot = VM_PROT_EXECUTE; /* ??? */ if (fp->f_fglob->fg_flag & FREAD) maxprot |= VM_PROT_READ; else if (prot & PROT_READ) { (void)vnode_put(vp); error = EACCES; goto bad; } /* * If we are sharing potential changes (either via * MAP_SHARED or via the implicit sharing of character * device mappings), and we are trying to get write * permission although we opened it without asking * for it, bail out. */ if ((flags & MAP_SHARED) != 0) { if ((fp->f_fglob->fg_flag & FWRITE) != 0 && /* * Do not allow writable mappings of * swap files (see vm_swapfile_pager.c). */ !vnode_isswap(vp)) { /* * check for write access * * Note that we already made this check when granting FWRITE * against the file, so it seems redundant here. */ error = vnode_authorize(vp, NULL, KAUTH_VNODE_CHECKIMMUTABLE, ctx); /* if not granted for any reason, but we wanted it, bad */ if ((prot & PROT_WRITE) && (error != 0)) { vnode_put(vp); goto bad; } /* if writable, remember */ if (error == 0) maxprot |= VM_PROT_WRITE; } else if ((prot & PROT_WRITE) != 0) { (void)vnode_put(vp); error = EACCES; goto bad; } } else maxprot |= VM_PROT_WRITE; handle = (void *)vp; #if CONFIG_MACF error = mac_file_check_mmap(vfs_context_ucred(ctx), fp->f_fglob, prot, flags, file_pos, &maxprot); if (error) { (void)vnode_put(vp); goto bad; } #endif /* MAC */ } } if (user_size == 0) { if (!mapanon) (void)vnode_put(vp); error = 0; goto bad; } /* * We bend a little - round the start and end addresses * to the nearest page boundary. 
*/ user_size = vm_map_round_page(user_size, vm_map_page_mask(user_map)); if (file_pos & vm_map_page_mask(user_map)) { if (!mapanon) (void)vnode_put(vp); error = EINVAL; goto bad; } if ((flags & MAP_FIXED) == 0) { alloc_flags |= VM_FLAGS_ANYWHERE; user_addr = vm_map_round_page(user_addr, vm_map_page_mask(user_map)); } else { if (user_addr != vm_map_trunc_page(user_addr, vm_map_page_mask(user_map))) { if (!mapanon) (void)vnode_put(vp); error = EINVAL; goto bad; } /* * mmap(MAP_FIXED) will replace any existing mappings in the * specified range, if the new mapping is successful. * If we just deallocate the specified address range here, * another thread might jump in and allocate memory in that * range before we get a chance to establish the new mapping, * and we won't have a chance to restore the old mappings. * So we use VM_FLAGS_OVERWRITE to let Mach VM know that it * has to deallocate the existing mappings and establish the * new ones atomically. */ alloc_flags |= VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE; } if (flags & MAP_NOCACHE) alloc_flags |= VM_FLAGS_NO_CACHE; if (flags & MAP_JIT) { vmk_flags.vmkf_map_jit = TRUE; } if (flags & MAP_RESILIENT_CODESIGN) { alloc_flags |= VM_FLAGS_RESILIENT_CODESIGN; } /* * Lookup/allocate object. */ if (handle == NULL) { control = NULL; #ifdef notyet /* Hmm .. */ #if defined(VM_PROT_READ_IS_EXEC) if (prot & VM_PROT_READ) prot |= VM_PROT_EXECUTE; if (maxprot & VM_PROT_READ) maxprot |= VM_PROT_EXECUTE; #endif #endif #if 3777787 if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) prot |= VM_PROT_READ; if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) maxprot |= VM_PROT_READ; #endif /* radar 3777787 */ map_anon_retry: result = vm_map_enter_mem_object(user_map, &user_addr, user_size, 0, alloc_flags, vmk_flags, tag, IPC_PORT_NULL, 0, FALSE, prot, maxprot, (flags & MAP_SHARED) ? 
VM_INHERIT_SHARE : VM_INHERIT_DEFAULT); /* If a non-binding address was specified for this anonymous * mapping, retry the mapping with a zero base * in the event the mapping operation failed due to * lack of space between the address and the map's maximum. */ if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) { user_addr = vm_map_page_size(user_map); goto map_anon_retry; } } else { if (vnode_isswap(vp)) { /* * Map swap files with a special pager * that returns obfuscated contents. */ control = NULL; pager = swapfile_pager_setup(vp); if (pager != MEMORY_OBJECT_NULL) { control = swapfile_pager_control(pager); } } else { control = ubc_getobject(vp, UBC_FLAGS_NONE); } if (control == NULL) { (void)vnode_put(vp); error = ENOMEM; goto bad; } /* * Set credentials: * FIXME: if we're writing the file we need a way to * ensure that someone doesn't replace our R/W creds * with ones that only work for read. */ ubc_setthreadcred(vp, p, current_thread()); docow = FALSE; if ((flags & (MAP_ANON|MAP_SHARED)) == 0) { docow = TRUE; } #ifdef notyet /* Hmm .. 
*/ #if defined(VM_PROT_READ_IS_EXEC) if (prot & VM_PROT_READ) prot |= VM_PROT_EXECUTE; if (maxprot & VM_PROT_READ) maxprot |= VM_PROT_EXECUTE; #endif #endif /* notyet */ #if 3777787 if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) prot |= VM_PROT_READ; if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) maxprot |= VM_PROT_READ; #endif /* radar 3777787 */ map_file_retry: if ((flags & MAP_RESILIENT_CODESIGN) || (flags & MAP_RESILIENT_MEDIA)) { if (prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) { assert(!mapanon); vnode_put(vp); error = EPERM; goto bad; } /* strictly limit access to "prot" */ maxprot &= prot; } vm_object_offset_t end_pos = 0; if (os_add_overflow(user_size, file_pos, &end_pos)) { vnode_put(vp); error = EINVAL; goto bad; } result = vm_map_enter_mem_object_control(user_map, &user_addr, user_size, 0, alloc_flags, vmk_flags, tag, control, file_pos, docow, prot, maxprot, (flags & MAP_SHARED) ? VM_INHERIT_SHARE : VM_INHERIT_DEFAULT); /* If a non-binding address was specified for this file backed * mapping, retry the mapping with a zero base * in the event the mapping operation failed due to * lack of space between the address and the map's maximum. */ if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) { user_addr = vm_map_page_size(user_map); goto map_file_retry; } } if (!mapanon) { (void)vnode_put(vp); } switch (result) { case KERN_SUCCESS: *retval = user_addr + pageoff; error = 0; break; case KERN_INVALID_ADDRESS: case KERN_NO_SPACE: error = ENOMEM; break; case KERN_PROTECTION_FAILURE: error = EACCES; break; default: error = EINVAL; break; } bad: if (pager != MEMORY_OBJECT_NULL) { /* * Release the reference on the pager. * If the mapping was successful, it now holds * an extra reference. 
*/ memory_object_deallocate(pager); } if (fpref) fp_drop(p, fd, fp, 0); KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_mmap) | DBG_FUNC_NONE), fd, (uint32_t)(*retval), (uint32_t)user_size, error, 0); #ifndef CONFIG_EMBEDDED KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO2, SYS_mmap) | DBG_FUNC_NONE), (uint32_t)(*retval >> 32), (uint32_t)(user_size >> 32), (uint32_t)(file_pos >> 32), (uint32_t)file_pos, 0); #endif return(error); }
/*
 * kmem_alloc_contig
 *
 * Allocate "size" bytes of physically contiguous, wired kernel memory in
 * "map", honoring an address alignment mask and physical page constraints
 * (max_pnum / pnum_mask) via cpm_allocate().  Accepted flags are
 * KMA_KOBJECT (back with the kernel_object rather than a fresh VM
 * object), KMA_LOMEM and KMA_NOPAGEWAIT; anything else is rejected.
 *
 * Sequence: reserve map space with vm_map_find_space(), grab the
 * contiguous physical run, insert the pages into the backing object, then
 * wire the range with vm_map_wire(VM_PROT_DEFAULT).  Every failure path
 * removes the map entry and drops the object reference taken up front.
 *
 * On success *addrp receives the kernel virtual address (asserted to fit
 * in vm_offset_t).  Returns KERN_SUCCESS, KERN_INVALID_ARGUMENT for a bad
 * map / flags / zero size, or the failing sub-operation's code.
 */
kern_return_t kmem_alloc_contig( vm_map_t map, vm_offset_t *addrp, vm_size_t size, vm_offset_t mask, ppnum_t max_pnum, ppnum_t pnum_mask, int flags) { vm_object_t object; vm_object_offset_t offset; vm_map_offset_t map_addr; vm_map_offset_t map_mask; vm_map_size_t map_size, i; vm_map_entry_t entry; vm_page_t m, pages; kern_return_t kr; if (map == VM_MAP_NULL || (flags & ~(KMA_KOBJECT | KMA_LOMEM | KMA_NOPAGEWAIT))) return KERN_INVALID_ARGUMENT; if (size == 0) { *addrp = 0; return KERN_INVALID_ARGUMENT; } map_size = vm_map_round_page(size); map_mask = (vm_map_offset_t)mask; /* * Allocate a new object (if necessary) and the reference we * will be donating to the map entry. We must do this before * locking the map, or risk deadlock with the default pager. */ if ((flags & KMA_KOBJECT) != 0) { object = kernel_object; vm_object_reference(object); } else { object = vm_object_allocate(map_size); } kr = vm_map_find_space(map, &map_addr, map_size, map_mask, 0, &entry); if (KERN_SUCCESS != kr) { vm_object_deallocate(object); return kr; } entry->object.vm_object = object; entry->offset = offset = (object == kernel_object) ? 
map_addr : 0; /* Take an extra object ref in case the map entry gets deleted */ vm_object_reference(object); vm_map_unlock(map); kr = cpm_allocate(CAST_DOWN(vm_size_t, map_size), &pages, max_pnum, pnum_mask, FALSE, flags); if (kr != KERN_SUCCESS) { vm_map_remove(map, vm_map_trunc_page(map_addr), vm_map_round_page(map_addr + map_size), 0); vm_object_deallocate(object); *addrp = 0; return kr; } vm_object_lock(object); for (i = 0; i < map_size; i += PAGE_SIZE) { m = pages; pages = NEXT_PAGE(m); *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL; m->busy = FALSE; vm_page_insert(m, object, offset + i); } vm_object_unlock(object); if ((kr = vm_map_wire(map, vm_map_trunc_page(map_addr), vm_map_round_page(map_addr + map_size), VM_PROT_DEFAULT, FALSE)) != KERN_SUCCESS) { if (object == kernel_object) { vm_object_lock(object); vm_object_page_remove(object, offset, offset + map_size); vm_object_unlock(object); } vm_map_remove(map, vm_map_trunc_page(map_addr), vm_map_round_page(map_addr + map_size), 0); vm_object_deallocate(object); return kr; } vm_object_deallocate(object); if (object == kernel_object) vm_map_simplify(map, map_addr); *addrp = (vm_offset_t) map_addr; assert((vm_map_offset_t) *addrp == map_addr); return KERN_SUCCESS; }
/*
 * map_fd_funneled
 *
 * Legacy map_fd() work-horse: map "size" bytes of the regular file open
 * on "fd" at file offset "offset" into the current task's map.
 *
 * The file must be a readable VREG vnode and the offset page-aligned
 * (atime is updated unless MNT_NOATIME).  The file is first mapped
 * anywhere via its UBC pager with vm_map_64().  Then:
 *  - !findspace: *va (read from user space) names the required
 *    destination; the fresh mapping is moved there with
 *    vm_map_copyin()/vm_map_copy_overwrite().
 *  - findspace: the kernel-chosen address is copied out to *va.
 * Failure paths remove the temporary mapping.  On success the vnode is
 * ubc_map()'d with PROT_READ|PROT_EXEC and credentials refreshed.
 *
 * Returns 0 or a KERN_* error code (note: KERN_*, not errno, matching
 * the historical map_fd() interface).  The fileproc reference is dropped
 * on all paths.
 */
kern_return_t map_fd_funneled( int fd, vm_object_offset_t offset, vm_offset_t *va, boolean_t findspace, vm_size_t size) { kern_return_t result; struct fileproc *fp; struct vnode *vp; void * pager; vm_offset_t map_addr=0; vm_size_t map_size; int err=0; vm_map_t my_map; proc_t p = current_proc(); struct vnode_attr vattr; /* * Find the inode; verify that it's a regular file. */ err = fp_lookup(p, fd, &fp, 0); if (err) return(err); if (fp->f_fglob->fg_type != DTYPE_VNODE){ err = KERN_INVALID_ARGUMENT; goto bad; } if (!(fp->f_fglob->fg_flag & FREAD)) { err = KERN_PROTECTION_FAILURE; goto bad; } vp = (struct vnode *)fp->f_fglob->fg_data; err = vnode_getwithref(vp); if(err != 0) goto bad; if (vp->v_type != VREG) { (void)vnode_put(vp); err = KERN_INVALID_ARGUMENT; goto bad; } AUDIT_ARG(vnpath, vp, ARG_VNODE1); /* * POSIX: mmap needs to update access time for mapped files */ if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) { VATTR_INIT(&vattr); nanotime(&vattr.va_access_time); VATTR_SET_ACTIVE(&vattr, va_access_time); vnode_setattr(vp, &vattr, vfs_context_current()); } if (offset & PAGE_MASK_64) { printf("map_fd: file offset not page aligned(%d : %s)\n",p->p_pid, p->p_comm); (void)vnode_put(vp); err = KERN_INVALID_ARGUMENT; goto bad; } map_size = round_page(size); /* * Allow user to map in a zero length file. */ if (size == 0) { (void)vnode_put(vp); err = KERN_SUCCESS; goto bad; } /* * Map in the file. 
*/ pager = (void *)ubc_getpager(vp); if (pager == NULL) { (void)vnode_put(vp); err = KERN_FAILURE; goto bad; } my_map = current_map(); result = vm_map_64( my_map, &map_addr, map_size, (vm_offset_t)0, VM_FLAGS_ANYWHERE, pager, offset, TRUE, VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT); if (result != KERN_SUCCESS) { (void)vnode_put(vp); err = result; goto bad; } if (!findspace) { vm_offset_t dst_addr; vm_map_copy_t tmp; if (copyin(CAST_USER_ADDR_T(va), &dst_addr, sizeof (dst_addr)) || trunc_page_32(dst_addr) != dst_addr) { (void) vm_map_remove( my_map, map_addr, map_addr + map_size, VM_MAP_NO_FLAGS); (void)vnode_put(vp); err = KERN_INVALID_ADDRESS; goto bad; } result = vm_map_copyin(my_map, (vm_map_address_t)map_addr, (vm_map_size_t)map_size, TRUE, &tmp); if (result != KERN_SUCCESS) { (void) vm_map_remove(my_map, vm_map_trunc_page(map_addr), vm_map_round_page(map_addr + map_size), VM_MAP_NO_FLAGS); (void)vnode_put(vp); err = result; goto bad; } result = vm_map_copy_overwrite(my_map, (vm_map_address_t)dst_addr, tmp, FALSE); if (result != KERN_SUCCESS) { vm_map_copy_discard(tmp); (void)vnode_put(vp); err = result; goto bad; } } else { if (copyout(&map_addr, CAST_USER_ADDR_T(va), sizeof (map_addr))) { (void) vm_map_remove(my_map, vm_map_trunc_page(map_addr), vm_map_round_page(map_addr + map_size), VM_MAP_NO_FLAGS); (void)vnode_put(vp); err = KERN_INVALID_ADDRESS; goto bad; } } ubc_setthreadcred(vp, current_proc(), current_thread()); (void)ubc_map(vp, (PROT_READ | PROT_EXEC)); (void)vnode_put(vp); err = 0; bad: fp_drop(p, fd, fp, 0); return (err); }
/*
 * mach_port_space_info
 *
 * Return overall information about an IPC space plus a per-entry table
 * (ipc_info_name_t) describing every slot in the space's entry table.
 *
 * The result table is built in wired kernel memory sized in a retry loop
 * (the space can grow between allocation attempts), filled under the
 * space's read lock, then unwired and handed to the caller as a
 * vm_map_copy_t in *tablep.  Kernel pointers are exposed only as
 * VM_KERNEL_ADDRPERM values, and only when the MAC policy allows it
 * (dbg_ok) or on DEVELOPMENT/DEBUG kernels.  The splay-tree out
 * parameters are obsolete and always returned empty.
 *
 * Returns KERN_INVALID_TASK for a null/terminated space,
 * KERN_RESOURCE_SHORTAGE if the table cannot be allocated, else
 * KERN_SUCCESS.
 *
 * Fix: the final argument of vm_map_copyin() had been mangled by an HTML
 * entity ("©" for "&copy") - restored to "&copy" so the code compiles.
 * Code is otherwise unchanged (collapsed multi-line source; the inline
 * "#if !(DEVELOPMENT | DEBUG)" block needs its original line breaks to
 * preprocess).
 */
kern_return_t mach_port_space_info( ipc_space_t space, ipc_info_space_t *infop, ipc_info_name_array_t *tablep, mach_msg_type_number_t *tableCntp, __unused ipc_info_tree_name_array_t *treep, __unused mach_msg_type_number_t *treeCntp) { ipc_info_name_t *table_info; vm_offset_t table_addr; vm_size_t table_size, table_size_needed; ipc_entry_t table; ipc_entry_num_t tsize; mach_port_index_t index; kern_return_t kr; vm_map_copy_t copy; if (space == IS_NULL) return KERN_INVALID_TASK; #if !(DEVELOPMENT | DEBUG) const boolean_t dbg_ok = (mac_task_check_expose_task(kernel_task) == 0); #else const boolean_t dbg_ok = TRUE; #endif /* start with in-line memory */ table_size = 0; for (;;) { is_read_lock(space); if (!is_active(space)) { is_read_unlock(space); if (table_size != 0) kmem_free(ipc_kernel_map, table_addr, table_size); return KERN_INVALID_TASK; } table_size_needed = vm_map_round_page((space->is_table_size * sizeof(ipc_info_name_t)), VM_MAP_PAGE_MASK(ipc_kernel_map)); if (table_size_needed == table_size) break; is_read_unlock(space); if (table_size != table_size_needed) { if (table_size != 0) kmem_free(ipc_kernel_map, table_addr, table_size); kr = kmem_alloc(ipc_kernel_map, &table_addr, table_size_needed, VM_KERN_MEMORY_IPC); if (kr != KERN_SUCCESS) { return KERN_RESOURCE_SHORTAGE; } table_size = table_size_needed; } } /* space is read-locked and active; we have enough wired memory */ /* get the overall space info */ infop->iis_genno_mask = MACH_PORT_NGEN(MACH_PORT_DEAD); infop->iis_table_size = space->is_table_size; infop->iis_table_next = space->is_table_next->its_size; /* walk the table for this space */ table = space->is_table; tsize = space->is_table_size; table_info = (ipc_info_name_array_t)table_addr; for (index = 0; index < tsize; index++) { ipc_info_name_t *iin = &table_info[index]; ipc_entry_t entry = &table[index]; ipc_entry_bits_t bits; bits = entry->ie_bits; iin->iin_name = MACH_PORT_MAKE(index, IE_BITS_GEN(bits)); iin->iin_collision = 0; iin->iin_type = 
IE_BITS_TYPE(bits); if ((entry->ie_bits & MACH_PORT_TYPE_PORT_RIGHTS) != MACH_PORT_TYPE_NONE && entry->ie_request != IE_REQ_NONE) { __IGNORE_WCASTALIGN(ipc_port_t port = (ipc_port_t) entry->ie_object); assert(IP_VALID(port)); ip_lock(port); iin->iin_type |= ipc_port_request_type(port, iin->iin_name, entry->ie_request); ip_unlock(port); } iin->iin_urefs = IE_BITS_UREFS(bits); iin->iin_object = (dbg_ok) ? (natural_t)VM_KERNEL_ADDRPERM((uintptr_t)entry->ie_object) : 0; iin->iin_next = entry->ie_next; iin->iin_hash = entry->ie_index; } is_read_unlock(space); /* prepare the table out-of-line data for return */ if (table_size > 0) { vm_size_t used_table_size; used_table_size = infop->iis_table_size * sizeof(ipc_info_name_t); if (table_size > used_table_size) bzero((char *)&table_info[infop->iis_table_size], table_size - used_table_size); kr = vm_map_unwire( ipc_kernel_map, vm_map_trunc_page(table_addr, VM_MAP_PAGE_MASK(ipc_kernel_map)), vm_map_round_page(table_addr + table_size, VM_MAP_PAGE_MASK(ipc_kernel_map)), FALSE); assert(kr == KERN_SUCCESS); kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)table_addr, (vm_map_size_t)used_table_size, TRUE, &copy); assert(kr == KERN_SUCCESS); *tablep = (ipc_info_name_t *)copy; *tableCntp = infop->iis_table_size; } else { *tablep = (ipc_info_name_t *)0; *tableCntp = 0; } /* splay tree is obsolete, no work to do... */ *treep = (ipc_info_tree_name_t *)0; *treeCntp = 0; return KERN_SUCCESS; }
/*
 * kernel_memory_allocate
 *
 * Master kernel VM allocator: allocate "size" bytes of wired kernel
 * memory in "map", subject to alignment "mask" and KMA_* flags:
 *   KMA_KOBJECT     back with kernel_object instead of a new VM object
 *   KMA_GUARD_FIRST / KMA_GUARD_LAST
 *                   bracket the allocation with fictitious guard pages
 *                   (faults on access; see vm_fault_page())
 *   KMA_LOMEM       take pages from the low-memory pool
 *   KMA_NOPAGEWAIT  fail instead of waiting for free pages
 *   KMA_PERMANENT   mark the map entry permanent
 *   KMA_NOENCRYPT   zero each page and mark it not-to-be-encrypted
 *
 * All guard and wired pages are grabbed up front (so failure cannot
 * strand a half-built allocation), then map space is reserved with
 * vm_map_find_space(), pages are inserted into the backing object, and
 * each wired page is entered into the kernel pmap read/write.
 *
 * A single allocation is capped at 1GB (1 << 30) to bound the damage of
 * runaway wiring.  Panics if called before vm_kernel_ready.
 *
 * NOTE(review): this chunk is truncated - the tail of the function
 * (trailing guard-page insertion, map/object unlock and the "out:" error
 * path that releases the grabbed page lists) is not visible here, so the
 * visible text ends mid-function.  Collapsed from multi-line source.
 */
kern_return_t kernel_memory_allocate( register vm_map_t map, register vm_offset_t *addrp, register vm_size_t size, register vm_offset_t mask, int flags) { vm_object_t object; vm_object_offset_t offset; vm_object_offset_t pg_offset; vm_map_entry_t entry; vm_map_offset_t map_addr, fill_start; vm_map_offset_t map_mask; vm_map_size_t map_size, fill_size; kern_return_t kr; vm_page_t mem; vm_page_t guard_page_list = NULL; vm_page_t wired_page_list = NULL; int guard_page_count = 0; int wired_page_count = 0; int i; int vm_alloc_flags; if (! vm_kernel_ready) { panic("kernel_memory_allocate: VM is not ready"); } if (size == 0) { *addrp = 0; return KERN_INVALID_ARGUMENT; } map_size = vm_map_round_page(size); map_mask = (vm_map_offset_t) mask; vm_alloc_flags = 0; /* * limit the size of a single extent of wired memory * to try and limit the damage to the system if * too many pages get wired down */ if (map_size > (1 << 30)) { return KERN_RESOURCE_SHORTAGE; } /* * Guard pages: * * Guard pages are implemented as ficticious pages. By placing guard pages * on either end of a stack, they can help detect cases where a thread walks * off either end of its stack. They are allocated and set up here and attempts * to access those pages are trapped in vm_fault_page(). * * The map_size we were passed may include extra space for * guard pages. If those were requested, then back it out of fill_size * since vm_map_find_space() takes just the actual size not including * guard pages. Similarly, fill_start indicates where the actual pages * will begin in the range. 
*/ fill_start = 0; fill_size = map_size; if (flags & KMA_GUARD_FIRST) { vm_alloc_flags |= VM_FLAGS_GUARD_BEFORE; fill_start += PAGE_SIZE_64; fill_size -= PAGE_SIZE_64; if (map_size < fill_start + fill_size) { /* no space for a guard page */ *addrp = 0; return KERN_INVALID_ARGUMENT; } guard_page_count++; } if (flags & KMA_GUARD_LAST) { vm_alloc_flags |= VM_FLAGS_GUARD_AFTER; fill_size -= PAGE_SIZE_64; if (map_size <= fill_start + fill_size) { /* no space for a guard page */ *addrp = 0; return KERN_INVALID_ARGUMENT; } guard_page_count++; } wired_page_count = (int) (fill_size / PAGE_SIZE_64); assert(wired_page_count * PAGE_SIZE_64 == fill_size); for (i = 0; i < guard_page_count; i++) { for (;;) { mem = vm_page_grab_guard(); if (mem != VM_PAGE_NULL) break; if (flags & KMA_NOPAGEWAIT) { kr = KERN_RESOURCE_SHORTAGE; goto out; } vm_page_more_fictitious(); } mem->pageq.next = (queue_entry_t)guard_page_list; guard_page_list = mem; } for (i = 0; i < wired_page_count; i++) { uint64_t unavailable; for (;;) { if (flags & KMA_LOMEM) mem = vm_page_grablo(); else mem = vm_page_grab(); if (mem != VM_PAGE_NULL) break; if (flags & KMA_NOPAGEWAIT) { kr = KERN_RESOURCE_SHORTAGE; goto out; } if ((flags & KMA_LOMEM) && (vm_lopage_needed == TRUE)) { kr = KERN_RESOURCE_SHORTAGE; goto out; } unavailable = (vm_page_wire_count + vm_page_free_target) * PAGE_SIZE; if (unavailable > max_mem || map_size > (max_mem - unavailable)) { kr = KERN_RESOURCE_SHORTAGE; goto out; } VM_PAGE_WAIT(); } mem->pageq.next = (queue_entry_t)wired_page_list; wired_page_list = mem; } /* * Allocate a new object (if necessary). We must do this before * locking the map, or risk deadlock with the default pager. 
*/ if ((flags & KMA_KOBJECT) != 0) { object = kernel_object; vm_object_reference(object); } else { object = vm_object_allocate(map_size); } kr = vm_map_find_space(map, &map_addr, fill_size, map_mask, vm_alloc_flags, &entry); if (KERN_SUCCESS != kr) { vm_object_deallocate(object); goto out; } entry->object.vm_object = object; entry->offset = offset = (object == kernel_object) ? map_addr : 0; entry->wired_count++; if (flags & KMA_PERMANENT) entry->permanent = TRUE; if (object != kernel_object) vm_object_reference(object); vm_object_lock(object); vm_map_unlock(map); pg_offset = 0; if (fill_start) { if (guard_page_list == NULL) panic("kernel_memory_allocate: guard_page_list == NULL"); mem = guard_page_list; guard_page_list = (vm_page_t)mem->pageq.next; mem->pageq.next = NULL; vm_page_insert(mem, object, offset + pg_offset); mem->busy = FALSE; pg_offset += PAGE_SIZE_64; } for (pg_offset = fill_start; pg_offset < fill_start + fill_size; pg_offset += PAGE_SIZE_64) { if (wired_page_list == NULL) panic("kernel_memory_allocate: wired_page_list == NULL"); mem = wired_page_list; wired_page_list = (vm_page_t)mem->pageq.next; mem->pageq.next = NULL; mem->wire_count++; vm_page_insert(mem, object, offset + pg_offset); mem->busy = FALSE; mem->pmapped = TRUE; mem->wpmapped = TRUE; PMAP_ENTER(kernel_pmap, map_addr + pg_offset, mem, VM_PROT_READ | VM_PROT_WRITE, object->wimg_bits & VM_WIMG_MASK, TRUE); if (flags & KMA_NOENCRYPT) { bzero(CAST_DOWN(void *, (map_addr + pg_offset)), PAGE_SIZE); pmap_set_noencrypt(mem->phys_page); } }
/*
 * mincore() system call: for each page in [uap->addr, uap->addr + uap->len),
 * write one status byte into the user vector uap->vec reporting residency
 * (MINCORE_INCORE), reference (MINCORE_REFERENCED) and dirty
 * (MINCORE_MODIFIED) state, as reported by mach_vm_page_query().
 *
 * Returns 0 on success, EINVAL if the rounded range wraps the address
 * space, or EFAULT if the status vector cannot be written.
 */
int
mincore(__unused proc_t p, struct mincore_args *uap, __unused int32_t *retval)
{
	mach_vm_offset_t addr, first_addr, end;
	vm_map_t map;
	user_addr_t vec;
	int error;
	int vecindex, lastvecindex;
	int mincoreinfo=0;
	int pqueryinfo;
	kern_return_t ret;
	int numref;
	char c;

	map = current_map();

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.  Truncate the start and round the length to this
	 * map's page granularity.
	 */
	first_addr = addr = vm_map_trunc_page(uap->addr,
	    vm_map_page_mask(map));
	end = addr + vm_map_round_page(uap->len,
	    vm_map_page_mask(map));

	/* Reject a length that wraps the address space. */
	if (end < addr)
		return (EINVAL);

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current processes address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	for( ; addr < end; addr += PAGE_SIZE ) {
		pqueryinfo = 0;
		ret = mach_vm_page_query(map, addr, &pqueryinfo, &numref);
		if (ret != KERN_SUCCESS)
			pqueryinfo = 0;
		mincoreinfo = 0;
		if (pqueryinfo & VM_PAGE_QUERY_PAGE_PRESENT)
			mincoreinfo |= MINCORE_INCORE;
		if (pqueryinfo & VM_PAGE_QUERY_PAGE_REF)
			mincoreinfo |= MINCORE_REFERENCED;
		if (pqueryinfo & VM_PAGE_QUERY_PAGE_DIRTY)
			mincoreinfo |= MINCORE_MODIFIED;

		/*
		 * calculate index into user supplied byte vector
		 */
		vecindex = (addr - first_addr)>> PAGE_SHIFT;

		/*
		 * If we have skipped map entries, we need to make sure that
		 * the byte vector is zeroed for those skipped entries.
		 *
		 * FIX: advance lastvecindex *before* the copyout so that the
		 * skipped slot (lastvecindex + 1) is the one zeroed.  The
		 * original zeroed vec[lastvecindex] — the slot already
		 * written on the previous iteration (and vec[-1] if the very
		 * first slot were ever skipped).
		 */
		while((lastvecindex + 1) < vecindex) {
			++lastvecindex;
			c = 0;
			error = copyout(&c, vec + lastvecindex, 1);
			if (error) {
				return (EFAULT);
			}
		}

		/*
		 * Pass the page information to the user
		 */
		c = (char)mincoreinfo;
		error = copyout(&c, vec + vecindex, 1);
		if (error) {
			return (EFAULT);
		}
		lastvecindex = vecindex;
	}

	/*
	 * Zero the last entries in the byte vector.
	 * (Same off-by-one fix as above: increment before writing.)
	 */
	vecindex = (end - first_addr) >> PAGE_SHIFT;
	while((lastvecindex + 1) < vecindex) {
		++lastvecindex;
		c = 0;
		error = copyout(&c, vec + lastvecindex, 1);
		if (error) {
			return (EFAULT);
		}
	}

	return (0);
}
/*
 * Return an array of virtual pages that are mapped to a task.
 *
 * Debug-only (MACH_VM_DEBUG) interface.  Sizes a buffer from the pmap's
 * resident-page count, fills it via pmap_list_resident_pages(), and retries
 * with a larger buffer if the resident set grew in the meantime.  On
 * success *pages is a vm_map_copy_t of the page-address list and
 * *pages_count is the number of entries.
 */
kern_return_t
vm32_mapped_pages_info(
	__DEBUG_ONLY vm_map_t			map,
	__DEBUG_ONLY page_address_array_t	*pages,
	__DEBUG_ONLY mach_msg_type_number_t	*pages_count)
{
#if !MACH_VM_DEBUG
        return KERN_FAILURE;
#else
	pmap_t		pmap;
	vm_size_t	size, size_used;
	unsigned int	actual, space;
	page_address_array_t list;
	vm_offset_t	addr = 0;

	if (map == VM_MAP_NULL)
		return (KERN_INVALID_ARGUMENT);

	pmap = map->pmap;
	/* Initial guess: one vm_offset_t slot per currently-resident page,
	 * rounded up to the ipc_kernel_map's page size. */
	size = pmap_resident_count(pmap) * sizeof(vm_offset_t);
	size = vm_map_round_page(size,
				 VM_MAP_PAGE_MASK(ipc_kernel_map));

	for (;;) {
		(void) vm_allocate(ipc_kernel_map, &addr, size,
				   VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_IPC));
		/* Leave the buffer pageable; vm_map_copyin of the live
		 * portion happens later (NOTE(review): that copyin below
		 * relies on the explicit vm_map_wire beforehand). */
		(void) vm_map_unwire(
			ipc_kernel_map,
			vm_map_trunc_page(addr,
					  VM_MAP_PAGE_MASK(ipc_kernel_map)),
			vm_map_round_page(addr + size,
					  VM_MAP_PAGE_MASK(ipc_kernel_map)),
			FALSE);

		list = (page_address_array_t) addr;
		space = (unsigned int) (size / sizeof(vm_offset_t));

		actual = pmap_list_resident_pages(pmap, list, space);
		if (actual <= space)
			break;

		/*
		 * Free memory if not enough
		 */
		(void) kmem_free(ipc_kernel_map, addr, size);

		/*
		 * Try again, sized for the count just reported (the
		 * resident set may have grown since the first estimate).
		 */
		size = vm_map_round_page(actual * sizeof(vm_offset_t),
					 VM_MAP_PAGE_MASK(ipc_kernel_map));
	}

	if (actual == 0) {
		/* No resident pages: return an empty result and release
		 * the scratch buffer. */
		*pages = 0;
		*pages_count = 0;
		(void) kmem_free(ipc_kernel_map, addr, size);
	} else {
		vm_size_t vmsize_used;
		*pages_count = actual;
		size_used = (actual * sizeof(vm_offset_t));
		vmsize_used = vm_map_round_page(size_used,
						VM_MAP_PAGE_MASK(ipc_kernel_map));
		/* Wire the buffer so vm_map_copyin sees stable contents. */
		(void) vm_map_wire(
			ipc_kernel_map,
			vm_map_trunc_page(addr,
					  VM_MAP_PAGE_MASK(ipc_kernel_map)),
			vm_map_round_page(addr + size,
					  VM_MAP_PAGE_MASK(ipc_kernel_map)),
			VM_PROT_READ|VM_PROT_WRITE,
			FALSE);
		/* Hand the used prefix to the caller as an OOL copy. */
		(void) vm_map_copyin(ipc_kernel_map,
				(vm_map_address_t)addr,
				(vm_map_size_t)size_used,
				TRUE,
				(vm_map_copy_t *)pages);
		/* Free the rounded-up tail not consumed by the copy. */
		if (vmsize_used != size) {
			(void) kmem_free(ipc_kernel_map,
					addr + vmsize_used,
					size - vmsize_used);
		}
	}

	return (KERN_SUCCESS);
#endif /* MACH_VM_DEBUG */
}
/* * On x86_64 systems, kernel extension text must remain within 2GB of the * kernel's text segment. To ensure this happens, we snag 2GB of kernel VM * as early as possible for kext allocations. */ void kext_alloc_init(void) { #if CONFIG_KEXT_BASEMENT kern_return_t rval = 0; kernel_segment_command_t *text = NULL; kernel_segment_command_t *prelinkTextSegment = NULL; mach_vm_offset_t text_end, text_start; mach_vm_size_t text_size; mach_vm_size_t kext_alloc_size; /* Determine the start of the kernel's __TEXT segment and determine the * lower bound of the allocated submap for kext allocations. */ text = getsegbyname(SEG_TEXT); text_start = vm_map_trunc_page(text->vmaddr, VM_MAP_PAGE_MASK(kernel_map)); text_start &= ~((512ULL * 1024 * 1024 * 1024) - 1); text_end = vm_map_round_page(text->vmaddr + text->vmsize, VM_MAP_PAGE_MASK(kernel_map)); text_size = text_end - text_start; kext_alloc_base = KEXT_ALLOC_BASE(text_end); kext_alloc_size = KEXT_ALLOC_SIZE(text_size); kext_alloc_max = kext_alloc_base + kext_alloc_size; /* Post boot kext allocation will start after the prelinked kexts */ prelinkTextSegment = getsegbyname("__PRELINK_TEXT"); if (prelinkTextSegment) { /* use kext_post_boot_base to start allocations past all the prelinked * kexts */ kext_post_boot_base = vm_map_round_page(kext_alloc_base + prelinkTextSegment->vmsize, VM_MAP_PAGE_MASK(kernel_map)); } else { kext_post_boot_base = kext_alloc_base; } /* Allocate the sub block of the kernel map */ rval = kmem_suballoc(kernel_map, (vm_offset_t *) &kext_alloc_base, kext_alloc_size, /* pageable */ TRUE, VM_FLAGS_FIXED|VM_FLAGS_OVERWRITE, &g_kext_map); if (rval != KERN_SUCCESS) { panic("kext_alloc_init: kmem_suballoc failed 0x%x\n", rval); } if ((kext_alloc_base + kext_alloc_size) > kext_alloc_max) { panic("kext_alloc_init: failed to get first 2GB\n"); } if (kernel_map->min_offset > kext_alloc_base) { kernel_map->min_offset = kext_alloc_base; } printf("kext submap [0x%lx - 0x%lx], kernel text [0x%lx - 0x%lx]\n", 
VM_KERNEL_UNSLIDE(kext_alloc_base), VM_KERNEL_UNSLIDE(kext_alloc_max), VM_KERNEL_UNSLIDE(text->vmaddr), VM_KERNEL_UNSLIDE(text->vmaddr + text->vmsize)); #else g_kext_map = kernel_map; kext_alloc_base = VM_MIN_KERNEL_ADDRESS; kext_alloc_max = VM_MAX_KERNEL_ADDRESS; #endif /* CONFIG_KEXT_BASEMENT */ }
static load_return_t load_segment( struct load_command *lcp, uint32_t filetype, void * control, off_t pager_offset, off_t macho_size, struct vnode *vp, vm_map_t map, int64_t slide, load_result_t *result ) { struct segment_command_64 segment_command, *scp; kern_return_t ret; vm_map_offset_t map_addr, map_offset; vm_map_size_t map_size, seg_size, delta_size; vm_prot_t initprot; vm_prot_t maxprot; size_t segment_command_size, total_section_size, single_section_size; boolean_t prohibit_pagezero_mapping = FALSE; if (LC_SEGMENT_64 == lcp->cmd) { segment_command_size = sizeof(struct segment_command_64); single_section_size = sizeof(struct section_64); } else { segment_command_size = sizeof(struct segment_command); single_section_size = sizeof(struct section); } if (lcp->cmdsize < segment_command_size) return (LOAD_BADMACHO); total_section_size = lcp->cmdsize - segment_command_size; if (LC_SEGMENT_64 == lcp->cmd) scp = (struct segment_command_64 *)lcp; else { scp = &segment_command; widen_segment_command((struct segment_command *)lcp, scp); } /* * Make sure what we get from the file is really ours (as specified * by macho_size). */ if (scp->fileoff + scp->filesize < scp->fileoff || scp->fileoff + scp->filesize > (uint64_t)macho_size) return (LOAD_BADMACHO); /* * Ensure that the number of sections specified would fit * within the load command size. */ if (total_section_size / single_section_size < scp->nsects) return (LOAD_BADMACHO); /* * Make sure the segment is page-aligned in the file. */ if ((scp->fileoff & PAGE_MASK_64) != 0) return (LOAD_BADMACHO); /* * Round sizes to page size. 
*/ seg_size = round_page_64(scp->vmsize); map_size = round_page_64(scp->filesize); map_addr = trunc_page_64(scp->vmaddr); /* JVXXX note that in XNU TOT this is round instead of trunc for 64 bits */ seg_size = vm_map_round_page(seg_size, vm_map_page_mask(map)); map_size = vm_map_round_page(map_size, vm_map_page_mask(map)); if (seg_size == 0) return (KERN_SUCCESS); if (map_addr == 0 && map_size == 0 && seg_size != 0 && (scp->initprot & VM_PROT_ALL) == VM_PROT_NONE && (scp->maxprot & VM_PROT_ALL) == VM_PROT_NONE) { /* * For PIE, extend page zero rather than moving it. Extending * page zero keeps early allocations from falling predictably * between the end of page zero and the beginning of the first * slid segment. */ seg_size += slide; slide = 0; /* XXX (4596982) this interferes with Rosetta, so limit to 64-bit tasks */ if (scp->cmd == LC_SEGMENT_64) { prohibit_pagezero_mapping = TRUE; } if (prohibit_pagezero_mapping) { /* * This is a "page zero" segment: it starts at address 0, * is not mapped from the binary file and is not accessible. * User-space should never be able to access that memory, so * make it completely off limits by raising the VM map's * minimum offset. */ ret = vm_map_raise_min_offset(map, seg_size); if (ret != KERN_SUCCESS) { return (LOAD_FAILURE); } return (LOAD_SUCCESS); } } /* If a non-zero slide was specified by the caller, apply now */ map_addr += slide; if (map_addr < result->min_vm_addr) result->min_vm_addr = map_addr; if (map_addr+seg_size > result->max_vm_addr) result->max_vm_addr = map_addr+seg_size; if (map == VM_MAP_NULL) return (LOAD_SUCCESS); map_offset = pager_offset + scp->fileoff; /* limited to 32 bits */ if (map_size > 0) { initprot = (scp->initprot) & VM_PROT_ALL; maxprot = (scp->maxprot) & VM_PROT_ALL; /* * Map a copy of the file into the address space. 
*/ ret = vm_map_enter_mem_object_control(map, &map_addr, map_size, (mach_vm_offset_t)0, VM_FLAGS_FIXED, control, map_offset, TRUE, initprot, maxprot, VM_INHERIT_DEFAULT); if (ret != KERN_SUCCESS) { return (LOAD_NOSPACE); } /* * If the file didn't end on a page boundary, * we need to zero the leftover. */ delta_size = map_size - scp->filesize; #if FIXME if (delta_size > 0) { mach_vm_offset_t tmp; ret = mach_vm_allocate(kernel_map, &tmp, delta_size, VM_FLAGS_ANYWHERE); if (ret != KERN_SUCCESS) return(LOAD_RESOURCE); if (copyout(tmp, map_addr + scp->filesize, delta_size)) { (void) mach_vm_deallocate( kernel_map, tmp, delta_size); return (LOAD_FAILURE); } (void) mach_vm_deallocate(kernel_map, tmp, delta_size); } #endif /* FIXME */ } /* * If the virtual size of the segment is greater * than the size from the file, we need to allocate * zero fill memory for the rest. */ delta_size = seg_size - map_size; if (delta_size > 0) { mach_vm_offset_t tmp = map_addr + map_size; ret = mach_vm_map(map, &tmp, delta_size, 0, VM_FLAGS_FIXED, NULL, 0, FALSE, scp->initprot, scp->maxprot, VM_INHERIT_DEFAULT); if (ret != KERN_SUCCESS) return(LOAD_NOSPACE); } if ( (scp->fileoff == 0) && (scp->filesize != 0) ) result->mach_header = map_addr; if (scp->flags & SG_PROTECTED_VERSION_1) { ret = unprotect_segment(scp->fileoff, scp->filesize, vp, pager_offset, map, map_addr, map_size); } else { ret = LOAD_SUCCESS; } if (LOAD_SUCCESS == ret && filetype == MH_DYLINKER && result->all_image_info_addr == MACH_VM_MIN_ADDRESS) note_all_image_info_section(scp, LC_SEGMENT_64 == lcp->cmd, single_section_size, (const char *)lcp + segment_command_size, slide, result); if ((result->entry_point >= map_addr) && (result->entry_point < (map_addr + map_size))) result->validentry = 1; return ret; }
/*
 * mach_port_space_info
 *
 * Return debugging information about an IPC space: overall parameters in
 * *infop, plus per-entry detail for the entry table (*tablep/*tableCntp)
 * and the splay tree (*treep/*treeCntp), each returned as an out-of-line
 * vm_map_copy_t array.
 */
kern_return_t
mach_port_space_info(
	ipc_space_t			space,
	ipc_info_space_t		*infop,
	ipc_info_name_array_t		*tablep,
	mach_msg_type_number_t 		*tableCntp,
	ipc_info_tree_name_array_t	*treep,
	mach_msg_type_number_t 		*treeCntp)
{
	ipc_info_name_t *table_info;
	vm_offset_t table_addr;
	vm_size_t table_size, table_size_needed;
	ipc_info_tree_name_t *tree_info;
	vm_offset_t tree_addr;
	vm_size_t tree_size, tree_size_needed;
	ipc_tree_entry_t tentry;
	ipc_entry_t table;
	ipc_entry_num_t tsize;
	mach_port_index_t index;
	kern_return_t kr;
	vm_map_copy_t copy;

	if (space == IS_NULL)
		return KERN_INVALID_TASK;

	/* start with in-line memory */

	table_size = 0;
	tree_size = 0;

	/*
	 * Sizing loop: compute needed buffer sizes under the space lock;
	 * if the current buffers don't match, drop the lock, (re)allocate,
	 * and retry, since the space may have grown or shrunk meanwhile.
	 * table_addr/tree_addr are only read when the matching size is
	 * non-zero, so they are never used uninitialized.
	 */
	for (;;) {
		is_read_lock(space);
		if (!space->is_active) {
			is_read_unlock(space);
			if (table_size != 0)
				kmem_free(ipc_kernel_map,
					  table_addr, table_size);
			if (tree_size != 0)
				kmem_free(ipc_kernel_map,
					  tree_addr, tree_size);
			return KERN_INVALID_TASK;
		}

		table_size_needed = round_page(space->is_table_size
					       * sizeof(ipc_info_name_t));
		tree_size_needed = round_page(space->is_tree_total
					     * sizeof(ipc_info_tree_name_t));

		if ((table_size_needed == table_size) &&
		    (tree_size_needed == tree_size))
			break;

		is_read_unlock(space);

		/* Reallocate whichever buffer has the wrong size; on
		 * allocation failure release the other buffer too. */
		if (table_size != table_size_needed) {
			if (table_size != 0)
				kmem_free(ipc_kernel_map, table_addr, table_size);
			kr = kmem_alloc(ipc_kernel_map, &table_addr, table_size_needed);
			if (kr != KERN_SUCCESS) {
				if (tree_size != 0)
					kmem_free(ipc_kernel_map, tree_addr, tree_size);
				return KERN_RESOURCE_SHORTAGE;
			}
			table_size = table_size_needed;
		}

		if (tree_size != tree_size_needed) {
			if (tree_size != 0)
				kmem_free(ipc_kernel_map, tree_addr, tree_size);
			kr = kmem_alloc(ipc_kernel_map, &tree_addr, tree_size_needed);
			if (kr != KERN_SUCCESS) {
				if (table_size != 0)
					kmem_free(ipc_kernel_map, table_addr, table_size);
				return KERN_RESOURCE_SHORTAGE;
			}
			tree_size = tree_size_needed;
		}
	}
	/* space is read-locked and active; we have enough wired memory */

	/* get the overall space info */
	infop->iis_genno_mask = MACH_PORT_NGEN(MACH_PORT_DEAD);
	infop->iis_table_size = space->is_table_size;
	infop->iis_table_next = space->is_table_next->its_size;
	infop->iis_tree_size = space->is_tree_total;
	infop->iis_tree_small = space->is_tree_small;
	infop->iis_tree_hash = space->is_tree_hash;

	/* walk the table for this space */
	table = space->is_table;
	tsize = space->is_table_size;
	table_info = (ipc_info_name_array_t)table_addr;
	for (index = 0; index < tsize; index++) {
		ipc_info_name_t *iin = &table_info[index];
		ipc_entry_t entry = &table[index];
		ipc_entry_bits_t bits;

		bits = entry->ie_bits;
		/* Reconstruct the port name from its index + generation. */
		iin->iin_name = MACH_PORT_MAKE(index, IE_BITS_GEN(bits));
		iin->iin_collision = (bits & IE_BITS_COLLISION) ? TRUE : FALSE;
		iin->iin_type = IE_BITS_TYPE(bits);
		if (entry->ie_request)
			iin->iin_type |= MACH_PORT_TYPE_DNREQUEST;
		iin->iin_urefs = IE_BITS_UREFS(bits);
		iin->iin_object = (vm_offset_t) entry->ie_object;
		iin->iin_next = entry->ie_next;
		iin->iin_hash = entry->ie_index;
	}

	/* walk the splay tree for this space */
	tree_info = (ipc_info_tree_name_array_t)tree_addr;
	for (tentry = ipc_splay_traverse_start(&space->is_tree), index = 0;
	     tentry != ITE_NULL;
	     tentry = ipc_splay_traverse_next(&space->is_tree, FALSE)) {
		ipc_info_tree_name_t *iitn = &tree_info[index++];
		ipc_info_name_t *iin = &iitn->iitn_name;
		ipc_entry_t entry = &tentry->ite_entry;
		ipc_entry_bits_t bits = entry->ie_bits;

		assert(IE_BITS_TYPE(bits) != MACH_PORT_TYPE_NONE);

		iin->iin_name = tentry->ite_name;
		iin->iin_collision = (bits & IE_BITS_COLLISION) ?
			TRUE : FALSE;
		iin->iin_type = IE_BITS_TYPE(bits);
		if (entry->ie_request)
			iin->iin_type |= MACH_PORT_TYPE_DNREQUEST;
		iin->iin_urefs = IE_BITS_UREFS(bits);
		iin->iin_object = (vm_offset_t) entry->ie_object;
		iin->iin_next = entry->ie_next;
		iin->iin_hash = entry->ie_index;

		/* Record child names so the caller can rebuild the tree. */
		if (tentry->ite_lchild == ITE_NULL)
			iitn->iitn_lchild = MACH_PORT_NULL;
		else
			iitn->iitn_lchild = tentry->ite_lchild->ite_name;

		if (tentry->ite_rchild == ITE_NULL)
			iitn->iitn_rchild = MACH_PORT_NULL;
		else
			iitn->iitn_rchild = tentry->ite_rchild->ite_name;
	}
	ipc_splay_traverse_finish(&space->is_tree);
	is_read_unlock(space);

	/* prepare the table out-of-line data for return */
	if (table_size > 0) {
		/* Zero the slack beyond the live entries before export so no
		 * stale kernel data leaks to the caller. */
		if (table_size > infop->iis_table_size * sizeof(ipc_info_name_t))
			bzero((char *)&table_info[infop->iis_table_size],
			      table_size - infop->iis_table_size * sizeof(ipc_info_name_t));

		/* Unwire so vm_map_copyin can take the pages. */
		kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(table_addr),
				   vm_map_round_page(table_addr + table_size), FALSE);
		assert(kr == KERN_SUCCESS);
		kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)table_addr,
				   (vm_map_size_t)table_size, TRUE, &copy);
		assert(kr == KERN_SUCCESS);
		*tablep = (ipc_info_name_t *)copy;
		*tableCntp = infop->iis_table_size;
	} else {
		*tablep = (ipc_info_name_t *)0;
		*tableCntp = 0;
	}

	/* prepare the tree out-of-line data for return */
	if (tree_size > 0) {
		/* Same slack-zeroing as the table above. */
		if (tree_size > infop->iis_tree_size * sizeof(ipc_info_tree_name_t))
			bzero((char *)&tree_info[infop->iis_tree_size],
			      tree_size - infop->iis_tree_size * sizeof(ipc_info_tree_name_t));

		kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(tree_addr),
				   vm_map_round_page(tree_addr + tree_size), FALSE);
		assert(kr == KERN_SUCCESS);
		kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)tree_addr,
				   (vm_map_size_t)tree_size, TRUE, &copy);
		assert(kr == KERN_SUCCESS);
		*treep = (ipc_info_tree_name_t *)copy;
		*treeCntp = infop->iis_tree_size;
	} else {
		*treep = (ipc_info_tree_name_t *)0;
		*treeCntp = 0;
	}

	return KERN_SUCCESS;
}
/*
 * vm32_region_info_64
 *
 * Debug-only (MACH_VM_DEBUG) interface: describe the VM region containing
 * `address` in `map` (*regionp) and return, out-of-line, one
 * vm_info_object_t per object in the region's shadow chain
 * (*objectsp/*objectsCntp).  Retries with a larger buffer if the chain
 * doesn't fit.
 */
kern_return_t
vm32_region_info_64(
	__DEBUG_ONLY vm_map_t			map,
	__DEBUG_ONLY vm32_offset_t		address,
	__DEBUG_ONLY vm_info_region_64_t	*regionp,
	__DEBUG_ONLY vm_info_object_array_t	*objectsp,
	__DEBUG_ONLY mach_msg_type_number_t	*objectsCntp)
{
#if !MACH_VM_DEBUG
        return KERN_FAILURE;
#else
	vm_map_copy_t copy;
	vm_offset_t addr = 0;		/* memory for OOL data */
	vm_size_t size;			/* size of the memory */
	unsigned int room;		/* room for this many objects */
	unsigned int used;		/* actually this many objects */
	vm_info_region_64_t region;
	kern_return_t kr;

	if (map == VM_MAP_NULL)
		return KERN_INVALID_TASK;

	size = 0;			/* no memory allocated yet */

	for (;;) {
		vm_map_t cmap;		/* current map in traversal */
		vm_map_t nmap;		/* next map to look at */
		vm_map_entry_t entry;
		vm_object_t object, cobject, nobject;

		/* nothing is locked */

		/* Descend through submaps (hand-over-hand read locks)
		 * until a real entry at/after `address` is found. */
		vm_map_lock_read(map);
		for (cmap = map;; cmap = nmap) {
			/* cmap is read-locked */

			if (!vm_map_lookup_entry(cmap, address, &entry)) {
				/* No entry contains address: take the next
				 * one, or fail if we ran off the map. */
				entry = entry->vme_next;
				if (entry == vm_map_to_entry(cmap)) {
					vm_map_unlock_read(cmap);
					if (size != 0)
						kmem_free(ipc_kernel_map,
							  addr, size);
					return KERN_NO_SPACE;
				}
			}

			if (entry->is_sub_map)
				nmap = VME_SUBMAP(entry);
			else
				break;

			/* move down to the lower map */

			vm_map_lock_read(nmap);
			vm_map_unlock_read(cmap);
		}

		/* cmap is read-locked; we have a real entry */

		object = VME_OBJECT(entry);
		region.vir_start = (natural_t) entry->vme_start;
		region.vir_end = (natural_t) entry->vme_end;
		region.vir_object = (natural_t)(uintptr_t) object;
		region.vir_offset = VME_OFFSET(entry);
		region.vir_needs_copy = entry->needs_copy;
		region.vir_protection = entry->protection;
		region.vir_max_protection = entry->max_protection;
		region.vir_inheritance = entry->inheritance;
		region.vir_wired_count = entry->wired_count;
		region.vir_user_wired_count = entry->user_wired_count;

		used = 0;
		room = (unsigned int) (size / sizeof(vm_info_object_t));

		if (object == VM_OBJECT_NULL) {
			vm_map_unlock_read(cmap);
			/* no memory needed */
			break;
		}

		/* Lock the first object before releasing the map so the
		 * shadow chain can't change underneath us. */
		vm_object_lock(object);
		vm_map_unlock_read(cmap);

		/* Walk the shadow chain, filling in one record per object
		 * while there is room; keep counting even when full so we
		 * know how much to allocate on retry. */
		for (cobject = object;; cobject = nobject) {
			/* cobject is locked */

			if (used < room) {
				vm_info_object_t *vio =
					&((vm_info_object_t *) addr)[used];

				vio->vio_object =
					(natural_t)(uintptr_t) cobject;
				vio->vio_size =
					(natural_t) cobject->vo_size;
				vio->vio_ref_count =
					cobject->ref_count;
				vio->vio_resident_page_count =
					cobject->resident_page_count;
				vio->vio_copy =
					(natural_t)(uintptr_t) cobject->copy;
				vio->vio_shadow =
					(natural_t)(uintptr_t) cobject->shadow;
				vio->vio_shadow_offset =
					(natural_t) cobject->vo_shadow_offset;
				vio->vio_paging_offset =
					(natural_t) cobject->paging_offset;
				vio->vio_copy_strategy =
					cobject->copy_strategy;
				vio->vio_last_alloc =
					(vm_offset_t) cobject->last_alloc;
				vio->vio_paging_in_progress =
					cobject->paging_in_progress +
					cobject->activity_in_progress;
				vio->vio_pager_created =
					cobject->pager_created;
				vio->vio_pager_initialized =
					cobject->pager_initialized;
				vio->vio_pager_ready =
					cobject->pager_ready;
				vio->vio_can_persist =
					cobject->can_persist;
				vio->vio_internal =
					cobject->internal;
				vio->vio_temporary =
					cobject->temporary;
				vio->vio_alive =
					cobject->alive;
				vio->vio_purgable =
					(cobject->purgable != VM_PURGABLE_DENY);
				vio->vio_purgable_volatile =
					(cobject->purgable == VM_PURGABLE_VOLATILE ||
					 cobject->purgable == VM_PURGABLE_EMPTY);
			}

			used++;
			nobject = cobject->shadow;
			if (nobject == VM_OBJECT_NULL) {
				vm_object_unlock(cobject);
				break;
			}

			vm_object_lock(nobject);
			vm_object_unlock(cobject);
		}

		/* nothing locked */

		if (used <= room)
			break;

		/* must allocate more memory */

		if (size != 0)
			kmem_free(ipc_kernel_map, addr, size);
		/* Allocate 2x what was needed, in case the chain grows. */
		size = vm_map_round_page(2 * used * sizeof(vm_info_object_t),
					 VM_MAP_PAGE_MASK(ipc_kernel_map));

		kr = vm_allocate(ipc_kernel_map, &addr, size,
				 VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_IPC));
		if (kr != KERN_SUCCESS)
			return KERN_RESOURCE_SHORTAGE;

		/* Wire the buffer so it can be filled without faulting
		 * while object locks are held on the next pass. */
		kr = vm_map_wire(
			ipc_kernel_map,
			vm_map_trunc_page(addr,
					  VM_MAP_PAGE_MASK(ipc_kernel_map)),
			vm_map_round_page(addr + size,
					  VM_MAP_PAGE_MASK(ipc_kernel_map)),
			VM_PROT_READ|VM_PROT_WRITE,
			FALSE);
		assert(kr == KERN_SUCCESS);
	}

	/* free excess memory; make remaining memory pageable */

	if (used == 0) {
		copy = VM_MAP_COPY_NULL;

		if (size != 0)
			kmem_free(ipc_kernel_map, addr, size);
	} else {
		vm_size_t size_used = (used * sizeof(vm_info_object_t));
		vm_size_t vmsize_used = vm_map_round_page(size_used,
				  VM_MAP_PAGE_MASK(ipc_kernel_map));

		/* Unwire the used portion so copyin can take the pages. */
		kr = vm_map_unwire(
			ipc_kernel_map,
			vm_map_trunc_page(addr,
					  VM_MAP_PAGE_MASK(ipc_kernel_map)),
			vm_map_round_page(addr + size_used,
					  VM_MAP_PAGE_MASK(ipc_kernel_map)),
			FALSE);
		assert(kr == KERN_SUCCESS);

		kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr,
				   (vm_map_size_t)size_used, TRUE, &copy);
		assert(kr == KERN_SUCCESS);

		/* Free the rounded-up tail beyond the copied portion. */
		if (size != vmsize_used)
			kmem_free(ipc_kernel_map,
				  addr + vmsize_used, size - vmsize_used);
	}

	*regionp = region;
	*objectsp = (vm_info_object_array_t) copy;
	*objectsCntp = used;
	return KERN_SUCCESS;
#endif /* MACH_VM_DEBUG */
}
/*
 * mprotect() system call: change the protection of the pages spanning
 * [uap->addr, uap->addr + uap->len) to uap->prot.
 *
 * Returns 0 on success; EINVAL for an unaligned address or unexpected
 * kernel error; EACCES when the request exceeds max protections; ENOMEM
 * for an invalid address range (per UNIX spec); ENOTSUP when
 * VM_PROT_TRUSTED is requested without CONFIG_DYNAMIC_CODE_SIGNING.
 */
int
mprotect(__unused proc_t p, struct mprotect_args *uap, __unused int32_t *retval)
{
	vm_prot_t prot;
	mach_vm_offset_t	user_addr;
	mach_vm_size_t	user_size;
	kern_return_t	result;
	vm_map_t	user_map;
#if CONFIG_MACF
	int error;
#endif

	AUDIT_ARG(addr, uap->addr);
	AUDIT_ARG(len, uap->len);
	AUDIT_ARG(value32, uap->prot);

	user_map = current_map();
	user_addr = (mach_vm_offset_t) uap->addr;
	user_size = (mach_vm_size_t) uap->len;
	/* Keep only the protection bits this path understands. */
	prot = (vm_prot_t)(uap->prot & (VM_PROT_ALL | VM_PROT_TRUSTED | VM_PROT_STRIP_READ));

	if (user_addr & vm_map_page_mask(user_map)) {
		/* UNIX SPEC: user address is not page-aligned, return EINVAL */
		return EINVAL;
	}

#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
	if (prot & VM_PROT_READ)
		prot |= VM_PROT_EXECUTE;
#endif
#endif /* notyet */

#if 3936456
	/* rdar 3936456: execute or write implies read. */
	if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
		prot |= VM_PROT_READ;
#endif	/* 3936456 */

#if defined(__arm64__)
	/* VM_PROT_STRIP_READ cancels the implied-read rule above. */
	if (prot & VM_PROT_STRIP_READ)
		prot &= ~(VM_PROT_READ | VM_PROT_STRIP_READ);
#endif

#if CONFIG_MACF
	/*
	 * The MAC check for mprotect is of limited use for 2 reasons:
	 * Without mmap revocation, the caller could have asked for the max
	 * protections initially instead of a reduced set, so a mprotect
	 * check would offer no new security.
	 * It is not possible to extract the vnode from the pager object(s)
	 * of the target memory range.
	 * However, the MAC check may be used to prevent a process from,
	 * e.g., making the stack executable.
	 */
	error = mac_proc_check_mprotect(p, user_addr,
	    		user_size, prot);
	if (error)
		return (error);
#endif

	if(prot & VM_PROT_TRUSTED) {
#if CONFIG_DYNAMIC_CODE_SIGNING
		/* CODE SIGNING ENFORCEMENT - JIT support */
		/* The special protection value VM_PROT_TRUSTED requests that we treat
		 * this page as if it had a valid code signature.
		 * If this is enabled, there MUST be a MAC policy implementing the
		 * mac_proc_check_mprotect() hook above. Otherwise, Codesigning will be
		 * compromised because the check would always succeed and thusly any
		 * process could sign dynamically. */
		result = vm_map_sign(
			user_map,
			vm_map_trunc_page(user_addr,
					  vm_map_page_mask(user_map)),
			vm_map_round_page(user_addr+user_size,
					  vm_map_page_mask(user_map)));
		switch (result) {
			case KERN_SUCCESS:
				break;
			case KERN_INVALID_ADDRESS:
				/* UNIX SPEC: for an invalid address range, return ENOMEM */
				return ENOMEM;
			default:
				return EINVAL;
		}
#else
		return ENOTSUP;
#endif
	}
	/* Never let the TRUSTED pseudo-bit reach the generic protect path. */
	prot &= ~VM_PROT_TRUSTED;

	result = mach_vm_protect(user_map, user_addr, user_size,
				 FALSE, prot);
	switch (result) {
	case KERN_SUCCESS:
		return (0);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	case KERN_INVALID_ADDRESS:
		/* UNIX SPEC: for an invalid address range, return ENOMEM */
		return ENOMEM;
	}
	return (EINVAL);
}
/*
 * host_virtual_physical_table_info
 *
 * Debug-only (MACH_VM_DEBUG) interface: return the VP table hash bucket
 * statistics.  Tries the caller's in-line storage first and grows an
 * out-of-line buffer until vm_page_info() reports a count that fits.
 */
kern_return_t
host_virtual_physical_table_info(
	__DEBUG_ONLY host_t			host,
	__DEBUG_ONLY hash_info_bucket_array_t	*infop,
	__DEBUG_ONLY mach_msg_type_number_t 	*countp)
{
#if !MACH_VM_DEBUG
        return KERN_FAILURE;
#else
	vm_offset_t		buffer = 0;
	vm_size_t		buffer_size = 0;
	hash_info_bucket_t	*buckets;
	unsigned int		capacity, nbuckets;
	kern_return_t		kr;

	if (host == HOST_NULL)
		return KERN_INVALID_HOST;

	/* First attempt uses the caller-supplied in-line storage. */
	buckets = *infop;
	capacity = *countp;

	for (;;) {
		nbuckets = vm_page_info(buckets, capacity);
		if (nbuckets <= capacity)
			break;

		/* Too small: drop any previous buffer and size for the
		 * count just reported. */
		if (buckets != *infop)
			kmem_free(ipc_kernel_map, buffer, buffer_size);

		buffer_size = vm_map_round_page(nbuckets * sizeof *buckets,
						VM_MAP_PAGE_MASK(ipc_kernel_map));
		kr = vm_allocate(ipc_kernel_map, &buffer, buffer_size,
				 VM_FLAGS_ANYWHERE | VM_MAKE_TAG(VM_KERN_MEMORY_IPC));
		if (kr != KERN_SUCCESS)
			return KERN_RESOURCE_SHORTAGE;

		buckets = (hash_info_bucket_t *) buffer;
		capacity = (unsigned int) (buffer_size / sizeof *buckets);
	}

	if (buckets == *infop) {
		/* Data fit in-line; nothing to hand back out-of-line. */
		*countp = nbuckets;
	} else if (nbuckets == 0) {
		/* Nothing to report; release the scratch buffer. */
		kmem_free(ipc_kernel_map, buffer, buffer_size);
		*countp = 0;
	} else {
		vm_map_copy_t copy;
		vm_size_t used, vmused;

		used = (nbuckets * sizeof *buckets);
		vmused = vm_map_round_page(used,
					   VM_MAP_PAGE_MASK(ipc_kernel_map));
		/* Return excess pages before handing the rest to copyin. */
		if (vmused != buffer_size)
			kmem_free(ipc_kernel_map, buffer + vmused,
				  buffer_size - vmused);

		kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)buffer,
				   (vm_map_size_t)used, TRUE, &copy);
		assert(kr == KERN_SUCCESS);

		*infop = (hash_info_bucket_t *) copy;
		*countp = nbuckets;
	}

	return KERN_SUCCESS;
#endif /* MACH_VM_DEBUG */
}
/* * host_processor_info * * Return info about the processors on this host. It will return * the number of processors, and the specific type of info requested * in an OOL array. */ kern_return_t host_processor_info( host_t host, processor_flavor_t flavor, natural_t *out_pcount, processor_info_array_t *out_array, mach_msg_type_number_t *out_array_count) { kern_return_t result; processor_t processor; host_t thost; processor_info_t info; unsigned int icount, tcount; unsigned int pcount, i; vm_offset_t addr; vm_size_t size, needed; vm_map_copy_t copy; if (host == HOST_NULL) return (KERN_INVALID_ARGUMENT); result = processor_info_count(flavor, &icount); if (result != KERN_SUCCESS) return (result); pcount = processor_count; assert(pcount != 0); needed = pcount * icount * sizeof(natural_t); size = round_page(needed); result = kmem_alloc(ipc_kernel_map, &addr, size); if (result != KERN_SUCCESS) return (KERN_RESOURCE_SHORTAGE); info = (processor_info_t) addr; processor = processor_list; tcount = icount; result = processor_info(processor, flavor, &thost, info, &tcount); if (result != KERN_SUCCESS) { kmem_free(ipc_kernel_map, addr, size); return (result); } if (pcount > 1) { for (i = 1; i < pcount; i++) { simple_lock(&processor_list_lock); processor = processor->processor_list; simple_unlock(&processor_list_lock); info += icount; tcount = icount; result = processor_info(processor, flavor, &thost, info, &tcount); if (result != KERN_SUCCESS) { kmem_free(ipc_kernel_map, addr, size); return (result); } } } if (size != needed) bzero((char *) addr + needed, size - needed); result = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(addr), vm_map_round_page(addr + size), FALSE); assert(result == KERN_SUCCESS); result = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr, (vm_map_size_t)size, TRUE, ©); assert(result == KERN_SUCCESS); *out_pcount = pcount; *out_array = (processor_info_array_t) copy; *out_array_count = pcount * icount; return (KERN_SUCCESS); }