/* * Destroy old address space, and allocate a new stack. * The new stack is only sgrowsiz large because it is grown * automatically on a page fault. */ int exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv) { int error; struct proc *p = imgp->proc; struct vmspace *vmspace = p->p_vmspace; vm_object_t obj; struct rlimit rlim_stack; vm_offset_t sv_minuser, stack_addr; vm_map_t map; u_long ssiz; imgp->vmspace_destroyed = 1; imgp->sysent = sv; /* May be called with Giant held */ EVENTHANDLER_DIRECT_INVOKE(process_exec, p, imgp); /* * Blow away entire process VM, if address space not shared, * otherwise, create a new VM space so that other threads are * not disrupted */ map = &vmspace->vm_map; if (map_at_zero) sv_minuser = sv->sv_minuser; else sv_minuser = MAX(sv->sv_minuser, PAGE_SIZE); if (vmspace->vm_refcnt == 1 && vm_map_min(map) == sv_minuser && vm_map_max(map) == sv->sv_maxuser && cpu_exec_vmspace_reuse(p, map)) { shmexit(vmspace); pmap_remove_pages(vmspace_pmap(vmspace)); vm_map_remove(map, vm_map_min(map), vm_map_max(map)); /* * An exec terminates mlockall(MCL_FUTURE), ASLR state * must be re-evaluated. */ vm_map_lock(map); vm_map_modflags(map, 0, MAP_WIREFUTURE | MAP_ASLR | MAP_ASLR_IGNSTART); vm_map_unlock(map); } else { error = vmspace_exec(p, sv_minuser, sv->sv_maxuser); if (error) return (error); vmspace = p->p_vmspace; map = &vmspace->vm_map; } map->flags |= imgp->map_flags; /* Map a shared page */ obj = sv->sv_shared_page_obj; if (obj != NULL) { vm_object_reference(obj); error = vm_map_fixed(map, obj, 0, sv->sv_shared_page_base, sv->sv_shared_page_len, VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_READ | VM_PROT_EXECUTE, MAP_INHERIT_SHARE | MAP_ACC_NO_CHARGE); if (error != KERN_SUCCESS) { vm_object_deallocate(obj); return (vm_mmap_to_errno(error)); } } /* Allocate a new stack */ if (imgp->stack_sz != 0) { ssiz = trunc_page(imgp->stack_sz); PROC_LOCK(p); lim_rlimit_proc(p, RLIMIT_STACK, &rlim_stack); PROC_UNLOCK(p); if (ssiz > rlim_stack.rlim_max) ssiz = rlim_stack.rlim_max; if (ssiz > rlim_stack.rlim_cur) { rlim_stack.rlim_cur = ssiz; kern_setrlimit(curthread, RLIMIT_STACK, &rlim_stack); } } else if (sv->sv_maxssiz != NULL) { ssiz = *sv->sv_maxssiz; } else { ssiz = maxssiz; } stack_addr = sv->sv_usrstack - ssiz; error = vm_map_stack(map, stack_addr, (vm_size_t)ssiz, obj != NULL && imgp->stack_prot != 0 ? imgp->stack_prot : sv->sv_stackprot, VM_PROT_ALL, MAP_STACK_GROWS_DOWN); if (error != KERN_SUCCESS) return (vm_mmap_to_errno(error)); /* * vm_ssize and vm_maxsaddr are somewhat antiquated concepts, but they * are still used to enforce the stack rlimit on the process stack. */ vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT; vmspace->vm_maxsaddr = (char *)stack_addr; return (0); }
/* * Internal version of mmap. * Currently used by mmap, exec, and sys5 shared memory. * Handle is either a vnode pointer or NULL for MAP_ANON. * * No requirements */ int vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, vm_prot_t maxprot, int flags, void *handle, vm_ooffset_t foff) { boolean_t fitit; vm_object_t object; vm_offset_t eaddr; vm_size_t esize; vm_size_t align; int (*uksmap)(cdev_t dev, vm_page_t fake); struct vnode *vp; struct thread *td = curthread; struct proc *p; int rv = KERN_SUCCESS; off_t objsize; int docow; int error; if (size == 0) return (0); objsize = round_page(size); if (objsize < size) return (EINVAL); size = objsize; lwkt_gettoken(&map->token); /* * XXX messy code, fixme * * NOTE: Overflow checks require discrete statements or GCC4 * will optimize it out. */ if ((p = curproc) != NULL && map == &p->p_vmspace->vm_map) { esize = map->size + size; /* workaround gcc4 opt */ if (esize < map->size || esize > p->p_rlimit[RLIMIT_VMEM].rlim_cur) { lwkt_reltoken(&map->token); return(ENOMEM); } } /* * We currently can only deal with page aligned file offsets. * The check is here rather than in the syscall because the * kernel calls this function internally for other mmaping * operations (such as in exec) and non-aligned offsets will * cause pmap inconsistencies...so we want to be sure to * disallow this in all cases. * * NOTE: Overflow checks require discrete statements or GCC4 * will optimize it out. */ if (foff & PAGE_MASK) { lwkt_reltoken(&map->token); return (EINVAL); } /* * Handle alignment. For large memory maps it is possible * that the MMU can optimize the page table so align anything * that is a multiple of SEG_SIZE to SEG_SIZE. * * Also align any large mapping (bigger than 16x SG_SIZE) to a * SEG_SIZE address boundary. */ if (flags & MAP_SIZEALIGN) { align = size; if ((align ^ (align - 1)) != (align << 1) - 1) { lwkt_reltoken(&map->token); return (EINVAL); } } else if ((flags & MAP_FIXED) == 0 && ((size & SEG_MASK) == 0 || size > SEG_SIZE * 16)) { align = SEG_SIZE; } else { align = PAGE_SIZE; } if ((flags & (MAP_FIXED | MAP_TRYFIXED)) == 0) { fitit = TRUE; *addr = round_page(*addr); } else { if (*addr != trunc_page(*addr)) { lwkt_reltoken(&map->token); return (EINVAL); } eaddr = *addr + size; if (eaddr < *addr) { lwkt_reltoken(&map->token); return (EINVAL); } fitit = FALSE; if ((flags & MAP_TRYFIXED) == 0) vm_map_remove(map, *addr, *addr + size); } uksmap = NULL; /* * Lookup/allocate object. */ if (flags & MAP_ANON) { /* * Unnamed anonymous regions always start at 0. */ if (handle) { /* * Default memory object */ object = default_pager_alloc(handle, objsize, prot, foff); if (object == NULL) { lwkt_reltoken(&map->token); return(ENOMEM); } docow = MAP_PREFAULT_PARTIAL; } else { /* * Implicit single instance of a default memory * object, so we don't need a VM object yet. */ foff = 0; object = NULL; docow = 0; } vp = NULL; } else { vp = (struct vnode *)handle; /* * Non-anonymous mappings of VCHR (aka not /dev/zero) * cannot specify MAP_STACK or MAP_VPAGETABLE. */ if (vp->v_type == VCHR) { if (flags & (MAP_STACK | MAP_VPAGETABLE)) { lwkt_reltoken(&map->token); return(EINVAL); } } if (vp->v_type == VCHR && vp->v_rdev->si_ops->d_uksmap) { /* * Device mappings without a VM object, typically * sharing permanently allocated kernel memory or * process-context-specific (per-process) data. * * Force them to be shared. */ uksmap = vp->v_rdev->si_ops->d_uksmap; object = NULL; docow = MAP_PREFAULT_PARTIAL; flags &= ~(MAP_PRIVATE|MAP_COPY); flags |= MAP_SHARED; } else if (vp->v_type == VCHR) { /* * Device mappings (device size unknown?). * Force them to be shared. */ error = dev_dmmap_single(vp->v_rdev, &foff, objsize, &object, prot, NULL); if (error == ENODEV) { handle = (void *)(intptr_t)vp->v_rdev; object = dev_pager_alloc(handle, objsize, prot, foff); if (object == NULL) { lwkt_reltoken(&map->token); return(EINVAL); } } else if (error) { lwkt_reltoken(&map->token); return(error); } docow = MAP_PREFAULT_PARTIAL; flags &= ~(MAP_PRIVATE|MAP_COPY); flags |= MAP_SHARED; } else { /* * Regular file mapping (typically). The attribute * check is for the link count test only. mmapable * vnodes must already have a VM object assigned. */ struct vattr vat; int error; error = VOP_GETATTR(vp, &vat); if (error) { lwkt_reltoken(&map->token); return (error); } docow = MAP_PREFAULT_PARTIAL; object = vnode_pager_reference(vp); if (object == NULL && vp->v_type == VREG) { lwkt_reltoken(&map->token); kprintf("Warning: cannot mmap vnode %p, no " "object\n", vp); return(EINVAL); } /* * If it is a regular file without any references * we do not need to sync it. */ if (vp->v_type == VREG && vat.va_nlink == 0) { flags |= MAP_NOSYNC; } } } /* * Deal with the adjusted flags */ if ((flags & (MAP_ANON|MAP_SHARED)) == 0) docow |= MAP_COPY_ON_WRITE; if (flags & MAP_NOSYNC) docow |= MAP_DISABLE_SYNCER; if (flags & MAP_NOCORE) docow |= MAP_DISABLE_COREDUMP; #if defined(VM_PROT_READ_IS_EXEC) if (prot & VM_PROT_READ) prot |= VM_PROT_EXECUTE; if (maxprot & VM_PROT_READ) maxprot |= VM_PROT_EXECUTE; #endif /* * This may place the area in its own page directory if (size) is * large enough, otherwise it typically returns its argument. * * (object can be NULL) */ if (fitit) { *addr = pmap_addr_hint(object, *addr, size); } /* * Stack mappings need special attention. * * Mappings that use virtual page tables will default to storing * the page table at offset 0. */ if (uksmap) { rv = vm_map_find(map, uksmap, vp->v_rdev, foff, addr, size, align, fitit, VM_MAPTYPE_UKSMAP, prot, maxprot, docow); } else if (flags & MAP_STACK) { rv = vm_map_stack(map, *addr, size, flags, prot, maxprot, docow); } else if (flags & MAP_VPAGETABLE) { rv = vm_map_find(map, object, NULL, foff, addr, size, align, fitit, VM_MAPTYPE_VPAGETABLE, prot, maxprot, docow); } else { rv = vm_map_find(map, object, NULL, foff, addr, size, align, fitit, VM_MAPTYPE_NORMAL, prot, maxprot, docow); } if (rv != KERN_SUCCESS) { /* * Lose the object reference. Will destroy the * object if it's an unnamed anonymous mapping * or named anonymous without other references. * * (NOTE: object can be NULL) */ vm_object_deallocate(object); goto out; } /* * Shared memory is also shared with children. */ if (flags & (MAP_SHARED|MAP_INHERIT)) { rv = vm_map_inherit(map, *addr, *addr + size, VM_INHERIT_SHARE); if (rv != KERN_SUCCESS) { vm_map_remove(map, *addr, *addr + size); goto out; } } /* If a process has marked all future mappings for wiring, do so */ if ((rv == KERN_SUCCESS) && (map->flags & MAP_WIREFUTURE)) vm_map_unwire(map, *addr, *addr + size, FALSE); /* * Set the access time on the vnode */ if (vp != NULL) vn_mark_atime(vp, td); out: lwkt_reltoken(&map->token); switch (rv) { case KERN_SUCCESS: return (0); case KERN_INVALID_ADDRESS: case KERN_NO_SPACE: return (ENOMEM); case KERN_PROTECTION_FAILURE: return (EACCES); default: return (EINVAL); } }