Example #1
/*
 * Destroy old address space, and allocate a new stack.
 *	The new stack is only sgrowsiz large because it is grown
 *	automatically on a page fault.
 */
int
exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv)
{
	int error;
	struct proc *p = imgp->proc;
	struct vmspace *vmspace = p->p_vmspace;
	vm_object_t obj;
	struct rlimit rlim_stack;
	vm_offset_t sv_minuser, stack_addr;
	vm_map_t map;
	u_long ssiz;

	imgp->vmspace_destroyed = 1;
	imgp->sysent = sv;

	/* May be called with Giant held */
	EVENTHANDLER_DIRECT_INVOKE(process_exec, p, imgp);

	/*
	 * Blow away the entire process VM if the address space is not
	 * shared; otherwise, create a new VM space so that other
	 * threads are not disrupted.
	 */
	map = &vmspace->vm_map;
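	/*
	 * Unless mappings at address zero are explicitly enabled
	 * (map_at_zero), keep the first page out of the user range so
	 * that NULL pointer dereferences fault.
	 */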
	if (map_at_zero)
		sv_minuser = sv->sv_minuser;
	else
		sv_minuser = MAX(sv->sv_minuser, PAGE_SIZE);
	if (vmspace->vm_refcnt == 1 && vm_map_min(map) == sv_minuser &&
	    vm_map_max(map) == sv->sv_maxuser &&
	    cpu_exec_vmspace_reuse(p, map)) {
		shmexit(vmspace);
		pmap_remove_pages(vmspace_pmap(vmspace));
		vm_map_remove(map, vm_map_min(map), vm_map_max(map));
		/*
		 * An exec terminates mlockall(MCL_FUTURE); the ASLR
		 * state must be re-evaluated as well.
		 */
		vm_map_lock(map);
		vm_map_modflags(map, 0, MAP_WIREFUTURE | MAP_ASLR |
		    MAP_ASLR_IGNSTART);
		vm_map_unlock(map);
	} else {
		error = vmspace_exec(p, sv_minuser, sv->sv_maxuser);
		if (error)
			return (error);
		vmspace = p->p_vmspace;
		map = &vmspace->vm_map;
	}
	map->flags |= imgp->map_flags;

	/* Map a shared page */
	obj = sv->sv_shared_page_obj;
	if (obj != NULL) {
		vm_object_reference(obj);
		error = vm_map_fixed(map, obj, 0,
		    sv->sv_shared_page_base, sv->sv_shared_page_len,
		    VM_PROT_READ | VM_PROT_EXECUTE,
		    VM_PROT_READ | VM_PROT_EXECUTE,
		    MAP_INHERIT_SHARE | MAP_ACC_NO_CHARGE);
		if (error != KERN_SUCCESS) {
			vm_object_deallocate(obj);
			return (vm_mmap_to_errno(error));
		}
	}

	/* Allocate a new stack */
	if (imgp->stack_sz != 0) {
		ssiz = trunc_page(imgp->stack_sz);
		PROC_LOCK(p);
		lim_rlimit_proc(p, RLIMIT_STACK, &rlim_stack);
		PROC_UNLOCK(p);
		if (ssiz > rlim_stack.rlim_max)
			ssiz = rlim_stack.rlim_max;
		if (ssiz > rlim_stack.rlim_cur) {
			rlim_stack.rlim_cur = ssiz;
			kern_setrlimit(curthread, RLIMIT_STACK, &rlim_stack);
		}
	} else if (sv->sv_maxssiz != NULL) {
		ssiz = *sv->sv_maxssiz;
	} else {
		ssiz = maxssiz;
	}
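	/*
	 * The stack ends at the top of the user address range and
	 * grows downward from sv_usrstack (see MAP_STACK_GROWS_DOWN
	 * below).
	 */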
	stack_addr = sv->sv_usrstack - ssiz;
	error = vm_map_stack(map, stack_addr, (vm_size_t)ssiz,
	    obj != NULL && imgp->stack_prot != 0 ? imgp->stack_prot :
	    sv->sv_stackprot, VM_PROT_ALL, MAP_STACK_GROWS_DOWN);
	if (error != KERN_SUCCESS)
		return (vm_mmap_to_errno(error));

	/*
	 * vm_ssize and vm_maxsaddr are somewhat antiquated concepts, but they
	 * are still used to enforce the stack rlimit on the process stack.
	 */
	vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT;
	vmspace->vm_maxsaddr = (char *)stack_addr;

	return (0);
}
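
Both examples translate vm_map layer KERN_* status codes into errno values before returning. Example #1 delegates this to vm_mmap_to_errno(); a minimal sketch of that mapping, mirroring the explicit switch at the end of Example #2, looks like this (the real FreeBSD helper may cover additional status codes):

/*
 * Sketch of the KERN_* -> errno translation performed by
 * vm_mmap_to_errno(); mirrors the switch at the end of Example #2.
 * The real FreeBSD helper may handle additional status codes.
 */
static int
sketch_vm_mmap_to_errno(int rv)
{
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}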
Example #2
/*
 * Internal version of mmap.
 * Currently used by mmap, exec, and SysV shared memory.
 * Handle is either a vnode pointer or NULL for MAP_ANON.
 *
 * No requirements
 */
int
vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
	vm_prot_t maxprot, int flags, void *handle, vm_ooffset_t foff)
{
	boolean_t fitit;
	vm_object_t object;
	vm_offset_t eaddr;
	vm_size_t   esize;
	vm_size_t   align;
	int (*uksmap)(cdev_t dev, vm_page_t fake);
	struct vnode *vp;
	struct thread *td = curthread;
	struct proc *p;
	int rv = KERN_SUCCESS;
	off_t objsize;
	int docow;
	int error;

	if (size == 0)
		return (0);

	objsize = round_page(size);
	if (objsize < size)
		return (EINVAL);
	size = objsize;

	lwkt_gettoken(&map->token);
	
	/*
	 * XXX messy code, fixme
	 *
	 * NOTE: Overflow checks require discrete statements or GCC4
	 * will optimize them out.
	 */
	if ((p = curproc) != NULL && map == &p->p_vmspace->vm_map) {
		esize = map->size + size;	/* workaround gcc4 opt */
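		/*
		 * esize < map->size detects unsigned wraparound of the
		 * addition above; the sum must live in its own variable
		 * or the compiler may fold the test away entirely.
		 */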
		if (esize < map->size ||
		    esize > p->p_rlimit[RLIMIT_VMEM].rlim_cur) {
			lwkt_reltoken(&map->token);
			return(ENOMEM);
		}
	}

	/*
	 * We currently can only deal with page-aligned file offsets.
	 * The check is here rather than in the syscall because the
	 * kernel calls this function internally for other mmapping
	 * operations (such as in exec), and non-aligned offsets will
	 * cause pmap inconsistencies... so we want to be sure to
	 * disallow this in all cases.
	 */
	if (foff & PAGE_MASK) {
		lwkt_reltoken(&map->token);
		return (EINVAL);
	}

	/*
	 * Handle alignment.  For large memory maps the MMU may be able
	 * to optimize the page table, so align anything that is a
	 * multiple of SEG_SIZE to a SEG_SIZE boundary.
	 *
	 * Also align any large mapping (bigger than 16x SEG_SIZE) to a
	 * SEG_SIZE address boundary.
	 */
	if (flags & MAP_SIZEALIGN) {
		align = size;
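		/*
		 * MAP_SIZEALIGN requires a power-of-2 size; the
		 * expression below is true iff align is not a power
		 * of 2.
		 */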
		if ((align ^ (align - 1)) != (align << 1) - 1) {
			lwkt_reltoken(&map->token);
			return (EINVAL);
		}
	} else if ((flags & MAP_FIXED) == 0 &&
		   ((size & SEG_MASK) == 0 || size > SEG_SIZE * 16)) {
		align = SEG_SIZE;
	} else {
		align = PAGE_SIZE;
	}

	if ((flags & (MAP_FIXED | MAP_TRYFIXED)) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		if (*addr != trunc_page(*addr)) {
			lwkt_reltoken(&map->token);
			return (EINVAL);
		}
		eaddr = *addr + size;
		if (eaddr < *addr) {
			lwkt_reltoken(&map->token);
			return (EINVAL);
		}
		fitit = FALSE;
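		/*
		 * Plain MAP_FIXED forcibly unmaps anything already in
		 * the range; MAP_TRYFIXED leaves existing mappings
		 * intact, so the fixed mapping below simply fails if
		 * the range is occupied.
		 */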
		if ((flags & MAP_TRYFIXED) == 0)
			vm_map_remove(map, *addr, *addr + size);
	}

	uksmap = NULL;

	/*
	 * Lookup/allocate object.
	 */
	if (flags & MAP_ANON) {
		/*
		 * Unnamed anonymous regions always start at 0.
		 */
		if (handle) {
			/*
			 * Default memory object
			 */
			object = default_pager_alloc(handle, objsize,
						     prot, foff);
			if (object == NULL) {
				lwkt_reltoken(&map->token);
				return(ENOMEM);
			}
			docow = MAP_PREFAULT_PARTIAL;
		} else {
			/*
			 * Implicit single instance of a default memory
			 * object, so we don't need a VM object yet.
			 */
			foff = 0;
			object = NULL;
			docow = 0;
		}
		vp = NULL;
	} else {
		vp = (struct vnode *)handle;

		/*
		 * Non-anonymous mappings of VCHR (aka not /dev/zero)
		 * cannot specify MAP_STACK or MAP_VPAGETABLE.
		 */
		if (vp->v_type == VCHR) {
			if (flags & (MAP_STACK | MAP_VPAGETABLE)) {
				lwkt_reltoken(&map->token);
				return(EINVAL);
			}
		}

		if (vp->v_type == VCHR && vp->v_rdev->si_ops->d_uksmap) {
			/*
			 * Device mappings without a VM object, typically
			 * sharing permanently allocated kernel memory or
			 * process-context-specific (per-process) data.
			 *
			 * Force them to be shared.
			 */
			uksmap = vp->v_rdev->si_ops->d_uksmap;
			object = NULL;
			docow = MAP_PREFAULT_PARTIAL;
			flags &= ~(MAP_PRIVATE|MAP_COPY);
			flags |= MAP_SHARED;
		} else if (vp->v_type == VCHR) {
			/*
			 * Device mappings (device size unknown?).
			 * Force them to be shared.
			 */
			error = dev_dmmap_single(vp->v_rdev, &foff, objsize,
						&object, prot, NULL);

			if (error == ENODEV) {
				handle = (void *)(intptr_t)vp->v_rdev;
				object = dev_pager_alloc(handle, objsize, prot, foff);
				if (object == NULL) {
					lwkt_reltoken(&map->token);
					return(EINVAL);
				}
			} else if (error) {
				lwkt_reltoken(&map->token);
				return(error);
			}

			docow = MAP_PREFAULT_PARTIAL;
			flags &= ~(MAP_PRIVATE|MAP_COPY);
			flags |= MAP_SHARED;
		} else {
			/*
			 * Regular file mapping (typically).  The attribute
			 * check is for the link count test only.  mmapable
			 * vnodes must already have a VM object assigned.
			 */
			struct vattr vat;
			int error;

			error = VOP_GETATTR(vp, &vat);
			if (error) {
				lwkt_reltoken(&map->token);
				return (error);
			}
			docow = MAP_PREFAULT_PARTIAL;
			object = vnode_pager_reference(vp);
			if (object == NULL && vp->v_type == VREG) {
				lwkt_reltoken(&map->token);
				kprintf("Warning: cannot mmap vnode %p, no "
					"object\n", vp);
				return(EINVAL);
			}

			/*
			 * If it is a regular file without any references
			 * we do not need to sync it.
			 */
			if (vp->v_type == VREG && vat.va_nlink == 0) {
				flags |= MAP_NOSYNC;
			}
		}
	}

	/*
	 * Deal with the adjusted flags
	 */
	if ((flags & (MAP_ANON|MAP_SHARED)) == 0)
		docow |= MAP_COPY_ON_WRITE;
	if (flags & MAP_NOSYNC)
		docow |= MAP_DISABLE_SYNCER;
	if (flags & MAP_NOCORE)
		docow |= MAP_DISABLE_COREDUMP;

#if defined(VM_PROT_READ_IS_EXEC)
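	/*
	 * On platforms where a readable page is implicitly executable
	 * the pmap cannot enforce the distinction, so widen the
	 * requested protections to match.
	 */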
	if (prot & VM_PROT_READ)
		prot |= VM_PROT_EXECUTE;

	if (maxprot & VM_PROT_READ)
		maxprot |= VM_PROT_EXECUTE;
#endif

	/*
	 * This may place the area in its own page directory if (size)
	 * is large enough; otherwise it typically returns its argument
	 * unchanged.
	 *
	 * (object can be NULL)
	 */
	if (fitit) {
		*addr = pmap_addr_hint(object, *addr, size);
	}

	/*
	 * Stack mappings need special attention.
	 *
	 * Mappings that use virtual page tables will default to storing
	 * the page table at offset 0.
	 */
	if (uksmap) {
		rv = vm_map_find(map, uksmap, vp->v_rdev,
				 foff, addr, size,
				 align,
				 fitit, VM_MAPTYPE_UKSMAP,
				 prot, maxprot, docow);
	} else if (flags & MAP_STACK) {
		rv = vm_map_stack(map, *addr, size, flags,
				  prot, maxprot, docow);
	} else if (flags & MAP_VPAGETABLE) {
		rv = vm_map_find(map, object, NULL,
				 foff, addr, size,
				 align,
				 fitit, VM_MAPTYPE_VPAGETABLE,
				 prot, maxprot, docow);
	} else {
		rv = vm_map_find(map, object, NULL,
				 foff, addr, size,
				 align,
				 fitit, VM_MAPTYPE_NORMAL,
				 prot, maxprot, docow);
	}

	if (rv != KERN_SUCCESS) {
		/*
		 * Lose the object reference.  This will destroy the
		 * object if it is an unnamed anonymous mapping or a
		 * named anonymous mapping without other references.
		 *
		 * (NOTE: object can be NULL)
		 */
		vm_object_deallocate(object);
		goto out;
	}

	/*
	 * Shared memory is also shared with children.
	 */
	if (flags & (MAP_SHARED|MAP_INHERIT)) {
		rv = vm_map_inherit(map, *addr, *addr + size, VM_INHERIT_SHARE);
		if (rv != KERN_SUCCESS) {
			vm_map_remove(map, *addr, *addr + size);
			goto out;
		}
	}

	/* If a process has marked all future mappings for wiring, do so */
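	/* (with new_pageable == FALSE, vm_map_unwire() wires rather than unwires) */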
	if ((rv == KERN_SUCCESS) && (map->flags & MAP_WIREFUTURE))
		vm_map_unwire(map, *addr, *addr + size, FALSE);

	/*
	 * Set the access time on the vnode
	 */
	if (vp != NULL)
		vn_mark_atime(vp, td);
out:
	lwkt_reltoken(&map->token);
	
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}
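
For reference, a hypothetical in-kernel caller of Example #2 might look like the sketch below. Per the header comment, handle is NULL and foff is 0 for an anonymous mapping; the helper name and the choice of protections are illustrative only, and the usual kernel headers are assumed.

/*
 * Hypothetical caller: establish a read/write anonymous mapping in a
 * process' address space.  On success *addrp holds the chosen address;
 * the return value is already an errno, per the switch above.
 */
static int
map_anon_example(struct proc *p, vm_offset_t *addrp, vm_size_t len)
{
	return (vm_mmap(&p->p_vmspace->vm_map, addrp, len,
			VM_PROT_READ | VM_PROT_WRITE, VM_PROT_ALL,
			MAP_ANON, NULL, 0));
}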