Пример #1
0
/*
 *	kmap_alloc_wait:
 *
 *	Carve `size' bytes (rounded up to whole pages) of pageable memory
 *	out of a sub-map of the kernel.  Swap is reserved before the map is
 *	searched.  While the sub-map has no room the caller sleeps on the
 *	map and retries the search afterwards.
 *
 *	Returns the address of the new range, or 0 if the swap reservation
 *	fails or the request is larger than the sub-map's entire range.
 *
 *	This routine may block.
 */
vm_offset_t
kmap_alloc_wait(vm_map_t map, vm_size_t size)
{
	vm_offset_t addr;

	size = round_page(size);
	if (!swap_reserve(size))
		return (0);

	/*
	 * The map's own lock is what keeps sleepers and wakers apart,
	 * which lets this work for more than one map.
	 */
	vm_map_lock(map);
	while (vm_map_findspace(map, vm_map_min(map), size, &addr) != 0) {
		/* Nothing free right now; bail out if it could never fit. */
		if (vm_map_max(map) - vm_map_min(map) < size) {
			vm_map_unlock(map);
			swap_release(size);
			return (0);
		}

		/* Request a wakeup, sleep, then search again under the lock. */
		map->needs_wakeup = TRUE;
		vm_map_unlock_and_wait(map, 0);
		vm_map_lock(map);
	}

	/* Space found and the map is still locked: claim the range. */
	vm_map_insert(map, NULL, 0, addr, addr + size, VM_PROT_ALL,
	    VM_PROT_ALL, MAP_ACC_CHARGED);
	vm_map_unlock(map);
	return (addr);
}
Пример #2
0
/*
 * Count the entries of `map'.  An entry that is a sub-map contributes
 * whatever get_vmsubmap_entries() reports for the offset range the entry
 * covers; every other entry counts as one.  The map lock is only taken
 * (and released) when not_in_kdp is set.
 */
int
get_vmmap_entries(
	vm_map_t	map)
{
	vm_map_entry_t	cur;
	int		count = 0;

	if (not_in_kdp)
		vm_map_lock(map);

	for (cur = vm_map_first_entry(map);
	     cur != vm_map_to_entry(map);
	     cur = cur->vme_next) {
		if (!cur->is_sub_map) {
			count += 1;
			continue;
		}
		/* Sub-map: count the entries behind [offset, offset + length). */
		count += get_vmsubmap_entries(cur->object.sub_map,
		    cur->offset,
		    cur->offset + (cur->vme_end - cur->vme_start));
	}

	if (not_in_kdp)
		vm_map_unlock(map);
	return (count);
}
Пример #3
0
/*
 * mlockall(int how)
 *
 * `how' must contain MCL_CURRENT and/or MCL_FUTURE, otherwise EINVAL is
 * returned.  The caller must pass the PRIV_ROOT privilege check.
 * MCL_CURRENT is not implemented: the map entries are walked without
 * being touched and ENOSYS is returned (MCL_FUTURE is then ignored).
 * With MCL_FUTURE alone, MAP_WIREFUTURE is set on the process map.
 *
 * No requirements
 */
int
sys_mlockall(struct mlockall_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	vm_map_t map = &p->p_vmspace->vm_map;
	vm_map_entry_t entry;
	int how = uap->how;
	int rc;

	/* At least one of the two recognised flags has to be present. */
	if ((how & (MCL_CURRENT | MCL_FUTURE)) == 0)
		return (EINVAL);

	rc = priv_check_cred(td->td_ucred, PRIV_ROOT, 0);
	if (rc != 0)
		return (rc);

	vm_map_lock(map);
	if (how & MCL_CURRENT) {
		/* Placeholder traversal: nothing is wired yet. */
		entry = map->header.next;
		while (entry != &map->header)
			entry = entry->next;

		rc = ENOSYS;
	} else if (how & MCL_FUTURE) {
		map->flags |= MAP_WIREFUTURE;
	}
	vm_map_unlock(map);

	return (rc);
}
Пример #4
0
/*
 *	projected_buffer_deallocate:
 *
 *	Remove the mapping [start, end) of a projected buffer from `map'.
 *	Afterwards, if the corresponding kernel entry has projected_on
 *	equal to -1 and its object has exactly one reference left, the
 *	kernel mapping is deleted from kernel_map as well.
 *
 *	Returns KERN_INVALID_ARGUMENT when `map' is null or is kernel_map,
 *	when no entry contains `start', when `end' lies beyond that entry,
 *	or when the entry's projected_on field is 0.  Returns KERN_SUCCESS
 *	otherwise.
 */
kern_return_t
projected_buffer_deallocate(
     vm_map_t 		map,
     vm_offset_t 	start, 
     vm_offset_t	end)
{
	vm_map_entry_t entry, k_entry;

	if (map == VM_MAP_NULL || map == kernel_map)
		return KERN_INVALID_ARGUMENT;

	vm_map_lock(map);
	if (!vm_map_lookup_entry(map, start, &entry) ||
	    end > entry->vme_end ||
            /* Remember the corresponding kernel entry; 0 means none */
	    (k_entry = entry->projected_on) == 0) {
	  vm_map_unlock(map);
	  return(KERN_INVALID_ARGUMENT);
	}

	/* Prepare for deallocation: trim the entry to exactly [start, end) */
	if (entry->vme_start < start)
	  _vm_map_clip_start(&map->hdr, entry, start);
	if (entry->vme_end > end)
	  _vm_map_clip_end(&map->hdr, entry, end);
      	if (map->first_free == entry)   /* Adjust first_free hint */
	  map->first_free = entry->vme_prev;
	entry->projected_on = 0;        /* Needed to allow deletion */
	entry->wired_count = 0;         /* Avoid unwire fault */
	vm_map_entry_delete(map, entry);
	vm_map_unlock(map);

	/*
	 * Check if the buffer is not persistent and only the
	 * kernel mapping remains, and if so delete it.
	 *
	 * NOTE(review): k_entry was read under the user map's lock and is
	 * only dereferenced here, after that lock was dropped and
	 * kernel_map's lock taken -- confirm nothing can free the kernel
	 * entry in between.
	 */
	vm_map_lock(kernel_map);
	if (k_entry->projected_on == (vm_map_entry_t) -1 &&
	    k_entry->object.vm_object->ref_count == 1) {
	  if (kernel_map->first_free == k_entry)
	    kernel_map->first_free = k_entry->vme_prev;
	  k_entry->projected_on = 0;    /* Allow unwire fault */
	  vm_map_entry_delete(kernel_map, k_entry);
	}
	vm_map_unlock(kernel_map);
	return(KERN_SUCCESS);
}
Пример #5
0
/*
 * munmap system call: remove whatever is mapped in the page-aligned range
 * covering [addr, addr + len) of the calling process's map.
 *
 * A range that rounds to zero bytes succeeds without doing anything; a
 * range rejected by range_test() returns that function's error.  Entries
 * removed from the map are detached after the map lock is released.
 */
int
sys_munmap(struct lwp *l, const struct sys_munmap_args *uap, register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
	} */
	struct vm_map_entry *dead_entries;
	struct vm_map *map;
	vaddr_t addr = (vaddr_t)SCARG(uap, addr);
	vsize_t size = (vsize_t)SCARG(uap, len);
	vsize_t pageoff;
	int error;

	/*
	 * Pull the start down to a page boundary and grow the length by
	 * the same amount before rounding it up to whole pages.
	 */
	pageoff = (addr & PAGE_MASK);
	addr -= pageoff;
	size = (vsize_t)round_page(size + pageoff);

	if (size == 0)
		return (0);

	error = range_test(addr, size, false);
	if (error)
		return error;

	map = &l->l_proc->p_vmspace->vm_map;

	vm_map_lock(map);
#if 0
	/*
	 * Disabled check: would insist that the entire range is
	 * allocated before allowing an unmap.
	 */
	if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
		vm_map_unlock(map);
		return (EINVAL);
	}
#endif
	uvm_unmap_remove(map, addr, addr + size, &dead_entries, NULL, 0);
	vm_map_unlock(map);

	/* Dispose of the removed entries outside the map lock. */
	if (dead_entries != NULL)
		uvm_unmap_detach(dead_entries, 0);
	return (0);
}
Пример #6
0
/*
 * munlockall(void)
 *
 *	Unwire all user-wired map entries, cancel MCL_FUTURE.
 *
 *	The whole scan runs with the process map locked; when an entry
 *	that is in transition is met, the scan waits and restarts from
 *	the first entry.  `uap' is not used.  The return value is always
 *	KERN_SUCCESS, since rc is never modified.
 *
 * No requirements
 */
int
sys_munlockall(struct munlockall_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	vm_map_t map = &p->p_vmspace->vm_map;
	vm_map_entry_t entry;
	int rc = KERN_SUCCESS;

	vm_map_lock(map);

	/* Clear MAP_WIREFUTURE to cancel mlockall(MCL_FUTURE) */
	map->flags &= ~MAP_WIREFUTURE;

retry:
	for (entry = map->header.next;
	     entry != &map->header;
	     entry = entry->next) {
		/* Only entries wired on behalf of the user are of interest */
		if ((entry->eflags & MAP_ENTRY_USER_WIRED) == 0)
			continue;

		/*
		 * If we encounter an in-transition entry, we release the 
		 * map lock and retry the scan; we do not decrement any
		 * wired_count more than once because we do not touch
		 * any entries with MAP_ENTRY_USER_WIRED not set.
		 *
 		 * There is a potential interleaving with concurrent
		 * mlockall()s here -- if we abort a scan, an mlockall()
		 * could start, wire a number of entries before our 
		 * current position in, and then stall itself on this
		 * or any other in-transition entry. If that occurs, when
		 * we resume, we will unwire those entries. 
 		 */
		if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
			/* Ask to be woken, account the collision, then wait */
			entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
			++mycpu->gd_cnt.v_intrans_coll;
			++mycpu->gd_cnt.v_intrans_wait;
			vm_map_transition_wait(map);
			goto retry;
		}

		KASSERT(entry->wired_count > 0, 
			("wired_count was 0 with USER_WIRED set! %p", entry));
	
		/* Drop wired count, if it hits zero, unwire the entry */
		entry->eflags &= ~MAP_ENTRY_USER_WIRED;
		entry->wired_count--;
		if (entry->wired_count == 0)
			vm_fault_unwire(map, entry);
	}

	/*
	 * NOTE(review): presumably bumped so that other users of the map
	 * can notice it changed -- confirm against the timestamp's readers.
	 */
	map->timestamp++;
	vm_map_unlock(map);

	return (rc);
}
Пример #7
0
/*
 *	kmap_free_wakeup:
 *
 *	Give the page-aligned range covering [addr, addr + size) back to a
 *	submap of the kernel.  If a wakeup was requested on the map
 *	(needs_wakeup is set), the request is cleared and the map's
 *	waiters are woken, all while the map is still locked.
 */
void
kmap_free_wakeup(vm_map_t map, vm_offset_t addr, vm_size_t size)
{
	vm_offset_t first = trunc_page(addr);
	vm_offset_t last = round_page(addr + size);

	vm_map_lock(map);
	(void) vm_map_delete(map, first, last);
	if (map->needs_wakeup) {
		map->needs_wakeup = FALSE;
		vm_map_wakeup(map);
	}
	vm_map_unlock(map);
}
Пример #8
0
/*
 * uvm_km_free_wakeup: remove the page-aligned range covering
 * [addr, addr + size) from `map' and issue a wakeup() on the map while
 * it is still locked.  The removed entries are detached once the lock
 * has been released.
 */
void
uvm_km_free_wakeup(struct vm_map *map, vaddr_t addr, vsize_t size)
{
	struct vm_map_entry *dead_entries;
	vaddr_t first = trunc_page(addr);
	vaddr_t last = round_page(addr+size);

	vm_map_lock(map);
	uvm_unmap_remove(map, first, last, &dead_entries, NULL);
	wakeup(map);
	vm_map_unlock(map);

	if (dead_entries == NULL)
		return;
	uvm_unmap_detach(dead_entries, 0);
}
Пример #9
0
/*
 * 	kmem_init:
 *
 *	Create the kernel map and insert a single mapping covering kernel
 *	text, data, bss, and all space allocated thus far (`bootstrap'
 *	data).  The mapping begins at KERNBASE on amd64 and at
 *	VM_MIN_KERNEL_ADDRESS elsewhere, and ends at `start'; the range
 *	between `start' and `end' is left free in the new map.
 */
void
kmem_init(vm_offset_t start, vm_offset_t end)
{
	vm_map_t m;
	vm_offset_t mapped_from;

#ifdef __amd64__
	mapped_from = KERNBASE;
#else
	mapped_from = VM_MIN_KERNEL_ADDRESS;
#endif

	m = vm_map_create(kernel_pmap, VM_MIN_KERNEL_ADDRESS, end);
	m->system_map = 1;
	vm_map_lock(m);
	/* N.B.: cannot use kgdb to debug, starting with this assignment ... */
	kernel_map = m;
	(void) vm_map_insert(m, NULL, (vm_ooffset_t) 0, mapped_from, start,
	    VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
	/* ... and ending with the completion of the above `insert' */
	vm_map_unlock(m);
}
Пример #10
0
/*
 *	kmem_alloc_aligned:
 *
 *	Allocate wired-down memory in `map', backed by the kernel object,
 *	at an address aligned to the (page-rounded) size.  `size' must be
 *	a power of two, otherwise the routine panics.  The memory is
 *	not zeroed.
 *
 *	If no space is found, slab_collect() is called and the search is
 *	retried once.  On success the address is stored in *addrp and
 *	KERN_SUCCESS is returned; otherwise the error from
 *	vm_map_find_entry() is returned and *addrp is left untouched.
 */
kern_return_t
kmem_alloc_aligned(
	vm_map_t 	map,
	vm_offset_t 	*addrp,
	vm_size_t 	size)
{
	vm_map_entry_t entry;
	vm_offset_t offset;
	vm_offset_t addr;
	unsigned int attempts;
	kern_return_t kr;

	if ((size & (size - 1)) != 0)
		panic("kmem_alloc_aligned");

	/*
	 *	Use the kernel object for wired-down kernel pages.
	 *	Assume that no region of the kernel object is
	 *	referenced more than once.  We want vm_map_find_entry
	 *	to extend an existing entry if possible.
	 */

	size = round_page(size);
	attempts = 0;

retry:
	vm_map_lock(map);
	/* size - 1 is passed as the alignment mask */
	kr = vm_map_find_entry(map, &addr, size, size - 1,
			       kernel_object, &entry);
	if (kr != KERN_SUCCESS) {
		vm_map_unlock(map);

		/* First failure: try to reclaim memory, then search again */
		if (attempts == 0) {
			attempts++;
			slab_collect();
			goto retry;
		}

		/* Wording kept in line with kmem_alloc / kmem_realloc */
		printf_once("no more room for kmem_alloc_aligned in %p\n", map);
		return kr;
	}

	/*
	 *	Since we didn't know where the new region would
	 *	start, we couldn't supply the correct offset into
	 *	the kernel object.  We only initialize the entry
	 *	if we aren't extending an existing entry.
	 */

	offset = addr - VM_MIN_KERNEL_ADDRESS;

	if (entry->object.vm_object == VM_OBJECT_NULL) {
		vm_object_reference(kernel_object);

		entry->object.vm_object = kernel_object;
		entry->offset = offset;
	}

	/*
	 *	Since we have not given out this address yet,
	 *	it is safe to unlock the map.
	 */
	vm_map_unlock(map);

	/*
	 *	Allocate wired-down memory in the kernel_object,
	 *	for this entry, and enter it in the kernel pmap.
	 */
	kmem_alloc_pages(kernel_object, offset,
			 addr, addr + size,
			 VM_PROT_DEFAULT);

	/*
	 *	Return the memory, not zeroed.
	 */
	*addrp = addr;
	return KERN_SUCCESS;
}
Пример #11
0
/*
 * mquery system call: report where a mapping of `len' bytes could be
 * placed, without creating it.
 *
 * A zero address is replaced by uvm_map_hint().  Candidate ranges that
 * overlap [vm_daddr, vm_daddr + MAXDSIZ) are never returned: a MAP_FIXED
 * request there fails with EINVAL, any other request is moved past that
 * window.  On success the address is stored in *retval and 0 is
 * returned; when no space is found the result is EINVAL for MAP_FIXED
 * and ENOMEM otherwise.
 */
int
sys_mquery(struct proc *p, void *v, register_t *retval)
{
	struct sys_mquery_args /* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */ *uap = v;
	struct file *fp = NULL;
	struct uvm_object *uobj = NULL;
	voff_t uoff = 0;
	vaddr_t vaddr = (vaddr_t) SCARG(uap, addr);
	vm_prot_t prot = SCARG(uap, prot);
	vsize_t size = (vsize_t) SCARG(uap, len);
	int fd = SCARG(uap, fd);
	int flags = 0;
	int error;

	if ((prot & VM_PROT_ALL) != prot)
		return (EINVAL);

	if (SCARG(uap, flags) & MAP_FIXED)
		flags |= UVM_FLAG_FIXED;

	/* A file descriptor selects the vnode's object and offset. */
	if (fd >= 0) {
		error = getvnode(p->p_fd, fd, &fp);
		if (error != 0)
			return (error);
		uobj = &((struct vnode *)fp->f_data)->v_uvm.u_obj;
		uoff = SCARG(uap, pos);
	}

	if (vaddr == 0)
		vaddr = uvm_map_hint(p, prot);

	/* prevent a user requested address from falling in heap space */
	if ((vaddr + size > (vaddr_t)p->p_vmspace->vm_daddr) &&
	    (vaddr < (vaddr_t)p->p_vmspace->vm_daddr + MAXDSIZ)) {
		if (flags & UVM_FLAG_FIXED) {
			error = EINVAL;
			goto done;
		}
		vaddr = round_page((vaddr_t)p->p_vmspace->vm_daddr + MAXDSIZ);
	}

	vm_map_lock(&p->p_vmspace->vm_map);
	for (;;) {
		if (uvm_map_findspace(&p->p_vmspace->vm_map, vaddr, size,
		    &vaddr, uobj, uoff, 0, flags) == NULL) {
			error = (flags & UVM_FLAG_FIXED) ? EINVAL : ENOMEM;
			break;
		}

		/* Accept the result unless it falls in heap space. */
		if ((vaddr + size <= (vaddr_t)p->p_vmspace->vm_daddr) ||
		    (vaddr >= (vaddr_t)p->p_vmspace->vm_daddr + MAXDSIZ)) {
			error = 0;
			*retval = (register_t)(vaddr);
			break;
		}

		/* Skip past the heap window and search again. */
		vaddr = round_page((vaddr_t)p->p_vmspace->vm_daddr +
		    MAXDSIZ);
	}
	vm_map_unlock(&p->p_vmspace->vm_map);

done:
	if (fp != NULL)
		FRELE(fp);
	return (error);
}
Пример #12
0
/*
 * darwin_sys_load_shared_file: map the sections of a shared file into the
 * calling process.
 *
 * The file named by `filename' is opened, `count' mapping descriptors are
 * copied in from `mappings', and each one is mapped at *base plus its
 * mapping_offset.  If any section cannot be placed there, a new base
 * address large enough for the whole region is looked up first.  On
 * success the (possibly relocated) base and the flags are copied back to
 * user space.
 *
 * Returns 0 on success or an errno value.  The first error encountered
 * is the one reported: a later successful close of the descriptor does
 * not hide it, and nothing is copied out once an error has occurred.
 */
int
darwin_sys_load_shared_file(struct lwp *l, const struct darwin_sys_load_shared_file_args *uap, register_t *retval)
{
	/* {
		syscallarg(char *) filename;
		syscallarg(void *) addr;
		syscallarg(u_long) len;
		syscallarg(void **) base;
		syscallarg(int) count;
		syscallarg(mach_sf_mapping_t *) mappings;
		syscallarg(int *) flags;
	} */
	struct file *fp;
	struct vnode *vp = NULL;
	vaddr_t base;
	struct proc *p = l->l_proc;
	int flags;
	char *filename;
	mach_sf_mapping_t *mapp = NULL;
	size_t maplen;
	struct sys_open_args open_cup;
	struct sys_close_args close_cup;
	register_t fdc;
	int fd;
	int i;
	int error;
	int close_error;
	vaddr_t max_addr, addr;
	size_t len;
	vaddr_t uaddr;
	int need_relocation;
	struct exec_vmcmd evc;

	filename = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
	if ((error = copyin(SCARG(uap, filename), filename, MAXPATHLEN)) != 0)
		goto bad1;

	if ((error = copyin(SCARG(uap, base), &base, sizeof(base))) != 0)
		goto bad1;

	/* Copy exactly one int: sizeof(base) would overrun `flags' on LP64. */
	if ((error = copyin(SCARG(uap, flags), &flags, sizeof(flags))) != 0)
		goto bad1;

#ifdef DEBUG_DARWIN
	DPRINTF(("darwin_sys_load_shared_file: filename = %p ",
	    SCARG(uap, filename)));
	DPRINTF(("addr = %p len = 0x%08lx base = %p ",
	    SCARG(uap, addr), SCARG(uap, len), SCARG(uap, base)));
	DPRINTF(("count = %d mappings = %p flags = %p ",
	    SCARG(uap, count), SCARG(uap, mappings), SCARG(uap, flags)));
	DPRINTF(("*base = 0x%08lx *flags = %d filename=`%s'\n",
	    base, flags, filename));
#endif

	/* Open the file through the regular open path, read-only. */
	SCARG(&open_cup, path) = SCARG(uap, filename);
	SCARG(&open_cup, flags) = O_RDONLY;
	SCARG(&open_cup, mode) = 0;
	if ((error = sys_open(l, &open_cup, &fdc)) != 0)
		goto bad1;

	fd = (int)fdc;
	fp = fd_getfile(fd);
	if (fp == NULL) {
		error = EBADF;
		goto bad1point5;
	}
	vp = fp->f_data;
	vref(vp);

	/* Bound the descriptor array to one page worth of entries. */
	if (SCARG(uap, count) < 0 ||
	    SCARG(uap, count) > PAGE_SIZE / sizeof(*mapp)) {
		error = EINVAL;
		goto bad2;
	}
	maplen = SCARG(uap, count) * sizeof(*mapp);
	mapp = malloc(maplen, M_TEMP, M_WAITOK);

	if ((error = copyin(SCARG(uap, mappings), mapp, maplen)) != 0)
		goto bad2;

#ifdef DEBUG_DARWIN
	for (i = 0; i < SCARG(uap, count); i++) {
		DPRINTF(("mapp[%d].mapping_offset = 0x%08lx\n",
		    i, mapp[i].mapping_offset));
		DPRINTF(("mapp[%d].size = 0x%08lx\n", i, (long)mapp[i].size));
		DPRINTF(("mapp[%d].file_offset = 0x%08lx\n",
		    i, mapp[i].file_offset));
		DPRINTF(("mapp[%d].protection = %d\n",
		    i, mapp[i].protection));
		DPRINTF(("mapp[%d].cksum = %ld\n", i, mapp[i].cksum));
	}
#endif

	/* Check if we can load at the default addresses */
	need_relocation = 0;
	vm_map_lock(&p->p_vmspace->vm_map);
	for (i = 0; i < SCARG(uap, count); i++)
		if ((uvm_map_findspace(&p->p_vmspace->vm_map,
		    base + mapp[i].mapping_offset, mapp[i].size,
		    &uaddr, NULL, 0, 0, UVM_FLAG_FIXED)) == NULL)
			need_relocation = 1;
	vm_map_unlock(&p->p_vmspace->vm_map);

	/* If we cannot, we need a relocation */
	if (need_relocation) {
		DPRINTF(("Relocating\n"));
		/* Compute the length of the region enclosing all sections */
		max_addr = 0;
		for (i = 0; i < SCARG(uap, count); i++) {
			addr = (vaddr_t)(mapp[i].mapping_offset
			    + base + mapp[i].size);
			if (addr > max_addr)
				max_addr = addr;
		}
		len = max_addr - base;
		DPRINTF(("base = 0x%08lx max_addr = 0x%08lx len = 0x%08x\n",
		    base, max_addr, len));

		/* Find some place to map this region */
		vm_map_lock(&p->p_vmspace->vm_map);
		if ((uvm_map_findspace(&p->p_vmspace->vm_map, base,
		    len, &uaddr, NULL, 0, PAGE_SIZE, 0)) == NULL) {
			DPRINTF(("Impossible to find some space\n"));
			vm_map_unlock(&p->p_vmspace->vm_map);
			error = ENOMEM;
			goto bad2;
		}
		vm_map_unlock(&p->p_vmspace->vm_map);

		/* Update the base address */
		base = uaddr;
		DPRINTF(("New base address: base = 0x%08lx\n", base));
	}

	/* Do the actual mapping */
	for (i = 0; i < SCARG(uap, count); i++) {
		bzero(&evc, sizeof(evc));
		evc.ev_addr = base + mapp[i].mapping_offset;
		evc.ev_len = mapp[i].size;
		evc.ev_prot = mapp[i].protection & VM_PROT_ALL;
		evc.ev_flags = 0;
		if (mapp[i].protection & MACH_VM_PROT_ZF)
			evc.ev_proc = vmcmd_map_zero;
		else
			evc.ev_proc = vmcmd_map_readvn;
		evc.ev_offset = mapp[i].file_offset;
		evc.ev_vp = vp;

		DPRINTF(("map section %d: start = 0x%08lx, len = 0x%08lx\n",
		    i, evc.ev_addr, evc.ev_len));

		/* The vnode is locked only around the mapping command. */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		if ((error = (*evc.ev_proc)(l, &evc)) != 0) {
			VOP_UNLOCK(vp, 0);
			DPRINTF(("Failed\n"));
			goto bad2;
		}
		VOP_UNLOCK(vp, 0);
		DPRINTF(("Success\n"));
	}
bad2:
	/* Reached on success as well: release everything taken after open. */
	if (mapp)
		free(mapp, M_TEMP);
	vrele(vp);
	fd_putfile(fd);
bad1point5:
	/*
	 * Always close the descriptor, but keep the first error: the
	 * close result only matters when everything before it succeeded.
	 */
	SCARG(&close_cup, fd) = fd;
	close_error = sys_close(l, &close_cup, retval);
	if (error == 0)
		error = close_error;
	if (error != 0)
		goto bad1;

	if ((error = copyout(&base, SCARG(uap, base), sizeof(base))) != 0)
		goto bad1;

	if ((error = copyout(&flags, SCARG(uap, flags), sizeof(flags))) != 0)
		goto bad1;
bad1:
	free(filename, M_TEMP);

	return error;
}
Пример #13
0
/*
 * uvm_mmap: map `size' bytes of `uobj' (or of anonymous memory when
 * MAP_ANON is set) into `map'.
 *
 * => *addr carries the suggested address in and, through uvm_map(), the
 *    address used; with MAP_FIXED it must be page aligned and anything
 *    already mapped in the range is unmapped first
 * => `foff' must be page aligned and `prot' must be a subset of `maxprot'
 * => an alignment encoded in `flags' is validated and passed to uvm_map()
 * => if MAP_WIRED is given or the map has VM_MAP_WIREFUTURE set, the new
 *    mapping is wired, subject to uvmexp.wiredmax and `locklimit'
 * => returns 0 on success (including size == 0) or an errno value
 */
int
uvm_mmap(struct vm_map *map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags, int advice, struct uvm_object *uobj,
    voff_t foff, vsize_t locklimit)
{
	vaddr_t align = 0;
	uvm_flag_t uvmflag = 0;
	int error;

	/*
	 * parameter checks
	 */

	if (size == 0)
		return 0;
	if ((foff & PAGE_MASK) != 0 || (prot & maxprot) != prot)
		return EINVAL;

	/*
	 * fixed mappings: insist on alignment and zap old mappings.
	 * anything else: round off the suggested address.
	 */

	if (flags & MAP_FIXED) {
		if (*addr & PAGE_MASK)
			return EINVAL;
		uvmflag |= UVM_FLAG_FIXED;
		(void) uvm_unmap(map, *addr, *addr + size);
	} else {
		*addr = round_page(*addr);
	}

	/*
	 * See whether a requested alignment can be attempted at all: the
	 * shift must be expressible in a vaddr_t, the alignment must be
	 * at least a page and must lie below the top of the map.  For a
	 * fixed mapping the supplied address has to honour the alignment
	 * already, after which the alignment is dropped.
	 */
	align = (flags & MAP_ALIGNMENT_MASK) >> MAP_ALIGNMENT_SHIFT;
	if (align != 0) {
		if (align >= sizeof(vaddr_t) * NBBY)
			return EINVAL;
		align = 1L << align;
		if (align < PAGE_SIZE)
			return EINVAL;
		if (align >= vm_map_max(map))
			return ENOMEM;
		if (flags & MAP_FIXED) {
			if ((*addr & (align-1)) != 0)
				return EINVAL;
			align = 0;
		}
	}

	/*
	 * resource limits (not applied to kernel maps)
	 */

	if (!VM_MAP_IS_KERNEL(map) &&
	    (((rlim_t)curproc->p_vmspace->vm_map.size + (rlim_t)size) >
	    curproc->p_rlimit[RLIMIT_AS].rlim_cur))
		return ENOMEM;

	/*
	 * anon vs. non-anon mappings: private mappings of either kind
	 * are copy-on-write; shared anon mappings get their amap now.
	 */

	if (flags & MAP_ANON) {
		KASSERT(uobj == NULL);
		foff = UVM_UNKNOWN_OFFSET;
		/* XXX: private case defers amap create */
		uvmflag |= (flags & MAP_SHARED) ?
		    UVM_FLAG_OVERLAY : UVM_FLAG_COPYONW;
	} else {
		KASSERT(uobj != NULL);
		if ((flags & MAP_SHARED) == 0)
			uvmflag |= UVM_FLAG_COPYONW;
	}

	uvmflag = UVM_MAPFLAG(prot, maxprot,
	    (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY, advice,
	    uvmflag);
	error = uvm_map(map, addr, size, uobj, foff, align, uvmflag);
	if (error) {
		/* the mapping failed: detach from the object, if any */
		if (uobj)
			uobj->pgops->pgo_detach(uobj);
		return error;
	}

	/*
	 * POSIX 1003.1b -- if our address space was configured
	 * to lock all future mappings, wire the one we just made.
	 * MAP_WIRED is handled here as well.  A PROT_NONE mapping
	 * needs no further work.
	 */

	if (prot == VM_PROT_NONE)
		return 0;
	if ((flags & MAP_WIRED) == 0 && (map->flags & VM_MAP_WIREFUTURE) == 0)
		return 0;

	vm_map_lock(map);
	if (atop(size) + uvmexp.wired > uvmexp.wiredmax ||
	    (locklimit != 0 &&
	     size + ptoa(pmap_wired_count(vm_map_pmap(map))) >
	     locklimit)) {
		vm_map_unlock(map);
		uvm_unmap(map, *addr, *addr + size);
		return ENOMEM;
	}

	/*
	 * uvm_map_pageable() always returns the map unlocked.
	 */

	error = uvm_map_pageable(map, *addr, *addr + size,
	    false, UVM_LK_ENTER);
	if (error)
		uvm_unmap(map, *addr, *addr + size);
	return error;
}
Пример #14
0
/*
 * munmap system call: remove the page-aligned range covering
 * [addr, addr + len) from the calling process's map.
 *
 * EINVAL is returned when the range wraps, lies outside the user address
 * limits, or fails uvm_map_checkprot() with VM_PROT_NONE.  Entries taken
 * out of the map are detached after the map lock is released.
 */
int
sys_munmap(struct proc *p, void *v, register_t *retval)
{
	struct sys_munmap_args /* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	vm_map_t map;
	struct vm_map_entry *dead_entries;
	vaddr_t vm_min_address = VM_MIN_ADDRESS;
	vaddr_t addr = (vaddr_t) SCARG(uap, addr);
	vsize_t size = (vsize_t) SCARG(uap, len);
	vsize_t pageoff;

	/* page-align the start and adjust the size to match */
	ALIGN_ADDR(addr, size, pageoff);

	/*
	 * Reject illegal addresses, watching out for address wrap.
	 * VM_*_ADDRESS are not constants due to casts, hence the
	 * explicit "> 0" tests.
	 */
	if (addr > SIZE_MAX - size ||
	    (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS) ||
	    (vm_min_address > 0 && addr < vm_min_address))
		return (EINVAL);

	map = &p->p_vmspace->vm_map;

	/* the map must be locked for the protection check */
	vm_map_lock(map);

	/*
	 * interesting system call semantic: the entire range must be
	 * allocated before an unmap is allowed.
	 */
	if (uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
		uvm_unmap_remove(map, addr, addr + size, &dead_entries, p);
		vm_map_unlock(map);

		if (dead_entries != NULL)
			uvm_unmap_detach(dead_entries, 0);
		return (0);
	}

	vm_map_unlock(map);
	return (EINVAL);
}
Пример #15
0
/*
 * uvm_mmap: establish a mapping of `size' bytes in `map'.
 *
 * => *addr is the suggested address (rounded to a page) or, with
 *    MAP_FIXED, the required page-aligned address; it is handed by
 *    reference to uvm_map_p()
 * => for MAP_ANON mappings `handle' is not used; otherwise it is treated
 *    as the vnode to map: character devices are attached with
 *    udv_attach(), all other vnodes with uvn_attach()
 * => `foff' must be page aligned and `prot' must be a subset of `maxprot'
 * => when the map has VM_MAP_WIREFUTURE set the new mapping is wired,
 *    subject to uvmexp.wiredmax and (where pmap_wired_count is defined)
 *    to `locklimit'
 * => returns 0 on success (including size == 0) or an errno value
 */
int
uvm_mmap(vm_map_t map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
    vm_prot_t maxprot, int flags, caddr_t handle, voff_t foff,
    vsize_t locklimit, struct proc *p)
{
	struct uvm_object *uobj;
	struct vnode *vp;
	int error;
	int advice = UVM_ADV_NORMAL;
	uvm_flag_t uvmflag = 0;
	vsize_t align = 0;	/* userland page size */

	/*
	 * check params
	 */

	if (size == 0)
		return(0);
	if (foff & PAGE_MASK)
		return(EINVAL);
	if ((prot & maxprot) != prot)
		return(EINVAL);

	/*
	 * for non-fixed mappings, round off the suggested address.
	 * for fixed mappings, check alignment and zap old mappings.
	 */

	if ((flags & MAP_FIXED) == 0) {
		*addr = round_page(*addr);	/* round */
	} else {
		if (*addr & PAGE_MASK)
			return(EINVAL);
		uvmflag |= UVM_FLAG_FIXED;
		uvm_unmap_p(map, *addr, *addr + size, p);	/* zap! */
	}

	/*
	 * handle anon vs. non-anon mappings.   for non-anon mappings attach
	 * to underlying vm object.
	 */

	if (flags & MAP_ANON) {
		/* non-fixed anon mappings of at least __LDPGSZ get aligned */
		if ((flags & MAP_FIXED) == 0 && size >= __LDPGSZ)
			align = __LDPGSZ;
		foff = UVM_UNKNOWN_OFFSET;
		uobj = NULL;
		if ((flags & MAP_SHARED) == 0)
			/* XXX: defer amap create */
			uvmflag |= UVM_FLAG_COPYONW;
		else
			/* shared: create amap now */
			uvmflag |= UVM_FLAG_OVERLAY;

	} else {

		vp = (struct vnode *) handle;	/* get vnode */
		if (vp->v_type != VCHR) {
			/* private mappings never need write access to the object */
			uobj = uvn_attach((void *) vp, (flags & MAP_SHARED) ?
			   maxprot : (maxprot & ~VM_PROT_WRITE));
			if (uobj) {
				assert((void*)uobj == vp);
				if (flags & MAP_DENYWRITE)
					uvmflag |= UVM_FLAG_DENYWRITE;
				if ((flags & MAP_SHARED)
				    && (maxprot & VM_PROT_WRITE))
					uvmflag |= UVM_FLAG_WRITECOUNT;
			}

#ifndef UBC
			/*
			 * XXXCDC: hack from old code
			 * don't allow vnodes which have been mapped
			 * shared-writeable to persist [forces them to be
			 * flushed out when last reference goes].
			 * XXXCDC: interesting side effect: avoids a bug.
			 * note that in WRITE [ufs_readwrite.c] that we
			 * allocate buffer, uncache, and then do the write.
			 * the problem with this is that if the uncache causes
			 * VM data to be flushed to the same area of the file
			 * we are writing to... in that case we've got the
			 * buffer locked and our process goes to sleep forever.
			 *
			 * XXXCDC: checking maxprot protects us from the
			 * "persistbug" program but this is not a long term
			 * solution.
			 * 
			 * XXXCDC: we don't bother calling uncache with the vp
			 * VOP_LOCKed since we know that we are already
			 * holding a valid reference to the uvn (from the
			 * uvn_attach above), and thus it is impossible for
			 * the uncache to kill the uvn and trigger I/O.
			 */
			if (flags & MAP_SHARED) {
				if ((prot & VM_PROT_WRITE) ||
				    (maxprot & VM_PROT_WRITE)) {
					uvm_vnp_uncache(vp);
				}
			}
#else
			/* XXX for now, attach doesn't gain a ref */
			VREF(vp);
#endif
		} else {
			uobj = udv_attach((void *) &vp->v_rdev,
			    (flags & MAP_SHARED) ? maxprot :
			    (maxprot & ~VM_PROT_WRITE), foff, size);
			/*
			 * XXX Some devices don't like to be mapped with
			 * XXX PROT_EXEC, but we don't really have a
			 * XXX better way of handling this, right now
			 */
			if (uobj == NULL && (prot & PROT_EXEC) == 0) {
				/* retry the attach without execute permission */
				maxprot &= ~VM_PROT_EXECUTE;
				uobj = udv_attach((void *) &vp->v_rdev,
				    (flags & MAP_SHARED) ? maxprot :
				    (maxprot & ~VM_PROT_WRITE), foff, size);
			}
			advice = UVM_ADV_RANDOM;
		}
		
		/* attach failed: ENOMEM for regular files, EINVAL otherwise */
		if (uobj == NULL)
			return((vp->v_type == VREG) ? ENOMEM : EINVAL);

		if ((flags & MAP_SHARED) == 0)
			uvmflag |= UVM_FLAG_COPYONW;
	}

	/*
	 * set up mapping flags
	 */

	uvmflag = UVM_MAPFLAG(prot, maxprot, 
			(flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY,
			advice, uvmflag);

	error = uvm_map_p(map, addr, size, uobj, foff, align, uvmflag, p);

	if (error == 0) {
		/*
		 * POSIX 1003.1b -- if our address space was configured
		 * to lock all future mappings, wire the one we just made.
		 */
		if (prot == VM_PROT_NONE) {
			/*
			 * No more work to do in this case.
			 */
			return (0);
		}
		
		vm_map_lock(map);

		if (map->flags & VM_MAP_WIREFUTURE) {
			/* refuse to wire beyond the global or per-caller limit */
			if ((atop(size) + uvmexp.wired) > uvmexp.wiredmax
#ifdef pmap_wired_count
			    || (locklimit != 0 && (size +
			         ptoa(pmap_wired_count(vm_map_pmap(map)))) >
			        locklimit)
#endif
			) {
				error = ENOMEM;
				vm_map_unlock(map);
				/* unmap the region! */
				uvm_unmap(map, *addr, *addr + size);
				goto bad;
			}
			/*
			 * uvm_map_pageable() always returns the map
			 * unlocked.
			 */
			error = uvm_map_pageable(map, *addr, *addr + size,
			    FALSE, UVM_LK_ENTER);
			if (error != 0) {
				/* unmap the region! */
				uvm_unmap(map, *addr, *addr + size);
				goto bad;
			}
			return (0);
		}

		vm_map_unlock(map);

		return (0);
	}

	/*
	 * errors: first detach from the uobj, if any.
	 */
	
	if (uobj)
		uobj->pgops->pgo_detach(uobj);

	/* the wiring failure paths land here after unmapping the region */
bad:
	return (error);
}
Пример #16
0
/*
 * uvm_io: move data between the address space described by `map' and the
 * buffers described by `uio'.
 *
 * => uio->uio_offset is the starting virtual address in `map' and
 *    uio->uio_resid the number of bytes to move
 * => the transfer is done in chunks: each chunk is extracted into
 *    kernel_map, copied with uiomove(), and unmapped again
 * => UVM_IO_FIXPROT in `flags' is passed on as UVM_EXTRACT_FIXPROT
 * => returns 0 on success (also when the start is at or above
 *    VM_MAXUSER_ADDRESS), EIO if the range wraps around, or the error
 *    from uvm_map_extract()/uiomove()
 */
int
uvm_io(vm_map_t map, struct uio *uio, int flags)
{
	vaddr_t baseva, endva, pageoffset, kva;
	vsize_t chunksz, togo, sz;
	struct uvm_map_deadq dead_entries;
	int error, extractflags;

	/*
	 * step 0: sanity checks and set up for copy loop.  start with a
	 * large chunk size.  if we have trouble finding vm space we will
	 * reduce it.
	 */
	if (uio->uio_resid == 0)
		return(0);
	togo = uio->uio_resid;

	baseva = (vaddr_t) uio->uio_offset;
	endva = baseva + (togo - 1);

	if (endva < baseva)   /* wrap around? */
		return(EIO);

	if (baseva >= VM_MAXUSER_ADDRESS)
		return(0);
	if (endva >= VM_MAXUSER_ADDRESS)
		/* EOF truncate */
		togo = togo - (endva - VM_MAXUSER_ADDRESS + 1);
	pageoffset = baseva & PAGE_MASK;
	baseva = trunc_page(baseva);
	chunksz = min(round_page(togo + pageoffset), MAXBSIZE);
	error = 0;

	extractflags = 0;
	if (flags & UVM_IO_FIXPROT)
		extractflags |= UVM_EXTRACT_FIXPROT;

	/* step 1: main loop...  while we've got data to move */
	while (togo > 0) {
		/* step 2: extract mappings from the map into kernel_map */
		error = uvm_map_extract(map, baseva, chunksz, &kva,
		    extractflags);
		if (error) {

			/*
			 * retry with a smaller chunk...  nothing has been
			 * transferred yet, so pageoffset must survive the
			 * retry; it is only cleared after a chunk has
			 * actually been moved.
			 */
			if (error == ENOMEM && chunksz > PAGE_SIZE) {
				chunksz = trunc_page(chunksz / 2);
				if (chunksz < PAGE_SIZE)
					chunksz = PAGE_SIZE;
				continue;
			}

			break;
		}

		/* step 3: move a chunk of data */
		sz = chunksz - pageoffset;
		if (sz > togo)
			sz = togo;
		error = uiomove((caddr_t) (kva + pageoffset), sz, uio);
		togo -= sz;
		baseva += chunksz;

		/* step 4: unmap the area of kernel memory */
		vm_map_lock(kernel_map);
		TAILQ_INIT(&dead_entries);
		uvm_unmap_remove(kernel_map, kva, kva+chunksz,
		    &dead_entries, FALSE, TRUE);
		vm_map_unlock(kernel_map);
		uvm_unmap_detach(&dead_entries, AMAP_REFALL);

		/*
		 * We defer checking the error return from uiomove until
		 * here so that we won't leak memory.
		 */
		if (error)
			break;

		/* every chunk after the first starts on a page boundary */
		pageoffset = 0;
	}

	return (error);
}
Пример #17
0
/*
 *	kmem_realloc:
 *
 *	Reallocate wired-down memory in the kernel's address map
 *	or a submap.  Newly allocated pages are not zeroed.
 *	This can only be used on regions allocated with kmem_alloc.
 *
 *	If successful, the pages in the old region are mapped twice.
 *	The old region is unchanged.  Use kmem_free to get rid of it.
 *
 *	The old range is widened to page boundaries and the new size is
 *	rounded up to whole pages.  On success the new address is stored
 *	in *newaddrp and KERN_SUCCESS is returned; if no space is found
 *	even after one slab_collect() retry, the error from
 *	vm_map_find_entry() is returned.  The routine panics when the old
 *	region has no map entry or when the backing object's size differs
 *	from the old size.
 */
kern_return_t kmem_realloc(
	vm_map_t 	map,
	vm_offset_t 	oldaddr,
	vm_size_t 	oldsize,
	vm_offset_t 	*newaddrp,
	vm_size_t 	newsize)
{
	vm_offset_t oldmin, oldmax;
	vm_offset_t newaddr;
	vm_object_t object;
	vm_map_entry_t oldentry, newentry;
	unsigned int attempts;
	kern_return_t kr;

	/* Work on page-aligned bounds from here on */
	oldmin = trunc_page(oldaddr);
	oldmax = round_page(oldaddr + oldsize);
	oldsize = oldmax - oldmin;
	newsize = round_page(newsize);

	/*
	 *	Find space for the new region.
	 */

	attempts = 0;

retry:
	vm_map_lock(map);
	kr = vm_map_find_entry(map, &newaddr, newsize, (vm_offset_t) 0,
			       VM_OBJECT_NULL, &newentry);
	if (kr != KERN_SUCCESS) {
		vm_map_unlock(map);

		/* First failure: try to reclaim memory, then search again */
		if (attempts == 0) {
			attempts++;
			slab_collect();
			goto retry;
		}

		printf_once("no more room for kmem_realloc in %p\n", map);
		return kr;
	}

	/*
	 *	Find the VM object backing the old region.
	 */

	if (!vm_map_lookup_entry(map, oldmin, &oldentry))
		panic("kmem_realloc");
	object = oldentry->object.vm_object;

	/*
	 *	Increase the size of the object and
	 *	fill in the new region.
	 */

	/* The new entry holds its own reference on the shared object */
	vm_object_reference(object);
	vm_object_lock(object);
	if (object->size != oldsize)
		panic("kmem_realloc");
	object->size = newsize;
	vm_object_unlock(object);

	newentry->object.vm_object = object;
	newentry->offset = 0;

	/*
	 *	Since we have not given out this address yet,
	 *	it is safe to unlock the map.  We are trusting
	 *	that nobody will play with either region.
	 */

	vm_map_unlock(map);

	/*
	 *	Remap the pages in the old region and
	 *	allocate more pages for the new region.
	 */

	kmem_remap_pages(object, 0,
			 newaddr, newaddr + oldsize,
			 VM_PROT_DEFAULT);
	kmem_alloc_pages(object, oldsize,
			 newaddr + oldsize, newaddr + newsize,
			 VM_PROT_DEFAULT);

	*newaddrp = newaddr;
	return KERN_SUCCESS;
}
Пример #18
0
/*
 *	kmem_alloc:
 *
 *	Allocate `size' bytes (rounded up to whole pages) of wired-down
 *	memory in `map', backed by a freshly allocated object.  The
 *	memory is not zeroed.
 *
 *	If no space is found, slab_collect() is called and the search is
 *	retried once.  On success the address is stored in *addrp and
 *	KERN_SUCCESS is returned; otherwise the new object is released
 *	and the error from vm_map_find_entry() is returned.
 */
kern_return_t
kmem_alloc(
	vm_map_t 	map,
	vm_offset_t 	*addrp,
	vm_size_t 	size)
{
	vm_object_t object;
	vm_map_entry_t entry;
	vm_offset_t addr;
	unsigned int attempts;
	kern_return_t kr;

	/*
	 *	The object has to be allocated before the map is locked,
	 *	lest we risk deadlock with the default pager:
	 *		device_read_alloc uses kmem_alloc,
	 *		which tries to allocate an object,
	 *		which uses kmem_alloc_wired to get memory,
	 *		which blocks for pages.
	 *		then the default pager needs to read a block
	 *		to process a memory_object_data_write,
	 *		and device_read_alloc calls kmem_alloc
	 *		and deadlocks on the map lock.
	 */

	size = round_page(size);
	object = vm_object_allocate(size);

	/*
	 *	Look for space; the loop is left with the map locked.
	 */
	for (attempts = 0; ; attempts++) {
		vm_map_lock(map);
		kr = vm_map_find_entry(map, &addr, size, (vm_offset_t) 0,
				       VM_OBJECT_NULL, &entry);
		if (kr == KERN_SUCCESS)
			break;
		vm_map_unlock(map);

		/* Second failure in a row: give up. */
		if (attempts != 0) {
			printf_once("no more room for kmem_alloc in %p\n", map);
			vm_object_deallocate(object);
			return kr;
		}

		/* First failure: reclaim memory, then search once more. */
		slab_collect();
	}

	entry->object.vm_object = object;
	entry->offset = 0;

	/*
	 *	The address has not been handed out yet,
	 *	so the map can be unlocked safely.
	 */
	vm_map_unlock(map);

	/*
	 *	Allocate wired-down memory in the object,
	 *	for this entry, and enter it in the kernel pmap.
	 */
	kmem_alloc_pages(object, 0,
			 addr, addr + size,
			 VM_PROT_DEFAULT);

	/*
	 *	Hand back the memory, not zeroed.
	 */
	*addrp = addr;
	return KERN_SUCCESS;
}
Пример #19
0
/*
 *	projected_buffer_map:
 *
 *	Map `size' bytes (rounded up to whole pages) of the kernel region
 *	starting at `kernel_addr' into `map', sharing the kernel entry's
 *	object.  The new entry takes `protection' as both its current and
 *	maximum protection and copies the kernel entry's wired count.
 *	The chosen user address is stored in *user_p.
 *
 *	Returns KERN_INVALID_ARGUMENT when `map' is null or is kernel_map,
 *	when `kernel_addr' is not covered by an entry of kernel_map, or
 *	when the range extends beyond that entry; the error from
 *	vm_map_find_entry() when no space is found; KERN_SUCCESS otherwise.
 */
kern_return_t
projected_buffer_map(
	vm_map_t 	map,
	vm_offset_t 	kernel_addr,
	vm_size_t 	size,
	vm_offset_t 	*user_p,
       vm_prot_t 	protection,
       vm_inherit_t 	inheritance)  /*Currently only VM_INHERIT_NONE supported*/
{
	vm_map_entry_t u_entry, k_entry;
	vm_offset_t physical_addr, user_addr;
	vm_size_t r_size;
	kern_return_t kr;

	/*
	 *	Find entry in kernel map 
	 *
	 *	NOTE(review): kernel_map is looked up here without
	 *	vm_map_lock(kernel_map) being taken in this function --
	 *	confirm the caller provides the needed protection.
	 */

	size = round_page(size);
	if (map == VM_MAP_NULL || map == kernel_map ||
	    !vm_map_lookup_entry(kernel_map, kernel_addr, &k_entry) ||
	    kernel_addr + size > k_entry->vme_end)
	  return(KERN_INVALID_ARGUMENT);


	/*
         *     Create entry in user task
         */

	vm_map_lock(map);
	kr = vm_map_find_entry(map, &user_addr, size, (vm_offset_t) 0,
			       VM_OBJECT_NULL, &u_entry);
	if (kr != KERN_SUCCESS) {
	  vm_map_unlock(map);
	  return kr;
	}

	/* Share the kernel entry's object, taking a reference for the user entry */
	u_entry->object.vm_object = k_entry->object.vm_object;
	vm_object_reference(k_entry->object.vm_object);
	/* Object offset that corresponds to kernel_addr */
	u_entry->offset = kernel_addr - k_entry->vme_start + k_entry->offset;
	u_entry->projected_on = k_entry;
             /*Creates coupling with kernel mapping of the buffer, and
               also guarantees that user cannot directly manipulate
               buffer VM entry*/
	u_entry->protection = protection;
	u_entry->max_protection = protection;
	u_entry->inheritance = inheritance;
	u_entry->wired_count = k_entry->wired_count;
	vm_map_unlock(map);
       	*user_p = user_addr;

	/* Set up physical mappings for user pmap */

	/* The range is pageable only when the kernel entry is not wired */
	pmap_pageable(map->pmap, user_addr, user_addr + size,
		      !k_entry->wired_count);
	/* Enter, page by page, the physical page behind each kernel address */
	for (r_size = 0; r_size < size; r_size += PAGE_SIZE) {
	  physical_addr = pmap_extract(kernel_pmap, kernel_addr + r_size);
	  pmap_enter(map->pmap, user_addr + r_size, physical_addr,
		     protection, k_entry->wired_count);
	}

	return(KERN_SUCCESS);
}
Пример #20
0
/*
 * vm_contig_pg_kmap:
 *
 * Map a previously allocated (vm_contig_pg_alloc) run of pages from
 * vm_page_array[], beginning at index "start", into kernel virtual
 * address space taken from "map".  Once mapped, the pages are part of
 * the kernel and are to be freed with kmem_free(&kernel_map, addr, size).
 *
 * "size" is rounded up to whole pages and must not be zero.  When
 * "flags" contains M_ZERO, pages not already marked PG_ZERO are zeroed.
 *
 * Returns the kernel virtual address of the mapping, or 0 if no space
 * could be found in the map.
 *
 * No requirements.
 */
vm_offset_t
vm_contig_pg_kmap(int start, u_long size, vm_map_t map, int flags)
{
	vm_page_t pages = vm_page_array;
	vm_offset_t addr, va;
	int i, count;

	size = round_page(size);
	if (size == 0)
		panic("vm_contig_pg_kmap: size must not be 0");

	crit_enter();
	lwkt_gettoken(&vm_token);

	/*
	 * The caller already owns a suitable contiguous chunk of pages.
	 * Reserve KVM for it, assign the physical pages and return a
	 * kernel VM pointer.
	 */
	count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
	vm_map_lock(map);
	if (vm_map_findspace(map, vm_map_min(map), size, PAGE_SIZE, 0, &addr) !=
	    KERN_SUCCESS) {
		/*
		 * XXX We almost never run out of kernel virtual
		 * space, so we don't make the allocated memory
		 * above available.
		 */
		vm_map_unlock(map);
		vm_map_entry_release(count);
		addr = 0;
		goto out;
	}

	/*
	 * kernel_object maps 1:1 to kernel_map, hence the object offset
	 * passed to vm_map_insert() is the address itself.
	 */
	vm_object_hold(&kernel_object);
	vm_object_reference(&kernel_object);
	vm_map_insert(map, &count,
		      &kernel_object, addr,
		      addr, addr + size,
		      VM_MAPTYPE_NORMAL,
		      VM_PROT_ALL, VM_PROT_ALL,
		      0);
	vm_map_unlock(map);
	vm_map_entry_release(count);

	/*
	 * Insert each page into kernel_object at the index matching its
	 * new virtual address, zeroing it first when requested.
	 */
	va = addr;
	i = start;
	while (i < (start + size / PAGE_SIZE)) {
		vm_page_t m = &pages[i];

		vm_page_insert(m, &kernel_object, OFF_TO_IDX(va));
		if ((flags & M_ZERO) && !(m->flags & PG_ZERO))
			pmap_zero_page(VM_PAGE_TO_PHYS(m));
		m->flags = 0;
		va += PAGE_SIZE;
		i++;
	}
	vm_map_wire(map, addr, addr + size, 0);

	vm_object_drop(&kernel_object);

out:
	lwkt_reltoken(&vm_token);
	crit_exit();
	return (addr);
}
Пример #21
0
/*
 * Destroy the old address space of the exec'ing process and allocate a
 * new stack.  The new stack is only sgrowsiz large because it is grown
 * automatically on a page fault.
 *
 * The existing vmspace is emptied and reused when it is unshared and
 * its bounds match the new sysentvec; otherwise vmspace_exec() supplies
 * a fresh one.  The sysentvec's shared page object, if any, is mapped
 * afterwards, followed by the stack.
 *
 * Returns 0 on success, the error from vmspace_exec(), or a VM status
 * translated through vm_mmap_to_errno().
 */
int
exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv)
{
	struct proc *p = imgp->proc;
	struct vmspace *vmspace = p->p_vmspace;
	struct rlimit rlim_stack;
	vm_object_t shpage_obj;
	vm_offset_t sv_minuser, stack_addr;
	vm_map_t map;
	u_long stack_size;
	int error;

	imgp->vmspace_destroyed = 1;
	imgp->sysent = sv;

	/* May be called with Giant held */
	EVENTHANDLER_DIRECT_INVOKE(process_exec, p, imgp);

	/*
	 * Unless mapping at address zero is allowed, the lowest usable
	 * user address is at least one page up.
	 */
	map = &vmspace->vm_map;
	if (!map_at_zero)
		sv_minuser = MAX(sv->sv_minuser, PAGE_SIZE);
	else
		sv_minuser = sv->sv_minuser;

	/*
	 * Blow away entire process VM, if address space not shared,
	 * otherwise, create a new VM space so that other threads are
	 * not disrupted
	 */
	if (vmspace->vm_refcnt != 1 || vm_map_min(map) != sv_minuser ||
	    vm_map_max(map) != sv->sv_maxuser ||
	    !cpu_exec_vmspace_reuse(p, map)) {
		error = vmspace_exec(p, sv_minuser, sv->sv_maxuser);
		if (error)
			return (error);
		/* vmspace_exec() installed a new vmspace; pick it up. */
		vmspace = p->p_vmspace;
		map = &vmspace->vm_map;
	} else {
		shmexit(vmspace);
		pmap_remove_pages(vmspace_pmap(vmspace));
		vm_map_remove(map, vm_map_min(map), vm_map_max(map));
		/*
		 * An exec terminates mlockall(MCL_FUTURE), ASLR state
		 * must be re-evaluated.
		 */
		vm_map_lock(map);
		vm_map_modflags(map, 0, MAP_WIREFUTURE | MAP_ASLR |
		    MAP_ASLR_IGNSTART);
		vm_map_unlock(map);
	}
	map->flags |= imgp->map_flags;

	/* Map a shared page */
	shpage_obj = sv->sv_shared_page_obj;
	if (shpage_obj != NULL) {
		/* This reference is dropped again if the mapping fails. */
		vm_object_reference(shpage_obj);
		error = vm_map_fixed(map, shpage_obj, 0,
		    sv->sv_shared_page_base, sv->sv_shared_page_len,
		    VM_PROT_READ | VM_PROT_EXECUTE,
		    VM_PROT_READ | VM_PROT_EXECUTE,
		    MAP_INHERIT_SHARE | MAP_ACC_NO_CHARGE);
		if (error != KERN_SUCCESS) {
			vm_object_deallocate(shpage_obj);
			return (vm_mmap_to_errno(error));
		}
	}

	/*
	 * Allocate a new stack.  Without an image-requested size, use
	 * the sysentvec's maximum if it has one, else the global maxssiz.
	 */
	if (imgp->stack_sz == 0) {
		if (sv->sv_maxssiz == NULL)
			stack_size = maxssiz;
		else
			stack_size = *sv->sv_maxssiz;
	} else {
		/*
		 * Image-requested size: clamp it to the hard stack limit
		 * and raise the soft limit when the request exceeds it.
		 */
		stack_size = trunc_page(imgp->stack_sz);
		PROC_LOCK(p);
		lim_rlimit_proc(p, RLIMIT_STACK, &rlim_stack);
		PROC_UNLOCK(p);
		if (stack_size > rlim_stack.rlim_max)
			stack_size = rlim_stack.rlim_max;
		if (stack_size > rlim_stack.rlim_cur) {
			rlim_stack.rlim_cur = stack_size;
			kern_setrlimit(curthread, RLIMIT_STACK, &rlim_stack);
		}
	}

	/* The stack grows down from sv_usrstack. */
	stack_addr = sv->sv_usrstack - stack_size;
	error = vm_map_stack(map, stack_addr, (vm_size_t)stack_size,
	    shpage_obj != NULL && imgp->stack_prot != 0 ? imgp->stack_prot :
	    sv->sv_stackprot, VM_PROT_ALL, MAP_STACK_GROWS_DOWN);
	if (error != KERN_SUCCESS)
		return (vm_mmap_to_errno(error));

	/*
	 * vm_ssize and vm_maxsaddr are somewhat antiquated concepts, but they
	 * are still used to enforce the stack rlimit on the process stack.
	 */
	vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT;
	vmspace->vm_maxsaddr = (char *)stack_addr;

	return (0);
}
Пример #22
0
/*
 *	projected_buffer_allocate:
 *
 *	Allocate a zero-filled buffer of "size" bytes (rounded up to
 *	whole pages) that is mapped both in kernel_map and in the user
 *	map "map", backed by a single new VM object.
 *
 *	map		user map; must be neither VM_MAP_NULL nor kernel_map
 *	size		buffer length in bytes
 *	persistence	if zero, the kernel entry is marked so that it is
 *			deallocated together with the last user entry
 *	kernel_p	out: kernel address of the buffer
 *	user_p		out: user address of the buffer
 *	protection	current and maximum protection of the user entry
 *	inheritance	stored in the user entry unchanged
 *
 *	Returns KERN_INVALID_ARGUMENT for an unacceptable map, the
 *	failure code of vm_map_find_entry() if either map has no room,
 *	or KERN_SUCCESS.  Note that *kernel_p is written before the
 *	user-map search, so it is modified even when that search fails.
 */
kern_return_t
projected_buffer_allocate(
	vm_map_t 	map,
	vm_size_t 	size,
       int 		persistence,
	vm_offset_t 	*kernel_p,
	vm_offset_t 	*user_p,
       vm_prot_t 	protection,
       vm_inherit_t 	inheritance)  /*Currently only VM_INHERIT_NONE supported*/
{
	vm_object_t object;
	vm_map_entry_t u_entry, k_entry;
	vm_offset_t addr, phys;
	vm_size_t off;
	kern_return_t kr;

	if (map == VM_MAP_NULL)
		return(KERN_INVALID_ARGUMENT);
	if (map == kernel_map)
		return(KERN_INVALID_ARGUMENT);

	/*
	 *	Allocate the object that will back both mappings.
	 */
	size = round_page(size);
	object = vm_object_allocate(size);

	/*
	 *	Kernel side: find room in kernel_map and attach the object.
	 */
	vm_map_lock(kernel_map);
	kr = vm_map_find_entry(kernel_map, &addr, size, (vm_offset_t) 0,
			       VM_OBJECT_NULL, &k_entry);
	if (kr != KERN_SUCCESS) {
		vm_map_unlock(kernel_map);
		vm_object_deallocate(object);
		return kr;
	}

	k_entry->object.vm_object = object;
	if (persistence == 0) {
		/*
		 *	Mark entry so as to automatically deallocate it
		 *	when last corresponding user entry is deallocated
		 */
		k_entry->projected_on = (vm_map_entry_t) -1;
	}
	vm_map_unlock(kernel_map);
	*kernel_p = addr;

	/*
	 *	User side: find room in the caller's map.
	 */
	vm_map_lock(map);
	kr = vm_map_find_entry(map, &addr, size, (vm_offset_t) 0,
			       VM_OBJECT_NULL, &u_entry);
	if (kr != KERN_SUCCESS) {
		/* Undo the kernel mapping created above. */
		vm_map_unlock(map);
		vm_map_lock(kernel_map);
		vm_map_entry_delete(kernel_map, k_entry);
		vm_map_unlock(kernel_map);
		/*
		 * NOTE(review): confirm that vm_map_entry_delete() does
		 * not already drop the entry's object reference; if it
		 * does, this call would release the object twice.
		 */
		vm_object_deallocate(object);
		return kr;
	}

	/* The user entry is a second user of the object: add a reference. */
	u_entry->object.vm_object = object;
	vm_object_reference(object);
	/*
	 *	Creates coupling with kernel mapping of the buffer, and
	 *	also guarantees that user cannot directly manipulate
	 *	buffer VM entry
	 */
	u_entry->projected_on = k_entry;
	u_entry->protection = protection;
	u_entry->max_protection = protection;
	u_entry->inheritance = inheritance;
	vm_map_unlock(map);
	*user_p = addr;

	/*
	 *	Allocate wired-down memory in the object,
	 *	and enter it in the kernel pmap.
	 */
	kmem_alloc_pages(object, 0,
			 *kernel_p, *kernel_p + size,
			 VM_PROT_READ | VM_PROT_WRITE);
	memset((void*) *kernel_p, 0, size);         /*Zero fill*/

	/*
	 *	Set up physical mappings for user pmap: each user page is
	 *	entered with the physical page found behind the matching
	 *	kernel address.
	 */
	pmap_pageable(map->pmap, *user_p, *user_p + size, FALSE);
	off = 0;
	while (off < size) {
		phys = pmap_extract(kernel_pmap, *kernel_p + off);
		pmap_enter(map->pmap, *user_p + off, phys,
			   protection, TRUE);
		off += PAGE_SIZE;
	}

	return(KERN_SUCCESS);
}
Пример #23
0
/*
 * vm_fault_hold:
 *
 * Resolve a page fault at virtual address "vaddr" in "map" and enter the
 * resulting page into the map's pmap.
 *
 *	map		map in which the fault occurred
 *	vaddr		faulting virtual address
 *	fault_type	requested access; VM_PROT_WRITE and VM_PROT_COPY are
 *			examined below
 *	fault_flags	VM_FAULT_CHANGE_WIRING and VM_FAULT_DIRTY are
 *			examined below
 *	m_hold		if not NULL, receives the resolved page, which is
 *			additionally held with vm_page_hold()
 *
 * The function walks the chain of backing objects starting at the object
 * returned by vm_map_lookup(), asks the pager for the page where
 * appropriate, zero-fills when the chain is exhausted, and copies (or
 * moves) the page into the top-level object for write/copy faults.  Many
 * conditions release everything and restart from RetryFault.
 *
 * Returns KERN_SUCCESS on success.  Failure returns visible here are:
 * the vm_map_lookup()/vm_map_lookup_locked() status; KERN_FAILURE when
 * stack growth fails, for a NOFAULT entry under TDP_DEVMEMIO, or for a
 * pager I/O error outside kernel_map; KERN_PROTECTION_FAILURE for a dead
 * object, an index beyond the object's size, or VM_PAGER_BAD.
 *
 * On success the thread's ru_majflt is bumped if the pager supplied the
 * page, ru_minflt otherwise.
 */
int
vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
    int fault_flags, vm_page_t *m_hold)
{
	vm_prot_t prot;
	long ahead, behind;
	int alloc_req, era, faultcount, nera, reqpage, result;
	boolean_t growstack, is_first_object_locked, wired;
	int map_generation;
	vm_object_t next_object;
	vm_page_t marray[VM_FAULT_READ_MAX];
	int hardfault;
	struct faultstate fs;
	struct vnode *vp;
	int locked, error;

	/*
	 * State that must survive restarts from RetryFault: the hard-fault
	 * counter, the one-shot stack growth permission, and the vnode /
	 * Giant bookkeeping kept in fs.
	 */
	hardfault = 0;
	growstack = TRUE;
	PCPU_INC(cnt.v_vm_faults);
	fs.vp = NULL;
	fs.vfslocked = 0;
	faultcount = reqpage = 0;

RetryFault:;

	/*
	 * Find the backing store object and offset into it to begin the
	 * search.
	 */
	fs.map = map;
	result = vm_map_lookup(&fs.map, vaddr, fault_type, &fs.entry,
	    &fs.first_object, &fs.first_pindex, &prot, &wired);
	if (result != KERN_SUCCESS) {
		/*
		 * An invalid address in a non-kernel map gets exactly one
		 * attempt at growing the stack; growstack is cleared so a
		 * second lookup failure is returned to the caller.
		 */
		if (growstack && result == KERN_INVALID_ADDRESS &&
		    map != kernel_map) {
			result = vm_map_growstack(curproc, vaddr);
			if (result != KERN_SUCCESS)
				return (KERN_FAILURE);
			growstack = FALSE;
			goto RetryFault;
		}
		return (result);
	}

	/*
	 * Remember the map timestamp; it is compared again after the map
	 * has been unlocked to decide whether a relookup is needed.
	 */
	map_generation = fs.map->timestamp;

	/*
	 * A fault on a NOFAULT entry is fatal unless the thread is flagged
	 * TDP_DEVMEMIO, in which case it simply fails.
	 */
	if (fs.entry->eflags & MAP_ENTRY_NOFAULT) {
		if ((curthread->td_pflags & TDP_DEVMEMIO) != 0) {
			vm_map_unlock_read(fs.map);
			return (KERN_FAILURE);
		}
		panic("vm_fault: fault on nofault entry, addr: %lx",
		    (u_long)vaddr);
	}

	/*
	 * The entry is in transition on behalf of another thread: trade
	 * the read lock for the write lock, and if the entry is still in
	 * transition, request a wakeup and sleep.  Either way, start over.
	 */
	if (fs.entry->eflags & MAP_ENTRY_IN_TRANSITION &&
	    fs.entry->wiring_thread != curthread) {
		vm_map_unlock_read(fs.map);
		vm_map_lock(fs.map);
		if (vm_map_lookup_entry(fs.map, vaddr, &fs.entry) &&
		    (fs.entry->eflags & MAP_ENTRY_IN_TRANSITION)) {
			fs.entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
			vm_map_unlock_and_wait(fs.map, 0);
		} else
			vm_map_unlock(fs.map);
		goto RetryFault;
	}

	/*
	 * Make a reference to this object to prevent its disposal while we
	 * are messing with it.  Once we have the reference, the map is free
	 * to be diddled.  Since objects reference their shadows (and copies),
	 * they will stay around as well.
	 *
	 * Bump the paging-in-progress count to prevent size changes (e.g. 
	 * truncation operations) during I/O.  This must be done after
	 * obtaining the vnode lock in order to avoid possible deadlocks.
	 */
	VM_OBJECT_LOCK(fs.first_object);
	vm_object_reference_locked(fs.first_object);
	vm_object_pip_add(fs.first_object, 1);

	fs.lookup_still_valid = TRUE;

	/*
	 * For a wired mapping, fault with every protection the lookup
	 * granted, keeping only the caller's VM_PROT_COPY bit.
	 */
	if (wired)
		fault_type = prot | (fault_type & VM_PROT_COPY);

	fs.first_m = NULL;

	/*
	 * Search for the page at object/offset.
	 */
	fs.object = fs.first_object;
	fs.pindex = fs.first_pindex;
	while (TRUE) {
		/*
		 * If the object is dead, we stop here
		 */
		if (fs.object->flags & OBJ_DEAD) {
			unlock_and_deallocate(&fs);
			return (KERN_PROTECTION_FAILURE);
		}

		/*
		 * See if page is resident
		 */
		fs.m = vm_page_lookup(fs.object, fs.pindex);
		if (fs.m != NULL) {
			/* 
			 * check for page-based copy on write.
			 * We check fs.object == fs.first_object so
			 * as to ensure the legacy COW mechanism is
			 * used when the page in question is part of
			 * a shadow object.  Otherwise, vm_page_cowfault()
			 * removes the page from the backing object, 
			 * which is not what we want.
			 */
			vm_page_lock(fs.m);
			if ((fs.m->cow) && 
			    (fault_type & VM_PROT_WRITE) &&
			    (fs.object == fs.first_object)) {
				vm_page_cowfault(fs.m);
				unlock_and_deallocate(&fs);
				goto RetryFault;
			}

			/*
			 * Wait/Retry if the page is busy.  We have to do this
			 * if the page is busy via either VPO_BUSY or 
			 * vm_page_t->busy because the vm_pager may be using
			 * vm_page_t->busy for pageouts ( and even pageins if
			 * it is the vnode pager ), and we could end up trying
			 * to pagein and pageout the same page simultaneously.
			 *
			 * We can theoretically allow the busy case on a read
			 * fault if the page is marked valid, but since such
			 * pages are typically already pmap'd, putting that
			 * special case in might be more effort than it is
			 * worth.  We cannot under any circumstances mess
			 * around with a vm_page_t->busy page except, perhaps,
			 * to pmap it.
			 */
			if ((fs.m->oflags & VPO_BUSY) || fs.m->busy) {
				/*
				 * Reference the page before unlocking and
				 * sleeping so that the page daemon is less
				 * likely to reclaim it. 
				 */
				vm_page_aflag_set(fs.m, PGA_REFERENCED);
				vm_page_unlock(fs.m);
				/*
				 * When we are below the top-level object,
				 * free the page recorded in fs.first_m.
				 * If the top-level object's lock cannot be
				 * taken immediately, drop the current
				 * object's lock and take both, first
				 * object first.
				 */
				if (fs.object != fs.first_object) {
					if (!VM_OBJECT_TRYLOCK(
					    fs.first_object)) {
						VM_OBJECT_UNLOCK(fs.object);
						VM_OBJECT_LOCK(fs.first_object);
						VM_OBJECT_LOCK(fs.object);
					}
					vm_page_lock(fs.first_m);
					vm_page_free(fs.first_m);
					vm_page_unlock(fs.first_m);
					vm_object_pip_wakeup(fs.first_object);
					VM_OBJECT_UNLOCK(fs.first_object);
					fs.first_m = NULL;
				}
				unlock_map(&fs);
				/*
				 * Sleep only if the same page is still at
				 * this object/index.
				 */
				if (fs.m == vm_page_lookup(fs.object,
				    fs.pindex)) {
					vm_page_sleep_if_busy(fs.m, TRUE,
					    "vmpfw");
				}
				vm_object_pip_wakeup(fs.object);
				VM_OBJECT_UNLOCK(fs.object);
				PCPU_INC(cnt.v_intrans);
				vm_object_deallocate(fs.first_object);
				goto RetryFault;
			}
			vm_pageq_remove(fs.m);
			vm_page_unlock(fs.m);

			/*
			 * Mark page busy for other processes, and the 
			 * pagedaemon.  If it still isn't completely valid
			 * (readable), jump to readrest, else break-out ( we
			 * found the page ).
			 */
			vm_page_busy(fs.m);
			if (fs.m->valid != VM_PAGE_BITS_ALL)
				goto readrest;
			break;
		}

		/*
		 * Page is not resident, If this is the search termination
		 * or the pager might contain the page, allocate a new page.
		 * (TRYPAGER is a macro defined outside this function.)
		 */
		if (TRYPAGER || fs.object == fs.first_object) {
			if (fs.pindex >= fs.object->size) {
				unlock_and_deallocate(&fs);
				return (KERN_PROTECTION_FAILURE);
			}

			/*
			 * Allocate a new page for this object/offset pair.
			 *
			 * Unlocked read of the p_flag is harmless. At
			 * worst, the P_KILLED might be not observed
			 * there, and allocation can fail, causing
			 * restart and new reading of the p_flag.
			 */
			fs.m = NULL;
			if (!vm_page_count_severe() || P_KILLED(curproc)) {
#if VM_NRESERVLEVEL > 0
				if ((fs.object->flags & OBJ_COLORED) == 0) {
					fs.object->flags |= OBJ_COLORED;
					fs.object->pg_color = atop(vaddr) -
					    fs.pindex;
				}
#endif
				alloc_req = P_KILLED(curproc) ?
				    VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL;
				/*
				 * Ask for a zeroed page only when neither a
				 * vnode nor a backing object could supply
				 * the contents.
				 */
				if (fs.object->type != OBJT_VNODE &&
				    fs.object->backing_object == NULL)
					alloc_req |= VM_ALLOC_ZERO;
				fs.m = vm_page_alloc(fs.object, fs.pindex,
				    alloc_req);
			}
			/*
			 * No page could be allocated: release everything,
			 * wait for memory and start over.
			 */
			if (fs.m == NULL) {
				unlock_and_deallocate(&fs);
				VM_WAITPFAULT;
				goto RetryFault;
			} else if (fs.m->valid == VM_PAGE_BITS_ALL)
				break;
		}

readrest:
		/*
		 * We have found a valid page or we have allocated a new page.
		 * The page thus may not be valid or may not be entirely 
		 * valid.
		 *
		 * Attempt to fault-in the page if there is a chance that the
		 * pager has it, and potentially fault in additional pages
		 * at the same time.
		 */
		if (TRYPAGER) {
			int rv;
			u_char behavior = vm_map_entry_behavior(fs.entry);

			/*
			 * Choose the read-behind/read-ahead window from the
			 * entry's access behavior.
			 */
			if (behavior == MAP_ENTRY_BEHAV_RANDOM ||
			    P_KILLED(curproc)) {
				behind = 0;
				ahead = 0;
			} else if (behavior == MAP_ENTRY_BEHAV_SEQUENTIAL) {
				behind = 0;
				ahead = atop(fs.entry->end - vaddr) - 1;
				if (ahead > VM_FAULT_READ_AHEAD_MAX)
					ahead = VM_FAULT_READ_AHEAD_MAX;
				if (fs.pindex == fs.entry->next_read)
					vm_fault_cache_behind(&fs,
					    VM_FAULT_READ_MAX);
			} else {
				/*
				 * If this is a sequential page fault, then
				 * arithmetically increase the number of pages
				 * in the read-ahead window.  Otherwise, reset
				 * the read-ahead window to its smallest size.
				 */
				behind = atop(vaddr - fs.entry->start);
				if (behind > VM_FAULT_READ_BEHIND)
					behind = VM_FAULT_READ_BEHIND;
				ahead = atop(fs.entry->end - vaddr) - 1;
				era = fs.entry->read_ahead;
				if (fs.pindex == fs.entry->next_read) {
					nera = era + behind;
					if (nera > VM_FAULT_READ_AHEAD_MAX)
						nera = VM_FAULT_READ_AHEAD_MAX;
					behind = 0;
					if (ahead > nera)
						ahead = nera;
					if (era == VM_FAULT_READ_AHEAD_MAX)
						vm_fault_cache_behind(&fs,
						    VM_FAULT_CACHE_BEHIND);
				} else if (ahead > VM_FAULT_READ_AHEAD_MIN)
					ahead = VM_FAULT_READ_AHEAD_MIN;
				if (era != ahead)
					fs.entry->read_ahead = ahead;
			}

			/*
			 * Call the pager to retrieve the data, if any, after
			 * releasing the lock on the map.  We hold a ref on
			 * fs.object and the pages are VPO_BUSY'd.
			 */
			unlock_map(&fs);

vnode_lock:
			if (fs.object->type == OBJT_VNODE) {
				vp = fs.object->handle;
				/*
				 * Keep a vnode already held from an earlier
				 * pass if it is the same one; otherwise
				 * release the stale one first.
				 */
				if (vp == fs.vp)
					goto vnode_locked;
				else if (fs.vp != NULL) {
					vput(fs.vp);
					fs.vp = NULL;
				}
				locked = VOP_ISLOCKED(vp);

				/*
				 * Take Giant if the filesystem needs it.  If
				 * it cannot be taken without blocking, drop
				 * the object lock while waiting and redo the
				 * vnode checks.
				 */
				if (VFS_NEEDSGIANT(vp->v_mount) && !fs.vfslocked) {
					fs.vfslocked = 1;
					if (!mtx_trylock(&Giant)) {
						VM_OBJECT_UNLOCK(fs.object);
						mtx_lock(&Giant);
						VM_OBJECT_LOCK(fs.object);
						goto vnode_lock;
					}
				}
				if (locked != LK_EXCLUSIVE)
					locked = LK_SHARED;
				/* Do not sleep for vnode lock while fs.m is busy */
				error = vget(vp, locked | LK_CANRECURSE |
				    LK_NOWAIT, curthread);
				if (error != 0) {
					int vfslocked;

					/*
					 * The non-blocking vget failed.
					 * Release the page and the fault
					 * state, then block for the vnode
					 * lock and retry the fault with the
					 * vnode recorded in fs.vp.
					 */
					vfslocked = fs.vfslocked;
					fs.vfslocked = 0; /* Keep Giant */
					vhold(vp);
					release_page(&fs);
					unlock_and_deallocate(&fs);
					error = vget(vp, locked | LK_RETRY |
					    LK_CANRECURSE, curthread);
					vdrop(vp);
					fs.vp = vp;
					fs.vfslocked = vfslocked;
					KASSERT(error == 0,
					    ("vm_fault: vget failed"));
					goto RetryFault;
				}
				fs.vp = vp;
			}
vnode_locked:
			KASSERT(fs.vp == NULL || !fs.map->system_map,
			    ("vm_fault: vnode-backed object mapped by system map"));

			/*
			 * now we find out if any other pages should be paged
			 * in at this time this routine checks to see if the
			 * pages surrounding this fault reside in the same
			 * object as the page for this fault.  If they do,
			 * then they are faulted in also into the object.  The
			 * array "marray" returned contains an array of
			 * vm_page_t structs where one of them is the
			 * vm_page_t passed to the routine.  The reqpage
			 * return value is the index into the marray for the
			 * vm_page_t passed to the routine.
			 *
			 * fs.m plus the additional pages are VPO_BUSY'd.
			 */
			faultcount = vm_fault_additional_pages(
			    fs.m, behind, ahead, marray, &reqpage);

			/* With no pages selected, treat it as a pager miss. */
			rv = faultcount ?
			    vm_pager_get_pages(fs.object, marray, faultcount,
				reqpage) : VM_PAGER_FAIL;

			if (rv == VM_PAGER_OK) {
				/*
				 * Found the page. Leave it busy while we play
				 * with it.
				 */

				/*
				 * Relookup in case pager changed page. Pager
				 * is responsible for disposition of old page
				 * if moved.
				 */
				fs.m = vm_page_lookup(fs.object, fs.pindex);
				if (!fs.m) {
					unlock_and_deallocate(&fs);
					goto RetryFault;
				}

				hardfault++;
				break; /* break to PAGE HAS BEEN FOUND */
			}
			/*
			 * Remove the bogus page (which does not exist at this
			 * object/offset); before doing so, we must get back
			 * our object lock to preserve our invariant.
			 *
			 * Also wake up any other process that may want to bring
			 * in this page.
			 *
			 * If this is the top-level object, we must leave the
			 * busy page to prevent another process from rushing
			 * past us, and inserting the page in that object at
			 * the same time that we are.
			 */
			if (rv == VM_PAGER_ERROR)
				printf("vm_fault: pager read error, pid %d (%s)\n",
				    curproc->p_pid, curproc->p_comm);
			/*
			 * Data outside the range of the pager or an I/O error
			 */
			/*
			 * XXX - the check for kernel_map is a kludge to work
			 * around having the machine panic on a kernel space
			 * fault w/ I/O error.
			 */
			if (((fs.map != kernel_map) && (rv == VM_PAGER_ERROR)) ||
				(rv == VM_PAGER_BAD)) {
				vm_page_lock(fs.m);
				vm_page_free(fs.m);
				vm_page_unlock(fs.m);
				fs.m = NULL;
				unlock_and_deallocate(&fs);
				return ((rv == VM_PAGER_ERROR) ? KERN_FAILURE : KERN_PROTECTION_FAILURE);
			}
			if (fs.object != fs.first_object) {
				vm_page_lock(fs.m);
				vm_page_free(fs.m);
				vm_page_unlock(fs.m);
				fs.m = NULL;
				/*
				 * XXX - we cannot just fall out at this
				 * point, m has been freed and is invalid!
				 */
			}
		}

		/*
		 * We get here if the object has default pager (or unwiring) 
		 * or the pager doesn't have the page.
		 */
		if (fs.object == fs.first_object)
			fs.first_m = fs.m;

		/*
		 * Move on to the next object.  Lock the next object before
		 * unlocking the current one.
		 */
		fs.pindex += OFF_TO_IDX(fs.object->backing_object_offset);
		next_object = fs.object->backing_object;
		if (next_object == NULL) {
			/*
			 * If there's no object left, fill the page in the top
			 * object with zeros.
			 */
			if (fs.object != fs.first_object) {
				vm_object_pip_wakeup(fs.object);
				VM_OBJECT_UNLOCK(fs.object);

				fs.object = fs.first_object;
				fs.pindex = fs.first_pindex;
				fs.m = fs.first_m;
				VM_OBJECT_LOCK(fs.object);
			}
			fs.first_m = NULL;

			/*
			 * Zero the page if necessary and mark it valid.
			 */
			if ((fs.m->flags & PG_ZERO) == 0) {
				pmap_zero_page(fs.m);
			} else {
				PCPU_INC(cnt.v_ozfod);
			}
			PCPU_INC(cnt.v_zfod);
			fs.m->valid = VM_PAGE_BITS_ALL;
			break;	/* break to PAGE HAS BEEN FOUND */
		} else {
			KASSERT(fs.object != next_object,
			    ("object loop %p", next_object));
			VM_OBJECT_LOCK(next_object);
			vm_object_pip_add(next_object, 1);
			/*
			 * The top-level object keeps its paging-in-progress
			 * count; only intermediate objects give theirs up.
			 */
			if (fs.object != fs.first_object)
				vm_object_pip_wakeup(fs.object);
			VM_OBJECT_UNLOCK(fs.object);
			fs.object = next_object;
		}
	}

	KASSERT((fs.m->oflags & VPO_BUSY) != 0,
	    ("vm_fault: not busy after main loop"));

	/*
	 * PAGE HAS BEEN FOUND. [Loop invariant still holds -- the object lock
	 * is held.]
	 */

	/*
	 * If the page is being written, but isn't already owned by the
	 * top-level object, we have to copy it into a new page owned by the
	 * top-level object.
	 */
	if (fs.object != fs.first_object) {
		/*
		 * We only really need to copy if we want to write it.
		 */
		if ((fault_type & (VM_PROT_COPY | VM_PROT_WRITE)) != 0) {
			/*
			 * This allows pages to be virtually copied from a 
			 * backing_object into the first_object, where the 
			 * backing object has no other refs to it, and cannot
			 * gain any more refs.  Instead of a bcopy, we just 
			 * move the page from the backing object to the 
			 * first object.  Note that we must mark the page 
			 * dirty in the first object so that it will go out 
			 * to swap when needed.
			 */
			is_first_object_locked = FALSE;
			if (
				/*
				 * Only one shadow object
				 */
				(fs.object->shadow_count == 1) &&
				/*
				 * No COW refs, except us
				 */
				(fs.object->ref_count == 1) &&
				/*
				 * No one else can look this object up
				 */
				(fs.object->handle == NULL) &&
				/*
				 * No other ways to look the object up
				 */
				((fs.object->type == OBJT_DEFAULT) ||
				 (fs.object->type == OBJT_SWAP)) &&
			    (is_first_object_locked = VM_OBJECT_TRYLOCK(fs.first_object)) &&
				/*
				 * We don't chase down the shadow chain
				 */
			    fs.object == fs.first_object->backing_object) {
				/*
				 * get rid of the unnecessary page
				 */
				vm_page_lock(fs.first_m);
				vm_page_free(fs.first_m);
				vm_page_unlock(fs.first_m);
				/*
				 * grab the page and put it into the 
				 * process'es object.  The page is 
				 * automatically made dirty.
				 */
				vm_page_lock(fs.m);
				vm_page_rename(fs.m, fs.first_object, fs.first_pindex);
				vm_page_unlock(fs.m);
				vm_page_busy(fs.m);
				fs.first_m = fs.m;
				fs.m = NULL;
				PCPU_INC(cnt.v_cow_optim);
			} else {
				/*
				 * Oh, well, lets copy it.
				 */
				pmap_copy_page(fs.m, fs.first_m);
				fs.first_m->valid = VM_PAGE_BITS_ALL;
				/*
				 * For a wired mapping that is not in the
				 * middle of a wiring change, move the wiring
				 * from the source page to the copy.
				 */
				if (wired && (fault_flags &
				    VM_FAULT_CHANGE_WIRING) == 0) {
					vm_page_lock(fs.first_m);
					vm_page_wire(fs.first_m);
					vm_page_unlock(fs.first_m);
					
					vm_page_lock(fs.m);
					vm_page_unwire(fs.m, FALSE);
					vm_page_unlock(fs.m);
				}
				/*
				 * We no longer need the old page or object.
				 */
				release_page(&fs);
			}
			/*
			 * fs.object != fs.first_object due to above 
			 * conditional
			 */
			vm_object_pip_wakeup(fs.object);
			VM_OBJECT_UNLOCK(fs.object);
			/*
			 * Only use the new page below...
			 */
			fs.object = fs.first_object;
			fs.pindex = fs.first_pindex;
			fs.m = fs.first_m;
			/*
			 * The trylock in the condition above may already
			 * have taken the first object's lock.
			 */
			if (!is_first_object_locked)
				VM_OBJECT_LOCK(fs.object);
			PCPU_INC(cnt.v_cow_faults);
			curthread->td_cow++;
		} else {
			/*
			 * Read fault on a page owned by a backing object:
			 * map it without write permission.
			 */
			prot &= ~VM_PROT_WRITE;
		}
	}

	/*
	 * We must verify that the maps have not changed since our last
	 * lookup.
	 */
	if (!fs.lookup_still_valid) {
		vm_object_t retry_object;
		vm_pindex_t retry_pindex;
		vm_prot_t retry_prot;

		/*
		 * Do not block for the map lock here; if it is contended,
		 * give everything back and restart the fault.
		 */
		if (!vm_map_trylock_read(fs.map)) {
			release_page(&fs);
			unlock_and_deallocate(&fs);
			goto RetryFault;
		}
		fs.lookup_still_valid = TRUE;
		if (fs.map->timestamp != map_generation) {
			result = vm_map_lookup_locked(&fs.map, vaddr, fault_type,
			    &fs.entry, &retry_object, &retry_pindex, &retry_prot, &wired);

			/*
			 * If we don't need the page any longer, put it on the inactive
			 * list (the easiest thing to do here).  If no one needs it,
			 * pageout will grab it eventually.
			 */
			if (result != KERN_SUCCESS) {
				release_page(&fs);
				unlock_and_deallocate(&fs);

				/*
				 * If retry of map lookup would have blocked then
				 * retry fault from start.
				 */
				if (result == KERN_FAILURE)
					goto RetryFault;
				return (result);
			}
			/*
			 * The address now resolves to a different object or
			 * index, so the work done so far is stale.
			 */
			if ((retry_object != fs.first_object) ||
			    (retry_pindex != fs.first_pindex)) {
				release_page(&fs);
				unlock_and_deallocate(&fs);
				goto RetryFault;
			}

			/*
			 * Check whether the protection has changed or the object has
			 * been copied while we left the map unlocked. Changing from
			 * read to write permission is OK - we leave the page
			 * write-protected, and catch the write fault. Changing from
			 * write to read permission means that we can't mark the page
			 * write-enabled after all.
			 */
			prot &= retry_prot;
		}
	}
	/*
	 * If the page was filled by a pager, update the map entry's
	 * last read offset.  Since the pager does not return the
	 * actual set of pages that it read, this update is based on
	 * the requested set.  Typically, the requested and actual
	 * sets are the same.
	 *
	 * XXX The following assignment modifies the map
	 * without holding a write lock on it.
	 */
	if (hardfault)
		fs.entry->next_read = fs.pindex + faultcount - reqpage;

	if ((prot & VM_PROT_WRITE) != 0 ||
	    (fault_flags & VM_FAULT_DIRTY) != 0) {
		vm_object_set_writeable_dirty(fs.object);

		/*
		 * If this is a NOSYNC mmap we do not want to set VPO_NOSYNC
		 * if the page is already dirty to prevent data written with
		 * the expectation of being synced from not being synced.
		 * Likewise if this entry does not request NOSYNC then make
		 * sure the page isn't marked NOSYNC.  Applications sharing
		 * data should use the same flags to avoid ping ponging.
		 */
		if (fs.entry->eflags & MAP_ENTRY_NOSYNC) {
			if (fs.m->dirty == 0)
				fs.m->oflags |= VPO_NOSYNC;
		} else {
			fs.m->oflags &= ~VPO_NOSYNC;
		}

		/*
		 * If the fault is a write, we know that this page is being
		 * written NOW so dirty it explicitly to save on 
		 * pmap_is_modified() calls later.
		 *
		 * Also tell the backing pager, if any, that it should remove
		 * any swap backing since the page is now dirty.
		 */
		if (((fault_type & VM_PROT_WRITE) != 0 &&
		    (fault_flags & VM_FAULT_CHANGE_WIRING) == 0) ||
		    (fault_flags & VM_FAULT_DIRTY) != 0) {
			vm_page_dirty(fs.m);
			vm_pager_page_unswapped(fs.m);
		}
	}

	/*
	 * Page had better still be busy
	 */
	KASSERT(fs.m->oflags & VPO_BUSY,
		("vm_fault: page %p not busy!", fs.m));
	/*
	 * Page must be completely valid or it is not fit to
	 * map into user space.  vm_pager_get_pages() ensures this.
	 */
	KASSERT(fs.m->valid == VM_PAGE_BITS_ALL,
	    ("vm_fault: page %p partially invalid", fs.m));
	VM_OBJECT_UNLOCK(fs.object);

	/*
	 * Put this page into the physical map.  We had to do the unlock above
	 * because pmap_enter() may sleep.  We don't put the page
	 * back on the active queue until later so that the pageout daemon
	 * won't find it (yet).
	 */
	pmap_enter(fs.map->pmap, vaddr, fault_type, fs.m, prot, wired);
	/* Prefaulting is skipped for wired mappings and wiring changes. */
	if ((fault_flags & VM_FAULT_CHANGE_WIRING) == 0 && wired == 0)
		vm_fault_prefault(fs.map->pmap, vaddr, fs.entry);
	VM_OBJECT_LOCK(fs.object);
	vm_page_lock(fs.m);

	/*
	 * If the page is not wired down, then put it where the pageout daemon
	 * can find it.
	 */
	if (fault_flags & VM_FAULT_CHANGE_WIRING) {
		if (wired)
			vm_page_wire(fs.m);
		else
			vm_page_unwire(fs.m, 1);
	} else
		vm_page_activate(fs.m);
	/* Hand the page back to the caller, held, if it asked for it. */
	if (m_hold != NULL) {
		*m_hold = fs.m;
		vm_page_hold(fs.m);
	}
	vm_page_unlock(fs.m);
	vm_page_wakeup(fs.m);

	/*
	 * Unlock everything, and return
	 */
	unlock_and_deallocate(&fs);
	/* Account the fault as major if the pager supplied the page. */
	if (hardfault)
		curthread->td_ru.ru_majflt++;
	else
		curthread->td_ru.ru_minflt++;

	return (KERN_SUCCESS);
}
Пример #24
0
/*
 * Walk every entry of 'map' (recursing into sub-maps) and, for entries
 * whose deepest backing object is a vnode object, remove the protection
 * bits that mac_vnode_check_mmap_downgrade() cleared from the entry's
 * max_protection for 'cred'.
 *
 * Does nothing when mac_mmap_revocation is zero.  The map lock is taken
 * on entry and held until the walk is finished.
 *
 *	td	thread whose process id is printed in the revocation message
 *	cred	credential handed to the MAC downgrade check
 *	map	map (or sub-map) to scan
 */
static void
mac_proc_vm_revoke_recurse(struct thread *td, struct ucred *cred,
    struct vm_map *map)
{
	struct mount *mp;
	struct vnode *vp;
	vm_map_entry_t entry;
	vm_object_t obj, shadowed;
	vm_ooffset_t obj_off;
	vm_prot_t revoke;
	int allowed, giant;

	if (!mac_mmap_revocation)
		return;

	vm_map_lock(map);
	for (entry = map->header.next; entry != &map->header;
	    entry = entry->next) {
		/* Sub-maps are handled by a nested walk. */
		if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0) {
			mac_proc_vm_revoke_recurse(td, cred,
			    entry->object.sub_map);
			continue;
		}

		/*
		 * Entries that are copy-on-write, marked NOSYNC, or that
		 * have no maximum protection at all are not examined.
		 */
		if ((entry->eflags & (MAP_ENTRY_COW | MAP_ENTRY_NOSYNC)) != 0 ||
		    entry->max_protection == 0)
			continue;

		obj = entry->object.vm_object;
		if (obj == NULL)
			continue;
		obj_off = entry->offset;

		/*
		 * Follow the backing_object chain to its end, locking the
		 * next object before dropping the current one and summing
		 * the backing offsets along the way.
		 */
		VM_OBJECT_LOCK(obj);
		for (;;) {
			shadowed = obj->backing_object;
			if (shadowed == NULL)
				break;
			VM_OBJECT_LOCK(shadowed);
			obj_off += obj->backing_object_offset;
			VM_OBJECT_UNLOCK(obj);
			obj = shadowed;
		}
		VM_OBJECT_UNLOCK(obj);

		/*
		 * Only vnode-backed objects are checked; maps and other
		 * object types are not covered by the MAC check here.
		 */
		if (obj->type != OBJT_VNODE)
			continue;

		vp = (struct vnode *)obj->handle;
		giant = VFS_LOCK_GIANT(vp->v_mount);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		allowed = entry->max_protection;
		mac_vnode_check_mmap_downgrade(cred, vp, &allowed);
		VOP_UNLOCK(vp, 0);

		/*
		 * Bits present in max_protection but absent from what the
		 * check left in 'allowed' are the ones to take away.
		 */
		revoke = entry->max_protection & ~allowed;
		if (revoke == 0) {
			VFS_UNLOCK_GIANT(giant);
			continue;
		}

		printf("pid %ld: revoking %s perms from %#lx:%ld "
		    "(max %s/cur %s)\n", (long)td->td_proc->p_pid,
		    prot2str(revoke), (u_long)entry->start,
		    (long)(entry->end - entry->start),
		    prot2str(entry->max_protection),
		    prot2str(entry->protection));

		/*
		 * Easy case: none of the revoked bits are part of the
		 * current protection, so trimming max_protection is all
		 * that is needed.  'revoke' is a subset of max_protection,
		 * so masking is the same as subtracting.
		 */
		if ((entry->protection & revoke) == 0) {
			entry->max_protection &= ~revoke;
			VFS_UNLOCK_GIANT(giant);
			continue;
		}

		if ((revoke & VM_PROT_WRITE) != 0) {
			/*
			 * Write access is in use and being revoked: push
			 * the entry's range of the object out synchronously
			 * first.  A reference is held on the object across
			 * the flush.
			 */
			vm_object_reference(obj);
			(void) vn_start_write(vp, &mp, V_WAIT);
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			VM_OBJECT_LOCK(obj);
			vm_object_page_clean(obj, obj_off, obj_off +
			    entry->end - entry->start, OBJPC_SYNC);
			VM_OBJECT_UNLOCK(obj);
			VOP_UNLOCK(vp, 0);
			vn_finished_write(mp);
			vm_object_deallocate(obj);

			/*
			 * When revocation via COW is disabled, write access
			 * is dropped outright.
			 */
			if (!mac_mmap_revocation_via_cow) {
				entry->max_protection &= ~VM_PROT_WRITE;
				entry->protection &= ~VM_PROT_WRITE;
			}
			/*
			 * Independently of the test above (there is no
			 * "else"), mark the entry copy-on-write unless read
			 * access is being revoked as well.
			 */
			if ((revoke & VM_PROT_READ) == 0) {
				entry->eflags |= MAP_ENTRY_COW |
				    MAP_ENTRY_NEEDS_COPY;
			}
		}
		if ((revoke & VM_PROT_EXECUTE) != 0) {
			entry->max_protection &= ~VM_PROT_EXECUTE;
			entry->protection &= ~VM_PROT_EXECUTE;
		}
		/* Losing read access leaves the entry with no access. */
		if ((revoke & VM_PROT_READ) != 0) {
			entry->max_protection = 0;
			entry->protection = 0;
		}

		/* Apply the reduced protection to the physical map. */
		pmap_protect(map->pmap, entry->start, entry->end,
		    entry->protection & ~revoke);
		vm_map_simplify_entry(map, entry);
		VFS_UNLOCK_GIANT(giant);
	}
	vm_map_unlock(map);
}