Example #1
/*
 * Copy a binary buffer from kernel space to user space.
 *
 * Returns 0 on success, EFAULT on failure.
 */
int
copyout(const void *kaddr, void *udaddr, size_t len)
{
	struct vmspace *vm = curproc->p_vmspace;
	struct lwbuf *lwb;
	struct lwbuf lwb_cache;
	vm_page_t m;
	int error;
	size_t n;

	error = 0;
	while (len) {
		m = vm_fault_page(&vm->vm_map, trunc_page((vm_offset_t)udaddr),
				  VM_PROT_READ|VM_PROT_WRITE,
				  VM_FAULT_NORMAL, &error);
		if (error)
			break;
		n = PAGE_SIZE - ((vm_offset_t)udaddr & PAGE_MASK);
		if (n > len)
			n = len;
		lwb = lwbuf_alloc(m, &lwb_cache);
		bcopy(kaddr, (char *)lwbuf_kva(lwb) +
			     ((vm_offset_t)udaddr & PAGE_MASK), n);
		len -= n;
		udaddr = (char *)udaddr + n;
		kaddr = (const char *)kaddr + n;
		vm_page_dirty(m);
		lwbuf_free(lwb);
		vm_page_unhold(m);
	}
	return (error);
}
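Every example in this collection follows the same lwbuf pattern: lwbuf_alloc() establishes an ephemeral kernel mapping for a vm_page_t (reusing the caller-supplied lwb_cache when it can), lwbuf_kva() returns the mapping's kernel virtual address, and lwbuf_free() tears the mapping down again. Below is a minimal sketch of just that pattern, copying out of a page the caller already holds; the helper name and its contract are assumptions, not part of the source.
/*
 * Sketch only: copy 'len' bytes starting at byte 'offset' of a held,
 * fully valid vm_page_t into a kernel buffer.  The caller guarantees
 * offset + len <= PAGE_SIZE and keeps the page held across the call.
 */
static void
page_copy_to_kbuf(vm_page_t m, int offset, void *kbuf, size_t len)
{
	struct lwbuf *lwb;
	struct lwbuf lwb_cache;

	lwb = lwbuf_alloc(m, &lwb_cache);	/* ephemeral kernel mapping */
	bcopy((char *)lwbuf_kva(lwb) + offset, kbuf, len);
	lwbuf_free(lwb);			/* drop the mapping promptly */
}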
Example #2
/*
 * Copy the specified number of bytes from userland to the xio.
 * Return an error code or 0 on success.  
 *
 * uoffset is the abstracted starting offset in the XIO, not the actual
 * offset, and usually starts at 0.
 *
 * Data in pages backing the XIO will be modified.
 */
int
xio_copy_utox(xio_t xio, int uoffset, const void *uptr, int bytes)
{
    int i;
    int n;
    int error;
    int offset;
    vm_page_t m;
    struct lwbuf *lwb;
    struct lwbuf lwb_cache;

    if (uoffset + bytes > xio->xio_bytes)
	return(EFAULT);

    offset = (xio->xio_offset + uoffset) & PAGE_MASK;
    if ((n = PAGE_SIZE - offset) > bytes)
	n = bytes;

    error = 0;
    for (i = (xio->xio_offset + uoffset) >> PAGE_SHIFT; 
	 i < xio->xio_npages; 
	 ++i
    ) {
	m = xio->xio_pages[i];
	lwb = lwbuf_alloc(m, &lwb_cache);
	error = copyin(uptr, (char *)lwbuf_kva(lwb) + offset, n);
	lwbuf_free(lwb);
	if (error)
	    break;
	bytes -= n;
	uptr = (const char *)uptr + n;
	if (bytes == 0)
	    break;
	if ((n = bytes) > PAGE_SIZE)
	    n = PAGE_SIZE;
	offset = 0;
    }
    return(error);
}
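A hedged caller sketch for the routine above; it assumes the XIO was already initialized over enough backing pages elsewhere (DragonFly's kern_xio.c has initializers such as xio_init_kbuf() and xio_release(), not shown in the source), and the helper name is made up for illustration.
/*
 * Sketch only: pull 'bytes' of user data into the pages backing 'xio'.
 * 'uptr' is a user address handed in by the syscall layer; the xio is
 * assumed to already cover at least 'bytes' bytes.
 */
static int
pull_user_data(xio_t xio, const void *uptr, int bytes)
{
    if (bytes > xio->xio_bytes)
	return(EINVAL);
    return(xio_copy_utox(xio, 0, uptr, bytes));	/* uoffset 0 = xio start */
}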
Example #3
/*
 * Copy the specified number of bytes from the xio to a kernel
 * buffer.  Return an error code or 0 on success.
 *
 * uoffset is the abstracted starting offset in the XIO, not the actual
 * offset, and usually starts at 0.
 *
 * The XIO is not modified.
 */
int
xio_copy_xtok(xio_t xio, int uoffset, void *kptr, int bytes)
{
    int i;
    int n;
    int error;
    int offset;
    vm_page_t m;
    struct lwbuf *lwb;
    struct lwbuf lwb_cache;

    if (bytes + uoffset > xio->xio_bytes)
	return(EFAULT);

    offset = (xio->xio_offset + uoffset) & PAGE_MASK;
    if ((n = PAGE_SIZE - offset) > bytes)
	n = bytes;

    error = 0;
    for (i = (xio->xio_offset + uoffset) >> PAGE_SHIFT; 
	 i < xio->xio_npages; 
	 ++i
    ) {
	m = xio->xio_pages[i];
	lwb = lwbuf_alloc(m, &lwb_cache);
	bcopy((char *)lwbuf_kva(lwb) + offset, kptr, n);
	lwbuf_free(lwb);
	bytes -= n;
	kptr = (char *)kptr + n;
	if (bytes == 0)
	    break;
	if ((n = bytes) > PAGE_SIZE)
	    n = PAGE_SIZE;
	offset = 0;
    }
    return(error);
}
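The mirror-image caller for xio_copy_xtok(), again only a sketch; how the XIO was built and how 'kbuf' is sized are assumed.
/*
 * Sketch only: copy the first 'bytes' bytes backed by 'xio' into a
 * kernel buffer.  The XIO itself is not modified.
 */
static int
peek_xio(xio_t xio, void *kbuf, int bytes)
{
    if (bytes > xio->xio_bytes)
	bytes = xio->xio_bytes;		/* clamp to what the xio covers */
    return(xio_copy_xtok(xio, 0, kbuf, bytes));
}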
Example #4
/*
 * Implement uiomove(9) from physical memory using lwbuf's to reduce
 * the creation and destruction of ephemeral mappings.
 */
int
uiomove_fromphys(vm_page_t *ma, vm_offset_t offset, size_t n, struct uio *uio)
{
	struct lwbuf lwb_cache;
	struct lwbuf *lwb;
	struct thread *td = curthread;
	struct iovec *iov;
	void *cp;
	vm_offset_t page_offset;
	vm_page_t m;
	size_t cnt;
	int error = 0;
	int save = 0;

	KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE,
	    ("uiomove_fromphys: mode"));
	KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread,
	    ("uiomove_fromphys proc"));

	crit_enter();
	save = td->td_flags & TDF_DEADLKTREAT;
	td->td_flags |= TDF_DEADLKTREAT;
	crit_exit();

	while (n > 0 && uio->uio_resid) {
		iov = uio->uio_iov;
		cnt = iov->iov_len;
		if (cnt == 0) {
			uio->uio_iov++;
			uio->uio_iovcnt--;
			continue;
		}
		if (cnt > n)
			cnt = n;
		page_offset = offset & PAGE_MASK;
		cnt = min(cnt, PAGE_SIZE - page_offset);
		m = ma[offset >> PAGE_SHIFT];
		lwb = lwbuf_alloc(m, &lwb_cache);
		cp = (char *)lwbuf_kva(lwb) + page_offset;

		switch (uio->uio_segflg) {
		case UIO_USERSPACE:
			/*
			 * note: removed uioyield (it was the wrong place to
			 * put it).
			 */
			if (uio->uio_rw == UIO_READ)
				error = copyout(cp, iov->iov_base, cnt);
			else
				error = copyin(iov->iov_base, cp, cnt);
			if (error) {
				lwbuf_free(lwb);
				goto out;
			}
			break;
		case UIO_SYSSPACE:
			if (uio->uio_rw == UIO_READ)
				bcopy(cp, iov->iov_base, cnt);
			else
				bcopy(iov->iov_base, cp, cnt);
			break;
		case UIO_NOCOPY:
			break;
		}
		lwbuf_free(lwb);
		iov->iov_base = (char *)iov->iov_base + cnt;
		iov->iov_len -= cnt;
		uio->uio_resid -= cnt;
		uio->uio_offset += cnt;
		offset += cnt;
		n -= cnt;
	}
out:
	if (save == 0) {
		crit_enter();
		td->td_flags &= ~TDF_DEADLKTREAT;
		crit_exit();
	}
	return (error);
}
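A sketch of driving uiomove_fromphys() from kernel space: build a one-segment struct uio over a kernel buffer and move 'len' bytes out of a page array the caller has already wired and sized appropriately. The helper name is hypothetical; the field setup mirrors what the routine above consumes.
/*
 * Sketch only: read 'len' bytes starting at byte 'offset' within the
 * wired page array 'ma' into the kernel buffer 'kbuf' via a
 * single-segment uio.
 */
static int
read_from_pages(vm_page_t *ma, vm_offset_t offset, void *kbuf, size_t len)
{
	struct iovec iov;
	struct uio uio;

	iov.iov_base = kbuf;
	iov.iov_len = len;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = 0;
	uio.uio_resid = len;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_td = curthread;

	return (uiomove_fromphys(ma, offset, len, &uio));
}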
Example #5
/*
 * Lets the VM system know about a change in size for a file.
 * We adjust our own internal size and flush any cached pages in
 * the associated object that are affected by the size change.
 *
 * NOTE: This routine may be invoked as a result of a pager put
 * operation (possibly at object termination time), so we must be careful.
 *
 * NOTE: vp->v_filesize is initialized to NOOFFSET (-1), so be sure that
 * we do not blow up in that case.  nsize will always be >= 0, however.
 */
void
vnode_pager_setsize(struct vnode *vp, vm_ooffset_t nsize)
{
	vm_pindex_t nobjsize;
	vm_pindex_t oobjsize;
	vm_object_t object;

	object = vp->v_object;
	if (object == NULL)
		return;
	vm_object_hold(object);
	KKASSERT(vp->v_object == object);

	/*
	 * Hasn't changed size
	 */
	if (nsize == vp->v_filesize) {
		vm_object_drop(object);
		return;
	}

	/*
	 * Has changed size.  Adjust the VM object's size and v_filesize
	 * before we start scanning pages to prevent new pages from being
	 * allocated during the scan.
	 */
	nobjsize = OFF_TO_IDX(nsize + PAGE_MASK);
	oobjsize = object->size;
	object->size = nobjsize;

	/*
	 * File has shrunk. Toss any cached pages beyond the new EOF.
	 */
	if (nsize < vp->v_filesize) {
		vp->v_filesize = nsize;
		if (nobjsize < oobjsize) {
			vm_object_page_remove(object, nobjsize, oobjsize,
					      FALSE);
		}
		/*
		 * This gets rid of garbage at the end of a page that is now
		 * only partially backed by the vnode.  Since we are setting
		 * the entire page valid & clean after we are done we have
		 * to be sure that the portion of the page within the file
		 * bounds is already valid.  If it isn't then making it
		 * valid would create a corrupt block.
		 */
		if (nsize & PAGE_MASK) {
			vm_offset_t kva;
			vm_page_t m;

			m = vm_page_lookup_busy_wait(object, OFF_TO_IDX(nsize),
						     TRUE, "vsetsz");

			if (m && m->valid) {
				int base = (int)nsize & PAGE_MASK;
				int size = PAGE_SIZE - base;
				struct lwbuf *lwb;
				struct lwbuf lwb_cache;

				/*
				 * Clear out partial-page garbage in case
				 * the page has been mapped.
				 *
				 * This is byte aligned.
				 */
				lwb = lwbuf_alloc(m, &lwb_cache);
				kva = lwbuf_kva(lwb);
				bzero((caddr_t)kva + base, size);
				lwbuf_free(lwb);

				/*
				 * XXX work around SMP data integrity race
				 * by unmapping the page from user processes.
				 * The garbage we just cleared may be mapped
				 * to a user process running on another cpu
				 * and this code is not running through normal
				 * I/O channels which handle SMP issues for
				 * us, so unmap page to synchronize all cpus.
				 *
				 * XXX should vm_pager_unmap_page() have
				 * dealt with this?
				 */
				vm_page_protect(m, VM_PROT_NONE);

				/*
				 * Clear out partial-page dirty bits.  This
				 * has the side effect of setting the valid
				 * bits, but that is ok.  There are a bunch
				 * of places in the VM system where we expected
				 * m->dirty == VM_PAGE_BITS_ALL.  The file EOF
				 * case is one of them.  If the page is still
				 * partially dirty, make it fully dirty.
				 *
				 * NOTE: We do not clear out the valid
				 * bits.  This would prevent bogus_page
				 * replacement from working properly.
				 *
				 * NOTE: We do not want to clear the dirty
				 * bit for a partial DEV_BSIZE'd truncation!
				 * This is DEV_BSIZE aligned!
				 */
				vm_page_clear_dirty_beg_nonincl(m, base, size);
				if (m->dirty != 0)
					m->dirty = VM_PAGE_BITS_ALL;
				vm_page_wakeup(m);
			} else if (m) {
				vm_page_wakeup(m);
			}
		}
	} else {
		vp->v_filesize = nsize;
	}
	vm_object_drop(object);
}
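Typical call site, sketched: a filesystem's truncate/extend path updates its own in-core size and then calls vnode_pager_setsize() so cached pages beyond the new EOF get flushed. The inode structure and field names below are placeholders, not from the source.
/*
 * Sketch only: 'struct example_inode' and 'i_size' stand in for a
 * filesystem's own in-core inode; only the vnode_pager_setsize() call
 * is taken from the source.
 */
static void
example_fs_setsize(struct vnode *vp, struct example_inode *ip,
		   vm_ooffset_t nsize)
{
	ip->i_size = nsize;		/* filesystem's own bookkeeping */
	vnode_pager_setsize(vp, nsize);	/* let the VM object catch up */
}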
Example #6
/*
 * A VFS can call this function to try to dispose of a read request
 * directly from the VM system, pretty much bypassing almost all VFS
 * overhead except for atime updates.
 *
 * If 0 is returned some or all of the uio was handled.  The caller must
 * check the uio and handle the remainder.
 *
 * The caller must fail on a non-zero error.
 */
int
vop_helper_read_shortcut(struct vop_read_args *ap)
{
	struct vnode *vp;
	struct uio *uio;
	struct lwbuf *lwb;
	struct lwbuf lwb_cache;
	vm_object_t obj;
	vm_page_t m;
	int offset;
	int n;
	int error;

	vp = ap->a_vp;
	uio = ap->a_uio;

	/*
	 * We can't short-cut if there is no VM object or this is a special
	 * UIO_NOCOPY read (typically from VOP_STRATEGY()).  We also can't
	 * do this if we cannot extract the filesize from the vnode.
	 */
	if (vm_read_shortcut_enable == 0)
		return(0);
	if (vp->v_object == NULL || uio->uio_segflg == UIO_NOCOPY)
		return(0);
	if (vp->v_filesize == NOOFFSET)
		return(0);
	if (uio->uio_resid == 0)
		return(0);

	/*
	 * Iterate the uio on a page-by-page basis
	 *
	 * XXX can we leave the object held shared during the uiomove()?
	 */
	++vm_read_shortcut_count;
	obj = vp->v_object;
	vm_object_hold_shared(obj);

	error = 0;
	while (uio->uio_resid && error == 0) {
		offset = (int)uio->uio_offset & PAGE_MASK;
		n = PAGE_SIZE - offset;
		if (n > uio->uio_resid)
			n = uio->uio_resid;
		if (vp->v_filesize < uio->uio_offset)
			break;
		if (uio->uio_offset + n > vp->v_filesize)
			n = vp->v_filesize - uio->uio_offset;
		if (n == 0)
			break;	/* hit EOF */

		m = vm_page_lookup_busy_try(obj, OFF_TO_IDX(uio->uio_offset),
					    FALSE, &error);
		if (error || m == NULL) {
			++vm_read_shortcut_failed;
			error = 0;
			break;
		}
		if ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) {
			++vm_read_shortcut_failed;
			vm_page_wakeup(m);
			break;
		}
		lwb = lwbuf_alloc(m, &lwb_cache);

		/*
		 * Use a no-fault uiomove() to avoid deadlocking against
		 * our VM object (which could livelock on the same object
		 * due to shared-vs-exclusive), or deadlocking against
		 * our busied page.  Returns EFAULT on any fault which
		 * winds up diving a vnode.
		 */
		error = uiomove_nofault((char *)lwbuf_kva(lwb) + offset,
					n, uio);

		vm_page_flag_set(m, PG_REFERENCED);
		lwbuf_free(lwb);
		vm_page_wakeup(m);
	}
	vm_object_drop(obj);

	/*
	 * Ignore EFAULT since we used uiomove_nofault(); it causes the
	 * caller to fall back to the normal code path for this case.
	 */
	if (error == EFAULT)
		error = 0;

	return (error);
}
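How a VFS read VOP might wire this in, as a sketch: try the shortcut first, fail hard on a real error, and fall back to the normal buffer-cache path for whatever uio remains. example_fs_read_slowpath() is a placeholder for that normal path.
/*
 * Sketch only: example_fs_read_slowpath() is hypothetical; the
 * shortcut-then-remainder structure follows the rules stated in the
 * comment above.
 */
static int
example_fs_read(struct vop_read_args *ap)
{
	int error;

	error = vop_helper_read_shortcut(ap);
	if (error)
		return (error);			/* hard failure */
	if (ap->a_uio->uio_resid == 0)
		return (0);			/* shortcut handled it all */
	return (example_fs_read_slowpath(ap));	/* handle the remainder */
}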
Example #7
/*
 * MPSAFE thread
 */
static void
vm_pagezero(void *arg)
{
	vm_page_t m = NULL;
	struct lwbuf *lwb = NULL;
	struct lwbuf lwb_cache;
	enum zeroidle_state state = STATE_IDLE;
	char *pg = NULL;
	int npages = 0;
	int sleep_time;	
	int i = 0;
	int cpu = (int)(intptr_t)arg;
	int zero_state = 0;

	/*
	 * Adjust thread parameters before entering our loop.  The thread
	 * is started with the MP lock held and with normal kernel thread
	 * priority.
	 *
	 * Also put us on the last cpu for now.
	 *
	 * For now leave the MP lock held, the VM routines cannot be called
	 * with it released until tokenization is finished.
	 */
	lwkt_setpri_self(TDPRI_IDLE_WORK);
	lwkt_setcpu_self(globaldata_find(cpu));
	sleep_time = DEFAULT_SLEEP_TIME;

	/*
	 * Loop forever
	 */
	for (;;) {
		int zero_count;

		switch(state) {
		case STATE_IDLE:
			/*
			 * Wait for work.
			 */
			tsleep(&zero_state, 0, "pgzero", sleep_time);
			if (vm_page_zero_check(&zero_count, &zero_state))
				npages = idlezero_rate / 10;
			sleep_time = vm_page_zero_time(zero_count);
			if (npages)
				state = STATE_GET_PAGE;	/* Fallthrough */
			break;
		case STATE_GET_PAGE:
			/*
			 * Acquire page to zero
			 */
			if (--npages == 0) {
				state = STATE_IDLE;
			} else {
				m = vm_page_free_fromq_fast();
				if (m == NULL) {
					state = STATE_IDLE;
				} else {
					state = STATE_ZERO_PAGE;
					lwb = lwbuf_alloc(m, &lwb_cache);
					pg = (char *)lwbuf_kva(lwb);
					i = 0;
				}
			}
			break;
		case STATE_ZERO_PAGE:
			/*
			 * Zero-out the page
			 */
			while (i < PAGE_SIZE) {
				if (idlezero_nocache == 1)
					bzeront(&pg[i], IDLEZERO_RUN);
				else
					bzero(&pg[i], IDLEZERO_RUN);
				i += IDLEZERO_RUN;
				lwkt_yield();
			}
			state = STATE_RELEASE_PAGE;
			break;
		case STATE_RELEASE_PAGE:
			lwbuf_free(lwb);
			vm_page_flag_set(m, PG_ZERO);
			vm_page_free_toq(m);
			state = STATE_GET_PAGE;
			++idlezero_count;	/* non-locked, SMP race ok */
			break;
		}
		lwkt_yield();
	}
}
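For completeness, a sketch of how such per-cpu threads could be launched at boot. kthread_create() is DragonFly's kernel-thread creator, but treat the exact call, the format string, and the ncpus loop as assumptions rather than the source's own start-up code.
/*
 * Sketch only: spin up one vm_pagezero() thread per cpu at boot.
 */
static void
pagezero_start_sketch(void)
{
	int error;
	int i;

	for (i = 0; i < ncpus; ++i) {
		/* arg encodes the target cpu, matching vm_pagezero()'s cast */
		error = kthread_create(vm_pagezero, (void *)(intptr_t)i,
				       NULL, "pagezero/%d", i);
		if (error)
			panic("cannot start pagezero thread %d", i);
	}
}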