예제 #1
0
/*
 * old style vnode pager input routine
 */
static int
vnode_pager_input_old(vm_object_t object, vm_page_t m)
{
	struct uio auio;
	struct iovec aiov;
	int error;
	int size;
	struct sf_buf *sf;
	struct vnode *vp;

	VM_OBJECT_ASSERT_WLOCKED(object);
	error = 0;

	/*
	 * Return failure if beyond current EOF
	 */
	if (IDX_TO_OFF(m->pindex) >= object->un_pager.vnp.vnp_size) {
		return VM_PAGER_BAD;
	} else {
		size = PAGE_SIZE;
		if (IDX_TO_OFF(m->pindex) + size > object->un_pager.vnp.vnp_size)
			size = object->un_pager.vnp.vnp_size - IDX_TO_OFF(m->pindex);
		vp = object->handle;
		VM_OBJECT_WUNLOCK(object);

		/*
		 * Allocate a kernel virtual address and initialize so that
		 * we can use VOP_READ/WRITE routines.
		 */
		sf = sf_buf_alloc(m, 0);

		aiov.iov_base = (caddr_t)sf_buf_kva(sf);
		aiov.iov_len = size;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = IDX_TO_OFF(m->pindex);
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_READ;
		auio.uio_resid = size;
		auio.uio_td = curthread;

		error = VOP_READ(vp, &auio, 0, curthread->td_ucred);
		if (!error) {
			int count = size - auio.uio_resid;

			if (count == 0)
				error = EINVAL;
			else if (count != PAGE_SIZE)
				bzero((caddr_t)sf_buf_kva(sf) + count,
				    PAGE_SIZE - count);
		}
		sf_buf_free(sf);

		VM_OBJECT_WLOCK(object);
	}
	KASSERT(m->dirty == 0, ("vnode_pager_input_old: page %p is dirty", m));
	if (!error)
		m->valid = VM_PAGE_BITS_ALL;
	return error ? VM_PAGER_ERROR : VM_PAGER_OK;
}
예제 #2
0
static void drm_sg_cleanup(struct drm_sg_mem * entry)
{
	if (entry == NULL)
		return;

	if (entry->vaddr != 0)
		kmem_free(&kernel_map, entry->vaddr, IDX_TO_OFF(entry->pages));

	kfree(entry->busaddr);
	kfree(entry);
}
예제 #3
0
void drm_sg_cleanup(struct drm_sg_mem * entry)
{
	if (entry == NULL)
		return;

	if (entry->vaddr != 0)
		kmem_free(kernel_arena, entry->vaddr, IDX_TO_OFF(entry->pages));

	free(entry->busaddr, DRM_MEM_SGLISTS);
	free(entry, DRM_MEM_DRIVER);
}
예제 #4
0
static int
shared_page_alloc_locked(int size, int align)
{
	int res;

	res = roundup(shared_page_free, align);
	if (res + size >= IDX_TO_OFF(shared_page_obj->size))
		res = -1;
	else
		shared_page_free = res + size;
	return (res);
}
예제 #5
0
int drm_legacy_sg_alloc(struct drm_device *dev, void *data,
			struct drm_file *file_priv)
{
	struct drm_scatter_gather *request = data;
	struct drm_sg_mem *entry;
	vm_size_t size;
	vm_pindex_t pindex;

	if (dev->sg)
		return -EINVAL;

	DRM_DEBUG("request size=%ld\n", request->size);

	entry = kmalloc(sizeof(*entry), M_DRM, M_WAITOK | M_ZERO);

	size = round_page(request->size);
	entry->pages = OFF_TO_IDX(size);
	entry->busaddr = kmalloc(entry->pages * sizeof(*entry->busaddr),
	    M_DRM, M_WAITOK | M_ZERO);

	entry->vaddr = kmem_alloc_attr(&kernel_map, size,
				       VM_SUBSYS_DRM_SCAT, M_WAITOK | M_ZERO,
				       0, BUS_SPACE_MAXADDR_32BIT,
				       VM_MEMATTR_WRITE_COMBINING);
	if (entry->vaddr == 0) {
		drm_sg_cleanup(entry);
		return (-ENOMEM);
	}

	for(pindex = 0; pindex < entry->pages; pindex++) {
		entry->busaddr[pindex] =
		    vtophys(entry->vaddr + IDX_TO_OFF(pindex));
	}

	DRM_LOCK(dev);
	if (dev->sg) {
		DRM_UNLOCK(dev);
		drm_sg_cleanup(entry);
		return (-EINVAL);
	}
	dev->sg = entry;
	DRM_UNLOCK(dev);

	request->handle = entry->vaddr;

	DRM_DEBUG("allocated %ju pages @ 0x%08jx, contents=%08lx\n",
	    entry->pages, (uintmax_t)entry->vaddr,
	    *(unsigned long *)entry->vaddr);

	return (0);
}
예제 #6
0
int drm_sg_alloc(struct drm_device *dev, struct drm_scatter_gather * request)
{
	struct drm_sg_mem *entry;
	vm_size_t size;
	vm_pindex_t pindex;

	DRM_DEBUG("\n");

	if (!drm_core_check_feature(dev, DRIVER_SG))
		return -EINVAL;

	if (dev->sg)
		return -EINVAL;

	entry = malloc(sizeof(*entry), DRM_MEM_DRIVER, M_NOWAIT | M_ZERO);
	if (!entry)
		return -ENOMEM;

	DRM_DEBUG("request size=%ld\n", request->size);

	size = round_page(request->size);
	entry->pages = OFF_TO_IDX(size);
	entry->busaddr = malloc(entry->pages * sizeof(*entry->busaddr),
	    DRM_MEM_SGLISTS, M_NOWAIT | M_ZERO);
	if (!entry->busaddr) {
		free(entry, DRM_MEM_DRIVER);
		return -ENOMEM;
	}

	entry->vaddr = drm_vmalloc_dma(size);
	if (entry->vaddr == 0) {
		free(entry->busaddr, DRM_MEM_DRIVER);
		free(entry, DRM_MEM_DRIVER);
		return -ENOMEM;
	}

	for (pindex = 0; pindex < entry->pages; pindex++) {
		entry->busaddr[pindex] =
		    vtophys(entry->vaddr + IDX_TO_OFF(pindex));
	}

	request->handle = entry->vaddr;

	dev->sg = entry;

	DRM_DEBUG("allocated %ju pages @ 0x%08zx, contents=%08lx\n",
	    entry->pages, entry->vaddr, *(unsigned long *)entry->vaddr);

	return 0;
}
예제 #7
0
/*
 * No requirements.
 *
 * WARNING! Do not obtain dev_pager_mtx here, doing so will cause a
 *	    deadlock in DRMs VM paging code.
 */
static int
dev_pager_getpage(vm_object_t object, vm_page_t *mpp, int seqaccess)
{
	vm_page_t page;
	int error;

	page = *mpp;

	error = object->un_pager.devp.ops->cdev_pg_fault(
			object, IDX_TO_OFF(page->pindex),
			PROT_READ, mpp);

	return (error);
}
/*
 * Return whether the vnode pager has the requested page.  Return the
 * number of disk-contiguous pages before and after the requested page,
 * not including the requested page.
 */
static boolean_t
vnode_pager_haspage(vm_object_t object, vm_pindex_t pindex)
{
	struct vnode *vp = object->handle;
	off_t loffset;
	off_t doffset;
	int voff;
	int bsize;
	int error;

	/*
	 * If no vp or vp is doomed or marked transparent to VM, we do not
	 * have the page.
	 */
	if ((vp == NULL) || (vp->v_flag & VRECLAIMED))
		return FALSE;

	/*
	 * If filesystem no longer mounted or offset beyond end of file we do
	 * not have the page.
	 */
	loffset = IDX_TO_OFF(pindex);

	if (vp->v_mount == NULL || loffset >= vp->v_filesize)
		return FALSE;

	bsize = vp->v_mount->mnt_stat.f_iosize;
	voff = loffset % bsize;

	/*
	 * XXX
	 *
	 * BMAP returns byte counts before and after, where after
	 * is inclusive of the base page.  haspage must return page
	 * counts before and after where after does not include the
	 * base page.
	 *
	 * BMAP is allowed to return a *after of 0 for backwards
	 * compatibility.  The base page is still considered valid if
	 * no error is returned.
	 */
	error = VOP_BMAP(vp, loffset - voff, &doffset, NULL, NULL, 0);
	if (error)
		return TRUE;
	if (doffset == NOOFFSET)
		return FALSE;
	return TRUE;
}
예제 #9
0
/* Create the VM system backing object for this vnode */
int
vnode_create_vobject(struct vnode *vp, off_t isize, struct thread *td)
{
	vm_object_t object;
	vm_ooffset_t size = isize;
	struct vattr va;

	if (!vn_isdisk(vp, NULL) && vn_canvmio(vp) == FALSE)
		return (0);

	while ((object = vp->v_object) != NULL) {
		VM_OBJECT_WLOCK(object);
		if (!(object->flags & OBJ_DEAD)) {
			VM_OBJECT_WUNLOCK(object);
			return (0);
		}
		VOP_UNLOCK(vp, 0);
		vm_object_set_flag(object, OBJ_DISCONNECTWNT);
		VM_OBJECT_SLEEP(object, object, PDROP | PVM, "vodead", 0);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	}

	if (size == 0) {
		if (vn_isdisk(vp, NULL)) {
			size = IDX_TO_OFF(INT_MAX);
		} else {
			if (VOP_GETATTR(vp, &va, td->td_ucred))
				return (0);
			size = va.va_size;
		}
	}

	object = vnode_pager_alloc(vp, size, 0, 0, td->td_ucred);
	/*
	 * Dereference the reference we just created.  This assumes
	 * that the object is associated with the vp.
	 */
	VM_OBJECT_WLOCK(object);
	object->ref_count--;
	VM_OBJECT_WUNLOCK(object);
	vrele(vp);

	KASSERT(vp->v_object != NULL, ("vnode_create_vobject: NULL object"));

	return (0);
}
예제 #10
0
int ttm_tt_swapout(struct ttm_tt *ttm, vm_object_t persistent_swap_storage)
{
	vm_object_t obj;
	vm_page_t from_page, to_page;
	int i;

	BUG_ON(ttm->state != tt_unbound && ttm->state != tt_unpopulated);
	BUG_ON(ttm->caching_state != tt_cached);

	if (!persistent_swap_storage) {
		obj = swap_pager_alloc(NULL,
		    IDX_TO_OFF(ttm->num_pages), VM_PROT_DEFAULT, 0);
		if (obj == NULL) {
			pr_err("Failed allocating swap storage\n");
			return (-ENOMEM);
		}
	} else
		obj = persistent_swap_storage;

	VM_OBJECT_LOCK(obj);
	vm_object_pip_add(obj, 1);
	for (i = 0; i < ttm->num_pages; ++i) {
		from_page = ttm->pages[i];
		if (unlikely(from_page == NULL))
			continue;
		to_page = vm_page_grab(obj, i, VM_ALLOC_NORMAL |
					       VM_ALLOC_RETRY);
		pmap_copy_page(VM_PAGE_TO_PHYS(from_page),
					VM_PAGE_TO_PHYS(to_page));
		to_page->valid = VM_PAGE_BITS_ALL;
		vm_page_dirty(to_page);
		vm_page_wakeup(to_page);
	}
	vm_object_pip_wakeup(obj);
	VM_OBJECT_UNLOCK(obj);

	ttm->bdev->driver->ttm_tt_unpopulate(ttm);
	ttm->swap_storage = obj;
	ttm->page_flags |= TTM_PAGE_FLAG_SWAPPED;
	if (persistent_swap_storage)
		ttm->page_flags |= TTM_PAGE_FLAG_PERSISTENT_SWAP;

	return 0;
}
예제 #11
0
파일: ttm_tt.c 프로젝트: Alkzndr/freebsd
int ttm_tt_swapout(struct ttm_tt *ttm, vm_object_t persistent_swap_storage)
{
	vm_object_t obj;
	vm_page_t from_page, to_page;
	int i;

	MPASS(ttm->state == tt_unbound || ttm->state == tt_unpopulated);
	MPASS(ttm->caching_state == tt_cached);

	if (persistent_swap_storage == NULL) {
		obj = vm_pager_allocate(OBJT_SWAP, NULL,
		    IDX_TO_OFF(ttm->num_pages), VM_PROT_DEFAULT, 0,
		    curthread->td_ucred);
		if (obj == NULL) {
			printf("[TTM] Failed allocating swap storage\n");
			return (-ENOMEM);
		}
	} else
		obj = persistent_swap_storage;

	VM_OBJECT_WLOCK(obj);
	vm_object_pip_add(obj, 1);
	for (i = 0; i < ttm->num_pages; ++i) {
		from_page = ttm->pages[i];
		if (unlikely(from_page == NULL))
			continue;
		to_page = vm_page_grab(obj, i, VM_ALLOC_NORMAL);
		pmap_copy_page(from_page, to_page);
		to_page->valid = VM_PAGE_BITS_ALL;
		vm_page_dirty(to_page);
		vm_page_xunbusy(to_page);
	}
	vm_object_pip_wakeup(obj);
	VM_OBJECT_WUNLOCK(obj);

	ttm->bdev->driver->ttm_tt_unpopulate(ttm);
	ttm->swap_storage = obj;
	ttm->page_flags |= TTM_PAGE_FLAG_SWAPPED;
	if (persistent_swap_storage != NULL)
		ttm->page_flags |= TTM_PAGE_FLAG_PERSISTENT_SWAP;
	return (0);
}
예제 #12
0
static boolean_t
vnode_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before,
    int *after)
{
	struct vnode *vp = object->handle;
	daddr_t bn;
	int err;
	daddr_t reqblock;
	int poff;
	int bsize;
	int pagesperblock, blocksperpage;

	VM_OBJECT_ASSERT_WLOCKED(object);
	/*
	 * If no vp or vp is doomed or marked transparent to VM, we do not
	 * have the page.
	 */
	if (vp == NULL || vp->v_iflag & VI_DOOMED)
		return FALSE;
	/*
	 * If the offset is beyond end of file we do
	 * not have the page.
	 */
	if (IDX_TO_OFF(pindex) >= object->un_pager.vnp.vnp_size)
		return FALSE;

	bsize = vp->v_mount->mnt_stat.f_iosize;
	pagesperblock = bsize / PAGE_SIZE;
	blocksperpage = 0;
	if (pagesperblock > 0) {
		reqblock = pindex / pagesperblock;
	} else {
		blocksperpage = (PAGE_SIZE / bsize);
		reqblock = pindex * blocksperpage;
	}
	VM_OBJECT_WUNLOCK(object);
	err = VOP_BMAP(vp, reqblock, NULL, &bn, after, before);
	VM_OBJECT_WLOCK(object);
	if (err)
		return TRUE;
	if (bn == -1)
		return FALSE;
	if (pagesperblock > 0) {
		poff = pindex - (reqblock * pagesperblock);
		if (before) {
			*before *= pagesperblock;
			*before += poff;
		}
		if (after) {
			/*
			 * The BMAP vop can report a partial block in the
			 * 'after', but must not report blocks after EOF.
			 * Assert the latter, and truncate 'after' in case
			 * of the former.
			 */
			KASSERT((reqblock + *after) * pagesperblock <
			    roundup2(object->size, pagesperblock),
			    ("%s: reqblock %jd after %d size %ju", __func__,
			    (intmax_t )reqblock, *after,
			    (uintmax_t )object->size));
			*after *= pagesperblock;
			*after += pagesperblock - (poff + 1);
			if (pindex + *after >= object->size)
				*after = object->size - 1 - pindex;
		}
	} else {
		if (before) {
			*before /= blocksperpage;
		}

		if (after) {
			*after /= blocksperpage;
		}
	}
	return TRUE;
}
예제 #13
0
/*
 * This is now called from local media FS's to operate against their
 * own vnodes if they fail to implement VOP_PUTPAGES.
 *
 * This is typically called indirectly via the pageout daemon and
 * clustering has already typically occurred, so in general we ask the
 * underlying filesystem to write the data out asynchronously rather
 * then delayed.
 */
int
vnode_pager_generic_putpages(struct vnode *vp, vm_page_t *ma, int bytecount,
    int flags, int *rtvals)
{
	int i;
	vm_object_t object;
	vm_page_t m;
	int count;

	int maxsize, ncount;
	vm_ooffset_t poffset;
	struct uio auio;
	struct iovec aiov;
	int error;
	int ioflags;
	int ppscheck = 0;
	static struct timeval lastfail;
	static int curfail;

	object = vp->v_object;
	count = bytecount / PAGE_SIZE;

	for (i = 0; i < count; i++)
		rtvals[i] = VM_PAGER_ERROR;

	if ((int64_t)ma[0]->pindex < 0) {
		printf("vnode_pager_putpages: attempt to write meta-data!!! -- 0x%lx(%lx)\n",
		    (long)ma[0]->pindex, (u_long)ma[0]->dirty);
		rtvals[0] = VM_PAGER_BAD;
		return VM_PAGER_BAD;
	}

	maxsize = count * PAGE_SIZE;
	ncount = count;

	poffset = IDX_TO_OFF(ma[0]->pindex);

	/*
	 * If the page-aligned write is larger then the actual file we
	 * have to invalidate pages occurring beyond the file EOF.  However,
	 * there is an edge case where a file may not be page-aligned where
	 * the last page is partially invalid.  In this case the filesystem
	 * may not properly clear the dirty bits for the entire page (which
	 * could be VM_PAGE_BITS_ALL due to the page having been mmap()d).
	 * With the page locked we are free to fix-up the dirty bits here.
	 *
	 * We do not under any circumstances truncate the valid bits, as
	 * this will screw up bogus page replacement.
	 */
	VM_OBJECT_WLOCK(object);
	if (maxsize + poffset > object->un_pager.vnp.vnp_size) {
		if (object->un_pager.vnp.vnp_size > poffset) {
			int pgoff;

			maxsize = object->un_pager.vnp.vnp_size - poffset;
			ncount = btoc(maxsize);
			if ((pgoff = (int)maxsize & PAGE_MASK) != 0) {
				/*
				 * If the object is locked and the following
				 * conditions hold, then the page's dirty
				 * field cannot be concurrently changed by a
				 * pmap operation.
				 */
				m = ma[ncount - 1];
				vm_page_assert_sbusied(m);
				KASSERT(!pmap_page_is_write_mapped(m),
		("vnode_pager_generic_putpages: page %p is not read-only", m));
				vm_page_clear_dirty(m, pgoff, PAGE_SIZE -
				    pgoff);
			}
		} else {
			maxsize = 0;
			ncount = 0;
		}
		if (ncount < count) {
			for (i = ncount; i < count; i++) {
				rtvals[i] = VM_PAGER_BAD;
			}
		}
	}
	VM_OBJECT_WUNLOCK(object);

	/*
	 * pageouts are already clustered, use IO_ASYNC to force a bawrite()
	 * rather then a bdwrite() to prevent paging I/O from saturating 
	 * the buffer cache.  Dummy-up the sequential heuristic to cause
	 * large ranges to cluster.  If neither IO_SYNC or IO_ASYNC is set,
	 * the system decides how to cluster.
	 */
	ioflags = IO_VMIO;
	if (flags & (VM_PAGER_PUT_SYNC | VM_PAGER_PUT_INVAL))
		ioflags |= IO_SYNC;
	else if ((flags & VM_PAGER_CLUSTER_OK) == 0)
		ioflags |= IO_ASYNC;
	ioflags |= (flags & VM_PAGER_PUT_INVAL) ? IO_INVAL: 0;
	ioflags |= (flags & VM_PAGER_PUT_NOREUSE) ? IO_NOREUSE : 0;
	ioflags |= IO_SEQMAX << IO_SEQSHIFT;

	aiov.iov_base = (caddr_t) 0;
	aiov.iov_len = maxsize;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = poffset;
	auio.uio_segflg = UIO_NOCOPY;
	auio.uio_rw = UIO_WRITE;
	auio.uio_resid = maxsize;
	auio.uio_td = (struct thread *) 0;
	error = VOP_WRITE(vp, &auio, ioflags, curthread->td_ucred);
	PCPU_INC(cnt.v_vnodeout);
	PCPU_ADD(cnt.v_vnodepgsout, ncount);

	if (error) {
		if ((ppscheck = ppsratecheck(&lastfail, &curfail, 1)))
			printf("vnode_pager_putpages: I/O error %d\n", error);
	}
	if (auio.uio_resid) {
		if (ppscheck || ppsratecheck(&lastfail, &curfail, 1))
			printf("vnode_pager_putpages: residual I/O %zd at %lu\n",
			    auio.uio_resid, (u_long)ma[0]->pindex);
	}
	for (i = 0; i < ncount; i++) {
		rtvals[i] = VM_PAGER_OK;
	}
	return rtvals[0];
}
예제 #14
0
static int
vnode_pager_generic_getpages_done(struct buf *bp)
{
	vm_object_t object;
	off_t tfoff, nextoff;
	int i, error;

	error = (bp->b_ioflags & BIO_ERROR) != 0 ? EIO : 0;
	object = bp->b_vp->v_object;

	if (error == 0 && bp->b_bcount != bp->b_npages * PAGE_SIZE) {
		if (!buf_mapped(bp)) {
			bp->b_data = bp->b_kvabase;
			pmap_qenter((vm_offset_t)bp->b_data, bp->b_pages,
			    bp->b_npages);
		}
		bzero(bp->b_data + bp->b_bcount,
		    PAGE_SIZE * bp->b_npages - bp->b_bcount);
	}
	if (buf_mapped(bp)) {
		pmap_qremove((vm_offset_t)bp->b_data, bp->b_npages);
		bp->b_data = unmapped_buf;
	}

	VM_OBJECT_WLOCK(object);
	for (i = 0, tfoff = IDX_TO_OFF(bp->b_pages[0]->pindex);
	    i < bp->b_npages; i++, tfoff = nextoff) {
		vm_page_t mt;

		nextoff = tfoff + PAGE_SIZE;
		mt = bp->b_pages[i];

		if (nextoff <= object->un_pager.vnp.vnp_size) {
			/*
			 * Read filled up entire page.
			 */
			mt->valid = VM_PAGE_BITS_ALL;
			KASSERT(mt->dirty == 0,
			    ("%s: page %p is dirty", __func__, mt));
			KASSERT(!pmap_page_is_mapped(mt),
			    ("%s: page %p is mapped", __func__, mt));
		} else {
			/*
			 * Read did not fill up entire page.
			 *
			 * Currently we do not set the entire page valid,
			 * we just try to clear the piece that we couldn't
			 * read.
			 */
			vm_page_set_valid_range(mt, 0,
			    object->un_pager.vnp.vnp_size - tfoff);
			KASSERT((mt->dirty & vm_page_bits(0,
			    object->un_pager.vnp.vnp_size - tfoff)) == 0,
			    ("%s: page %p is dirty", __func__, mt));
		}

		if (i < bp->b_pgbefore || i >= bp->b_npages - bp->b_pgafter)
			vm_page_readahead_finish(mt);
	}
	VM_OBJECT_WUNLOCK(object);
	if (error != 0)
		printf("%s: I/O read error %d\n", __func__, error);

	return (error);
}
/*
 * This is now called from local media FS's to operate against their
 * own vnodes if they fail to implement VOP_GETPAGES.
 *
 * With all the caching local media devices do these days there is really
 * very little point to attempting to restrict the I/O size to contiguous
 * blocks on-disk, especially if our caller thinks we need all the specified
 * pages.  Just construct and issue a READ.
 */
int
vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *mpp, int bytecount,
			     int reqpage, int seqaccess)
{
	struct iovec aiov;
	struct uio auio;
	off_t foff;
	int error;
	int count;
	int i;
	int ioflags;

	/*
	 * Do not do anything if the vnode is bad.
	 */
	if (vp->v_mount == NULL)
		return VM_PAGER_BAD;

	/*
	 * Calculate the number of pages.  Since we are paging in whole
	 * pages, adjust bytecount to be an integral multiple of the page
	 * size.  It will be clipped to the file EOF later on.
	 */
	bytecount = round_page(bytecount);
	count = bytecount / PAGE_SIZE;

	/*
	 * We could check m[reqpage]->valid here and shortcut the operation,
	 * but doing so breaks read-ahead.  Instead assume that the VM
	 * system has already done at least the check, don't worry about
	 * any races, and issue the VOP_READ to allow read-ahead to function.
	 *
	 * This keeps the pipeline full for I/O bound sequentially scanned
	 * mmap()'s
	 */
	/* don't shortcut */

	/*
	 * Discard pages past the file EOF.  If the requested page is past
	 * the file EOF we just leave its valid bits set to 0, the caller
	 * expects to maintain ownership of the requested page.  If the
	 * entire range is past file EOF discard everything and generate
	 * a pagein error.
	 */
	foff = IDX_TO_OFF(mpp[0]->pindex);
	if (foff >= vp->v_filesize) {
		for (i = 0; i < count; i++) {
			if (i != reqpage)
				vnode_pager_freepage(mpp[i]);
		}
		return VM_PAGER_ERROR;
	}

	if (foff + bytecount > vp->v_filesize) {
		bytecount = vp->v_filesize - foff;
		i = round_page(bytecount) / PAGE_SIZE;
		while (count > i) {
			--count;
			if (count != reqpage)
				vnode_pager_freepage(mpp[count]);
		}
	}

	/*
	 * The size of the transfer is bytecount.  bytecount will be an
	 * integral multiple of the page size unless it has been clipped
	 * to the file EOF.  The transfer cannot exceed the file EOF.
	 *
	 * When dealing with real devices we must round-up to the device
	 * sector size.
	 */
	if (vp->v_type == VBLK || vp->v_type == VCHR) {
		int secmask = vp->v_rdev->si_bsize_phys - 1;
		KASSERT(secmask < PAGE_SIZE, ("vnode_pager_generic_getpages: sector size %d too large", secmask + 1));
		bytecount = (bytecount + secmask) & ~secmask;
	}

	/*
	 * Severe hack to avoid deadlocks with the buffer cache
	 */
	for (i = 0; i < count; ++i) {
		vm_page_t mt = mpp[i];

		vm_page_io_start(mt);
		vm_page_wakeup(mt);
	}

	/*
	 * Issue the I/O with some read-ahead if bytecount > PAGE_SIZE
	 */
	ioflags = IO_VMIO;
	if (seqaccess)
		ioflags |= IO_SEQMAX << IO_SEQSHIFT;

	aiov.iov_base = NULL;
	aiov.iov_len = bytecount;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = foff;
	auio.uio_segflg = UIO_NOCOPY;
	auio.uio_rw = UIO_READ;
	auio.uio_resid = bytecount;
	auio.uio_td = NULL;
	mycpu->gd_cnt.v_vnodein++;
	mycpu->gd_cnt.v_vnodepgsin += count;

	error = VOP_READ(vp, &auio, ioflags, proc0.p_ucred);

	/*
	 * Severe hack to avoid deadlocks with the buffer cache
	 */
	for (i = 0; i < count; ++i) {
		vm_page_busy_wait(mpp[i], FALSE, "getpgs");
		vm_page_io_finish(mpp[i]);
	}

	/*
	 * Calculate the actual number of bytes read and clean up the
	 * page list.  
	 */
	bytecount -= auio.uio_resid;

	for (i = 0; i < count; ++i) {
		vm_page_t mt = mpp[i];

		if (i != reqpage) {
			if (error == 0 && mt->valid) {
				if (mt->flags & PG_REFERENCED)
					vm_page_activate(mt);
				else
					vm_page_deactivate(mt);
				vm_page_wakeup(mt);
			} else {
				vnode_pager_freepage(mt);
			}
		} else if (mt->valid == 0) {
			if (error == 0) {
				kprintf("page failed but no I/O error page "
					"%p object %p pindex %d\n",
					mt, mt->object, (int) mt->pindex);
				/* whoops, something happened */
				error = EINVAL;
			}
		} else if (mt->valid != VM_PAGE_BITS_ALL) {
			/*
			 * Zero-extend the requested page if necessary (if
			 * the filesystem is using a small block size).
			 */
			vm_page_zero_invalid(mt, TRUE);
		}
	}
	if (error) {
		kprintf("vnode_pager_getpage: I/O read error\n");
	}
	return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
}
/*
 * This is now called from local media FS's to operate against their
 * own vnodes if they fail to implement VOP_PUTPAGES.
 *
 * This is typically called indirectly via the pageout daemon and
 * clustering has already typically occured, so in general we ask the
 * underlying filesystem to write the data out asynchronously rather
 * then delayed.
 */
int
vnode_pager_generic_putpages(struct vnode *vp, vm_page_t *m, int bytecount,
			     int flags, int *rtvals)
{
	int i;
	int maxsize, ncount, count;
	vm_ooffset_t poffset;
	struct uio auio;
	struct iovec aiov;
	int error;
	int ioflags;

	count = bytecount / PAGE_SIZE;

	for (i = 0; i < count; i++)
		rtvals[i] = VM_PAGER_AGAIN;

	if ((int) m[0]->pindex < 0) {
		kprintf("vnode_pager_putpages: attempt to write meta-data!!! -- 0x%lx(%x)\n",
			(long)m[0]->pindex, m[0]->dirty);
		rtvals[0] = VM_PAGER_BAD;
		return VM_PAGER_BAD;
	}

	maxsize = count * PAGE_SIZE;
	ncount = count;

	poffset = IDX_TO_OFF(m[0]->pindex);

	/*
	 * If the page-aligned write is larger then the actual file we
	 * have to invalidate pages occuring beyond the file EOF.
	 *
	 * If the file EOF resides in the middle of a page we still clear
	 * all of that page's dirty bits later on.  If we didn't it would
	 * endlessly re-write.
	 *
	 * We do not under any circumstances truncate the valid bits, as
	 * this will screw up bogus page replacement.
	 *
	 * The caller has already read-protected the pages.  The VFS must
	 * use the buffer cache to wrap the pages.  The pages might not
	 * be immediately flushed by the buffer cache but once under its
	 * control the pages themselves can wind up being marked clean
	 * and their covering buffer cache buffer can be marked dirty.
	 */
	if (poffset + maxsize > vp->v_filesize) {
		if (poffset < vp->v_filesize) {
			maxsize = vp->v_filesize - poffset;
			ncount = btoc(maxsize);
		} else {
			maxsize = 0;
			ncount = 0;
		}
		if (ncount < count) {
			for (i = ncount; i < count; i++) {
				rtvals[i] = VM_PAGER_BAD;
			}
		}
	}

	/*
	 * pageouts are already clustered, use IO_ASYNC to force a bawrite()
	 * rather then a bdwrite() to prevent paging I/O from saturating
	 * the buffer cache.  Dummy-up the sequential heuristic to cause
	 * large ranges to cluster.  If neither IO_SYNC or IO_ASYNC is set,
	 * the system decides how to cluster.
	 */
	ioflags = IO_VMIO;
	if (flags & (VM_PAGER_PUT_SYNC | VM_PAGER_PUT_INVAL))
		ioflags |= IO_SYNC;
	else if ((flags & VM_PAGER_CLUSTER_OK) == 0)
		ioflags |= IO_ASYNC;
	ioflags |= (flags & VM_PAGER_PUT_INVAL) ? IO_INVAL: 0;
	ioflags |= IO_SEQMAX << IO_SEQSHIFT;

	aiov.iov_base = (caddr_t) 0;
	aiov.iov_len = maxsize;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = poffset;
	auio.uio_segflg = UIO_NOCOPY;
	auio.uio_rw = UIO_WRITE;
	auio.uio_resid = maxsize;
	auio.uio_td = NULL;
	error = VOP_WRITE(vp, &auio, ioflags, proc0.p_ucred);
	mycpu->gd_cnt.v_vnodeout++;
	mycpu->gd_cnt.v_vnodepgsout += ncount;

	if (error) {
		krateprintf(&vbadrate,
			    "vnode_pager_putpages: I/O error %d\n", error);
	}
	if (auio.uio_resid) {
		krateprintf(&vresrate,
			    "vnode_pager_putpages: residual I/O %zd at %lu\n",
			    auio.uio_resid, (u_long)m[0]->pindex);
	}
	if (error == 0) {
		for (i = 0; i < ncount; i++) {
			rtvals[i] = VM_PAGER_OK;
			vm_page_undirty(m[i]);
		}
	}
	return rtvals[0];
}
예제 #17
0
/*
 * Vnode op for VM getpages.
 * Wish wish .... get rid from multiple IO routines
 *
 * smbfs_getpages(struct vnode *a_vp, vm_page_t *a_m, int a_count,
 *		  int a_reqpage, vm_ooffset_t a_offset)
 */
int
smbfs_getpages(struct vop_getpages_args *ap)
{
#ifdef SMBFS_RWGENERIC
	return vop_stdgetpages(ap);
#else
	int i, error, npages;
	int doclose;
	size_t size, toff, nextoff, count;
	struct uio uio;
	struct iovec iov;
	vm_offset_t kva;
	struct buf *bp;
	struct vnode *vp;
	struct thread *td = curthread;	/* XXX */
	struct ucred *cred;
	struct smbmount *smp;
	struct smbnode *np;
	struct smb_cred scred;
	vm_page_t *pages;

	KKASSERT(td->td_proc);

	vp = ap->a_vp;
	cred = td->td_proc->p_ucred;
	np = VTOSMB(vp);
	smp = VFSTOSMBFS(vp->v_mount);
	pages = ap->a_m;
	count = (size_t)ap->a_count;

	if (vp->v_object == NULL) {
		kprintf("smbfs_getpages: called with non-merged cache vnode??\n");
		return VM_PAGER_ERROR;
	}
	smb_makescred(&scred, td, cred);

	bp = getpbuf_kva(&smbfs_pbuf_freecnt);
	npages = btoc(count);
	kva = (vm_offset_t) bp->b_data;
	pmap_qenter(kva, pages, npages);

	iov.iov_base = (caddr_t) kva;
	iov.iov_len = count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = IDX_TO_OFF(pages[0]->pindex);
	uio.uio_resid = count;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_td = td;

	/*
	 * This is kinda nasty.  Since smbfs is physically closing the
	 * fid on close(), we have to reopen it if necessary.  There are
	 * other races here too, such as if another process opens the same
	 * file while we are blocked in read. XXX
	 */
	error = 0;
	doclose = 0;
	if (np->n_opencount == 0) {
		error = smbfs_smb_open(np, SMB_AM_OPENREAD, &scred);
		if (error == 0)
			doclose = 1;
	}
	if (error == 0)
		error = smb_read(smp->sm_share, np->n_fid, &uio, &scred);
	if (doclose)
		smbfs_smb_close(smp->sm_share, np->n_fid, NULL, &scred);
	pmap_qremove(kva, npages);

	relpbuf(bp, &smbfs_pbuf_freecnt);

	if (error && (uio.uio_resid == count)) {
		kprintf("smbfs_getpages: error %d\n",error);
		for (i = 0; i < npages; i++) {
			if (ap->a_reqpage != i)
				vnode_pager_freepage(pages[i]);
		}
		return VM_PAGER_ERROR;
	}

	size = count - uio.uio_resid;

	for (i = 0, toff = 0; i < npages; i++, toff = nextoff) {
		vm_page_t m;
		nextoff = toff + PAGE_SIZE;
		m = pages[i];

		m->flags &= ~PG_ZERO;

		/*
		 * NOTE: pmap dirty bit should have already been cleared.
		 *	 We do not clear it here.
		 */
		if (nextoff <= size) {
			m->valid = VM_PAGE_BITS_ALL;
			m->dirty = 0;
		} else {
			int nvalid = ((size + DEV_BSIZE - 1) - toff) &
				      ~(DEV_BSIZE - 1);
			vm_page_set_validclean(m, 0, nvalid);
		}
		
		if (i != ap->a_reqpage) {
			/*
			 * Whether or not to leave the page activated is up in
			 * the air, but we should put the page on a page queue
			 * somewhere (it already is in the object).  Result:
			 * It appears that emperical results show that
			 * deactivating pages is best.
			 */

			/*
			 * Just in case someone was asking for this page we
			 * now tell them that it is ok to use.
			 */
			if (!error) {
				if (m->flags & PG_REFERENCED)
					vm_page_activate(m);
				else
					vm_page_deactivate(m);
				vm_page_wakeup(m);
			} else {
				vnode_pager_freepage(m);
			}
		}
	}
	return 0;
#endif /* SMBFS_RWGENERIC */
}
예제 #18
0
/*
 * This is now called from local media FS's to operate against their
 * own vnodes if they fail to implement VOP_GETPAGES.
 */
int
vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int count,
    int *a_rbehind, int *a_rahead, vop_getpages_iodone_t iodone, void *arg)
{
	vm_object_t object;
	struct bufobj *bo;
	struct buf *bp;
	off_t foff;
#ifdef INVARIANTS
	off_t blkno0;
#endif
	int bsize, pagesperblock, *freecnt;
	int error, before, after, rbehind, rahead, poff, i;
	int bytecount, secmask;

	KASSERT(vp->v_type != VCHR && vp->v_type != VBLK,
	    ("%s does not support devices", __func__));

	if (vp->v_iflag & VI_DOOMED)
		return (VM_PAGER_BAD);

	object = vp->v_object;
	foff = IDX_TO_OFF(m[0]->pindex);
	bsize = vp->v_mount->mnt_stat.f_iosize;
	pagesperblock = bsize / PAGE_SIZE;

	KASSERT(foff < object->un_pager.vnp.vnp_size,
	    ("%s: page %p offset beyond vp %p size", __func__, m[0], vp));
	KASSERT(count <= sizeof(bp->b_pages),
	    ("%s: requested %d pages", __func__, count));

	/*
	 * The last page has valid blocks.  Invalid part can only
	 * exist at the end of file, and the page is made fully valid
	 * by zeroing in vm_pager_get_pages().
	 */
	if (m[count - 1]->valid != 0 && --count == 0) {
		if (iodone != NULL)
			iodone(arg, m, 1, 0);
		return (VM_PAGER_OK);
	}

	/*
	 * Synchronous and asynchronous paging operations use different
	 * free pbuf counters.  This is done to avoid asynchronous requests
	 * to consume all pbufs.
	 * Allocate the pbuf at the very beginning of the function, so that
	 * if we are low on certain kind of pbufs don't even proceed to BMAP,
	 * but sleep.
	 */
	freecnt = iodone != NULL ?
	    &vnode_async_pbuf_freecnt : &vnode_pbuf_freecnt;
	bp = getpbuf(freecnt);

	/*
	 * Get the underlying device blocks for the file with VOP_BMAP().
	 * If the file system doesn't support VOP_BMAP, use old way of
	 * getting pages via VOP_READ.
	 */
	error = VOP_BMAP(vp, foff / bsize, &bo, &bp->b_blkno, &after, &before);
	if (error == EOPNOTSUPP) {
		relpbuf(bp, freecnt);
		VM_OBJECT_WLOCK(object);
		for (i = 0; i < count; i++) {
			PCPU_INC(cnt.v_vnodein);
			PCPU_INC(cnt.v_vnodepgsin);
			error = vnode_pager_input_old(object, m[i]);
			if (error)
				break;
		}
		VM_OBJECT_WUNLOCK(object);
		return (error);
	} else if (error != 0) {
		relpbuf(bp, freecnt);
		return (VM_PAGER_ERROR);
	}

	/*
	 * If the file system supports BMAP, but blocksize is smaller
	 * than a page size, then use special small filesystem code.
	 */
	if (pagesperblock == 0) {
		relpbuf(bp, freecnt);
		for (i = 0; i < count; i++) {
			PCPU_INC(cnt.v_vnodein);
			PCPU_INC(cnt.v_vnodepgsin);
			error = vnode_pager_input_smlfs(object, m[i]);
			if (error)
				break;
		}
		return (error);
	}

	/*
	 * A sparse file can be encountered only for a single page request,
	 * which may not be preceded by call to vm_pager_haspage().
	 */
	if (bp->b_blkno == -1) {
		KASSERT(count == 1,
		    ("%s: array[%d] request to a sparse file %p", __func__,
		    count, vp));
		relpbuf(bp, freecnt);
		pmap_zero_page(m[0]);
		KASSERT(m[0]->dirty == 0, ("%s: page %p is dirty",
		    __func__, m[0]));
		VM_OBJECT_WLOCK(object);
		m[0]->valid = VM_PAGE_BITS_ALL;
		VM_OBJECT_WUNLOCK(object);
		return (VM_PAGER_OK);
	}

#ifdef INVARIANTS
	blkno0 = bp->b_blkno;
#endif
	bp->b_blkno += (foff % bsize) / DEV_BSIZE;

	/* Recalculate blocks available after/before to pages. */
	poff = (foff % bsize) / PAGE_SIZE;
	before *= pagesperblock;
	before += poff;
	after *= pagesperblock;
	after += pagesperblock - (poff + 1);
	if (m[0]->pindex + after >= object->size)
		after = object->size - 1 - m[0]->pindex;
	KASSERT(count <= after + 1, ("%s: %d pages asked, can do only %d",
	    __func__, count, after + 1));
	after -= count - 1;

	/* Trim requested rbehind/rahead to possible values. */   
	rbehind = a_rbehind ? *a_rbehind : 0;
	rahead = a_rahead ? *a_rahead : 0;
	rbehind = min(rbehind, before);
	rbehind = min(rbehind, m[0]->pindex);
	rahead = min(rahead, after);
	rahead = min(rahead, object->size - m[count - 1]->pindex);
	/*
	 * Check that total amount of pages fit into buf.  Trim rbehind and
	 * rahead evenly if not.
	 */
	if (rbehind + rahead + count > nitems(bp->b_pages)) {
		int trim, sum;

		trim = rbehind + rahead + count - nitems(bp->b_pages) + 1;
		sum = rbehind + rahead;
		if (rbehind == before) {
			/* Roundup rbehind trim to block size. */
			rbehind -= roundup(trim * rbehind / sum, pagesperblock);
			if (rbehind < 0)
				rbehind = 0;
		} else
			rbehind -= trim * rbehind / sum;
		rahead -= trim * rahead / sum;
	}
	KASSERT(rbehind + rahead + count <= nitems(bp->b_pages),
	    ("%s: behind %d ahead %d count %d", __func__,
	    rbehind, rahead, count));

	/*
	 * Fill in the bp->b_pages[] array with requested and optional   
	 * read behind or read ahead pages.  Read behind pages are looked
	 * up in a backward direction, down to a first cached page.  Same
	 * for read ahead pages, but there is no need to shift the array
	 * in case of encountering a cached page.
	 */
	i = bp->b_npages = 0;
	if (rbehind) {
		vm_pindex_t startpindex, tpindex;
		vm_page_t p;

		VM_OBJECT_WLOCK(object);
		startpindex = m[0]->pindex - rbehind;
		if ((p = TAILQ_PREV(m[0], pglist, listq)) != NULL &&
		    p->pindex >= startpindex)
			startpindex = p->pindex + 1;

		/* tpindex is unsigned; beware of numeric underflow. */
		for (tpindex = m[0]->pindex - 1;
		    tpindex >= startpindex && tpindex < m[0]->pindex;
		    tpindex--, i++) {
			p = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL);
			if (p == NULL) {
				/* Shift the array. */
				for (int j = 0; j < i; j++)
					bp->b_pages[j] = bp->b_pages[j + 
					    tpindex + 1 - startpindex]; 
				break;
			}
			bp->b_pages[tpindex - startpindex] = p;
		}

		bp->b_pgbefore = i;
		bp->b_npages += i;
		bp->b_blkno -= IDX_TO_OFF(i) / DEV_BSIZE;
	} else
		bp->b_pgbefore = 0;

	/* Requested pages. */
	for (int j = 0; j < count; j++, i++)
		bp->b_pages[i] = m[j];
	bp->b_npages += count;

	if (rahead) {
		vm_pindex_t endpindex, tpindex;
		vm_page_t p;

		if (!VM_OBJECT_WOWNED(object))
			VM_OBJECT_WLOCK(object);
		endpindex = m[count - 1]->pindex + rahead + 1;
		if ((p = TAILQ_NEXT(m[count - 1], listq)) != NULL &&
		    p->pindex < endpindex)
			endpindex = p->pindex;
		if (endpindex > object->size)
			endpindex = object->size;

		for (tpindex = m[count - 1]->pindex + 1;
		    tpindex < endpindex; i++, tpindex++) {
			p = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL);
			if (p == NULL)
				break;
			bp->b_pages[i] = p;
		}

		bp->b_pgafter = i - bp->b_npages;
		bp->b_npages = i;
	} else
		bp->b_pgafter = 0;

	if (VM_OBJECT_WOWNED(object))
		VM_OBJECT_WUNLOCK(object);

	/* Report back actual behind/ahead read. */
	if (a_rbehind)
		*a_rbehind = bp->b_pgbefore;
	if (a_rahead)
		*a_rahead = bp->b_pgafter;

#ifdef INVARIANTS
	KASSERT(bp->b_npages <= nitems(bp->b_pages),
	    ("%s: buf %p overflowed", __func__, bp));
	for (int j = 1; j < bp->b_npages; j++)
		KASSERT(bp->b_pages[j]->pindex - 1 ==
		    bp->b_pages[j - 1]->pindex,
		    ("%s: pages array not consecutive, bp %p", __func__, bp));
#endif

	/*
	 * Recalculate first offset and bytecount with regards to read behind.
	 * Truncate bytecount to vnode real size and round up physical size
	 * for real devices.
	 */
	foff = IDX_TO_OFF(bp->b_pages[0]->pindex);
	bytecount = bp->b_npages << PAGE_SHIFT;
	if ((foff + bytecount) > object->un_pager.vnp.vnp_size)
		bytecount = object->un_pager.vnp.vnp_size - foff;
	secmask = bo->bo_bsize - 1;
	KASSERT(secmask < PAGE_SIZE && secmask > 0,
	    ("%s: sector size %d too large", __func__, secmask + 1));
	bytecount = (bytecount + secmask) & ~secmask;

	/*
	 * And map the pages to be read into the kva, if the filesystem
	 * requires mapped buffers.
	 */
	if ((vp->v_mount->mnt_kern_flag & MNTK_UNMAPPED_BUFS) != 0 &&
	    unmapped_buf_allowed) {
		bp->b_data = unmapped_buf;
		bp->b_offset = 0;
	} else {
		bp->b_data = bp->b_kvabase;
		pmap_qenter((vm_offset_t)bp->b_data, bp->b_pages, bp->b_npages);
	}

	/* Build a minimal buffer header. */
	bp->b_iocmd = BIO_READ;
	KASSERT(bp->b_rcred == NOCRED, ("leaking read ucred"));
	KASSERT(bp->b_wcred == NOCRED, ("leaking write ucred"));
	bp->b_rcred = crhold(curthread->td_ucred);
	bp->b_wcred = crhold(curthread->td_ucred);
	pbgetbo(bo, bp);
	bp->b_vp = vp;
	bp->b_bcount = bp->b_bufsize = bp->b_runningbufspace = bytecount;
	bp->b_iooffset = dbtob(bp->b_blkno);
	KASSERT(IDX_TO_OFF(m[0]->pindex - bp->b_pages[0]->pindex) ==
	    (blkno0 - bp->b_blkno) * DEV_BSIZE +
	    IDX_TO_OFF(m[0]->pindex) % bsize,
	    ("wrong offsets bsize %d m[0] %ju b_pages[0] %ju "
	    "blkno0 %ju b_blkno %ju", bsize,
	    (uintmax_t)m[0]->pindex, (uintmax_t)bp->b_pages[0]->pindex,
	    (uintmax_t)blkno0, (uintmax_t)bp->b_blkno));

	atomic_add_long(&runningbufspace, bp->b_runningbufspace);
	PCPU_INC(cnt.v_vnodein);
	PCPU_ADD(cnt.v_vnodepgsin, bp->b_npages);

	if (iodone != NULL) { /* async */
		bp->b_pgiodone = iodone;
		bp->b_caller1 = arg;
		bp->b_iodone = vnode_pager_generic_getpages_done_async;
		bp->b_flags |= B_ASYNC;
		BUF_KERNPROC(bp);
		bstrategy(bp);
		return (VM_PAGER_OK);
	} else {
		bp->b_iodone = bdone;
		bstrategy(bp);
		bwait(bp, PVM, "vnread");
		error = vnode_pager_generic_getpages_done(bp);
		for (i = 0; i < bp->b_npages; i++)
			bp->b_pages[i] = NULL;
		bp->b_vp = NULL;
		pbrelbo(bp);
		relpbuf(bp, &vnode_pbuf_freecnt);
		return (error != 0 ? VM_PAGER_ERROR : VM_PAGER_OK);
	}
}
예제 #19
0
/*
    struct vnop_getpages_args {
        struct vnode *a_vp;
        vm_page_t *a_m;
        int a_count;
        int a_reqpage;
        vm_ooffset_t a_offset;
    };
*/
static int
fuse_vnop_getpages(struct vop_getpages_args *ap)
{
	int i, error, nextoff, size, toff, count, npages;
	struct uio uio;
	struct iovec iov;
	vm_offset_t kva;
	struct buf *bp;
	struct vnode *vp;
	struct thread *td;
	struct ucred *cred;
	vm_page_t *pages;

	FS_DEBUG2G("heh\n");

	vp = ap->a_vp;
	KASSERT(vp->v_object, ("objectless vp passed to getpages"));
	td = curthread;			/* XXX */
	cred = curthread->td_ucred;	/* XXX */
	pages = ap->a_m;
	count = ap->a_count;

	if (!fsess_opt_mmap(vnode_mount(vp))) {
		FS_DEBUG("called on non-cacheable vnode??\n");
		return (VM_PAGER_ERROR);
	}
	npages = btoc(count);

	/*
	 * If the requested page is partially valid, just return it and
	 * allow the pager to zero-out the blanks.  Partially valid pages
	 * can only occur at the file EOF.
	 */

	VM_OBJECT_WLOCK(vp->v_object);
	fuse_vm_page_lock_queues();
	if (pages[ap->a_reqpage]->valid != 0) {
		for (i = 0; i < npages; ++i) {
			if (i != ap->a_reqpage) {
				fuse_vm_page_lock(pages[i]);
				vm_page_free(pages[i]);
				fuse_vm_page_unlock(pages[i]);
			}
		}
		fuse_vm_page_unlock_queues();
		VM_OBJECT_WUNLOCK(vp->v_object);
		return 0;
	}
	fuse_vm_page_unlock_queues();
	VM_OBJECT_WUNLOCK(vp->v_object);

	/*
	 * We use only the kva address for the buffer, but this is extremely
	 * convienient and fast.
	 */
	bp = getpbuf(&fuse_pbuf_freecnt);

	kva = (vm_offset_t)bp->b_data;
	pmap_qenter(kva, pages, npages);
	PCPU_INC(cnt.v_vnodein);
	PCPU_ADD(cnt.v_vnodepgsin, npages);

	iov.iov_base = (caddr_t)kva;
	iov.iov_len = count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = IDX_TO_OFF(pages[0]->pindex);
	uio.uio_resid = count;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_td = td;

	error = fuse_io_dispatch(vp, &uio, IO_DIRECT, cred);
	pmap_qremove(kva, npages);

	relpbuf(bp, &fuse_pbuf_freecnt);

	if (error && (uio.uio_resid == count)) {
		FS_DEBUG("error %d\n", error);
		VM_OBJECT_WLOCK(vp->v_object);
		fuse_vm_page_lock_queues();
		for (i = 0; i < npages; ++i) {
			if (i != ap->a_reqpage) {
				fuse_vm_page_lock(pages[i]);
				vm_page_free(pages[i]);
				fuse_vm_page_unlock(pages[i]);
			}
		}
		fuse_vm_page_unlock_queues();
		VM_OBJECT_WUNLOCK(vp->v_object);
		return VM_PAGER_ERROR;
	}
	/*
	 * Calculate the number of bytes read and validate only that number
	 * of bytes.  Note that due to pending writes, size may be 0.  This
	 * does not mean that the remaining data is invalid!
	 */

	size = count - uio.uio_resid;
	VM_OBJECT_WLOCK(vp->v_object);
	fuse_vm_page_lock_queues();
	for (i = 0, toff = 0; i < npages; i++, toff = nextoff) {
		vm_page_t m;

		nextoff = toff + PAGE_SIZE;
		m = pages[i];

		if (nextoff <= size) {
			/*
			 * Read operation filled an entire page
			 */
			m->valid = VM_PAGE_BITS_ALL;
			KASSERT(m->dirty == 0,
			    ("fuse_getpages: page %p is dirty", m));
		} else if (size > toff) {
			/*
			 * Read operation filled a partial page.
			 */
			m->valid = 0;
			vm_page_set_valid_range(m, 0, size - toff);
			KASSERT(m->dirty == 0,
			    ("fuse_getpages: page %p is dirty", m));
		} else {
			/*
			 * Read operation was short.  If no error occured
			 * we may have hit a zero-fill section.   We simply
			 * leave valid set to 0.
			 */
			;
		}
		if (i != ap->a_reqpage)
			vm_page_readahead_finish(m);
	}
	fuse_vm_page_unlock_queues();
	VM_OBJECT_WUNLOCK(vp->v_object);
	return 0;
}
예제 #20
0
/*
 * spec_getpages() - get pages associated with device vnode.
 *
 * Note that spec_read and spec_write do not use the buffer cache, so we
 * must fully implement getpages here.
 */
static int
devfs_spec_getpages(struct vop_getpages_args *ap)
{
	vm_offset_t kva;
	int error;
	int i, pcount, size;
	struct buf *bp;
	vm_page_t m;
	vm_ooffset_t offset;
	int toff, nextoff, nread;
	struct vnode *vp = ap->a_vp;
	int blksiz;
	int gotreqpage;

	error = 0;
	pcount = round_page(ap->a_count) / PAGE_SIZE;

	/*
	 * Calculate the offset of the transfer and do sanity check.
	 */
	offset = IDX_TO_OFF(ap->a_m[0]->pindex) + ap->a_offset;

	/*
	 * Round up physical size for real devices.  We cannot round using
	 * v_mount's block size data because v_mount has nothing to do with
	 * the device.  i.e. it's usually '/dev'.  We need the physical block
	 * size for the device itself.
	 *
	 * We can't use v_rdev->si_mountpoint because it only exists when the
	 * block device is mounted.  However, we can use v_rdev.
	 */
	if (vn_isdisk(vp, NULL))
		blksiz = vp->v_rdev->si_bsize_phys;
	else
		blksiz = DEV_BSIZE;

	size = (ap->a_count + blksiz - 1) & ~(blksiz - 1);

	bp = getpbuf_kva(NULL);
	kva = (vm_offset_t)bp->b_data;

	/*
	 * Map the pages to be read into the kva.
	 */
	pmap_qenter(kva, ap->a_m, pcount);

	/* Build a minimal buffer header. */
	bp->b_cmd = BUF_CMD_READ;
	bp->b_bcount = size;
	bp->b_resid = 0;
	bsetrunningbufspace(bp, size);

	bp->b_bio1.bio_offset = offset;
	bp->b_bio1.bio_done = devfs_spec_getpages_iodone;

	mycpu->gd_cnt.v_vnodein++;
	mycpu->gd_cnt.v_vnodepgsin += pcount;

	/* Do the input. */
	vn_strategy(ap->a_vp, &bp->b_bio1);

	crit_enter();

	/* We definitely need to be at splbio here. */
	while (bp->b_cmd != BUF_CMD_DONE)
		tsleep(bp, 0, "spread", 0);

	crit_exit();

	if (bp->b_flags & B_ERROR) {
		if (bp->b_error)
			error = bp->b_error;
		else
			error = EIO;
	}

	/*
	 * If EOF is encountered we must zero-extend the result in order
	 * to ensure that the page does not contain garabge.  When no
	 * error occurs, an early EOF is indicated if b_bcount got truncated.
	 * b_resid is relative to b_bcount and should be 0, but some devices
	 * might indicate an EOF with b_resid instead of truncating b_bcount.
	 */
	nread = bp->b_bcount - bp->b_resid;
	if (nread < ap->a_count)
		bzero((caddr_t)kva + nread, ap->a_count - nread);
	pmap_qremove(kva, pcount);

	gotreqpage = 0;
	for (i = 0, toff = 0; i < pcount; i++, toff = nextoff) {
		nextoff = toff + PAGE_SIZE;
		m = ap->a_m[i];

		m->flags &= ~PG_ZERO;

		/*
		 * NOTE: vm_page_undirty/clear_dirty etc do not clear the
		 *	 pmap modified bit.  pmap modified bit should have
		 *	 already been cleared.
		 */
		if (nextoff <= nread) {
			m->valid = VM_PAGE_BITS_ALL;
			vm_page_undirty(m);
		} else if (toff < nread) {
			/*
			 * Since this is a VM request, we have to supply the
			 * unaligned offset to allow vm_page_set_valid()
			 * to zero sub-DEV_BSIZE'd portions of the page.
			 */
			vm_page_set_valid(m, 0, nread - toff);
			vm_page_clear_dirty_end_nonincl(m, 0, nread - toff);
		} else {
			m->valid = 0;
			vm_page_undirty(m);
		}

		if (i != ap->a_reqpage) {
			/*
			 * Just in case someone was asking for this page we
			 * now tell them that it is ok to use.
			 */
			if (!error || (m->valid == VM_PAGE_BITS_ALL)) {
				if (m->valid) {
					if (m->flags & PG_REFERENCED) {
						vm_page_activate(m);
					} else {
						vm_page_deactivate(m);
					}
					vm_page_wakeup(m);
				} else {
					vm_page_free(m);
				}
			} else {
				vm_page_free(m);
			}
		} else if (m->valid) {
			gotreqpage = 1;
			/*
			 * Since this is a VM request, we need to make the
			 * entire page presentable by zeroing invalid sections.
			 */
			if (m->valid != VM_PAGE_BITS_ALL)
			    vm_page_zero_invalid(m, FALSE);
		}
	}
	if (!gotreqpage) {
		m = ap->a_m[ap->a_reqpage];
		devfs_debug(DEVFS_DEBUG_WARNING,
	    "spec_getpages:(%s) I/O read failure: (error=%d) bp %p vp %p\n",
			devtoname(vp->v_rdev), error, bp, bp->b_vp);
		devfs_debug(DEVFS_DEBUG_WARNING,
	    "               size: %d, resid: %d, a_count: %d, valid: 0x%x\n",
		    size, bp->b_resid, ap->a_count, m->valid);
		devfs_debug(DEVFS_DEBUG_WARNING,
	    "               nread: %d, reqpage: %d, pindex: %lu, pcount: %d\n",
		    nread, ap->a_reqpage, (u_long)m->pindex, pcount);
		/*
		 * Free the buffer header back to the swap buffer pool.
		 */
		relpbuf(bp, NULL);
		return VM_PAGER_ERROR;
	}
	/*
	 * Free the buffer header back to the swap buffer pool.
	 */
	relpbuf(bp, NULL);
	if (DEVFS_NODE(ap->a_vp))
		nanotime(&DEVFS_NODE(ap->a_vp)->mtime);
	return VM_PAGER_OK;
}
예제 #21
0
/*
 * Vnode op for VM putpages.
 * possible bug: all IO done in sync mode
 * Note that vop_close always invalidate pages before close, so it's
 * not necessary to open vnode.
 *
 * nwfs_putpages(struct vnode *a_vp, vm_page_t *a_m, int a_count,
 *		 int a_sync, int *a_rtvals, vm_ooffset_t a_offset)
 */
int
nwfs_putpages(struct vop_putpages_args *ap)
{
	int error;
	struct thread *td = curthread;	/* XXX */
	struct vnode *vp = ap->a_vp;
	struct ucred *cred;

#ifndef NWFS_RWCACHE
	KKASSERT(td->td_proc);
	cred = td->td_proc->p_ucred;		/* XXX */
	VOP_OPEN(vp, FWRITE, cred, NULL);
	error = vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
		ap->a_sync, ap->a_rtvals);
	VOP_CLOSE(vp, FWRITE, cred);
	return error;
#else
	struct uio uio;
	struct iovec iov;
	vm_offset_t kva;
	struct buf *bp;
	int i, npages, count;
	int *rtvals;
	struct nwmount *nmp;
	struct nwnode *np;
	vm_page_t *pages;

	KKASSERT(td->td_proc);
	cred = td->td_proc->p_ucred;		/* XXX */

/*	VOP_OPEN(vp, FWRITE, cred, NULL);*/
	np = VTONW(vp);
	nmp = VFSTONWFS(vp->v_mount);
	pages = ap->a_m;
	count = ap->a_count;
	rtvals = ap->a_rtvals;
	npages = btoc(count);

	for (i = 0; i < npages; i++) {
		rtvals[i] = VM_PAGER_AGAIN;
	}

	bp = getpbuf_kva(&nwfs_pbuf_freecnt);
	kva = (vm_offset_t) bp->b_data;
	pmap_qenter(kva, pages, npages);

	iov.iov_base = (caddr_t) kva;
	iov.iov_len = count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = IDX_TO_OFF(pages[0]->pindex);
	uio.uio_resid = count;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_WRITE;
	uio.uio_td = td;
	NCPVNDEBUG("ofs=%d,resid=%d\n",(int)uio.uio_offset, uio.uio_resid);

	error = ncp_write(NWFSTOCONN(nmp), &np->n_fh, &uio, cred);
/*	VOP_CLOSE(vp, FWRITE, cred);*/
	NCPVNDEBUG("paged write done: %d\n", error);

	pmap_qremove(kva, npages);
	relpbuf(bp, &nwfs_pbuf_freecnt);

	if (!error) {
		int nwritten = round_page(count - uio.uio_resid) / PAGE_SIZE;
		for (i = 0; i < nwritten; i++) {
			rtvals[i] = VM_PAGER_OK;
			vm_page_undirty(pages[i]);
		}
	}
	return rtvals[0];
#endif /* NWFS_RWCACHE */
}
예제 #22
0
/*
 * Vnode op for VM getpages.
 * Wish wish .... get rid from multiple IO routines
 *
 * nwfs_getpages(struct vnode *a_vp, vm_page_t *a_m, int a_count,
 *		 int a_reqpage, vm_ooffset_t a_offset)
 */
int
nwfs_getpages(struct vop_getpages_args *ap)
{
#ifndef NWFS_RWCACHE
	return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count,
					    ap->a_reqpage, ap->a_seqaccess);
#else
	int i, error, npages;
	size_t nextoff, toff;
	size_t count;
	size_t size;
	struct uio uio;
	struct iovec iov;
	vm_offset_t kva;
	struct buf *bp;
	struct vnode *vp;
	struct thread *td = curthread;	/* XXX */
	struct ucred *cred;
	struct nwmount *nmp;
	struct nwnode *np;
	vm_page_t *pages;

	KKASSERT(td->td_proc);
	cred = td->td_proc->p_ucred;

	vp = ap->a_vp;
	np = VTONW(vp);
	nmp = VFSTONWFS(vp->v_mount);
	pages = ap->a_m;
	count = (size_t)ap->a_count;

	if (vp->v_object == NULL) {
		kprintf("nwfs_getpages: called with non-merged cache vnode??\n");
		return VM_PAGER_ERROR;
	}

	bp = getpbuf_kva(&nwfs_pbuf_freecnt);
	npages = btoc(count);
	kva = (vm_offset_t) bp->b_data;
	pmap_qenter(kva, pages, npages);

	iov.iov_base = (caddr_t) kva;
	iov.iov_len = count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = IDX_TO_OFF(pages[0]->pindex);
	uio.uio_resid = count;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_td = td;

	error = ncp_read(NWFSTOCONN(nmp), &np->n_fh, &uio,cred);
	pmap_qremove(kva, npages);

	relpbuf(bp, &nwfs_pbuf_freecnt);

	if (error && (uio.uio_resid == count)) {
		kprintf("nwfs_getpages: error %d\n",error);
		for (i = 0; i < npages; i++) {
			if (ap->a_reqpage != i)
				vnode_pager_freepage(pages[i]);
		}
		return VM_PAGER_ERROR;
	}

	size = count - uio.uio_resid;

	for (i = 0, toff = 0; i < npages; i++, toff = nextoff) {
		vm_page_t m;
		nextoff = toff + PAGE_SIZE;
		m = pages[i];

		m->flags &= ~PG_ZERO;

		/*
		 * NOTE: pmap dirty bit should have already been cleared.
		 *	 We do not clear it here.
		 */
		if (nextoff <= size) {
			m->valid = VM_PAGE_BITS_ALL;
			m->dirty = 0;
		} else {
			int nvalid = ((size + DEV_BSIZE - 1) - toff) &
				      ~(DEV_BSIZE - 1);
			vm_page_set_validclean(m, 0, nvalid);
		}
		
		if (i != ap->a_reqpage) {
			/*
			 * Whether or not to leave the page activated is up in
			 * the air, but we should put the page on a page queue
			 * somewhere (it already is in the object).  Result:
			 * It appears that emperical results show that
			 * deactivating pages is best.
			 */

			/*
			 * Just in case someone was asking for this page we
			 * now tell them that it is ok to use.
			 */
			if (!error) {
				if (m->flags & PG_REFERENCED)
					vm_page_activate(m);
				else
					vm_page_deactivate(m);
				vm_page_wakeup(m);
			} else {
				vnode_pager_freepage(m);
			}
		}
	}
	return 0;
#endif /* NWFS_RWCACHE */
}
예제 #23
0
/*
 * small block filesystem vnode pager input
 */
static int
vnode_pager_input_smlfs(vm_object_t object, vm_page_t m)
{
	struct vnode *vp;
	struct bufobj *bo;
	struct buf *bp;
	struct sf_buf *sf;
	daddr_t fileaddr;
	vm_offset_t bsize;
	vm_page_bits_t bits;
	int error, i;

	error = 0;
	vp = object->handle;
	if (vp->v_iflag & VI_DOOMED)
		return VM_PAGER_BAD;

	bsize = vp->v_mount->mnt_stat.f_iosize;

	VOP_BMAP(vp, 0, &bo, 0, NULL, NULL);

	sf = sf_buf_alloc(m, 0);

	for (i = 0; i < PAGE_SIZE / bsize; i++) {
		vm_ooffset_t address;

		bits = vm_page_bits(i * bsize, bsize);
		if (m->valid & bits)
			continue;

		address = IDX_TO_OFF(m->pindex) + i * bsize;
		if (address >= object->un_pager.vnp.vnp_size) {
			fileaddr = -1;
		} else {
			error = vnode_pager_addr(vp, address, &fileaddr, NULL);
			if (error)
				break;
		}
		if (fileaddr != -1) {
			bp = getpbuf(&vnode_pbuf_freecnt);

			/* build a minimal buffer header */
			bp->b_iocmd = BIO_READ;
			bp->b_iodone = bdone;
			KASSERT(bp->b_rcred == NOCRED, ("leaking read ucred"));
			KASSERT(bp->b_wcred == NOCRED, ("leaking write ucred"));
			bp->b_rcred = crhold(curthread->td_ucred);
			bp->b_wcred = crhold(curthread->td_ucred);
			bp->b_data = (caddr_t)sf_buf_kva(sf) + i * bsize;
			bp->b_blkno = fileaddr;
			pbgetbo(bo, bp);
			bp->b_vp = vp;
			bp->b_bcount = bsize;
			bp->b_bufsize = bsize;
			bp->b_runningbufspace = bp->b_bufsize;
			atomic_add_long(&runningbufspace, bp->b_runningbufspace);

			/* do the input */
			bp->b_iooffset = dbtob(bp->b_blkno);
			bstrategy(bp);

			bwait(bp, PVM, "vnsrd");

			if ((bp->b_ioflags & BIO_ERROR) != 0)
				error = EIO;

			/*
			 * free the buffer header back to the swap buffer pool
			 */
			bp->b_vp = NULL;
			pbrelbo(bp);
			relpbuf(bp, &vnode_pbuf_freecnt);
			if (error)
				break;
		} else
			bzero((caddr_t)sf_buf_kva(sf) + i * bsize, bsize);
		KASSERT((m->dirty & bits) == 0,
		    ("vnode_pager_input_smlfs: page %p is dirty", m));
		VM_OBJECT_WLOCK(object);
		m->valid |= bits;
		VM_OBJECT_WUNLOCK(object);
	}
	sf_buf_free(sf);
	if (error) {
		return VM_PAGER_ERROR;
	}
	return VM_PAGER_OK;
}
예제 #24
0
/*
    struct vnop_putpages_args {
        struct vnode *a_vp;
        vm_page_t *a_m;
        int a_count;
        int a_sync;
        int *a_rtvals;
        vm_ooffset_t a_offset;
    };
*/
static int
fuse_vnop_putpages(struct vop_putpages_args *ap)
{
	struct uio uio;
	struct iovec iov;
	vm_offset_t kva;
	struct buf *bp;
	int i, error, npages, count;
	off_t offset;
	int *rtvals;
	struct vnode *vp;
	struct thread *td;
	struct ucred *cred;
	vm_page_t *pages;
	vm_ooffset_t fsize;

	FS_DEBUG2G("heh\n");

	vp = ap->a_vp;
	KASSERT(vp->v_object, ("objectless vp passed to putpages"));
	fsize = vp->v_object->un_pager.vnp.vnp_size;
	td = curthread;			/* XXX */
	cred = curthread->td_ucred;	/* XXX */
	pages = ap->a_m;
	count = ap->a_count;
	rtvals = ap->a_rtvals;
	npages = btoc(count);
	offset = IDX_TO_OFF(pages[0]->pindex);

	if (!fsess_opt_mmap(vnode_mount(vp))) {
		FS_DEBUG("called on non-cacheable vnode??\n");
	}
	for (i = 0; i < npages; i++)
		rtvals[i] = VM_PAGER_AGAIN;

	/*
	 * When putting pages, do not extend file past EOF.
	 */

	if (offset + count > fsize) {
		count = fsize - offset;
		if (count < 0)
			count = 0;
	}
	/*
	 * We use only the kva address for the buffer, but this is extremely
	 * convienient and fast.
	 */
	bp = getpbuf(&fuse_pbuf_freecnt);

	kva = (vm_offset_t)bp->b_data;
	pmap_qenter(kva, pages, npages);
	PCPU_INC(cnt.v_vnodeout);
	PCPU_ADD(cnt.v_vnodepgsout, count);

	iov.iov_base = (caddr_t)kva;
	iov.iov_len = count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = offset;
	uio.uio_resid = count;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_WRITE;
	uio.uio_td = td;

	error = fuse_io_dispatch(vp, &uio, IO_DIRECT, cred);

	pmap_qremove(kva, npages);
	relpbuf(bp, &fuse_pbuf_freecnt);

	if (!error) {
		int nwritten = round_page(count - uio.uio_resid) / PAGE_SIZE;

		for (i = 0; i < nwritten; i++) {
			rtvals[i] = VM_PAGER_OK;
			VM_OBJECT_WLOCK(pages[i]->object);
			vm_page_undirty(pages[i]);
			VM_OBJECT_WUNLOCK(pages[i]->object);
		}
	}
	return rtvals[0];
}
예제 #25
0
static int
devfs_spec_open(struct vop_open_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode *orig_vp = NULL;
	struct devfs_node *node = DEVFS_NODE(vp);
	struct devfs_node *newnode;
	cdev_t dev, ndev = NULL;
	int error = 0;

	if (node) {
		if (node->d_dev == NULL)
			return ENXIO;
		if (!devfs_node_is_accessible(node))
			return ENOENT;
	}

	if ((dev = vp->v_rdev) == NULL)
		return ENXIO;

	vn_lock(vp, LK_UPGRADE | LK_RETRY);

	if (node && ap->a_fp) {
		devfs_debug(DEVFS_DEBUG_DEBUG, "devfs_spec_open: -1.1-\n");
		lockmgr(&devfs_lock, LK_EXCLUSIVE);

		ndev = devfs_clone(dev, node->d_dir.d_name,
				   node->d_dir.d_namlen,
				   ap->a_mode, ap->a_cred);
		if (ndev != NULL) {
			newnode = devfs_create_device_node(
					DEVFS_MNTDATA(vp->v_mount)->root_node,
					ndev, NULL, NULL);
			/* XXX: possibly destroy device if this happens */

			if (newnode != NULL) {
				dev = ndev;
				devfs_link_dev(dev);

				devfs_debug(DEVFS_DEBUG_DEBUG,
						"parent here is: %s, node is: |%s|\n",
						((node->parent->node_type == Nroot) ?
						"ROOT!" : node->parent->d_dir.d_name),
						newnode->d_dir.d_name);
				devfs_debug(DEVFS_DEBUG_DEBUG,
						"test: %s\n",
						((struct devfs_node *)(TAILQ_LAST(DEVFS_DENODE_HEAD(node->parent), devfs_node_head)))->d_dir.d_name);

				/*
				 * orig_vp is set to the original vp if we cloned.
				 */
				/* node->flags |= DEVFS_CLONED; */
				devfs_allocv(&vp, newnode);
				orig_vp = ap->a_vp;
				ap->a_vp = vp;
			}
		}
		lockmgr(&devfs_lock, LK_RELEASE);
	}

	devfs_debug(DEVFS_DEBUG_DEBUG,
		    "devfs_spec_open() called on %s! \n",
		    dev->si_name);

	/*
	 * Make this field valid before any I/O in ->d_open
	 */
	if (!dev->si_iosize_max)
		/* XXX: old DFLTPHYS == 64KB dependency */
		dev->si_iosize_max = min(MAXPHYS,64*1024);

	if (dev_dflags(dev) & D_TTY)
		vsetflags(vp, VISTTY);

	/*
	 * Open underlying device
	 */
	vn_unlock(vp);
	error = dev_dopen(dev, ap->a_mode, S_IFCHR, ap->a_cred, ap->a_fp);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	/*
	 * Clean up any cloned vp if we error out.
	 */
	if (error) {
		if (orig_vp) {
			vput(vp);
			ap->a_vp = orig_vp;
			/* orig_vp = NULL; */
		}
		return error;
	}

	/*
	 * This checks if the disk device is going to be opened for writing.
	 * It will be only allowed in the cases where securelevel permits it
	 * and it's not mounted R/W.
	 */
	if ((dev_dflags(dev) & D_DISK) && (ap->a_mode & FWRITE) &&
	    (ap->a_cred != FSCRED)) {

		/* Very secure mode. No open for writing allowed */
		if (securelevel >= 2)
			return EPERM;

		/*
		 * If it is mounted R/W, do not allow to open for writing.
		 * In the case it's mounted read-only but securelevel
		 * is >= 1, then do not allow opening for writing either.
		 */
		if (vfs_mountedon(vp)) {
			if (!(dev->si_mountpoint->mnt_flag & MNT_RDONLY))
				return EBUSY;
			else if (securelevel >= 1)
				return EPERM;
		}
	}

	if (dev_dflags(dev) & D_TTY) {
		if (dev->si_tty) {
			struct tty *tp;
			tp = dev->si_tty;
			if (!tp->t_stop) {
				devfs_debug(DEVFS_DEBUG_DEBUG,
					    "devfs: no t_stop\n");
				tp->t_stop = nottystop;
			}
		}
	}


	if (vn_isdisk(vp, NULL)) {
		if (!dev->si_bsize_phys)
			dev->si_bsize_phys = DEV_BSIZE;
		vinitvmio(vp, IDX_TO_OFF(INT_MAX), PAGE_SIZE, -1);
	}

	vop_stdopen(ap);
#if 0
	if (node)
		nanotime(&node->atime);
#endif

	/*
	 * If we replaced the vp the vop_stdopen() call will have loaded
	 * it into fp->f_data and vref()d the vp, giving us two refs.  So
	 * instead of just unlocking it here we have to vput() it.
	 */
	if (orig_vp)
		vput(vp);

	/* Ugly pty magic, to make pty devices appear once they are opened */
	if (node && (node->flags & DEVFS_PTY) == DEVFS_PTY)
		node->flags &= ~DEVFS_INVISIBLE;

	if (ap->a_fp) {
		KKASSERT(ap->a_fp->f_type == DTYPE_VNODE);
		KKASSERT((ap->a_fp->f_flag & FMASK) == (ap->a_mode & FMASK));
		ap->a_fp->f_ops = &devfs_dev_fileops;
		KKASSERT(ap->a_fp->f_data == (void *)vp);
	}

	return 0;
}
예제 #26
0
/*
 * This is now called from local media FS's to operate against their
 * own vnodes if they fail to implement VOP_PUTPAGES.
 *
 * This is typically called indirectly via the pageout daemon and
 * clustering has already typically occurred, so in general we ask the
 * underlying filesystem to write the data out asynchronously rather
 * then delayed.
 */
int
vnode_pager_generic_putpages(struct vnode *vp, vm_page_t *ma, int bytecount,
    int flags, int *rtvals)
{
	vm_object_t object;
	vm_page_t m;
	vm_ooffset_t poffset;
	struct uio auio;
	struct iovec aiov;
	int count, error, i, maxsize, ncount, pgoff, ppscheck;
	static struct timeval lastfail;
	static int curfail;

	object = vp->v_object;
	count = bytecount / PAGE_SIZE;

	for (i = 0; i < count; i++)
		rtvals[i] = VM_PAGER_ERROR;

	if ((int64_t)ma[0]->pindex < 0) {
		printf("vnode_pager_generic_putpages: "
		    "attempt to write meta-data 0x%jx(%lx)\n",
		    (uintmax_t)ma[0]->pindex, (u_long)ma[0]->dirty);
		rtvals[0] = VM_PAGER_BAD;
		return (VM_PAGER_BAD);
	}

	maxsize = count * PAGE_SIZE;
	ncount = count;

	poffset = IDX_TO_OFF(ma[0]->pindex);

	/*
	 * If the page-aligned write is larger then the actual file we
	 * have to invalidate pages occurring beyond the file EOF.  However,
	 * there is an edge case where a file may not be page-aligned where
	 * the last page is partially invalid.  In this case the filesystem
	 * may not properly clear the dirty bits for the entire page (which
	 * could be VM_PAGE_BITS_ALL due to the page having been mmap()d).
	 * With the page locked we are free to fix-up the dirty bits here.
	 *
	 * We do not under any circumstances truncate the valid bits, as
	 * this will screw up bogus page replacement.
	 */
	VM_OBJECT_WLOCK(object);
	if (maxsize + poffset > object->un_pager.vnp.vnp_size) {
		if (object->un_pager.vnp.vnp_size > poffset) {
			maxsize = object->un_pager.vnp.vnp_size - poffset;
			ncount = btoc(maxsize);
			if ((pgoff = (int)maxsize & PAGE_MASK) != 0) {
				/*
				 * If the object is locked and the following
				 * conditions hold, then the page's dirty
				 * field cannot be concurrently changed by a
				 * pmap operation.
				 */
				m = ma[ncount - 1];
				vm_page_assert_sbusied(m);
				KASSERT(!pmap_page_is_write_mapped(m),
		("vnode_pager_generic_putpages: page %p is not read-only", m));
				MPASS(m->dirty != 0);
				vm_page_clear_dirty(m, pgoff, PAGE_SIZE -
				    pgoff);
			}
		} else {
			maxsize = 0;
			ncount = 0;
		}
		for (i = ncount; i < count; i++)
			rtvals[i] = VM_PAGER_BAD;
	}
	for (i = 0; i < ncount - ((btoc(maxsize) & PAGE_MASK) != 0); i++)
		MPASS(ma[i]->dirty == VM_PAGE_BITS_ALL);
	VM_OBJECT_WUNLOCK(object);

	aiov.iov_base = NULL;
	aiov.iov_len = maxsize;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = poffset;
	auio.uio_segflg = UIO_NOCOPY;
	auio.uio_rw = UIO_WRITE;
	auio.uio_resid = maxsize;
	auio.uio_td = NULL;
	error = VOP_WRITE(vp, &auio, vnode_pager_putpages_ioflags(flags),
	    curthread->td_ucred);
	VM_CNT_INC(v_vnodeout);
	VM_CNT_ADD(v_vnodepgsout, ncount);

	ppscheck = 0;
	if (error != 0 && (ppscheck = ppsratecheck(&lastfail, &curfail, 1))
	    != 0)
		printf("vnode_pager_putpages: I/O error %d\n", error);
	if (auio.uio_resid != 0 && (ppscheck != 0 ||
	    ppsratecheck(&lastfail, &curfail, 1) != 0))
		printf("vnode_pager_putpages: residual I/O %zd at %ju\n",
		    auio.uio_resid, (uintmax_t)ma[0]->pindex);
	for (i = 0; i < ncount; i++)
		rtvals[i] = VM_PAGER_OK;
	return (rtvals[0]);
}
예제 #27
0
/*
 * This is now called from local media FS's to operate against their
 * own vnodes if they fail to implement VOP_GETPAGES.
 */
int
vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int bytecount,
    int reqpage, vop_getpages_iodone_t iodone, void *arg)
{
	vm_object_t object;
	struct bufobj *bo;
	struct buf *bp;
	daddr_t firstaddr, reqblock;
	off_t foff, pib;
	int pbefore, pafter, i, size, bsize, first, last, *freecnt;
	int count, error, before, after, secmask;

	KASSERT(vp->v_type != VCHR && vp->v_type != VBLK,
	    ("vnode_pager_generic_getpages does not support devices"));
	if (vp->v_iflag & VI_DOOMED)
		return (VM_PAGER_BAD);

	object = vp->v_object;
	count = bytecount / PAGE_SIZE;
	bsize = vp->v_mount->mnt_stat.f_iosize;

	/*
	 * Synchronous and asynchronous paging operations use different
	 * free pbuf counters.  This is done to avoid asynchronous requests
	 * to consume all pbufs.
	 * Allocate the pbuf at the very beginning of the function, so that
	 * if we are low on certain kind of pbufs don't even proceed to BMAP,
	 * but sleep.
	 */
	freecnt = iodone != NULL ?
	    &vnode_async_pbuf_freecnt : &vnode_pbuf_freecnt;
	bp = getpbuf(freecnt);

	/*
	 * Get the underlying device blocks for the file with VOP_BMAP().
	 * If the file system doesn't support VOP_BMAP, use old way of
	 * getting pages via VOP_READ.
	 */
	error = VOP_BMAP(vp, IDX_TO_OFF(m[reqpage]->pindex) / bsize, &bo,
	    &reqblock, &after, &before);
	if (error == EOPNOTSUPP) {
		relpbuf(bp, freecnt);
		VM_OBJECT_WLOCK(object);
		for (i = 0; i < count; i++)
			if (i != reqpage) {
				vm_page_lock(m[i]);
				vm_page_free(m[i]);
				vm_page_unlock(m[i]);
			}
		PCPU_INC(cnt.v_vnodein);
		PCPU_INC(cnt.v_vnodepgsin);
		error = vnode_pager_input_old(object, m[reqpage]);
		VM_OBJECT_WUNLOCK(object);
		return (error);
	} else if (error != 0) {
		relpbuf(bp, freecnt);
		vm_pager_free_nonreq(object, m, reqpage, count, FALSE);
		return (VM_PAGER_ERROR);

		/*
		 * If the blocksize is smaller than a page size, then use
		 * special small filesystem code.
		 */
	} else if ((PAGE_SIZE / bsize) > 1) {
		relpbuf(bp, freecnt);
		vm_pager_free_nonreq(object, m, reqpage, count, FALSE);
		PCPU_INC(cnt.v_vnodein);
		PCPU_INC(cnt.v_vnodepgsin);
		return (vnode_pager_input_smlfs(object, m[reqpage]));
	}

	/*
	 * Since the caller has busied the requested page, that page's valid
	 * field will not be changed by other threads.
	 */
	vm_page_assert_xbusied(m[reqpage]);

	/*
	 * If we have a completely valid page available to us, we can
	 * clean up and return.  Otherwise we have to re-read the
	 * media.
	 */
	if (m[reqpage]->valid == VM_PAGE_BITS_ALL) {
		relpbuf(bp, freecnt);
		vm_pager_free_nonreq(object, m, reqpage, count, FALSE);
		return (VM_PAGER_OK);
	} else if (reqblock == -1) {
		relpbuf(bp, freecnt);
		pmap_zero_page(m[reqpage]);
		KASSERT(m[reqpage]->dirty == 0,
		    ("vnode_pager_generic_getpages: page %p is dirty", m));
		VM_OBJECT_WLOCK(object);
		m[reqpage]->valid = VM_PAGE_BITS_ALL;
		vm_pager_free_nonreq(object, m, reqpage, count, TRUE);
		VM_OBJECT_WUNLOCK(object);
		return (VM_PAGER_OK);
	} else if (m[reqpage]->valid != 0) {
		VM_OBJECT_WLOCK(object);
		m[reqpage]->valid = 0;
		VM_OBJECT_WUNLOCK(object);
	}

	pib = IDX_TO_OFF(m[reqpage]->pindex) % bsize;
	pbefore = ((daddr_t)before * bsize + pib) / PAGE_SIZE;
	pafter = ((daddr_t)(after + 1) * bsize - pib) / PAGE_SIZE - 1;
	first = reqpage < pbefore ? 0 : reqpage - pbefore;
	last = reqpage + pafter >= count ? count - 1 : reqpage + pafter;
	if (first > 0 || last + 1 < count) {
		VM_OBJECT_WLOCK(object);
		for (i = 0; i < first; i++) {
			vm_page_lock(m[i]);
			vm_page_free(m[i]);
			vm_page_unlock(m[i]);
		}
		for (i = last + 1; i < count; i++) {
			vm_page_lock(m[i]);
			vm_page_free(m[i]);
			vm_page_unlock(m[i]);
		}
		VM_OBJECT_WUNLOCK(object);
	}

	/*
	 * here on direct device I/O
	 */
	firstaddr = reqblock;
	firstaddr += pib / DEV_BSIZE;
	firstaddr -= IDX_TO_OFF(reqpage - first) / DEV_BSIZE;

	/*
	 * The first and last page have been calculated now, move
	 * input pages to be zero based, and adjust the count.
	 */
	m += first;
	reqpage -= first;
	count = last - first + 1;

	/*
	 * calculate the file virtual address for the transfer
	 */
	foff = IDX_TO_OFF(m[0]->pindex);

	/*
	 * calculate the size of the transfer
	 */
	size = count * PAGE_SIZE;
	KASSERT(count > 0, ("zero count"));
	if ((foff + size) > object->un_pager.vnp.vnp_size)
		size = object->un_pager.vnp.vnp_size - foff;
	KASSERT(size > 0, ("zero size"));

	/*
	 * round up physical size for real devices.
	 */
	secmask = bo->bo_bsize - 1;
	KASSERT(secmask < PAGE_SIZE && secmask > 0,
	    ("vnode_pager_generic_getpages: sector size %d too large",
	    secmask + 1));
	size = (size + secmask) & ~secmask;

	/*
	 * and map the pages to be read into the kva, if the filesystem
	 * requires mapped buffers.
	 */
	if ((vp->v_mount->mnt_kern_flag & MNTK_UNMAPPED_BUFS) != 0 &&
	    unmapped_buf_allowed) {
		bp->b_data = unmapped_buf;
		bp->b_offset = 0;
	} else {
		bp->b_data = bp->b_kvabase;
		pmap_qenter((vm_offset_t)bp->b_data, m, count);
	}

	/* build a minimal buffer header */
	bp->b_iocmd = BIO_READ;
	KASSERT(bp->b_rcred == NOCRED, ("leaking read ucred"));
	KASSERT(bp->b_wcred == NOCRED, ("leaking write ucred"));
	bp->b_rcred = crhold(curthread->td_ucred);
	bp->b_wcred = crhold(curthread->td_ucred);
	bp->b_blkno = firstaddr;
	pbgetbo(bo, bp);
	bp->b_vp = vp;
	bp->b_bcount = size;
	bp->b_bufsize = size;
	bp->b_runningbufspace = bp->b_bufsize;
	for (i = 0; i < count; i++)
		bp->b_pages[i] = m[i];
	bp->b_npages = count;
	bp->b_pager.pg_reqpage = reqpage;
	atomic_add_long(&runningbufspace, bp->b_runningbufspace);

	PCPU_INC(cnt.v_vnodein);
	PCPU_ADD(cnt.v_vnodepgsin, count);

	/* do the input */
	bp->b_iooffset = dbtob(bp->b_blkno);

	if (iodone != NULL) { /* async */
		bp->b_pager.pg_iodone = iodone;
		bp->b_caller1 = arg;
		bp->b_iodone = vnode_pager_generic_getpages_done_async;
		bp->b_flags |= B_ASYNC;
		BUF_KERNPROC(bp);
		bstrategy(bp);
		/* Good bye! */
	} else {
		bp->b_iodone = bdone;
		bstrategy(bp);
		bwait(bp, PVM, "vnread");
		error = vnode_pager_generic_getpages_done(bp);
		for (i = 0; i < bp->b_npages; i++)
			bp->b_pages[i] = NULL;
		bp->b_vp = NULL;
		pbrelbo(bp);
		relpbuf(bp, &vnode_pbuf_freecnt);
	}

	return (error != 0 ? VM_PAGER_ERROR : VM_PAGER_OK);
}
예제 #28
0
/*
 * Vnode op for VM putpages.
 * possible bug: all IO done in sync mode
 * Note that vop_close always invalidate pages before close, so it's
 * not necessary to open vnode.
 *
 * smbfs_putpages(struct vnode *a_vp, vm_page_t *a_m, int a_count, int a_sync,
 *		  int *a_rtvals, vm_ooffset_t a_offset)
 */
int
smbfs_putpages(struct vop_putpages_args *ap)
{
	int error;
	struct vnode *vp = ap->a_vp;
	struct thread *td = curthread;	/* XXX */
	struct ucred *cred;

#ifdef SMBFS_RWGENERIC
	KKASSERT(td->td_proc);
	cred = td->td_proc->p_ucred;
	VOP_OPEN(vp, FWRITE, cred, NULL);
	error = vop_stdputpages(ap);
	VOP_CLOSE(vp, FWRITE, cred);
	return error;
#else
	struct uio uio;
	struct iovec iov;
	vm_offset_t kva;
	struct buf *bp;
	int i, npages, count;
	int doclose;
	int *rtvals;
	struct smbmount *smp;
	struct smbnode *np;
	struct smb_cred scred;
	vm_page_t *pages;

	KKASSERT(td->td_proc);
	cred = td->td_proc->p_ucred;
/*	VOP_OPEN(vp, FWRITE, cred, NULL);*/
	np = VTOSMB(vp);
	smp = VFSTOSMBFS(vp->v_mount);
	pages = ap->a_m;
	count = ap->a_count;
	rtvals = ap->a_rtvals;
	npages = btoc(count);

	for (i = 0; i < npages; i++) {
		rtvals[i] = VM_PAGER_AGAIN;
	}

	bp = getpbuf_kva(&smbfs_pbuf_freecnt);
	kva = (vm_offset_t) bp->b_data;
	pmap_qenter(kva, pages, npages);

	iov.iov_base = (caddr_t) kva;
	iov.iov_len = count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = IDX_TO_OFF(pages[0]->pindex);
	uio.uio_resid = count;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_WRITE;
	uio.uio_td = td;
	SMBVDEBUG("ofs=%d,resid=%d\n",(int)uio.uio_offset, uio.uio_resid);

	smb_makescred(&scred, td, cred);

	/*
	 * This is kinda nasty.  Since smbfs is physically closing the
	 * fid on close(), we have to reopen it if necessary.  There are
	 * other races here too, such as if another process opens the same
	 * file while we are blocked in read, or the file is open read-only
	 * XXX
	 */
	error = 0;
	doclose = 0;
	if (np->n_opencount == 0) {
		error = smbfs_smb_open(np, SMB_AM_OPENRW, &scred);
		if (error == 0)
			doclose = 1;
	}
	if (error == 0)
		error = smb_write(smp->sm_share, np->n_fid, &uio, &scred);
	if (doclose)
		smbfs_smb_close(smp->sm_share, np->n_fid, NULL, &scred);
/*	VOP_CLOSE(vp, FWRITE, cred);*/
	SMBVDEBUG("paged write done: %d\n", error);

	pmap_qremove(kva, npages);
	relpbuf(bp, &smbfs_pbuf_freecnt);

	if (!error) {
		int nwritten = round_page(count - uio.uio_resid) / PAGE_SIZE;
		for (i = 0; i < nwritten; i++) {
			rtvals[i] = VM_PAGER_OK;
			vm_page_undirty(pages[i]);
		}
	}
	return rtvals[0];
#endif /* SMBFS_RWGENERIC */
}