Esempio n. 1
0
/*
 * old style vnode pager input routine
 */
static int
vnode_pager_input_old(vm_object_t object, vm_page_t m)
{
	struct uio auio;
	struct iovec aiov;
	int error;
	int size;
	struct sf_buf *sf;
	struct vnode *vp;

	VM_OBJECT_ASSERT_WLOCKED(object);
	error = 0;

	/*
	 * Return failure if beyond current EOF
	 */
	if (IDX_TO_OFF(m->pindex) >= object->un_pager.vnp.vnp_size) {
		return VM_PAGER_BAD;
	} else {
		size = PAGE_SIZE;
		if (IDX_TO_OFF(m->pindex) + size > object->un_pager.vnp.vnp_size)
			size = object->un_pager.vnp.vnp_size - IDX_TO_OFF(m->pindex);
		vp = object->handle;
		VM_OBJECT_WUNLOCK(object);

		/*
		 * Allocate a kernel virtual address and initialize so that
		 * we can use VOP_READ/WRITE routines.
		 */
		sf = sf_buf_alloc(m, 0);

		aiov.iov_base = (caddr_t)sf_buf_kva(sf);
		aiov.iov_len = size;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = IDX_TO_OFF(m->pindex);
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_READ;
		auio.uio_resid = size;
		auio.uio_td = curthread;

		error = VOP_READ(vp, &auio, 0, curthread->td_ucred);
		if (!error) {
			int count = size - auio.uio_resid;

			if (count == 0)
				error = EINVAL;
			else if (count != PAGE_SIZE)
				bzero((caddr_t)sf_buf_kva(sf) + count,
				    PAGE_SIZE - count);
		}
		sf_buf_free(sf);

		VM_OBJECT_WLOCK(object);
	}
	KASSERT(m->dirty == 0, ("vnode_pager_input_old: page %p is dirty", m));
	if (!error)
		m->valid = VM_PAGE_BITS_ALL;
	return error ? VM_PAGER_ERROR : VM_PAGER_OK;
}
Esempio n. 2
0
/*
 *	The object must be locked.
 */
static void
vnode_pager_dealloc(vm_object_t object)
{
	struct vnode *vp;
	int refs;

	vp = object->handle;
	if (vp == NULL)
		panic("vnode_pager_dealloc: pager already dealloced");

	VM_OBJECT_ASSERT_WLOCKED(object);
	vm_object_pip_wait(object, "vnpdea");
	refs = object->ref_count;

	object->handle = NULL;
	object->type = OBJT_DEAD;
	if (object->flags & OBJ_DISCONNECTWNT) {
		vm_object_clear_flag(object, OBJ_DISCONNECTWNT);
		wakeup(object);
	}
	ASSERT_VOP_ELOCKED(vp, "vnode_pager_dealloc");
	if (object->un_pager.vnp.writemappings > 0) {
		object->un_pager.vnp.writemappings = 0;
		VOP_ADD_WRITECOUNT(vp, -1);
		CTR3(KTR_VFS, "%s: vp %p v_writecount decreased to %d",
		    __func__, vp, vp->v_writecount);
	}
	vp->v_object = NULL;
	VOP_UNSET_TEXT(vp);
	VM_OBJECT_WUNLOCK(object);
	while (refs-- > 0)
		vunref(vp);
	VM_OBJECT_WLOCK(object);
}
Esempio n. 3
0
/*
 * Fill as many pages as vm_fault has allocated for us.
 */
static int
phys_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage)
{
	int i;

	VM_OBJECT_ASSERT_WLOCKED(object);
	for (i = 0; i < count; i++) {
		if (m[i]->valid == 0) {
			if ((m[i]->flags & PG_ZERO) == 0)
				pmap_zero_page(m[i]);
			m[i]->valid = VM_PAGE_BITS_ALL;
		}
		KASSERT(m[i]->valid == VM_PAGE_BITS_ALL,
		    ("phys_pager_getpages: partially valid page %p", m[i]));
		KASSERT(m[i]->dirty == 0,
		    ("phys_pager_getpages: dirty page %p", m[i]));
		/* The requested page must remain busy, the others not. */
		if (i == reqpage) {
			vm_page_lock(m[i]);
			vm_page_flash(m[i]);
			vm_page_unlock(m[i]);
		} else
			vm_page_xunbusy(m[i]);
	}
	return (VM_PAGER_OK);
}
Esempio n. 4
0
static void
dev_pager_free_page(vm_object_t object, vm_page_t m)
{

	VM_OBJECT_ASSERT_WLOCKED(object);
	KASSERT((object->type == OBJT_DEVICE &&
	    (m->oflags & VPO_UNMANAGED) != 0),
	    ("Managed device or page obj %p m %p", object, m));
	TAILQ_REMOVE(&object->un_pager.devp.devp_pglist, m, pageq);
	vm_page_putfake(m);
}
Esempio n. 5
0
void
cdev_pager_free_page(vm_object_t object, vm_page_t m)
{

	VM_OBJECT_ASSERT_WLOCKED(object);
	if (object->type == OBJT_MGTDEVICE) {
		KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("unmanaged %p", m));
		pmap_remove_all(m);
		vm_page_lock(m);
		vm_page_remove(m);
		vm_page_unlock(m);
	} else if (object->type == OBJT_DEVICE)
		dev_pager_free_page(object, m);
}
Esempio n. 6
0
/*
 * Speed up the reclamation of up to "distance" pages that precede the
 * faulting pindex within the first object of the shadow chain.
 */
static void
vm_fault_cache_behind(const struct faultstate *fs, int distance)
{
	vm_object_t first_object, object;
	vm_page_t m, m_prev;
	vm_pindex_t pindex;

	object = fs->object;
	VM_OBJECT_ASSERT_WLOCKED(object);
	first_object = fs->first_object;
	if (first_object != object) {
		if (!VM_OBJECT_TRYWLOCK(first_object)) {
			VM_OBJECT_WUNLOCK(object);
			VM_OBJECT_WLOCK(first_object);
			VM_OBJECT_WLOCK(object);
		}
	}
	/* Neither fictitious nor unmanaged pages can be cached. */
	if ((first_object->flags & (OBJ_FICTITIOUS | OBJ_UNMANAGED)) == 0) {
		if (fs->first_pindex < distance)
			pindex = 0;
		else
			pindex = fs->first_pindex - distance;
		if (pindex < OFF_TO_IDX(fs->entry->offset))
			pindex = OFF_TO_IDX(fs->entry->offset);
		m = first_object != object ? fs->first_m : fs->m;
		KASSERT((m->oflags & VPO_BUSY) != 0,
		    ("vm_fault_cache_behind: page %p is not busy", m));
		m_prev = vm_page_prev(m);
		while ((m = m_prev) != NULL && m->pindex >= pindex &&
		    m->valid == VM_PAGE_BITS_ALL) {
			m_prev = vm_page_prev(m);
			if (m->busy != 0 || (m->oflags & VPO_BUSY) != 0)
				continue;
			vm_page_lock(m);
			if (m->hold_count == 0 && m->wire_count == 0) {
				pmap_remove_all(m);
				vm_page_aflag_clear(m, PGA_REFERENCED);
				if (m->dirty != 0)
					vm_page_deactivate(m);
				else
					vm_page_cache(m);
			}
			vm_page_unlock(m);
		}
	}
	if (first_object != object)
		VM_OBJECT_WUNLOCK(first_object);
}
Esempio n. 7
0
static int
sg_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage)
{
	struct sglist *sg;
	vm_page_t m_paddr, page;
	vm_pindex_t offset;
	vm_paddr_t paddr;
	vm_memattr_t memattr;
	size_t space;
	int i;

	VM_OBJECT_ASSERT_WLOCKED(object);
	sg = object->handle;
	memattr = object->memattr;
	VM_OBJECT_WUNLOCK(object);
	offset = m[reqpage]->pindex;

	/*
	 * Lookup the physical address of the requested page.  An initial
	 * value of '1' instead of '0' is used so we can assert that the
	 * page is found since '0' can be a valid page-aligned physical
	 * address.
	 */
	space = 0;
	paddr = 1;
	for (i = 0; i < sg->sg_nseg; i++) {
		if (space + sg->sg_segs[i].ss_len <= (offset * PAGE_SIZE)) {
			space += sg->sg_segs[i].ss_len;
			continue;
		}
		paddr = sg->sg_segs[i].ss_paddr + offset * PAGE_SIZE - space;
		break;
	}
	KASSERT(paddr != 1, ("invalid SG page index"));

	/* If "paddr" is a real page, perform a sanity check on "memattr". */
	if ((m_paddr = vm_phys_paddr_to_vm_page(paddr)) != NULL &&
	    pmap_page_get_memattr(m_paddr) != memattr) {
		memattr = pmap_page_get_memattr(m_paddr);
		printf(
	    "WARNING: A device driver has set \"memattr\" inconsistently.\n");
	}

	/* Return a fake page for the requested page. */
	KASSERT(!(m[reqpage]->flags & PG_FICTITIOUS),
	    ("backing page for SG is fake"));

	/* Construct a new fake page. */
	page = vm_page_getfake(paddr, memattr);
	VM_OBJECT_WLOCK(object);
	TAILQ_INSERT_TAIL(&object->un_pager.sgp.sgp_pglist, page, plinks.q);

	/* Free the original pages and insert this fake page into the object. */
	for (i = 0; i < count; i++) {
		if (i == reqpage &&
		    vm_page_replace(page, object, offset) != m[i])
			panic("sg_pager_getpages: invalid place replacement");
		vm_page_lock(m[i]);
		vm_page_free(m[i]);
		vm_page_unlock(m[i]);
	}
	m[reqpage] = page;
	page->valid = VM_PAGE_BITS_ALL;

	return (VM_PAGER_OK);
}
Esempio n. 8
0
/*
 * If blocks are contiguous on disk, use this to provide clustered
 * read ahead.  We will read as many blocks as possible sequentially
 * and then parcel them up into logical blocks in the buffer hash table.
 */
static struct buf *
cluster_rbuild(struct vnode *vp, u_quad_t filesize, daddr_t lbn,
    daddr_t blkno, long size, int run, int gbflags, struct buf *fbp)
{
	struct buf *bp, *tbp;
	daddr_t bn;
	off_t off;
	long tinc, tsize;
	int i, inc, j, k, toff;

	KASSERT(size == vp->v_mount->mnt_stat.f_iosize,
	    ("cluster_rbuild: size %ld != f_iosize %jd\n",
	    size, (intmax_t)vp->v_mount->mnt_stat.f_iosize));

	/*
	 * avoid a division
	 */
	while ((u_quad_t) size * (lbn + run) > filesize) {
		--run;
	}

	if (fbp) {
		tbp = fbp;
		tbp->b_iocmd = BIO_READ; 
	} else {
		tbp = getblk(vp, lbn, size, 0, 0, gbflags);
		if (tbp->b_flags & B_CACHE)
			return tbp;
		tbp->b_flags |= B_ASYNC | B_RAM;
		tbp->b_iocmd = BIO_READ;
	}
	tbp->b_blkno = blkno;
	if( (tbp->b_flags & B_MALLOC) ||
		((tbp->b_flags & B_VMIO) == 0) || (run <= 1) )
		return tbp;

	bp = trypbuf(&cluster_pbuf_freecnt);
	if (bp == NULL)
		return tbp;

	/*
	 * We are synthesizing a buffer out of vm_page_t's, but
	 * if the block size is not page aligned then the starting
	 * address may not be either.  Inherit the b_data offset
	 * from the original buffer.
	 */
	bp->b_flags = B_ASYNC | B_CLUSTER | B_VMIO;
	if ((gbflags & GB_UNMAPPED) != 0) {
		bp->b_data = unmapped_buf;
	} else {
		bp->b_data = (char *)((vm_offset_t)bp->b_data |
		    ((vm_offset_t)tbp->b_data & PAGE_MASK));
	}
	bp->b_iocmd = BIO_READ;
	bp->b_iodone = cluster_callback;
	bp->b_blkno = blkno;
	bp->b_lblkno = lbn;
	bp->b_offset = tbp->b_offset;
	KASSERT(bp->b_offset != NOOFFSET, ("cluster_rbuild: no buffer offset"));
	pbgetvp(vp, bp);

	TAILQ_INIT(&bp->b_cluster.cluster_head);

	bp->b_bcount = 0;
	bp->b_bufsize = 0;
	bp->b_npages = 0;

	inc = btodb(size);
	for (bn = blkno, i = 0; i < run; ++i, bn += inc) {
		if (i == 0) {
			VM_OBJECT_WLOCK(tbp->b_bufobj->bo_object);
			vfs_drain_busy_pages(tbp);
			vm_object_pip_add(tbp->b_bufobj->bo_object,
			    tbp->b_npages);
			for (k = 0; k < tbp->b_npages; k++)
				vm_page_sbusy(tbp->b_pages[k]);
			VM_OBJECT_WUNLOCK(tbp->b_bufobj->bo_object);
		} else {
			if ((bp->b_npages * PAGE_SIZE) +
			    round_page(size) > vp->v_mount->mnt_iosize_max) {
				break;
			}

			tbp = getblk(vp, lbn + i, size, 0, 0, GB_LOCK_NOWAIT |
			    (gbflags & GB_UNMAPPED));

			/* Don't wait around for locked bufs. */
			if (tbp == NULL)
				break;

			/*
			 * Stop scanning if the buffer is fully valid
			 * (marked B_CACHE), or locked (may be doing a
			 * background write), or if the buffer is not
			 * VMIO backed.  The clustering code can only deal
			 * with VMIO-backed buffers.  The bo lock is not
			 * required for the BKGRDINPROG check since it
			 * can not be set without the buf lock.
			 */
			if ((tbp->b_vflags & BV_BKGRDINPROG) ||
			    (tbp->b_flags & B_CACHE) ||
			    (tbp->b_flags & B_VMIO) == 0) {
				bqrelse(tbp);
				break;
			}

			/*
			 * The buffer must be completely invalid in order to
			 * take part in the cluster.  If it is partially valid
			 * then we stop.
			 */
			off = tbp->b_offset;
			tsize = size;
			VM_OBJECT_WLOCK(tbp->b_bufobj->bo_object);
			for (j = 0; tsize > 0; j++) {
				toff = off & PAGE_MASK;
				tinc = tsize;
				if (toff + tinc > PAGE_SIZE)
					tinc = PAGE_SIZE - toff;
				VM_OBJECT_ASSERT_WLOCKED(tbp->b_pages[j]->object);
				if ((tbp->b_pages[j]->valid &
				    vm_page_bits(toff, tinc)) != 0)
					break;
				if (vm_page_xbusied(tbp->b_pages[j]))
					break;
				vm_object_pip_add(tbp->b_bufobj->bo_object, 1);
				vm_page_sbusy(tbp->b_pages[j]);
				off += tinc;
				tsize -= tinc;
			}
			if (tsize > 0) {
clean_sbusy:
				vm_object_pip_add(tbp->b_bufobj->bo_object, -j);
				for (k = 0; k < j; k++)
					vm_page_sunbusy(tbp->b_pages[k]);
				VM_OBJECT_WUNLOCK(tbp->b_bufobj->bo_object);
				bqrelse(tbp);
				break;
			}
			VM_OBJECT_WUNLOCK(tbp->b_bufobj->bo_object);

			/*
			 * Set a read-ahead mark as appropriate
			 */
			if ((fbp && (i == 1)) || (i == (run - 1)))
				tbp->b_flags |= B_RAM;

			/*
			 * Set the buffer up for an async read (XXX should
			 * we do this only if we do not wind up brelse()ing?).
			 * Set the block number if it isn't set, otherwise
			 * if it is make sure it matches the block number we
			 * expect.
			 */
			tbp->b_flags |= B_ASYNC;
			tbp->b_iocmd = BIO_READ;
			if (tbp->b_blkno == tbp->b_lblkno) {
				tbp->b_blkno = bn;
			} else if (tbp->b_blkno != bn) {
				VM_OBJECT_WLOCK(tbp->b_bufobj->bo_object);
				goto clean_sbusy;
			}
		}
		/*
		 * XXX fbp from caller may not be B_ASYNC, but we are going
		 * to biodone() it in cluster_callback() anyway
		 */
		BUF_KERNPROC(tbp);
		TAILQ_INSERT_TAIL(&bp->b_cluster.cluster_head,
			tbp, b_cluster.cluster_entry);
		VM_OBJECT_WLOCK(tbp->b_bufobj->bo_object);
		for (j = 0; j < tbp->b_npages; j += 1) {
			vm_page_t m;
			m = tbp->b_pages[j];
			if ((bp->b_npages == 0) ||
			    (bp->b_pages[bp->b_npages-1] != m)) {
				bp->b_pages[bp->b_npages] = m;
				bp->b_npages++;
			}
			if (m->valid == VM_PAGE_BITS_ALL)
				tbp->b_pages[j] = bogus_page;
		}
		VM_OBJECT_WUNLOCK(tbp->b_bufobj->bo_object);
		/*
		 * Don't inherit tbp->b_bufsize as it may be larger due to
		 * a non-page-aligned size.  Instead just aggregate using
		 * 'size'.
		 */
		if (tbp->b_bcount != size)
			printf("warning: tbp->b_bcount wrong %ld vs %ld\n", tbp->b_bcount, size);
		if (tbp->b_bufsize != size)
			printf("warning: tbp->b_bufsize wrong %ld vs %ld\n", tbp->b_bufsize, size);
		bp->b_bcount += size;
		bp->b_bufsize += size;
	}

	/*
	 * Fully valid pages in the cluster are already good and do not need
	 * to be re-read from disk.  Replace the page with bogus_page
	 */
	VM_OBJECT_WLOCK(bp->b_bufobj->bo_object);
	for (j = 0; j < bp->b_npages; j++) {
		VM_OBJECT_ASSERT_WLOCKED(bp->b_pages[j]->object);
		if (bp->b_pages[j]->valid == VM_PAGE_BITS_ALL)
			bp->b_pages[j] = bogus_page;
	}
	VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object);
	if (bp->b_bufsize > bp->b_kvasize)
		panic("cluster_rbuild: b_bufsize(%ld) > b_kvasize(%d)\n",
		    bp->b_bufsize, bp->b_kvasize);

	if (buf_mapped(bp)) {
		pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
		    (vm_page_t *)bp->b_pages, bp->b_npages);
	}
	return (bp);
}
Esempio n. 9
0
static boolean_t
vnode_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before,
    int *after)
{
	struct vnode *vp = object->handle;
	daddr_t bn;
	int err;
	daddr_t reqblock;
	int poff;
	int bsize;
	int pagesperblock, blocksperpage;

	VM_OBJECT_ASSERT_WLOCKED(object);
	/*
	 * If no vp or vp is doomed or marked transparent to VM, we do not
	 * have the page.
	 */
	if (vp == NULL || vp->v_iflag & VI_DOOMED)
		return FALSE;
	/*
	 * If the offset is beyond end of file we do
	 * not have the page.
	 */
	if (IDX_TO_OFF(pindex) >= object->un_pager.vnp.vnp_size)
		return FALSE;

	bsize = vp->v_mount->mnt_stat.f_iosize;
	pagesperblock = bsize / PAGE_SIZE;
	blocksperpage = 0;
	if (pagesperblock > 0) {
		reqblock = pindex / pagesperblock;
	} else {
		blocksperpage = (PAGE_SIZE / bsize);
		reqblock = pindex * blocksperpage;
	}
	VM_OBJECT_WUNLOCK(object);
	err = VOP_BMAP(vp, reqblock, NULL, &bn, after, before);
	VM_OBJECT_WLOCK(object);
	if (err)
		return TRUE;
	if (bn == -1)
		return FALSE;
	if (pagesperblock > 0) {
		poff = pindex - (reqblock * pagesperblock);
		if (before) {
			*before *= pagesperblock;
			*before += poff;
		}
		if (after) {
			/*
			 * The BMAP vop can report a partial block in the
			 * 'after', but must not report blocks after EOF.
			 * Assert the latter, and truncate 'after' in case
			 * of the former.
			 */
			KASSERT((reqblock + *after) * pagesperblock <
			    roundup2(object->size, pagesperblock),
			    ("%s: reqblock %jd after %d size %ju", __func__,
			    (intmax_t )reqblock, *after,
			    (uintmax_t )object->size));
			*after *= pagesperblock;
			*after += pagesperblock - (poff + 1);
			if (pindex + *after >= object->size)
				*after = object->size - 1 - pindex;
		}
	} else {
		if (before) {
			*before /= blocksperpage;
		}

		if (after) {
			*after /= blocksperpage;
		}
	}
	return TRUE;
}