Example 1
0
static void
vnode_pager_generic_getpages_done_async(struct buf *bp)
{
	int error;

	error = vnode_pager_generic_getpages_done(bp);
	bp->b_pager.pg_iodone(bp->b_caller1, bp->b_pages,
	  bp->b_pager.pg_reqpage, error);
	for (int i = 0; i < bp->b_npages; i++)
		bp->b_pages[i] = NULL;
	bp->b_vp = NULL;
	pbrelbo(bp);
	relpbuf(bp, &vnode_async_pbuf_freecnt);
}
Example 2
0
static void
vnode_pager_generic_getpages_done_async(struct buf *bp)
{
	int error;

	error = vnode_pager_generic_getpages_done(bp);
	/* Run the iodone upon the requested range. */
	bp->b_pgiodone(bp->b_caller1, bp->b_pages + bp->b_pgbefore,
	    bp->b_npages - bp->b_pgbefore - bp->b_pgafter, error);
	for (int i = 0; i < bp->b_npages; i++)
		bp->b_pages[i] = NULL;
	bp->b_vp = NULL;
	pbrelbo(bp);
	relpbuf(bp, &vnode_async_pbuf_freecnt);
}
Example 3
0
int
physio(struct cdev *dev, struct uio *uio, int ioflag)
{
	int i;
	int error;
	caddr_t sa;
	u_int iolen;
	struct buf *bp;

	/* Keep the process UPAGES from being swapped. XXX: why ? */
	PHOLD(curproc);

	bp = getpbuf(NULL);
	sa = bp->b_data;
	error = 0;

	/* XXX: sanity check */
	if(dev->si_iosize_max < PAGE_SIZE) {
		printf("WARNING: %s si_iosize_max=%d, using DFLTPHYS.\n",
		    devtoname(dev), dev->si_iosize_max);
		dev->si_iosize_max = DFLTPHYS;
	}

	for (i = 0; i < uio->uio_iovcnt; i++) {
		while (uio->uio_iov[i].iov_len) {
			bp->b_flags = 0;
			if (uio->uio_rw == UIO_READ) {
				bp->b_iocmd = BIO_READ;
				curthread->td_ru.ru_inblock++;
			} else {
				bp->b_iocmd = BIO_WRITE;
				curthread->td_ru.ru_oublock++;
			}
			bp->b_iodone = bdone;
			bp->b_data = uio->uio_iov[i].iov_base;
			bp->b_bcount = uio->uio_iov[i].iov_len;
			bp->b_offset = uio->uio_offset;
			bp->b_iooffset = uio->uio_offset;
			bp->b_saveaddr = sa;

			/* Don't exceed drivers iosize limit */
			if (bp->b_bcount > dev->si_iosize_max)
				bp->b_bcount = dev->si_iosize_max;

			/* 
			 * Make sure the pbuf can map the request
			 * XXX: The pbuf has kvasize = MAXPHYS so a request
			 * XXX: larger than MAXPHYS - PAGE_SIZE must be
			 * XXX: page aligned or it will be fragmented.
			 */
			iolen = ((vm_offset_t) bp->b_data) & PAGE_MASK;
			if ((bp->b_bcount + iolen) > bp->b_kvasize) {
				bp->b_bcount = bp->b_kvasize;
				if (iolen != 0)
					bp->b_bcount -= PAGE_SIZE;
			}
			bp->b_bufsize = bp->b_bcount;

			bp->b_blkno = btodb(bp->b_offset);

			if (uio->uio_segflg == UIO_USERSPACE)
				if (vmapbuf(bp) < 0) {
					error = EFAULT;
					goto doerror;
				}

			dev_strategy(dev, bp);
			if (uio->uio_rw == UIO_READ)
				bwait(bp, PRIBIO, "physrd");
			else
				bwait(bp, PRIBIO, "physwr");

			if (uio->uio_segflg == UIO_USERSPACE)
				vunmapbuf(bp);
			iolen = bp->b_bcount - bp->b_resid;
			if (iolen == 0 && !(bp->b_ioflags & BIO_ERROR))
				goto doerror;	/* EOF */
			uio->uio_iov[i].iov_len -= iolen;
			uio->uio_iov[i].iov_base =
			    (char *)uio->uio_iov[i].iov_base + iolen;
			uio->uio_resid -= iolen;
			uio->uio_offset += iolen;
			if( bp->b_ioflags & BIO_ERROR) {
				error = bp->b_error;
				goto doerror;
			}
		}
	}
doerror:
	relpbuf(bp, NULL);
	PRELE(curproc);
	return (error);
}
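The clamp in the loop above (the XXX comment about kvasize = MAXPHYS) can be exercised in isolation. The following is a minimal userland sketch of just that arithmetic, assuming a 4 KiB page and a 128 KiB pbuf KVA window as stand-ins for PAGE_SIZE and MAXPHYS; it is an illustration, not kernel code.

/* Userland sketch of the physio() clamp: given a user pointer and a
 * request size, compute how many bytes one pbuf pass can cover.
 * PAGE_SZ and KVA_SZ are stand-ins for PAGE_SIZE and MAXPHYS. */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SZ		4096u
#define PAGE_MSK	(PAGE_SZ - 1)
#define KVA_SZ		(128u * 1024)	/* pbuf kvasize = MAXPHYS */

static size_t
clamp_to_pbuf(uintptr_t data, size_t bcount)
{
	size_t pgoff = data & PAGE_MSK;	/* offset within the first page */

	if (bcount + pgoff > KVA_SZ) {
		bcount = KVA_SZ;
		if (pgoff != 0)		/* unaligned: lose one page of kva */
			bcount -= PAGE_SZ;
	}
	return (bcount);
}

int
main(void)
{
	/* An aligned request maps the full kvasize; an unaligned one loses a page. */
	printf("%zu\n", clamp_to_pbuf(0x10000, 512 * 1024));	/* 131072 */
	printf("%zu\n", clamp_to_pbuf(0x10200, 512 * 1024));	/* 126976 */
	return (0);
}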
Example 4
0
static int
ffs_rawread_main(struct vnode *vp, struct uio *uio)
{
	int error, nerror;
	struct buf *bp, *nbp, *tbp;
	int iolen;
	caddr_t udata;
	int resid;
	off_t offset;
	
	udata = uio->uio_iov->iov_base;
	resid = uio->uio_resid;
	offset = uio->uio_offset;

	error = 0;
	nerror = 0;
	
	bp = NULL;
	nbp = NULL;
	
	while (resid > 0) {
		
		if (bp == NULL) { /* Setup first read */
			/* XXX: Leave some bufs for swap */
			bp = getpbuf_kva(&vp->v_mount->mnt_pbuf_count);
			error = ffs_rawread_readahead(vp, udata, offset,
						      resid, bp);
			if (error != 0)
				break;
			
			if (resid > bp->b_bufsize) { /* Setup first readahead */
				/* XXX: Leave bufs for swap */
				if (rawreadahead != 0) 
					nbp = trypbuf_kva(&vp->v_mount->mnt_pbuf_count);
				else
					nbp = NULL;
				if (nbp != NULL) {
					nerror = ffs_rawread_readahead(
							vp, 
							udata + bp->b_bufsize,
							offset + bp->b_bufsize,
							resid - bp->b_bufsize,
							nbp);
					if (nerror) {
						relpbuf(nbp, &vp->v_mount->mnt_pbuf_count);
						nbp = NULL;
					}
				}
			}
		}
		
		biowait(&bp->b_bio1, "rawrd");
		
		vunmapbuf(bp);
		
		iolen = bp->b_bcount - bp->b_resid;
		if (iolen == 0 && (bp->b_flags & B_ERROR) == 0) {
			nerror = 0;	/* Ignore possible beyond EOF error */
			break; /* EOF */
		}
		
		if ((bp->b_flags & B_ERROR) != 0) {
			error = bp->b_error;
			break;
		}
		clearbiocache(&bp->b_bio2);
		resid -= iolen;
		udata += iolen;
		offset += iolen;
		if (iolen < bp->b_bufsize) {
			/* Incomplete read.  Try to read remaining part */
			error = ffs_rawread_readahead(
				    vp, udata, offset,
				    bp->b_bufsize - iolen, bp);
			if (error != 0)
				break;
		} else if (nbp != NULL) { /* Complete read with readahead */
			
			tbp = bp;
			bp = nbp;
			nbp = tbp;
			
			clearbiocache(&nbp->b_bio2);
			
			if (resid <= bp->b_bufsize) { /* No more readaheads */
				relpbuf(nbp, &vp->v_mount->mnt_pbuf_count);
				nbp = NULL;
			} else { /* Setup next readahead */
				nerror = ffs_rawread_readahead(
						vp, udata + bp->b_bufsize,
				   		offset + bp->b_bufsize,
						resid - bp->b_bufsize,
						nbp);
				if (nerror != 0) {
					relpbuf(nbp, &vp->v_mount->mnt_pbuf_count);
					nbp = NULL;
				}
			}
		} else if (nerror != 0) {/* Deferred Readahead error */
			break;		
		}  else if (resid > 0) { /* More to read, no readahead */
			error = ffs_rawread_readahead(vp, udata, offset,
						      resid, bp);
			if (error != 0)
				break;
		}
	}
	
	if (bp != NULL)
		relpbuf(bp, &vp->v_mount->mnt_pbuf_count);
	if (nbp != NULL) {			/* Run down readahead buffer */
		biowait(&nbp->b_bio1, "rawrd");
		vunmapbuf(nbp);
		relpbuf(nbp, &vp->v_mount->mnt_pbuf_count);
	}
	
	if (error == 0)
		error = nerror;
	uio->uio_iov->iov_base = udata;
	uio->uio_resid = resid;
	uio->uio_offset = offset;
	return error;
}
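The bp/nbp/tbp rotation above is easier to see stripped of the buffer-cache machinery. Below is a userland analogue, assuming pread(2) on a regular file, two heap buffers standing in for the two pbufs, and synchronous reads where ffs_rawread_readahead() would issue the next request asynchronously before the current one is consumed.

/* Userland sketch of the two-buffer rotation in ffs_rawread_main():
 * "bp" holds the chunk being consumed, "nbp" the chunk read ahead.
 * CHUNK is an arbitrary stand-in for the pbuf size. */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define CHUNK	(64 * 1024)

int
main(int argc, char **argv)
{
	char *bp, *nbp, *tbp;
	off_t offset = 0;
	ssize_t iolen, niolen;
	int fd;

	if (argc != 2 || (fd = open(argv[1], O_RDONLY)) < 0)
		return (1);
	bp = malloc(CHUNK);
	nbp = malloc(CHUNK);
	if (bp == NULL || nbp == NULL)
		return (1);

	/* Prime the first buffer, then keep one chunk of readahead going. */
	iolen = pread(fd, bp, CHUNK, offset);
	while (iolen > 0) {
		/* "Readahead": fetch the next chunk before consuming bp. */
		niolen = pread(fd, nbp, CHUNK, offset + iolen);

		/* Consume the current chunk (here we only advance the offset). */
		offset += iolen;

		/* Rotate the buffers, exactly like the bp/nbp/tbp swap. */
		tbp = bp;
		bp = nbp;
		nbp = tbp;
		iolen = niolen;
	}

	printf("read %lld bytes\n", (long long)offset);
	free(bp);
	free(nbp);
	close(fd);
	return (0);
}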
Example 5
0
int
physio(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct buf *bp;
	struct cdevsw *csw;
	caddr_t sa;
	u_int iolen;
	int error, i, mapped;

	/* Keep the process UPAGES from being swapped. XXX: why ? */
	PHOLD(curproc);

	bp = getpbuf(NULL);
	sa = bp->b_data;
	error = 0;

	/* XXX: sanity check */
	if(dev->si_iosize_max < PAGE_SIZE) {
		printf("WARNING: %s si_iosize_max=%d, using DFLTPHYS.\n",
		    devtoname(dev), dev->si_iosize_max);
		dev->si_iosize_max = DFLTPHYS;
	}

	/*
	 * If the driver does not want I/O to be split, that means that we
	 * need to reject any requests that will not fit into one buffer.
	 */
	if (dev->si_flags & SI_NOSPLIT &&
	    (uio->uio_resid > dev->si_iosize_max || uio->uio_resid > MAXPHYS ||
	    uio->uio_iovcnt > 1)) {
		/*
		 * Tell the user why his I/O was rejected.
		 */
		if (uio->uio_resid > dev->si_iosize_max)
			uprintf("%s: request size=%zd > si_iosize_max=%d; "
			    "cannot split request\n", devtoname(dev),
			    uio->uio_resid, dev->si_iosize_max);
		if (uio->uio_resid > MAXPHYS)
			uprintf("%s: request size=%zd > MAXPHYS=%d; "
			    "cannot split request\n", devtoname(dev),
			    uio->uio_resid, MAXPHYS);
		if (uio->uio_iovcnt > 1)
			uprintf("%s: request vectors=%d > 1; "
			    "cannot split request\n", devtoname(dev),
			    uio->uio_iovcnt);

		error = EFBIG;
		goto doerror;
	}

	for (i = 0; i < uio->uio_iovcnt; i++) {
		while (uio->uio_iov[i].iov_len) {
			bp->b_flags = 0;
			if (uio->uio_rw == UIO_READ) {
				bp->b_iocmd = BIO_READ;
				curthread->td_ru.ru_inblock++;
			} else {
				bp->b_iocmd = BIO_WRITE;
				curthread->td_ru.ru_oublock++;
			}
			bp->b_iodone = bdone;
			bp->b_data = uio->uio_iov[i].iov_base;
			bp->b_bcount = uio->uio_iov[i].iov_len;
			bp->b_offset = uio->uio_offset;
			bp->b_iooffset = uio->uio_offset;
			bp->b_saveaddr = sa;

			/* Don't exceed drivers iosize limit */
			if (bp->b_bcount > dev->si_iosize_max)
				bp->b_bcount = dev->si_iosize_max;

			/* 
			 * Make sure the pbuf can map the request
			 * XXX: The pbuf has kvasize = MAXPHYS so a request
			 * XXX: larger than MAXPHYS - PAGE_SIZE must be
			 * XXX: page aligned or it will be fragmented.
			 */
			iolen = ((vm_offset_t) bp->b_data) & PAGE_MASK;
			if ((bp->b_bcount + iolen) > bp->b_kvasize) {
				/*
				 * This device does not want I/O to be split.
				 */
				if (dev->si_flags & SI_NOSPLIT) {
					uprintf("%s: request ptr %p is not "
					    "on a page boundary; cannot split "
					    "request\n", devtoname(dev),
					    bp->b_data);
					error = EFBIG;
					goto doerror;
				}
				bp->b_bcount = bp->b_kvasize;
				if (iolen != 0)
					bp->b_bcount -= PAGE_SIZE;
			}
			bp->b_bufsize = bp->b_bcount;

			bp->b_blkno = btodb(bp->b_offset);

			csw = dev->si_devsw;
			if (uio->uio_segflg == UIO_USERSPACE) {
				if (dev->si_flags & SI_UNMAPPED)
					mapped = 0;
				else
					mapped = 1;
				if (vmapbuf(bp, mapped) < 0) {
					error = EFAULT;
					goto doerror;
				}
			}

			dev_strategy_csw(dev, csw, bp);
			if (uio->uio_rw == UIO_READ)
				bwait(bp, PRIBIO, "physrd");
			else
				bwait(bp, PRIBIO, "physwr");

			if (uio->uio_segflg == UIO_USERSPACE)
				vunmapbuf(bp);
			iolen = bp->b_bcount - bp->b_resid;
			if (iolen == 0 && !(bp->b_ioflags & BIO_ERROR))
				goto doerror;	/* EOF */
			uio->uio_iov[i].iov_len -= iolen;
			uio->uio_iov[i].iov_base =
			    (char *)uio->uio_iov[i].iov_base + iolen;
			uio->uio_resid -= iolen;
			uio->uio_offset += iolen;
			if( bp->b_ioflags & BIO_ERROR) {
				error = bp->b_error;
				goto doerror;
			}
		}
	}
doerror:
	relpbuf(bp, NULL);
	PRELE(curproc);
	return (error);
}
Example 6
0
/*
    struct vnop_putpages_args {
        struct vnode *a_vp;
        vm_page_t *a_m;
        int a_count;
        int a_sync;
        int *a_rtvals;
        vm_ooffset_t a_offset;
    };
*/
static int
fuse_vnop_putpages(struct vop_putpages_args *ap)
{
	struct uio uio;
	struct iovec iov;
	vm_offset_t kva;
	struct buf *bp;
	int i, error, npages, count;
	off_t offset;
	int *rtvals;
	struct vnode *vp;
	struct thread *td;
	struct ucred *cred;
	vm_page_t *pages;
	vm_ooffset_t fsize;

	FS_DEBUG2G("heh\n");

	vp = ap->a_vp;
	KASSERT(vp->v_object, ("objectless vp passed to putpages"));
	fsize = vp->v_object->un_pager.vnp.vnp_size;
	td = curthread;			/* XXX */
	cred = curthread->td_ucred;	/* XXX */
	pages = ap->a_m;
	count = ap->a_count;
	rtvals = ap->a_rtvals;
	npages = btoc(count);
	offset = IDX_TO_OFF(pages[0]->pindex);

	if (!fsess_opt_mmap(vnode_mount(vp))) {
		FS_DEBUG("called on non-cacheable vnode??\n");
	}
	for (i = 0; i < npages; i++)
		rtvals[i] = VM_PAGER_AGAIN;

	/*
	 * When putting pages, do not extend file past EOF.
	 */

	if (offset + count > fsize) {
		count = fsize - offset;
		if (count < 0)
			count = 0;
	}
	/*
	 * We use only the kva address for the buffer, but this is extremely
	 * convenient and fast.
	 */
	bp = getpbuf(&fuse_pbuf_freecnt);

	kva = (vm_offset_t)bp->b_data;
	pmap_qenter(kva, pages, npages);
	PCPU_INC(cnt.v_vnodeout);
	PCPU_ADD(cnt.v_vnodepgsout, count);

	iov.iov_base = (caddr_t)kva;
	iov.iov_len = count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = offset;
	uio.uio_resid = count;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_WRITE;
	uio.uio_td = td;

	error = fuse_io_dispatch(vp, &uio, IO_DIRECT, cred);

	pmap_qremove(kva, npages);
	relpbuf(bp, &fuse_pbuf_freecnt);

	if (!error) {
		int nwritten = round_page(count - uio.uio_resid) / PAGE_SIZE;

		for (i = 0; i < nwritten; i++) {
			rtvals[i] = VM_PAGER_OK;
			VM_OBJECT_WLOCK(pages[i]->object);
			vm_page_undirty(pages[i]);
			VM_OBJECT_WUNLOCK(pages[i]->object);
		}
	}
	return rtvals[0];
}
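The EOF clamp and the nwritten accounting at the end of fuse_vnop_putpages() (and of the other putpages routines later in this collection) reduce to a few lines of arithmetic. A small sketch, with PAGE_SZ as a stand-in for PAGE_SIZE, round_page() re-implemented locally, and made-up sizes:

/* Arithmetic sketch for the putpages paths: clamp the write so it does not
 * extend past EOF, then convert the bytes actually written into a count of
 * pages to mark VM_PAGER_OK. */
#include <stdio.h>

#define PAGE_SZ		4096L
#define round_page(x)	(((x) + PAGE_SZ - 1) & ~(PAGE_SZ - 1))

int
main(void)
{
	long fsize = 10000;	/* hypothetical file size */
	long offset = 8192;	/* file offset of pages[0] */
	long count = 8192;	/* two pages handed in by the VM */
	long resid = 0;		/* pretend the write completed fully */

	/* When putting pages, do not extend the file past EOF. */
	if (offset + count > fsize) {
		count = fsize - offset;
		if (count < 0)
			count = 0;
	}

	/* Pages fully or partially covered by the write become VM_PAGER_OK. */
	printf("count=%ld nwritten=%ld page(s)\n",
	    count, round_page(count - resid) / PAGE_SZ);	/* 1808 -> 1 page */
	return (0);
}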
Example 7
0
/*
    struct vnop_getpages_args {
        struct vnode *a_vp;
        vm_page_t *a_m;
        int a_count;
        int a_reqpage;
        vm_ooffset_t a_offset;
    };
*/
static int
fuse_vnop_getpages(struct vop_getpages_args *ap)
{
	int i, error, nextoff, size, toff, count, npages;
	struct uio uio;
	struct iovec iov;
	vm_offset_t kva;
	struct buf *bp;
	struct vnode *vp;
	struct thread *td;
	struct ucred *cred;
	vm_page_t *pages;

	FS_DEBUG2G("heh\n");

	vp = ap->a_vp;
	KASSERT(vp->v_object, ("objectless vp passed to getpages"));
	td = curthread;			/* XXX */
	cred = curthread->td_ucred;	/* XXX */
	pages = ap->a_m;
	count = ap->a_count;

	if (!fsess_opt_mmap(vnode_mount(vp))) {
		FS_DEBUG("called on non-cacheable vnode??\n");
		return (VM_PAGER_ERROR);
	}
	npages = btoc(count);

	/*
	 * If the requested page is partially valid, just return it and
	 * allow the pager to zero-out the blanks.  Partially valid pages
	 * can only occur at the file EOF.
	 */

	VM_OBJECT_WLOCK(vp->v_object);
	fuse_vm_page_lock_queues();
	if (pages[ap->a_reqpage]->valid != 0) {
		for (i = 0; i < npages; ++i) {
			if (i != ap->a_reqpage) {
				fuse_vm_page_lock(pages[i]);
				vm_page_free(pages[i]);
				fuse_vm_page_unlock(pages[i]);
			}
		}
		fuse_vm_page_unlock_queues();
		VM_OBJECT_WUNLOCK(vp->v_object);
		return 0;
	}
	fuse_vm_page_unlock_queues();
	VM_OBJECT_WUNLOCK(vp->v_object);

	/*
	 * We use only the kva address for the buffer, but this is extremely
	 * convenient and fast.
	 */
	bp = getpbuf(&fuse_pbuf_freecnt);

	kva = (vm_offset_t)bp->b_data;
	pmap_qenter(kva, pages, npages);
	PCPU_INC(cnt.v_vnodein);
	PCPU_ADD(cnt.v_vnodepgsin, npages);

	iov.iov_base = (caddr_t)kva;
	iov.iov_len = count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = IDX_TO_OFF(pages[0]->pindex);
	uio.uio_resid = count;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_td = td;

	error = fuse_io_dispatch(vp, &uio, IO_DIRECT, cred);
	pmap_qremove(kva, npages);

	relpbuf(bp, &fuse_pbuf_freecnt);

	if (error && (uio.uio_resid == count)) {
		FS_DEBUG("error %d\n", error);
		VM_OBJECT_WLOCK(vp->v_object);
		fuse_vm_page_lock_queues();
		for (i = 0; i < npages; ++i) {
			if (i != ap->a_reqpage) {
				fuse_vm_page_lock(pages[i]);
				vm_page_free(pages[i]);
				fuse_vm_page_unlock(pages[i]);
			}
		}
		fuse_vm_page_unlock_queues();
		VM_OBJECT_WUNLOCK(vp->v_object);
		return VM_PAGER_ERROR;
	}
	/*
	 * Calculate the number of bytes read and validate only that number
	 * of bytes.  Note that due to pending writes, size may be 0.  This
	 * does not mean that the remaining data is invalid!
	 */

	size = count - uio.uio_resid;
	VM_OBJECT_WLOCK(vp->v_object);
	fuse_vm_page_lock_queues();
	for (i = 0, toff = 0; i < npages; i++, toff = nextoff) {
		vm_page_t m;

		nextoff = toff + PAGE_SIZE;
		m = pages[i];

		if (nextoff <= size) {
			/*
			 * Read operation filled an entire page
			 */
			m->valid = VM_PAGE_BITS_ALL;
			KASSERT(m->dirty == 0,
			    ("fuse_getpages: page %p is dirty", m));
		} else if (size > toff) {
			/*
			 * Read operation filled a partial page.
			 */
			m->valid = 0;
			vm_page_set_valid_range(m, 0, size - toff);
			KASSERT(m->dirty == 0,
			    ("fuse_getpages: page %p is dirty", m));
		} else {
			/*
			 * Read operation was short.  If no error occurred
			 * we may have hit a zero-fill section.   We simply
			 * leave valid set to 0.
			 */
			;
		}
		if (i != ap->a_reqpage)
			vm_page_readahead_finish(m);
	}
	fuse_vm_page_unlock_queues();
	VM_OBJECT_WUNLOCK(vp->v_object);
	return 0;
}
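The post-read loop that marks each page fully valid, partially valid, or untouched recurs in several of the getpages implementations in this collection. The sketch below reproduces only the toff/nextoff bookkeeping, with PAGE_SZ standing in for PAGE_SIZE and a hard-coded byte count standing in for count - uio.uio_resid.

/* Sketch of the post-read validation loop: given how many bytes the read
 * returned ("size"), decide per page whether it is fully valid, partially
 * valid, or left invalid. */
#include <stdio.h>

#define PAGE_SZ	4096

int
main(void)
{
	int npages = 4;
	int size = 9000;	/* bytes actually read */
	int i, toff, nextoff;

	for (i = 0, toff = 0; i < npages; i++, toff = nextoff) {
		nextoff = toff + PAGE_SZ;
		if (nextoff <= size)
			printf("page %d: fully valid\n", i);
		else if (size > toff)
			printf("page %d: valid for %d bytes\n", i, size - toff);
		else
			printf("page %d: left invalid (short read)\n", i);
	}
	return (0);
}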
Example 8
0
int
physio(dev_t dev, struct uio *uio, int ioflag)
{
	int i;
	int error;
	int spl;
	caddr_t sa;
	off_t blockno;
	u_int iolen;
	struct buf *bp;

	/* Keep the process UPAGES from being swapped. XXX: why ? */
	PHOLD(curproc);

	bp = getpbuf(NULL);
	sa = bp->b_data;
	error = bp->b_error = 0;

	/* XXX: sanity check */
	if(dev->si_iosize_max < PAGE_SIZE) {
		printf("WARNING: %s si_iosize_max=%d, using DFLTPHYS.\n",
		    devtoname(dev), dev->si_iosize_max);
		dev->si_iosize_max = DFLTPHYS;
	}

	for (i = 0; i < uio->uio_iovcnt; i++) {
		while (uio->uio_iov[i].iov_len) {
			if (uio->uio_rw == UIO_READ)
				bp->b_flags = B_PHYS | B_CALL | B_READ;
			else 
				bp->b_flags = B_PHYS | B_CALL | B_WRITE;
			bp->b_dev = dev;
			bp->b_iodone = physwakeup;
			bp->b_data = uio->uio_iov[i].iov_base;
			bp->b_bcount = uio->uio_iov[i].iov_len;
			bp->b_offset = uio->uio_offset;
			bp->b_saveaddr = sa;

			/* Don't exceed drivers iosize limit */
			if (bp->b_bcount > dev->si_iosize_max)
				bp->b_bcount = dev->si_iosize_max;

			/* 
			 * Make sure the pbuf can map the request
			 * XXX: The pbuf has kvasize = MAXPHYS so a request
			 * XXX: larger than MAXPHYS - PAGE_SIZE must be
			 * XXX: page aligned or it will be fragmented.
			 */
			iolen = ((vm_offset_t) bp->b_data) & PAGE_MASK;
			if ((bp->b_bcount + iolen) > bp->b_kvasize) {
				bp->b_bcount = bp->b_kvasize;
				if (iolen != 0)
					bp->b_bcount -= PAGE_SIZE;
			}
			bp->b_bufsize = bp->b_bcount;

			blockno = bp->b_offset >> DEV_BSHIFT;
			if ((daddr_t)blockno != blockno) {
				error = EINVAL; /* blockno overflow */
				goto doerror;
			}
			bp->b_blkno = blockno;

			if (uio->uio_segflg == UIO_USERSPACE) {
				if (!useracc(bp->b_data, bp->b_bufsize,
				    bp->b_flags & B_READ ?
				    VM_PROT_WRITE : VM_PROT_READ)) {
					error = EFAULT;
					goto doerror;
				}
				vmapbuf(bp);
			}

			BUF_STRATEGY(bp, 0);
			spl = splbio();
			while ((bp->b_flags & B_DONE) == 0)
				tsleep((caddr_t)bp, PRIBIO, "physstr", 0);
			splx(spl);

			if (uio->uio_segflg == UIO_USERSPACE)
				vunmapbuf(bp);
			iolen = bp->b_bcount - bp->b_resid;
			if (iolen == 0 && !(bp->b_flags & B_ERROR))
				goto doerror;	/* EOF */
			uio->uio_iov[i].iov_len -= iolen;
			uio->uio_iov[i].iov_base += iolen;
			uio->uio_resid -= iolen;
			uio->uio_offset += iolen;
			if( bp->b_flags & B_ERROR) {
				error = bp->b_error;
				goto doerror;
			}
		}
	}
doerror:
	relpbuf(bp, NULL);
	PRELE(curproc);
	return (error);
}
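This older physio() narrows the byte offset to a device block number and rejects the request if the narrowing overflows. A userland sketch of that guard, using DEV_BSHIFT == 9 (512-byte device blocks) and a hypothetical 32-bit daddr32_t so the overflow is easy to trigger:

/* Sketch of the block-number overflow guard: shift the byte offset down to
 * 512-byte blocks, then check that the value survives narrowing to the
 * block-number type by casting and comparing. */
#include <stdint.h>
#include <stdio.h>

#define DEV_BSHIFT	9		/* 512-byte device blocks */
typedef int32_t daddr32_t;		/* hypothetical narrow block-number type */

static int
offset_to_blkno(int64_t offset, daddr32_t *blknop)
{
	int64_t blockno = offset >> DEV_BSHIFT;

	if ((daddr32_t)blockno != blockno)
		return (-1);		/* EINVAL in physio(): blkno overflow */
	*blknop = (daddr32_t)blockno;
	return (0);
}

int
main(void)
{
	daddr32_t b;

	printf("%d\n", offset_to_blkno(1LL << 30, &b));	/* fits:     0 */
	printf("%d\n", offset_to_blkno(1LL << 50, &b));	/* overflow: -1 */
	return (0);
}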
Example 9
0
/*
 * Vnode op for VM getpages.
 * Wish wish .... get rid from multiple IO routines
 *
 * nwfs_getpages(struct vnode *a_vp, vm_page_t *a_m, int a_count,
 *		 int a_reqpage, vm_ooffset_t a_offset)
 */
int
nwfs_getpages(struct vop_getpages_args *ap)
{
#ifndef NWFS_RWCACHE
	return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count,
					    ap->a_reqpage, ap->a_seqaccess);
#else
	int i, error, npages;
	size_t nextoff, toff;
	size_t count;
	size_t size;
	struct uio uio;
	struct iovec iov;
	vm_offset_t kva;
	struct buf *bp;
	struct vnode *vp;
	struct thread *td = curthread;	/* XXX */
	struct ucred *cred;
	struct nwmount *nmp;
	struct nwnode *np;
	vm_page_t *pages;

	KKASSERT(td->td_proc);
	cred = td->td_proc->p_ucred;

	vp = ap->a_vp;
	np = VTONW(vp);
	nmp = VFSTONWFS(vp->v_mount);
	pages = ap->a_m;
	count = (size_t)ap->a_count;

	if (vp->v_object == NULL) {
		kprintf("nwfs_getpages: called with non-merged cache vnode??\n");
		return VM_PAGER_ERROR;
	}

	bp = getpbuf_kva(&nwfs_pbuf_freecnt);
	npages = btoc(count);
	kva = (vm_offset_t) bp->b_data;
	pmap_qenter(kva, pages, npages);

	iov.iov_base = (caddr_t) kva;
	iov.iov_len = count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = IDX_TO_OFF(pages[0]->pindex);
	uio.uio_resid = count;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_td = td;

	error = ncp_read(NWFSTOCONN(nmp), &np->n_fh, &uio,cred);
	pmap_qremove(kva, npages);

	relpbuf(bp, &nwfs_pbuf_freecnt);

	if (error && (uio.uio_resid == count)) {
		kprintf("nwfs_getpages: error %d\n",error);
		for (i = 0; i < npages; i++) {
			if (ap->a_reqpage != i)
				vnode_pager_freepage(pages[i]);
		}
		return VM_PAGER_ERROR;
	}

	size = count - uio.uio_resid;

	for (i = 0, toff = 0; i < npages; i++, toff = nextoff) {
		vm_page_t m;
		nextoff = toff + PAGE_SIZE;
		m = pages[i];

		m->flags &= ~PG_ZERO;

		/*
		 * NOTE: pmap dirty bit should have already been cleared.
		 *	 We do not clear it here.
		 */
		if (nextoff <= size) {
			m->valid = VM_PAGE_BITS_ALL;
			m->dirty = 0;
		} else {
			int nvalid = ((size + DEV_BSIZE - 1) - toff) &
				      ~(DEV_BSIZE - 1);
			vm_page_set_validclean(m, 0, nvalid);
		}
		
		if (i != ap->a_reqpage) {
			/*
			 * Whether or not to leave the page activated is up in
			 * the air, but we should put the page on a page queue
			 * somewhere (it already is in the object).  Result:
			 * It appears that empirical results show that
			 * deactivating pages is best.
			 */

			/*
			 * Just in case someone was asking for this page we
			 * now tell them that it is ok to use.
			 */
			if (!error) {
				if (m->flags & PG_REFERENCED)
					vm_page_activate(m);
				else
					vm_page_deactivate(m);
				vm_page_wakeup(m);
			} else {
				vnode_pager_freepage(m);
			}
		}
	}
	return 0;
#endif /* NWFS_RWCACHE */
}
Example 10
0
/*
 * Vnode op for VM getpages.
 * Wish wish .... get rid from multiple IO routines
 *
 * smbfs_getpages(struct vnode *a_vp, vm_page_t *a_m, int a_count,
 *		  int a_reqpage, vm_ooffset_t a_offset)
 */
int
smbfs_getpages(struct vop_getpages_args *ap)
{
#ifdef SMBFS_RWGENERIC
	return vop_stdgetpages(ap);
#else
	int i, error, npages;
	int doclose;
	size_t size, toff, nextoff, count;
	struct uio uio;
	struct iovec iov;
	vm_offset_t kva;
	struct buf *bp;
	struct vnode *vp;
	struct thread *td = curthread;	/* XXX */
	struct ucred *cred;
	struct smbmount *smp;
	struct smbnode *np;
	struct smb_cred scred;
	vm_page_t *pages;

	KKASSERT(td->td_proc);

	vp = ap->a_vp;
	cred = td->td_proc->p_ucred;
	np = VTOSMB(vp);
	smp = VFSTOSMBFS(vp->v_mount);
	pages = ap->a_m;
	count = (size_t)ap->a_count;

	if (vp->v_object == NULL) {
		kprintf("smbfs_getpages: called with non-merged cache vnode??\n");
		return VM_PAGER_ERROR;
	}
	smb_makescred(&scred, td, cred);

	bp = getpbuf_kva(&smbfs_pbuf_freecnt);
	npages = btoc(count);
	kva = (vm_offset_t) bp->b_data;
	pmap_qenter(kva, pages, npages);

	iov.iov_base = (caddr_t) kva;
	iov.iov_len = count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = IDX_TO_OFF(pages[0]->pindex);
	uio.uio_resid = count;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_td = td;

	/*
	 * This is kinda nasty.  Since smbfs is physically closing the
	 * fid on close(), we have to reopen it if necessary.  There are
	 * other races here too, such as if another process opens the same
	 * file while we are blocked in read. XXX
	 */
	error = 0;
	doclose = 0;
	if (np->n_opencount == 0) {
		error = smbfs_smb_open(np, SMB_AM_OPENREAD, &scred);
		if (error == 0)
			doclose = 1;
	}
	if (error == 0)
		error = smb_read(smp->sm_share, np->n_fid, &uio, &scred);
	if (doclose)
		smbfs_smb_close(smp->sm_share, np->n_fid, NULL, &scred);
	pmap_qremove(kva, npages);

	relpbuf(bp, &smbfs_pbuf_freecnt);

	if (error && (uio.uio_resid == count)) {
		kprintf("smbfs_getpages: error %d\n",error);
		for (i = 0; i < npages; i++) {
			if (ap->a_reqpage != i)
				vnode_pager_freepage(pages[i]);
		}
		return VM_PAGER_ERROR;
	}

	size = count - uio.uio_resid;

	for (i = 0, toff = 0; i < npages; i++, toff = nextoff) {
		vm_page_t m;
		nextoff = toff + PAGE_SIZE;
		m = pages[i];

		m->flags &= ~PG_ZERO;

		/*
		 * NOTE: pmap dirty bit should have already been cleared.
		 *	 We do not clear it here.
		 */
		if (nextoff <= size) {
			m->valid = VM_PAGE_BITS_ALL;
			m->dirty = 0;
		} else {
			int nvalid = ((size + DEV_BSIZE - 1) - toff) &
				      ~(DEV_BSIZE - 1);
			vm_page_set_validclean(m, 0, nvalid);
		}
		
		if (i != ap->a_reqpage) {
			/*
			 * Whether or not to leave the page activated is up in
			 * the air, but we should put the page on a page queue
			 * somewhere (it already is in the object).  Result:
			 * It appears that empirical results show that
			 * deactivating pages is best.
			 */

			/*
			 * Just in case someone was asking for this page we
			 * now tell them that it is ok to use.
			 */
			if (!error) {
				if (m->flags & PG_REFERENCED)
					vm_page_activate(m);
				else
					vm_page_deactivate(m);
				vm_page_wakeup(m);
			} else {
				vnode_pager_freepage(m);
			}
		}
	}
	return 0;
#endif /* SMBFS_RWGENERIC */
}
Example 11
0
/*
 * Vnode op for VM putpages.
 * possible bug: all IO done in sync mode
 * Note that vop_close always invalidate pages before close, so it's
 * not necessary to open vnode.
 *
 * smbfs_putpages(struct vnode *a_vp, vm_page_t *a_m, int a_count, int a_sync,
 *		  int *a_rtvals, vm_ooffset_t a_offset)
 */
int
smbfs_putpages(struct vop_putpages_args *ap)
{
	int error;
	struct vnode *vp = ap->a_vp;
	struct thread *td = curthread;	/* XXX */
	struct ucred *cred;

#ifdef SMBFS_RWGENERIC
	KKASSERT(td->td_proc);
	cred = td->td_proc->p_ucred;
	VOP_OPEN(vp, FWRITE, cred, NULL);
	error = vop_stdputpages(ap);
	VOP_CLOSE(vp, FWRITE, cred);
	return error;
#else
	struct uio uio;
	struct iovec iov;
	vm_offset_t kva;
	struct buf *bp;
	int i, npages, count;
	int doclose;
	int *rtvals;
	struct smbmount *smp;
	struct smbnode *np;
	struct smb_cred scred;
	vm_page_t *pages;

	KKASSERT(td->td_proc);
	cred = td->td_proc->p_ucred;
/*	VOP_OPEN(vp, FWRITE, cred, NULL);*/
	np = VTOSMB(vp);
	smp = VFSTOSMBFS(vp->v_mount);
	pages = ap->a_m;
	count = ap->a_count;
	rtvals = ap->a_rtvals;
	npages = btoc(count);

	for (i = 0; i < npages; i++) {
		rtvals[i] = VM_PAGER_AGAIN;
	}

	bp = getpbuf_kva(&smbfs_pbuf_freecnt);
	kva = (vm_offset_t) bp->b_data;
	pmap_qenter(kva, pages, npages);

	iov.iov_base = (caddr_t) kva;
	iov.iov_len = count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = IDX_TO_OFF(pages[0]->pindex);
	uio.uio_resid = count;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_WRITE;
	uio.uio_td = td;
	SMBVDEBUG("ofs=%d,resid=%d\n",(int)uio.uio_offset, uio.uio_resid);

	smb_makescred(&scred, td, cred);

	/*
	 * This is kinda nasty.  Since smbfs is physically closing the
	 * fid on close(), we have to reopen it if necessary.  There are
	 * other races here too, such as if another process opens the same
	 * file while we are blocked in read, or the file is open read-only
	 * XXX
	 */
	error = 0;
	doclose = 0;
	if (np->n_opencount == 0) {
		error = smbfs_smb_open(np, SMB_AM_OPENRW, &scred);
		if (error == 0)
			doclose = 1;
	}
	if (error == 0)
		error = smb_write(smp->sm_share, np->n_fid, &uio, &scred);
	if (doclose)
		smbfs_smb_close(smp->sm_share, np->n_fid, NULL, &scred);
/*	VOP_CLOSE(vp, FWRITE, cred);*/
	SMBVDEBUG("paged write done: %d\n", error);

	pmap_qremove(kva, npages);
	relpbuf(bp, &smbfs_pbuf_freecnt);

	if (!error) {
		int nwritten = round_page(count - uio.uio_resid) / PAGE_SIZE;
		for (i = 0; i < nwritten; i++) {
			rtvals[i] = VM_PAGER_OK;
			vm_page_undirty(pages[i]);
		}
	}
	return rtvals[0];
#endif /* SMBFS_RWGENERIC */
}
Example 12
0
/*
 * spec_getpages() - get pages associated with device vnode.
 *
 * Note that spec_read and spec_write do not use the buffer cache, so we
 * must fully implement getpages here.
 */
static int
devfs_spec_getpages(struct vop_getpages_args *ap)
{
	vm_offset_t kva;
	int error;
	int i, pcount, size;
	struct buf *bp;
	vm_page_t m;
	vm_ooffset_t offset;
	int toff, nextoff, nread;
	struct vnode *vp = ap->a_vp;
	int blksiz;
	int gotreqpage;

	error = 0;
	pcount = round_page(ap->a_count) / PAGE_SIZE;

	/*
	 * Calculate the offset of the transfer and do sanity check.
	 */
	offset = IDX_TO_OFF(ap->a_m[0]->pindex) + ap->a_offset;

	/*
	 * Round up physical size for real devices.  We cannot round using
	 * v_mount's block size data because v_mount has nothing to do with
	 * the device.  i.e. it's usually '/dev'.  We need the physical block
	 * size for the device itself.
	 *
	 * We can't use v_rdev->si_mountpoint because it only exists when the
	 * block device is mounted.  However, we can use v_rdev.
	 */
	if (vn_isdisk(vp, NULL))
		blksiz = vp->v_rdev->si_bsize_phys;
	else
		blksiz = DEV_BSIZE;

	size = (ap->a_count + blksiz - 1) & ~(blksiz - 1);

	bp = getpbuf_kva(NULL);
	kva = (vm_offset_t)bp->b_data;

	/*
	 * Map the pages to be read into the kva.
	 */
	pmap_qenter(kva, ap->a_m, pcount);

	/* Build a minimal buffer header. */
	bp->b_cmd = BUF_CMD_READ;
	bp->b_bcount = size;
	bp->b_resid = 0;
	bsetrunningbufspace(bp, size);

	bp->b_bio1.bio_offset = offset;
	bp->b_bio1.bio_done = devfs_spec_getpages_iodone;

	mycpu->gd_cnt.v_vnodein++;
	mycpu->gd_cnt.v_vnodepgsin += pcount;

	/* Do the input. */
	vn_strategy(ap->a_vp, &bp->b_bio1);

	crit_enter();

	/* We definitely need to be at splbio here. */
	while (bp->b_cmd != BUF_CMD_DONE)
		tsleep(bp, 0, "spread", 0);

	crit_exit();

	if (bp->b_flags & B_ERROR) {
		if (bp->b_error)
			error = bp->b_error;
		else
			error = EIO;
	}

	/*
	 * If EOF is encountered we must zero-extend the result in order
	 * to ensure that the page does not contain garbage.  When no
	 * error occurs, an early EOF is indicated if b_bcount got truncated.
	 * b_resid is relative to b_bcount and should be 0, but some devices
	 * might indicate an EOF with b_resid instead of truncating b_bcount.
	 */
	nread = bp->b_bcount - bp->b_resid;
	if (nread < ap->a_count)
		bzero((caddr_t)kva + nread, ap->a_count - nread);
	pmap_qremove(kva, pcount);

	gotreqpage = 0;
	for (i = 0, toff = 0; i < pcount; i++, toff = nextoff) {
		nextoff = toff + PAGE_SIZE;
		m = ap->a_m[i];

		m->flags &= ~PG_ZERO;

		/*
		 * NOTE: vm_page_undirty/clear_dirty etc do not clear the
		 *	 pmap modified bit.  pmap modified bit should have
		 *	 already been cleared.
		 */
		if (nextoff <= nread) {
			m->valid = VM_PAGE_BITS_ALL;
			vm_page_undirty(m);
		} else if (toff < nread) {
			/*
			 * Since this is a VM request, we have to supply the
			 * unaligned offset to allow vm_page_set_valid()
			 * to zero sub-DEV_BSIZE'd portions of the page.
			 */
			vm_page_set_valid(m, 0, nread - toff);
			vm_page_clear_dirty_end_nonincl(m, 0, nread - toff);
		} else {
			m->valid = 0;
			vm_page_undirty(m);
		}

		if (i != ap->a_reqpage) {
			/*
			 * Just in case someone was asking for this page we
			 * now tell them that it is ok to use.
			 */
			if (!error || (m->valid == VM_PAGE_BITS_ALL)) {
				if (m->valid) {
					if (m->flags & PG_REFERENCED) {
						vm_page_activate(m);
					} else {
						vm_page_deactivate(m);
					}
					vm_page_wakeup(m);
				} else {
					vm_page_free(m);
				}
			} else {
				vm_page_free(m);
			}
		} else if (m->valid) {
			gotreqpage = 1;
			/*
			 * Since this is a VM request, we need to make the
			 * entire page presentable by zeroing invalid sections.
			 */
			if (m->valid != VM_PAGE_BITS_ALL)
			    vm_page_zero_invalid(m, FALSE);
		}
	}
	if (!gotreqpage) {
		m = ap->a_m[ap->a_reqpage];
		devfs_debug(DEVFS_DEBUG_WARNING,
	    "spec_getpages:(%s) I/O read failure: (error=%d) bp %p vp %p\n",
			devtoname(vp->v_rdev), error, bp, bp->b_vp);
		devfs_debug(DEVFS_DEBUG_WARNING,
	    "               size: %d, resid: %d, a_count: %d, valid: 0x%x\n",
		    size, bp->b_resid, ap->a_count, m->valid);
		devfs_debug(DEVFS_DEBUG_WARNING,
	    "               nread: %d, reqpage: %d, pindex: %lu, pcount: %d\n",
		    nread, ap->a_reqpage, (u_long)m->pindex, pcount);
		/*
		 * Free the buffer header back to the swap buffer pool.
		 */
		relpbuf(bp, NULL);
		return VM_PAGER_ERROR;
	}
	/*
	 * Free the buffer header back to the swap buffer pool.
	 */
	relpbuf(bp, NULL);
	if (DEVFS_NODE(ap->a_vp))
		nanotime(&DEVFS_NODE(ap->a_vp)->mtime);
	return VM_PAGER_OK;
}
Example 13
0
int
mbrinit(cdev_t dev, struct disk_info *info, struct diskslices **sspp)
{
	struct buf *bp;
	u_char	*cp;
	int	dospart;
	struct dos_partition *dp;
	struct dos_partition *dp0;
	struct dos_partition dpcopy[NDOSPART];
	int	error;
	int	max_ncyls;
	int	max_nsectors;
	int	max_ntracks;
	u_int64_t mbr_offset;
	char	partname[2];
	u_long	secpercyl;
	char	*sname = "tempname";
	struct diskslice *sp;
	struct diskslices *ssp;
	cdev_t wdev;

	mbr_offset = DOSBBSECTOR;
reread_mbr:
	/*
	 * Don't bother if the block size is weird or the
	 * media size is 0 (probably means no media present).
	 */
	if (info->d_media_blksize & DEV_BMASK)
		return (EIO);
	if (info->d_media_size == 0)
		return (EIO);

	/*
	 * Read master boot record.
	 */
	wdev = dev;
	bp = getpbuf_mem(NULL);
	KKASSERT((int)info->d_media_blksize <= bp->b_bufsize);
	bp->b_bio1.bio_offset = (off_t)mbr_offset * info->d_media_blksize;
	bp->b_bio1.bio_done = biodone_sync;
	bp->b_bio1.bio_flags |= BIO_SYNC;
	bp->b_bcount = info->d_media_blksize;
	bp->b_cmd = BUF_CMD_READ;
	bp->b_flags |= B_FAILONDIS;
	dev_dstrategy(wdev, &bp->b_bio1);
	if (biowait(&bp->b_bio1, "mbrrd") != 0) {
		if ((info->d_dsflags & DSO_MBRQUIET) == 0) {
			diskerr(&bp->b_bio1, wdev,
				"reading primary partition table: error",
				LOG_PRINTF, 0);
			kprintf("\n");
		}
		error = EIO;
		goto done;
	}

	/* Weakly verify it. */
	cp = bp->b_data;
	sname = dsname(dev, 0, 0, 0, NULL);
	if (cp[0x1FE] != 0x55 || cp[0x1FF] != 0xAA) {
		if (bootverbose)
			kprintf("%s: invalid primary partition table: no magic\n",
			       sname);
		error = EINVAL;
		goto done;
	}

	/* Make a copy of the partition table to avoid alignment problems. */
	memcpy(&dpcopy[0], cp + DOSPARTOFF, sizeof(dpcopy));

	dp0 = &dpcopy[0];

	/*
	 * Check for "Ontrack Diskmanager" or GPT.  If a GPT is found in
	 * the first dos partition, ignore the rest of the MBR and go
	 * to GPT processing.
	 */
	for (dospart = 0, dp = dp0; dospart < NDOSPART; dospart++, dp++) {
		if (dospart == 0 && dp->dp_typ == DOSPTYP_PMBR) {
			if (bootverbose)
				kprintf(
	    "%s: Found GPT in slice #%d\n", sname, dospart + 1);
			error = gptinit(dev, info, sspp);
			goto done;
		}

		if (dp->dp_typ == DOSPTYP_ONTRACK) {
			if (bootverbose)
				kprintf(
	    "%s: Found \"Ontrack Disk Manager\" on this disk.\n", sname);
			bp->b_flags |= B_INVAL | B_AGE;
			brelse(bp);
			mbr_offset = 63;
			goto reread_mbr;
		}
	}

	if (bcmp(dp0, historical_bogus_partition_table,
		 sizeof historical_bogus_partition_table) == 0 ||
	    bcmp(dp0, historical_bogus_partition_table_fixed,
		 sizeof historical_bogus_partition_table_fixed) == 0) {
#if 0
		TRACE(("%s: invalid primary partition table: historical\n",
		       sname));
#endif /* 0 */
		if (bootverbose)
			kprintf(
     "%s: invalid primary partition table: Dangerously Dedicated (ignored)\n",
			       sname);
		error = EINVAL;
		goto done;
	}

	/* Guess the geometry. */
	/*
	 * TODO:
	 * Perhaps skip entries with 0 size.
	 * Perhaps only look at entries of type DOSPTYP_386BSD or
	 * DOSPTYP_DFLYBSD
	 */
	max_ncyls = 0;
	max_nsectors = 0;
	max_ntracks = 0;
	for (dospart = 0, dp = dp0; dospart < NDOSPART; dospart++, dp++) {
		int	ncyls;
		int	nsectors;
		int	ntracks;

		ncyls = DPCYL(dp->dp_ecyl, dp->dp_esect) + 1;
		if (max_ncyls < ncyls)
			max_ncyls = ncyls;
		nsectors = DPSECT(dp->dp_esect);
		if (max_nsectors < nsectors)
			max_nsectors = nsectors;
		ntracks = dp->dp_ehd + 1;
		if (max_ntracks < ntracks)
			max_ntracks = ntracks;
	}

	/*
	 * Check that we have guessed the geometry right by checking the
	 * partition entries.
	 */
	/*
	 * TODO:
	 * As above.
	 * Check for overlaps.
	 * Check against d_secperunit if the latter is reliable.
	 */
	error = 0;
	for (dospart = 0, dp = dp0; dospart < NDOSPART; dospart++, dp++) {
		if (dp->dp_scyl == 0 && dp->dp_shd == 0 && dp->dp_ssect == 0
		    && dp->dp_start == 0 && dp->dp_size == 0)
			continue;
		//sname = dsname(dev, dkunit(dev), BASE_SLICE + dospart,
		//	       WHOLE_SLICE_PART, partname);

		/*
		 * Temporarily ignore errors from this check.  We could
		 * simplify things by accepting the table earlier if we
		 * always ignore errors here.  Perhaps we should always
		 * accept the table if the magic is right but not let
		 * bad entries affect the geometry.
		 */
		check_part(sname, dp, mbr_offset, max_nsectors, max_ntracks,
			   mbr_offset);
	}
	if (error != 0)
		goto done;

	/*
	 * Accept the DOS partition table.
	 *
	 * Adjust the disk information structure with updated CHS
	 * conversion parameters, but only use values extracted from
	 * the primary partition table.
	 *
	 * NOTE!  Regardless of our having to deal with this old cruft,
	 * we do not screw around with the info->d_media* parameters.
	 */
	secpercyl = (u_long)max_nsectors * max_ntracks;
	if (secpercyl != 0 && mbr_offset == DOSBBSECTOR) {
		info->d_secpertrack = max_nsectors;
		info->d_nheads = max_ntracks;
		info->d_secpercyl = secpercyl;
		info->d_ncylinders = info->d_media_blocks / secpercyl;
	}

	/*
	 * We are passed a pointer to a suitably initialized minimal
	 * slices "struct" with no dangling pointers in it.  Replace it
	 * by a maximal one.  This usually oversizes the "struct", but
	 * enlarging it while searching for logical drives would be
	 * inconvenient.
	 */
	kfree(*sspp, M_DEVBUF);
	ssp = dsmakeslicestruct(MAX_SLICES, info);
	*sspp = ssp;

	/* Initialize normal slices. */
	sp = &ssp->dss_slices[BASE_SLICE];
	for (dospart = 0, dp = dp0; dospart < NDOSPART; dospart++, dp++, sp++) {
		sname = dsname(dev, dkunit(dev), BASE_SLICE + dospart,
			       WHOLE_SLICE_PART, partname);
		(void)mbr_setslice(sname, info, sp, dp, mbr_offset);
	}
	ssp->dss_nslices = BASE_SLICE + NDOSPART;

	/* Handle extended partitions. */
	sp -= NDOSPART;
	for (dospart = 0; dospart < NDOSPART; dospart++, sp++) {
		if (sp->ds_type == DOSPTYP_EXTENDED ||
		    sp->ds_type == DOSPTYP_EXTENDEDX) {
			mbr_extended(wdev, info, ssp,
				     sp->ds_offset, sp->ds_size, sp->ds_offset,
				     max_nsectors, max_ntracks, mbr_offset, 1);
		}
	}

	/*
	 * mbr_extended() abuses ssp->dss_nslices for the number of slices
	 * that would be found if there were no limit on the number of slices
	 * in *ssp.  Cut it back now.
	 */
	if (ssp->dss_nslices > MAX_SLICES)
		ssp->dss_nslices = MAX_SLICES;

done:
	bp->b_flags |= B_INVAL | B_AGE;
	relpbuf(bp, NULL);
	if (error == EINVAL)
		error = 0;
	return (error);
}
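The "weakly verify it" step and the aligned copy of the partition entries can be reproduced against any disk image. The sketch below assumes the classic MBR layout (446 bytes of boot code, four 16-byte entries, the 0x55AA signature at offset 0x1FE) and a little-endian host; it is an illustration, not a replacement for mbrinit().

/* Userland sketch of the MBR check in mbrinit()/mbr_extended(): read one
 * 512-byte sector, check the 0x55AA signature, and memcpy the four primary
 * entries out of the sector buffer to avoid alignment problems, just as the
 * code above copies them into dpcopy[]. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define SECTOR		512
#define DOSPARTOFF	446
#define NDOSPART	4

struct mbr_part {			/* classic MBR partition entry */
	uint8_t  flag;
	uint8_t  start_chs[3];
	uint8_t  typ;
	uint8_t  end_chs[3];
	uint32_t start_lba;		/* little-endian on disk */
	uint32_t nsectors;
} __attribute__((packed));

int
main(int argc, char **argv)
{
	unsigned char cp[SECTOR];
	struct mbr_part dpcopy[NDOSPART];
	int fd, i;

	if (argc != 2 || (fd = open(argv[1], O_RDONLY)) < 0)
		return (1);
	if (pread(fd, cp, SECTOR, 0) != SECTOR)
		return (1);

	/* Weakly verify it, exactly as mbrinit() does. */
	if (cp[0x1FE] != 0x55 || cp[0x1FF] != 0xAA) {
		fprintf(stderr, "invalid partition table: no magic\n");
		return (1);
	}

	/* Copy the table out of the sector buffer before dereferencing it. */
	memcpy(dpcopy, cp + DOSPARTOFF, sizeof(dpcopy));
	for (i = 0; i < NDOSPART; i++)
		printf("slice %d: type 0x%02x start %u size %u\n", i + 1,
		    dpcopy[i].typ, (unsigned)dpcopy[i].start_lba,
		    (unsigned)dpcopy[i].nsectors);

	close(fd);
	return (0);
}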
Example 14
0
static
void
mbr_extended(cdev_t dev, struct disk_info *info, struct diskslices *ssp,
	    u_int64_t ext_offset, u_int64_t ext_size, u_int64_t base_ext_offset,
	    int nsectors, int ntracks, u_int64_t mbr_offset, int level)
{
	struct buf *bp;
	u_char	*cp;
	int	dospart;
	struct dos_partition *dp;
	struct dos_partition dpcopy[NDOSPART];
	u_int64_t ext_offsets[NDOSPART];
	u_int64_t ext_sizes[NDOSPART];
	char	partname[2];
	int	slice;
	char	*sname;
	struct diskslice *sp;

	if (level >= 16) {
		kprintf(
	"%s: excessive recursion in search for slices; aborting search\n",
		       devtoname(dev));
		return;
	}

	/* Read extended boot record. */
	bp = getpbuf_mem(NULL);
	KKASSERT((int)info->d_media_blksize <= bp->b_bufsize);
	bp->b_bio1.bio_offset = (off_t)ext_offset * info->d_media_blksize;
	bp->b_bio1.bio_done = biodone_sync;
	bp->b_bio1.bio_flags |= BIO_SYNC;
	bp->b_bcount = info->d_media_blksize;
	bp->b_cmd = BUF_CMD_READ;
	bp->b_flags |= B_FAILONDIS;
	dev_dstrategy(dev, &bp->b_bio1);
	if (biowait(&bp->b_bio1, "mbrrd") != 0) {
		diskerr(&bp->b_bio1, dev,
			"reading extended partition table: error",
			LOG_PRINTF, 0);
		kprintf("\n");
		goto done;
	}

	/* Weakly verify it. */
	cp = bp->b_data;
	if (cp[0x1FE] != 0x55 || cp[0x1FF] != 0xAA) {
		sname = dsname(dev, dkunit(dev), WHOLE_DISK_SLICE, WHOLE_SLICE_PART,
			       partname);
		if (bootverbose)
			kprintf("%s: invalid extended partition table: no magic\n",
			       sname);
		goto done;
	}

	/* Make a copy of the partition table to avoid alignment problems. */
	memcpy(&dpcopy[0], cp + DOSPARTOFF, sizeof(dpcopy));

	slice = ssp->dss_nslices;
	for (dospart = 0, dp = &dpcopy[0]; dospart < NDOSPART;
	    dospart++, dp++) {
		ext_sizes[dospart] = 0;
		if (dp->dp_scyl == 0 && dp->dp_shd == 0 && dp->dp_ssect == 0
		    && dp->dp_start == 0 && dp->dp_size == 0)
			continue;
		if (dp->dp_typ == DOSPTYP_EXTENDED ||
		    dp->dp_typ == DOSPTYP_EXTENDEDX) {
			static char buf[32];

			sname = dsname(dev, dkunit(dev), WHOLE_DISK_SLICE,
				       WHOLE_SLICE_PART, partname);
			ksnprintf(buf, sizeof(buf), "%s", sname);
			if (strlen(buf) < sizeof buf - 11)
				strcat(buf, "<extended>");
			check_part(buf, dp, base_ext_offset, nsectors,
				   ntracks, mbr_offset);
			ext_offsets[dospart] = base_ext_offset + dp->dp_start;
			ext_sizes[dospart] = dp->dp_size;
		} else {
			sname = dsname(dev, dkunit(dev), slice, WHOLE_SLICE_PART,
				       partname);
			check_part(sname, dp, ext_offset, nsectors, ntracks,
				   mbr_offset);
			if (slice >= MAX_SLICES) {
				kprintf("%s: too many slices\n", sname);
				slice++;
				continue;
			}
			sp = &ssp->dss_slices[slice];
			if (mbr_setslice(sname, info, sp, dp, ext_offset) != 0)
				continue;
			slice++;
		}
	}
	ssp->dss_nslices = slice;

	/* If we found any more slices, recursively find all the subslices. */
	for (dospart = 0; dospart < NDOSPART; dospart++) {
		if (ext_sizes[dospart] != 0) {
			mbr_extended(dev, info, ssp, ext_offsets[dospart],
				     ext_sizes[dospart], base_ext_offset,
				     nsectors, ntracks, mbr_offset, ++level);
		}
	}

done:
	bp->b_flags |= B_INVAL | B_AGE;
	relpbuf(bp, NULL);
}
Example 15
0
/*
 * small block filesystem vnode pager input
 */
static int
vnode_pager_input_smlfs(vm_object_t object, vm_page_t m)
{
	struct vnode *vp;
	struct bufobj *bo;
	struct buf *bp;
	struct sf_buf *sf;
	daddr_t fileaddr;
	vm_offset_t bsize;
	vm_page_bits_t bits;
	int error, i;

	error = 0;
	vp = object->handle;
	if (vp->v_iflag & VI_DOOMED)
		return VM_PAGER_BAD;

	bsize = vp->v_mount->mnt_stat.f_iosize;

	VOP_BMAP(vp, 0, &bo, 0, NULL, NULL);

	sf = sf_buf_alloc(m, 0);

	for (i = 0; i < PAGE_SIZE / bsize; i++) {
		vm_ooffset_t address;

		bits = vm_page_bits(i * bsize, bsize);
		if (m->valid & bits)
			continue;

		address = IDX_TO_OFF(m->pindex) + i * bsize;
		if (address >= object->un_pager.vnp.vnp_size) {
			fileaddr = -1;
		} else {
			error = vnode_pager_addr(vp, address, &fileaddr, NULL);
			if (error)
				break;
		}
		if (fileaddr != -1) {
			bp = getpbuf(&vnode_pbuf_freecnt);

			/* build a minimal buffer header */
			bp->b_iocmd = BIO_READ;
			bp->b_iodone = bdone;
			KASSERT(bp->b_rcred == NOCRED, ("leaking read ucred"));
			KASSERT(bp->b_wcred == NOCRED, ("leaking write ucred"));
			bp->b_rcred = crhold(curthread->td_ucred);
			bp->b_wcred = crhold(curthread->td_ucred);
			bp->b_data = (caddr_t)sf_buf_kva(sf) + i * bsize;
			bp->b_blkno = fileaddr;
			pbgetbo(bo, bp);
			bp->b_vp = vp;
			bp->b_bcount = bsize;
			bp->b_bufsize = bsize;
			bp->b_runningbufspace = bp->b_bufsize;
			atomic_add_long(&runningbufspace, bp->b_runningbufspace);

			/* do the input */
			bp->b_iooffset = dbtob(bp->b_blkno);
			bstrategy(bp);

			bwait(bp, PVM, "vnsrd");

			if ((bp->b_ioflags & BIO_ERROR) != 0)
				error = EIO;

			/*
			 * free the buffer header back to the swap buffer pool
			 */
			bp->b_vp = NULL;
			pbrelbo(bp);
			relpbuf(bp, &vnode_pbuf_freecnt);
			if (error)
				break;
		} else
			bzero((caddr_t)sf_buf_kva(sf) + i * bsize, bsize);
		KASSERT((m->dirty & bits) == 0,
		    ("vnode_pager_input_smlfs: page %p is dirty", m));
		VM_OBJECT_WLOCK(object);
		m->valid |= bits;
		VM_OBJECT_WUNLOCK(object);
	}
	sf_buf_free(sf);
	if (error) {
		return VM_PAGER_ERROR;
	}
	return VM_PAGER_OK;
}
Example 16
0
/*
 * Vnode op for VM putpages.
 * possible bug: all IO done in sync mode
 * Note that vop_close always invalidate pages before close, so it's
 * not necessary to open vnode.
 *
 * nwfs_putpages(struct vnode *a_vp, vm_page_t *a_m, int a_count,
 *		 int a_sync, int *a_rtvals, vm_ooffset_t a_offset)
 */
int
nwfs_putpages(struct vop_putpages_args *ap)
{
	int error;
	struct thread *td = curthread;	/* XXX */
	struct vnode *vp = ap->a_vp;
	struct ucred *cred;

#ifndef NWFS_RWCACHE
	KKASSERT(td->td_proc);
	cred = td->td_proc->p_ucred;		/* XXX */
	VOP_OPEN(vp, FWRITE, cred, NULL);
	error = vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
		ap->a_sync, ap->a_rtvals);
	VOP_CLOSE(vp, FWRITE, cred);
	return error;
#else
	struct uio uio;
	struct iovec iov;
	vm_offset_t kva;
	struct buf *bp;
	int i, npages, count;
	int *rtvals;
	struct nwmount *nmp;
	struct nwnode *np;
	vm_page_t *pages;

	KKASSERT(td->td_proc);
	cred = td->td_proc->p_ucred;		/* XXX */

/*	VOP_OPEN(vp, FWRITE, cred, NULL);*/
	np = VTONW(vp);
	nmp = VFSTONWFS(vp->v_mount);
	pages = ap->a_m;
	count = ap->a_count;
	rtvals = ap->a_rtvals;
	npages = btoc(count);

	for (i = 0; i < npages; i++) {
		rtvals[i] = VM_PAGER_AGAIN;
	}

	bp = getpbuf_kva(&nwfs_pbuf_freecnt);
	kva = (vm_offset_t) bp->b_data;
	pmap_qenter(kva, pages, npages);

	iov.iov_base = (caddr_t) kva;
	iov.iov_len = count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = IDX_TO_OFF(pages[0]->pindex);
	uio.uio_resid = count;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_WRITE;
	uio.uio_td = td;
	NCPVNDEBUG("ofs=%d,resid=%d\n",(int)uio.uio_offset, uio.uio_resid);

	error = ncp_write(NWFSTOCONN(nmp), &np->n_fh, &uio, cred);
/*	VOP_CLOSE(vp, FWRITE, cred);*/
	NCPVNDEBUG("paged write done: %d\n", error);

	pmap_qremove(kva, npages);
	relpbuf(bp, &nwfs_pbuf_freecnt);

	if (!error) {
		int nwritten = round_page(count - uio.uio_resid) / PAGE_SIZE;
		for (i = 0; i < nwritten; i++) {
			rtvals[i] = VM_PAGER_OK;
			vm_page_undirty(pages[i]);
		}
	}
	return rtvals[0];
#endif /* NWFS_RWCACHE */
}
Example 17
0
/*
 * This is now called from local media FS's to operate against their
 * own vnodes if they fail to implement VOP_GETPAGES.
 */
int
vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int count,
    int *a_rbehind, int *a_rahead, vop_getpages_iodone_t iodone, void *arg)
{
	vm_object_t object;
	struct bufobj *bo;
	struct buf *bp;
	off_t foff;
#ifdef INVARIANTS
	off_t blkno0;
#endif
	int bsize, pagesperblock, *freecnt;
	int error, before, after, rbehind, rahead, poff, i;
	int bytecount, secmask;

	KASSERT(vp->v_type != VCHR && vp->v_type != VBLK,
	    ("%s does not support devices", __func__));

	if (vp->v_iflag & VI_DOOMED)
		return (VM_PAGER_BAD);

	object = vp->v_object;
	foff = IDX_TO_OFF(m[0]->pindex);
	bsize = vp->v_mount->mnt_stat.f_iosize;
	pagesperblock = bsize / PAGE_SIZE;

	KASSERT(foff < object->un_pager.vnp.vnp_size,
	    ("%s: page %p offset beyond vp %p size", __func__, m[0], vp));
	KASSERT(count <= sizeof(bp->b_pages),
	    ("%s: requested %d pages", __func__, count));

	/*
	 * The last page has valid blocks.  Invalid part can only
	 * exist at the end of file, and the page is made fully valid
	 * by zeroing in vm_pager_get_pages().
	 */
	if (m[count - 1]->valid != 0 && --count == 0) {
		if (iodone != NULL)
			iodone(arg, m, 1, 0);
		return (VM_PAGER_OK);
	}

	/*
	 * Synchronous and asynchronous paging operations use different
	 * free pbuf counters.  This is done to avoid asynchronous requests
	 * to consume all pbufs.
	 * Allocate the pbuf at the very beginning of the function, so that
	 * if we are low on certain kind of pbufs don't even proceed to BMAP,
	 * but sleep.
	 */
	freecnt = iodone != NULL ?
	    &vnode_async_pbuf_freecnt : &vnode_pbuf_freecnt;
	bp = getpbuf(freecnt);

	/*
	 * Get the underlying device blocks for the file with VOP_BMAP().
	 * If the file system doesn't support VOP_BMAP, use old way of
	 * getting pages via VOP_READ.
	 */
	error = VOP_BMAP(vp, foff / bsize, &bo, &bp->b_blkno, &after, &before);
	if (error == EOPNOTSUPP) {
		relpbuf(bp, freecnt);
		VM_OBJECT_WLOCK(object);
		for (i = 0; i < count; i++) {
			PCPU_INC(cnt.v_vnodein);
			PCPU_INC(cnt.v_vnodepgsin);
			error = vnode_pager_input_old(object, m[i]);
			if (error)
				break;
		}
		VM_OBJECT_WUNLOCK(object);
		return (error);
	} else if (error != 0) {
		relpbuf(bp, freecnt);
		return (VM_PAGER_ERROR);
	}

	/*
	 * If the file system supports BMAP, but blocksize is smaller
	 * than a page size, then use special small filesystem code.
	 */
	if (pagesperblock == 0) {
		relpbuf(bp, freecnt);
		for (i = 0; i < count; i++) {
			PCPU_INC(cnt.v_vnodein);
			PCPU_INC(cnt.v_vnodepgsin);
			error = vnode_pager_input_smlfs(object, m[i]);
			if (error)
				break;
		}
		return (error);
	}

	/*
	 * A sparse file can be encountered only for a single page request,
	 * which may not be preceded by call to vm_pager_haspage().
	 */
	if (bp->b_blkno == -1) {
		KASSERT(count == 1,
		    ("%s: array[%d] request to a sparse file %p", __func__,
		    count, vp));
		relpbuf(bp, freecnt);
		pmap_zero_page(m[0]);
		KASSERT(m[0]->dirty == 0, ("%s: page %p is dirty",
		    __func__, m[0]));
		VM_OBJECT_WLOCK(object);
		m[0]->valid = VM_PAGE_BITS_ALL;
		VM_OBJECT_WUNLOCK(object);
		return (VM_PAGER_OK);
	}

#ifdef INVARIANTS
	blkno0 = bp->b_blkno;
#endif
	bp->b_blkno += (foff % bsize) / DEV_BSIZE;

	/* Recalculate blocks available after/before to pages. */
	poff = (foff % bsize) / PAGE_SIZE;
	before *= pagesperblock;
	before += poff;
	after *= pagesperblock;
	after += pagesperblock - (poff + 1);
	if (m[0]->pindex + after >= object->size)
		after = object->size - 1 - m[0]->pindex;
	KASSERT(count <= after + 1, ("%s: %d pages asked, can do only %d",
	    __func__, count, after + 1));
	after -= count - 1;

	/* Trim requested rbehind/rahead to possible values. */   
	rbehind = a_rbehind ? *a_rbehind : 0;
	rahead = a_rahead ? *a_rahead : 0;
	rbehind = min(rbehind, before);
	rbehind = min(rbehind, m[0]->pindex);
	rahead = min(rahead, after);
	rahead = min(rahead, object->size - m[count - 1]->pindex);
	/*
	 * Check that total amount of pages fit into buf.  Trim rbehind and
	 * rahead evenly if not.
	 */
	if (rbehind + rahead + count > nitems(bp->b_pages)) {
		int trim, sum;

		trim = rbehind + rahead + count - nitems(bp->b_pages) + 1;
		sum = rbehind + rahead;
		if (rbehind == before) {
			/* Roundup rbehind trim to block size. */
			rbehind -= roundup(trim * rbehind / sum, pagesperblock);
			if (rbehind < 0)
				rbehind = 0;
		} else
			rbehind -= trim * rbehind / sum;
		rahead -= trim * rahead / sum;
	}
	KASSERT(rbehind + rahead + count <= nitems(bp->b_pages),
	    ("%s: behind %d ahead %d count %d", __func__,
	    rbehind, rahead, count));

	/*
	 * Fill in the bp->b_pages[] array with requested and optional   
	 * read behind or read ahead pages.  Read behind pages are looked
	 * up in a backward direction, down to a first cached page.  Same
	 * for read ahead pages, but there is no need to shift the array
	 * in case of encountering a cached page.
	 */
	i = bp->b_npages = 0;
	if (rbehind) {
		vm_pindex_t startpindex, tpindex;
		vm_page_t p;

		VM_OBJECT_WLOCK(object);
		startpindex = m[0]->pindex - rbehind;
		if ((p = TAILQ_PREV(m[0], pglist, listq)) != NULL &&
		    p->pindex >= startpindex)
			startpindex = p->pindex + 1;

		/* tpindex is unsigned; beware of numeric underflow. */
		for (tpindex = m[0]->pindex - 1;
		    tpindex >= startpindex && tpindex < m[0]->pindex;
		    tpindex--, i++) {
			p = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL);
			if (p == NULL) {
				/* Shift the array. */
				for (int j = 0; j < i; j++)
					bp->b_pages[j] = bp->b_pages[j + 
					    tpindex + 1 - startpindex]; 
				break;
			}
			bp->b_pages[tpindex - startpindex] = p;
		}

		bp->b_pgbefore = i;
		bp->b_npages += i;
		bp->b_blkno -= IDX_TO_OFF(i) / DEV_BSIZE;
	} else
		bp->b_pgbefore = 0;

	/* Requested pages. */
	for (int j = 0; j < count; j++, i++)
		bp->b_pages[i] = m[j];
	bp->b_npages += count;

	if (rahead) {
		vm_pindex_t endpindex, tpindex;
		vm_page_t p;

		if (!VM_OBJECT_WOWNED(object))
			VM_OBJECT_WLOCK(object);
		endpindex = m[count - 1]->pindex + rahead + 1;
		if ((p = TAILQ_NEXT(m[count - 1], listq)) != NULL &&
		    p->pindex < endpindex)
			endpindex = p->pindex;
		if (endpindex > object->size)
			endpindex = object->size;

		for (tpindex = m[count - 1]->pindex + 1;
		    tpindex < endpindex; i++, tpindex++) {
			p = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL);
			if (p == NULL)
				break;
			bp->b_pages[i] = p;
		}

		bp->b_pgafter = i - bp->b_npages;
		bp->b_npages = i;
	} else
		bp->b_pgafter = 0;

	if (VM_OBJECT_WOWNED(object))
		VM_OBJECT_WUNLOCK(object);

	/* Report back actual behind/ahead read. */
	if (a_rbehind)
		*a_rbehind = bp->b_pgbefore;
	if (a_rahead)
		*a_rahead = bp->b_pgafter;

#ifdef INVARIANTS
	KASSERT(bp->b_npages <= nitems(bp->b_pages),
	    ("%s: buf %p overflowed", __func__, bp));
	for (int j = 1; j < bp->b_npages; j++)
		KASSERT(bp->b_pages[j]->pindex - 1 ==
		    bp->b_pages[j - 1]->pindex,
		    ("%s: pages array not consecutive, bp %p", __func__, bp));
#endif

	/*
	 * Recalculate first offset and bytecount with regards to read behind.
	 * Truncate bytecount to vnode real size and round up physical size
	 * for real devices.
	 */
	foff = IDX_TO_OFF(bp->b_pages[0]->pindex);
	bytecount = bp->b_npages << PAGE_SHIFT;
	if ((foff + bytecount) > object->un_pager.vnp.vnp_size)
		bytecount = object->un_pager.vnp.vnp_size - foff;
	secmask = bo->bo_bsize - 1;
	KASSERT(secmask < PAGE_SIZE && secmask > 0,
	    ("%s: sector size %d too large", __func__, secmask + 1));
	bytecount = (bytecount + secmask) & ~secmask;

	/*
	 * And map the pages to be read into the kva, if the filesystem
	 * requires mapped buffers.
	 */
	if ((vp->v_mount->mnt_kern_flag & MNTK_UNMAPPED_BUFS) != 0 &&
	    unmapped_buf_allowed) {
		bp->b_data = unmapped_buf;
		bp->b_offset = 0;
	} else {
		bp->b_data = bp->b_kvabase;
		pmap_qenter((vm_offset_t)bp->b_data, bp->b_pages, bp->b_npages);
	}

	/* Build a minimal buffer header. */
	bp->b_iocmd = BIO_READ;
	KASSERT(bp->b_rcred == NOCRED, ("leaking read ucred"));
	KASSERT(bp->b_wcred == NOCRED, ("leaking write ucred"));
	bp->b_rcred = crhold(curthread->td_ucred);
	bp->b_wcred = crhold(curthread->td_ucred);
	pbgetbo(bo, bp);
	bp->b_vp = vp;
	bp->b_bcount = bp->b_bufsize = bp->b_runningbufspace = bytecount;
	bp->b_iooffset = dbtob(bp->b_blkno);
	KASSERT(IDX_TO_OFF(m[0]->pindex - bp->b_pages[0]->pindex) ==
	    (blkno0 - bp->b_blkno) * DEV_BSIZE +
	    IDX_TO_OFF(m[0]->pindex) % bsize,
	    ("wrong offsets bsize %d m[0] %ju b_pages[0] %ju "
	    "blkno0 %ju b_blkno %ju", bsize,
	    (uintmax_t)m[0]->pindex, (uintmax_t)bp->b_pages[0]->pindex,
	    (uintmax_t)blkno0, (uintmax_t)bp->b_blkno));

	atomic_add_long(&runningbufspace, bp->b_runningbufspace);
	PCPU_INC(cnt.v_vnodein);
	PCPU_ADD(cnt.v_vnodepgsin, bp->b_npages);

	if (iodone != NULL) { /* async */
		bp->b_pgiodone = iodone;
		bp->b_caller1 = arg;
		bp->b_iodone = vnode_pager_generic_getpages_done_async;
		bp->b_flags |= B_ASYNC;
		BUF_KERNPROC(bp);
		bstrategy(bp);
		return (VM_PAGER_OK);
	} else {
		bp->b_iodone = bdone;
		bstrategy(bp);
		bwait(bp, PVM, "vnread");
		error = vnode_pager_generic_getpages_done(bp);
		for (i = 0; i < bp->b_npages; i++)
			bp->b_pages[i] = NULL;
		bp->b_vp = NULL;
		pbrelbo(bp);
		relpbuf(bp, &vnode_pbuf_freecnt);
		return (error != 0 ? VM_PAGER_ERROR : VM_PAGER_OK);
	}
}
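The proportional trimming of rbehind/rahead, so that the optional windows plus the requested pages fit into b_pages[], is self-contained arithmetic. Below is a sketch with MAXPAGES as a stand-in for nitems(bp->b_pages) and roundup() defined locally as in sys/param.h.

/* Sketch of the rbehind/rahead trimming above: when read-behind plus
 * read-ahead plus the requested pages would overflow the pbuf's page array,
 * shave the two optional windows in proportion to their sizes, keeping the
 * read-behind window rounded to whole filesystem blocks when it was limited
 * by "before". */
#include <stdio.h>

#define MAXPAGES	32	/* stand-in for nitems(bp->b_pages) */
#define roundup(x, y)	((((x) + ((y) - 1)) / (y)) * (y))

static void
trim_window(int count, int before, int pagesperblock, int *rbehind, int *rahead)
{
	int trim, sum;

	if (*rbehind + *rahead + count <= MAXPAGES)
		return;
	trim = *rbehind + *rahead + count - MAXPAGES + 1;
	sum = *rbehind + *rahead;
	if (*rbehind == before) {
		/* Round the read-behind trim up to a whole block. */
		*rbehind -= roundup(trim * *rbehind / sum, pagesperblock);
		if (*rbehind < 0)
			*rbehind = 0;
	} else
		*rbehind -= trim * *rbehind / sum;
	*rahead -= trim * *rahead / sum;
}

int
main(void)
{
	int rbehind = 16, rahead = 32, count = 8;

	trim_window(count, 16, 4, &rbehind, &rahead);
	printf("rbehind=%d rahead=%d count=%d (max %d)\n",
	    rbehind, rahead, count, MAXPAGES);	/* 8 + 16 + 8 <= 32 */
	return (0);
}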
Example 18
0
/*
 * This is now called from local media FS's to operate against their
 * own vnodes if they fail to implement VOP_GETPAGES.
 */
int
vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int bytecount,
    int reqpage, vop_getpages_iodone_t iodone, void *arg)
{
	vm_object_t object;
	struct bufobj *bo;
	struct buf *bp;
	daddr_t firstaddr, reqblock;
	off_t foff, pib;
	int pbefore, pafter, i, size, bsize, first, last, *freecnt;
	int count, error, before, after, secmask;

	KASSERT(vp->v_type != VCHR && vp->v_type != VBLK,
	    ("vnode_pager_generic_getpages does not support devices"));
	if (vp->v_iflag & VI_DOOMED)
		return (VM_PAGER_BAD);

	object = vp->v_object;
	count = bytecount / PAGE_SIZE;
	bsize = vp->v_mount->mnt_stat.f_iosize;

	/*
	 * Synchronous and asynchronous paging operations use different
	 * free pbuf counters.  This is done to avoid asynchronous requests
	 * to consume all pbufs.
	 * Allocate the pbuf at the very beginning of the function, so that
	 * if we are low on certain kind of pbufs don't even proceed to BMAP,
	 * but sleep.
	 */
	freecnt = iodone != NULL ?
	    &vnode_async_pbuf_freecnt : &vnode_pbuf_freecnt;
	bp = getpbuf(freecnt);

	/*
	 * Get the underlying device blocks for the file with VOP_BMAP().
	 * If the file system doesn't support VOP_BMAP, use old way of
	 * getting pages via VOP_READ.
	 */
	error = VOP_BMAP(vp, IDX_TO_OFF(m[reqpage]->pindex) / bsize, &bo,
	    &reqblock, &after, &before);
	if (error == EOPNOTSUPP) {
		relpbuf(bp, freecnt);
		VM_OBJECT_WLOCK(object);
		for (i = 0; i < count; i++)
			if (i != reqpage) {
				vm_page_lock(m[i]);
				vm_page_free(m[i]);
				vm_page_unlock(m[i]);
			}
		PCPU_INC(cnt.v_vnodein);
		PCPU_INC(cnt.v_vnodepgsin);
		error = vnode_pager_input_old(object, m[reqpage]);
		VM_OBJECT_WUNLOCK(object);
		return (error);
	} else if (error != 0) {
		relpbuf(bp, freecnt);
		vm_pager_free_nonreq(object, m, reqpage, count, FALSE);
		return (VM_PAGER_ERROR);

		/*
		 * If the blocksize is smaller than a page size, then use
		 * special small filesystem code.
		 */
	} else if ((PAGE_SIZE / bsize) > 1) {
		relpbuf(bp, freecnt);
		vm_pager_free_nonreq(object, m, reqpage, count, FALSE);
		PCPU_INC(cnt.v_vnodein);
		PCPU_INC(cnt.v_vnodepgsin);
		return (vnode_pager_input_smlfs(object, m[reqpage]));
	}

	/*
	 * Since the caller has busied the requested page, that page's valid
	 * field will not be changed by other threads.
	 */
	vm_page_assert_xbusied(m[reqpage]);

	/*
	 * If we have a completely valid page available to us, we can
	 * clean up and return.  Otherwise we have to re-read the
	 * media.
	 */
	if (m[reqpage]->valid == VM_PAGE_BITS_ALL) {
		relpbuf(bp, freecnt);
		vm_pager_free_nonreq(object, m, reqpage, count, FALSE);
		return (VM_PAGER_OK);
	} else if (reqblock == -1) {
		relpbuf(bp, freecnt);
		pmap_zero_page(m[reqpage]);
		KASSERT(m[reqpage]->dirty == 0,
		    ("vnode_pager_generic_getpages: page %p is dirty", m));
		VM_OBJECT_WLOCK(object);
		m[reqpage]->valid = VM_PAGE_BITS_ALL;
		vm_pager_free_nonreq(object, m, reqpage, count, TRUE);
		VM_OBJECT_WUNLOCK(object);
		return (VM_PAGER_OK);
	} else if (m[reqpage]->valid != 0) {
		VM_OBJECT_WLOCK(object);
		m[reqpage]->valid = 0;
		VM_OBJECT_WUNLOCK(object);
	}

	pib = IDX_TO_OFF(m[reqpage]->pindex) % bsize;
	pbefore = ((daddr_t)before * bsize + pib) / PAGE_SIZE;
	pafter = ((daddr_t)(after + 1) * bsize - pib) / PAGE_SIZE - 1;
	first = reqpage < pbefore ? 0 : reqpage - pbefore;
	last = reqpage + pafter >= count ? count - 1 : reqpage + pafter;
	if (first > 0 || last + 1 < count) {
		VM_OBJECT_WLOCK(object);
		for (i = 0; i < first; i++) {
			vm_page_lock(m[i]);
			vm_page_free(m[i]);
			vm_page_unlock(m[i]);
		}
		for (i = last + 1; i < count; i++) {
			vm_page_lock(m[i]);
			vm_page_free(m[i]);
			vm_page_unlock(m[i]);
		}
		VM_OBJECT_WUNLOCK(object);
	}

	/*
	 * here on direct device I/O
	 */
	firstaddr = reqblock;
	firstaddr += pib / DEV_BSIZE;
	firstaddr -= IDX_TO_OFF(reqpage - first) / DEV_BSIZE;

	/*
	 * The first and last page have been calculated now, move
	 * input pages to be zero based, and adjust the count.
	 */
	m += first;
	reqpage -= first;
	count = last - first + 1;

	/*
	 * calculate the file virtual address for the transfer
	 */
	foff = IDX_TO_OFF(m[0]->pindex);

	/*
	 * calculate the size of the transfer
	 */
	size = count * PAGE_SIZE;
	KASSERT(count > 0, ("zero count"));
	if ((foff + size) > object->un_pager.vnp.vnp_size)
		size = object->un_pager.vnp.vnp_size - foff;
	KASSERT(size > 0, ("zero size"));

	/*
	 * round up physical size for real devices.
	 */
	secmask = bo->bo_bsize - 1;
	KASSERT(secmask < PAGE_SIZE && secmask > 0,
	    ("vnode_pager_generic_getpages: sector size %d too large",
	    secmask + 1));
	size = (size + secmask) & ~secmask;

	/*
	 * and map the pages to be read into the kva, if the filesystem
	 * requires mapped buffers.
	 */
	if ((vp->v_mount->mnt_kern_flag & MNTK_UNMAPPED_BUFS) != 0 &&
	    unmapped_buf_allowed) {
		bp->b_data = unmapped_buf;
		bp->b_offset = 0;
	} else {
		bp->b_data = bp->b_kvabase;
		pmap_qenter((vm_offset_t)bp->b_data, m, count);
	}

	/* build a minimal buffer header */
	bp->b_iocmd = BIO_READ;
	KASSERT(bp->b_rcred == NOCRED, ("leaking read ucred"));
	KASSERT(bp->b_wcred == NOCRED, ("leaking write ucred"));
	bp->b_rcred = crhold(curthread->td_ucred);
	bp->b_wcred = crhold(curthread->td_ucred);
	bp->b_blkno = firstaddr;
	pbgetbo(bo, bp);
	bp->b_vp = vp;
	bp->b_bcount = size;
	bp->b_bufsize = size;
	bp->b_runningbufspace = bp->b_bufsize;
	for (i = 0; i < count; i++)
		bp->b_pages[i] = m[i];
	bp->b_npages = count;
	bp->b_pager.pg_reqpage = reqpage;
	atomic_add_long(&runningbufspace, bp->b_runningbufspace);

	PCPU_INC(cnt.v_vnodein);
	PCPU_ADD(cnt.v_vnodepgsin, count);

	/* do the input */
	bp->b_iooffset = dbtob(bp->b_blkno);

	if (iodone != NULL) { /* async */
		bp->b_pager.pg_iodone = iodone;
		bp->b_caller1 = arg;
		bp->b_iodone = vnode_pager_generic_getpages_done_async;
		bp->b_flags |= B_ASYNC;
		BUF_KERNPROC(bp);
		bstrategy(bp);
		/* Good bye! */
	} else {
		bp->b_iodone = bdone;
		bstrategy(bp);
		bwait(bp, PVM, "vnread");
		error = vnode_pager_generic_getpages_done(bp);
		for (i = 0; i < bp->b_npages; i++)
			bp->b_pages[i] = NULL;
		bp->b_vp = NULL;
		pbrelbo(bp);
		relpbuf(bp, &vnode_pbuf_freecnt);
	}

	return (error != 0 ? VM_PAGER_ERROR : VM_PAGER_OK);
}
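The pbefore/pafter computation above converts VOP_BMAP()'s block-level contiguity hints into a page range. The sketch below redoes that arithmetic for one made-up case, assuming the page array starts at file offset 0 and with PAGE_SZ standing in for PAGE_SIZE.

/* Arithmetic sketch for Example 18: turn the "before"/"after" contiguous
 * filesystem blocks reported by VOP_BMAP() into the range of pages
 * [first, last] that a single contiguous read can cover. */
#include <stdio.h>

#define PAGE_SZ	4096L

int
main(void)
{
	long bsize = 16384;		/* f_iosize: one FS block = 4 pages */
	int count = 16, reqpage = 7;
	int before = 1, after = 2;	/* contiguous blocks from VOP_BMAP */
	long foff = (long)reqpage * PAGE_SZ;	/* requested page's file offset */
	long pib = foff % bsize;	/* page's offset within its FS block */
	long pbefore, pafter;
	int first, last;

	pbefore = (before * bsize + pib) / PAGE_SZ;
	pafter = ((after + 1) * bsize - pib) / PAGE_SZ - 1;
	first = reqpage < pbefore ? 0 : (int)(reqpage - pbefore);
	last = reqpage + pafter >= count ? count - 1 : (int)(reqpage + pafter);
	printf("pages [%d, %d] of %d can be read contiguously\n",
	    first, last, count);
	return (0);
}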