Example #1
int
sysvbfs_strategy(void *arg)
{
	struct vop_strategy_args /* {
		struct vnode *a_vp;
		struct buf *a_bp;
	} */ *a = arg;
	struct buf *b = a->a_bp;
	struct vnode *v = a->a_vp;
	struct sysvbfs_node *bnode = v->v_data;
	struct sysvbfs_mount *bmp = bnode->bmp;
	int error;

	DPRINTF("%s:\n", __func__);
	KDASSERT(v->v_type == VREG);
	if (b->b_blkno == b->b_lblkno) {
		error = VOP_BMAP(v, b->b_lblkno, NULL, &b->b_blkno, NULL);
		if (error) {
			b->b_error = error;
			biodone(b);
			return error;
		}
		if ((long)b->b_blkno == -1)
			clrbuf(b);
	}
	if ((long)b->b_blkno == -1) {
		biodone(b);
		return 0;
	}

	return VOP_STRATEGY(bmp->devvp, b);
}
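The routine above shows the convention shared by most strategy routines in this collection: b_blkno == b_lblkno means the buffer has not been translated yet, so VOP_BMAP is called once to map the logical block to a physical one, and a translated block number of -1 marks a hole that is zero-filled and completed without any device I/O. The userspace sketch below only models that decision logic with a toy structure; the field names mirror struct buf, but none of this is kernel code.

/*
 * Toy model (not kernel code) of the two buffer conventions used by the
 * strategy routines in these examples.
 */
#include <stdio.h>

struct toy_buf {
	long b_lblkno;	/* logical block within the file */
	long b_blkno;	/* physical block; -1 marks a hole */
};

int
main(void)
{
	struct toy_buf b = { .b_lblkno = 7, .b_blkno = 7 };

	/* b_blkno == b_lblkno: not translated yet, this is where VOP_BMAP runs */
	if (b.b_blkno == b.b_lblkno)
		printf("block %ld still needs VOP_BMAP translation\n", b.b_lblkno);

	/* a translation of -1 is a hole: clrbuf + biodone, no VOP_STRATEGY */
	b.b_blkno = -1;
	if (b.b_blkno == -1)
		printf("hole at logical block %ld: zero-fill and complete\n",
		    b.b_lblkno);
	return 0;
}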
Example #2
/*
 * Calculate the logical to physical mapping if not done already,
 * then call the device strategy routine.
 */
int
cd9660_strategy(void *v)
{
	struct vop_strategy_args /* {
		struct vnode *a_vp;
		struct buf *a_bp;
	} */ *ap = v;
	struct buf *bp = ap->a_bp;
	struct vnode *vp = ap->a_vp;
	struct iso_node *ip;
	int error;

	ip = VTOI(vp);
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		panic("cd9660_strategy: spec");
	if (bp->b_blkno == bp->b_lblkno) {
		error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL);
		if (error) {
			bp->b_error = error;
			biodone(bp);
			return (error);
		}
		if ((long)bp->b_blkno == -1)
			clrbuf(bp);
	}
	if ((long)bp->b_blkno == -1) {
		biodone(bp);
		return (0);
	}
	vp = ip->i_mnt->im_devvp;
	return (VOP_STRATEGY(vp, bp));
}
Example #3
File: v7fs_vnops.c, Project: ryo/netbsd-src
int
v7fs_strategy(void *v)
{
	struct vop_strategy_args /* {
				    struct vnode *a_vp;
				    struct buf *a_bp;
				    } */ *a = v;
	struct buf *b = a->a_bp;
	struct vnode *vp = a->a_vp;
	struct v7fs_node *v7node = vp->v_data;
	struct v7fs_mount *v7fsmount = v7node->v7fsmount;
	int error;

	DPRINTF("%p\n", vp);
	KDASSERT(vp->v_type == VREG);
	if (b->b_blkno == b->b_lblkno) {
		error = VOP_BMAP(vp, b->b_lblkno, NULL, &b->b_blkno, NULL);
		if (error) {
			b->b_error = error;
			biodone(b);
			return error;
		}
		if ((long)b->b_blkno == -1)
			clrbuf(b);
	}
	if ((long)b->b_blkno == -1) {
		biodone(b);
		return 0;
	}

	return VOP_STRATEGY(v7fsmount->devvp, b);
}
Example #4
/*
 * Calculate the logical to physical mapping if not done already,
 * then call the device strategy routine.
 */
int
filecore_strategy(void *v)
{
	struct vop_strategy_args /* {
		struct vnode *a_vp;
		struct buf *a_bp;
	} */ *ap = v;
	struct buf *bp = ap->a_bp;
	struct vnode *vp = ap->a_vp;
	struct filecore_node *ip;
	int error;

	ip = VTOI(vp);
	if (bp->b_blkno == bp->b_lblkno) {
		error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL);
		if (error) {
			bp->b_error = error;
			biodone(bp);
			return (error);
		}
		if ((long)bp->b_blkno == -1)
			clrbuf(bp);
	}
	if ((long)bp->b_blkno == -1) {
		biodone(bp);
		return (0);
	}
	vp = ip->i_devvp;
	return (VOP_STRATEGY(vp, bp));
}
Example #5
/*
 * calculate the linear (byte) disk address of specified virtual
 * file address
 */
static int
vnode_pager_addr(struct vnode *vp, vm_ooffset_t address, daddr_t *rtaddress,
    int *run)
{
	int bsize;
	int err;
	daddr_t vblock;
	daddr_t voffset;

	if (address < 0)
		return -1;

	if (vp->v_iflag & VI_DOOMED)
		return -1;

	bsize = vp->v_mount->mnt_stat.f_iosize;
	vblock = address / bsize;
	voffset = address % bsize;

	err = VOP_BMAP(vp, vblock, NULL, rtaddress, run, NULL);
	if (err == 0) {
		if (*rtaddress != -1)
			*rtaddress += voffset / DEV_BSIZE;
		if (run) {
			*run += 1;
			*run *= bsize/PAGE_SIZE;
			*run -= voffset/PAGE_SIZE;
		}
	}

	return (err);
}
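A standalone sketch of the arithmetic above: the byte address is split into a filesystem block and an in-block offset, the offset is converted into DEV_BSIZE sectors, and the run returned by VOP_BMAP is converted from blocks into pages. The block size, the sector and page sizes, and the pretend BMAP result are all assumptions chosen for illustration.

#include <stdio.h>

#define DEV_BSIZE	512
#define PAGE_SIZE	4096

int
main(void)
{
	long long address = 50000;	/* byte offset into the file */
	int bsize = 16384;		/* assumed f_iosize of the mount */
	long long vblock = address / bsize;	/* 3 */
	long long voffset = address % bsize;	/* 848 */

	/* pretend VOP_BMAP translated vblock to sector 2048 with run 1 */
	long long rtaddress = 2048;
	int run = 1;

	rtaddress += voffset / DEV_BSIZE;	/* step into the block: 2048 + 1 */
	run += 1;				/* run is "blocks after"; make it a count */
	run *= bsize / PAGE_SIZE;		/* blocks -> pages: 2 * 4 = 8 */
	run -= voffset / PAGE_SIZE;		/* drop pages before the address */

	printf("byte %lld -> block %lld + %lld bytes, sector %lld, %d pages\n",
	    address, vblock, voffset, rtaddress, run);
	return 0;
}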
Example #6
static int ReadMultipleNodes( BTScanState *theScanStatePtr )
{
	int						myErr = E_NONE;
	BTreeControlBlockPtr  	myBTreeCBPtr;
	daddr_t					myPhyBlockNum;
	u_int32_t				myBufferSize;
	struct vnode *			myDevPtr;
	int						myBlockRun;
	u_int32_t				myBlocksInBufferCount;

	// release old buffer if we have one
	if ( theScanStatePtr->bufferPtr != NULL )
	{
	    theScanStatePtr->bufferPtr->b_flags |= (B_INVAL | B_AGE);
		brelse( theScanStatePtr->bufferPtr );
		theScanStatePtr->bufferPtr = NULL;
		theScanStatePtr->currentNodePtr = NULL;
	}
	
	myBTreeCBPtr = theScanStatePtr->btcb;
			
	// map logical block in catalog btree file to physical block on volume
	myErr = VOP_BMAP( myBTreeCBPtr->fileRefNum, theScanStatePtr->nodeNum, 
					  &myDevPtr, &myPhyBlockNum, &myBlockRun );
	if ( myErr != E_NONE )
	{
		goto ExitThisRoutine;
	}

	// bmap block run gives us the remaining number of valid blocks (number of blocks 
	// minus the first).  so if there are 10 valid blocks our run number will be 9.
	// blocks, in our case is the same as nodes (both are 4K)
	myBlocksInBufferCount = (theScanStatePtr->bufferSize / myBTreeCBPtr->nodeSize );
	myBufferSize = theScanStatePtr->bufferSize;
	if ( (myBlockRun + 1) < myBlocksInBufferCount )
	{
		myBufferSize = (myBlockRun + 1) * myBTreeCBPtr->nodeSize;
	}
	
	// now read blocks from the device 
	myErr = bread( 	myDevPtr, 
							myPhyBlockNum, 
							myBufferSize,  
							NOCRED, 
							&theScanStatePtr->bufferPtr );
	if ( myErr != E_NONE )
	{
		goto ExitThisRoutine;
	}

	theScanStatePtr->nodesLeftInBuffer = theScanStatePtr->bufferPtr->b_bcount / theScanStatePtr->btcb->nodeSize;
	theScanStatePtr->currentNodePtr = (BTNodeDescriptor *) theScanStatePtr->bufferPtr->b_data;

ExitThisRoutine:
	return myErr;
	
} /* ReadMultipleNodes */
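The clamp computed above is easy to check by hand: VOP_BMAP's run counts the contiguous blocks after the first, so run + 1 is the total that one bread() may cover. A minimal sketch, assuming 4 KB nodes and a 32 KB scan buffer:

#include <stdio.h>

int
main(void)
{
	unsigned int nodeSize = 4096;		/* one node == one block */
	unsigned int bufferSize = 32768;	/* room for 8 nodes */
	int blockRun = 5;			/* BMAP: 5 more blocks after the first */

	unsigned int blocksInBuffer = bufferSize / nodeSize;	/* 8 */

	/* run + 1 contiguous blocks; shrink the read if fewer than the buffer holds */
	if ((unsigned int)(blockRun + 1) < blocksInBuffer)
		bufferSize = (blockRun + 1) * nodeSize;

	printf("reading %u bytes (%d contiguous nodes)\n", bufferSize, blockRun + 1);
	return 0;
}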
Example #7
static int
unionfs_bmap(void *v)
{
	struct vop_bmap_args *ap = v;
	struct unionfs_node *unp;
	struct vnode   *tvp;

	unp = VTOUNIONFS(ap->a_vp);
	tvp = (unp->un_uppervp != NULLVP ? unp->un_uppervp : unp->un_lowervp);

	return VOP_BMAP(tvp, ap->a_bn, ap->a_vpp, ap->a_bnp, ap->a_runp);
}
Example #8
int
RUMP_VOP_BMAP(struct vnode *vp,
    int64_t bn,
    struct vnode **vpp,
    int64_t *bnp,
    int *runp)
{
	int error;

	rump_schedule();
	error = VOP_BMAP(vp, bn, vpp, bnp, runp);
	rump_unschedule();

	return error;
}
Example #9
/*
 * Return whether the vnode pager has the requested page.  Return the
 * number of disk-contiguous pages before and after the requested page,
 * not including the requested page.
 */
static boolean_t
vnode_pager_haspage(vm_object_t object, vm_pindex_t pindex)
{
	struct vnode *vp = object->handle;
	off_t loffset;
	off_t doffset;
	int voff;
	int bsize;
	int error;

	/*
	 * If no vp or vp is doomed or marked transparent to VM, we do not
	 * have the page.
	 */
	if ((vp == NULL) || (vp->v_flag & VRECLAIMED))
		return FALSE;

	/*
	 * If filesystem no longer mounted or offset beyond end of file we do
	 * not have the page.
	 */
	loffset = IDX_TO_OFF(pindex);

	if (vp->v_mount == NULL || loffset >= vp->v_filesize)
		return FALSE;

	bsize = vp->v_mount->mnt_stat.f_iosize;
	voff = loffset % bsize;

	/*
	 * XXX
	 *
	 * BMAP returns byte counts before and after, where after
	 * is inclusive of the base page.  haspage must return page
	 * counts before and after where after does not include the
	 * base page.
	 *
	 * BMAP is allowed to return a *after of 0 for backwards
	 * compatibility.  The base page is still considered valid if
	 * no error is returned.
	 */
	error = VOP_BMAP(vp, loffset - voff, &doffset, NULL, NULL, 0);
	if (error)
		return TRUE;
	if (doffset == NOOFFSET)
		return FALSE;
	return TRUE;
}
Example #10
/*
 * Just call the device strategy routine
 */
int
adosfs_strategy(void *v)
{
	struct vop_strategy_args /* {
		struct vnode *a_vp;
		struct buf *a_bp;
	} */ *sp = v;
	struct buf *bp;
	struct anode *ap;
	struct vnode *vp;
	int error;

#ifdef ADOSFS_DIAGNOSTIC
	advopprint(sp);
#endif
	bp = sp->a_bp;
	if (bp->b_vp == NULL) {
		bp->b_error = EIO;
		biodone(bp);
		error = EIO;
		goto reterr;
	}
	vp = sp->a_vp;
	ap = VTOA(vp);
	if (bp->b_blkno == bp->b_lblkno) {
		error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL);
		if (error) {
			bp->b_error = error;
			biodone(bp);
			goto reterr;
		}
	}
	if ((long)bp->b_blkno == -1) {
		biodone(bp);
		error = 0;
		goto reterr;
	}
	vp = ap->amp->devvp;
	error = VOP_STRATEGY(vp, bp);
reterr:
#ifdef ADOSFS_DIAGNOSTIC
	printf(" %d)", error);
#endif
	return(error);
}
Example #11
static int
_xfs_strategy(
	struct vop_strategy_args /* {
		struct vnode *a_vp;
		struct buf *a_bp;
	} */ *ap)
{
	daddr_t blkno;
	struct buf *bp;
	struct bufobj *bo;
	struct vnode *vp;
	struct xfs_mount *xmp;
	int error;

	bp = ap->a_bp;
	vp = ap->a_vp;

	KASSERT(ap->a_vp == ap->a_bp->b_vp, ("%s(%p != %p)",
	    __func__, ap->a_vp, ap->a_bp->b_vp));
	if (bp->b_blkno == bp->b_lblkno) {
		error = VOP_BMAP(vp, bp->b_lblkno, NULL, &blkno, NULL, NULL);
		bp->b_blkno = blkno;
		bp->b_iooffset = (blkno << BBSHIFT);
		if (error) {
			bp->b_error = error;
			bp->b_ioflags |= BIO_ERROR;
			bufdone(bp);
			return (0);
		}
		if ((long)bp->b_blkno == -1)
			vfs_bio_clrbuf(bp);
	}
	if ((long)bp->b_blkno == -1) {
		bufdone(bp);
		return (0);
	}

	xmp = XFS_VFSTOM(MNTTOVFS(vp->v_mount));
	bo = &xmp->m_ddev_targp->specvp->v_bufobj;
	bo->bo_ops->bop_strategy(bo, bp);
	return (0);
}
Example #12
/*
 * Calculate the logical to physical mapping if not done already,
 * then call the device strategy routine.
 *
 * In order to be able to swap to a file, the VOP_BMAP operation may not
 * deadlock on memory.  See hpfs_bmap() for details. XXXXXXX (not impl)
 *
 * hpfs_strategy(struct vnode *a_vp, struct bio *a_bio)
 */
int
hpfs_strategy(struct vop_strategy_args *ap)
{
	struct bio *bio = ap->a_bio;
	struct bio *nbio;
	struct buf *bp = bio->bio_buf;
	struct vnode *vp = ap->a_vp;
	struct hpfsnode *hp;
	int error;

	dprintf(("hpfs_strategy(): \n"));

	if (vp->v_type == VBLK || vp->v_type == VCHR)
		panic("hpfs_strategy: spec");

	nbio = push_bio(bio);
	if (nbio->bio_offset == NOOFFSET) {
		error = VOP_BMAP(vp, bio->bio_offset, &nbio->bio_offset,
				 NULL, NULL, bp->b_cmd);
		if (error) {
			kprintf("hpfs_strategy: VOP_BMAP FAILED %d\n", error);
			bp->b_error = error;
			bp->b_flags |= B_ERROR;
			/* I/O was never started on nbio, must biodone(bio) */
			biodone(bio);
			return (error);
		}
		if (nbio->bio_offset == NOOFFSET)
			vfs_bio_clrbuf(bp);
	}
	if (nbio->bio_offset == NOOFFSET) {
		/* I/O was never started on nbio, must biodone(bio) */
		biodone(bio);
		return (0);
	}
	hp = VTOHP(ap->a_vp);
	vn_strategy(hp->h_devvp, nbio);
	return (0);
}
Example #13
File: exfs_vnops.c, Project: AnnaSchwarz/OS
/*
 * Calculate the logical to physical mapping if not done already,
 * then call the device strategy routine.
 */
int
exfs_strategy(void *v)
{
	struct vop_strategy_args *ap = v;
	struct buf *bp = ap->a_bp;
	struct vnode *vp = bp->b_vp;
	struct iso_node *ip;
	int error;
	int s;

	printf("vowel v exfs_strategy\n");
	ip = VTOI(vp);
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		panic("exfs_strategy: spec");
	if (bp->b_blkno == bp->b_lblkno) {
		error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL);
		if (error) {
			bp->b_error = error;
			bp->b_flags |= B_ERROR;
			s = splbio();
			biodone(bp);
			splx(s);
			return (error);
		}
		if ((long)bp->b_blkno == -1)
			clrbuf(bp);
	}
	if ((long)bp->b_blkno == -1) {
		s = splbio();
		biodone(bp);
		splx(s);
		return (0);
	}
	vp = ip->i_devvp;
	bp->b_dev = vp->v_rdev;
	(vp->v_op->vop_strategy)(ap);
	return (0);
}
Example #14
static bool
vnode_strategy_probe(struct vnd_softc *vnd)
{
	int error;
	daddr_t nbn;

	if (!vnode_has_strategy(vnd))
		return false;

	/* Convert the first logical block number to its
	 * physical block number.
	 */
	error = 0;
	vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_BMAP(vnd->sc_vp, 0, NULL, &nbn, NULL);
	VOP_UNLOCK(vnd->sc_vp);

	/* Test if that worked. */
	if (error == 0 && (long)nbn == -1)
		return false;

	return true;
}
Example #15
/*
 * Calculate the logical to physical mapping if not done already,
 * then call the device strategy routine.
 */
int
ufs_strategy(void *v)
{
	struct vop_strategy_args /* {
		struct vnode *a_vp;
		struct buf *a_bp;
	} */ *ap = v;
	struct buf	*bp;
	struct vnode	*vp;
	struct inode	*ip;
	struct mount	*mp;
	int		error;

	bp = ap->a_bp;
	vp = ap->a_vp;
	ip = VTOI(vp);
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		panic("ufs_strategy: spec");
	KASSERT(bp->b_bcount != 0);
	if (bp->b_blkno == bp->b_lblkno) {
		error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno,
				 NULL);
		if (error) {
			bp->b_error = error;
			biodone(bp);
			return (error);
		}
		if (bp->b_blkno == -1) /* no valid data */
			clrbuf(bp);
	}
	if (bp->b_blkno < 0) { /* block is not on disk */
		biodone(bp);
		return (0);
	}
	vp = ip->i_devvp;

	error = VOP_STRATEGY(vp, bp);
	if (error)
		return error;

	if (!BUF_ISREAD(bp))
		return 0;

	mp = wapbl_vptomp(vp);
	if (mp == NULL || mp->mnt_wapbl_replay == NULL ||
	    !WAPBL_REPLAY_ISOPEN(mp) ||
	    !WAPBL_REPLAY_CAN_READ(mp, bp->b_blkno, bp->b_bcount))
		return 0;

	error = biowait(bp);
	if (error)
		return error;

	error = WAPBL_REPLAY_READ(mp, bp->b_data, bp->b_blkno, bp->b_bcount);
	if (error) {
		mutex_enter(&bufcache_lock);
		SET(bp->b_cflags, BC_INVAL);
		mutex_exit(&bufcache_lock);
	}
	return error;
}
Example #16
static boolean_t
vnode_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before,
    int *after)
{
	struct vnode *vp = object->handle;
	daddr_t bn;
	int err;
	daddr_t reqblock;
	int poff;
	int bsize;
	int pagesperblock, blocksperpage;

	VM_OBJECT_ASSERT_WLOCKED(object);
	/*
	 * If no vp or vp is doomed or marked transparent to VM, we do not
	 * have the page.
	 */
	if (vp == NULL || vp->v_iflag & VI_DOOMED)
		return FALSE;
	/*
	 * If the offset is beyond end of file we do
	 * not have the page.
	 */
	if (IDX_TO_OFF(pindex) >= object->un_pager.vnp.vnp_size)
		return FALSE;

	bsize = vp->v_mount->mnt_stat.f_iosize;
	pagesperblock = bsize / PAGE_SIZE;
	blocksperpage = 0;
	if (pagesperblock > 0) {
		reqblock = pindex / pagesperblock;
	} else {
		blocksperpage = (PAGE_SIZE / bsize);
		reqblock = pindex * blocksperpage;
	}
	VM_OBJECT_WUNLOCK(object);
	err = VOP_BMAP(vp, reqblock, NULL, &bn, after, before);
	VM_OBJECT_WLOCK(object);
	if (err)
		return TRUE;
	if (bn == -1)
		return FALSE;
	if (pagesperblock > 0) {
		poff = pindex - (reqblock * pagesperblock);
		if (before) {
			*before *= pagesperblock;
			*before += poff;
		}
		if (after) {
			/*
			 * The BMAP vop can report a partial block in the
			 * 'after', but must not report blocks after EOF.
			 * Assert the latter, and truncate 'after' in case
			 * of the former.
			 */
			KASSERT((reqblock + *after) * pagesperblock <
			    roundup2(object->size, pagesperblock),
			    ("%s: reqblock %jd after %d size %ju", __func__,
			    (intmax_t )reqblock, *after,
			    (uintmax_t )object->size));
			*after *= pagesperblock;
			*after += pagesperblock - (poff + 1);
			if (pindex + *after >= object->size)
				*after = object->size - 1 - pindex;
		}
	} else {
		if (before) {
			*before /= blocksperpage;
		}

		if (after) {
			*after /= blocksperpage;
		}
	}
	return TRUE;
}
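The conversion above from VOP_BMAP's block-level before/after counts into page-level counts is worth seeing with numbers. The sketch below reproduces it with assumed values (16 KB blocks, so four pages per block) and leaves out the end-of-file clamp.

#include <stdio.h>

#define PAGE_SIZE 4096

int
main(void)
{
	int bsize = 16384;			/* assumed block size: 4 pages/block */
	int pagesperblock = bsize / PAGE_SIZE;
	long pindex = 10;			/* requested page */
	long reqblock = pindex / pagesperblock;	/* block 2 */
	int before = 1, after = 1;		/* contiguous blocks from VOP_BMAP */

	int poff = pindex - (reqblock * pagesperblock);	/* page 2 within the block */

	before = before * pagesperblock + poff;			/* 4 + 2 = 6 pages */
	after = after * pagesperblock + pagesperblock - (poff + 1);	/* 4 + 1 = 5 pages */

	printf("page %ld: %d contiguous pages before, %d after\n",
	    pindex, before, after);
	return 0;
}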
Example #17
/*
 * Handles the read/write request given in 'bp' using the vnode's VOP_BMAP
 * and VOP_STRATEGY operations.
 *
 * 'obp' is a pointer to the original request fed to the vnd device.
 */
static void
handle_with_strategy(struct vnd_softc *vnd, const struct buf *obp,
    struct buf *bp)
{
	int bsize, error, flags, skipped;
	size_t resid, sz;
	off_t bn, offset;
	struct vnode *vp;

	flags = obp->b_flags;

	if (!(flags & B_READ)) {
		vp = bp->b_vp;
		mutex_enter(vp->v_interlock);
		vp->v_numoutput++;
		mutex_exit(vp->v_interlock);
	}

	/* convert to a byte offset within the file. */
	bn = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize;

	bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize;
	skipped = 0;

	/*
	 * Break the request into bsize pieces and feed them
	 * sequentially using VOP_BMAP/VOP_STRATEGY.
	 * We do it this way to keep from flooding NFS servers if we
	 * are connected to an NFS file.  This places the burden on
	 * the client rather than the server.
	 */
	error = 0;
	bp->b_resid = bp->b_bcount;
	for (offset = 0, resid = bp->b_resid; resid;
	    resid -= sz, offset += sz) {
		struct buf *nbp;
		daddr_t nbn;
		int off, nra;

		nra = 0;
		vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra);
		VOP_UNLOCK(vnd->sc_vp);

		if (error == 0 && (long)nbn == -1)
			error = EIO;

		/*
		 * If there was an error or a hole in the file...punt.
		 * Note that we may have to wait for any operations
		 * that we have already fired off before releasing
		 * the buffer.
		 *
		 * XXX we could deal with holes here but it would be
		 * a hassle (in the write case).
		 */
		if (error) {
			skipped += resid;
			break;
		}

#ifdef DEBUG
		if (!dovndcluster)
			nra = 0;
#endif

		off = bn % bsize;
		sz = MIN(((off_t)1 + nra) * bsize - off, resid);
#ifdef	DEBUG
		if (vnddebug & VDB_IO)
			printf("vndstrategy: vp %p/%p bn 0x%qx/0x%" PRIx64
			    " sz 0x%zx\n", vnd->sc_vp, vp, (long long)bn,
			    nbn, sz);
#endif

		nbp = getiobuf(vp, true);
		nestiobuf_setup(bp, nbp, offset, sz);
		nbp->b_blkno = nbn + btodb(off);

#if 0 /* XXX #ifdef DEBUG */
		if (vnddebug & VDB_IO)
			printf("vndstart(%ld): bp %p vp %p blkno "
			    "0x%" PRIx64 " flags %x addr %p cnt 0x%x\n",
			    (long) (vnd-vnd_softc), &nbp->vb_buf,
			    nbp->vb_buf.b_vp, nbp->vb_buf.b_blkno,
			    nbp->vb_buf.b_flags, nbp->vb_buf.b_data,
			    nbp->vb_buf.b_bcount);
#endif
		VOP_STRATEGY(vp, nbp);
		bn += sz;
	}
	nestiobuf_done(bp, skipped, error);
}
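The chunking in the loop above can be reproduced in isolation: each pass maps bn / bsize and then issues at most (1 + nra) * bsize - off bytes, where nra is the read-ahead run reported by VOP_BMAP. The sketch below fakes nra as a constant simply to show how one request gets sliced; the block size and request size are assumptions.

#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

int
main(void)
{
	long long bsize = 8192;		/* f_iosize of the backing file system */
	long long bn = 4096;		/* starting byte offset, mid-block */
	long long resid = 40000;	/* bytes left in the original request */
	int nra = 1;			/* pretend BMAP always reports one extra block */

	while (resid > 0) {
		long long off = bn % bsize;
		long long sz = MIN((1 + nra) * bsize - off, resid);

		printf("offset %lld: issue %lld bytes\n", bn, sz);
		bn += sz;
		resid -= sz;
	}
	return 0;
}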
Example #18
int
lfs_bmapv(struct proc *p, fsid_t *fsidp, BLOCK_INFO *blkiov, int blkcnt)
{
	BLOCK_INFO *blkp;
	IFILE *ifp;
	struct buf *bp;
	struct inode *ip = NULL;
	struct lfs *fs;
	struct mount *mntp;
	struct ulfsmount *ump;
	struct vnode *vp;
	ino_t lastino;
	daddr_t v_daddr;
	int cnt, error;
	int numrefed = 0;

	lfs_cleaner_pid = p->p_pid;

	if ((mntp = vfs_getvfs(fsidp)) == NULL)
		return (ENOENT);

	ump = VFSTOULFS(mntp);
	if ((error = vfs_busy(mntp, NULL)) != 0)
		return (error);

	cnt = blkcnt;

	fs = VFSTOULFS(mntp)->um_lfs;

	error = 0;

	/* these were inside the initialization for the for loop */
	v_daddr = LFS_UNUSED_DADDR;
	lastino = LFS_UNUSED_INUM;
	for (blkp = blkiov; cnt--; ++blkp)
	{
		/*
		 * Get the IFILE entry (only once) and see if the file still
		 * exists.
		 */
		if (lastino != blkp->bi_inode) {
			/*
			 * Finish the old file, if there was one.  The presence
			 * of a usable vnode in vp is signaled by a valid
			 * v_daddr.
			 */
			if (v_daddr != LFS_UNUSED_DADDR) {
				lfs_vunref(vp);
				if (VTOI(vp)->i_lfs_iflags & LFSI_BMAP) {
					mutex_enter(vp->v_interlock);
					if (vget(vp, LK_NOWAIT) == 0) {
						if (! vrecycle(vp))
							vrele(vp);
					}
				}
				numrefed--;
			}

			/*
			 * Start a new file
			 */
			lastino = blkp->bi_inode;
			if (blkp->bi_inode == LFS_IFILE_INUM)
				v_daddr = fs->lfs_idaddr;
			else {
				LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
				v_daddr = ifp->if_daddr;
				brelse(bp, 0);
			}
			if (v_daddr == LFS_UNUSED_DADDR) {
				blkp->bi_daddr = LFS_UNUSED_DADDR;
				continue;
			}
			/*
			 * A regular call to VFS_VGET could deadlock
			 * here.  Instead, we try an unlocked access.
			 */
			mutex_enter(&ulfs_ihash_lock);
			vp = ulfs_ihashlookup(ump->um_dev, blkp->bi_inode);
			if (vp != NULL && !(vp->v_iflag & VI_XLOCK)) {
				ip = VTOI(vp);
				mutex_enter(vp->v_interlock);
				mutex_exit(&ulfs_ihash_lock);
				if (lfs_vref(vp)) {
					v_daddr = LFS_UNUSED_DADDR;
					continue;
				}
				numrefed++;
			} else {
				mutex_exit(&ulfs_ihash_lock);
				/*
				 * Don't VFS_VGET if we're being unmounted,
				 * since we hold vfs_busy().
				 */
				if (mntp->mnt_iflag & IMNT_UNMOUNT) {
					v_daddr = LFS_UNUSED_DADDR;
					continue;
				}
				error = VFS_VGET(mntp, blkp->bi_inode, &vp);
				if (error) {
					DLOG((DLOG_CLEAN, "lfs_bmapv: vget ino"
					      "%d failed with %d",
					      blkp->bi_inode,error));
					v_daddr = LFS_UNUSED_DADDR;
					continue;
				} else {
					KASSERT(VOP_ISLOCKED(vp));
					VTOI(vp)->i_lfs_iflags |= LFSI_BMAP;
					VOP_UNLOCK(vp);
					numrefed++;
				}
			}
			ip = VTOI(vp);
		} else if (v_daddr == LFS_UNUSED_DADDR) {
			/*
			 * This can only happen if the vnode is dead.
			 * Keep going.	Note that we DO NOT set the
			 * bi_addr to anything -- if we failed to get
			 * the vnode, for example, we want to assume
			 * conservatively that all of its blocks *are*
			 * located in the segment in question.
			 * lfs_markv will throw them out if we are
			 * wrong.
			 */
			/* blkp->bi_daddr = LFS_UNUSED_DADDR; */
			continue;
		}

		/* Past this point we are guaranteed that vp, ip are valid. */

		if (blkp->bi_lbn == LFS_UNUSED_LBN) {
			/*
			 * We just want the inode address, which is
			 * conveniently in v_daddr.
			 */
			blkp->bi_daddr = v_daddr;
		} else {
			daddr_t bi_daddr;

			/* XXX ondisk32 */
			error = VOP_BMAP(vp, blkp->bi_lbn, NULL,
					 &bi_daddr, NULL);
			if (error)
			{
				blkp->bi_daddr = LFS_UNUSED_DADDR;
				continue;
			}
			blkp->bi_daddr = LFS_DBTOFSB(fs, bi_daddr);
			/* Fill in the block size, too */
			if (blkp->bi_lbn >= 0)
				blkp->bi_size = lfs_blksize(fs, ip, blkp->bi_lbn);
			else
				blkp->bi_size = fs->lfs_bsize;
		}
	}

	/*
	 * Finish the old file, if there was one.  The presence
	 * of a usable vnode in vp is signaled by a valid v_daddr.
	 */
	if (v_daddr != LFS_UNUSED_DADDR) {
		lfs_vunref(vp);
		/* Recycle as above. */
		if (ip->i_lfs_iflags & LFSI_BMAP) {
			mutex_enter(vp->v_interlock);
			if (vget(vp, LK_NOWAIT) == 0) {
				if (! vrecycle(vp))
					vrele(vp);
			}
		}
		numrefed--;
	}

#ifdef DIAGNOSTIC
	if (numrefed != 0)
		panic("lfs_bmapv: numrefed=%d", numrefed);
#endif

	vfs_unbusy(mntp, false, NULL);

	return 0;
}
Example #19
int
lfs_markv(struct proc *p, fsid_t *fsidp, BLOCK_INFO *blkiov,
    int blkcnt)
{
	BLOCK_INFO *blkp;
	IFILE *ifp;
	struct buf *bp;
	struct inode *ip = NULL;
	struct lfs *fs;
	struct mount *mntp;
	struct vnode *vp = NULL;
	ino_t lastino;
	daddr_t b_daddr, v_daddr;
	int cnt, error;
	int do_again = 0;
	int numrefed = 0;
	ino_t maxino;
	size_t obsize;

	/* number of blocks/inodes that we have already bwrite'ed */
	int nblkwritten, ninowritten;

	if ((mntp = vfs_getvfs(fsidp)) == NULL)
		return (ENOENT);

	fs = VFSTOULFS(mntp)->um_lfs;

	if (fs->lfs_ronly)
		return EROFS;

	maxino = (lfs_fragstoblks(fs, VTOI(fs->lfs_ivnode)->i_ffs1_blocks) -
		      fs->lfs_cleansz - fs->lfs_segtabsz) * fs->lfs_ifpb;

	cnt = blkcnt;

	if ((error = vfs_busy(mntp, NULL)) != 0)
		return (error);

	/*
	 * This seglock is just to prevent the fact that we might have to sleep
	 * from allowing the possibility that our blocks might become
	 * invalid.
	 *
	 * It is also important to note here that unless we specify SEGM_CKP,
	 * any Ifile blocks that we might be asked to clean will never get
	 * to the disk.
	 */
	lfs_seglock(fs, SEGM_CLEAN | SEGM_CKP | SEGM_SYNC);

	/* Mark blocks/inodes dirty.  */
	error = 0;

	/* these were inside the initialization for the for loop */
	v_daddr = LFS_UNUSED_DADDR;
	lastino = LFS_UNUSED_INUM;
	nblkwritten = ninowritten = 0;
	for (blkp = blkiov; cnt--; ++blkp)
	{
		/* Bounds-check incoming data, avoid panic for failed VGET */
		if (blkp->bi_inode <= 0 || blkp->bi_inode >= maxino) {
			error = EINVAL;
			goto err3;
		}
		/*
		 * Get the IFILE entry (only once) and see if the file still
		 * exists.
		 */
		if (lastino != blkp->bi_inode) {
			/*
			 * Finish the old file, if there was one.  The presence
			 * of a usable vnode in vp is signaled by a valid v_daddr.
			 */
			if (v_daddr != LFS_UNUSED_DADDR) {
				lfs_vunref(vp);
				numrefed--;
			}

			/*
			 * Start a new file
			 */
			lastino = blkp->bi_inode;
			if (blkp->bi_inode == LFS_IFILE_INUM)
				v_daddr = fs->lfs_idaddr;
			else {
				LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
				/* XXX fix for force write */
				v_daddr = ifp->if_daddr;
				brelse(bp, 0);
			}
			if (v_daddr == LFS_UNUSED_DADDR)
				continue;

			/* Get the vnode/inode. */
			error = lfs_fastvget(mntp, blkp->bi_inode, v_daddr,
					   &vp,
					   (blkp->bi_lbn == LFS_UNUSED_LBN
					    ? blkp->bi_bp
					    : NULL));

			if (!error) {
				numrefed++;
			}
			if (error) {
				DLOG((DLOG_CLEAN, "lfs_markv: lfs_fastvget"
				      " failed with %d (ino %d, segment %d)\n",
				      error, blkp->bi_inode,
				      lfs_dtosn(fs, blkp->bi_daddr)));
				/*
				 * If we got EAGAIN, that means that the
				 * Inode was locked.  This is
				 * recoverable: just clean the rest of
				 * this segment, and let the cleaner try
				 * again with another.	(When the
				 * cleaner runs again, this segment will
				 * sort high on the list, since it is
				 * now almost entirely empty.) But, we
				 * still set v_daddr = LFS_UNUSED_ADDR
				 * so as not to test this over and over
				 * again.
				 */
				if (error == EAGAIN) {
					error = 0;
					do_again++;
				}
#ifdef DIAGNOSTIC
				else if (error != ENOENT)
					panic("lfs_markv VFS_VGET FAILED");
#endif
				/* lastino = LFS_UNUSED_INUM; */
				v_daddr = LFS_UNUSED_DADDR;
				vp = NULL;
				ip = NULL;
				continue;
			}
			ip = VTOI(vp);
			ninowritten++;
		} else if (v_daddr == LFS_UNUSED_DADDR) {
			/*
			 * This can only happen if the vnode is dead (or
			 * in any case we can't get it...e.g., it is
			 * inlocked).  Keep going.
			 */
			continue;
		}

		/* Past this point we are guaranteed that vp, ip are valid. */

		/* Can't clean VU_DIROP directories in case of truncation */
		/* XXX - maybe we should mark removed dirs specially? */
		if (vp->v_type == VDIR && (vp->v_uflag & VU_DIROP)) {
			do_again++;
			continue;
		}

		/* If this BLOCK_INFO didn't contain a block, keep going. */
		if (blkp->bi_lbn == LFS_UNUSED_LBN) {
			/* XXX need to make sure that the inode gets written in this case */
			/* XXX but only write the inode if it's the right one */
			if (blkp->bi_inode != LFS_IFILE_INUM) {
				LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
				if (ifp->if_daddr == blkp->bi_daddr) {
					mutex_enter(&lfs_lock);
					LFS_SET_UINO(ip, IN_CLEANING);
					mutex_exit(&lfs_lock);
				}
				brelse(bp, 0);
			}
			continue;
		}

		b_daddr = 0;
		if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &b_daddr, NULL) ||
		    LFS_DBTOFSB(fs, b_daddr) != blkp->bi_daddr)
		{
			if (lfs_dtosn(fs, LFS_DBTOFSB(fs, b_daddr)) ==
			    lfs_dtosn(fs, blkp->bi_daddr))
			{
				DLOG((DLOG_CLEAN, "lfs_markv: wrong da same seg: %llx vs %llx\n",
				      (long long)blkp->bi_daddr, (long long)LFS_DBTOFSB(fs, b_daddr)));
			}
			do_again++;
			continue;
		}

		/*
		 * Check block sizes.  The blocks being cleaned come from
		 * disk, so they should have the same size as their on-disk
		 * counterparts.
		 */
		if (blkp->bi_lbn >= 0)
			obsize = lfs_blksize(fs, ip, blkp->bi_lbn);
		else
			obsize = fs->lfs_bsize;
		/* Check for fragment size change */
		if (blkp->bi_lbn >= 0 && blkp->bi_lbn < ULFS_NDADDR) {
			obsize = ip->i_lfs_fragsize[blkp->bi_lbn];
		}
		if (obsize != blkp->bi_size) {
			DLOG((DLOG_CLEAN, "lfs_markv: ino %d lbn %lld wrong"
			      " size (%ld != %d), try again\n",
			      blkp->bi_inode, (long long)blkp->bi_lbn,
			      (long) obsize, blkp->bi_size));
			do_again++;
			continue;
		}

		/*
		 * If we get to here, then we are keeping the block.  If
		 * it is an indirect block, we want to actually put it
		 * in the buffer cache so that it can be updated in the
		 * finish_meta section.	 If it's not, we need to
		 * allocate a fake buffer so that writeseg can perform
		 * the copyin and write the buffer.
		 */
		if (ip->i_number != LFS_IFILE_INUM && blkp->bi_lbn >= 0) {
			/* Data Block */
			bp = lfs_fakebuf(fs, vp, blkp->bi_lbn,
					 blkp->bi_size, blkp->bi_bp);
			/* Pretend we used bread() to get it */
			bp->b_blkno = LFS_FSBTODB(fs, blkp->bi_daddr);
		} else {
			/* Indirect block or ifile */
			if (blkp->bi_size != fs->lfs_bsize &&
			    ip->i_number != LFS_IFILE_INUM)
				panic("lfs_markv: partial indirect block?"
				    " size=%d\n", blkp->bi_size);
			bp = getblk(vp, blkp->bi_lbn, blkp->bi_size, 0, 0);
			if (!(bp->b_oflags & (BO_DONE|BO_DELWRI))) {
				/*
				 * The block in question was not found
				 * in the cache; i.e., the block that
				 * getblk() returned is empty.	So, we
				 * can (and should) copy in the
				 * contents, because we've already
				 * determined that this was the right
				 * version of this block on disk.
				 *
				 * And, it can't have changed underneath
				 * us, because we have the segment lock.
				 */
				error = copyin(blkp->bi_bp, bp->b_data, blkp->bi_size);
				if (error)
					goto err2;
			}
		}
		if ((error = lfs_bwrite_ext(bp, BW_CLEAN)) != 0)
			goto err2;

		nblkwritten++;
		/*
		 * XXX should account indirect blocks and ifile pages as well
		 */
		if (nblkwritten + lfs_lblkno(fs, ninowritten * sizeof (struct ulfs1_dinode))
		    > LFS_MARKV_MAX_BLOCKS) {
			DLOG((DLOG_CLEAN, "lfs_markv: writing %d blks %d inos\n",
			      nblkwritten, ninowritten));
			lfs_segwrite(mntp, SEGM_CLEAN);
			nblkwritten = ninowritten = 0;
		}
	}

	/*
	 * Finish the old file, if there was one
	 */
	if (v_daddr != LFS_UNUSED_DADDR) {
		lfs_vunref(vp);
		numrefed--;
	}

#ifdef DIAGNOSTIC
	if (numrefed != 0)
		panic("lfs_markv: numrefed=%d", numrefed);
#endif
	DLOG((DLOG_CLEAN, "lfs_markv: writing %d blks %d inos (check point)\n",
	      nblkwritten, ninowritten));

	/*
	 * The last write has to be SEGM_SYNC, because of calling semantics.
	 * It also has to be SEGM_CKP, because otherwise we could write
	 * over the newly cleaned data contained in a checkpoint, and then
	 * we'd be unhappy at recovery time.
	 */
	lfs_segwrite(mntp, SEGM_CLEAN | SEGM_CKP | SEGM_SYNC);

	lfs_segunlock(fs);

	vfs_unbusy(mntp, false, NULL);
	if (error)
		return (error);
	else if (do_again)
		return EAGAIN;

	return 0;

err2:
	DLOG((DLOG_CLEAN, "lfs_markv err2\n"));

	/*
	 * XXX we're here because copyin() failed.
	 * XXX it means that we can't trust the cleanerd.  too bad.
	 * XXX how can we recover from this?
	 */

err3:
	/*
	 * XXX should do segwrite here anyway?
	 */

	if (v_daddr != LFS_UNUSED_DADDR) {
		lfs_vunref(vp);
		--numrefed;
	}

	lfs_segunlock(fs);
	vfs_unbusy(mntp, false, NULL);
#ifdef DIAGNOSTIC
	if (numrefed != 0)
		panic("lfs_markv: numrefed=%d", numrefed);
#endif

	return (error);
}
Example #20
static int
ffs_rawread_readahead(struct vnode *vp, caddr_t udata, off_t loffset,
		      size_t len, struct buf *bp)
{
	int error;
	int iolen;
	int blockoff;
	int bsize;
	struct vnode *dp;
	int bforwards;
	
	bsize = vp->v_mount->mnt_stat.f_iosize;

	/*
	 * Make sure it fits into the pbuf
	 */
	iolen = (int)(intptr_t)udata & PAGE_MASK;
	if (len + iolen > bp->b_kvasize) {
		len = bp->b_kvasize;
		if (iolen != 0)
			len -= PAGE_SIZE;
	}

	/*
	 * Raw disk address is in bio2, but we wait for it to
	 * chain to bio1.
	 */
	bp->b_flags &= ~B_ERROR;
	bp->b_loffset = loffset;
	bp->b_bio2.bio_offset = NOOFFSET;
	bp->b_bio1.bio_done = biodone_sync;
	bp->b_bio1.bio_flags |= BIO_SYNC;

	blockoff = (loffset % bsize) / DEV_BSIZE;

	error = VOP_BMAP(vp, bp->b_loffset, &bp->b_bio2.bio_offset,
			 &bforwards, NULL, BUF_CMD_READ);
	if (error != 0)
		return error;
	dp = VTOI(vp)->i_devvp;
	if (bp->b_bio2.bio_offset == NOOFFSET) {
		/* 
		 * Fill holes with NULs to preserve semantics 
		 */
		if (len + blockoff * DEV_BSIZE > bsize)
			len = bsize - blockoff * DEV_BSIZE;
		
		if (vmapbuf(bp, udata, len) < 0)
			return EFAULT;
		
		lwkt_user_yield();
		bzero(bp->b_data, bp->b_bcount);

		/* Mark operation completed (similar to bufdone()) */

		bp->b_resid = 0;
		return 0;
	}
	
	if (len + blockoff * DEV_BSIZE > bforwards)
		len = bforwards - blockoff * DEV_BSIZE;
	bp->b_bio2.bio_offset += blockoff * DEV_BSIZE;
	
	if (vmapbuf(bp, udata, len) < 0)
		return EFAULT;
	
	/*
	 * Access the block device layer using the device vnode (dp) and
	 * the translated block number (bio2) instead of the logical block
	 * number (bio1).
	 *
	 * Even though we are bypassing the vnode layer, we still
	 * want the vnode state to indicate that an I/O on its behalf
	 * is in progress.
	 */
	bp->b_cmd = BUF_CMD_READ;
	bio_start_transaction(&bp->b_bio1, &vp->v_track_read);
	vn_strategy(dp, &bp->b_bio2);
	return 0;
}
Example #21
int
bread_cluster(struct vnode *vp, daddr_t blkno, int size, struct buf **rbpp)
{
	struct buf *bp, **xbpp;
	int howmany, maxra, i, inc;
	daddr_t sblkno;

	*rbpp = bio_doread(vp, blkno, size, 0);

	if (size != round_page(size))
		goto out;

	if (VOP_BMAP(vp, blkno + 1, NULL, &sblkno, &maxra))
		goto out;

	maxra++; 
	if (sblkno == -1 || maxra < 2)
		goto out;

	howmany = MAXPHYS / size;
	if (howmany > maxra)
		howmany = maxra;

	xbpp = malloc((howmany + 1) * sizeof(struct buf *), M_TEMP, M_NOWAIT);
	if (xbpp == NULL)
		goto out;

	for (i = howmany - 1; i >= 0; i--) {
		size_t sz;

		/*
		 * First buffer allocates big enough size to cover what
		 * all the other buffers need.
		 */
		sz = i == 0 ? howmany * size : 0;

		xbpp[i] = buf_get(vp, blkno + i + 1, sz);
		if (xbpp[i] == NULL) {
			for (++i; i < howmany; i++) {
				SET(xbpp[i]->b_flags, B_INVAL);
				brelse(xbpp[i]);
			}
			free(xbpp, M_TEMP);
			goto out;
		}
	}

	bp = xbpp[0];

	xbpp[howmany] = 0;

	inc = btodb(size);

	for (i = 1; i < howmany; i++) {
		bcstats.pendingreads++;
		bcstats.numreads++;
		SET(xbpp[i]->b_flags, B_READ | B_ASYNC);
		xbpp[i]->b_blkno = sblkno + (i * inc);
		xbpp[i]->b_bufsize = xbpp[i]->b_bcount = size;
		xbpp[i]->b_data = NULL;
		xbpp[i]->b_pobj = bp->b_pobj;
		xbpp[i]->b_poffs = bp->b_poffs + (i * size);
	}

	KASSERT(bp->b_lblkno == blkno + 1);
	KASSERT(bp->b_vp == vp);

	bp->b_blkno = sblkno;
	SET(bp->b_flags, B_READ | B_ASYNC | B_CALL);

	bp->b_saveaddr = (void *)xbpp;
	bp->b_iodone = bread_cluster_callback;

	bcstats.pendingreads++;
	bcstats.numreads++;
	VOP_STRATEGY(bp);
	curproc->p_ru.ru_inblock++;

out:
	return (biowait(*rbpp));
}
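The read-ahead sizing above is a simple clamp: at most MAXPHYS / size buffers, but never more than the contiguity VOP_BMAP reported for blkno + 1. A minimal sketch with assumed values:

#include <stdio.h>

#define MAXPHYS (64 * 1024)

int
main(void)
{
	int size = 8192;	/* size of the buffer being read */
	int maxra = 3;		/* run VOP_BMAP returned for blkno + 1 */
	int howmany;

	maxra++;			/* run is "blocks after"; make it a count */
	howmany = MAXPHYS / size;	/* at most 8 buffers per cluster */
	if (howmany > maxra)
		howmany = maxra;	/* clamp to what is actually contiguous */

	printf("cluster read of %d buffers (%d bytes total)\n",
	    howmany, howmany * size);
	return 0;
}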
Example #22
/*
 * This is a slightly strangely structured routine.  It always puts
 * all the pages for a vnode.  It starts by releasing pages which
 * are clean and simultaneously looks up the smallest offset for a
 * dirty page belonging to the object.  If there is no smallest offset,
 * all pages have been cleaned.  Otherwise, it finds a contiguous range
 * of dirty pages starting from the smallest offset and writes them out.
 * After this the scan is restarted.
 */
int
genfs_do_putpages(struct vnode *vp, off_t startoff, off_t endoff, int flags,
	struct vm_page **busypg)
{
	char databuf[MAXPHYS];
	struct uvm_object *uobj = &vp->v_uobj;
	struct vm_page *pg, *pg_next;
	voff_t smallest;
	voff_t curoff, bufoff;
	off_t eof;
	size_t xfersize;
	int bshift = vp->v_mount->mnt_fs_bshift;
	int bsize = 1 << bshift;
#if 0
	int async = (flags & PGO_SYNCIO) == 0;
#else
	int async = 0;
#endif

 restart:
	/* check if all pages are clean */
	smallest = -1;
	for (pg = TAILQ_FIRST(&uobj->memq); pg; pg = pg_next) {
		pg_next = TAILQ_NEXT(pg, listq.queue);

		/*
		 * XXX: this is not correct at all.  But it's based on
		 * assumptions we can make when accessing the pages
		 * only through the file system and not through the
		 * virtual memory subsystem.  Well, at least I hope
		 * so ;)
		 */
		KASSERT((pg->flags & PG_BUSY) == 0);

		/* If we can just dump the page, do so */
		if (pg->flags & PG_CLEAN || flags & PGO_FREE) {
			uvm_pagefree(pg);
			continue;
		}

		if (pg->offset < smallest || smallest == -1)
			smallest = pg->offset;
	}

	/* all done? */
	if (TAILQ_EMPTY(&uobj->memq)) {
		vp->v_iflag &= ~VI_ONWORKLST;
		mutex_exit(&uobj->vmobjlock);
		return 0;
	}

	/* we need to flush */
	GOP_SIZE(vp, vp->v_writesize, &eof, 0);
	for (curoff = smallest; curoff < eof; curoff += PAGE_SIZE) {
		void *curva;

		if (curoff - smallest >= MAXPHYS)
			break;
		pg = uvm_pagelookup(uobj, curoff);
		if (pg == NULL)
			break;

		/* XXX: see comment about above KASSERT */
		KASSERT((pg->flags & PG_BUSY) == 0);

		curva = databuf + (curoff-smallest);
		memcpy(curva, (void *)pg->uanon, PAGE_SIZE);
		rumpvm_enterva((vaddr_t)curva, pg);

		pg->flags |= PG_CLEAN;
	}
	KASSERT(curoff > smallest);

	mutex_exit(&uobj->vmobjlock);

	/* then we write */
	for (bufoff = 0; bufoff < MIN(curoff-smallest,eof); bufoff+=xfersize) {
		struct buf *bp;
		struct vnode *devvp;
		daddr_t bn, lbn;
		int run, error;

		lbn = (smallest + bufoff) >> bshift;
		error = VOP_BMAP(vp, lbn, &devvp, &bn, &run);
		if (error)
			panic("%s: VOP_BMAP failed: %d", __func__, error);

		xfersize = MIN(((lbn+1+run) << bshift) - (smallest+bufoff),
		     curoff - (smallest+bufoff));

		/*
		 * We might run across blocks which aren't allocated yet.
		 * A reason might be e.g. the write operation being still
		 * in the kernel page cache while truncate has already
		 * enlarged the file.  So just ignore those ranges.
		 */
		if (bn == -1)
			continue;

		bp = getiobuf(vp, true);

		/* only write max what we are allowed to write */
		bp->b_bcount = xfersize;
		if (smallest + bufoff + xfersize > eof)
			bp->b_bcount -= (smallest+bufoff+xfersize) - eof;
		bp->b_bcount = (bp->b_bcount + DEV_BSIZE-1) & ~(DEV_BSIZE-1);

		KASSERT(bp->b_bcount > 0);
		KASSERT(smallest >= 0);

		DPRINTF(("putpages writing from %x to %x (vp size %x)\n",
		    (int)(smallest + bufoff),
		    (int)(smallest + bufoff + bp->b_bcount),
		    (int)eof));

		bp->b_bufsize = round_page(bp->b_bcount);
		bp->b_lblkno = 0;
		bp->b_blkno = bn + (((smallest+bufoff)&(bsize-1))>>DEV_BSHIFT);
		bp->b_data = databuf + bufoff;
		bp->b_flags = B_WRITE;
		bp->b_cflags |= BC_BUSY;

		if (async) {
			bp->b_flags |= B_ASYNC;
			bp->b_iodone = uvm_aio_biodone;
		}

		vp->v_numoutput++;
		VOP_STRATEGY(devvp, bp);
		if (bp->b_error)
			panic("%s: VOP_STRATEGY lazy bum %d",
			    __func__, bp->b_error);
		if (!async)
			putiobuf(bp);
	}
	rumpvm_flushva();

	mutex_enter(&uobj->vmobjlock);
	goto restart;
}
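The transfer-size and device-block arithmetic in the write loop above can be checked with a standalone sketch; bshift, the BMAP result and the offsets below are assumed numbers, not values read from any real mount.

#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define DEV_BSHIFT 9

int
main(void)
{
	int bshift = 12;		/* 4 KB file system blocks */
	int bsize = 1 << bshift;
	long long smallest = 8192;	/* first dirty offset */
	long long bufoff = 0;
	long long curoff = 24576;	/* end of the dirty range */
	long long bn = 100;		/* pretend BMAP result */
	int run = 1;			/* one extra contiguous block */
	long long lbn, xfersize, blkno;

	lbn = (smallest + bufoff) >> bshift;			/* logical block 2 */
	xfersize = MIN(((lbn + 1 + run) << bshift) - (smallest + bufoff),
	    curoff - (smallest + bufoff));			/* 8192 bytes */

	/* device block: BMAP result plus the sector offset inside the block */
	blkno = bn + (((smallest + bufoff) & (bsize - 1)) >> DEV_BSHIFT);

	printf("lbn %lld -> blkno %lld, xfersize %lld\n", lbn, blkno, xfersize);
	return 0;
}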
Example #23
/*
 * miscfs/genfs getpages routine.  This is a fair bit simpler than the
 * kernel counterpart since we're not being executed from a fault handler
 * and generally don't need to care about PGO_LOCKED or other cruft.
 * We do, however, need to care about page locking and we keep trying until
 * we get all the pages within the range.  The object locking protocol
 * is the same as for the kernel: enter with the object lock held,
 * return with it released.
 */
int
genfs_getpages(void *v)
{
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		voff_t a_offset;
		struct vm_page **a_m;
		int *a_count;
		int a_centeridx;
		vm_prot_t a_access_type;
		int a_advice;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct uvm_object *uobj = (struct uvm_object *)vp;
	struct vm_page *pg;
	voff_t curoff, endoff;
	off_t diskeof;
	size_t bufsize, remain, bufoff, xfersize;
	uint8_t *tmpbuf;
	int bshift = vp->v_mount->mnt_fs_bshift;
	int bsize = 1<<bshift;
	int count = *ap->a_count;
	int async;
	int i, error;

	/*
	 * Ignore async for now, the structure of this routine
	 * doesn't exactly allow for it ...
	 */
	async = 0;

	if (ap->a_centeridx != 0)
		panic("%s: centeridx != not supported", __func__);

	if (ap->a_access_type & VM_PROT_WRITE)
		vp->v_iflag |= VI_ONWORKLST;

	curoff = ap->a_offset & ~PAGE_MASK;
	for (i = 0; i < count; i++, curoff += PAGE_SIZE) {
 retrylookup:
		pg = uvm_pagelookup(uobj, curoff);
		if (pg == NULL)
			break;

		/* page is busy?  we need to wait until it's released */
		if (pg->flags & PG_BUSY) {
			pg->flags |= PG_WANTED;
			UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0, "getpg",0);
			mutex_enter(&uobj->vmobjlock);
			goto retrylookup;
		}
		pg->flags |= PG_BUSY;
		if (pg->flags & PG_FAKE)
			break;
		ap->a_m[i] = pg;
	}

	/* got everything?  if so, just return */
	if (i == count) {
		mutex_exit(&uobj->vmobjlock);
		return 0;
	}

	/*
	 * didn't?  Ok, allocate backing pages.  Start from the first
	 * one we missed.
	 */
	for (; i < count; i++, curoff += PAGE_SIZE) {
 retrylookup2:
		pg = uvm_pagelookup(uobj, curoff);

		/* found?  busy it and be happy */
		if (pg) {
			if (pg->flags & PG_BUSY) {
				pg->flags |= PG_WANTED;
				UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0,
				    "getpg2", 0);
				mutex_enter(&uobj->vmobjlock);
				goto retrylookup2;
			} else {
				pg->flags |= PG_BUSY;
			}

		/* not found?  make a new page */
		} else {
			pg = rumpvm_makepage(uobj, curoff);
		}
		ap->a_m[i] = pg;
	}

	/*
	 * We have done all the clerical work and have all pages busied.
	 * Release the vm object for other consumers.
	 */
	mutex_exit(&uobj->vmobjlock);

	/*
	 * Now, we have all the pages here & busy.  Transfer the range
	 * starting from the missing offset and transfer into the
	 * page buffers.
	 */
	GOP_SIZE(vp, vp->v_size, &diskeof, 0);

	/* align to boundaries */
	endoff = trunc_page(ap->a_offset) + (count << PAGE_SHIFT);
	endoff = MIN(endoff, ((vp->v_writesize+bsize-1) & ~(bsize-1)));
	curoff = ap->a_offset & ~(MAX(bsize,PAGE_SIZE)-1);
	remain = endoff - curoff;
	if (diskeof > curoff)
		remain = MIN(remain, diskeof - curoff);

	DPRINTF(("a_offset: %llx, startoff: 0x%llx, endoff 0x%llx\n",
	    (unsigned long long)ap->a_offset, (unsigned long long)curoff,
	    (unsigned long long)endoff));

	/* read everything into a buffer */
	bufsize = round_page(remain);
	tmpbuf = kmem_zalloc(bufsize, KM_SLEEP);
	for (bufoff = 0; remain; remain -= xfersize, bufoff+=xfersize) {
		struct buf *bp;
		struct vnode *devvp;
		daddr_t lbn, bn;
		int run;

		lbn = (curoff + bufoff) >> bshift;
		/* XXX: assume eof */
		error = VOP_BMAP(vp, lbn, &devvp, &bn, &run);
		if (error)
			panic("%s: VOP_BMAP & lazy bum: %d", __func__, error);

		DPRINTF(("lbn %d (off %d) -> bn %d run %d\n", (int)lbn,
		    (int)(curoff+bufoff), (int)bn, run));
		xfersize = MIN(((lbn+1+run)<<bshift)-(curoff+bufoff), remain);

		/* hole? */
		if (bn == -1) {
			memset(tmpbuf + bufoff, 0, xfersize);
			continue;
		}

		bp = getiobuf(vp, true);

		bp->b_data = tmpbuf + bufoff;
		bp->b_bcount = xfersize;
		bp->b_blkno = bn;
		bp->b_lblkno = 0;
		bp->b_flags = B_READ;
		bp->b_cflags = BC_BUSY;

		if (async) {
			bp->b_flags |= B_ASYNC;
			bp->b_iodone = uvm_aio_biodone;
		}

		VOP_STRATEGY(devvp, bp);
		if (bp->b_error)
			panic("%s: VOP_STRATEGY, lazy bum", __func__);
		
		if (!async)
			putiobuf(bp);
	}

	/* skip to beginning of pages we're interested in */
	bufoff = 0;
	while (round_page(curoff + bufoff) < trunc_page(ap->a_offset))
		bufoff += PAGE_SIZE;

	DPRINTF(("first page offset 0x%x\n", (int)(curoff + bufoff)));

	for (i = 0; i < count; i++, bufoff += PAGE_SIZE) {
		/* past our prime? */
		if (curoff + bufoff >= endoff)
			break;

		pg = uvm_pagelookup(&vp->v_uobj, curoff + bufoff);
		KASSERT(pg);
		DPRINTF(("got page %p (off 0x%x)\n", pg, (int)(curoff+bufoff)));
		if (pg->flags & PG_FAKE) {
			memcpy((void *)pg->uanon, tmpbuf+bufoff, PAGE_SIZE);
			pg->flags &= ~PG_FAKE;
			pg->flags |= PG_CLEAN;
		}
		ap->a_m[i] = pg;
	}
	*ap->a_count = i;

	kmem_free(tmpbuf, bufsize);

	return 0;
}
Example #24
/*
 * Do clustered write for FFS.
 *
 * Three cases:
 *	1. Write is not sequential (write asynchronously)
 *	Write is sequential:
 *	2.	beginning of cluster - begin cluster
 *	3.	middle of a cluster - add to cluster
 *	4.	end of a cluster - asynchronously write cluster
 */
void
cluster_write(struct buf *bp, struct cluster_info *ci, u_quad_t filesize)
{
	struct vnode *vp;
	daddr64_t lbn;
	int maxclen, cursize;

	vp = bp->b_vp;
	lbn = bp->b_lblkno;

	/* Initialize vnode to beginning of file. */
	if (lbn == 0)
		ci->ci_lasta = ci->ci_clen = ci->ci_cstart = ci->ci_lastw = 0;

	if (ci->ci_clen == 0 || lbn != ci->ci_lastw + 1 ||
	    (bp->b_blkno != ci->ci_lasta + btodb(bp->b_bcount))) {
		maxclen = MAXBSIZE / vp->v_mount->mnt_stat.f_iosize - 1;
		if (ci->ci_clen != 0) {
			/*
			 * Next block is not sequential.
			 *
			 * If we are not writing at end of file, the process
			 * seeked to another point in the file since its
			 * last write, or we have reached our maximum
			 * cluster size, then push the previous cluster.
			 * Otherwise try reallocating to make it sequential.
			 */
			cursize = ci->ci_lastw - ci->ci_cstart + 1;
			if (((u_quad_t)(lbn + 1)) * bp->b_bcount != filesize ||
			    lbn != ci->ci_lastw + 1 || ci->ci_clen <= cursize) {
				cluster_wbuild(vp, NULL, bp->b_bcount,
				    ci->ci_cstart, cursize, lbn);
			} else {
				struct buf **bpp, **endbp;
				struct cluster_save *buflist;

				buflist = cluster_collectbufs(vp, ci, bp);
				endbp = &buflist->bs_children
				    [buflist->bs_nchildren - 1];
				if (VOP_REALLOCBLKS(vp, buflist)) {
					/*
					 * Failed, push the previous cluster.
					 */
					for (bpp = buflist->bs_children;
					    bpp < endbp; bpp++)
						brelse(*bpp);
					free(buflist, M_VCLUSTER);
					cluster_wbuild(vp, NULL, bp->b_bcount,
					    ci->ci_cstart, cursize, lbn);
				} else {
					/*
					 * Succeeded, keep building cluster.
					 */
					for (bpp = buflist->bs_children;
					    bpp <= endbp; bpp++)
						bdwrite(*bpp);
					free(buflist, M_VCLUSTER);
					ci->ci_lastw = lbn;
					ci->ci_lasta = bp->b_blkno;
					return;
				}
			}
		}
		/*
		 * Consider beginning a cluster.
		 * If at end of file, make cluster as large as possible,
		 * otherwise find size of existing cluster.
		 */
		if ((u_quad_t)(lbn + 1) * (u_quad_t)bp->b_bcount != filesize &&
		    (VOP_BMAP(vp, lbn, NULL, &bp->b_blkno, &maxclen) ||
		    bp->b_blkno == -1)) {
			bawrite(bp);
			ci->ci_clen = 0;
			ci->ci_lasta = bp->b_blkno;
			ci->ci_cstart = lbn + 1;
			ci->ci_lastw = lbn;
			return;
		}
		ci->ci_clen = maxclen;
		if (maxclen == 0) {		/* I/O not contiguous */
			ci->ci_cstart = lbn + 1;
			bawrite(bp);
		} else {			/* Wait for rest of cluster */
			ci->ci_cstart = lbn;
			bdwrite(bp);
		}
	} else if (lbn == ci->ci_cstart + ci->ci_clen) {
		/*
		 * At end of cluster, write it out.
		 */
		cluster_wbuild(vp, bp, bp->b_bcount, ci->ci_cstart,
		    ci->ci_clen + 1, lbn);
		ci->ci_clen = 0;
		ci->ci_cstart = lbn + 1;
	} else
		/*
		 * In the middle of a cluster, so just delay the
		 * I/O for now.
		 */
		bdwrite(bp);
	ci->ci_lastw = lbn;
	ci->ci_lasta = bp->b_blkno;
}
Example #25
/*
 * small block filesystem vnode pager input
 */
static int
vnode_pager_input_smlfs(vm_object_t object, vm_page_t m)
{
	struct vnode *vp;
	struct bufobj *bo;
	struct buf *bp;
	struct sf_buf *sf;
	daddr_t fileaddr;
	vm_offset_t bsize;
	vm_page_bits_t bits;
	int error, i;

	error = 0;
	vp = object->handle;
	if (vp->v_iflag & VI_DOOMED)
		return VM_PAGER_BAD;

	bsize = vp->v_mount->mnt_stat.f_iosize;

	VOP_BMAP(vp, 0, &bo, 0, NULL, NULL);

	sf = sf_buf_alloc(m, 0);

	for (i = 0; i < PAGE_SIZE / bsize; i++) {
		vm_ooffset_t address;

		bits = vm_page_bits(i * bsize, bsize);
		if (m->valid & bits)
			continue;

		address = IDX_TO_OFF(m->pindex) + i * bsize;
		if (address >= object->un_pager.vnp.vnp_size) {
			fileaddr = -1;
		} else {
			error = vnode_pager_addr(vp, address, &fileaddr, NULL);
			if (error)
				break;
		}
		if (fileaddr != -1) {
			bp = getpbuf(&vnode_pbuf_freecnt);

			/* build a minimal buffer header */
			bp->b_iocmd = BIO_READ;
			bp->b_iodone = bdone;
			KASSERT(bp->b_rcred == NOCRED, ("leaking read ucred"));
			KASSERT(bp->b_wcred == NOCRED, ("leaking write ucred"));
			bp->b_rcred = crhold(curthread->td_ucred);
			bp->b_wcred = crhold(curthread->td_ucred);
			bp->b_data = (caddr_t)sf_buf_kva(sf) + i * bsize;
			bp->b_blkno = fileaddr;
			pbgetbo(bo, bp);
			bp->b_vp = vp;
			bp->b_bcount = bsize;
			bp->b_bufsize = bsize;
			bp->b_runningbufspace = bp->b_bufsize;
			atomic_add_long(&runningbufspace, bp->b_runningbufspace);

			/* do the input */
			bp->b_iooffset = dbtob(bp->b_blkno);
			bstrategy(bp);

			bwait(bp, PVM, "vnsrd");

			if ((bp->b_ioflags & BIO_ERROR) != 0)
				error = EIO;

			/*
			 * free the buffer header back to the swap buffer pool
			 */
			bp->b_vp = NULL;
			pbrelbo(bp);
			relpbuf(bp, &vnode_pbuf_freecnt);
			if (error)
				break;
		} else
			bzero((caddr_t)sf_buf_kva(sf) + i * bsize, bsize);
		KASSERT((m->dirty & bits) == 0,
		    ("vnode_pager_input_smlfs: page %p is dirty", m));
		VM_OBJECT_WLOCK(object);
		m->valid |= bits;
		VM_OBJECT_WUNLOCK(object);
	}
	sf_buf_free(sf);
	if (error) {
		return VM_PAGER_ERROR;
	}
	return VM_PAGER_OK;
}
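When the filesystem block size is smaller than a page, the loop above fills each page in bsize pieces and zero-fills pieces that lie past end of file. A standalone sketch of that sub-block iteration, with an assumed 1 KB block size and file size:

#include <stdio.h>

#define PAGE_SIZE 4096

int
main(void)
{
	long bsize = 1024;		/* small-block file system */
	long pindex = 3;		/* page being paged in */
	long long vnp_size = 14000;	/* hypothetical file size in bytes */

	for (long i = 0; i < PAGE_SIZE / bsize; i++) {
		long long address = (long long)pindex * PAGE_SIZE + i * bsize;

		if (address >= vnp_size)
			printf("piece %ld: past EOF, zero-fill\n", i);
		else
			printf("piece %ld: bmap byte %lld, read %ld bytes\n",
			    i, address, bsize);
	}
	return 0;
}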
Example #26
/*
 * File table vnode ioctl routine.
 */
static int
vn_ioctl(file_t *fp, u_long com, void *data)
{
	struct vnode *vp = fp->f_data, *ovp;
	struct vattr vattr;
	int error;

	switch (vp->v_type) {

	case VREG:
	case VDIR:
		if (com == FIONREAD) {
			vn_lock(vp, LK_SHARED | LK_RETRY);
			error = VOP_GETATTR(vp, &vattr, kauth_cred_get());
			VOP_UNLOCK(vp);
			if (error)
				return (error);
			*(int *)data = vattr.va_size - fp->f_offset;
			return (0);
		}
		if ((com == FIONWRITE) || (com == FIONSPACE)) {
			/*
			 * Files don't have send queues, so there never
			 * are any bytes in them, nor is there any
			 * open space in them.
			 */
			*(int *)data = 0;
			return (0);
		}
		if (com == FIOGETBMAP) {
			daddr_t *block;

			if (*(daddr_t *)data < 0)
				return (EINVAL);
			block = (daddr_t *)data;
			return (VOP_BMAP(vp, *block, NULL, block, NULL));
		}
		if (com == OFIOGETBMAP) {
			daddr_t ibn, obn;

			if (*(int32_t *)data < 0)
				return (EINVAL);
			ibn = (daddr_t)*(int32_t *)data;
			error = VOP_BMAP(vp, ibn, NULL, &obn, NULL);
			*(int32_t *)data = (int32_t)obn;
			return error;
		}
		if (com == FIONBIO || com == FIOASYNC)	/* XXX */
			return (0);			/* XXX */
		/* fall into ... */
	case VFIFO:
	case VCHR:
	case VBLK:
		error = VOP_IOCTL(vp, com, data, fp->f_flag,
		    kauth_cred_get());
		if (error == 0 && com == TIOCSCTTY) {
			vref(vp);
			mutex_enter(proc_lock);
			ovp = curproc->p_session->s_ttyvp;
			curproc->p_session->s_ttyvp = vp;
			mutex_exit(proc_lock);
			if (ovp != NULL)
				vrele(ovp);
		}
		return (error);

	default:
		return (EPASSTHROUGH);
	}
}
Example #27
/*
 * This is now called from local media FS's to operate against their
 * own vnodes if they fail to implement VOP_GETPAGES.
 */
int
vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int count,
    int *a_rbehind, int *a_rahead, vop_getpages_iodone_t iodone, void *arg)
{
	vm_object_t object;
	struct bufobj *bo;
	struct buf *bp;
	off_t foff;
#ifdef INVARIANTS
	off_t blkno0;
#endif
	int bsize, pagesperblock, *freecnt;
	int error, before, after, rbehind, rahead, poff, i;
	int bytecount, secmask;

	KASSERT(vp->v_type != VCHR && vp->v_type != VBLK,
	    ("%s does not support devices", __func__));

	if (vp->v_iflag & VI_DOOMED)
		return (VM_PAGER_BAD);

	object = vp->v_object;
	foff = IDX_TO_OFF(m[0]->pindex);
	bsize = vp->v_mount->mnt_stat.f_iosize;
	pagesperblock = bsize / PAGE_SIZE;

	KASSERT(foff < object->un_pager.vnp.vnp_size,
	    ("%s: page %p offset beyond vp %p size", __func__, m[0], vp));
	KASSERT(count <= sizeof(bp->b_pages),
	    ("%s: requested %d pages", __func__, count));

	/*
	 * The last page has valid blocks.  Invalid part can only
	 * exist at the end of file, and the page is made fully valid
	 * by zeroing in vm_pager_get_pages().
	 */
	if (m[count - 1]->valid != 0 && --count == 0) {
		if (iodone != NULL)
			iodone(arg, m, 1, 0);
		return (VM_PAGER_OK);
	}

	/*
	 * Synchronous and asynchronous paging operations use different
	 * free pbuf counters.  This is done to avoid asynchronous requests
	 * to consume all pbufs.
	 * Allocate the pbuf at the very beginning of the function, so that
	 * if we are low on certain kind of pbufs don't even proceed to BMAP,
	 * but sleep.
	 */
	freecnt = iodone != NULL ?
	    &vnode_async_pbuf_freecnt : &vnode_pbuf_freecnt;
	bp = getpbuf(freecnt);

	/*
	 * Get the underlying device blocks for the file with VOP_BMAP().
	 * If the file system doesn't support VOP_BMAP, use old way of
	 * getting pages via VOP_READ.
	 */
	error = VOP_BMAP(vp, foff / bsize, &bo, &bp->b_blkno, &after, &before);
	if (error == EOPNOTSUPP) {
		relpbuf(bp, freecnt);
		VM_OBJECT_WLOCK(object);
		for (i = 0; i < count; i++) {
			PCPU_INC(cnt.v_vnodein);
			PCPU_INC(cnt.v_vnodepgsin);
			error = vnode_pager_input_old(object, m[i]);
			if (error)
				break;
		}
		VM_OBJECT_WUNLOCK(object);
		return (error);
	} else if (error != 0) {
		relpbuf(bp, freecnt);
		return (VM_PAGER_ERROR);
	}

	/*
	 * If the file system supports BMAP, but blocksize is smaller
	 * than a page size, then use special small filesystem code.
	 */
	if (pagesperblock == 0) {
		relpbuf(bp, freecnt);
		for (i = 0; i < count; i++) {
			PCPU_INC(cnt.v_vnodein);
			PCPU_INC(cnt.v_vnodepgsin);
			error = vnode_pager_input_smlfs(object, m[i]);
			if (error)
				break;
		}
		return (error);
	}

	/*
	 * A sparse file can be encountered only for a single page request,
	 * which may not be preceded by call to vm_pager_haspage().
	 */
	if (bp->b_blkno == -1) {
		KASSERT(count == 1,
		    ("%s: array[%d] request to a sparse file %p", __func__,
		    count, vp));
		relpbuf(bp, freecnt);
		pmap_zero_page(m[0]);
		KASSERT(m[0]->dirty == 0, ("%s: page %p is dirty",
		    __func__, m[0]));
		VM_OBJECT_WLOCK(object);
		m[0]->valid = VM_PAGE_BITS_ALL;
		VM_OBJECT_WUNLOCK(object);
		return (VM_PAGER_OK);
	}

#ifdef INVARIANTS
	blkno0 = bp->b_blkno;
#endif
	bp->b_blkno += (foff % bsize) / DEV_BSIZE;

	/* Recalculate blocks available after/before to pages. */
	poff = (foff % bsize) / PAGE_SIZE;
	before *= pagesperblock;
	before += poff;
	after *= pagesperblock;
	after += pagesperblock - (poff + 1);
	if (m[0]->pindex + after >= object->size)
		after = object->size - 1 - m[0]->pindex;
	KASSERT(count <= after + 1, ("%s: %d pages asked, can do only %d",
	    __func__, count, after + 1));
	after -= count - 1;

	/* Trim requested rbehind/rahead to possible values. */   
	rbehind = a_rbehind ? *a_rbehind : 0;
	rahead = a_rahead ? *a_rahead : 0;
	rbehind = min(rbehind, before);
	rbehind = min(rbehind, m[0]->pindex);
	rahead = min(rahead, after);
	rahead = min(rahead, object->size - m[count - 1]->pindex);
	/*
	 * Check that total amount of pages fit into buf.  Trim rbehind and
	 * rahead evenly if not.
	 */
	if (rbehind + rahead + count > nitems(bp->b_pages)) {
		int trim, sum;

		trim = rbehind + rahead + count - nitems(bp->b_pages) + 1;
		sum = rbehind + rahead;
		if (rbehind == before) {
			/* Roundup rbehind trim to block size. */
			rbehind -= roundup(trim * rbehind / sum, pagesperblock);
			if (rbehind < 0)
				rbehind = 0;
		} else
			rbehind -= trim * rbehind / sum;
		rahead -= trim * rahead / sum;
	}
	KASSERT(rbehind + rahead + count <= nitems(bp->b_pages),
	    ("%s: behind %d ahead %d count %d", __func__,
	    rbehind, rahead, count));

	/*
	 * Fill in the bp->b_pages[] array with requested and optional   
	 * read behind or read ahead pages.  Read behind pages are looked
	 * up in a backward direction, down to a first cached page.  Same
	 * for read ahead pages, but there is no need to shift the array
	 * in case of encountering a cached page.
	 */
	i = bp->b_npages = 0;
	if (rbehind) {
		vm_pindex_t startpindex, tpindex;
		vm_page_t p;

		VM_OBJECT_WLOCK(object);
		startpindex = m[0]->pindex - rbehind;
		if ((p = TAILQ_PREV(m[0], pglist, listq)) != NULL &&
		    p->pindex >= startpindex)
			startpindex = p->pindex + 1;

		/* tpindex is unsigned; beware of numeric underflow. */
		for (tpindex = m[0]->pindex - 1;
		    tpindex >= startpindex && tpindex < m[0]->pindex;
		    tpindex--, i++) {
			p = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL);
			if (p == NULL) {
				/* Shift the array. */
				for (int j = 0; j < i; j++)
					bp->b_pages[j] = bp->b_pages[j + 
					    tpindex + 1 - startpindex]; 
				break;
			}
			bp->b_pages[tpindex - startpindex] = p;
		}

		bp->b_pgbefore = i;
		bp->b_npages += i;
		bp->b_blkno -= IDX_TO_OFF(i) / DEV_BSIZE;
	} else
		bp->b_pgbefore = 0;

	/* Requested pages. */
	for (int j = 0; j < count; j++, i++)
		bp->b_pages[i] = m[j];
	bp->b_npages += count;

	if (rahead) {
		vm_pindex_t endpindex, tpindex;
		vm_page_t p;

		if (!VM_OBJECT_WOWNED(object))
			VM_OBJECT_WLOCK(object);
		endpindex = m[count - 1]->pindex + rahead + 1;
		if ((p = TAILQ_NEXT(m[count - 1], listq)) != NULL &&
		    p->pindex < endpindex)
			endpindex = p->pindex;
		if (endpindex > object->size)
			endpindex = object->size;

		for (tpindex = m[count - 1]->pindex + 1;
		    tpindex < endpindex; i++, tpindex++) {
			p = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL);
			if (p == NULL)
				break;
			bp->b_pages[i] = p;
		}

		bp->b_pgafter = i - bp->b_npages;
		bp->b_npages = i;
	} else
		bp->b_pgafter = 0;

	if (VM_OBJECT_WOWNED(object))
		VM_OBJECT_WUNLOCK(object);

	/* Report back actual behind/ahead read. */
	if (a_rbehind)
		*a_rbehind = bp->b_pgbefore;
	if (a_rahead)
		*a_rahead = bp->b_pgafter;

#ifdef INVARIANTS
	KASSERT(bp->b_npages <= nitems(bp->b_pages),
	    ("%s: buf %p overflowed", __func__, bp));
	for (int j = 1; j < bp->b_npages; j++)
		KASSERT(bp->b_pages[j]->pindex - 1 ==
		    bp->b_pages[j - 1]->pindex,
		    ("%s: pages array not consecutive, bp %p", __func__, bp));
#endif

	/*
	 * Recalculate first offset and bytecount with regards to read behind.
	 * Truncate bytecount to vnode real size and round up physical size
	 * for real devices.
	 */
	foff = IDX_TO_OFF(bp->b_pages[0]->pindex);
	bytecount = bp->b_npages << PAGE_SHIFT;
	if ((foff + bytecount) > object->un_pager.vnp.vnp_size)
		bytecount = object->un_pager.vnp.vnp_size - foff;
	secmask = bo->bo_bsize - 1;
	KASSERT(secmask < PAGE_SIZE && secmask > 0,
	    ("%s: sector size %d too large", __func__, secmask + 1));
	bytecount = (bytecount + secmask) & ~secmask;

	/*
	 * And map the pages to be read into the kva, if the filesystem
	 * requires mapped buffers.
	 */
	if ((vp->v_mount->mnt_kern_flag & MNTK_UNMAPPED_BUFS) != 0 &&
	    unmapped_buf_allowed) {
		bp->b_data = unmapped_buf;
		bp->b_offset = 0;
	} else {
		bp->b_data = bp->b_kvabase;
		pmap_qenter((vm_offset_t)bp->b_data, bp->b_pages, bp->b_npages);
	}

	/* Build a minimal buffer header. */
	bp->b_iocmd = BIO_READ;
	KASSERT(bp->b_rcred == NOCRED, ("leaking read ucred"));
	KASSERT(bp->b_wcred == NOCRED, ("leaking write ucred"));
	bp->b_rcred = crhold(curthread->td_ucred);
	bp->b_wcred = crhold(curthread->td_ucred);
	pbgetbo(bo, bp);
	bp->b_vp = vp;
	bp->b_bcount = bp->b_bufsize = bp->b_runningbufspace = bytecount;
	bp->b_iooffset = dbtob(bp->b_blkno);
	KASSERT(IDX_TO_OFF(m[0]->pindex - bp->b_pages[0]->pindex) ==
	    (blkno0 - bp->b_blkno) * DEV_BSIZE +
	    IDX_TO_OFF(m[0]->pindex) % bsize,
	    ("wrong offsets bsize %d m[0] %ju b_pages[0] %ju "
	    "blkno0 %ju b_blkno %ju", bsize,
	    (uintmax_t)m[0]->pindex, (uintmax_t)bp->b_pages[0]->pindex,
	    (uintmax_t)blkno0, (uintmax_t)bp->b_blkno));

	atomic_add_long(&runningbufspace, bp->b_runningbufspace);
	PCPU_INC(cnt.v_vnodein);
	PCPU_ADD(cnt.v_vnodepgsin, bp->b_npages);

	if (iodone != NULL) { /* async */
		bp->b_pgiodone = iodone;
		bp->b_caller1 = arg;
		bp->b_iodone = vnode_pager_generic_getpages_done_async;
		bp->b_flags |= B_ASYNC;
		BUF_KERNPROC(bp);
		bstrategy(bp);
		return (VM_PAGER_OK);
	} else {
		bp->b_iodone = bdone;
		bstrategy(bp);
		bwait(bp, PVM, "vnread");
		error = vnode_pager_generic_getpages_done(bp);
		for (i = 0; i < bp->b_npages; i++)
			bp->b_pages[i] = NULL;
		bp->b_vp = NULL;
		pbrelbo(bp);
		relpbuf(bp, &vnode_pbuf_freecnt);
		return (error != 0 ? VM_PAGER_ERROR : VM_PAGER_OK);
	}
}
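
The trimming step near the top of this example (shrinking rbehind and rahead so the whole request fits nitems(bp->b_pages)) is easy to model outside the kernel. Below is a minimal userland sketch of that proportional trim; PBUF_PAGES, trim_read_window() and the sample numbers are invented for illustration, and the extra roundup-to-block-size adjustment the kernel applies when rbehind == before is omitted.

/*
 * Userland model of the proportional trim above: if
 * rbehind + rahead + count exceeds the page-array capacity, shrink
 * rbehind and rahead in proportion to their sizes so the total fits.
 * Names and constants are illustrative only.
 */
#include <stdio.h>

#define PBUF_PAGES 32		/* stand-in for nitems(bp->b_pages) */

static void
trim_read_window(int *rbehind, int *rahead, int count)
{
	int trim, sum;

	if (*rbehind + *rahead + count <= PBUF_PAGES)
		return;
	trim = *rbehind + *rahead + count - PBUF_PAGES + 1;
	sum = *rbehind + *rahead;
	*rbehind -= trim * *rbehind / sum;
	*rahead -= trim * *rahead / sum;
}

int
main(void)
{
	int rbehind = 20, rahead = 30, count = 8;

	trim_read_window(&rbehind, &rahead, count);
	/* Prints rbehind=10 rahead=14 count=8 total=32 for these inputs. */
	printf("rbehind=%d rahead=%d count=%d total=%d\n",
	    rbehind, rahead, count, rbehind + rahead + count);
	return (0);
}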
Example #28
int
lfs_bmapv(struct lwp *l, fsid_t *fsidp, BLOCK_INFO *blkiov, int blkcnt)
{
	BLOCK_INFO *blkp;
	IFILE *ifp;
	struct buf *bp;
	struct inode *ip = NULL;
	struct lfs *fs;
	struct mount *mntp;
	struct ulfsmount *ump;
	struct vnode *vp;
	ino_t lastino;
	daddr_t v_daddr;
	int cnt, error;
	int numrefed = 0;

	error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_LFS,
	    KAUTH_REQ_SYSTEM_LFS_BMAPV, NULL, NULL, NULL);
	if (error)
		return (error);

	if ((mntp = vfs_getvfs(fsidp)) == NULL)
		return (ENOENT);

	if ((error = vfs_busy(mntp, NULL)) != 0)
		return (error);

	ump = VFSTOULFS(mntp);
	fs = ump->um_lfs;

	if (fs->lfs_cleaner_thread == NULL)
		fs->lfs_cleaner_thread = curlwp;
	KASSERT(fs->lfs_cleaner_thread == curlwp);

	cnt = blkcnt;

	error = 0;

	/* these were inside the initialization for the for loop */
	vp = NULL;
	v_daddr = LFS_UNUSED_DADDR;
	lastino = LFS_UNUSED_INUM;
	for (blkp = blkiov; cnt--; ++blkp)
	{
		/*
		 * Get the IFILE entry (only once) and see if the file still
		 * exists.
		 */
		if (lastino != blkp->bi_inode) {
			/*
			 * Finish the old file, if there was one.
			 */
			if (vp != NULL) {
				vput(vp);
				vp = NULL;
				numrefed--;
			}

			/*
			 * Start a new file
			 */
			lastino = blkp->bi_inode;
			if (blkp->bi_inode == LFS_IFILE_INUM)
				v_daddr = lfs_sb_getidaddr(fs);
			else {
				LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
				v_daddr = lfs_if_getdaddr(fs, ifp);
				brelse(bp, 0);
			}
			if (v_daddr == LFS_UNUSED_DADDR) {
				blkp->bi_daddr = LFS_UNUSED_DADDR;
				continue;
			}
			error = lfs_fastvget(mntp, blkp->bi_inode, NULL,
			    LK_SHARED, &vp);
			if (error) {
				DLOG((DLOG_CLEAN, "lfs_bmapv: lfs_fastvget ino"
				      "%d failed with %d",
				      blkp->bi_inode,error));
				KASSERT(vp == NULL);
				continue;
			} else {
				KASSERT(VOP_ISLOCKED(vp));
				numrefed++;
			}
			ip = VTOI(vp);
		} else if (vp == NULL) {
			/*
			 * This can only happen if the vnode is dead.
			 * Keep going.	Note that we DO NOT set the
			 * bi_addr to anything -- if we failed to get
			 * the vnode, for example, we want to assume
			 * conservatively that all of its blocks *are*
			 * located in the segment in question.
			 * lfs_markv will throw them out if we are
			 * wrong.
			 */
			continue;
		}

		/* Past this point we are guaranteed that vp, ip are valid. */

		if (blkp->bi_lbn == LFS_UNUSED_LBN) {
			/*
			 * We just want the inode address, which is
			 * conveniently in v_daddr.
			 */
			blkp->bi_daddr = v_daddr;
		} else {
			daddr_t bi_daddr;

			error = VOP_BMAP(vp, blkp->bi_lbn, NULL,
					 &bi_daddr, NULL);
			if (error)
			{
				blkp->bi_daddr = LFS_UNUSED_DADDR;
				continue;
			}
			blkp->bi_daddr = LFS_DBTOFSB(fs, bi_daddr);
			/* Fill in the block size, too */
			if (blkp->bi_lbn >= 0)
				blkp->bi_size = lfs_blksize(fs, ip, blkp->bi_lbn);
			else
				blkp->bi_size = lfs_sb_getbsize(fs);
		}
	}

	/*
	 * Finish the old file, if there was one.
	 */
	if (vp != NULL) {
		vput(vp);
		vp = NULL;
		numrefed--;
	}

#ifdef DIAGNOSTIC
	if (numrefed != 0)
		panic("lfs_bmapv: numrefed=%d", numrefed);
#endif

	vfs_unbusy(mntp, false, NULL);

	return 0;
}
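
The loop above resolves one BLOCK_INFO at a time but only re-acquires the vnode when the inode number changes, releasing the previous one first. The following is a minimal userland sketch of that reuse pattern only; struct file_ref, lookup_file() and drop_file() are invented stand-ins for the real lfs_fastvget()/vput() calls, and the lookup-failure handling is omitted.

/*
 * Model of "reuse the per-file handle while the inode number does not
 * change": acquire once per run of identical inodes, release before
 * switching, release at the end.
 */
#include <stdio.h>

struct file_ref { int ino; };

/* Toy lookup: returns a handle for inode 'ino' (never fails here). */
static struct file_ref *
lookup_file(int ino, struct file_ref *storage)
{
	storage->ino = ino;
	printf("  lookup inode %d\n", ino);
	return (storage);
}

static void
drop_file(struct file_ref *fr)
{
	printf("  drop inode %d\n", fr->ino);
}

int
main(void)
{
	int inodes[] = { 3, 3, 3, 7, 7, 12 };	/* per-block inode numbers */
	struct file_ref storage, *fr = NULL;
	int lastino = -1;
	size_t i;

	for (i = 0; i < sizeof(inodes) / sizeof(inodes[0]); i++) {
		if (inodes[i] != lastino) {
			if (fr != NULL)		/* finish the old file */
				drop_file(fr);
			lastino = inodes[i];
			fr = lookup_file(lastino, &storage);
		}
		printf("process block for inode %d\n", fr->ino);
	}
	if (fr != NULL)
		drop_file(fr);
	return (0);
}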
Example #29
/*
 * Read data to a buf, including read-ahead if we find this to be beneficial.
 * cluster_read replaces bread.
 */
int
cluster_read(struct vnode *vp, u_quad_t filesize, daddr_t lblkno, long size,
    struct ucred *cred, long totread, int seqcount, int gbflags,
    struct buf **bpp)
{
	struct buf *bp, *rbp, *reqbp;
	struct bufobj *bo;
	daddr_t blkno, origblkno;
	int maxra, racluster;
	int error, ncontig;
	int i;

	error = 0;
	bo = &vp->v_bufobj;
	if (!unmapped_buf_allowed)
		gbflags &= ~GB_UNMAPPED;

	/*
	 * Try to limit the amount of read-ahead by a few
	 * ad-hoc parameters.  This needs work!!!
	 */
	racluster = vp->v_mount->mnt_iosize_max / size;
	maxra = seqcount;
	maxra = min(read_max, maxra);
	maxra = min(nbuf/8, maxra);
	if (((u_quad_t)(lblkno + maxra + 1) * size) > filesize)
		maxra = (filesize / size) - lblkno;

	/*
	 * get the requested block
	 */
	*bpp = reqbp = bp = getblk(vp, lblkno, size, 0, 0, gbflags);
	if (bp == NULL)
		return (EBUSY);
	origblkno = lblkno;

	/*
	 * if it is in the cache, then check to see if the reads have been
	 * sequential.  If they have, then try some read-ahead, otherwise
	 * back-off on prospective read-aheads.
	 */
	if (bp->b_flags & B_CACHE) {
		if (!seqcount) {
			return 0;
		} else if ((bp->b_flags & B_RAM) == 0) {
			return 0;
		} else {
			bp->b_flags &= ~B_RAM;
			BO_RLOCK(bo);
			for (i = 1; i < maxra; i++) {
				/*
				 * Stop if the buffer does not exist or it
				 * is invalid (about to go away?)
				 */
				rbp = gbincore(&vp->v_bufobj, lblkno+i);
				if (rbp == NULL || (rbp->b_flags & B_INVAL))
					break;

				/*
				 * Set another read-ahead mark so we know 
				 * to check again. (If we can lock the
				 * buffer without waiting)
				 */
				if ((((i % racluster) == (racluster - 1)) ||
				    (i == (maxra - 1))) 
				    && (0 == BUF_LOCK(rbp, 
					LK_EXCLUSIVE | LK_NOWAIT, NULL))) {
					rbp->b_flags |= B_RAM;
					BUF_UNLOCK(rbp);
				}			
			}
			BO_RUNLOCK(bo);
			if (i >= maxra) {
				return 0;
			}
			lblkno += i;
		}
		reqbp = bp = NULL;
	/*
	 * If it isn't in the cache, then get a chunk from
	 * disk if sequential, otherwise just get the block.
	 */
	} else {
		off_t firstread = bp->b_offset;
		int nblks;
		long minread;

		KASSERT(bp->b_offset != NOOFFSET,
		    ("cluster_read: no buffer offset"));

		ncontig = 0;

		/*
		 * Adjust totread if needed
		 */
		minread = read_min * size;
		if (minread > totread)
			totread = minread;

		/*
		 * Compute the total number of blocks that we should read
		 * synchronously.
		 */
		if (firstread + totread > filesize)
			totread = filesize - firstread;
		nblks = howmany(totread, size);
		if (nblks > racluster)
			nblks = racluster;

		/*
		 * Now compute the number of contiguous blocks.
		 */
		if (nblks > 1) {
	    		error = VOP_BMAP(vp, lblkno, NULL,
				&blkno, &ncontig, NULL);
			/*
			 * If this failed to map just do the original block.
			 */
			if (error || blkno == -1)
				ncontig = 0;
		}

		/*
		 * If we have contiguous data available do a cluster
		 * otherwise just read the requested block.
		 */
		if (ncontig) {
			/* Account for our first block. */
			ncontig = min(ncontig + 1, nblks);
			if (ncontig < nblks)
				nblks = ncontig;
			bp = cluster_rbuild(vp, filesize, lblkno,
			    blkno, size, nblks, gbflags, bp);
			lblkno += (bp->b_bufsize / size);
		} else {
			bp->b_flags |= B_RAM;
			bp->b_iocmd = BIO_READ;
			lblkno += 1;
		}
	}

	/*
	 * handle the synchronous read so that it is available ASAP.
	 */
	if (bp) {
		if ((bp->b_flags & B_CLUSTER) == 0) {
			vfs_busy_pages(bp, 0);
		}
		bp->b_flags &= ~B_INVAL;
		bp->b_ioflags &= ~BIO_ERROR;
		if ((bp->b_flags & B_ASYNC) || bp->b_iodone != NULL)
			BUF_KERNPROC(bp);
		bp->b_iooffset = dbtob(bp->b_blkno);
		bstrategy(bp);
#ifdef RACCT
		if (racct_enable) {
			PROC_LOCK(curproc);
			racct_add_buf(curproc, bp, 0);
			PROC_UNLOCK(curproc);
		}
#endif /* RACCT */
		curthread->td_ru.ru_inblock++;
	}

	/*
	 * If we have been doing sequential I/O, then do some read-ahead.
	 */
	while (lblkno < (origblkno + maxra)) {
		error = VOP_BMAP(vp, lblkno, NULL, &blkno, &ncontig, NULL);
		if (error)
			break;

		if (blkno == -1)
			break;

		/*
		 * We could throttle ncontig here by maxra but we might as
		 * well read the data if it is contiguous.  We're throttled
		 * by racluster anyway.
		 */
		if (ncontig) {
			ncontig = min(ncontig + 1, racluster);
			rbp = cluster_rbuild(vp, filesize, lblkno, blkno,
			    size, ncontig, gbflags, NULL);
			lblkno += (rbp->b_bufsize / size);
			if (rbp->b_flags & B_DELWRI) {
				bqrelse(rbp);
				continue;
			}
		} else {
			rbp = getblk(vp, lblkno, size, 0, 0, gbflags);
			lblkno += 1;
			if (rbp->b_flags & B_DELWRI) {
				bqrelse(rbp);
				continue;
			}
			rbp->b_flags |= B_ASYNC | B_RAM;
			rbp->b_iocmd = BIO_READ;
			rbp->b_blkno = blkno;
		}
		if (rbp->b_flags & B_CACHE) {
			rbp->b_flags &= ~B_ASYNC;
			bqrelse(rbp);
			continue;
		}
		if ((rbp->b_flags & B_CLUSTER) == 0) {
			vfs_busy_pages(rbp, 0);
		}
		rbp->b_flags &= ~B_INVAL;
		rbp->b_ioflags &= ~BIO_ERROR;
		if ((rbp->b_flags & B_ASYNC) || rbp->b_iodone != NULL)
			BUF_KERNPROC(rbp);
		rbp->b_iooffset = dbtob(rbp->b_blkno);
		bstrategy(rbp);
#ifdef RACCT
		if (racct_enable) {
			PROC_LOCK(curproc);
			racct_add_buf(curproc, rbp, 0);
			PROC_UNLOCK(curproc);
		}
#endif /* RACCT */
		curthread->td_ru.ru_inblock++;
	}

	if (reqbp) {
		/*
		 * Like bread, always brelse() the buffer when
		 * returning an error.
		 */
		error = bufwait(reqbp);
		if (error != 0) {
			brelse(reqbp);
			*bpp = NULL;
		}
	}
	return (error);
}
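
The read-ahead window at the top of cluster_read() is a series of clamps: the sequential-access hint, a global cap, a fraction of the buffer pool, and finally end of file. Here is a hedged userland sketch of that sizing step; READ_MAX, NBUF and the sample geometry are placeholders, not the kernel's actual tunables.

/*
 * Sketch of the read-ahead sizing: maxra is the minimum of the
 * sequential hint, a global cap, and nbuf/8, then clipped so the
 * window never extends past end of file.
 */
#include <stdio.h>

#define READ_MAX	64	/* stand-in for the read_max sysctl */
#define NBUF		1024	/* stand-in for nbuf */

static long
min_l(long a, long b)
{
	return (a < b ? a : b);
}

static long
readahead_blocks(long lblkno, long size, long filesize, int seqcount)
{
	long maxra;

	maxra = seqcount;
	maxra = min_l(READ_MAX, maxra);
	maxra = min_l(NBUF / 8, maxra);
	if ((lblkno + maxra + 1) * size > filesize)
		maxra = filesize / size - lblkno;
	return (maxra);
}

int
main(void)
{
	/* 16 KB blocks, 1 MB file, reading block 60 with a strong hint. */
	printf("maxra = %ld\n", readahead_blocks(60, 16384, 1048576, 127));
	return (0);	/* prints maxra = 4: only 4 blocks left in the file */
}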
Example #30
/*
 * This is now called from local media FS's to operate against their
 * own vnodes if they fail to implement VOP_GETPAGES.
 */
int
vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int bytecount,
    int reqpage, vop_getpages_iodone_t iodone, void *arg)
{
	vm_object_t object;
	struct bufobj *bo;
	struct buf *bp;
	daddr_t firstaddr, reqblock;
	off_t foff, pib;
	int pbefore, pafter, i, size, bsize, first, last, *freecnt;
	int count, error, before, after, secmask;

	KASSERT(vp->v_type != VCHR && vp->v_type != VBLK,
	    ("vnode_pager_generic_getpages does not support devices"));
	if (vp->v_iflag & VI_DOOMED)
		return (VM_PAGER_BAD);

	object = vp->v_object;
	count = bytecount / PAGE_SIZE;
	bsize = vp->v_mount->mnt_stat.f_iosize;

	/*
	 * Synchronous and asynchronous paging operations use different
	 * free pbuf counters.  This is done to avoid asynchronous requests
	 * to consume all pbufs.
	 * Allocate the pbuf at the very beginning of the function, so that
	 * if we are low on certain kind of pbufs don't even proceed to BMAP,
	 * but sleep.
	 */
	freecnt = iodone != NULL ?
	    &vnode_async_pbuf_freecnt : &vnode_pbuf_freecnt;
	bp = getpbuf(freecnt);

	/*
	 * Get the underlying device blocks for the file with VOP_BMAP().
	 * If the file system doesn't support VOP_BMAP, use old way of
	 * getting pages via VOP_READ.
	 */
	error = VOP_BMAP(vp, IDX_TO_OFF(m[reqpage]->pindex) / bsize, &bo,
	    &reqblock, &after, &before);
	if (error == EOPNOTSUPP) {
		relpbuf(bp, freecnt);
		VM_OBJECT_WLOCK(object);
		for (i = 0; i < count; i++)
			if (i != reqpage) {
				vm_page_lock(m[i]);
				vm_page_free(m[i]);
				vm_page_unlock(m[i]);
			}
		PCPU_INC(cnt.v_vnodein);
		PCPU_INC(cnt.v_vnodepgsin);
		error = vnode_pager_input_old(object, m[reqpage]);
		VM_OBJECT_WUNLOCK(object);
		return (error);
	} else if (error != 0) {
		relpbuf(bp, freecnt);
		vm_pager_free_nonreq(object, m, reqpage, count, FALSE);
		return (VM_PAGER_ERROR);

		/*
		 * If the blocksize is smaller than a page size, then use
		 * special small filesystem code.
		 */
	} else if ((PAGE_SIZE / bsize) > 1) {
		relpbuf(bp, freecnt);
		vm_pager_free_nonreq(object, m, reqpage, count, FALSE);
		PCPU_INC(cnt.v_vnodein);
		PCPU_INC(cnt.v_vnodepgsin);
		return (vnode_pager_input_smlfs(object, m[reqpage]));
	}

	/*
	 * Since the caller has busied the requested page, that page's valid
	 * field will not be changed by other threads.
	 */
	vm_page_assert_xbusied(m[reqpage]);

	/*
	 * If we have a completely valid page available to us, we can
	 * clean up and return.  Otherwise we have to re-read the
	 * media.
	 */
	if (m[reqpage]->valid == VM_PAGE_BITS_ALL) {
		relpbuf(bp, freecnt);
		vm_pager_free_nonreq(object, m, reqpage, count, FALSE);
		return (VM_PAGER_OK);
	} else if (reqblock == -1) {
		relpbuf(bp, freecnt);
		pmap_zero_page(m[reqpage]);
		KASSERT(m[reqpage]->dirty == 0,
		    ("vnode_pager_generic_getpages: page %p is dirty", m));
		VM_OBJECT_WLOCK(object);
		m[reqpage]->valid = VM_PAGE_BITS_ALL;
		vm_pager_free_nonreq(object, m, reqpage, count, TRUE);
		VM_OBJECT_WUNLOCK(object);
		return (VM_PAGER_OK);
	} else if (m[reqpage]->valid != 0) {
		VM_OBJECT_WLOCK(object);
		m[reqpage]->valid = 0;
		VM_OBJECT_WUNLOCK(object);
	}

	pib = IDX_TO_OFF(m[reqpage]->pindex) % bsize;
	pbefore = ((daddr_t)before * bsize + pib) / PAGE_SIZE;
	pafter = ((daddr_t)(after + 1) * bsize - pib) / PAGE_SIZE - 1;
	first = reqpage < pbefore ? 0 : reqpage - pbefore;
	last = reqpage + pafter >= count ? count - 1 : reqpage + pafter;
	if (first > 0 || last + 1 < count) {
		VM_OBJECT_WLOCK(object);
		for (i = 0; i < first; i++) {
			vm_page_lock(m[i]);
			vm_page_free(m[i]);
			vm_page_unlock(m[i]);
		}
		for (i = last + 1; i < count; i++) {
			vm_page_lock(m[i]);
			vm_page_free(m[i]);
			vm_page_unlock(m[i]);
		}
		VM_OBJECT_WUNLOCK(object);
	}

	/*
	 * here on direct device I/O
	 */
	firstaddr = reqblock;
	firstaddr += pib / DEV_BSIZE;
	firstaddr -= IDX_TO_OFF(reqpage - first) / DEV_BSIZE;

	/*
	 * The first and last page have been calculated now, move
	 * input pages to be zero based, and adjust the count.
	 */
	m += first;
	reqpage -= first;
	count = last - first + 1;

	/*
	 * calculate the file virtual address for the transfer
	 */
	foff = IDX_TO_OFF(m[0]->pindex);

	/*
	 * calculate the size of the transfer
	 */
	size = count * PAGE_SIZE;
	KASSERT(count > 0, ("zero count"));
	if ((foff + size) > object->un_pager.vnp.vnp_size)
		size = object->un_pager.vnp.vnp_size - foff;
	KASSERT(size > 0, ("zero size"));

	/*
	 * round up physical size for real devices.
	 */
	secmask = bo->bo_bsize - 1;
	KASSERT(secmask < PAGE_SIZE && secmask > 0,
	    ("vnode_pager_generic_getpages: sector size %d too large",
	    secmask + 1));
	size = (size + secmask) & ~secmask;

	/*
	 * and map the pages to be read into the kva, if the filesystem
	 * requires mapped buffers.
	 */
	if ((vp->v_mount->mnt_kern_flag & MNTK_UNMAPPED_BUFS) != 0 &&
	    unmapped_buf_allowed) {
		bp->b_data = unmapped_buf;
		bp->b_offset = 0;
	} else {
		bp->b_data = bp->b_kvabase;
		pmap_qenter((vm_offset_t)bp->b_data, m, count);
	}

	/* build a minimal buffer header */
	bp->b_iocmd = BIO_READ;
	KASSERT(bp->b_rcred == NOCRED, ("leaking read ucred"));
	KASSERT(bp->b_wcred == NOCRED, ("leaking write ucred"));
	bp->b_rcred = crhold(curthread->td_ucred);
	bp->b_wcred = crhold(curthread->td_ucred);
	bp->b_blkno = firstaddr;
	pbgetbo(bo, bp);
	bp->b_vp = vp;
	bp->b_bcount = size;
	bp->b_bufsize = size;
	bp->b_runningbufspace = bp->b_bufsize;
	for (i = 0; i < count; i++)
		bp->b_pages[i] = m[i];
	bp->b_npages = count;
	bp->b_pager.pg_reqpage = reqpage;
	atomic_add_long(&runningbufspace, bp->b_runningbufspace);

	PCPU_INC(cnt.v_vnodein);
	PCPU_ADD(cnt.v_vnodepgsin, count);

	/* do the input */
	bp->b_iooffset = dbtob(bp->b_blkno);

	if (iodone != NULL) { /* async */
		bp->b_pager.pg_iodone = iodone;
		bp->b_caller1 = arg;
		bp->b_iodone = vnode_pager_generic_getpages_done_async;
		bp->b_flags |= B_ASYNC;
		BUF_KERNPROC(bp);
		bstrategy(bp);
		/* Good bye! */
	} else {
		bp->b_iodone = bdone;
		bstrategy(bp);
		bwait(bp, PVM, "vnread");
		error = vnode_pager_generic_getpages_done(bp);
		for (i = 0; i < bp->b_npages; i++)
			bp->b_pages[i] = NULL;
		bp->b_vp = NULL;
		pbrelbo(bp);
		relpbuf(bp, &vnode_pbuf_freecnt);
	}

	return (error != 0 ? VM_PAGER_ERROR : VM_PAGER_OK);
}
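
The pib/pbefore/pafter arithmetic above converts the contiguous-block counts returned by VOP_BMAP() into page counts around the requested page. A small userland sketch of that conversion follows, assuming example values for the block size and the contiguity hints; it is not the pager code itself.

/*
 * Given the requested page's byte offset, the filesystem block size,
 * and VOP_BMAP()'s "before"/"after" block counts, compute how many
 * whole pages are contiguous before and after the requested page.
 */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE	4096

int
main(void)
{
	int64_t bsize = 32768;		/* filesystem block size */
	int64_t reqoff = 5 * PAGE_SIZE;	/* byte offset of requested page */
	int64_t before = 2, after = 1;	/* contiguous blocks around it */
	int64_t pib, pbefore, pafter;

	pib = reqoff % bsize;		/* offset of the page in its block */
	pbefore = (before * bsize + pib) / PAGE_SIZE;
	pafter = ((after + 1) * bsize - pib) / PAGE_SIZE - 1;

	/* Prints 21 before and 10 after for these example values. */
	printf("pages contiguous before: %lld, after: %lld\n",
	    (long long)pbefore, (long long)pafter);
	return (0);
}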