/* ARGSUSED */
static int
ext2_read(struct vop_read_args *ap)
{
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	FS *fs;
	struct buf *bp;
	daddr_t lbn, nextlbn;
	off_t nextloffset;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error, orig_resid;
	int seqcount = ap->a_ioflag >> 16;

	vp = ap->a_vp;
	ip = VTOI(vp);
	uio = ap->a_uio;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("ext2_read: mode");

	if (vp->v_type == VLNK) {
		if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
			panic("ext2_read: short symlink");
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("ext2_read: type %d", vp->v_type);
#endif
	fs = ip->I_FS;
#if 0
	if ((u_quad_t)uio->uio_offset > fs->fs_maxfilesize)
		return (EFBIG);
#endif

	orig_resid = uio->uio_resid;
	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;
		nextloffset = lblktodoff(fs, nextlbn);
		size = BLKSIZE(fs, ip, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		xfersize = fs->s_frag_size - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (nextloffset >= ip->i_size) {
			error = bread(vp, lblktodoff(fs, lbn), size, &bp);
		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			error = cluster_read(vp, (off_t)ip->i_size,
					     lblktodoff(fs, lbn), size,
					     uio->uio_resid,
					     (ap->a_ioflag >> 16) * BKVASIZE,
					     &bp);
		} else if (seqcount > 1) {
Beispiel #2
0
/*
 * Return buffer with the contents of block "offset" from the beginning of
 * vnode "vp".  If "res" is non-zero, fill it in with a pointer to the
 * remaining space in the vnode.
 */
int
ffs_blkatoff(struct vnode *vp, off_t uoffset, char **res, struct buf **bpp)
{
	struct inode *ip;
	struct fs *fs;
	struct buf *bp;
	ufs_daddr_t lbn;
	int bsize, error;

	ip = VTOI(vp);
	fs = ip->i_fs;
	lbn = lblkno(fs, uoffset);
	bsize = blksize(fs, ip, lbn);

	*bpp = NULL;
	error = bread(vp, lblktodoff(fs, lbn), bsize, &bp);
	if (error) {
		brelse(bp);
		return (error);
	}
	if (res)
		*res = (char *)bp->b_data + blkoff(fs, uoffset);
	*bpp = bp;
	return (0);
}
Beispiel #3
0
/*
 * Return buffer with the contents of block "offset" from the beginning of
 * directory "ip".  If "res" is non-zero, fill it in with a pointer to the
 * remaining space in the directory.
 */
int
ext2_blkatoff(struct vnode *vp, off_t offset, char **res, struct buf **bpp)
{
	struct inode *ip;
	struct ext2_sb_info *fs;
	struct buf *bp;
	daddr_t lbn;
	int bsize, error;

	ip = VTOI(vp);
	fs = ip->i_e2fs;
	lbn = lblkno(fs, offset);
	bsize = blksize(fs, ip, lbn);

	*bpp = NULL;
	if ((error = bread(vp, lblktodoff(fs, lbn), bsize, &bp)) != 0) {
		brelse(bp);
		return (error);
	}
	if (res)
		*res = (char *)bp->b_data + blkoff(fs, offset);
	*bpp = bp;
	return (0);
}
Beispiel #4
0
/*
 * Release blocks associated with the inode ip and stored in the indirect
 * block bn.  Blocks are free'd in LIFO order up to (but not including)
 * lastbn.  If level is greater than SINGLE, the block is an indirect block
 * and recursive calls to indirtrunc must be used to cleanse other indirect
 * blocks.
 *
 * NB: triple indirect blocks are untested.
 */
static int
ffs_indirtrunc(struct inode *ip, ufs_daddr_t lbn, ufs_daddr_t dbn,
	       ufs_daddr_t lastbn, int level, long *countp)
{
	int i;
	struct buf *bp;
	struct fs *fs = ip->i_fs;
	ufs_daddr_t *bap;
	struct vnode *vp;
	ufs_daddr_t *copy = NULL, nb, nlbn, last;
	long blkcount, factor;
	int nblocks, blocksreleased = 0;
	int error = 0, allerror = 0;

	/*
	 * Calculate index in current block of last
	 * block to be kept.  -1 indicates the entire
	 * block so we need not calculate the index.
	 */
	factor = 1;
	for (i = SINGLE; i < level; i++)
		factor *= NINDIR(fs);
	last = lastbn;
	if (lastbn > 0)
		last /= factor;
	nblocks = btodb(fs->fs_bsize);
	/*
	 * Get buffer of block pointers, zero those entries corresponding
	 * to blocks to be free'd, and update on disk copy first.  Since
	 * double(triple) indirect before single(double) indirect, calls
	 * to bmap on these blocks will fail.  However, we already have
	 * the on disk address, so we have to set the bio_offset field
	 * explicitly instead of letting bread do everything for us.
	 */
	vp = ITOV(ip);
	bp = getblk(vp, lblktodoff(fs, lbn), (int)fs->fs_bsize, 0, 0);
	if ((bp->b_flags & B_CACHE) == 0) {
		bp->b_flags &= ~(B_ERROR|B_INVAL);
		bp->b_cmd = BUF_CMD_READ;
		if (bp->b_bcount > bp->b_bufsize)
			panic("ffs_indirtrunc: bad buffer size");
		/*
		 * BIO is bio2 which chains back to bio1.  We wait
		 * on bio1.
		 */
		bp->b_bio2.bio_offset = dbtodoff(fs, dbn);
		bp->b_bio1.bio_done = biodone_sync;
		bp->b_bio1.bio_flags |= BIO_SYNC;
		vfs_busy_pages(vp, bp);
		/*
		 * Access the block device layer using the device vnode
		 * and the translated block number (bio2) instead of the
		 * file vnode (vp) and logical block number (bio1).
		 *
		 * Even though we are bypassing the vnode layer, we still
		 * want the vnode state to indicate that an I/O on its behalf
		 * is in progress.
		 */
		bio_start_transaction(&bp->b_bio1, &vp->v_track_read);
		vn_strategy(ip->i_devvp, &bp->b_bio2);
		error = biowait(&bp->b_bio1, "biord");
	}
	if (error) {
		brelse(bp);
		*countp = 0;
		return (error);
	}

	bap = (ufs_daddr_t *)bp->b_data;
	if (lastbn != -1) {
		copy = kmalloc(fs->fs_bsize, M_TEMP, M_WAITOK);
		bcopy((caddr_t)bap, (caddr_t)copy, (uint)fs->fs_bsize);
		bzero((caddr_t)&bap[last + 1],
		    (uint)(NINDIR(fs) - (last + 1)) * sizeof (ufs_daddr_t));
		if (DOINGASYNC(vp)) {
			bawrite(bp);
		} else {
			error = bwrite(bp);
			if (error)
				allerror = error;
		}
		bap = copy;
	}

	/*
	 * Recursively free totally unused blocks.
	 */
	for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last;
	    i--, nlbn += factor) {
		nb = bap[i];
		if (nb == 0)
			continue;
		if (level > SINGLE) {
			if ((error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb),
			    (ufs_daddr_t)-1, level - 1, &blkcount)) != 0)
				allerror = error;
			blocksreleased += blkcount;
		}
		ffs_blkfree(ip, nb, fs->fs_bsize);
		blocksreleased += nblocks;
	}

	/*
	 * Recursively free last partial block.
	 */
	if (level > SINGLE && lastbn >= 0) {
		last = lastbn % factor;
		nb = bap[i];
		if (nb != 0) {
			error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb),
			    last, level - 1, &blkcount);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
		}
	}
	if (copy != NULL) {
		kfree(copy, M_TEMP);
	} else {
		bp->b_flags |= B_INVAL | B_NOCACHE;
		brelse(bp);
	}
		
	*countp = blocksreleased;
	return (allerror);
}
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (nextloffset >= ip->i_size) {
			error = bread(vp, lblktodoff(fs, lbn), size, &bp);
		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			error = cluster_read(vp, (off_t)ip->i_size,
					     lblktodoff(fs, lbn), size,
					     uio->uio_resid,
					     (ap->a_ioflag >> 16) * BKVASIZE,
					     &bp);
		} else if (seqcount > 1) {
			int nextsize = BLKSIZE(fs, ip, nextlbn);
			error = breadn(vp, lblktodoff(fs, lbn),
					size, &nextloffset, &nextsize, 1, &bp);
		} else {
			error = bread(vp, lblktodoff(fs, lbn), size, &bp);
		}
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
Beispiel #6
0
/*
 * ffs_balloc(struct vnode *a_vp, ufs_daddr_t a_lbn, int a_size,
 *	      struct ucred *a_cred, int a_flags, struct buf *a_bpp)
 *
 * Balloc defines the structure of filesystem storage by allocating
 * the physical blocks on a device given the inode and the logical
 * block number in a file.
 *
 * NOTE: B_CLRBUF - this flag tells balloc to clear invalid portions
 *	 of the buffer.  However, any dirty bits will override missing
 *	 valid bits.  This case occurs when writable mmaps are truncated
 *	 and then extended.
 */
int
ffs_balloc(struct vop_balloc_args *ap)
{
	struct inode *ip;
	ufs_daddr_t lbn;
	int size;
	struct ucred *cred;
	int flags;
	struct fs *fs;
	ufs_daddr_t nb;
	struct buf *bp, *nbp, *dbp;
	struct vnode *vp;
	struct indir indirs[NIADDR + 2];
	ufs_daddr_t newb, *bap, pref;
	int deallocated, osize, nsize, num, i, error;
	ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
	ufs_daddr_t *lbns_remfree, lbns[NIADDR + 1];
	int unwindidx;
	int seqcount;

	vp = ap->a_vp;
	ip = VTOI(vp);
	fs = ip->i_fs;
	lbn = lblkno(fs, ap->a_startoffset);
	size = blkoff(fs, ap->a_startoffset) + ap->a_size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc: blk too big");
	*ap->a_bpp = NULL;
	if (lbn < 0)
		return (EFBIG);
	cred = ap->a_cred;
	flags = ap->a_flags;

	/*
	 * The vnode must be locked for us to be able to safely mess
	 * around with the inode.
	 */
	if (vn_islocked(vp) != LK_EXCLUSIVE) {
		panic("ffs_balloc: vnode %p not exclusively locked!", vp);
	}

	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */
	nb = lblkno(fs, ip->i_size);
	if (nb < NDADDR && nb < lbn) {
		/*
		 * The filesize prior to this write can fit in direct
		 * blocks (ex. fragmentation is possibly done)
		 * we are now extending the file write beyond
		 * the block which has end of the file prior to this write.
		 */
		osize = blksize(fs, ip, nb);
		/*
		 * osize gives disk allocated size in the last block. It is
		 * either in fragments or a file system block size.
		 */
		if (osize < fs->fs_bsize && osize > 0) {
			/* A few fragments are already allocated, since the
			 * current extends beyond this block allocated the
			 * complete block as fragments are on in last block.
			 */
			error = ffs_realloccg(ip, nb,
				ffs_blkpref(ip, nb, (int)nb, &ip->i_db[0]),
				osize, (int)fs->fs_bsize, cred, &bp);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb,
				    dofftofsb(fs, bp->b_bio2.bio_offset), 
				    ip->i_db[nb], fs->fs_bsize, osize, bp);
			/* adjust the inode size, we just grew */
			ip->i_size = smalllblktosize(fs, nb + 1);
			ip->i_db[nb] = dofftofsb(fs, bp->b_bio2.bio_offset);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (flags & B_SYNC)
				bwrite(bp);
			else
				bawrite(bp);
			/* bp is already released here */
		}
	}
	/*
	 * The first NDADDR blocks are direct blocks
	 */
	if (lbn < NDADDR) {
		nb = ip->i_db[lbn];
		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
			error = bread(vp, lblktodoff(fs, lbn), fs->fs_bsize, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_bio2.bio_offset = fsbtodoff(fs, nb);
			*ap->a_bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				error = bread(vp, lblktodoff(fs, lbn), 
					      osize, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_bio2.bio_offset = fsbtodoff(fs, nb);
			} else {
				/*
				 * NOTE: ffs_realloccg() issues a bread().
				 */
				error = ffs_realloccg(ip, lbn,
				    ffs_blkpref(ip, lbn, (int)lbn,
					&ip->i_db[0]), osize, nsize, cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    dofftofsb(fs, bp->b_bio2.bio_offset),
					    nb, nsize, osize, bp);
			}
		} else {
			if (ip->i_size < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]),
			    nsize, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, lblktodoff(fs, lbn), nsize, 0, 0);
			bp->b_bio2.bio_offset = fsbtodoff(fs, newb);
			if (flags & B_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		ip->i_db[lbn] = dofftofsb(fs, bp->b_bio2.bio_offset);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*ap->a_bpp = bp;
		return (0);
	}
	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return(error);
#ifdef DIAGNOSTIC
	if (num < 1)
		panic ("ffs_balloc: ufs_bmaparray returned indirect block");
#endif
	/*
	 * Get a handle on the data block buffer before working through 
	 * indirect blocks to avoid a deadlock between the VM system holding
	 * a locked VM page and issuing a BMAP (which tries to lock the
	 * indirect blocks), and the filesystem holding a locked indirect
	 * block and then trying to read a data block (which tries to lock
	 * the underlying VM pages).
	 */
	dbp = getblk(vp, lblktodoff(fs, lbn), fs->fs_bsize, 0, 0);

	/*
	 * Setup undo history
	 */
	allocib = NULL;
	allocblk = allociblk;
	lbns_remfree = lbns;

	unwindidx = -1;

	/*
	 * Fetch the first indirect block directly from the inode, allocating
	 * one if necessary. 
	 */
	--num;
	nb = ip->i_ib[indirs[0].in_off];
	if (nb == 0) {
		pref = ffs_blkpref(ip, lbn, 0, NULL);
		/*
		 * If the filesystem has run out of space we can skip the
		 * full fsync/undo of the main [fail] case since no undo
		 * history has been built yet.  Hence the goto fail2.
		 */
	        if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    cred, &newb)) != 0)
			goto fail2;
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[1].in_lbn;
		bp = getblk(vp, lblktodoff(fs, indirs[1].in_lbn),
			    fs->fs_bsize, 0, 0);
		bp->b_bio2.bio_offset = fsbtodoff(fs, nb);
		vfs_bio_clrbuf(bp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if (DOINGASYNC(vp))
				bdwrite(bp);
			else if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		allocib = &ip->i_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */
	for (i = 1;;) {
		error = bread(vp, lblktodoff(fs, indirs[i].in_lbn), (int)fs->fs_bsize, &bp);
		if (error) {
			brelse(bp);
			goto fail;
		}
		bap = (ufs_daddr_t *)bp->b_data;
		nb = bap[indirs[i].in_off];
		if (i == num)
			break;
		i += 1;
		if (nb != 0) {
			bqrelse(bp);
			continue;
		}
		if (pref == 0)
			pref = ffs_blkpref(ip, lbn, 0, NULL);
		if ((error =
		    ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[i].in_lbn;
		nbp = getblk(vp, lblktodoff(fs, indirs[i].in_lbn),
			     fs->fs_bsize, 0, 0);
		nbp->b_bio2.bio_offset = fsbtodoff(fs, nb);
		vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if ((error = bwrite(nbp)) != 0) {
				brelse(bp);
				goto fail;
			}
		}
		bap[indirs[i - 1].in_off] = nb;
		if (allocib == NULL && unwindidx < 0)
			unwindidx = i - 1;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
	}

	/*
	 * Get the data block, allocating if necessary.  We have already
	 * called getblk() on the data block buffer, dbp.  If we have to
	 * allocate it and B_CLRBUF has been set the inference is an intention
	 * to zero out the related disk blocks, so we do not have to issue
	 * a read.  Instead we simply call vfs_bio_clrbuf().  If B_CLRBUF is
	 * not set the caller intends to overwrite the entire contents of the
	 * buffer and we don't waste time trying to clean up the contents.
	 *
	 * bp references the current indirect block.  When allocating, 
	 * the block must be updated.
	 */
	if (nb == 0) {
		pref = ffs_blkpref(ip, lbn, indirs[i].in_off, &bap[0]);
		error = ffs_alloc(ip,
		    lbn, pref, (int)fs->fs_bsize, cred, &newb);
		if (error) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = lbn;
		dbp->b_bio2.bio_offset = fsbtodoff(fs, nb);
		if (flags & B_CLRBUF)
			vfs_bio_clrbuf(dbp);
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[i].in_off, nb, 0, dbp);
		bap[indirs[i].in_off] = nb;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		*ap->a_bpp = dbp;
		return (0);
	}
	brelse(bp);

	/*
	 * At this point all related indirect blocks have been allocated
	 * if necessary and released.  bp is no longer valid.  dbp holds
	 * our getblk()'d data block.
	 *
	 * XXX we previously performed a cluster_read operation here.
	 */
	if (flags & B_CLRBUF) {
		/*
		 * If B_CLRBUF is set we must validate the invalid portions
		 * of the buffer.  This typically requires a read-before-
		 * write.  The strategy call will fill in bio_offset in that
		 * case.
		 *
		 * If we hit this case we do a cluster read if possible
		 * since nearby data blocks are likely to be accessed soon
		 * too.
		 */
		if ((dbp->b_flags & B_CACHE) == 0) {
			bqrelse(dbp);
			seqcount = (flags & B_SEQMASK) >> B_SEQSHIFT;
			if (seqcount &&
			    (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
				error = cluster_read(vp, (off_t)ip->i_size,
					    lblktodoff(fs, lbn),
					    (int)fs->fs_bsize, 
					    fs->fs_bsize,
					    seqcount * BKVASIZE,
					    &dbp);
			} else {
				error = bread(vp, lblktodoff(fs, lbn),
					      (int)fs->fs_bsize, &dbp);
			}
			if (error)
				goto fail;
		} else {
Beispiel #7
0
/*
 * Return buffer with the contents of block "offset" from the beginning of
 * vnode "vp".  If "res" is non-zero, fill it in with a pointer to the
 * remaining space in the vnode.
 *
 * This version includes a read-ahead optimization.
 */
int
ffs_blkatoff_ra(struct vnode *vp, off_t uoffset, char **res, struct buf **bpp,
		int seqcount)
{
	struct inode *ip;
	struct fs *fs;
	struct buf *bp;
	ufs_daddr_t lbn;
	ufs_daddr_t nextlbn;
	off_t base_loffset;
	off_t next_loffset;
	int bsize, error;
	int nextbsize;

	ip = VTOI(vp);
	fs = ip->i_fs;
	lbn = lblkno(fs, uoffset);
	base_loffset = lblktodoff(fs, lbn);
	bsize = blksize(fs, ip, lbn);

	nextlbn = lbn + 1;
	next_loffset = lblktodoff(fs, nextlbn);


	*bpp = NULL;

	if (next_loffset >= ip->i_size) {
		/*
		 * Do not do readahead if this is the last block,
		 * bsize might represent a fragment.
		 */
		error = bread(vp, base_loffset, bsize, &bp);
	} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
		/*
		 * Try to cluster if we allowed to.
		 */
		error = cluster_read(vp, (off_t)ip->i_size,
				     base_loffset, bsize,
				     bsize, seqcount * BKVASIZE, &bp);
	} else if (seqcount > 1) {
		/*
		 * Faked read ahead
		 */
		nextbsize = blksize(fs, ip, nextlbn);
		error = breadn(vp, base_loffset, bsize,
			       &next_loffset, &nextbsize, 1, &bp);
	} else {
		/*
		 * Failing all of the above, just read what the
		 * user asked for. Interestingly, the same as
		 * the first option above.
		 */
		error = bread(vp, base_loffset, bsize, &bp);
	}
	if (error) {
		brelse(bp);
		return (error);
	}
	if (res)
		*res = (char *)bp->b_data + (int)(uoffset - base_loffset);
	*bpp = bp;
	return (0);
}
Beispiel #8
0
/*
 * ext2_reallocblks(struct vnode *a_vp, struct cluster_save *a_buflist)
 */
int
ext2_reallocblks(struct vop_reallocblks_args *ap)
{
#ifndef FANCY_REALLOC
/* kprintf("ext2_reallocblks not implemented\n"); */
return ENOSPC;
#else

	struct ext2_sb_info *fs;
	struct inode *ip;
	struct vnode *vp;
	struct buf *sbp, *ebp;
	daddr_t *bap, *sbap, *ebap;
	struct cluster_save *buflist;
	daddr_t start_lbn, end_lbn, soff, eoff, newblk, blkno;
	struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
	int i, len, start_lvl, end_lvl, pref, ssize;

	vp = ap->a_vp;
	ip = VTOI(vp);
	fs = ip->i_e2fs;
#ifdef UNKLAR
	if (fs->fs_contigsumsize <= 0)
		return (ENOSPC);
#endif
	buflist = ap->a_buflist;
	len = buflist->bs_nchildren;
	start_lbn = lblkno(fs, buflist->bs_children[0]->b_loffset);
	end_lbn = start_lbn + len - 1;
#if DIAGNOSTIC
	for (i = 1; i < len; i++) {
		if (buflist->bs_children[i]->b_loffset != lblktodoff(fs, start_lbn) + lblktodoff(fs, i))
			panic("ext2_reallocblks: non-cluster");
	}
#endif
	/*
	 * If the latest allocation is in a new block group, assume that
	 * the filesystem has decided to move and do not force it back to
	 * the previous block group.
	 */
	if (dtog(fs, dofftofsb(fs, buflist->bs_children[0]->b_bio2.bio_offset)) !=
	    dtog(fs, dofftofsb(fs, buflist->bs_children[len - 1]->b_bio2.bio_offset)))
		return (ENOSPC);
	if (ext2_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
	    ext2_getlbns(vp, end_lbn, end_ap, &end_lvl))
		return (ENOSPC);
	/*
	 * Get the starting offset and block map for the first block.
	 */
	if (start_lvl == 0) {
		sbap = &ip->i_db[0];
		soff = start_lbn;
	} else {
		idp = &start_ap[start_lvl - 1];
		if (bread(vp, lblktodoff(fs, idp->in_lbn), (int)fs->s_blocksize, NOCRED, &sbp)) {
			brelse(sbp);
			return (ENOSPC);
		}
		sbap = (daddr_t *)sbp->b_data;
		soff = idp->in_off;
	}
	/*
	 * Find the preferred location for the cluster.
	 */
	pref = ext2_blkpref(ip, start_lbn, soff, sbap);
	/*
	 * If the block range spans two block maps, get the second map.
	 */
	if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
		ssize = len;
	} else {
#if DIAGNOSTIC
		if (start_ap[start_lvl-1].in_lbn == idp->in_lbn)
			panic("ext2_reallocblk: start == end");
#endif
		ssize = len - (idp->in_off + 1);
		if (bread(vp, lblktodoff(fs, idp->in_lbn), (int)fs->s_blocksize, NOCRED, &ebp))
			goto fail;
		ebap = (daddr_t *)ebp->b_data;
	}
	/*
	 * Search the block map looking for an allocation of the desired size.
	 */
	if ((newblk = (daddr_t)ext2_hashalloc(ip, dtog(fs, pref), (long)pref,
	    len, (u_long (*)())ext2_clusteralloc)) == 0)
		goto fail;
	/*
	 * We have found a new contiguous block.
	 *
	 * First we have to replace the old block pointers with the new
	 * block pointers in the inode and indirect blocks associated
	 * with the file.
	 */
	blkno = newblk;
	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->s_frags_per_block) {
		if (i == ssize)
			bap = ebap;
#if DIAGNOSTIC
		if (buflist->bs_children[i]->b_bio2.bio_offset != fsbtodoff(fs, *bap))
			panic("ext2_reallocblks: alloc mismatch");
#endif
		*bap++ = blkno;
	}
	/*
	 * Next we must write out the modified inode and indirect blocks.
	 * For strict correctness, the writes should be synchronous since
	 * the old block values may have been written to disk. In practise
	 * they are almost never written, but if we are concerned about
	 * strict correctness, the `doasyncfree' flag should be set to zero.
	 *
	 * The test on `doasyncfree' should be changed to test a flag
	 * that shows whether the associated buffers and inodes have
	 * been written. The flag should be set when the cluster is
	 * started and cleared whenever the buffer or inode is flushed.
	 * We can then check below to see if it is set, and do the
	 * synchronous write only when it has been cleared.
	 */
	if (sbap != &ip->i_db[0]) {
		if (doasyncfree)
			bdwrite(sbp);
		else
			bwrite(sbp);
	} else {
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		if (!doasyncfree)
			EXT2_UPDATE(vp, 1);
	}
	if (ssize < len)
		if (doasyncfree)
			bdwrite(ebp);
		else
			bwrite(ebp);
	/*
	 * Last, free the old blocks and assign the new blocks to the buffers.
	 */
	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->s_frags_per_block) {
		ext2_blkfree(ip, dofftofsb(fs, buflist->bs_children[i]->b_bio2.bio_offset),
		    fs->s_blocksize);
		buflist->bs_children[i]->b_bio2.bio_offset = fsbtodoff(fs, blkno);
	}
	return (0);

fail:
	if (ssize < len)
		brelse(ebp);
	if (sbap != &ip->i_db[0])
		brelse(sbp);
	return (ENOSPC);

#endif /* FANCY_REALLOC */
}
Beispiel #9
0
static int
ext2_indirtrunc(struct inode *ip, daddr_t lbn, off_t doffset, daddr_t lastbn,
                int level, long *countp)
{
    int i;
    struct buf *bp;
    struct ext2_sb_info *fs = ip->i_e2fs;
    daddr_t *bap;
    struct vnode *vp;
    daddr_t *copy, nb, nlbn, last;
    long blkcount, factor;
    int nblocks, blocksreleased = 0;
    int error = 0, allerror = 0;

    /*
     * Calculate index in current block of last
     * block to be kept.  -1 indicates the entire
     * block so we need not calculate the index.
     */
    factor = 1;
    for (i = SINGLE; i < level; i++)
        factor *= NINDIR(fs);
    last = lastbn;
    if (lastbn > 0)
        last /= factor;
    nblocks = btodb(fs->s_blocksize);
    /*
     * Get buffer of block pointers, zero those entries corresponding
     * to blocks to be free'd, and update on disk copy first.  Since
     * double(triple) indirect before single(double) indirect, calls
     * to bmap on these blocks will fail.  However, we already have
     * the on disk address, so we have to set the bio_offset field
     * explicitly instead of letting bread do everything for us.
     */
    vp = ITOV(ip);
    bp = getblk(vp, lblktodoff(fs, lbn), (int)fs->s_blocksize, 0, 0);
    if ((bp->b_flags & B_CACHE) == 0) {
        bp->b_flags &= ~(B_ERROR | B_INVAL);
        bp->b_cmd = BUF_CMD_READ;
        if (bp->b_bcount > bp->b_bufsize)
            panic("ext2_indirtrunc: bad buffer size");
        bp->b_bio2.bio_offset = doffset;
        bp->b_bio1.bio_done = biodone_sync;
        bp->b_bio1.bio_flags |= BIO_SYNC;
        vfs_busy_pages(bp->b_vp, bp);
        vn_strategy(vp, &bp->b_bio1);
        error = biowait(&bp->b_bio1, "biord");
    }
    if (error) {
        brelse(bp);
        *countp = 0;
        return (error);
    }

    bap = (daddr_t *)bp->b_data;
    MALLOC(copy, daddr_t *, fs->s_blocksize, M_TEMP, M_WAITOK);
    bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->s_blocksize);
    bzero((caddr_t)&bap[last + 1],
          (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t));
    if (last == -1)
        bp->b_flags |= B_INVAL;
    error = bwrite(bp);
    if (error)
        allerror = error;
    bap = copy;

    /*
     * Recursively free totally unused blocks.
     */
    for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last;
            i--, nlbn += factor) {
        nb = bap[i];
        if (nb == 0)
            continue;
        if (level > SINGLE) {
            if ((error = ext2_indirtrunc(ip, nlbn,
                                         fsbtodoff(fs, nb), (daddr_t)-1, level - 1, &blkcount)) != 0)
                allerror = error;
            blocksreleased += blkcount;
        }
        ext2_blkfree(ip, nb, fs->s_blocksize);
        blocksreleased += nblocks;
    }

    /*
     * Recursively free last partial block.
     */
    if (level > SINGLE && lastbn >= 0) {
        last = lastbn % factor;
        nb = bap[i];
        if (nb != 0) {
            error = ext2_indirtrunc(ip, nlbn, fsbtodoff(fs, nb),
                                    last, level - 1, &blkcount);
            if (error)
                allerror = error;
            blocksreleased += blkcount;
        }
    }
    FREE(copy, M_TEMP);
    *countp = blocksreleased;
    return (allerror);
}
Beispiel #10
0
/*
 * Indirect blocks are now on the vnode for the file.  They are given negative
 * logical block numbers.  Indirect blocks are addressed by the negative
 * address of the first data block to which they point.  Double indirect blocks
 * are addressed by one less than the address of the first indirect block to
 * which they point.  Triple indirect blocks are addressed by one less than
 * the address of the first double indirect block to which they point.
 *
 * ext2_bmaparray does the bmap conversion, and if requested returns the
 * array of logical blocks which must be traversed to get to a block.
 * Each entry contains the offset into that block that gets you to the
 * next block and the disk address of the block (if it is assigned).
 */
static
int
ext2_bmaparray(struct vnode *vp, ext2_daddr_t bn, ext2_daddr_t *bnp,
	      struct indir *ap, int *nump, int *runp, int *runb)
{
	struct inode *ip;
	struct buf *bp;
	struct ext2_mount *ump;
	struct mount *mp;
	struct ext2_sb_info *fs;
	struct indir a[NIADDR+1], *xap;
	ext2_daddr_t daddr;
	long metalbn;
	int error, maxrun, num;

	ip = VTOI(vp);
	mp = vp->v_mount;
	ump = VFSTOEXT2(mp);
	fs = ip->i_e2fs;
#ifdef DIAGNOSTIC
	if ((ap != NULL && nump == NULL) || (ap == NULL && nump != NULL))
		panic("ext2_bmaparray: invalid arguments");
#endif

	if (runp) {
		*runp = 0;
	}

	if (runb) {
		*runb = 0;
	}

	maxrun = mp->mnt_iosize_max / mp->mnt_stat.f_iosize - 1;

	xap = ap == NULL ? a : ap;
	if (!nump)
		nump = &num;
	error = ext2_getlbns(vp, bn, xap, nump);
	if (error)
		return (error);

	num = *nump;
	if (num == 0) {
		*bnp = blkptrtodb(ump, ip->i_db[bn]);
		if (*bnp == 0)
			*bnp = -1;
		else if (runp) {
			daddr_t bnb = bn;
			for (++bn; bn < NDADDR && *runp < maxrun &&
			    is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]);
			    ++bn, ++*runp);
			bn = bnb;
			if (runb && (bn > 0)) {
				for (--bn; (bn >= 0) && (*runb < maxrun) &&
					is_sequential(ump, ip->i_db[bn],
						ip->i_db[bn+1]);
						--bn, ++*runb);
			}
		}
		return (0);
	}


	/* Get disk address out of indirect block array */
	daddr = ip->i_ib[xap->in_off];

	for (bp = NULL, ++xap; --num; ++xap) {
		/*
		 * Exit the loop if there is no disk address assigned yet and
		 * the indirect block isn't in the cache, or if we were
		 * looking for an indirect block and we've found it.
		 */

		metalbn = xap->in_lbn;
		if ((daddr == 0 &&
		     !findblk(vp, dbtodoff(fs, metalbn), FINDBLK_TEST)) ||
		    metalbn == bn) {
			break;
		}
		/*
		 * If we get here, we've either got the block in the cache
		 * or we have a disk address for it, go fetch it.
		 */
		if (bp)
			bqrelse(bp);

		xap->in_exists = 1;
		bp = getblk(vp, lblktodoff(fs, metalbn),
			    mp->mnt_stat.f_iosize, 0, 0);
		if ((bp->b_flags & B_CACHE) == 0) {
#ifdef DIAGNOSTIC
			if (!daddr)
				panic("ext2_bmaparray: indirect block not in cache");
#endif
			/*
			 * This runs through ext2_strategy using bio2 to
			 * cache the disk offset, then comes back through
			 * bio1.  So we want to wait on bio1
			 */
			bp->b_bio1.bio_done = biodone_sync;
			bp->b_bio1.bio_flags |= BIO_SYNC;
			bp->b_bio2.bio_offset = fsbtodoff(fs, daddr);
			bp->b_flags &= ~(B_INVAL|B_ERROR);
			bp->b_cmd = BUF_CMD_READ;
			vfs_busy_pages(bp->b_vp, bp);
			vn_strategy(bp->b_vp, &bp->b_bio1);
			error = biowait(&bp->b_bio1, "biord");
			if (error) {
				brelse(bp);
				return (error);
			}
		}

		daddr = ((ext2_daddr_t *)bp->b_data)[xap->in_off];
		if (num == 1 && daddr && runp) {
			for (bn = xap->in_off + 1;
			    bn < MNINDIR(ump) && *runp < maxrun &&
			    is_sequential(ump,
			    ((ext2_daddr_t *)bp->b_data)[bn - 1],
			    ((ext2_daddr_t *)bp->b_data)[bn]);
			    ++bn, ++*runp);
			bn = xap->in_off;
			if (runb && bn) {
				for(--bn; bn >= 0 && *runb < maxrun &&
					is_sequential(ump, ((daddr_t *)bp->b_data)[bn],
					    ((daddr_t *)bp->b_data)[bn+1]);
					--bn, ++*runb);
			}
		}
	}
	if (bp)
		bqrelse(bp);

	daddr = blkptrtodb(ump, daddr);
	*bnp = daddr == 0 ? -1 : daddr;
	return (0);
}