예제 #1
0
/*
 * Rewrite an existing directory entry to point at the inode
 * supplied.  The parameters describing the directory entry are
 * set up by a call to namei.
 */
int
ufs_dirrewrite(struct inode *dp, struct inode *oip, ufsino_t newinum,
    int newtype, int isrmdir)
{
	struct buf *bp;
	struct direct *ep;
	struct vnode *vdp = ITOV(dp);
	int error;

	error = UFS_BUFATOFF(dp, (off_t)dp->i_offset, (char **)&ep, &bp);
	if (error)
		return (error);
	ep->d_ino = newinum;
	if (vdp->v_mount->mnt_maxsymlinklen > 0)
 		ep->d_type = newtype;
 	oip->i_effnlink--;
 	if (DOINGSOFTDEP(vdp)) {
		softdep_change_linkcnt(oip, 0);
 		softdep_setup_directory_change(bp, dp, oip, newinum, isrmdir);
 		bdwrite(bp);
 	} else {
		DIP_ADD(oip, nlink, -1);
		oip->i_flag |= IN_CHANGE;
		UFS_WAPBL_UPDATE(oip, MNT_WAIT);
		if (DOINGASYNC(vdp)) {
			bdwrite(bp);
			error = 0;
		} else {
			error = VOP_BWRITE(bp);
		}
 	}
	dp->i_flag |= IN_CHANGE | IN_UPDATE;
	UFS_WAPBL_UPDATE(dp, MNT_WAIT);
	return (error);
}
예제 #2
0
/*
 * Update the access, modified, and inode change times as specified by the
 * IN_ACCESS, IN_UPDATE, and IN_CHANGE flags respectively.  Write the inode
 * to disk if the IN_MODIFIED flag is set (it may be set initially, or by
 * the timestamp update).  The IN_LAZYMOD flag is set to force a write
 * later if not now.  If we write now, then clear both IN_MODIFIED and
 * IN_LAZYMOD to reflect the presumably successful write, and if waitfor is
 * set, then wait for the write to complete.
 */
int
ffs_update(struct vnode *vp, int waitfor)
{
	struct fs *fs;
	struct buf *bp;
	struct inode *ip;
	int error;

	ufs_itimes(vp);
	ip = VTOI(vp);
	if ((ip->i_flag & IN_MODIFIED) == 0 && waitfor == 0)
		return (0);
	ip->i_flag &= ~(IN_LAZYMOD | IN_MODIFIED);
	fs = ip->i_fs;
	if (fs->fs_ronly)
		return (0);

	/*
	 * The vnode type is usually set to VBAD if an unrecoverable I/O
	 * error has occured (such as when reading the inode).  Clear the
	 * modified bits but do not write anything out in this case.
	 */
	if (vp->v_type == VBAD)
		return (0);
	/*
	 * Ensure that uid and gid are correct. This is a temporary
	 * fix until fsck has been changed to do the update.
	 */
	if (fs->fs_inodefmt < FS_44INODEFMT) {		/* XXX */
		ip->i_din.di_ouid = ip->i_uid;		/* XXX */
		ip->i_din.di_ogid = ip->i_gid;		/* XXX */
	}						/* XXX */
	error = bread(ip->i_devvp, 
		      fsbtodoff(fs, ino_to_fsba(fs, ip->i_number)),
		      (int)fs->fs_bsize, &bp);
	if (error) {
		brelse(bp);
		return (error);
	}
	if (DOINGSOFTDEP(vp))
		softdep_update_inodeblock(ip, bp, waitfor);
	else if (ip->i_effnlink != ip->i_nlink)
		panic("ffs_update: bad link cnt");
	*((struct ufs1_dinode *)bp->b_data +
	    ino_to_fsbo(fs, ip->i_number)) = ip->i_din;
	if (waitfor && !DOINGASYNC(vp)) {
		return (bwrite(bp));
	} else if (vm_page_count_severe() || buf_dirty_count_severe()) {
		return (bwrite(bp));
	} else {
		if (bp->b_bufsize == fs->fs_bsize)
			bp->b_flags |= B_CLUSTEROK;
		bdwrite(bp);
		return (0);
	}
}
예제 #3
0
/*
 * ffs_balloc(struct vnode *a_vp, ufs_daddr_t a_lbn, int a_size,
 *	      struct ucred *a_cred, int a_flags, struct buf *a_bpp)
 *
 * Balloc defines the structure of filesystem storage by allocating
 * the physical blocks on a device given the inode and the logical
 * block number in a file.
 *
 * NOTE: B_CLRBUF - this flag tells balloc to clear invalid portions
 *	 of the buffer.  However, any dirty bits will override missing
 *	 valid bits.  This case occurs when writable mmaps are truncated
 *	 and then extended.
 */
int
ffs_balloc(struct vop_balloc_args *ap)
{
	struct inode *ip;
	ufs_daddr_t lbn;
	int size;
	struct ucred *cred;
	int flags;
	struct fs *fs;
	ufs_daddr_t nb;
	struct buf *bp, *nbp, *dbp;
	struct vnode *vp;
	struct indir indirs[NIADDR + 2];
	ufs_daddr_t newb, *bap, pref;
	int deallocated, osize, nsize, num, i, error;
	ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
	ufs_daddr_t *lbns_remfree, lbns[NIADDR + 1];
	int unwindidx;
	int seqcount;

	vp = ap->a_vp;
	ip = VTOI(vp);
	fs = ip->i_fs;
	lbn = lblkno(fs, ap->a_startoffset);
	size = blkoff(fs, ap->a_startoffset) + ap->a_size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc: blk too big");
	*ap->a_bpp = NULL;
	if (lbn < 0)
		return (EFBIG);
	cred = ap->a_cred;
	flags = ap->a_flags;

	/*
	 * The vnode must be locked for us to be able to safely mess
	 * around with the inode.
	 */
	if (vn_islocked(vp) != LK_EXCLUSIVE) {
		panic("ffs_balloc: vnode %p not exclusively locked!", vp);
	}

	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */
	nb = lblkno(fs, ip->i_size);
	if (nb < NDADDR && nb < lbn) {
		/*
		 * The filesize prior to this write can fit in direct
		 * blocks (ex. fragmentation is possibly done)
		 * we are now extending the file write beyond
		 * the block which has end of the file prior to this write.
		 */
		osize = blksize(fs, ip, nb);
		/*
		 * osize gives disk allocated size in the last block. It is
		 * either in fragments or a file system block size.
		 */
		if (osize < fs->fs_bsize && osize > 0) {
			/* A few fragments are already allocated, since the
			 * current extends beyond this block allocated the
			 * complete block as fragments are on in last block.
			 */
			error = ffs_realloccg(ip, nb,
				ffs_blkpref(ip, nb, (int)nb, &ip->i_db[0]),
				osize, (int)fs->fs_bsize, cred, &bp);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb,
				    dofftofsb(fs, bp->b_bio2.bio_offset), 
				    ip->i_db[nb], fs->fs_bsize, osize, bp);
			/* adjust the inode size, we just grew */
			ip->i_size = smalllblktosize(fs, nb + 1);
			ip->i_db[nb] = dofftofsb(fs, bp->b_bio2.bio_offset);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (flags & B_SYNC)
				bwrite(bp);
			else
				bawrite(bp);
			/* bp is already released here */
		}
	}
	/*
	 * The first NDADDR blocks are direct blocks
	 */
	if (lbn < NDADDR) {
		nb = ip->i_db[lbn];
		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
			error = bread(vp, lblktodoff(fs, lbn), fs->fs_bsize, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_bio2.bio_offset = fsbtodoff(fs, nb);
			*ap->a_bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				error = bread(vp, lblktodoff(fs, lbn), 
					      osize, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_bio2.bio_offset = fsbtodoff(fs, nb);
			} else {
				/*
				 * NOTE: ffs_realloccg() issues a bread().
				 */
				error = ffs_realloccg(ip, lbn,
				    ffs_blkpref(ip, lbn, (int)lbn,
					&ip->i_db[0]), osize, nsize, cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    dofftofsb(fs, bp->b_bio2.bio_offset),
					    nb, nsize, osize, bp);
			}
		} else {
			if (ip->i_size < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]),
			    nsize, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, lblktodoff(fs, lbn), nsize, 0, 0);
			bp->b_bio2.bio_offset = fsbtodoff(fs, newb);
			if (flags & B_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		ip->i_db[lbn] = dofftofsb(fs, bp->b_bio2.bio_offset);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*ap->a_bpp = bp;
		return (0);
	}
	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return(error);
#ifdef DIAGNOSTIC
	if (num < 1)
		panic ("ffs_balloc: ufs_bmaparray returned indirect block");
#endif
	/*
	 * Get a handle on the data block buffer before working through 
	 * indirect blocks to avoid a deadlock between the VM system holding
	 * a locked VM page and issuing a BMAP (which tries to lock the
	 * indirect blocks), and the filesystem holding a locked indirect
	 * block and then trying to read a data block (which tries to lock
	 * the underlying VM pages).
	 */
	dbp = getblk(vp, lblktodoff(fs, lbn), fs->fs_bsize, 0, 0);

	/*
	 * Setup undo history
	 */
	allocib = NULL;
	allocblk = allociblk;
	lbns_remfree = lbns;

	unwindidx = -1;

	/*
	 * Fetch the first indirect block directly from the inode, allocating
	 * one if necessary. 
	 */
	--num;
	nb = ip->i_ib[indirs[0].in_off];
	if (nb == 0) {
		pref = ffs_blkpref(ip, lbn, 0, NULL);
		/*
		 * If the filesystem has run out of space we can skip the
		 * full fsync/undo of the main [fail] case since no undo
		 * history has been built yet.  Hence the goto fail2.
		 */
	        if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    cred, &newb)) != 0)
			goto fail2;
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[1].in_lbn;
		bp = getblk(vp, lblktodoff(fs, indirs[1].in_lbn),
			    fs->fs_bsize, 0, 0);
		bp->b_bio2.bio_offset = fsbtodoff(fs, nb);
		vfs_bio_clrbuf(bp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if (DOINGASYNC(vp))
				bdwrite(bp);
			else if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		allocib = &ip->i_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */
	for (i = 1;;) {
		error = bread(vp, lblktodoff(fs, indirs[i].in_lbn), (int)fs->fs_bsize, &bp);
		if (error) {
			brelse(bp);
			goto fail;
		}
		bap = (ufs_daddr_t *)bp->b_data;
		nb = bap[indirs[i].in_off];
		if (i == num)
			break;
		i += 1;
		if (nb != 0) {
			bqrelse(bp);
			continue;
		}
		if (pref == 0)
			pref = ffs_blkpref(ip, lbn, 0, NULL);
		if ((error =
		    ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[i].in_lbn;
		nbp = getblk(vp, lblktodoff(fs, indirs[i].in_lbn),
			     fs->fs_bsize, 0, 0);
		nbp->b_bio2.bio_offset = fsbtodoff(fs, nb);
		vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if ((error = bwrite(nbp)) != 0) {
				brelse(bp);
				goto fail;
			}
		}
		bap[indirs[i - 1].in_off] = nb;
		if (allocib == NULL && unwindidx < 0)
			unwindidx = i - 1;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
	}

	/*
	 * Get the data block, allocating if necessary.  We have already
	 * called getblk() on the data block buffer, dbp.  If we have to
	 * allocate it and B_CLRBUF has been set the inference is an intention
	 * to zero out the related disk blocks, so we do not have to issue
	 * a read.  Instead we simply call vfs_bio_clrbuf().  If B_CLRBUF is
	 * not set the caller intends to overwrite the entire contents of the
	 * buffer and we don't waste time trying to clean up the contents.
	 *
	 * bp references the current indirect block.  When allocating, 
	 * the block must be updated.
	 */
	if (nb == 0) {
		pref = ffs_blkpref(ip, lbn, indirs[i].in_off, &bap[0]);
		error = ffs_alloc(ip,
		    lbn, pref, (int)fs->fs_bsize, cred, &newb);
		if (error) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = lbn;
		dbp->b_bio2.bio_offset = fsbtodoff(fs, nb);
		if (flags & B_CLRBUF)
			vfs_bio_clrbuf(dbp);
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[i].in_off, nb, 0, dbp);
		bap[indirs[i].in_off] = nb;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		*ap->a_bpp = dbp;
		return (0);
	}
	brelse(bp);

	/*
	 * At this point all related indirect blocks have been allocated
	 * if necessary and released.  bp is no longer valid.  dbp holds
	 * our getblk()'d data block.
	 *
	 * XXX we previously performed a cluster_read operation here.
	 */
	if (flags & B_CLRBUF) {
		/*
		 * If B_CLRBUF is set we must validate the invalid portions
		 * of the buffer.  This typically requires a read-before-
		 * write.  The strategy call will fill in bio_offset in that
		 * case.
		 *
		 * If we hit this case we do a cluster read if possible
		 * since nearby data blocks are likely to be accessed soon
		 * too.
		 */
		if ((dbp->b_flags & B_CACHE) == 0) {
			bqrelse(dbp);
			seqcount = (flags & B_SEQMASK) >> B_SEQSHIFT;
			if (seqcount &&
			    (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
				error = cluster_read(vp, (off_t)ip->i_size,
					    lblktodoff(fs, lbn),
					    (int)fs->fs_bsize, 
					    fs->fs_bsize,
					    seqcount * BKVASIZE,
					    &dbp);
			} else {
				error = bread(vp, lblktodoff(fs, lbn),
					      (int)fs->fs_bsize, &dbp);
			}
			if (error)
				goto fail;
		} else {
예제 #4
0
/*
 * Balloc defines the structure of filesystem storage
 * by allocating the physical blocks on a device given
 * the inode and the logical block number in a file.
 * This is the allocation strategy for UFS1. Below is
 * the allocation strategy for UFS2.
 */
int
ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
    struct ucred *cred, int flags, struct buf **bpp)
{
	struct inode *ip;
	struct ufs1_dinode *dp;
	ufs_lbn_t lbn, lastlbn;
	struct fs *fs;
	ufs1_daddr_t nb;
	struct buf *bp, *nbp;
	struct ufsmount *ump;
	struct indir indirs[NIADDR + 2];
	int deallocated, osize, nsize, num, i, error;
	ufs2_daddr_t newb;
	ufs1_daddr_t *bap, pref;
	ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
	ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
	int unwindidx = -1;
	int saved_inbdflush;
	static struct timeval lastfail;
	static int curfail;
	int reclaimed;

	ip = VTOI(vp);
	dp = ip->i_din1;
	fs = ip->i_fs;
	ump = ip->i_ump;
	lbn = lblkno(fs, startoffset);
	size = blkoff(fs, startoffset) + size;
	reclaimed = 0;
	if (size > fs->fs_bsize)
		panic("ffs_balloc_ufs1: blk too big");
	*bpp = NULL;
	if (flags & IO_EXT)
		return (EOPNOTSUPP);
	if (lbn < 0)
		return (EFBIG);

	if (DOINGSOFTDEP(vp))
		softdep_prealloc(vp, MNT_WAIT);
	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */
	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			UFS_LOCK(ump);
			error = ffs_realloccg(ip, nb, dp->di_db[nb],
			   ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
			   &dp->di_db[0]), osize, (int)fs->fs_bsize, flags,
			   cred, &bp);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb,
				    dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
				    fs->fs_bsize, osize, bp);
			ip->i_size = smalllblktosize(fs, nb + 1);
			dp->di_size = ip->i_size;
			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (flags & IO_SYNC)
				bwrite(bp);
			else
				bawrite(bp);
		}
	}
	/*
	 * The first NDADDR blocks are direct blocks
	 */
	if (lbn < NDADDR) {
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
		nb = dp->di_db[lbn];
		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
			error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				error = bread(vp, lbn, osize, NOCRED, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_blkno = fsbtodb(fs, nb);
			} else {
				UFS_LOCK(ump);
				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
				    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
				    &dp->di_db[0]), osize, nsize, flags,
				    cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			if (ip->i_size < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			UFS_LOCK(ump);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
			    nsize, flags, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, lbn, nsize, 0, 0, 0);
			bp->b_blkno = fsbtodb(fs, newb);
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return(error);
#ifdef INVARIANTS
	if (num < 1)
		panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
#endif
	saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
	/*
	 * Fetch the first indirect block allocating if necessary.
	 */
	--num;
	nb = dp->di_ib[indirs[0].in_off];
	allocib = NULL;
	allocblk = allociblk;
	lbns_remfree = lbns;
	if (nb == 0) {
		UFS_LOCK(ump);
		pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
	        if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags, cred, &newb)) != 0) {
			curthread_pflags_restore(saved_inbdflush);
			return (error);
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[1].in_lbn;
		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0);
		bp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(bp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if (DOINGASYNC(vp))
				bdwrite(bp);
			else if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		allocib = &dp->di_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */
retry:
	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			brelse(bp);
			goto fail;
		}
		bap = (ufs1_daddr_t *)bp->b_data;
		nb = bap[indirs[i].in_off];
		if (i == num)
			break;
		i += 1;
		if (nb != 0) {
			bqrelse(bp);
			continue;
		}
		UFS_LOCK(ump);
		if (pref == 0)
			pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | IO_BUFLOCKED, cred, &newb)) != 0) {
			brelse(bp);
			if (++reclaimed == 1) {
				UFS_LOCK(ump);
				softdep_request_cleanup(fs, vp, cred,
				    FLUSH_BLOCKS_WAIT);
				UFS_UNLOCK(ump);
				goto retry;
			}
			if (ppsratecheck(&lastfail, &curfail, 1)) {
				ffs_fserr(fs, ip->i_number, "filesystem full");
				uprintf("\n%s: write failed, filesystem "
				    "is full\n", fs->fs_fsmnt);
			}
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[i].in_lbn;
		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if ((error = bwrite(nbp)) != 0) {
				brelse(bp);
				goto fail;
			}
		}
		bap[indirs[i - 1].in_off] = nb;
		if (allocib == NULL && unwindidx < 0)
			unwindidx = i - 1;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
	}
	/*
	 * If asked only for the indirect block, then return it.
	 */
	if (flags & BA_METAONLY) {
		curthread_pflags_restore(saved_inbdflush);
		*bpp = bp;
		return (0);
	}
	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		UFS_LOCK(ump);
		pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off, &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | IO_BUFLOCKED, cred, &newb);
		if (error) {
			brelse(bp);
			if (++reclaimed == 1) {
				UFS_LOCK(ump);
				softdep_request_cleanup(fs, vp, cred,
				    FLUSH_BLOCKS_WAIT);
				UFS_UNLOCK(ump);
				goto retry;
			}
			if (ppsratecheck(&lastfail, &curfail, 1)) {
				ffs_fserr(fs, ip->i_number, "filesystem full");
				uprintf("\n%s: write failed, filesystem "
				    "is full\n", fs->fs_fsmnt);
			}
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = lbn;
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		if (flags & BA_CLRBUF)
			vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[i].in_off, nb, 0, nbp);
		bap[indirs[i].in_off] = nb;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		curthread_pflags_restore(saved_inbdflush);
		*bpp = nbp;
		return (0);
	}
	brelse(bp);
	if (flags & BA_CLRBUF) {
		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
		if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			error = cluster_read(vp, ip->i_size, lbn,
			    (int)fs->fs_bsize, NOCRED,
			    MAXBSIZE, seqcount, &nbp);
		} else {
			error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
		}
		if (error) {
			brelse(nbp);
			goto fail;
		}
	} else {
예제 #5
0
/*
 * Update the access, modified, and inode change times as specified by the
 * IN_ACCESS, IN_UPDATE, and IN_CHANGE flags respectively.  Write the inode
 * to disk if the IN_MODIFIED flag is set (it may be set initially, or by
 * the timestamp update).  The IN_LAZYMOD flag is set to force a write
 * later if not now.  The IN_LAZYACCESS is set instead of IN_MODIFIED if the fs
 * is currently being suspended (or is suspended) and vnode has been accessed.
 * If we write now, then clear IN_MODIFIED, IN_LAZYACCESS and IN_LAZYMOD to
 * reflect the presumably successful write, and if waitfor is set, then wait
 * for the write to complete.
 */
int 
ffs_update (vnode *vp, int waitfor)
{
	int error = 0;
	print("HARVEY TODO: %s\n", __func__);
#if 0
	struct fs *fs;
	struct buf *bp;
	struct inode *ip;
	int flags, error;

	ASSERT_VOP_ELOCKED(vp, "ffs_update");
	ufs_itimes(vp);
	ip = VTOI(vp);
	if ((ip->i_flag & IN_MODIFIED) == 0 && waitfor == 0)
		return (0);
	ip->i_flag &= ~(IN_LAZYACCESS | IN_LAZYMOD | IN_MODIFIED);
	fs = ITOFS(ip);
	if (fs->fs_ronly && ITOUMP(ip)->um_fsckpid == 0)
		return (0);
	/*
	 * If we are updating a snapshot and another process is currently
	 * writing the buffer containing the inode for this snapshot then
	 * a deadlock can occur when it tries to check the snapshot to see
	 * if that block needs to be copied. Thus when updating a snapshot
	 * we check to see if the buffer is already locked, and if it is
	 * we drop the snapshot lock until the buffer has been written
	 * and is available to us. We have to grab a reference to the
	 * snapshot vnode to prevent it from being removed while we are
	 * waiting for the buffer.
	 */
	flags = 0;
	if (IS_SNAPSHOT(ip))
		flags = GB_LOCK_NOWAIT;
loop:
	error = breadn_flags(ITODEVVP(ip),
	     fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
	     (int) fs->fs_bsize, 0, 0, 0, NOCRED, flags, &bp);
	if (error != 0) {
		if (error != EBUSY)
			return (error);
		KASSERT((IS_SNAPSHOT(ip)), ("EBUSY from non-snapshot"));
		/*
		 * Wait for our inode block to become available.
		 *
		 * Hold a reference to the vnode to protect against
		 * ffs_snapgone(). Since we hold a reference, it can only
		 * get reclaimed (VI_DOOMED flag) in a forcible downgrade
		 * or unmount. For an unmount, the entire filesystem will be
		 * gone, so we cannot attempt to touch anything associated
		 * with it while the vnode is unlocked; all we can do is 
		 * pause briefly and try again. If when we relock the vnode
		 * we discover that it has been reclaimed, updating it is no
		 * longer necessary and we can just return an error.
		 */
		vref(vp);
		VOP_UNLOCK(vp, 0);
		pause("ffsupd", 1);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		vrele(vp);
		if ((vp->v_iflag & VI_DOOMED) != 0)
			return (ENOENT);
		goto loop;
	}
	if (DOINGSOFTDEP(vp))
		softdep_update_inodeblock(ip, bp, waitfor);
	else if (ip->i_effnlink != ip->i_nlink)
		panic("ffs_update: bad link cnt");
	if (I_IS_UFS1(ip)) {
		*((struct ufs1_dinode *)bp->b_data +
		    ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1;
		/* XXX: FIX? The entropy here is desirable, but the harvesting may be expensive */
		random_harvest_queue(&(ip->i_din1), sizeof(ip->i_din1), 1, RANDOM_FS_ATIME);
	} else {
		*((struct ufs2_dinode *)bp->b_data +
		    ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2;
		/* XXX: FIX? The entropy here is desirable, but the harvesting may be expensive */
		random_harvest_queue(&(ip->i_din2), sizeof(ip->i_din2), 1, RANDOM_FS_ATIME);
	}
	if (waitfor)
		error = bwrite(bp);
	else if (vm_page_count_severe() || buf_dirty_count_severe()) {
		bawrite(bp);
		error = 0;
	} else {
		if (bp->b_bufsize == fs->fs_bsize)
			bp->b_flags |= B_CLUSTEROK;
		bdwrite(bp);
		error = 0;
	}
#endif // 0
	return (error);
}
예제 #6
0
/*
 * Balloc defines the structure of file system storage
 * by allocating the physical blocks on a device given
 * the inode and the logical block number in a file.
 */
int
ffs1_balloc(struct inode *ip, off_t startoffset, int size, struct ucred *cred,
    int flags, struct buf **bpp)
{
	daddr_t lbn, nb, newb, pref;
	struct fs *fs;
	struct buf *bp, *nbp;
	struct vnode *vp;
	struct proc *p;
	struct indir indirs[NIADDR + 2];
	int32_t *bap;
	int deallocated, osize, nsize, num, i, error;
	int32_t *allocib, *blkp, *allocblk, allociblk[NIADDR+1];
	int unwindidx = -1;

	vp = ITOV(ip);
	fs = ip->i_fs;
	p = curproc;
	lbn = lblkno(fs, startoffset);
	size = blkoff(fs, startoffset) + size;
	if (size > fs->fs_bsize)
		panic("ffs1_balloc: blk too big");
	if (bpp != NULL)
		*bpp = NULL;
	if (lbn < 0)
		return (EFBIG);

	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */
	nb = lblkno(fs, ip->i_ffs1_size);
	if (nb < NDADDR && nb < lbn) {
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			error = ffs_realloccg(ip, nb,
			    ffs1_blkpref(ip, nb, (int)nb, &ip->i_ffs1_db[0]),
			    osize, (int)fs->fs_bsize, cred, bpp, &newb);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb, newb,
				    ip->i_ffs1_db[nb], fs->fs_bsize, osize,
				    bpp ? *bpp : NULL);

			ip->i_ffs1_size = lblktosize(fs, nb + 1);
			uvm_vnp_setsize(vp, ip->i_ffs1_size);
			ip->i_ffs1_db[nb] = newb;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (bpp != NULL) {
				if (flags & B_SYNC)
					bwrite(*bpp);
				else
					bawrite(*bpp);
			}
		}
	}
	/*
	 * The first NDADDR blocks are direct blocks
	 */
	if (lbn < NDADDR) {
		nb = ip->i_ffs1_db[lbn];
		if (nb != 0 && ip->i_ffs1_size >= lblktosize(fs, lbn + 1)) {
			/*
			 * The block is an already-allocated direct block
			 * and the file already extends past this block,
			 * thus this must be a whole block.
			 * Just read the block (if requested).
			 */

			if (bpp != NULL) {
				error = bread(vp, lbn, fs->fs_bsize, bpp);
				if (error) {
					brelse(*bpp);
					return (error);
				}
			}
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_ffs1_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				/*
				 * The existing block is already
				 * at least as big as we want.
				 * Just read the block (if requested).
				 */
				if (bpp != NULL) {
					error = bread(vp, lbn, fs->fs_bsize,
					    bpp);
					if (error) {
						brelse(*bpp);
						return (error);
					}
					(*bpp)->b_bcount = osize;
				}
				return (0);
			} else {
				/*
				 * The existing block is smaller than we
				 * want, grow it.
				 */
				error = ffs_realloccg(ip, lbn,
				    ffs1_blkpref(ip, lbn, (int)lbn,
					&ip->i_ffs1_db[0]),
				    osize, nsize, cred, bpp, &newb);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    newb, nb, nsize, osize,
					    bpp ? *bpp : NULL);
			}
		} else {
			/*
			 * The block was not previously allocated,
			 * allocate a new block or fragment.
			 */

			if (ip->i_ffs1_size < lblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			error = ffs_alloc(ip, lbn,
			    ffs1_blkpref(ip, lbn, (int)lbn, &ip->i_ffs1_db[0]),
			    nsize, cred, &newb);
			if (error)
				return (error);
			if (bpp != NULL) {
				*bpp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
				if (nsize < fs->fs_bsize)
					(*bpp)->b_bcount = nsize;
				(*bpp)->b_blkno = fsbtodb(fs, newb);
				if (flags & B_CLRBUF)
					clrbuf(*bpp);
			}
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bpp ? *bpp : NULL);
		}
		ip->i_ffs1_db[lbn] = newb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (0);
	}

	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return(error);
#ifdef DIAGNOSTIC
	if (num < 1)
		panic ("ffs1_balloc: ufs_bmaparray returned indirect block");
#endif
	/*
	 * Fetch the first indirect block allocating if necessary.
	 */
	--num;
	nb = ip->i_ffs1_ib[indirs[0].in_off];

	allocib = NULL;
	allocblk = allociblk;
	if (nb == 0) {
		pref = ffs1_blkpref(ip, lbn, -indirs[0].in_off - 1, NULL);
	        error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
				  cred, &newb);
		if (error)
			goto fail;
		nb = newb;

		*allocblk++ = nb;
		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
		bp->b_blkno = fsbtodb(fs, nb);
		clrbuf(bp);

		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		allocib = &ip->i_ffs1_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */
	for (i = 1;;) {
		error = bread(vp, indirs[i].in_lbn, (int)fs->fs_bsize, &bp);
		if (error) {
			brelse(bp);
			goto fail;
		}
		bap = (int32_t *)bp->b_data;
		nb = bap[indirs[i].in_off];
		if (i == num)
			break;
		i++;
		if (nb != 0) {
			brelse(bp);
			continue;
		}
		if (pref == 0)
			pref = ffs1_blkpref(ip, lbn, i - num - 1, NULL);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
				  &newb);
		if (error) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		clrbuf(nbp);

		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if ((error = bwrite(nbp)) != 0) {
				brelse(bp);
				goto fail;
			}
		}
		bap[indirs[i - 1].in_off] = nb;
		if (allocib == NULL && unwindidx < 0)
			unwindidx = i - 1;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
	}
	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		pref = ffs1_blkpref(ip, lbn, indirs[i].in_off, &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
				  &newb);
		if (error) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		if (bpp != NULL) {
			nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
			nbp->b_blkno = fsbtodb(fs, nb);
			if (flags & B_CLRBUF)
				clrbuf(nbp);
			*bpp = nbp;
		}
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[i].in_off, nb, 0, bpp ? *bpp : NULL);
		bap[indirs[i].in_off] = nb;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
		return (0);
	}
	brelse(bp);
	if (bpp != NULL) {
		if (flags & B_CLRBUF) {
			error = bread(vp, lbn, (int)fs->fs_bsize, &nbp);
			if (error) {
				brelse(nbp);
				goto fail;
			}
		} else {
			nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
			nbp->b_blkno = fsbtodb(fs, nb);
		}
		*bpp = nbp;
	}
	return (0);

fail:
	/*
	 * If we have failed to allocate any blocks, simply return the error.
	 * This is the usual case and avoids the need to fsync the file.
	 */
	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
		return (error);
	/*
	 * If we have failed part way through block allocation, we have to
	 * deallocate any indirect blocks that we have allocated. We have to
	 * fsync the file before we start to get rid of all of its
	 * dependencies so that we do not leave them dangling. We have to sync
	 * it at the end so that the softdep code does not find any untracked
	 * changes. Although this is really slow, running out of disk space is
	 * not expected to be a common occurrence. The error return from fsync
	 * is ignored as we already have an error to return to the user.
	 */
	VOP_FSYNC(vp, p->p_ucred, MNT_WAIT, p);
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(ip, *blkp, fs->fs_bsize);
		deallocated += fs->fs_bsize;
	}
	if (allocib != NULL) {
		*allocib = 0;
	} else if (unwindidx >= 0) {
		int r;

		r = bread(vp, indirs[unwindidx].in_lbn, (int)fs->fs_bsize, &bp);
		if (r)
			panic("Could not unwind indirect block, error %d", r);
		bap = (int32_t *)bp->b_data;
		bap[indirs[unwindidx].in_off] = 0;
		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
	}
	if (deallocated) {
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void)ufs_quota_free_blocks(ip, btodb(deallocated), cred);

		ip->i_ffs1_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	VOP_FSYNC(vp, p->p_ucred, MNT_WAIT, p);
	return (error);
}
예제 #7
0
/*
 * Write a directory entry after a call to namei, using the parameters
 * that it left in nameidata. The argument dirp is the new directory
 * entry contents. Dvp is a pointer to the directory to be written,
 * which was left locked by namei. Remaining parameters (dp->i_offset,
 * dp->i_count) indicate how the space for the new entry is to be obtained.
 * Non-null bp indicates that a directory is being created (for the
 * soft dependency code).
 */
int
ufs_direnter(struct vnode *dvp, struct vnode *tvp, struct direct *dirp,
    struct componentname *cnp, struct buf *newdirbp)
{
  	struct ucred *cr;
  	struct proc *p;
  	int newentrysize;
  	struct inode *dp;
  	struct buf *bp;
  	u_int dsize;
  	struct direct *ep, *nep;
	int error, ret, blkoff, loc, spacefree, flags;
  	char *dirbuf;

	UFS_WAPBL_JLOCK_ASSERT(dvp->v_mount);

 	error = 0;
 	cr = cnp->cn_cred;
 	p = cnp->cn_proc;
  	dp = VTOI(dvp);
  	newentrysize = DIRSIZ(FSFMT(dvp), dirp);

	if (dp->i_count == 0) {
		/*
		 * If dp->i_count is 0, then namei could find no
		 * space in the directory. Here, dp->i_offset will
		 * be on a directory block boundary and we will write the
  		 * new entry into a fresh block.
  		 */
  		if (dp->i_offset & (DIRBLKSIZ - 1))
			panic("ufs_direnter: newblk");
		flags = B_CLRBUF;
		if (!DOINGSOFTDEP(dvp))
			flags |= B_SYNC;
		if ((error = UFS_BUF_ALLOC(dp, (off_t)dp->i_offset, DIRBLKSIZ,
		    cr, flags, &bp)) != 0) {
			if (DOINGSOFTDEP(dvp) && newdirbp != NULL)
				bdwrite(newdirbp);
			return (error);
		}
		DIP_ASSIGN(dp, size, dp->i_offset + DIRBLKSIZ);
		dp->i_flag |= IN_CHANGE | IN_UPDATE;
		uvm_vnp_setsize(dvp, DIP(dp, size));
  		dirp->d_reclen = DIRBLKSIZ;
		blkoff = dp->i_offset &
		    (VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_iosize - 1);
		memcpy(bp->b_data + blkoff, dirp, newentrysize);

#ifdef UFS_DIRHASH
		if (dp->i_dirhash != NULL) {
			ufsdirhash_newblk(dp, dp->i_offset);
			ufsdirhash_add(dp, dirp, dp->i_offset);
			ufsdirhash_checkblock(dp, (char *)bp->b_data + blkoff,
			dp->i_offset);
		}
#endif

		if (DOINGSOFTDEP(dvp)) {
			/*
			 * Ensure that the entire newly allocated block is a
			 * valid directory so that future growth within the
			 * block does not have to ensure that the block is
			 * written before the inode.
			 */
			blkoff += DIRBLKSIZ;
			while (blkoff < bp->b_bcount) {
				((struct direct *)
				   (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ;
				blkoff += DIRBLKSIZ;
			}
			if (softdep_setup_directory_add(bp, dp, dp->i_offset,
			    dirp->d_ino, newdirbp, 1) == 0) {
				bdwrite(bp);
				return (UFS_UPDATE(dp, 0));
			}
			/* We have just allocated a directory block in an
			 * indirect block. Rather than tracking when it gets
			 * claimed by the inode, we simply do a VOP_FSYNC
			 * now to ensure that it is there (in case the user
			 * does a future fsync). Note that we have to unlock
			 * the inode for the entry that we just entered, as
			 * the VOP_FSYNC may need to lock other inodes which
			 * can lead to deadlock if we also hold a lock on
			 * the newly entered node.
			 */
			if ((error = VOP_BWRITE(bp)))
				return (error);
			if (tvp != NULL)
				VOP_UNLOCK(tvp, 0);
			error = VOP_FSYNC(dvp, p->p_ucred, MNT_WAIT);
			if (tvp != NULL)
				vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p);
			return (error);
		}
		error = VOP_BWRITE(bp);
 		ret = UFS_UPDATE(dp, !DOINGSOFTDEP(dvp));
 		if (error == 0)
 			return (ret);
  		return (error);
  	}
  
  	/*
	 * If dp->i_count is non-zero, then namei found space for the new
	 * entry in the range dp->i_offset to dp->i_offset + dp->i_count
	 * in the directory. To use this space, we may have to compact
	 * the entries located there, by copying them together towards the
	 * beginning of the block, leaving the free space in one usable
	 * chunk at the end.
  	 */
  
  	/*
	 * Increase size of directory if entry eats into new space.
	 * This should never push the size past a new multiple of
	 * DIRBLKSIZE.
	 *
	 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN.
	 */
	if (dp->i_offset + dp->i_count > DIP(dp, size)) {
		DIP_ASSIGN(dp, size, dp->i_offset + dp->i_count);
		dp->i_flag |= IN_CHANGE | IN_UPDATE;
		UFS_WAPBL_UPDATE(dp, MNT_WAIT);
	}
	/*
	 * Get the block containing the space for the new directory entry.
	 */
 	if ((error = UFS_BUFATOFF(dp, (off_t)dp->i_offset, &dirbuf, &bp)) 
	    != 0) {
 		if (DOINGSOFTDEP(dvp) && newdirbp != NULL)
 			bdwrite(newdirbp);
  		return (error);
 	}
	/*
	 * Find space for the new entry. In the simple case, the entry at
	 * offset base will have the space. If it does not, then namei
	 * arranged that compacting the region dp->i_offset to
	 * dp->i_offset + dp->i_count would yield the space.
	 */
	ep = (struct direct *)dirbuf;
	dsize = ep->d_ino ? DIRSIZ(FSFMT(dvp), ep) : 0;
	spacefree = ep->d_reclen - dsize;
	for (loc = ep->d_reclen; loc < dp->i_count; ) {
		nep = (struct direct *)(dirbuf + loc);

		/* Trim the existing slot (NB: dsize may be zero). */
		ep->d_reclen = dsize;
		ep = (struct direct *)((char *)ep + dsize);

		/* Read nep->d_reclen now as the memmove() may clobber it. */
		loc += nep->d_reclen;
		if (nep->d_ino == 0) {
			/*
			 * A mid-block unused entry. Such entries are
			 * never created by the kernel, but fsck_ffs
			 * can create them (and it doesn't fix them).
			 *
			 * Add up the free space, and initialise the
			 * relocated entry since we don't memmove it.
			 */
			spacefree += nep->d_reclen;
			ep->d_ino = 0;
			dsize = 0;
			continue;
		}
		dsize = DIRSIZ(FSFMT(dvp), nep);
		spacefree += nep->d_reclen - dsize;
#ifdef UFS_DIRHASH
		if (dp->i_dirhash != NULL)
			ufsdirhash_move(dp, nep,
			    dp->i_offset + ((char *)nep - dirbuf),
			    dp->i_offset + ((char *)ep - dirbuf));
#endif
 		if (DOINGSOFTDEP(dvp))
 			softdep_change_directoryentry_offset(dp, dirbuf,
 			    (caddr_t)nep, (caddr_t)ep, dsize); 
 		else
 			memmove(ep, nep, dsize);
	}
	/*
	 * Here, `ep' points to a directory entry containing `dsize' in-use
	 * bytes followed by `spacefree' unused bytes. If ep->d_ino == 0,
	 * then the entry is completely unused (dsize == 0). The value
	 * of ep->d_reclen is always indeterminate.
	 *
	 * Update the pointer fields in the previous entry (if any),
	 * copy in the new entry, and write out the block.
	 */
	if (ep->d_ino == 0) {
		if (spacefree + dsize < newentrysize)
			panic("ufs_direnter: compact1");
		dirp->d_reclen = spacefree + dsize;
	} else {
		if (spacefree < newentrysize)
			panic("ufs_direnter: compact2");
		dirp->d_reclen = spacefree;
		ep->d_reclen = dsize;
		ep = (struct direct *)((char *)ep + dsize);
	}

#ifdef UFS_DIRHASH
	if (dp->i_dirhash != NULL && (ep->d_ino == 0 ||
	    dirp->d_reclen == spacefree))
		ufsdirhash_add(dp, dirp, dp->i_offset + ((char *)ep - dirbuf));
#endif
	memcpy(ep, dirp, newentrysize);
#ifdef UFS_DIRHASH
	if (dp->i_dirhash != NULL)
		ufsdirhash_checkblock(dp, dirbuf -
		    (dp->i_offset & (DIRBLKSIZ - 1)),
		    dp->i_offset & ~(DIRBLKSIZ - 1));
#endif

  	if (DOINGSOFTDEP(dvp)) {
  		(void)softdep_setup_directory_add(bp, dp,
  		    dp->i_offset + (caddr_t)ep - dirbuf,
		    dirp->d_ino, newdirbp, 0);
  		bdwrite(bp);
  	} else {
  		error = VOP_BWRITE(bp);
  	}
	dp->i_flag |= IN_CHANGE | IN_UPDATE;

 	/*
 	 * If all went well, and the directory can be shortened, proceed
 	 * with the truncation. Note that we have to unlock the inode for
 	 * the entry that we just entered, as the truncation may need to
 	 * lock other inodes which can lead to deadlock if we also hold a
 	 * lock on the newly entered node.
 	 */

	if (error == 0 && dp->i_endoff && dp->i_endoff < DIP(dp, size)) {
		if (tvp != NULL)
			VOP_UNLOCK(tvp, 0);
#ifdef UFS_DIRHASH
		if (dp->i_dirhash != NULL)
			ufsdirhash_dirtrunc(dp, dp->i_endoff);
#endif


		error = UFS_TRUNCATE(dp, (off_t)dp->i_endoff, IO_SYNC, cr);

		if (tvp != NULL)
			vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p);
	}
	UFS_WAPBL_UPDATE(dp, MNT_WAIT);
	return (error);
}
예제 #8
0
/*
 * Look up a FFS dinode number to find its incore vnode, otherwise read it
 * in from disk.  If it is in core, wait for the lock bit to clear, then
 * return the inode locked.  Detection and handling of mount points must be
 * done by the calling routine.
 */
int
ffs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
{
	struct fs *fs;
	struct inode *ip;
	struct ufs1_dinode *dp1;
#ifdef FFS2
	struct ufs2_dinode *dp2;
#endif
	struct ufsmount *ump;
	struct buf *bp;
	struct vnode *vp;
	dev_t dev;
	int error;

	ump = VFSTOUFS(mp);
	dev = ump->um_dev;
retry:
	if ((*vpp = ufs_ihashget(dev, ino)) != NULL)
		return (0);

	/* Allocate a new vnode/inode. */
	if ((error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp)) != 0) {
		*vpp = NULL;
		return (error);
	}
#ifdef VFSDEBUG
	vp->v_flag |= VLOCKSWORK;
#endif
	/* XXX - we use the same pool for ffs and mfs */
	ip = pool_get(&ffs_ino_pool, PR_WAITOK);
	bzero((caddr_t)ip, sizeof(struct inode));
	lockinit(&ip->i_lock, PINOD, "inode", 0, 0);
	ip->i_ump = ump;
	VREF(ip->i_devvp);
	vp->v_data = ip;
	ip->i_vnode = vp;
	ip->i_fs = fs = ump->um_fs;
	ip->i_dev = dev;
	ip->i_number = ino;
	ip->i_vtbl = &ffs_vtbl;

	/*
	 * Put it onto its hash chain and lock it so that other requests for
	 * this inode will block if they arrive while we are sleeping waiting
	 * for old data structures to be purged or for the contents of the
	 * disk portion of this inode to be read.
	 */
	error = ufs_ihashins(ip);
	
	if (error) {
		/*
		 * VOP_INACTIVE will treat this as a stale file
		 * and recycle it quickly
		 */
		vrele(vp);

		if (error == EEXIST)
			goto retry;

		return (error);
	}


	/* Read in the disk contents for the inode, copy into the inode. */
	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
		      (int)fs->fs_bsize, NOCRED, &bp);
	if (error) {
		/*
		 * The inode does not contain anything useful, so it would
		 * be misleading to leave it on its hash chain. With mode
		 * still zero, it will be unlinked and returned to the free
		 * list by vput().
		 */
		vput(vp);
		brelse(bp);
		*vpp = NULL;
		return (error);
	}

#ifdef FFS2
	if (ip->i_ump->um_fstype == UM_UFS2) {
		ip->i_din2 = pool_get(&ffs_dinode2_pool, PR_WAITOK);
		dp2 = (struct ufs2_dinode *) bp->b_data + ino_to_fsbo(fs, ino);
		*ip->i_din2 = *dp2;
	} else
#endif
	{
		ip->i_din1 = pool_get(&ffs_dinode1_pool, PR_WAITOK);
		dp1 = (struct ufs1_dinode *) bp->b_data + ino_to_fsbo(fs, ino);
		*ip->i_din1 = *dp1;
	}

	brelse(bp);

	if (DOINGSOFTDEP(vp))
		softdep_load_inodeblock(ip);
	else
		ip->i_effnlink = DIP(ip, nlink);

	/*
	 * Initialize the vnode from the inode, check for aliases.
	 * Note that the underlying vnode may have changed.
	 */
	error = ufs_vinit(mp, ffs_specop_p, FFS_FIFOOPS, &vp);
	if (error) {
		vput(vp);
		*vpp = NULL;
		return (error);
	}

	/*
	 * Set up a generation number for this inode if it does not
	 * already have one. This should only happen on old filesystems.
	 */
	if (DIP(ip, gen) == 0) {
		DIP_ASSIGN(ip, gen, arc4random() & INT_MAX);
		if (DIP(ip, gen) == 0 || DIP(ip, gen) == -1)
			DIP_ASSIGN(ip, gen, 1);	/* Shouldn't happen */
		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
			ip->i_flag |= IN_MODIFIED;
	}

	/*
	 * Ensure that uid and gid are correct. This is a temporary
	 * fix until fsck has been changed to do the update.
	 */
	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_inodefmt < FS_44INODEFMT) {
		ip->i_ffs1_uid = ip->i_din1->di_ouid;
		ip->i_ffs1_gid = ip->i_din1->di_ogid;
	}

	*vpp = vp;

	return (0);
}
예제 #9
0
파일: ufs_vnops.c 프로젝트: sofuture/bitrig
/*
 * Rmdir system call.
 */
int
ufs_rmdir(void *v)
{
	struct vop_rmdir_args *ap = v;
	struct vnode *vp = ap->a_vp;
	struct vnode *dvp = ap->a_dvp;
	struct componentname *cnp = ap->a_cnp;
	struct inode *ip, *dp;
	int error;

	ip = VTOI(vp);
	dp = VTOI(dvp);
	/*
	 * No rmdir "." or of mounted on directories.
	 */
	if (dp == ip || vp->v_mountedhere != NULL) {
		if (dp == ip)
			vrele(dvp);
		else
			vput(dvp);
		vput(vp);
		return (EINVAL);
	}
	/*
         * Do not remove a directory that is in the process of being renamed.
         * Verify the directory is empty (and valid). Rmdir ".." will not be
         * valid since ".." will contain a reference to the current directory
         * and thus be non-empty.
	 */
	error = 0;
	if (ip->i_flag & IN_RENAME) {
		error = EINVAL;
		goto out;
	}
	if (ip->i_effnlink != 2 ||
	    !ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) {
		error = ENOTEMPTY;
		goto out;
	}
	if ((DIP(dp, flags) & APPEND) ||
		(DIP(ip, flags) & (IMMUTABLE | APPEND))) {
		error = EPERM;
		goto out;
	}
	/*
	 * Delete reference to directory before purging
	 * inode.  If we crash in between, the directory
	 * will be reattached to lost+found,
	 */
	dp->i_effnlink--;
	ip->i_effnlink--;
	if (DOINGSOFTDEP(vp)) {
		softdep_change_linkcnt(dp, 0);
		softdep_change_linkcnt(ip, 0);
	}
	if ((error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1)) != 0) {
		dp->i_effnlink++;
		ip->i_effnlink++;
		if (DOINGSOFTDEP(vp)) {
			softdep_change_linkcnt(dp, 0);
			softdep_change_linkcnt(ip, 0);
		}
		goto out;
	}

	VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);
	cache_purge(dvp);
        /*
	 * Truncate inode. The only stuff left in the directory is "." and
	 * "..". The "." reference is inconsequential since we are quashing
	 * it. The soft dependency code will arrange to do these operations
	 * after the parent directory entry has been deleted on disk, so
	 * when running with that code we avoid doing them now.
	 */
	if (!DOINGSOFTDEP(vp)) {
		int ioflag;

		DIP_ADD(dp, nlink, -1);
		dp->i_flag |= IN_CHANGE;
		DIP_ADD(ip, nlink, -1);
		ip->i_flag |= IN_CHANGE;
		ioflag = DOINGASYNC(vp) ? 0 : IO_SYNC;
		error = UFS_TRUNCATE(ip, (off_t)0, ioflag, cnp->cn_cred);
	}
	cache_purge(vp);
#ifdef UFS_DIRHASH
	/* Kill any active hash; i_effnlink == 0, so it will not come back. */
	if (ip->i_dirhash != NULL)
		ufsdirhash_free(ip);
#endif

out:
	VN_KNOTE(vp, NOTE_DELETE);
        vput(dvp);
	vput(vp);
	return (error);
}
예제 #10
0
파일: ufs_vnops.c 프로젝트: sofuture/bitrig
/*
 * Mkdir system call
 */
int
ufs_mkdir(void *v)
{
	struct vop_mkdir_args *ap = v;
	struct vnode *dvp = ap->a_dvp;
	struct vattr *vap = ap->a_vap;
	struct componentname *cnp = ap->a_cnp;
	struct inode *ip, *dp;
	struct vnode *tvp;
	struct buf *bp;
	struct direct newdir;
	struct dirtemplate dirtemplate, *dtp;
	int error, dmode, blkoff;

#ifdef DIAGNOSTIC
	if ((cnp->cn_flags & HASBUF) == 0)
		panic("ufs_mkdir: no name");
#endif
	dp = VTOI(dvp);
	if ((nlink_t) DIP(dp, nlink) >= LINK_MAX) {
		error = EMLINK;
		goto out;
	}
	dmode = vap->va_mode & 0777;
	dmode |= IFDIR;
	/*
	 * Must simulate part of ufs_makeinode here to acquire the inode,
	 * but not have it entered in the parent directory. The entry is
	 * made later after writing "." and ".." entries.
	 */
	if ((error = UFS_INODE_ALLOC(dp, dmode, cnp->cn_cred, &tvp)) != 0)
		goto out;

	ip = VTOI(tvp);

	DIP_ASSIGN(ip, uid, cnp->cn_cred->cr_uid);
	DIP_ASSIGN(ip, gid, DIP(dp, gid));

	if ((error = getinoquota(ip)) ||
	    (error = ufs_quota_alloc_inode(ip, cnp->cn_cred))) {
		pool_put(&namei_pool, cnp->cn_pnbuf);
		UFS_INODE_FREE(ip, ip->i_number, dmode);
		vput(tvp);
		vput(dvp);
		return (error);
	}

	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
	DIP_ASSIGN(ip, mode, dmode);
	tvp->v_type = VDIR;	/* Rest init'd in getnewvnode(). */
	ip->i_effnlink = 2;
	DIP_ASSIGN(ip, nlink, 2);
	if (DOINGSOFTDEP(tvp))
		softdep_change_linkcnt(ip, 0);

	/*
	 * Bump link count in parent directory to reflect work done below.
	 * Should be done before reference is create so cleanup is 
	 * possible if we crash.
	 */
	dp->i_effnlink++;
	DIP_ADD(dp, nlink, 1);
	dp->i_flag |= IN_CHANGE;
	if (DOINGSOFTDEP(dvp))
		softdep_change_linkcnt(dp, 0);
	if ((error = UFS_UPDATE(dp, !DOINGSOFTDEP(dvp))) != 0)
		goto bad;

	/* 
	 * Initialize directory with "." and ".." from static template.
	 */
	if (dvp->v_mount->mnt_maxsymlinklen > 0)
		dtp = &mastertemplate;
	else
		dtp = (struct dirtemplate *)&omastertemplate;
	dirtemplate = *dtp;
	dirtemplate.dot_ino = ip->i_number;
	dirtemplate.dotdot_ino = dp->i_number;

	if ((error = UFS_BUF_ALLOC(ip, (off_t)0, DIRBLKSIZ, cnp->cn_cred,
            B_CLRBUF, &bp)) != 0)
		goto bad;
	DIP_ASSIGN(ip, size, DIRBLKSIZ);
	ip->i_flag |= IN_CHANGE | IN_UPDATE;
	uvm_vnp_setsize(tvp, DIP(ip, size));
	bcopy((caddr_t)&dirtemplate, (caddr_t)bp->b_data, sizeof dirtemplate);
	if (DOINGSOFTDEP(tvp)) {
		/*
		 * Ensure that the entire newly allocated block is a
		 * valid directory so that future growth within the
		 * block does not have to ensure that the block is
		 * written before the inode
		 */
		blkoff = DIRBLKSIZ;
		while (blkoff < bp->b_bcount) {
			((struct direct *)
			 (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ;
			blkoff += DIRBLKSIZ;
		}
	}
	if ((error = UFS_UPDATE(ip, !DOINGSOFTDEP(tvp))) != 0) {
		(void)VOP_BWRITE(bp);
		goto bad;
	}

	/*
         * Directory set up, now install its entry in the parent directory.
         *
         * If we are not doing soft dependencies, then we must write out the
         * buffer containing the new directory body before entering the new
         * name in the parent. If we are doing soft dependencies, then the
         * buffer containing the new directory body will be passed to and
         * released in the soft dependency code after the code has attached
         * an appropriate ordering dependency to the buffer which ensures that
         * the buffer is written before the new name is written in the parent.
	 */
        if (!DOINGSOFTDEP(dvp) && ((error = VOP_BWRITE(bp)) != 0))
                goto bad;
        ufs_makedirentry(ip, cnp, &newdir);
        error = ufs_direnter(dvp, tvp, &newdir, cnp, bp);
  
bad:
        if (error == 0) {
		VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);
                *ap->a_vpp = tvp;
        } else {
                dp->i_effnlink--;
                DIP_ADD(dp, nlink, -1);
                dp->i_flag |= IN_CHANGE;
		if (DOINGSOFTDEP(dvp))
			softdep_change_linkcnt(dp, 0);
                /*
                 * No need to do an explicit VOP_TRUNCATE here, vrele will
                 * do this for us because we set the link count to 0.
                 */
                ip->i_effnlink = 0;
                DIP_ASSIGN(ip, nlink, 0);
                ip->i_flag |= IN_CHANGE;
		if (DOINGSOFTDEP(tvp))
			softdep_change_linkcnt(ip, 0);
		vput(tvp);
	}
out:
	pool_put(&namei_pool, cnp->cn_pnbuf);
	vput(dvp);

	return (error);
}
예제 #11
0
/*
 * Last reference to an inode.  If necessary, write or delete it.
 */
int
ufs_inactive(void *v)
{
	struct vop_inactive_args *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);
	struct proc *p = ap->a_p;
	mode_t mode;
	int error = 0;
#ifdef DIAGNOSTIC
	extern int prtactive;

	if (prtactive && vp->v_usecount != 0)
		vprint("ffs_inactive: pushing active", vp);
#endif

	/*
	 * Ignore inodes related to stale file handles.
	 */
	if (ip->i_din1 == NULL || DIP(ip, mode) == 0)
		goto out;

	if (DIP(ip, nlink) <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
		if (getinoquota(ip) == 0)
			(void)ufs_quota_free_inode(ip, NOCRED);

		error = UFS_TRUNCATE(ip, (off_t)0, IO_EXT | IO_NORMAL, NOCRED);

		DIP_ASSIGN(ip, rdev, 0);
		mode = DIP(ip, mode);
		DIP_ASSIGN(ip, mode, 0);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;

		/*
		 * Setting the mode to zero needs to wait for the inode to be
		 * written just as does a change to the link count. So, rather
		 * than creating a new entry point to do the same thing, we
		 * just use softdep_change_linkcnt(). Also, we can't let
		 * softdep co-opt us to help on its worklist, as we may end up
		 * trying to recycle vnodes and getting to this same point a
		 * couple of times, blowing the kernel stack. However, this
		 * could be optimized by checking if we are coming from
		 * vrele(), vput() or vclean() (by checking for VXLOCK) and
		 * just avoiding the co-opt to happen in the last case.
		 */
		if (DOINGSOFTDEP(vp))
			softdep_change_linkcnt(ip, 1);

		UFS_INODE_FREE(ip, ip->i_number, mode);
	}

	if (ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) {
		UFS_UPDATE(ip, 0);
	}
out:
	VOP_UNLOCK(vp, 0, p);

	/*
	 * If we are done with the inode, reclaim it
	 * so that it can be reused immediately.
	 */
	if (ip->i_din1 == NULL || DIP(ip, mode) == 0)
		vrecycle(vp, p);

	return (error);
}
예제 #12
0
/*
 * Last reference to an inode.  If necessary, write or delete it.
 */
int
ufs_inactive(void *v)
{
	struct vop_inactive_args /* {
		struct vnode *a_vp;
		struct bool *a_recycle;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);
	struct mount *transmp;
	mode_t mode;
	int error = 0;
	int logged = 0;

	UFS_WAPBL_JUNLOCK_ASSERT(vp->v_mount);

	transmp = vp->v_mount;
	fstrans_start(transmp, FSTRANS_SHARED);
	/*
	 * Ignore inodes related to stale file handles.
	 */
	if (ip->i_mode == 0)
		goto out;
	if (ip->i_ffs_effnlink == 0 && DOINGSOFTDEP(vp))
		softdep_releasefile(ip);

	if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
		error = UFS_WAPBL_BEGIN(vp->v_mount);
		if (error)
			goto out;
		logged = 1;
#ifdef QUOTA
		(void)chkiq(ip, -1, NOCRED, 0);
#endif
#ifdef UFS_EXTATTR
		ufs_extattr_vnode_inactive(vp, curlwp);
#endif
		if (ip->i_size != 0) {
			/*
			 * When journaling, only truncate one indirect block
			 * at a time
			 */
			if (vp->v_mount->mnt_wapbl) {
				uint64_t incr = MNINDIR(ip->i_ump) <<
				    vp->v_mount->mnt_fs_bshift; /* Power of 2 */
				uint64_t base = NDADDR <<
				    vp->v_mount->mnt_fs_bshift;
				while (!error && ip->i_size > base + incr) {
					/*
					 * round down to next full indirect
					 * block boundary.
					 */
					uint64_t nsize = base +
					    ((ip->i_size - base - 1) &
					    ~(incr - 1));
					error = UFS_TRUNCATE(vp, nsize, 0,
					    NOCRED);
					if (error)
						break;
					UFS_WAPBL_END(vp->v_mount);
					error = UFS_WAPBL_BEGIN(vp->v_mount);
					if (error)
						goto out;
				}
			}
			if (!error)
				error = UFS_TRUNCATE(vp, (off_t)0, 0, NOCRED);
		}
		/*
		 * Setting the mode to zero needs to wait for the inode
		 * to be written just as does a change to the link count.
		 * So, rather than creating a new entry point to do the
		 * same thing, we just use softdep_change_linkcnt().
		 */
		DIP_ASSIGN(ip, rdev, 0);
		mode = ip->i_mode;
		ip->i_mode = 0;
		DIP_ASSIGN(ip, mode, 0);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		mutex_enter(&vp->v_interlock);
		vp->v_iflag |= VI_FREEING;
		mutex_exit(&vp->v_interlock);
		if (DOINGSOFTDEP(vp))
			softdep_change_linkcnt(ip);
		UFS_VFREE(vp, ip->i_number, mode);
	}

	if (ip->i_flag & (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) {
		if (!logged++) {
			int err;
			err = UFS_WAPBL_BEGIN(vp->v_mount);
			if (err)
				goto out;
		}
		UFS_UPDATE(vp, NULL, NULL, 0);
	}
	if (logged)
		UFS_WAPBL_END(vp->v_mount);
out:
	/*
	 * If we are done with the inode, reclaim it
	 * so that it can be reused immediately.
	 */
	*ap->a_recycle = (ip->i_mode == 0);
	VOP_UNLOCK(vp, 0);
	fstrans_done(transmp);
	return (error);
}
예제 #13
0
/*
 * Balloc defines the structure of file system storage
 * by allocating the physical blocks on a device given
 * the inode and the logical block number in a file.
 * This is the allocation strategy for UFS2. Above is
 * the allocation strategy for UFS1.
 */
int
ffs_balloc_ufs2(vnode *vp, off_t startoffset, int size,
    Ucred *cred, int flags, Buf **bpp)
{
	int error = 0;
	print("HARVEY TODO: %s\n", __func__);
#if 0
	struct inode *ip;
	struct ufs2_dinode *dp;
	ufs_lbn_t lbn, lastlbn;
	struct fs *fs;
	struct buf *bp, *nbp;
	struct ufsmount *ump;
	struct indir indirs[UFS_NIADDR + 2];
	ufs2_daddr_t nb, newb, *bap, pref;
	ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
	ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1];
	int deallocated, osize, nsize, num, i, error;
	int unwindidx = -1;
	int saved_inbdflush;
	static struct timeval lastfail;
	static int curfail;
	int gbflags, reclaimed;

	ip = VTOI(vp);
	dp = ip->i_din2;
	fs = ITOFS(ip);
	ump = ITOUMP(ip);
	lbn = lblkno(fs, startoffset);
	size = blkoff(fs, startoffset) + size;
	reclaimed = 0;
	if (size > fs->fs_bsize)
		panic("ffs_balloc_ufs2: blk too big");
	*bpp = nil;
	if (lbn < 0)
		return (EFBIG);
	gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;

	if (DOINGSOFTDEP(vp))
		softdep_prealloc(vp, MNT_WAIT);
	
	/*
	 * Check for allocating external data.
	 */
	if (flags & IO_EXT) {
		if (lbn >= UFS_NXADDR)
			return (EFBIG);
		/*
		 * If the next write will extend the data into a new block,
		 * and the data is currently composed of a fragment
		 * this fragment has to be extended to be a full block.
		 */
		lastlbn = lblkno(fs, dp->di_extsize);
		if (lastlbn < lbn) {
			nb = lastlbn;
			osize = sblksize(fs, dp->di_extsize, nb);
			if (osize < fs->fs_bsize && osize > 0) {
				UFS_LOCK(ump);
				error = ffs_realloccg(ip, -1 - nb,
				    dp->di_extb[nb],
				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
				    &dp->di_extb[0]), osize,
				    (int)fs->fs_bsize, flags, cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocext(ip, nb,
					    dbtofsb(fs, bp->b_blkno),
					    dp->di_extb[nb],
					    fs->fs_bsize, osize, bp);
				dp->di_extsize = smalllblktosize(fs, nb + 1);
				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
				bp->b_xflags |= BX_ALTDATA;
				ip->i_flag |= IN_CHANGE;
				if (flags & IO_SYNC)
					bwrite(bp);
				else
					bawrite(bp);
			}
		}
		/*
		 * All blocks are direct blocks
		 */
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
		nb = dp->di_extb[lbn];
		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
			error = bread_gb(vp, -1 - lbn, fs->fs_bsize, NOCRED,
			    gbflags, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			bp->b_xflags |= BX_ALTDATA;
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				error = bread_gb(vp, -1 - lbn, osize, NOCRED,
				    gbflags, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_blkno = fsbtodb(fs, nb);
				bp->b_xflags |= BX_ALTDATA;
			} else {
				UFS_LOCK(ump);
				error = ffs_realloccg(ip, -1 - lbn,
				    dp->di_extb[lbn],
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
				    &dp->di_extb[0]), osize, nsize, flags,
				    cred, &bp);
				if (error)
					return (error);
				bp->b_xflags |= BX_ALTDATA;
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocext(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			UFS_LOCK(ump);
			error = ffs_alloc(ip, lbn,
			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
			   nsize, flags, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, -1 - lbn, nsize, 0, 0, gbflags);
			bp->b_blkno = fsbtodb(fs, newb);
			bp->b_xflags |= BX_ALTDATA;
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocext(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE;
		*bpp = bp;
		return (0);
	}
	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */
	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			UFS_LOCK(ump);
			error = ffs_realloccg(ip, nb, dp->di_db[nb],
			    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
			    &dp->di_db[0]), osize, (int)fs->fs_bsize,
			    flags, cred, &bp);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb,
				    dbtofsb(fs, bp->b_blkno),
				    dp->di_db[nb],
				    fs->fs_bsize, osize, bp);
			ip->i_size = smalllblktosize(fs, nb + 1);
			dp->di_size = ip->i_size;
			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (flags & IO_SYNC)
				bwrite(bp);
			else
				bawrite(bp);
		}
	}
	/*
	 * The first UFS_NDADDR blocks are direct blocks
	 */
	if (lbn < UFS_NDADDR) {
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
		nb = dp->di_db[lbn];
		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
			error = bread_gb(vp, lbn, fs->fs_bsize, NOCRED,
			    gbflags, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				error = bread_gb(vp, lbn, osize, NOCRED,
				    gbflags, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_blkno = fsbtodb(fs, nb);
			} else {
				UFS_LOCK(ump);
				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
				    &dp->di_db[0]), osize, nsize, flags,
				    cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			if (ip->i_size < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			UFS_LOCK(ump);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
				&dp->di_db[0]), nsize, flags, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
			bp->b_blkno = fsbtodb(fs, newb);
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return(error);
#ifdef INVARIANTS
	if (num < 1)
		panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
#endif
	saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
	/*
	 * Fetch the first indirect block allocating if necessary.
	 */
	--num;
	nb = dp->di_ib[indirs[0].in_off];
	allocib = nil;
	allocblk = allociblk;
	lbns_remfree = lbns;
	if (nb == 0) {
		UFS_LOCK(ump);
		pref = ffs_blkpref_ufs2(ip, lbn, -indirs[0].in_off - 1,
		    (ufs2_daddr_t *)0);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags, cred, &newb)) != 0) {
			curthread_pflags_restore(saved_inbdflush);
			return (error);
		}
		pref = newb + fs->fs_frag;
		nb = newb;
		MPASS(allocblk < allociblk + nitems(allociblk));
		MPASS(lbns_remfree < lbns + nitems(lbns));
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[1].in_lbn;
		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0,
		    GB_UNMAPPED);
		bp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(bp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip,
			    UFS_NDADDR + indirs[0].in_off, newb, 0,
			    fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		} else {
			if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		allocib = &dp->di_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */
retry:
	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			brelse(bp);
			goto fail;
		}
		bap = (ufs2_daddr_t *)bp->b_data;
		nb = bap[indirs[i].in_off];
		if (i == num)
			break;
		i += 1;
		if (nb != 0) {
			bqrelse(bp);
			continue;
		}
		UFS_LOCK(ump);
		/*
		 * If parent indirect has just been allocated, try to cluster
		 * immediately following it.
		 */
		if (pref == 0)
			pref = ffs_blkpref_ufs2(ip, lbn, i - num - 1,
			    (ufs2_daddr_t *)0);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | IO_BUFLOCKED, cred, &newb)) != 0) {
			brelse(bp);
			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
				UFS_LOCK(ump);
				softdep_request_cleanup(fs, vp, cred,
				    FLUSH_BLOCKS_WAIT);
				UFS_UNLOCK(ump);
				goto retry;
			}
			if (ppsratecheck(&lastfail, &curfail, 1)) {
				ffs_fserr(fs, ip->i_number, "filesystem full");
				uprintf("\n%s: write failed, filesystem "
				    "is full\n", fs->fs_fsmnt);
			}
			goto fail;
		}
		pref = newb + fs->fs_frag;
		nb = newb;
		MPASS(allocblk < allociblk + nitems(allociblk));
		MPASS(lbns_remfree < lbns + nitems(lbns));
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[i].in_lbn;
		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0,
		    GB_UNMAPPED);
		nbp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
			if (nbp->b_bufsize == fs->fs_bsize)
				nbp->b_flags |= B_CLUSTEROK;
			bdwrite(nbp);
		} else {
			if ((error = bwrite(nbp)) != 0) {
				brelse(bp);
				goto fail;
			}
		}
		bap[indirs[i - 1].in_off] = nb;
		if (allocib == nil && unwindidx < 0)
			unwindidx = i - 1;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
	}
	/*
	 * If asked only for the indirect block, then return it.
	 */
	if (flags & BA_METAONLY) {
		curthread_pflags_restore(saved_inbdflush);
		*bpp = bp;
		return (0);
	}
	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		UFS_LOCK(ump);
		/*
		 * If allocating metadata at the front of the cylinder
		 * group and parent indirect block has just been allocated,
		 * then cluster next to it if it is the first indirect in
		 * the file. Otherwise it has been allocated in the metadata
		 * area, so we want to find our own place out in the data area.
		 */
		if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0))
			pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off,
			    &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | IO_BUFLOCKED, cred, &newb);
		if (error) {
			brelse(bp);
			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
				UFS_LOCK(ump);
				softdep_request_cleanup(fs, vp, cred,
				    FLUSH_BLOCKS_WAIT);
				UFS_UNLOCK(ump);
				goto retry;
			}
			if (ppsratecheck(&lastfail, &curfail, 1)) {
				ffs_fserr(fs, ip->i_number, "filesystem full");
				uprintf("\n%s: write failed, filesystem "
				    "is full\n", fs->fs_fsmnt);
			}
			goto fail;
		}
		nb = newb;
		MPASS(allocblk < allociblk + nitems(allociblk));
		MPASS(lbns_remfree < lbns + nitems(lbns));
		*allocblk++ = nb;
		*lbns_remfree++ = lbn;
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
		nbp->b_blkno = fsbtodb(fs, nb);
		if (flags & BA_CLRBUF)
			vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[i].in_off, nb, 0, nbp);
		bap[indirs[i].in_off] = nb;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		curthread_pflags_restore(saved_inbdflush);
		*bpp = nbp;
		return (0);
	}
	brelse(bp);
	/*
	 * If requested clear invalid portions of the buffer.  If we
	 * have to do a read-before-write (typical if BA_CLRBUF is set),
	 * try to do some read-ahead in the sequential case to reduce
	 * the number of I/O transactions.
	 */
	if (flags & BA_CLRBUF) {
		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
		if (seqcount != 0 &&
		    (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
		    !(vm_page_count_severe() || buf_dirty_count_severe())) {
			error = cluster_read(vp, ip->i_size, lbn,
			    (int)fs->fs_bsize, NOCRED,
			    MAXBSIZE, seqcount, gbflags, &nbp);
		} else {
			error = bread_gb(vp, lbn, (int)fs->fs_bsize,
			    NOCRED, gbflags, &nbp);
		}
		if (error) {
			brelse(nbp);
			goto fail;
		}
	} else {
예제 #14
0
/*
 * Last reference to an inode.  If necessary, write or delete it.
 */
int
ufs_inactive(void *v)
{
	struct vop_inactive_args *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);
	struct fs *fs = ip->i_fs;
	struct proc *p = curproc;
	mode_t mode;
	int error = 0, logged = 0, truncate_error = 0;
#ifdef DIAGNOSTIC
	extern int prtactive;

	if (prtactive && vp->v_usecount != 0)
		vprint("ufs_inactive: pushing active", vp);
#endif

	UFS_WAPBL_JUNLOCK_ASSERT(vp->v_mount);

	/*
	 * Ignore inodes related to stale file handles.
	 */
	if (ip->i_din1 == NULL || DIP(ip, mode) == 0)
		goto out;

	if (DIP(ip, nlink) <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
		error = UFS_WAPBL_BEGIN(vp->v_mount);
		if (error)
			goto out;
		logged = 1;
		if (getinoquota(ip) == 0)
			(void)ufs_quota_free_inode(ip, NOCRED);
		if (DIP(ip, size) != 0 && vp->v_mount->mnt_wapbl) {
			/*
			 * When journaling, only truncate one indirect block at
			 * a time.
			 */
			uint64_t incr = MNINDIR(ip->i_ump) << fs->fs_bshift;
			uint64_t base = NDADDR << fs->fs_bshift;
			while (!error && DIP(ip, size) > base + incr) {
				/*
				 * round down to next full indirect block
				 * boundary.
				 */
				uint64_t nsize = base +
				    ((DIP(ip, size) - base - 1) &
				    ~(incr - 1));
				error = UFS_TRUNCATE(ip, nsize, 0, NOCRED);
				if (error)
					break;
				UFS_WAPBL_END(vp->v_mount);
				error = UFS_WAPBL_BEGIN(vp->v_mount);
				if (error)
					goto out;
			}
		}

		if (error == 0) {
			truncate_error = UFS_TRUNCATE(ip, (off_t)0, 0, NOCRED);
			/* XXX pedro: remove me */
			if (truncate_error)
				printf("UFS_TRUNCATE()=%d\n", truncate_error);
		}

		DIP_ASSIGN(ip, rdev, 0);
		mode = DIP(ip, mode);
		DIP_ASSIGN(ip, mode, 0);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;

		/*
		 * Setting the mode to zero needs to wait for the inode to be
		 * written just as does a change to the link count. So, rather
		 * than creating a new entry point to do the same thing, we
		 * just use softdep_change_linkcnt(). Also, we can't let
		 * softdep co-opt us to help on its worklist, as we may end up
		 * trying to recycle vnodes and getting to this same point a
		 * couple of times, blowing the kernel stack. However, this
		 * could be optimized by checking if we are coming from
		 * vrele(), vput() or vclean() (by checking for VXLOCK) and
		 * just avoiding the co-opt to happen in the last case.
		 */
		if (DOINGSOFTDEP(vp))
			softdep_change_linkcnt(ip, 1);

		UFS_INODE_FREE(ip, ip->i_number, mode);
	}

	if (ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) {
		if (!logged++) {
			int err;
			err = UFS_WAPBL_BEGIN(vp->v_mount);
			if (err) {
				error = err;
				goto out;
			}
		}
		UFS_UPDATE(ip, 0);
	}
	if (logged)
		UFS_WAPBL_END(vp->v_mount);
out:
	VOP_UNLOCK(vp, 0);

	/*
	 * If we are done with the inode, reclaim it
	 * so that it can be reused immediately.
	 */
	if (error == 0 && truncate_error == 0 &&
	    (ip->i_din1 == NULL || DIP(ip, mode) == 0))
		vrecycle(vp, p);

	return (truncate_error ? truncate_error : error);
}
예제 #15
0
/*
 * Truncate the inode oip to at most length size, freeing the
 * disk blocks.
 */
int
ffs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred)
{
	struct vnode *ovp;
	daddr64_t lastblock;
	daddr64_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR];
	daddr64_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR];
	struct fs *fs;
	struct buf *bp;
	int offset, size, level;
	long count, nblocks, vflags, blocksreleased = 0;
	int i, aflags, error, allerror, indirect = 0;
	off_t osize;
	extern int num_indirdep;
	extern int max_indirdep;

	if (length < 0)
		return (EINVAL);
	ovp = ITOV(oip);

	if (ovp->v_type != VREG &&
	    ovp->v_type != VDIR &&
	    ovp->v_type != VLNK)
		return (0);

	if (DIP(oip, size) == length)
		return (0);

	if (ovp->v_type == VLNK &&
	    (DIP(oip, size) < ovp->v_mount->mnt_maxsymlinklen ||
	     (ovp->v_mount->mnt_maxsymlinklen == 0 &&
	      oip->i_din1->di_blocks == 0))) {
#ifdef DIAGNOSTIC
		if (length != 0)
			panic("ffs_truncate: partial truncate of symlink");
#endif
		memset(SHORTLINK(oip), 0, (size_t) DIP(oip, size));
		DIP_ASSIGN(oip, size, 0);
		oip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (UFS_UPDATE(oip, MNT_WAIT));
	}

	if ((error = getinoquota(oip)) != 0)
		return (error);

	uvm_vnp_setsize(ovp, length);
	oip->i_ci.ci_lasta = oip->i_ci.ci_clen 
	    = oip->i_ci.ci_cstart = oip->i_ci.ci_lastw = 0;

	if (DOINGSOFTDEP(ovp)) {
		if (length > 0 || softdep_slowdown(ovp)) {
			/*
			 * If a file is only partially truncated, then
			 * we have to clean up the data structures
			 * describing the allocation past the truncation
			 * point. Finding and deallocating those structures
			 * is a lot of work. Since partial truncation occurs
			 * rarely, we solve the problem by syncing the file
			 * so that it will have no data structures left.
			 */
			if ((error = VOP_FSYNC(ovp, cred, MNT_WAIT)) != 0)
				return (error);
		} else {
			(void)ufs_quota_free_blocks(oip, DIP(oip, blocks),
			    NOCRED);
			softdep_setup_freeblocks(oip, length);
			(void) vinvalbuf(ovp, 0, cred, curproc, 0, 0);
			oip->i_flag |= IN_CHANGE | IN_UPDATE;
			return (UFS_UPDATE(oip, 0));
		}
	}

	fs = oip->i_fs;
	osize = DIP(oip, size);
	/*
	 * Lengthen the size of the file. We must ensure that the
	 * last byte of the file is allocated. Since the smallest
	 * value of osize is 0, length will be at least 1.
	 */
	if (osize < length) {
		if (length > fs->fs_maxfilesize)
			return (EFBIG);
		aflags = B_CLRBUF;
		if (flags & IO_SYNC)
			aflags |= B_SYNC;
		error = UFS_BUF_ALLOC(oip, length - 1, 1, 
				   cred, aflags, &bp);
		if (error)
			return (error);
		if (bp->b_lblkno >= NDADDR)
			indirect = 1;
		DIP_ASSIGN(oip, size, length);
		uvm_vnp_setsize(ovp, length);
		(void) uvm_vnp_uncache(ovp);
		if (aflags & B_SYNC)
			bwrite(bp);
		else
			bawrite(bp);
		oip->i_flag |= IN_CHANGE | IN_UPDATE;
		error = UFS_UPDATE(oip, MNT_WAIT);
		if (DOINGSOFTDEP(ovp) && num_indirdep > max_indirdep)
			if (indirect) {
				/*
				 * If the number of pending indirect block
				 * dependencies is sufficiently close to the
				 * maximum number of simultaneously mappable
				 * buffers force a sync on the vnode to prevent
				 * buffer cache exhaustion.
				 */
				VOP_FSYNC(ovp, curproc->p_ucred, MNT_WAIT);
			}
		return (error);
	}
	uvm_vnp_setsize(ovp, length);

	/*
	 * Shorten the size of the file. If the file is not being
	 * truncated to a block boundary, the contents of the
	 * partial block following the end of the file must be
	 * zero'ed in case it ever becomes accessible again because
	 * of subsequent file growth. Directories however are not
	 * zero'ed as they should grow back initialized to empty.
	 */
	offset = blkoff(fs, length);
	if (offset == 0) {
		DIP_ASSIGN(oip, size, length);
	} else {
		lbn = lblkno(fs, length);
		aflags = B_CLRBUF;
		if (flags & IO_SYNC)
			aflags |= B_SYNC;
		error = UFS_BUF_ALLOC(oip, length - 1, 1,
				   cred, aflags, &bp);
		if (error)
			return (error);
		/*
		 * When we are doing soft updates and the UFS_BALLOC
		 * above fills in a direct block hole with a full sized
		 * block that will be truncated down to a fragment below,
		 * we must flush out the block dependency with an FSYNC
		 * so that we do not get a soft updates inconsistency
		 * when we create the fragment below.
		 */
		if (DOINGSOFTDEP(ovp) && lbn < NDADDR &&
		    fragroundup(fs, blkoff(fs, length)) < fs->fs_bsize &&
		    (error = VOP_FSYNC(ovp, cred, MNT_WAIT)) != 0)
			return (error);
		DIP_ASSIGN(oip, size, length);
		size = blksize(fs, oip, lbn);
		(void) uvm_vnp_uncache(ovp);
		if (ovp->v_type != VDIR)
			bzero((char *)bp->b_data + offset,
			      (u_int)(size - offset));
		bp->b_bcount = size;
		if (aflags & B_SYNC)
			bwrite(bp);
		else
			bawrite(bp);
	}
	/*
	 * Calculate index into inode's block list of
	 * last direct and indirect blocks (if any)
	 * which we want to keep.  Lastblock is -1 when
	 * the file is truncated to 0.
	 */
	lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
	lastiblock[SINGLE] = lastblock - NDADDR;
	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
	nblocks = btodb(fs->fs_bsize);

	/*
	 * Update file and block pointers on disk before we start freeing
	 * blocks.  If we crash before free'ing blocks below, the blocks
	 * will be returned to the free list.  lastiblock values are also
	 * normalized to -1 for calls to ffs_indirtrunc below.
	 */
	for (level = TRIPLE; level >= SINGLE; level--) {
		oldblks[NDADDR + level] = DIP(oip, ib[level]);
		if (lastiblock[level] < 0) {
			DIP_ASSIGN(oip, ib[level], 0);
			lastiblock[level] = -1;
		}
	}

	for (i = 0; i < NDADDR; i++) {
		oldblks[i] = DIP(oip, db[i]);
		if (i > lastblock)
			DIP_ASSIGN(oip, db[i], 0);
	}

	oip->i_flag |= IN_CHANGE | IN_UPDATE;
	if ((error = UFS_UPDATE(oip, MNT_WAIT)) != 0)
		allerror = error;

	/*
	 * Having written the new inode to disk, save its new configuration
	 * and put back the old block pointers long enough to process them.
	 * Note that we save the new block configuration so we can check it
	 * when we are done.
	 */
	for (i = 0; i < NDADDR; i++) {
		newblks[i] = DIP(oip, db[i]);
		DIP_ASSIGN(oip, db[i], oldblks[i]);
	}

	for (i = 0; i < NIADDR; i++) {
		newblks[NDADDR + i] = DIP(oip, ib[i]);
		DIP_ASSIGN(oip, ib[i], oldblks[NDADDR + i]);
	}

	DIP_ASSIGN(oip, size, osize);
	vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA;
	allerror = vinvalbuf(ovp, vflags, cred, curproc, 0, 0);

	/*
	 * Indirect blocks first.
	 */
	indir_lbn[SINGLE] = -NDADDR;
	indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1;
	indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1;
	for (level = TRIPLE; level >= SINGLE; level--) {
		bn = DIP(oip, ib[level]);
		if (bn != 0) {
			error = ffs_indirtrunc(oip, indir_lbn[level],
			    fsbtodb(fs, bn), lastiblock[level], level, &count);
			if (error)
				allerror = error;
			blocksreleased += count;
			if (lastiblock[level] < 0) {
				DIP_ASSIGN(oip, ib[level], 0);
				ffs_blkfree(oip, bn, fs->fs_bsize);
				blocksreleased += nblocks;
			}
		}
		if (lastiblock[level] >= 0)
			goto done;
	}

	/*
	 * All whole direct blocks or frags.
	 */
	for (i = NDADDR - 1; i > lastblock; i--) {
		long bsize;

		bn = DIP(oip, db[i]);
		if (bn == 0)
			continue;

		DIP_ASSIGN(oip, db[i], 0);
		bsize = blksize(fs, oip, i);
		ffs_blkfree(oip, bn, bsize);
		blocksreleased += btodb(bsize);
	}
	if (lastblock < 0)
		goto done;

	/*
	 * Finally, look for a change in size of the
	 * last direct block; release any frags.
	 */
	bn = DIP(oip, db[lastblock]);
	if (bn != 0) {
		long oldspace, newspace;

		/*
		 * Calculate amount of space we're giving
		 * back as old block size minus new block size.
		 */
		oldspace = blksize(fs, oip, lastblock);
		DIP_ASSIGN(oip, size, length);
		newspace = blksize(fs, oip, lastblock);
		if (newspace == 0)
			panic("ffs_truncate: newspace");
		if (oldspace - newspace > 0) {
			/*
			 * Block number of space to be free'd is
			 * the old block # plus the number of frags
			 * required for the storage we're keeping.
			 */
			bn += numfrags(fs, newspace);
			ffs_blkfree(oip, bn, oldspace - newspace);
			blocksreleased += btodb(oldspace - newspace);
		}
	}
done:
#ifdef DIAGNOSTIC
	for (level = SINGLE; level <= TRIPLE; level++)
		if (newblks[NDADDR + level] != DIP(oip, ib[level]))
			panic("ffs_truncate1");
	for (i = 0; i < NDADDR; i++)
		if (newblks[i] != DIP(oip, db[i]))
			panic("ffs_truncate2");
#endif /* DIAGNOSTIC */
	/*
	 * Put back the real size.
	 */
	DIP_ASSIGN(oip, size, length);
	DIP_ADD(oip, blocks, -blocksreleased);
	oip->i_flag |= IN_CHANGE;
	(void)ufs_quota_free_blocks(oip, blocksreleased, NOCRED);
	return (allerror);
}
예제 #16
0
/*
 * Update the access, modified, and inode change times as specified by the
 * IN_ACCESS, IN_UPDATE, and IN_CHANGE flags respectively. The IN_MODIFIED
 * flag is used to specify that the inode needs to be updated but that the
 * times have already been set. The access and modified times are taken from
 * the second and third parameters; the inode change time is always taken
 * from the current time. If waitfor is set, then wait for the disk write
 * of the inode to complete.
 */
int
ffs_update(struct inode *ip, struct timespec *atime, 
    struct timespec *mtime, int waitfor)
{
	struct vnode *vp;
	struct fs *fs;
	struct buf *bp;
	int error;
	struct timespec ts;

	vp = ITOV(ip);
	if (vp->v_mount->mnt_flag & MNT_RDONLY) {
		ip->i_flag &=
		    ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE);
		return (0);
	}

	if ((ip->i_flag &
	    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
	    waitfor != MNT_WAIT)
		return (0);

	getnanotime(&ts);

	if (ip->i_flag & IN_ACCESS) {
		DIP_ASSIGN(ip, atime, atime ? atime->tv_sec : ts.tv_sec);
		DIP_ASSIGN(ip, atimensec, atime ? atime->tv_nsec : ts.tv_nsec);
	}

	if (ip->i_flag & IN_UPDATE) {
		DIP_ASSIGN(ip, mtime, mtime ? mtime->tv_sec : ts.tv_sec);
		DIP_ASSIGN(ip, mtimensec, mtime ? mtime->tv_nsec : ts.tv_nsec);
		ip->i_modrev++;
	}

	if (ip->i_flag & IN_CHANGE) {
		DIP_ASSIGN(ip, ctime, ts.tv_sec);
		DIP_ASSIGN(ip, ctimensec, ts.tv_nsec);
	}

	ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE);
	fs = ip->i_fs;

	/*
	 * Ensure that uid and gid are correct. This is a temporary
	 * fix until fsck has been changed to do the update.
	 */
	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_inodefmt < FS_44INODEFMT) {
		ip->i_din1->di_ouid = ip->i_ffs1_uid;
		ip->i_din1->di_ogid = ip->i_ffs1_gid;
	}

	error = bread(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
	    (int)fs->fs_bsize, &bp);
	if (error) {
		brelse(bp);
		return (error);
	}

	if (DOINGSOFTDEP(vp))
		softdep_update_inodeblock(ip, bp, waitfor);
	else if (ip->i_effnlink != DIP(ip, nlink))
		panic("ffs_update: bad link cnt");

#ifdef FFS2
	if (ip->i_ump->um_fstype == UM_UFS2)
		*((struct ufs2_dinode *)bp->b_data +
		    ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2;
	else
#endif
		*((struct ufs1_dinode *)bp->b_data +
		    ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1;

	if (waitfor && !DOINGASYNC(vp)) {
		return (bwrite(bp));
	} else {
		bdwrite(bp);
		return (0);
	}
}
예제 #17
0
파일: ufs_vnops.c 프로젝트: sofuture/bitrig
/*
 * Allocate a new inode.
 */
int
ufs_makeinode(int mode, struct vnode *dvp, struct vnode **vpp,
    struct componentname *cnp)
{
	struct inode *ip, *pdir;
	struct direct newdir;
	struct vnode *tvp;
	int error;

	pdir = VTOI(dvp);
#ifdef DIAGNOSTIC
	if ((cnp->cn_flags & HASBUF) == 0)
		panic("ufs_makeinode: no name");
#endif
	*vpp = NULL;
	if ((mode & IFMT) == 0)
		mode |= IFREG;

	if ((error = UFS_INODE_ALLOC(pdir, mode, cnp->cn_cred, &tvp)) != 0) {
		pool_put(&namei_pool, cnp->cn_pnbuf);
		vput(dvp);
		return (error);
	}

	ip = VTOI(tvp);

	DIP_ASSIGN(ip, gid, DIP(pdir, gid));
	DIP_ASSIGN(ip, uid, cnp->cn_cred->cr_uid);

	if ((error = getinoquota(ip)) ||
	    (error = ufs_quota_alloc_inode(ip, cnp->cn_cred))) {
		pool_put(&namei_pool, cnp->cn_pnbuf);
		UFS_INODE_FREE(ip, ip->i_number, mode);
		vput(tvp);
		vput(dvp);
		return (error);
	}

	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
	DIP_ASSIGN(ip, mode, mode);
	tvp->v_type = IFTOVT(mode);	/* Rest init'd in getnewvnode(). */
	ip->i_effnlink = 1;
	DIP_ASSIGN(ip, nlink, 1);
	if (DOINGSOFTDEP(tvp))
		softdep_change_linkcnt(ip, 0);
	if ((DIP(ip, mode) & ISGID) &&
		!groupmember(DIP(ip, gid), cnp->cn_cred) &&
	    suser_ucred(cnp->cn_cred))
		DIP_AND(ip, mode, ~ISGID);

	/*
	 * Make sure inode goes to disk before directory entry.
	 */
	if ((error = UFS_UPDATE(ip, !DOINGSOFTDEP(tvp))) != 0)
		goto bad;

	ufs_makedirentry(ip, cnp, &newdir);
	if ((error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL)) != 0)
		goto bad;

	if ((cnp->cn_flags & SAVESTART) == 0)
		pool_put(&namei_pool, cnp->cn_pnbuf);
	vput(dvp);
	*vpp = tvp;
	return (0);

bad:
	/*
	 * Write error occurred trying to update the inode
	 * or the directory so must deallocate the inode.
	 */
	pool_put(&namei_pool, cnp->cn_pnbuf);
	vput(dvp);
	ip->i_effnlink = 0;
	DIP_ASSIGN(ip, nlink, 0);
	ip->i_flag |= IN_CHANGE;
	if (DOINGSOFTDEP(tvp))
		softdep_change_linkcnt(ip, 0);
	tvp->v_type = VNON;
	vput(tvp);

	return (error);
}
예제 #18
0
/*
 * Truncate the inode oip to at most length size, freeing the
 * disk blocks.
 */
int
ffs_truncate(struct vnode *vp, off_t length, int flags, struct ucred *cred)
{
	struct vnode *ovp = vp;
	ufs_daddr_t lastblock;
	struct inode *oip;
	ufs_daddr_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR];
	ufs_daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR];
	struct fs *fs;
	struct buf *bp;
	int offset, size, level;
	long count, nblocks, blocksreleased = 0;
	int i;
	int aflags, error, allerror;
	off_t osize;

	oip = VTOI(ovp);
	fs = oip->i_fs;
	if (length < 0)
		return (EINVAL);
	if (length > fs->fs_maxfilesize)
		return (EFBIG);
	if (ovp->v_type == VLNK &&
	    (oip->i_size < ovp->v_mount->mnt_maxsymlinklen || oip->i_din.di_blocks == 0)) {
#ifdef DIAGNOSTIC
		if (length != 0)
			panic("ffs_truncate: partial truncate of symlink");
#endif /* DIAGNOSTIC */
		bzero((char *)&oip->i_shortlink, (uint)oip->i_size);
		oip->i_size = 0;
		oip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (ffs_update(ovp, 1));
	}
	if (oip->i_size == length) {
		oip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (ffs_update(ovp, 0));
	}
	if (fs->fs_ronly)
		panic("ffs_truncate: read-only filesystem");
#ifdef QUOTA
	error = ufs_getinoquota(oip);
	if (error)
		return (error);
#endif
	ovp->v_lasta = ovp->v_clen = ovp->v_cstart = ovp->v_lastw = 0;
	if (DOINGSOFTDEP(ovp)) {
		if (length > 0 || softdep_slowdown(ovp)) {
			/*
			 * If a file is only partially truncated, then
			 * we have to clean up the data structures
			 * describing the allocation past the truncation
			 * point. Finding and deallocating those structures
			 * is a lot of work. Since partial truncation occurs
			 * rarely, we solve the problem by syncing the file
			 * so that it will have no data structures left.
			 */
			if ((error = VOP_FSYNC(ovp, MNT_WAIT, 0)) != 0)
				return (error);
		} else {
#ifdef QUOTA
			(void) ufs_chkdq(oip, -oip->i_blocks, NOCRED, 0);
#endif
			softdep_setup_freeblocks(oip, length);
			vinvalbuf(ovp, 0, 0, 0);
			nvnode_pager_setsize(ovp, 0, fs->fs_bsize, 0);
			oip->i_flag |= IN_CHANGE | IN_UPDATE;
			return (ffs_update(ovp, 0));
		}
	}
	osize = oip->i_size;

	/*
	 * Lengthen the size of the file. We must ensure that the
	 * last byte of the file is allocated. Since the smallest
	 * value of osize is 0, length will be at least 1.
	 *
	 * nvextendbuf() only breads the old buffer.  The blocksize
	 * of the new buffer must be specified so it knows how large
	 * to make the VM object.
	 */
	if (osize < length) {
		nvextendbuf(vp, osize, length,
			    blkoffsize(fs, oip, osize),	/* oblksize */
			    blkoffresize(fs, length),	/* nblksize */
			    blkoff(fs, osize),
			    blkoff(fs, length),
			    0);

		aflags = B_CLRBUF;
		if (flags & IO_SYNC)
			aflags |= B_SYNC;
		/* BALLOC will reallocate the fragment at the old EOF */
		error = VOP_BALLOC(ovp, length - 1, 1, cred, aflags, &bp);
		if (error)
			return (error);
		oip->i_size = length;
		if (bp->b_bufsize == fs->fs_bsize)
			bp->b_flags |= B_CLUSTEROK;
		if (aflags & B_SYNC)
			bwrite(bp);
		else
			bawrite(bp);
		oip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (ffs_update(ovp, 1));
	}

	/*
	 * Shorten the size of the file.
	 *
	 * NOTE: The block size specified in nvtruncbuf() is the blocksize
	 *	 of the buffer containing length prior to any reallocation
	 *	 of the block.
	 */
	allerror = nvtruncbuf(ovp, length, blkoffsize(fs, oip, length),
			      blkoff(fs, length), 0);
	offset = blkoff(fs, length);
	if (offset == 0) {
		oip->i_size = length;
	} else {
		lbn = lblkno(fs, length);
		aflags = B_CLRBUF;
		if (flags & IO_SYNC)
			aflags |= B_SYNC;
		error = VOP_BALLOC(ovp, length - 1, 1, cred, aflags, &bp);
		if (error)
			return (error);

		/*
		 * When we are doing soft updates and the UFS_BALLOC
		 * above fills in a direct block hole with a full sized
		 * block that will be truncated down to a fragment below,
		 * we must flush out the block dependency with an FSYNC
		 * so that we do not get a soft updates inconsistency
		 * when we create the fragment below.
		 *
		 * nvtruncbuf() may have re-dirtied the underlying block
		 * as part of its truncation zeroing code.  To avoid a
		 * 'locking against myself' panic in the second fsync we
		 * can simply undirty the bp since the redirtying was
		 * related to areas of the buffer that we are going to
		 * throw away anyway, and we will b*write() the remainder
		 * anyway down below.
		 */
		if (DOINGSOFTDEP(ovp) && lbn < NDADDR &&
		    fragroundup(fs, blkoff(fs, length)) < fs->fs_bsize) {
			bundirty(bp);
			error = VOP_FSYNC(ovp, MNT_WAIT, 0);
			if (error) {
				bdwrite(bp);
				return (error);
			}
		}
		oip->i_size = length;
		size = blksize(fs, oip, lbn);
#if 0
		/* remove - nvtruncbuf deals with this */
		if (ovp->v_type != VDIR)
			bzero((char *)bp->b_data + offset,
			    (uint)(size - offset));
#endif
		/* Kirk's code has reallocbuf(bp, size, 1) here */
		allocbuf(bp, size);
		if (bp->b_bufsize == fs->fs_bsize)
			bp->b_flags |= B_CLUSTEROK;
		if (aflags & B_SYNC)
			bwrite(bp);
		else
			bawrite(bp);
	}
	/*
	 * Calculate index into inode's block list of
	 * last direct and indirect blocks (if any)
	 * which we want to keep.  Lastblock is -1 when
	 * the file is truncated to 0.
	 */
	lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
	lastiblock[SINGLE] = lastblock - NDADDR;
	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
	nblocks = btodb(fs->fs_bsize);

	/*
	 * Update file and block pointers on disk before we start freeing
	 * blocks.  If we crash before free'ing blocks below, the blocks
	 * will be returned to the free list.  lastiblock values are also
	 * normalized to -1 for calls to ffs_indirtrunc below.
	 */
	bcopy((caddr_t)&oip->i_db[0], (caddr_t)oldblks, sizeof oldblks);
	for (level = TRIPLE; level >= SINGLE; level--)
		if (lastiblock[level] < 0) {
			oip->i_ib[level] = 0;
			lastiblock[level] = -1;
		}
	for (i = NDADDR - 1; i > lastblock; i--)
		oip->i_db[i] = 0;
	oip->i_flag |= IN_CHANGE | IN_UPDATE;
	error = ffs_update(ovp, 1);
	if (error && allerror == 0)
		allerror = error;
	
	/*
	 * Having written the new inode to disk, save its new configuration
	 * and put back the old block pointers long enough to process them.
	 * Note that we save the new block configuration so we can check it
	 * when we are done.
	 */
	bcopy((caddr_t)&oip->i_db[0], (caddr_t)newblks, sizeof newblks);
	bcopy((caddr_t)oldblks, (caddr_t)&oip->i_db[0], sizeof oldblks);
	oip->i_size = osize;

	if (error && allerror == 0)
		allerror = error;

	/*
	 * Indirect blocks first.
	 */
	indir_lbn[SINGLE] = -NDADDR;
	indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1;
	indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1;
	for (level = TRIPLE; level >= SINGLE; level--) {
		bn = oip->i_ib[level];
		if (bn != 0) {
			error = ffs_indirtrunc(oip, indir_lbn[level],
			    fsbtodb(fs, bn), lastiblock[level], level, &count);
			if (error)
				allerror = error;
			blocksreleased += count;
			if (lastiblock[level] < 0) {
				oip->i_ib[level] = 0;
				ffs_blkfree(oip, bn, fs->fs_bsize);
				blocksreleased += nblocks;
			}
		}
		if (lastiblock[level] >= 0)
			goto done;
	}

	/*
	 * All whole direct blocks or frags.
	 */
	for (i = NDADDR - 1; i > lastblock; i--) {
		long bsize;

		bn = oip->i_db[i];
		if (bn == 0)
			continue;
		oip->i_db[i] = 0;
		bsize = blksize(fs, oip, i);
		ffs_blkfree(oip, bn, bsize);
		blocksreleased += btodb(bsize);
	}
	if (lastblock < 0)
		goto done;

	/*
	 * Finally, look for a change in size of the
	 * last direct block; release any frags.
	 */
	bn = oip->i_db[lastblock];
	if (bn != 0) {
		long oldspace, newspace;

		/*
		 * Calculate amount of space we're giving
		 * back as old block size minus new block size.
		 */
		oldspace = blksize(fs, oip, lastblock);
		oip->i_size = length;
		newspace = blksize(fs, oip, lastblock);
		if (newspace == 0)
			panic("ffs_truncate: newspace");
		if (oldspace - newspace > 0) {
			/*
			 * Block number of space to be free'd is
			 * the old block # plus the number of frags
			 * required for the storage we're keeping.
			 */
			bn += numfrags(fs, newspace);
			ffs_blkfree(oip, bn, oldspace - newspace);
			blocksreleased += btodb(oldspace - newspace);
		}
	}
done:
#ifdef DIAGNOSTIC
	for (level = SINGLE; level <= TRIPLE; level++)
		if (newblks[NDADDR + level] != oip->i_ib[level])
			panic("ffs_truncate1");
	for (i = 0; i < NDADDR; i++)
		if (newblks[i] != oip->i_db[i])
			panic("ffs_truncate2");
	if (length == 0 && !RB_EMPTY(&ovp->v_rbdirty_tree))
		panic("ffs_truncate3");
#endif /* DIAGNOSTIC */
	/*
	 * Put back the real size.
	 */
	oip->i_size = length;
	oip->i_blocks -= blocksreleased;

	if (oip->i_blocks < 0)			/* sanity */
		oip->i_blocks = 0;
	oip->i_flag |= IN_CHANGE;
#ifdef QUOTA
	(void) ufs_chkdq(oip, -blocksreleased, NOCRED, 0);
#endif
	return (allerror);
}
예제 #19
0
파일: ufs_vnops.c 프로젝트: sofuture/bitrig
/*
 * link vnode call
 */
int
ufs_link(void *v)
{
	struct vop_link_args *ap = v;
	struct vnode *dvp = ap->a_dvp;
	struct vnode *vp = ap->a_vp;
	struct componentname *cnp = ap->a_cnp;
	struct proc *p = cnp->cn_proc;
	struct inode *ip;
	struct direct newdir;
	int error;

#ifdef DIAGNOSTIC
	if ((cnp->cn_flags & HASBUF) == 0)
		panic("ufs_link: no name");
#endif
	if (vp->v_type == VDIR) {
		VOP_ABORTOP(dvp, cnp);
		error = EPERM;
		goto out2;
	}
	if (dvp->v_mount != vp->v_mount) {
		VOP_ABORTOP(dvp, cnp);
		error = EXDEV;
		goto out2;
	}
	if (dvp != vp && (error = vn_lock(vp, LK_EXCLUSIVE, p))) {
		VOP_ABORTOP(dvp, cnp);
		goto out2;
	}
	ip = VTOI(vp);
	if ((nlink_t) DIP(ip, nlink) >= LINK_MAX) {
		VOP_ABORTOP(dvp, cnp);
		error = EMLINK;
		goto out1;
	}
	if (DIP(ip, flags) & (IMMUTABLE | APPEND)) {
		VOP_ABORTOP(dvp, cnp);
		error = EPERM;
		goto out1;
	}
	ip->i_effnlink++;
	DIP_ADD(ip, nlink, 1);
	ip->i_flag |= IN_CHANGE;
	if (DOINGSOFTDEP(vp))
		softdep_change_linkcnt(ip, 0);
	if ((error = UFS_UPDATE(ip, !DOINGSOFTDEP(vp))) == 0) {
		ufs_makedirentry(ip, cnp, &newdir);
		error = ufs_direnter(dvp, vp, &newdir, cnp, NULL);
	}
	if (error) {
		ip->i_effnlink--;
		DIP_ADD(ip, nlink, -1);
		ip->i_flag |= IN_CHANGE;
		if (DOINGSOFTDEP(vp))
			softdep_change_linkcnt(ip, 0);
	}
	pool_put(&namei_pool, cnp->cn_pnbuf);
	VN_KNOTE(vp, NOTE_LINK);
	VN_KNOTE(dvp, NOTE_WRITE);
out1:
	if (dvp != vp)
		VOP_UNLOCK(vp, 0);
out2:
	vput(dvp);
	return (error);
}
예제 #20
0
파일: ufs_vnops.c 프로젝트: sofuture/bitrig
/*
 * Rename system call.
 * 	rename("foo", "bar");
 * is essentially
 *	unlink("bar");
 *	link("foo", "bar");
 *	unlink("foo");
 * but ``atomically''.  Can't do full commit without saving state in the
 * inode on disk which isn't feasible at this time.  Best we can do is
 * always guarantee the target exists.
 *
 * Basic algorithm is:
 *
 * 1) Bump link count on source while we're linking it to the
 *    target.  This also ensure the inode won't be deleted out
 *    from underneath us while we work (it may be truncated by
 *    a concurrent `trunc' or `open' for creation).
 * 2) Link source to destination.  If destination already exists,
 *    delete it first.
 * 3) Unlink source reference to inode if still around. If a
 *    directory was moved and the parent of the destination
 *    is different from the source, patch the ".." entry in the
 *    directory.
 */
int
ufs_rename(void *v)
{
	struct vop_rename_args *ap = v;
	struct vnode *tvp = ap->a_tvp;
	struct vnode *tdvp = ap->a_tdvp;
	struct vnode *fvp = ap->a_fvp;
	struct vnode *fdvp = ap->a_fdvp;
	struct componentname *tcnp = ap->a_tcnp;
	struct componentname *fcnp = ap->a_fcnp;
	struct proc *p = fcnp->cn_proc;
	struct inode *ip, *xp, *dp;
	struct direct newdir;
	int doingdirectory = 0, oldparent = 0, newparent = 0;
	int error = 0;

#ifdef DIAGNOSTIC
	if ((tcnp->cn_flags & HASBUF) == 0 ||
	    (fcnp->cn_flags & HASBUF) == 0)
		panic("ufs_rename: no name");
#endif
	/*
	 * Check for cross-device rename.
	 */
	if ((fvp->v_mount != tdvp->v_mount) ||
	    (tvp && (fvp->v_mount != tvp->v_mount))) {
		error = EXDEV;
abortit:
		VOP_ABORTOP(tdvp, tcnp);
		if (tdvp == tvp)
			vrele(tdvp);
		else
			vput(tdvp);
		if (tvp)
			vput(tvp);
		VOP_ABORTOP(fdvp, fcnp);
		vrele(fdvp);
		vrele(fvp);
		return (error);
	}

	if (tvp && ((DIP(VTOI(tvp), flags) & (IMMUTABLE | APPEND)) ||
	    (DIP(VTOI(tdvp), flags) & APPEND))) {
		error = EPERM;
		goto abortit;
	}

	/*
	 * Check if just deleting a link name or if we've lost a race.
	 * If another process completes the same rename after we've looked
	 * up the source and have blocked looking up the target, then the
	 * source and target inodes may be identical now although the
	 * names were never linked.
	 */
	if (fvp == tvp) {
		if (fvp->v_type == VDIR) {
			/*
			 * Linked directories are impossible, so we must
			 * have lost the race.  Pretend that the rename
			 * completed before the lookup.
			 */
			error = ENOENT;
			goto abortit;
		}

		/* Release destination completely. */
		VOP_ABORTOP(tdvp, tcnp);
		vput(tdvp);
		vput(tvp);

		/*
		 * Delete source.  There is another race now that everything
		 * is unlocked, but this doesn't cause any new complications.
		 * relookup() may find a file that is unrelated to the
		 * original one, or it may fail.  Too bad.
		 */
		vrele(fvp);
		fcnp->cn_flags &= ~MODMASK;
		fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
		if ((fcnp->cn_flags & SAVESTART) == 0)
			panic("ufs_rename: lost from startdir");
		fcnp->cn_nameiop = DELETE;
		if ((error = vfs_relookup(fdvp, &fvp, fcnp)) != 0)
			return (error);		/* relookup did vrele() */
		vrele(fdvp);
		return (VOP_REMOVE(fdvp, fvp, fcnp));
	}

	if ((error = vn_lock(fvp, LK_EXCLUSIVE, p)) != 0)
		goto abortit;

	/* fvp, tdvp, tvp now locked */
	dp = VTOI(fdvp);
	ip = VTOI(fvp);
	if ((nlink_t) DIP(ip, nlink) >= LINK_MAX) {
		VOP_UNLOCK(fvp, 0);
		error = EMLINK;
		goto abortit;
	}
	if ((DIP(ip, flags) & (IMMUTABLE | APPEND)) ||
	    (DIP(dp, flags) & APPEND)) {
		VOP_UNLOCK(fvp, 0);
		error = EPERM;
		goto abortit;
	}
	if ((DIP(ip, mode) & IFMT) == IFDIR) {
		error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred);
		if (!error && tvp)
			error = VOP_ACCESS(tvp, VWRITE, tcnp->cn_cred);
		if (error) {
			VOP_UNLOCK(fvp, 0);
			error = EACCES;
			goto abortit;
		}
		/*
		 * Avoid ".", "..", and aliases of "." for obvious reasons.
		 */
		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
		    dp == ip ||
		    (fcnp->cn_flags & ISDOTDOT) ||
		    (tcnp->cn_flags & ISDOTDOT) ||
		    (ip->i_flag & IN_RENAME)) {
			VOP_UNLOCK(fvp, 0);
			error = EINVAL;
			goto abortit;
		}
		ip->i_flag |= IN_RENAME;
		oldparent = dp->i_number;
		doingdirectory = 1;
	}
	VN_KNOTE(fdvp, NOTE_WRITE);		/* XXX right place? */

	/*
	 * When the target exists, both the directory
	 * and target vnodes are returned locked.
	 */
	dp = VTOI(tdvp);
	xp = NULL;
	if (tvp)
		xp = VTOI(tvp);

	/*
	 * 1) Bump link count while we're moving stuff
	 *    around.  If we crash somewhere before
	 *    completing our work, the link count
	 *    may be wrong, but correctable.
	 */
	ip->i_effnlink++;
	DIP_ADD(ip, nlink, 1);
	ip->i_flag |= IN_CHANGE;
	if (DOINGSOFTDEP(fvp))
		softdep_change_linkcnt(ip, 0);
	if ((error = UFS_UPDATE(ip, !DOINGSOFTDEP(fvp))) != 0) {
		VOP_UNLOCK(fvp, 0);
		goto bad;
	}

	/*
	 * If ".." must be changed (ie the directory gets a new
	 * parent) then the source directory must not be in the
	 * directory hierarchy above the target, as this would
	 * orphan everything below the source directory. Also
	 * the user must have write permission in the source so
	 * as to be able to change "..". We must repeat the call 
	 * to namei, as the parent directory is unlocked by the
	 * call to checkpath().
	 */
	error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred);
	VOP_UNLOCK(fvp, 0);

	/* tdvp and tvp locked */
	if (oldparent != dp->i_number)
		newparent = dp->i_number;
	if (doingdirectory && newparent) {
		if (error)	/* write access check above */
			goto bad;
		if (xp != NULL)
			vput(tvp);
		/*
		 * Compensate for the reference ufs_checkpath() loses.
		 */
		vref(tdvp);
		/* Only tdvp is locked */
		if ((error = ufs_checkpath(ip, dp, tcnp->cn_cred)) != 0) {
			vrele(tdvp);
			goto out;
		}
		if ((tcnp->cn_flags & SAVESTART) == 0)
			panic("ufs_rename: lost to startdir");
		if ((error = vfs_relookup(tdvp, &tvp, tcnp)) != 0)
			goto out;
		vrele(tdvp); /* relookup() acquired a reference */
		dp = VTOI(tdvp);
		xp = NULL;
		if (tvp)
			xp = VTOI(tvp);
	}
	/*
	 * 2) If target doesn't exist, link the target
	 *    to the source and unlink the source. 
	 *    Otherwise, rewrite the target directory
	 *    entry to reference the source inode and
	 *    expunge the original entry's existence.
	 */
	if (xp == NULL) {
		if (dp->i_dev != ip->i_dev)
			panic("rename: EXDEV");
		/*
		 * Account for ".." in new directory.
		 * When source and destination have the same
		 * parent we don't fool with the link count.
		 */
		if (doingdirectory && newparent) {
			if ((nlink_t) DIP(dp, nlink) >= LINK_MAX) {
				error = EMLINK;
				goto bad;
			}
			dp->i_effnlink++;
			DIP_ADD(dp, nlink, 1);
			dp->i_flag |= IN_CHANGE;
			if (DOINGSOFTDEP(tdvp))
                               softdep_change_linkcnt(dp, 0);
			if ((error = UFS_UPDATE(dp, !DOINGSOFTDEP(tdvp))) 
			    != 0) {
				dp->i_effnlink--;
				DIP_ADD(dp, nlink, -1);
				dp->i_flag |= IN_CHANGE;
				if (DOINGSOFTDEP(tdvp))
					softdep_change_linkcnt(dp, 0);
				goto bad;
			}
		}
		ufs_makedirentry(ip, tcnp, &newdir);
		if ((error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL)) != 0) {
			if (doingdirectory && newparent) {
				dp->i_effnlink--;
				DIP_ADD(dp, nlink, -1);
				dp->i_flag |= IN_CHANGE;
				if (DOINGSOFTDEP(tdvp))
					softdep_change_linkcnt(dp, 0);
				(void)UFS_UPDATE(dp, 1);
			}
			goto bad;
		}
		VN_KNOTE(tdvp, NOTE_WRITE);
		vput(tdvp);
	} else {
		if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev)
			panic("rename: EXDEV");
		/*
		 * Short circuit rename(foo, foo).
		 */
		if (xp->i_number == ip->i_number)
			panic("ufs_rename: same file");
		/*
		 * If the parent directory is "sticky", then the user must
		 * own the parent directory, or the destination of the rename,
		 * otherwise the destination may not be changed (except by
		 * root). This implements append-only directories.
		 */
		if ((DIP(dp, mode) & S_ISTXT) && tcnp->cn_cred->cr_uid != 0 &&
		    tcnp->cn_cred->cr_uid != DIP(dp, uid) &&
		    DIP(xp, uid )!= tcnp->cn_cred->cr_uid) {
			error = EPERM;
			goto bad;
		}
		/*
		 * Target must be empty if a directory and have no links
		 * to it. Also, ensure source and target are compatible
		 * (both directories, or both not directories).
		 */
		if ((DIP(xp, mode) & IFMT) == IFDIR) {
			if (xp->i_effnlink > 2 ||
			    !ufs_dirempty(xp, dp->i_number, tcnp->cn_cred)) {
				error = ENOTEMPTY;
				goto bad;
			}
			if (!doingdirectory) {
				error = ENOTDIR;
				goto bad;
			}
			cache_purge(tdvp);
		} else if (doingdirectory) {
			error = EISDIR;
			goto bad;
		}
		
		if ((error = ufs_dirrewrite(dp, xp, ip->i_number,
                   IFTODT(DIP(ip, mode)), (doingdirectory && newparent) ?
		   newparent : doingdirectory)) != 0)
                        goto bad;
		if (doingdirectory) {
			if (!newparent) {
				dp->i_effnlink--;
				if (DOINGSOFTDEP(tdvp))
					softdep_change_linkcnt(dp, 0);
			}
			xp->i_effnlink--;
			if (DOINGSOFTDEP(tvp))
				softdep_change_linkcnt(xp, 0);
		}
		if (doingdirectory && !DOINGSOFTDEP(tvp)) {
		       /*
			* Truncate inode. The only stuff left in the directory
			* is "." and "..". The "." reference is inconsequential
                        * since we are quashing it. We have removed the "."
                        * reference and the reference in the parent directory,
                        * but there may be other hard links. The soft
                        * dependency code will arrange to do these operations
                        * after the parent directory entry has been deleted on
                        * disk, so when running with that code we avoid doing
                        * them now.
                        */
			if (!newparent) {
				DIP_ADD(dp, nlink, -1);
				dp->i_flag |= IN_CHANGE;
			}

			DIP_ADD(xp, nlink, -1);
			xp->i_flag |= IN_CHANGE;
			if ((error = UFS_TRUNCATE(VTOI(tvp), (off_t)0, IO_SYNC,
			        tcnp->cn_cred)) != 0)
				goto bad;
                }
		VN_KNOTE(tdvp, NOTE_WRITE);
	        vput(tdvp);
		VN_KNOTE(tvp, NOTE_DELETE);
		vput(tvp);
		xp = NULL;
	}

	/*
	 * 3) Unlink the source.
	 */
	fcnp->cn_flags &= ~MODMASK;
	fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
	if ((fcnp->cn_flags & SAVESTART) == 0)
		panic("ufs_rename: lost from startdir");
	if ((error = vfs_relookup(fdvp, &fvp, fcnp)) != 0) {
		vrele(ap->a_fvp);
		return (error);
	}
	vrele(fdvp);
	if (fvp == NULL) {
		/*
		 * From name has disappeared.
		 */
		if (doingdirectory)
			panic("ufs_rename: lost dir entry");
		vrele(ap->a_fvp);
		return (0);
	}

	xp = VTOI(fvp);
	dp = VTOI(fdvp);

	/*
	 * Ensure that the directory entry still exists and has not
	 * changed while the new name has been entered. If the source is
	 * a file then the entry may have been unlinked or renamed. In
	 * either case there is no further work to be done. If the source
	 * is a directory then it cannot have been rmdir'ed; the IN_RENAME 
	 * flag ensures that it cannot be moved by another rename or removed
	 * by a rmdir.
	 */
	if (xp != ip) {
		if (doingdirectory)
			panic("ufs_rename: lost dir entry");
	} else {
		/*
		 * If the source is a directory with a
		 * new parent, the link count of the old
		 * parent directory must be decremented
		 * and ".." set to point to the new parent.
		 */
		if (doingdirectory && newparent) {
			xp->i_offset = mastertemplate.dot_reclen;
			ufs_dirrewrite(xp, dp, newparent, DT_DIR, 0);
			cache_purge(fdvp);
		}
		error = ufs_dirremove(fdvp, xp, fcnp->cn_flags, 0);
		xp->i_flag &= ~IN_RENAME;
	}
	VN_KNOTE(fvp, NOTE_RENAME);
	if (dp)
		vput(fdvp);
	if (xp)
		vput(fvp);
	vrele(ap->a_fvp);
	return (error);

bad:
	if (xp)
		vput(ITOV(xp));
	vput(ITOV(dp));
out:
	vrele(fdvp);
	if (doingdirectory)
		ip->i_flag &= ~IN_RENAME;
	if (vn_lock(fvp, LK_EXCLUSIVE, p) == 0) {
		ip->i_effnlink--;
		DIP_ADD(ip, nlink, -1);
		ip->i_flag |= IN_CHANGE;
		ip->i_flag &= ~IN_RENAME;
		if (DOINGSOFTDEP(fvp))
			softdep_change_linkcnt(ip, 0);
		vput(fvp);
	} else
		vrele(fvp);
	return (error);
}
예제 #21
0
/*
 * Truncate the inode ip to at most length size, freeing the
 * disk blocks.
 */
int
ffs_truncate(vnode *vp, off_t length, int flags, Ucred *cred)
{
	print("HARVEY TODO: %s\n", __func__);
#if 0
	struct inode *ip;
	ufs2_daddr_t bn, lbn, lastblock, lastiblock[UFS_NIADDR];
	ufs2_daddr_t indir_lbn[UFS_NIADDR], oldblks[UFS_NDADDR + UFS_NIADDR];
	ufs2_daddr_t newblks[UFS_NDADDR + UFS_NIADDR];
	ufs2_daddr_t count, blocksreleased = 0, datablocks, blkno;
	struct bufobj *bo;
	struct fs *fs;
	struct buf *bp;
	struct ufsmount *ump;
	int softdeptrunc, journaltrunc;
	int needextclean, extblocks;
	int offset, size, level, nblocks;
	int i, error, allerror, indiroff, waitforupdate;
	off_t osize;

	ip = VTOI(vp);
	ump = VFSTOUFS(vp->v_mount);
	fs = ump->um_fs;
	bo = &vp->v_bufobj;

	ASSERT_VOP_LOCKED(vp, "ffs_truncate");

	if (length < 0)
		return (EINVAL);
	if (length > fs->fs_maxfilesize)
		return (EFBIG);
#ifdef QUOTA
	error = getinoquota(ip);
	if (error)
		return (error);
#endif
	/*
	 * Historically clients did not have to specify which data
	 * they were truncating. So, if not specified, we assume
	 * traditional behavior, e.g., just the normal data.
	 */
	if ((flags & (IO_EXT | IO_NORMAL)) == 0)
		flags |= IO_NORMAL;
	if (!DOINGSOFTDEP(vp) && !DOINGASYNC(vp))
		flags |= IO_SYNC;
	waitforupdate = (flags & IO_SYNC) != 0 || !DOINGASYNC(vp);
	/*
	 * If we are truncating the extended-attributes, and cannot
	 * do it with soft updates, then do it slowly here. If we are
	 * truncating both the extended attributes and the file contents
	 * (e.g., the file is being unlinked), then pick it off with
	 * soft updates below.
	 */
	allerror = 0;
	needextclean = 0;
	softdeptrunc = 0;
	journaltrunc = DOINGSUJ(vp);
	if (journaltrunc == 0 && DOINGSOFTDEP(vp) && length == 0)
		softdeptrunc = !softdep_slowdown(vp);
	extblocks = 0;
	datablocks = DIP(ip, i_blocks);
	if (fs->fs_magic == FS_UFS2_MAGIC && ip->i_din2->di_extsize > 0) {
		extblocks = btodb(fragroundup(fs, ip->i_din2->di_extsize));
		datablocks -= extblocks;
	}
	if ((flags & IO_EXT) && extblocks > 0) {
		if (length != 0)
			panic("ffs_truncate: partial trunc of extdata");
		if (softdeptrunc || journaltrunc) {
			if ((flags & IO_NORMAL) == 0)
				goto extclean;
			needextclean = 1;
		} else {
			if ((error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0)
				return (error);
#ifdef QUOTA
			(void) chkdq(ip, -extblocks, NOCRED, 0);
#endif
			vinvalbuf(vp, V_ALT, 0, 0);
			vn_pages_remove(vp,
			    OFF_TO_IDX(lblktosize(fs, -extblocks)), 0);
			osize = ip->i_din2->di_extsize;
			ip->i_din2->di_blocks -= extblocks;
			ip->i_din2->di_extsize = 0;
			for (i = 0; i < UFS_NXADDR; i++) {
				oldblks[i] = ip->i_din2->di_extb[i];
				ip->i_din2->di_extb[i] = 0;
			}
			ip->i_flag |= IN_CHANGE;
			if ((error = ffs_update(vp, waitforupdate)))
				return (error);
			for (i = 0; i < UFS_NXADDR; i++) {
				if (oldblks[i] == 0)
					continue;
				ffs_blkfree(ump, fs, ITODEVVP(ip), oldblks[i],
				    sblksize(fs, osize, i), ip->i_number,
				    vp->v_type, nil);
			}
		}
	}
	if ((flags & IO_NORMAL) == 0)
		return (0);
	if (vp->v_type == VLNK &&
	    (ip->i_size < vp->v_mount->mnt_maxsymlinklen ||
	     datablocks == 0)) {
#ifdef INVARIANTS
		if (length != 0)
			panic("ffs_truncate: partial truncate of symlink");
#endif
		bzero(SHORTLINK(ip), (uint)ip->i_size);
		ip->i_size = 0;
		DIP_SET(ip, i_size, 0);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		if (needextclean)
			goto extclean;
		return (ffs_update(vp, waitforupdate));
	}
	if (ip->i_size == length) {
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		if (needextclean)
			goto extclean;
		return (ffs_update(vp, 0));
	}
	if (fs->fs_ronly)
		panic("ffs_truncate: read-only filesystem");
	if (IS_SNAPSHOT(ip))
		ffs_snapremove(vp);
	vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0;
	osize = ip->i_size;
	/*
	 * Lengthen the size of the file. We must ensure that the
	 * last byte of the file is allocated. Since the smallest
	 * value of osize is 0, length will be at least 1.
	 */
	if (osize < length) {
		vnode_pager_setsize(vp, length);
		flags |= BA_CLRBUF;
		error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp);
		if (error) {
			vnode_pager_setsize(vp, osize);
			return (error);
		}
		ip->i_size = length;
		DIP_SET(ip, i_size, length);
		if (bp->b_bufsize == fs->fs_bsize)
			bp->b_flags |= B_CLUSTEROK;
		if (flags & IO_SYNC)
			bwrite(bp);
		else if (DOINGASYNC(vp))
			bdwrite(bp);
		else
			bawrite(bp);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (ffs_update(vp, waitforupdate));
	}
	/*
	 * Lookup block number for a given offset. Zero length files
	 * have no blocks, so return a blkno of -1.
	 */
	lbn = lblkno(fs, length - 1);
	if (length == 0) {
		blkno = -1;
	} else if (lbn < UFS_NDADDR) {
		blkno = DIP(ip, i_db[lbn]);
	} else {
		error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), fs->fs_bsize,
		    cred, BA_METAONLY, &bp);
		if (error)
			return (error);
		indiroff = (lbn - UFS_NDADDR) % NINDIR(fs);
		if (I_IS_UFS1(ip))
			blkno = ((ufs1_daddr_t *)(bp->b_data))[indiroff];
		else
			blkno = ((ufs2_daddr_t *)(bp->b_data))[indiroff];
		/*
		 * If the block number is non-zero, then the indirect block
		 * must have been previously allocated and need not be written.
		 * If the block number is zero, then we may have allocated
		 * the indirect block and hence need to write it out.
		 */
		if (blkno != 0)
			brelse(bp);
		else if (flags & IO_SYNC)
			bwrite(bp);
		else
			bdwrite(bp);
	}
	/*
	 * If the block number at the new end of the file is zero,
	 * then we must allocate it to ensure that the last block of 
	 * the file is allocated. Soft updates does not handle this
	 * case, so here we have to clean up the soft updates data
	 * structures describing the allocation past the truncation
	 * point. Finding and deallocating those structures is a lot of
	 * work. Since partial truncation with a hole at the end occurs
	 * rarely, we solve the problem by syncing the file so that it
	 * will have no soft updates data structures left.
	 */
	if (blkno == 0 && (error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0)
		return (error);
	if (blkno != 0 && DOINGSOFTDEP(vp)) {
		if (softdeptrunc == 0 && journaltrunc == 0) {
			/*
			 * If soft updates cannot handle this truncation,
			 * clean up soft dependency data structures and
			 * fall through to the synchronous truncation.
			 */
			if ((error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0)
				return (error);
		} else {
			flags = IO_NORMAL | (needextclean ? IO_EXT: 0);
			if (journaltrunc)
				softdep_journal_freeblocks(ip, cred, length,
				    flags);
			else
				softdep_setup_freeblocks(ip, length, flags);
			ASSERT_VOP_LOCKED(vp, "ffs_truncate1");
			if (journaltrunc == 0) {
				ip->i_flag |= IN_CHANGE | IN_UPDATE;
				error = ffs_update(vp, 0);
			}
			return (error);
		}
	}
	/*
	 * Shorten the size of the file. If the last block of the
	 * shortened file is unallocated, we must allocate it.
	 * Additionally, if the file is not being truncated to a
	 * block boundary, the contents of the partial block
	 * following the end of the file must be zero'ed in
	 * case it ever becomes accessible again because of
	 * subsequent file growth. Directories however are not
	 * zero'ed as they should grow back initialized to empty.
	 */
	offset = blkoff(fs, length);
	if (blkno != 0 && offset == 0) {
		ip->i_size = length;
		DIP_SET(ip, i_size, length);
	} else {
		lbn = lblkno(fs, length);
		flags |= BA_CLRBUF;
		error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp);
		if (error)
			return (error);
		/*
		 * When we are doing soft updates and the UFS_BALLOC
		 * above fills in a direct block hole with a full sized
		 * block that will be truncated down to a fragment below,
		 * we must flush out the block dependency with an FSYNC
		 * so that we do not get a soft updates inconsistency
		 * when we create the fragment below.
		 */
		if (DOINGSOFTDEP(vp) && lbn < UFS_NDADDR &&
		    fragroundup(fs, blkoff(fs, length)) < fs->fs_bsize &&
		    (error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0)
			return (error);
		ip->i_size = length;
		DIP_SET(ip, i_size, length);
		size = blksize(fs, ip, lbn);
		if (vp->v_type != VDIR && offset != 0)
			bzero((char *)bp->b_data + offset,
			    (uint)(size - offset));
		/* Kirk's code has reallocbuf(bp, size, 1) here */
		allocbuf(bp, size);
		if (bp->b_bufsize == fs->fs_bsize)
			bp->b_flags |= B_CLUSTEROK;
		if (flags & IO_SYNC)
			bwrite(bp);
		else if (DOINGASYNC(vp))
			bdwrite(bp);
		else
			bawrite(bp);
	}
	/*
	 * Calculate index into inode's block list of
	 * last direct and indirect blocks (if any)
	 * which we want to keep.  Lastblock is -1 when
	 * the file is truncated to 0.
	 */
	lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
	lastiblock[SINGLE] = lastblock - UFS_NDADDR;
	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
	nblocks = btodb(fs->fs_bsize);
	/*
	 * Update file and block pointers on disk before we start freeing
	 * blocks.  If we crash before free'ing blocks below, the blocks
	 * will be returned to the free list.  lastiblock values are also
	 * normalized to -1 for calls to ffs_indirtrunc below.
	 */
	for (level = TRIPLE; level >= SINGLE; level--) {
		oldblks[UFS_NDADDR + level] = DIP(ip, i_ib[level]);
		if (lastiblock[level] < 0) {
			DIP_SET(ip, i_ib[level], 0);
			lastiblock[level] = -1;
		}
	}
	for (i = 0; i < UFS_NDADDR; i++) {
		oldblks[i] = DIP(ip, i_db[i]);
		if (i > lastblock)
			DIP_SET(ip, i_db[i], 0);
	}
	ip->i_flag |= IN_CHANGE | IN_UPDATE;
	allerror = ffs_update(vp, waitforupdate);
	
	/*
	 * Having written the new inode to disk, save its new configuration
	 * and put back the old block pointers long enough to process them.
	 * Note that we save the new block configuration so we can check it
	 * when we are done.
	 */
	for (i = 0; i < UFS_NDADDR; i++) {
		newblks[i] = DIP(ip, i_db[i]);
		DIP_SET(ip, i_db[i], oldblks[i]);
	}
	for (i = 0; i < UFS_NIADDR; i++) {
		newblks[UFS_NDADDR + i] = DIP(ip, i_ib[i]);
		DIP_SET(ip, i_ib[i], oldblks[UFS_NDADDR + i]);
	}
	ip->i_size = osize;
	DIP_SET(ip, i_size, osize);

	error = vtruncbuf(vp, cred, length, fs->fs_bsize);
	if (error && (allerror == 0))
		allerror = error;

	/*
	 * Indirect blocks first.
	 */
	indir_lbn[SINGLE] = -UFS_NDADDR;
	indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1;
	indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1;
	for (level = TRIPLE; level >= SINGLE; level--) {
		bn = DIP(ip, i_ib[level]);
		if (bn != 0) {
			error = ffs_indirtrunc(ip, indir_lbn[level],
			    fsbtodb(fs, bn), lastiblock[level], level, &count);
			if (error)
				allerror = error;
			blocksreleased += count;
			if (lastiblock[level] < 0) {
				DIP_SET(ip, i_ib[level], 0);
				ffs_blkfree(ump, fs, ump->um_devvp, bn,
				    fs->fs_bsize, ip->i_number,
				    vp->v_type, nil);
				blocksreleased += nblocks;
			}
		}
		if (lastiblock[level] >= 0)
			goto done;
	}

	/*
	 * All whole direct blocks or frags.
	 */
	for (i = UFS_NDADDR - 1; i > lastblock; i--) {
		long bsize;

		bn = DIP(ip, i_db[i]);
		if (bn == 0)
			continue;
		DIP_SET(ip, i_db[i], 0);
		bsize = blksize(fs, ip, i);
		ffs_blkfree(ump, fs, ump->um_devvp, bn, bsize, ip->i_number,
		    vp->v_type, nil);
		blocksreleased += btodb(bsize);
	}
	if (lastblock < 0)
		goto done;

	/*
	 * Finally, look for a change in size of the
	 * last direct block; release any frags.
	 */
	bn = DIP(ip, i_db[lastblock]);
	if (bn != 0) {
		long oldspace, newspace;

		/*
		 * Calculate amount of space we're giving
		 * back as old block size minus new block size.
		 */
		oldspace = blksize(fs, ip, lastblock);
		ip->i_size = length;
		DIP_SET(ip, i_size, length);
		newspace = blksize(fs, ip, lastblock);
		if (newspace == 0)
			panic("ffs_truncate: newspace");
		if (oldspace - newspace > 0) {
			/*
			 * Block number of space to be free'd is
			 * the old block # plus the number of frags
			 * required for the storage we're keeping.
			 */
			bn += numfrags(fs, newspace);
			ffs_blkfree(ump, fs, ump->um_devvp, bn,
			   oldspace - newspace, ip->i_number, vp->v_type, nil);
			blocksreleased += btodb(oldspace - newspace);
		}
	}
done:
#ifdef INVARIANTS
	for (level = SINGLE; level <= TRIPLE; level++)
		if (newblks[UFS_NDADDR + level] != DIP(ip, i_ib[level]))
			panic("ffs_truncate1");
	for (i = 0; i < UFS_NDADDR; i++)
		if (newblks[i] != DIP(ip, i_db[i]))
			panic("ffs_truncate2");
	BO_LOCK(bo);
	if (length == 0 &&
	    (fs->fs_magic != FS_UFS2_MAGIC || ip->i_din2->di_extsize == 0) &&
	    (bo->bo_dirty.bv_cnt > 0 || bo->bo_clean.bv_cnt > 0))
		panic("ffs_truncate3");
	BO_UNLOCK(bo);
#endif /* INVARIANTS */
	/*
	 * Put back the real size.
	 */
	ip->i_size = length;
	DIP_SET(ip, i_size, length);
	if (DIP(ip, i_blocks) >= blocksreleased)
		DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - blocksreleased);
	else	/* sanity */
		DIP_SET(ip, i_blocks, 0);
	ip->i_flag |= IN_CHANGE;
#ifdef QUOTA
	(void) chkdq(ip, -blocksreleased, NOCRED, 0);
#endif
	return (allerror);

extclean:
	if (journaltrunc)
		softdep_journal_freeblocks(ip, cred, length, IO_EXT);
	else
		softdep_setup_freeblocks(ip, length, IO_EXT);
	return (ffs_update(vp, waitforupdate));

#endif // 0
	return 0;
}
예제 #22
0
/*
 * Remove a directory entry after a call to namei, using
 * the parameters which it left in nameidata. The entry
 * dp->i_offset contains the offset into the directory of the
 * entry to be eliminated.  The dp->i_count field contains the
 * size of the previous record in the directory.  If this
 * is 0, the first entry is being deleted, so we need only
 * zero the inode number to mark the entry as free.  If the
 * entry is not the first in the directory, we must reclaim
 * the space of the now empty record by adding the record size
 * to the size of the previous entry.
 */
int
ufs_dirremove(struct vnode *dvp, struct inode *ip, int flags, int isrmdir)
{
	struct inode *dp;
	struct direct *ep;
	struct buf *bp;
	int error;

	UFS_WAPBL_JLOCK_ASSERT(dvp->v_mount);

	dp = VTOI(dvp);

	if ((error = UFS_BUFATOFF(dp,
	    (off_t)(dp->i_offset - dp->i_count), (char **)&ep, &bp)) != 0)
		return (error);
#ifdef UFS_DIRHASH
	/*
	 * Remove the dirhash entry. This is complicated by the fact
	 * that `ep' is the previous entry when dp->i_count != 0.
	 */
	if (dp->i_dirhash != NULL)
		ufsdirhash_remove(dp, (dp->i_count == 0) ? ep :
		(struct direct *)((char *)ep + ep->d_reclen), dp->i_offset);
#endif

	if (dp->i_count == 0) {
		/*
		 * First entry in block: set d_ino to zero.
		 */
		ep->d_ino = 0;
	} else {
 		/*
 		 * Collapse new free space into previous entry.
 		 */
 		ep->d_reclen += dp->i_reclen;
	}
#ifdef UFS_DIRHASH
	if (dp->i_dirhash != NULL)
		ufsdirhash_checkblock(dp, (char *)ep -
		    ((dp->i_offset - dp->i_count) & (DIRBLKSIZ - 1)),
		    dp->i_offset & ~(DIRBLKSIZ - 1));
#endif
 	if (DOINGSOFTDEP(dvp)) {
		if (ip) {
			ip->i_effnlink--;
			softdep_change_linkcnt(ip, 0);
			softdep_setup_remove(bp, dp, ip, isrmdir);
		}
		if (softdep_slowdown(dvp)) {
			error = bwrite(bp);
		} else {
			bdwrite(bp);
			error = 0;
		}
 	} else {
		if (ip) {
			ip->i_effnlink--;
			DIP_ADD(ip, nlink, -1);
			ip->i_flag |= IN_CHANGE;
			UFS_WAPBL_UPDATE(ip, 0);
		}
		if (DOINGASYNC(dvp) && dp->i_count != 0) {
			bdwrite(bp);
			error = 0;
		} else
			error = bwrite(bp);
	}
	dp->i_flag |= IN_CHANGE | IN_UPDATE;
	UFS_WAPBL_UPDATE(dp, 0);
	return (error);
}
예제 #23
0
int
ffs2_balloc(struct inode *ip, off_t off, int size, struct ucred *cred,
    int flags, struct buf **bpp)
{
	daddr_t lbn, lastlbn, nb, newb, *blkp;
	daddr_t pref, *allocblk, allociblk[NIADDR + 1];
	daddr_t *bap, *allocib;
	int deallocated, osize, nsize, num, i, error, unwindidx, r;
	struct buf *bp, *nbp;
	struct indir indirs[NIADDR + 2];
	struct fs *fs;
	struct vnode *vp;
	struct proc *p;
	
	vp = ITOV(ip);
	fs = ip->i_fs;
	p = curproc;
	unwindidx = -1;

	lbn = lblkno(fs, off);
	size = blkoff(fs, off) + size;

	if (size > fs->fs_bsize)
		panic("ffs2_balloc: block too big");

	if (bpp != NULL)
		*bpp = NULL;

	if (lbn < 0)
		return (EFBIG);

	/*
	 * If the next write will extend the file into a new block, and the
	 * file is currently composed of a fragment, this fragment has to be
	 * extended to be a full block.
	 */
	lastlbn = lblkno(fs, ip->i_ffs2_size);
	if (lastlbn < NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			error = ffs_realloccg(ip, nb, ffs2_blkpref(ip,
			    lastlbn, nb, &ip->i_ffs2_db[0]), osize,
			    (int) fs->fs_bsize, cred, bpp, &newb);
			if (error)
				return (error);

			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb, newb,
				    ip->i_ffs2_db[nb], fs->fs_bsize, osize,
				    bpp ? *bpp : NULL);

			ip->i_ffs2_size = lblktosize(fs, nb + 1);
			uvm_vnp_setsize(vp, ip->i_ffs2_size);
			ip->i_ffs2_db[nb] = newb;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;

			if (bpp) {
				if (flags & B_SYNC)
					bwrite(*bpp);
				else
					bawrite(*bpp);
			}
		}
	}

	/*
	 * The first NDADDR blocks are direct.
	 */
	if (lbn < NDADDR) {

		nb = ip->i_ffs2_db[lbn];

		if (nb != 0 && ip->i_ffs2_size >= lblktosize(fs, lbn + 1)) {
			/*
			 * The direct block is already allocated and the file
			 * extends past this block, thus this must be a whole
			 * block. Just read it, if requested.
			 */
			if (bpp != NULL) {
				error = bread(vp, lbn, fs->fs_bsize, bpp);
				if (error) {
					brelse(*bpp);
					return (error);
				}
			}

			return (0);
		}

		if (nb != 0) {
			/*
			 * Consider the need to allocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_ffs2_size));
			nsize = fragroundup(fs, size);

			if (nsize <= osize) {
				/*
				 * The existing block is already at least as
				 * big as we want. Just read it, if requested.
				 */
				if (bpp != NULL) {
					error = bread(vp, lbn, fs->fs_bsize,
					    bpp);
					if (error) {
						brelse(*bpp);
						return (error);
					}
					(*bpp)->b_bcount = osize;
				}

				return (0);
			} else {
				/*
				 * The existing block is smaller than we want,
				 * grow it.
				 */
				error = ffs_realloccg(ip, lbn,
				    ffs2_blkpref(ip, lbn, (int) lbn,
				    &ip->i_ffs2_db[0]), osize, nsize, cred,
				    bpp, &newb);
				if (error)
					return (error);

				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    newb, nb, nsize, osize,
					    bpp ? *bpp : NULL);
			}
		} else {
			/*
			 * The block was not previously allocated, allocate a
			 * new block or fragment.
			 */
			if (ip->i_ffs2_size < lblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;

			error = ffs_alloc(ip, lbn, ffs2_blkpref(ip, lbn,
			    (int) lbn, &ip->i_ffs2_db[0]), nsize, cred, &newb);
			if (error)
				return (error);

			if (bpp != NULL) {
				bp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
				if (nsize < fs->fs_bsize)
					bp->b_bcount = nsize;
				bp->b_blkno = fsbtodb(fs, newb);
				if (flags & B_CLRBUF)
					clrbuf(bp);
				*bpp = bp;
			}

			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bpp ? *bpp : NULL);
		}

		ip->i_ffs2_db[lbn] = newb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;

		return (0);
	}

	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	error = ufs_getlbns(vp, lbn, indirs, &num);
	if (error)
		return (error);

#ifdef DIAGNOSTIC
	if (num < 1)
		panic("ffs2_balloc: ufs_bmaparray returned indirect block");
#endif

	/*
	 * Fetch the first indirect block allocating it necessary.
	 */
	--num;
	nb = ip->i_ffs2_ib[indirs[0].in_off];
	allocib = NULL;
	allocblk = allociblk;

	if (nb == 0) {
		pref = ffs2_blkpref(ip, lbn, -indirs[0].in_off - 1, NULL);
		error = ffs_alloc(ip, lbn, pref, (int) fs->fs_bsize, cred,
		    &newb);
		if (error)
			goto fail;

		nb = newb;
		*allocblk++ = nb;
		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
		bp->b_blkno = fsbtodb(fs, nb);
		clrbuf(bp);

		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {
			/*
			 * Write synchronously so that indirect blocks never
			 * point at garbage.
			 */
			error = bwrite(bp);
			if (error)
				goto fail;
		}

		unwindidx = 0;
		allocib = &ip->i_ffs2_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */
	for (i = 1;;) {
		error = bread(vp, indirs[i].in_lbn, (int)fs->fs_bsize, &bp);
		if (error) {
			brelse(bp);
			goto fail;
		}

		bap = (int64_t *) bp->b_data;
		nb = bap[indirs[i].in_off];

		if (i == num)
			break;

		i++;

		if (nb != 0) {
			brelse(bp);
			continue;
		}

		if (pref == 0)
			pref = ffs2_blkpref(ip, lbn, i - num - 1, NULL);

		error = ffs_alloc(ip, lbn, pref, (int) fs->fs_bsize, cred,
		    &newb);
		if (error) {
			brelse(bp);
			goto fail;
		}

		nb = newb;
		*allocblk++ = nb;
		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		clrbuf(nbp);

		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {
			/*
			 * Write synchronously so that indirect blocks never
			 * point at garbage.
			 */
			error = bwrite(nbp);
			if (error) {
				brelse(bp);
				goto fail;
			}
		}

		if (unwindidx < 0)
			unwindidx = i - 1;

		bap[indirs[i - 1].in_off] = nb;

		/*
		 * If required, write synchronously, otherwise use delayed
		 * write.
		 */
		if (flags & B_SYNC)
			bwrite(bp);
		else
			bdwrite(bp);
	}

	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		pref = ffs2_blkpref(ip, lbn, indirs[num].in_off, &bap[0]);

		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error) {
			brelse(bp);
			goto fail;
		}

		nb = newb;
		*allocblk++ = nb;

		if (bpp != NULL) {
			nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
			nbp->b_blkno = fsbtodb(fs, nb);
			if (flags & B_CLRBUF)
				clrbuf(nbp);
			*bpp = nbp;
		}

		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);

		bap[indirs[num].in_off] = nb;

		if (allocib == NULL && unwindidx < 0)
			unwindidx = i - 1;

		/*
		 * If required, write synchronously, otherwise use delayed
		 * write.
		 */
		if (flags & B_SYNC)
			bwrite(bp);
		else
			bdwrite(bp);

		return (0);
	}

	brelse(bp);

	if (bpp != NULL) {
		if (flags & B_CLRBUF) {
			error = bread(vp, lbn, (int)fs->fs_bsize, &nbp);
			if (error) {
				brelse(nbp);
				goto fail;
			}
		} else {
			nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
			nbp->b_blkno = fsbtodb(fs, nb);
			clrbuf(nbp);
		}

		*bpp = nbp;
	}

	return (0);

fail:
	/*
	 * If we have failed to allocate any blocks, simply return the error.
	 * This is the usual case and avoids the need to fsync the file.
	 */
	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
		return (error);
	/*
	 * If we have failed part way through block allocation, we have to
	 * deallocate any indirect blocks that we have allocated. We have to
	 * fsync the file before we start to get rid of all of its
	 * dependencies so that we do not leave them dangling. We have to sync
	 * it at the end so that the softdep code does not find any untracked
	 * changes. Although this is really slow, running out of disk space is
	 * not expected to be a common occurrence. The error return from fsync
	 * is ignored as we already have an error to return to the user.
	 */
	VOP_FSYNC(vp, p->p_ucred, MNT_WAIT, p);
	if (unwindidx >= 0) {
		/*
		 * First write out any buffers we've created to resolve their
		 * softdeps. This must be done in reverse order of creation so
		 * that we resolve the dependencies in one pass.
		 * Write the cylinder group buffers for these buffers too.
		 */
		 for (i = num; i >= unwindidx; i--) {
		 	if (i == 0)
				break;

			bp = getblk(vp, indirs[i].in_lbn, (int) fs->fs_bsize,
			    0, 0);
			if (bp->b_flags & B_DELWRI) {
				nb = fsbtodb(fs, cgtod(fs, dtog(fs,
				    dbtofsb(fs, bp->b_blkno))));
				bwrite(bp);
				bp = getblk(ip->i_devvp, nb,
				    (int) fs->fs_cgsize, 0, 0);
				if (bp->b_flags & B_DELWRI)
					bwrite(bp);
				else {
					bp->b_flags |= B_INVAL;
					brelse(bp);
				}
			} else {
				bp->b_flags |= B_INVAL;
				brelse(bp);
			}
		}

		if (DOINGSOFTDEP(vp) && unwindidx == 0) {
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			ffs_update(ip, 1);
		}

		/*
		 * Now that any dependencies that we created have been
		 * resolved, we can undo the partial allocation.
		 */
		if (unwindidx == 0) {
			*allocib = 0;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (DOINGSOFTDEP(vp))
				ffs_update(ip, 1);
		} else {
			r = bread(vp, indirs[unwindidx].in_lbn,
			    (int)fs->fs_bsize, &bp);
			if (r)
				panic("ffs2_balloc: unwind failed");

			bap = (int64_t *) bp->b_data;
			bap[indirs[unwindidx].in_off] = 0;
			bwrite(bp);
		}

		for (i = unwindidx + 1; i <= num; i++) {
			bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
			    0);
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}

	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(ip, *blkp, fs->fs_bsize);
		deallocated += fs->fs_bsize;
	}

	if (deallocated) {
		/*
	 	 * Restore user's disk quota because allocation failed.
	 	 */
		(void) ufs_quota_free_blocks(ip, btodb(deallocated), cred);

		ip->i_ffs2_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	VOP_FSYNC(vp, p->p_ucred, MNT_WAIT, p);
	return (error);
}