Example #1
int
setup(const char *dev)
{
#ifndef VERBOSE_BLOCKMAP
	long bmapsize;
#endif
	struct stat statb;
	int doskipclean;
	u_int64_t maxfilesize;
	int open_flags;
	struct uvnode *ivp;
	struct ubuf *bp;
	int i, isdirty;
	long sn, curseg;
	SEGUSE *sup;
	size_t sumstart;

	havesb = 0;
	doskipclean = skipclean;
	if (stat(dev, &statb) < 0) {
		pfatal("Can't stat %s: %s\n", dev, strerror(errno));
		return (0);
	}
	if (!S_ISCHR(statb.st_mode) && skipclean) {
		pfatal("%s is not a character device", dev);
		if (reply("CONTINUE") == 0)
			return (0);
	}
	if (nflag)
		open_flags = O_RDONLY;
	else
		open_flags = O_RDWR;

	if ((fsreadfd = open(dev, open_flags)) < 0) {
		pfatal("Can't open %s: %s\n", dev, strerror(errno));
		return (0);
	}
	if (nflag) {
		if (preen)
			pfatal("NO WRITE ACCESS");
		printf("** %s (NO WRITE)\n", dev);
		quiet = 0;
	} else if (!preen && !quiet)
		printf("** %s\n", dev);

	fsmodified = 0;
	lfdir = 0;

	/* Initialize time in case we have to write */
	time(&write_time);

	bufinit(0); /* XXX we could make a better guess */
	fs = lfs_init(fsreadfd, bflag, idaddr, 0, debug);
	if (fs == NULL) {
		if (preen)
			printf("%s: ", cdevname());
		errexit("BAD SUPER BLOCK OR IFILE INODE NOT FOUND");
	}

	/* Resize buffer cache now that we have a superblock to guess from. */
	bufrehash((lfs_sb_getsegtabsz(fs) + maxino / lfs_sb_getifpb(fs)) << 4);

	if (lfs_sb_getpflags(fs) & LFS_PF_CLEAN) {
		if (doskipclean) {
			if (!quiet)
				pwarn("%sile system is clean; not checking\n",
				      preen ? "f" : "** F");
			return (-1);
		}
		if (!preen)
			pwarn("** File system is already clean\n");
	}

	if (idaddr) {
		daddr_t tdaddr;
		SEGSUM *sp;
		FINFO *fp;
		int bc;

		if (debug)
			pwarn("adjusting offset, serial for -i 0x%jx\n",
				(uintmax_t)idaddr);
		tdaddr = lfs_sntod(fs, lfs_dtosn(fs, idaddr));
		if (lfs_sntod(fs, lfs_dtosn(fs, tdaddr)) == tdaddr) {
			if (tdaddr == lfs_sb_gets0addr(fs))
				tdaddr += lfs_btofsb(fs, LFS_LABELPAD);
			for (i = 0; i < LFS_MAXNUMSB; i++) {
				if (lfs_sb_getsboff(fs, i) == tdaddr)
					tdaddr += lfs_btofsb(fs, LFS_SBPAD);
				if (lfs_sb_getsboff(fs, i) > tdaddr)
					break;
			}
		}
		lfs_sb_setoffset(fs, tdaddr);
		if (debug)
			pwarn("begin with offset/serial 0x%jx/%jd\n",
				(uintmax_t)lfs_sb_getoffset(fs),
				(intmax_t)lfs_sb_getserial(fs));
		while (tdaddr < idaddr) {
			bread(fs->lfs_devvp, LFS_FSBTODB(fs, tdaddr),
			      lfs_sb_getsumsize(fs),
			      0, &bp);
			sp = (SEGSUM *)bp->b_data;
			sumstart = lfs_ss_getsumstart(fs);
			if (lfs_ss_getsumsum(fs, sp) !=
			    cksum((char *)sp + sumstart,
				  lfs_sb_getsumsize(fs) - sumstart)) {
				brelse(bp, 0);
				if (debug)
					printf("bad cksum at %jx\n",
					       (uintmax_t)tdaddr);
				break;
			}
			fp = SEGSUM_FINFOBASE(fs, sp);
			bc = howmany(lfs_ss_getninos(fs, sp), LFS_INOPB(fs)) <<
				(lfs_sb_getversion(fs) > 1 ? lfs_sb_getffshift(fs) :
						       lfs_sb_getbshift(fs));
			for (i = 0; i < lfs_ss_getnfinfo(fs, sp); i++) {
				bc += lfs_fi_getlastlength(fs, fp) + ((lfs_fi_getnblocks(fs, fp) - 1)
					<< lfs_sb_getbshift(fs));
				fp = NEXT_FINFO(fs, fp);
			}

			tdaddr += lfs_btofsb(fs, bc) + 1;
			lfs_sb_setoffset(fs, tdaddr);
			lfs_sb_setserial(fs, lfs_ss_getserial(fs, sp) + 1);
			brelse(bp, 0);
		}

		/*
		 * Set curseg, nextseg appropriately -- inlined from
		 * lfs_newseg()
		 */
		curseg = lfs_dtosn(fs, lfs_sb_getoffset(fs));
		lfs_sb_setcurseg(fs, lfs_sntod(fs, curseg));
		for (sn = curseg + lfs_sb_getinterleave(fs);;) {
			sn = (sn + 1) % lfs_sb_getnseg(fs);
			if (sn == curseg)
				errx(1, "init: no clean segments");
			LFS_SEGENTRY(sup, fs, sn, bp);
			isdirty = sup->su_flags & SEGUSE_DIRTY;
			brelse(bp, 0);

			if (!isdirty)
				break;
		}

		/* Skip superblock if necessary */
		for (i = 0; i < LFS_MAXNUMSB; i++)
			if (lfs_sb_getoffset(fs) == lfs_sb_getsboff(fs, i))
				lfs_sb_addoffset(fs, lfs_btofsb(fs, LFS_SBPAD));

		++fs->lfs_nactive;
		lfs_sb_setnextseg(fs, lfs_sntod(fs, sn));
		if (debug) {
			pwarn("offset = 0x%" PRIx64 ", serial = %" PRIu64 "\n",
				lfs_sb_getoffset(fs), lfs_sb_getserial(fs));
			pwarn("curseg = %" PRIx64 ", nextseg = %" PRIx64 "\n",
				lfs_sb_getcurseg(fs), lfs_sb_getnextseg(fs));
		}

		if (!nflag && !skipclean) {
			lfs_sb_setidaddr(fs, idaddr);
			fsmodified = 1;
			sbdirty();
		}
	}

	if (debug) {
		pwarn("idaddr    = 0x%jx\n", idaddr ? (uintmax_t)idaddr :
			(uintmax_t)lfs_sb_getidaddr(fs));
		pwarn("dev_bsize = %lu\n", dev_bsize);
		pwarn("lfs_bsize = %lu\n", (unsigned long) lfs_sb_getbsize(fs));
		pwarn("lfs_fsize = %lu\n", (unsigned long) lfs_sb_getfsize(fs));
		pwarn("lfs_frag  = %lu\n", (unsigned long) lfs_sb_getfrag(fs));
		pwarn("lfs_inopb = %lu\n", (unsigned long) lfs_sb_getinopb(fs));
	}
	if (lfs_sb_getversion(fs) == 1)
		maxfsblock = lfs_blkstofrags(fs, lfs_sb_getsize(fs));
	else
		maxfsblock = lfs_sb_getsize(fs);
	maxfilesize = calcmaxfilesize(lfs_sb_getbshift(fs));
	if (/* lfs_sb_getminfree(fs) < 0 || */ lfs_sb_getminfree(fs) > 99) {
		pfatal("IMPOSSIBLE MINFREE=%u IN SUPERBLOCK",
		    lfs_sb_getminfree(fs));
		if (reply("SET TO DEFAULT") == 1) {
			lfs_sb_setminfree(fs, 10);
			sbdirty();
		}
	}
	if (lfs_sb_getbmask(fs) != lfs_sb_getbsize(fs) - 1) {
		pwarn("INCORRECT BMASK=0x%jx IN SUPERBLOCK (SHOULD BE 0x%x)",
		    (uintmax_t)lfs_sb_getbmask(fs),
		    lfs_sb_getbsize(fs) - 1);
		lfs_sb_setbmask(fs, lfs_sb_getbsize(fs) - 1);
		if (preen)
			printf(" (FIXED)\n");
		if (preen || reply("FIX") == 1) {
			sbdirty();
		}
	}
	if (lfs_sb_getffmask(fs) != lfs_sb_getfsize(fs) - 1) {
		pwarn("INCORRECT FFMASK=0x%jx IN SUPERBLOCK (SHOULD BE 0x%x)",
		    (uintmax_t)lfs_sb_getffmask(fs),
		    lfs_sb_getfsize(fs) - 1);
		lfs_sb_setffmask(fs, lfs_sb_getfsize(fs) - 1);
		if (preen)
			printf(" (FIXED)\n");
		if (preen || reply("FIX") == 1) {
			sbdirty();
		}
	}
	if (lfs_sb_getfbmask(fs) != (1U << lfs_sb_getfbshift(fs)) - 1) {
		pwarn("INCORRECT FBMASK=0x%jx IN SUPERBLOCK (SHOULD BE 0x%x)",
		    (uintmax_t)lfs_sb_getfbmask(fs),
		      (1U << lfs_sb_getfbshift(fs)) - 1);
		lfs_sb_setfbmask(fs, (1U << lfs_sb_getfbshift(fs)) - 1);
		if (preen)
			printf(" (FIXED)\n");
		if (preen || reply("FIX") == 1) {
			sbdirty();
		}
	}
	if (lfs_sb_getmaxfilesize(fs) != maxfilesize) {
		pwarn(
		    "INCORRECT MAXFILESIZE=%ju IN SUPERBLOCK (SHOULD BE %ju WITH BSHIFT %u)",
		    (uintmax_t) lfs_sb_getmaxfilesize(fs),
		    (uintmax_t) maxfilesize, lfs_sb_getbshift(fs));
		if (preen)
			printf(" (FIXED)\n");
		if (preen || reply("FIX") == 1) {
			lfs_sb_setmaxfilesize(fs, maxfilesize);
			sbdirty();
		}
	}
	if (lfs_sb_getmaxsymlinklen(fs) != LFS_MAXSYMLINKLEN(fs)) {
		pwarn("INCORRECT MAXSYMLINKLEN=%d IN SUPERBLOCK (SHOULD BE %zu)",
		    lfs_sb_getmaxsymlinklen(fs), LFS_MAXSYMLINKLEN(fs));
		lfs_sb_setmaxsymlinklen(fs, LFS_MAXSYMLINKLEN(fs));
		if (preen)
			printf(" (FIXED)\n");
		if (preen || reply("FIX") == 1) {
			sbdirty();
		}
	}

	/*
	 * Read in the Ifile; we'll be using it a lot.
	 * XXX If the Ifile is corrupted we are in bad shape.  We need to
	 * XXX run through the segment headers of the entire disk to
	 * XXX reconstruct the inode table, then pretend all segments are
	 * XXX dirty while we do the rest.
	 */
	ivp = fs->lfs_ivnode;
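	/*
	 * maxino counts IFILE entries: the blocks of the Ifile that follow
	 * the cleaner info and the segment table, times entries per block.
	 */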
	maxino = ((lfs_dino_getsize(fs, VTOI(ivp)->i_din) - (lfs_sb_getcleansz(fs) + lfs_sb_getsegtabsz(fs))
		* lfs_sb_getbsize(fs)) / lfs_sb_getbsize(fs)) * lfs_sb_getifpb(fs);
	if (debug)
		pwarn("maxino    = %llu\n", (unsigned long long)maxino);
	for (i = 0; i < lfs_dino_getsize(fs, VTOI(ivp)->i_din); i += lfs_sb_getbsize(fs)) {
		bread(ivp, i >> lfs_sb_getbshift(fs), lfs_sb_getbsize(fs), 0, &bp);
		/* XXX check B_ERROR */
		brelse(bp, 0);
	}

	/*
	 * allocate and initialize the necessary maps
	 */
	din_table = ecalloc(maxino, sizeof(*din_table));
	seg_table = ecalloc(lfs_sb_getnseg(fs), sizeof(SEGUSE));
	/* Get segment flags */
	for (i = 0; i < lfs_sb_getnseg(fs); i++) {
		LFS_SEGENTRY(sup, fs, i, bp);
		seg_table[i].su_flags = sup->su_flags & ~SEGUSE_ACTIVE;
		if (preen)
			seg_table[i].su_nbytes = sup->su_nbytes;
		brelse(bp, 0);
	}

	/* Initialize Ifile entry */
	din_table[LFS_IFILE_INUM] = lfs_sb_getidaddr(fs);
	seg_table[lfs_dtosn(fs, lfs_sb_getidaddr(fs))].su_nbytes += DINOSIZE(fs);

#ifndef VERBOSE_BLOCKMAP
	bmapsize = roundup(howmany(maxfsblock, NBBY), sizeof(int16_t));
	blockmap = ecalloc(bmapsize, sizeof(char));
#else
	blockmap = ecalloc(maxfsblock, sizeof(ino_t));
#endif
	statemap = ecalloc(maxino, sizeof(char));
	typemap = ecalloc(maxino, sizeof(char));
	lncntp = ecalloc(maxino, sizeof(int16_t));

	if (preen) {
		n_files = lfs_sb_getnfiles(fs);
		n_blks  = lfs_sb_getdsize(fs) - lfs_sb_getbfree(fs);
		numdirs = maxino;
		inplast = 0;
		listmax = numdirs + 10;
		inpsort = ecalloc(listmax, sizeof(struct inoinfo *));
		inphead = ecalloc(numdirs, sizeof(struct inoinfo *));
	}

	return (1);
}
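
A note on the roll-forward loop above: setup() trusts a segment summary only
if the checksum stored in the SEGSUM matches one computed over the summary
block starting at sumstart.  The stand-alone sketch below illustrates just
that check; toy_segsum and toy_cksum are simplified stand-ins for the on-disk
SEGSUM layout and the kernel's cksum(), not the real definitions.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct toy_segsum {		/* stand-in for SEGSUM */
	uint32_t ss_sumsum;	/* checksum of the rest of the summary */
	uint32_t ss_serial;	/* segment serial number */
	uint8_t	 ss_payload[56];
};

/* Simple additive checksum, standing in for the kernel's cksum(). */
static uint32_t
toy_cksum(const void *p, size_t n)
{
	const uint8_t *b = p;
	uint32_t sum = 0;

	while (n--)
		sum += *b++;
	return sum;
}

int
main(void)
{
	struct toy_segsum ss;
	/* The checksum covers everything after the checksum field itself. */
	size_t sumstart = offsetof(struct toy_segsum, ss_serial);

	memset(&ss, 0, sizeof(ss));
	ss.ss_serial = 42;
	ss.ss_sumsum = toy_cksum((char *)&ss + sumstart,
	    sizeof(ss) - sumstart);

	/* setup() stops its roll-forward when this comparison fails. */
	if (ss.ss_sumsum == toy_cksum((char *)&ss + sumstart,
	    sizeof(ss) - sumstart))
		printf("summary checksum OK\n");
	else
		printf("bad cksum\n");
	return 0;
}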
Example #2
int
lfs_bmapv(struct lwp *l, fsid_t *fsidp, BLOCK_INFO *blkiov, int blkcnt)
{
	BLOCK_INFO *blkp;
	IFILE *ifp;
	struct buf *bp;
	struct inode *ip = NULL;
	struct lfs *fs;
	struct mount *mntp;
	struct ulfsmount *ump;
	struct vnode *vp;
	ino_t lastino;
	daddr_t v_daddr;
	int cnt, error;
	int numrefed = 0;

	error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_LFS,
	    KAUTH_REQ_SYSTEM_LFS_BMAPV, NULL, NULL, NULL);
	if (error)
		return (error);

	if ((mntp = vfs_getvfs(fsidp)) == NULL)
		return (ENOENT);

	if ((error = vfs_busy(mntp, NULL)) != 0)
		return (error);

	ump = VFSTOULFS(mntp);
	fs = ump->um_lfs;

	if (fs->lfs_cleaner_thread == NULL)
		fs->lfs_cleaner_thread = curlwp;
	KASSERT(fs->lfs_cleaner_thread == curlwp);

	cnt = blkcnt;

	error = 0;

	/* these were inside the initialization for the for loop */
	vp = NULL;
	v_daddr = LFS_UNUSED_DADDR;
	lastino = LFS_UNUSED_INUM;
	for (blkp = blkiov; cnt--; ++blkp)
	{
		/*
		 * Get the IFILE entry (only once) and see if the file still
		 * exists.
		 */
		if (lastino != blkp->bi_inode) {
			/*
			 * Finish the old file, if there was one.
			 */
			if (vp != NULL) {
				vput(vp);
				vp = NULL;
				numrefed--;
			}

			/*
			 * Start a new file
			 */
			lastino = blkp->bi_inode;
			if (blkp->bi_inode == LFS_IFILE_INUM)
				v_daddr = lfs_sb_getidaddr(fs);
			else {
				LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
				v_daddr = lfs_if_getdaddr(fs, ifp);
				brelse(bp, 0);
			}
			if (v_daddr == LFS_UNUSED_DADDR) {
				blkp->bi_daddr = LFS_UNUSED_DADDR;
				continue;
			}
			error = lfs_fastvget(mntp, blkp->bi_inode, NULL,
			    LK_SHARED, &vp);
			if (error) {
				DLOG((DLOG_CLEAN, "lfs_bmapv: lfs_fastvget ino"
				      " %d failed with %d",
				      blkp->bi_inode, error));
				KASSERT(vp == NULL);
				continue;
			} else {
				KASSERT(VOP_ISLOCKED(vp));
				numrefed++;
			}
			ip = VTOI(vp);
		} else if (vp == NULL) {
			/*
			 * This can only happen if the vnode is dead.
			 * Keep going.	Note that we DO NOT set the
			 * bi_addr to anything -- if we failed to get
			 * the vnode, for example, we want to assume
			 * conservatively that all of its blocks *are*
			 * located in the segment in question.
			 * lfs_markv will throw them out if we are
			 * wrong.
			 */
			continue;
		}

		/* Past this point we are guaranteed that vp, ip are valid. */

		if (blkp->bi_lbn == LFS_UNUSED_LBN) {
			/*
			 * We just want the inode address, which is
			 * conveniently in v_daddr.
			 */
			blkp->bi_daddr = v_daddr;
		} else {
			daddr_t bi_daddr;

			error = VOP_BMAP(vp, blkp->bi_lbn, NULL,
					 &bi_daddr, NULL);
			if (error)
			{
				blkp->bi_daddr = LFS_UNUSED_DADDR;
				continue;
			}
			blkp->bi_daddr = LFS_DBTOFSB(fs, bi_daddr);
			/* Fill in the block size, too */
			if (blkp->bi_lbn >= 0)
				blkp->bi_size = lfs_blksize(fs, ip, blkp->bi_lbn);
			else
				blkp->bi_size = lfs_sb_getbsize(fs);
		}
	}

	/*
	 * Finish the old file, if there was one.
	 */
	if (vp != NULL) {
		vput(vp);
		vp = NULL;
		numrefed--;
	}

#ifdef DIAGNOSTIC
	if (numrefed != 0)
		panic("lfs_bmapv: numrefed=%d", numrefed);
#endif

	vfs_unbusy(mntp, false, NULL);

	return 0;
}
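
lfs_bmapv relies on the BLOCK_INFO array arriving grouped by inode: it holds
a reference to at most one vnode at a time and releases it whenever bi_inode
changes.  Below is a minimal user-space sketch of that pattern; toy_get() and
toy_put() are hypothetical stand-ins for lfs_fastvget() and vput().

#include <stddef.h>
#include <stdio.h>

struct rec { int ino; int lbn; };	/* stand-in for BLOCK_INFO */

static int  toy_get(int ino) { printf("get ino %d\n", ino); return ino; }
static void toy_put(int h)   { printf("put ino %d\n", h); }

int
main(void)
{
	struct rec recs[] = { {3, 0}, {3, 1}, {7, 0}, {7, 2}, {7, 5} };
	int lastino = -1, h = -1;
	size_t i;

	for (i = 0; i < sizeof(recs) / sizeof(recs[0]); i++) {
		if (recs[i].ino != lastino) {
			if (h != -1) {		/* finish the old file */
				toy_put(h);
				h = -1;
			}
			lastino = recs[i].ino;	/* start a new file */
			h = toy_get(lastino);
		}
		printf("  map ino %d lbn %d\n", recs[i].ino, recs[i].lbn);
	}
	if (h != -1)				/* finish the last file */
		toy_put(h);
	return 0;
}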
Example #3
int
lfs_markv(struct lwp *l, fsid_t *fsidp, BLOCK_INFO *blkiov,
    int blkcnt)
{
	BLOCK_INFO *blkp;
	IFILE *ifp;
	struct buf *bp;
	struct inode *ip = NULL;
	struct lfs *fs;
	struct mount *mntp;
	struct ulfsmount *ump;
	struct vnode *vp;
	ino_t lastino;
	daddr_t b_daddr;
	int cnt, error;
	int do_again = 0;
	int numrefed = 0;
	ino_t maxino;
	size_t obsize;

	/* number of blocks/inodes that we have already bwrite'ed */
	int nblkwritten, ninowritten;

	error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_LFS,
	    KAUTH_REQ_SYSTEM_LFS_MARKV, NULL, NULL, NULL);
	if (error)
		return (error);

	if ((mntp = vfs_getvfs(fsidp)) == NULL)
		return (ENOENT);

	ump = VFSTOULFS(mntp);
	fs = ump->um_lfs;

	if (fs->lfs_ronly)
		return EROFS;

	maxino = (lfs_fragstoblks(fs, lfs_dino_getblocks(fs, VTOI(fs->lfs_ivnode)->i_din)) -
		      lfs_sb_getcleansz(fs) - lfs_sb_getsegtabsz(fs)) * lfs_sb_getifpb(fs);

	cnt = blkcnt;

	if ((error = vfs_busy(mntp, NULL)) != 0)
		return (error);

	/*
	 * This seglock is just to prevent the fact that we might have to sleep
	 * from allowing the possibility that our blocks might become
	 * invalid.
	 *
	 * It is also important to note here that unless we specify SEGM_CKP,
	 * any Ifile blocks that we might be asked to clean will never get
	 * to the disk.
	 */
	lfs_seglock(fs, SEGM_CLEAN | SEGM_CKP | SEGM_SYNC);

	/* Mark blocks/inodes dirty.  */
	error = 0;

	/* these were inside the initialization for the for loop */
	vp = NULL;
	lastino = LFS_UNUSED_INUM;
	nblkwritten = ninowritten = 0;
	for (blkp = blkiov; cnt--; ++blkp)
	{
		/* Bounds-check incoming data, avoid panic for failed VGET */
		if (blkp->bi_inode <= 0 || blkp->bi_inode >= maxino) {
			error = EINVAL;
			goto err3;
		}
		/*
		 * Get the IFILE entry (only once) and see if the file still
		 * exists.
		 */
		if (lastino != blkp->bi_inode) {
			/*
			 * Finish the old file, if there was one.
			 */
			if (vp != NULL) {
				vput(vp);
				vp = NULL;
				numrefed--;
			}

			/*
			 * Start a new file
			 */
			lastino = blkp->bi_inode;

			/* Get the vnode/inode. */
			error = lfs_fastvget(mntp, blkp->bi_inode, blkp,
			    LK_EXCLUSIVE | LK_NOWAIT, &vp);
			if (error) {
				DLOG((DLOG_CLEAN, "lfs_markv: lfs_fastvget"
				      " failed with %d (ino %d, segment %d)\n",
				      error, blkp->bi_inode,
				      lfs_dtosn(fs, blkp->bi_daddr)));
				/*
				 * If we got EAGAIN, that means that the
				 * Inode was locked.  This is
				 * recoverable: just clean the rest of
				 * this segment, and let the cleaner try
				 * again with another.	(When the
				 * cleaner runs again, this segment will
				 * sort high on the list, since it is
				 * now almost entirely empty.)
				 */
				if (error == EAGAIN) {
					error = 0;
					do_again++;
				} else
					KASSERT(error == ENOENT);
				KASSERT(vp == NULL);
				ip = NULL;
				continue;
			}

			ip = VTOI(vp);
			numrefed++;
			ninowritten++;
		} else if (vp == NULL) {
			/*
			 * This can only happen if the vnode is dead (or,
			 * in any case, we could not get it; e.g., it was
			 * locked).  Keep going.
			 */
			continue;
		}

		/* Past this point we are guaranteed that vp, ip are valid. */

		/* Can't clean VU_DIROP directories in case of truncation */
		/* XXX - maybe we should mark removed dirs specially? */
		if (vp->v_type == VDIR && (vp->v_uflag & VU_DIROP)) {
			do_again++;
			continue;
		}

		/* If this BLOCK_INFO didn't contain a block, keep going. */
		if (blkp->bi_lbn == LFS_UNUSED_LBN) {
			/* XXX need to make sure that the inode gets written in this case */
			/* XXX but only write the inode if it's the right one */
			if (blkp->bi_inode != LFS_IFILE_INUM) {
				LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
				if (lfs_if_getdaddr(fs, ifp) == blkp->bi_daddr) {
					mutex_enter(&lfs_lock);
					LFS_SET_UINO(ip, IN_CLEANING);
					mutex_exit(&lfs_lock);
				}
				brelse(bp, 0);
			}
			continue;
		}

		b_daddr = 0;
		if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &b_daddr, NULL) ||
		    LFS_DBTOFSB(fs, b_daddr) != blkp->bi_daddr)
		{
			if (lfs_dtosn(fs, LFS_DBTOFSB(fs, b_daddr)) ==
			    lfs_dtosn(fs, blkp->bi_daddr))
			{
				DLOG((DLOG_CLEAN, "lfs_markv: wrong da same seg: %jx vs %jx\n",
				      (uintmax_t)blkp->bi_daddr, (uintmax_t)LFS_DBTOFSB(fs, b_daddr)));
			}
			do_again++;
			continue;
		}

		/*
		 * Check block sizes.  The blocks being cleaned come from
		 * disk, so they should have the same size as their on-disk
		 * counterparts.
		 */
		if (blkp->bi_lbn >= 0)
			obsize = lfs_blksize(fs, ip, blkp->bi_lbn);
		else
			obsize = lfs_sb_getbsize(fs);
		/* Check for fragment size change */
		if (blkp->bi_lbn >= 0 && blkp->bi_lbn < ULFS_NDADDR) {
			obsize = ip->i_lfs_fragsize[blkp->bi_lbn];
		}
		if (obsize != blkp->bi_size) {
			DLOG((DLOG_CLEAN, "lfs_markv: ino %d lbn %jd wrong"
			      " size (%ld != %d), try again\n",
			      blkp->bi_inode, (intmax_t)blkp->bi_lbn,
			      (long) obsize, blkp->bi_size));
			do_again++;
			continue;
		}

		/*
		 * If we get to here, then we are keeping the block.  If
		 * it is an indirect block, we want to actually put it
		 * in the buffer cache so that it can be updated in the
		 * finish_meta section.	 If it's not, we need to
		 * allocate a fake buffer so that writeseg can perform
		 * the copyin and write the buffer.
		 */
		if (ip->i_number != LFS_IFILE_INUM && blkp->bi_lbn >= 0) {
			/* Data Block */
			bp = lfs_fakebuf(fs, vp, blkp->bi_lbn,
					 blkp->bi_size, blkp->bi_bp);
			/* Pretend we used bread() to get it */
			bp->b_blkno = LFS_FSBTODB(fs, blkp->bi_daddr);
		} else {
			/* Indirect block or ifile */
			if (blkp->bi_size != lfs_sb_getbsize(fs) &&
			    ip->i_number != LFS_IFILE_INUM)
				panic("lfs_markv: partial indirect block?"
				    " size=%d\n", blkp->bi_size);
			bp = getblk(vp, blkp->bi_lbn, blkp->bi_size, 0, 0);
			if (!(bp->b_oflags & (BO_DONE|BO_DELWRI))) {
				/*
				 * The block in question was not found
				 * in the cache; i.e., the block that
				 * getblk() returned is empty.	So, we
				 * can (and should) copy in the
				 * contents, because we've already
				 * determined that this was the right
				 * version of this block on disk.
				 *
				 * And, it can't have changed underneath
				 * us, because we have the segment lock.
				 */
				error = copyin(blkp->bi_bp, bp->b_data, blkp->bi_size);
				if (error)
					goto err2;
			}
		}
		if ((error = lfs_bwrite_ext(bp, BW_CLEAN)) != 0)
			goto err2;

		nblkwritten++;
		/*
		 * XXX should account indirect blocks and ifile pages as well
		 */
		if (nblkwritten + lfs_lblkno(fs, ninowritten * DINOSIZE(fs))
		    > LFS_MARKV_MAX_BLOCKS) {
			DLOG((DLOG_CLEAN, "lfs_markv: writing %d blks %d inos\n",
			      nblkwritten, ninowritten));
			lfs_segwrite(mntp, SEGM_CLEAN);
			nblkwritten = ninowritten = 0;
		}
	}

	/*
	 * Finish the old file, if there was one
	 */
	if (vp != NULL) {
		vput(vp);
		vp = NULL;
		numrefed--;
	}

#ifdef DIAGNOSTIC
	if (numrefed != 0)
		panic("lfs_markv: numrefed=%d", numrefed);
#endif
	DLOG((DLOG_CLEAN, "lfs_markv: writing %d blks %d inos (check point)\n",
	      nblkwritten, ninowritten));

	/*
	 * The last write has to be SEGM_SYNC, because of calling semantics.
	 * It also has to be SEGM_CKP, because otherwise we could write
	 * over the newly cleaned data contained in a checkpoint, and then
	 * we'd be unhappy at recovery time.
	 */
	lfs_segwrite(mntp, SEGM_CLEAN | SEGM_CKP | SEGM_SYNC);

	lfs_segunlock(fs);

	vfs_unbusy(mntp, false, NULL);
	if (error)
		return (error);
	else if (do_again)
		return EAGAIN;

	return 0;

err2:
	DLOG((DLOG_CLEAN, "lfs_markv err2\n"));

	/*
	 * XXX we're here because copyin() failed.
	 * XXX it means that we can't trust the cleanerd.  too bad.
	 * XXX how can we recover from this?
	 */

err3:
	/*
	 * XXX should do segwrite here anyway?
	 */

	if (vp != NULL) {
		vput(vp);
		vp = NULL;
		--numrefed;
	}

	lfs_segunlock(fs);
	vfs_unbusy(mntp, false, NULL);
#ifdef DIAGNOSTIC
	if (numrefed != 0)
		panic("lfs_markv: numrefed=%d", numrefed);
#endif

	return (error);
}
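
lfs_markv bounds how much it dirties between writes: when the block count
plus an estimate for the inode blocks crosses LFS_MARKV_MAX_BLOCKS, it calls
lfs_segwrite() and resets the counters.  Here is a sketch of that
bounded-batch pattern; flush() and THRESHOLD are hypothetical stand-ins for
lfs_segwrite() and LFS_MARKV_MAX_BLOCKS.

#include <stdio.h>

#define THRESHOLD 4	/* hypothetical stand-in for LFS_MARKV_MAX_BLOCKS */

static void
flush(int nblk, int nino)
{
	printf("flush: %d blks %d inos\n", nblk, nino);
}

int
main(void)
{
	int nblkwritten = 0, ninowritten = 0;
	int i;

	for (i = 0; i < 10; i++) {
		if (i % 3 == 0)
			ninowritten++;	/* pretend we started a new file */
		nblkwritten++;		/* pretend we queued one block */
		if (nblkwritten + ninowritten > THRESHOLD) {
			flush(nblkwritten, ninowritten);
			nblkwritten = ninowritten = 0;
		}
	}
	flush(nblkwritten, ninowritten);	/* final write */
	return 0;
}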
Example #4
/* VOP_BWRITE ULFS_NIADDR+2 times */
int
lfs_balloc(struct vnode *vp, off_t startoffset, int iosize, kauth_cred_t cred,
    int flags, struct buf **bpp)
{
	int offset;
	daddr_t daddr, idaddr;
	struct buf *ibp, *bp;
	struct inode *ip;
	struct lfs *fs;
	struct indir indirs[ULFS_NIADDR+2], *idp;
	daddr_t	lbn, lastblock;
	int bcount;
	int error, frags, i, nsize, osize, num;

	ip = VTOI(vp);
	fs = ip->i_lfs;
	offset = lfs_blkoff(fs, startoffset);
	KASSERT(iosize <= lfs_sb_getbsize(fs));
	lbn = lfs_lblkno(fs, startoffset);
	/* (void)lfs_check(vp, lbn, 0); */

	ASSERT_MAYBE_SEGLOCK(fs);

	/*
	 * Three cases: it's a block beyond the end of file, it's a block in
	 * the file that may or may not have been assigned a disk address or
	 * we're writing an entire block.
	 *
	 * Note, if the daddr is UNWRITTEN, the block already exists in
	 * the cache (it was read or written earlier).	If so, make sure
	 * we don't count it as a new block or zero out its contents. If
	 * it did not, make sure we allocate any necessary indirect
	 * blocks.
	 *
	 * If we are writing a block beyond the end of the file, we need to
	 * check if the old last block was a fragment.	If it was, we need
	 * to rewrite it.
	 */

	if (bpp)
		*bpp = NULL;

	/* Check for block beyond end of file and fragment extension needed. */
	lastblock = lfs_lblkno(fs, ip->i_size);
	if (lastblock < ULFS_NDADDR && lastblock < lbn) {
		osize = lfs_blksize(fs, ip, lastblock);
		if (osize < lfs_sb_getbsize(fs) && osize > 0) {
			if ((error = lfs_fragextend(vp, osize, lfs_sb_getbsize(fs),
						    lastblock,
						    (bpp ? &bp : NULL), cred)))
				return (error);
			ip->i_size = (lastblock + 1) * lfs_sb_getbsize(fs);
			lfs_dino_setsize(fs, ip->i_din, ip->i_size);
			uvm_vnp_setsize(vp, ip->i_size);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (bpp)
				(void) VOP_BWRITE(bp->b_vp, bp);
		}
	}

	/*
	 * If the block we are writing is a direct block, it's the last
	 * block in the file, and offset + iosize is less than a full
	 * block, we can write one or more fragments.  There are two cases:
	 * the block is brand new and we should allocate it the correct
	 * size or it already exists and contains some fragments and
	 * may need to extend it.
	 */
	if (lbn < ULFS_NDADDR && lfs_lblkno(fs, ip->i_size) <= lbn) {
		osize = lfs_blksize(fs, ip, lbn);
		nsize = lfs_fragroundup(fs, offset + iosize);
		if (lfs_lblktosize(fs, lbn) >= ip->i_size) {
			/* Brand new block or fragment */
			frags = lfs_numfrags(fs, nsize);
			if (!ISSPACE(fs, frags, cred))
				return ENOSPC;
			if (bpp) {
				*bpp = bp = getblk(vp, lbn, nsize, 0, 0);
				bp->b_blkno = UNWRITTEN;
				if (flags & B_CLRBUF)
					clrbuf(bp);
			}
			ip->i_lfs_effnblks += frags;
			mutex_enter(&lfs_lock);
			lfs_sb_subbfree(fs, frags);
			mutex_exit(&lfs_lock);
			lfs_dino_setdb(fs, ip->i_din, lbn, UNWRITTEN);
		} else {
			if (nsize <= osize) {
				/* No need to extend */
				if (bpp && (error = bread(vp, lbn, osize,
				    0, &bp)))
					return error;
			} else {
				/* Extend existing block */
				if ((error =
				     lfs_fragextend(vp, osize, nsize, lbn,
						    (bpp ? &bp : NULL), cred)))
					return error;
			}
			if (bpp)
				*bpp = bp;
		}
		return 0;
	}

	error = ulfs_bmaparray(vp, lbn, &daddr, &indirs[0], &num, NULL, NULL);
	if (error)
		return (error);

	KASSERT(daddr <= LFS_MAX_DADDR(fs));

	/*
	 * Do byte accounting all at once, so we can gracefully fail *before*
	 * we start assigning blocks.
	 */
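	/* um_seqinc is the number of frags in a full block. */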
	frags = fs->um_seqinc;
	bcount = 0;
	if (daddr == UNASSIGNED) {
		bcount = frags;
	}
	for (i = 1; i < num; ++i) {
		if (!indirs[i].in_exists) {
			bcount += frags;
		}
	}
	if (ISSPACE(fs, bcount, cred)) {
		mutex_enter(&lfs_lock);
		lfs_sb_subbfree(fs, bcount);
		mutex_exit(&lfs_lock);
		ip->i_lfs_effnblks += bcount;
	} else {
		return ENOSPC;
	}

	if (daddr == UNASSIGNED) {
		if (num > 0 && lfs_dino_getib(fs, ip->i_din, indirs[0].in_off) == 0) {
			lfs_dino_setib(fs, ip->i_din, indirs[0].in_off, UNWRITTEN);
		}

		/*
		 * Create new indirect blocks if necessary
		 */
		if (num > 1) {
			idaddr = lfs_dino_getib(fs, ip->i_din, indirs[0].in_off);
			for (i = 1; i < num; ++i) {
				ibp = getblk(vp, indirs[i].in_lbn,
				    lfs_sb_getbsize(fs), 0, 0);
				if (!indirs[i].in_exists) {
					clrbuf(ibp);
					ibp->b_blkno = UNWRITTEN;
				} else if (!(ibp->b_oflags & (BO_DELWRI | BO_DONE))) {
					ibp->b_blkno = LFS_FSBTODB(fs, idaddr);
					ibp->b_flags |= B_READ;
					VOP_STRATEGY(vp, ibp);
					biowait(ibp);
				}
				/*
				 * This block exists, but the next one may not.
				 * If that is the case mark it UNWRITTEN to keep
				 * the accounting straight.
				 */
				/* XXX ondisk32 */
				if (((int32_t *)ibp->b_data)[indirs[i].in_off] == 0)
					((int32_t *)ibp->b_data)[indirs[i].in_off] =
						UNWRITTEN;
				/* XXX ondisk32 */
				idaddr = ((int32_t *)ibp->b_data)[indirs[i].in_off];
#ifdef DEBUG
				if (vp == fs->lfs_ivnode) {
					LFS_ENTER_LOG("balloc", __FILE__,
						__LINE__, indirs[i].in_lbn,
						ibp->b_flags, curproc->p_pid);
				}
#endif
				if ((error = VOP_BWRITE(ibp->b_vp, ibp)))
					return error;
			}
		}
	}


	/*
	 * Get the existing block from the cache, if requested.
	 */
	if (bpp)
		*bpp = bp = getblk(vp, lbn, lfs_blksize(fs, ip, lbn), 0, 0);

	/*
	 * Do accounting on blocks that represent pages.
	 */
	if (!bpp)
		lfs_register_block(vp, lbn);

	/*
	 * The block we are writing may be a brand new block
	 * in which case we need to do accounting.
	 *
	 * We can tell a truly new block because ulfs_bmaparray will say
	 * it is UNASSIGNED.  Once we allocate it we will assign it the
	 * disk address UNWRITTEN.
	 */
	if (daddr == UNASSIGNED) {
		if (bpp) {
			if (flags & B_CLRBUF)
				clrbuf(bp);

			/* Note the new address */
			bp->b_blkno = UNWRITTEN;
		}

		switch (num) {
		    case 0:
			lfs_dino_setdb(fs, ip->i_din, lbn, UNWRITTEN);
			break;
		    case 1:
			lfs_dino_setib(fs, ip->i_din, indirs[0].in_off, UNWRITTEN);
			break;
		    default:
			idp = &indirs[num - 1];
			if (bread(vp, idp->in_lbn, lfs_sb_getbsize(fs),
				  B_MODIFY, &ibp))
				panic("lfs_balloc: bread bno %lld",
				    (long long)idp->in_lbn);
			/* XXX ondisk32 */
			((int32_t *)ibp->b_data)[idp->in_off] = UNWRITTEN;
#ifdef DEBUG
			if (vp == fs->lfs_ivnode) {
				LFS_ENTER_LOG("balloc", __FILE__,
					__LINE__, idp->in_lbn,
					ibp->b_flags, curproc->p_pid);
			}
#endif
			VOP_BWRITE(ibp->b_vp, ibp);
		}
	} else if (bpp && !(bp->b_oflags & (BO_DONE|BO_DELWRI))) {
		/*
		 * Not a brand new block, also not in the cache;
		 * read it in from disk.
		 */
		if (iosize == lfs_sb_getbsize(fs))
			/* Optimization: I/O is unnecessary. */
			bp->b_blkno = daddr;
		else {
			/*
			 * We need to read the block to preserve the
			 * existing bytes.
			 */
			bp->b_blkno = daddr;
			bp->b_flags |= B_READ;
			VOP_STRATEGY(vp, bp);
			return (biowait(bp));
		}
	}

	return (0);
}
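
The fragment handling in lfs_balloc hinges on rounding the end of the write
up to a fragment boundary (lfs_fragroundup) and converting bytes to
fragments (lfs_numfrags).  Assuming the usual power-of-two mask and divide,
a toy version:

#include <stdio.h>

#define FSIZE 1024	/* fragment size; 8192/1024 is a common LFS layout */

/* Round a size in bytes up to a whole number of fragments. */
static long
toy_fragroundup(long nbytes)
{
	return (nbytes + FSIZE - 1) & ~((long)FSIZE - 1);
}

/* How many fragments cover nbytes? */
static long
toy_numfrags(long nbytes)
{
	return nbytes / FSIZE;
}

int
main(void)
{
	long offset = 2048, iosize = 952;	/* a short write */
	long nsize = toy_fragroundup(offset + iosize);

	/* lfs_balloc() would allocate a 3-fragment block here, not 8192. */
	printf("nsize = %ld bytes = %ld frags\n", nsize, toy_numfrags(nsize));
	return 0;
}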
Example #5
/*
 * Release blocks associated with the inode ip and stored in the indirect
 * block bn.  Blocks are free'd in LIFO order up to (but not including)
 * lastbn.  If level is greater than SINGLE, the block is an indirect block
 * and recursive calls to indirtrunc must be used to cleanse other indirect
 * blocks.
 *
 * NB: triple indirect blocks are untested.
 */
static int
lfs_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn,
	       daddr_t lastbn, int level, daddr_t *countp,
	       daddr_t *rcountp, long *lastsegp, size_t *bcp)
{
	int i;
	struct buf *bp;
	struct lfs *fs = ip->i_lfs;
	int32_t *bap;	/* XXX ondisk32 */
	struct vnode *vp;
	daddr_t nb, nlbn, last;
	int32_t *copy = NULL;	/* XXX ondisk32 */
	daddr_t blkcount, rblkcount, factor;
	int nblocks;
	daddr_t blocksreleased = 0, real_released = 0;
	int error = 0, allerror = 0;

	ASSERT_SEGLOCK(fs);
	/*
	 * Calculate index in current block of last
	 * block to be kept.  -1 indicates the entire
	 * block so we need not calculate the index.
	 */
	factor = 1;
	for (i = SINGLE; i < level; i++)
		factor *= LFS_NINDIR(fs);
	last = lastbn;
	if (lastbn > 0)
		last /= factor;
	nblocks = lfs_btofsb(fs, lfs_sb_getbsize(fs));
	/*
	 * Get buffer of block pointers, zero those entries corresponding
	 * to blocks to be free'd, and update on disk copy first.  Since
	 * double (triple) indirect blocks are freed before single (double)
	 * indirect blocks, calls to bmap on these blocks will fail.
	 * However, we already have the on-disk address, so we have to set
	 * the b_blkno field explicitly instead of letting bread do
	 * everything for us.
	 */
	vp = ITOV(ip);
	bp = getblk(vp, lbn, lfs_sb_getbsize(fs), 0, 0);
	if (bp->b_oflags & (BO_DONE | BO_DELWRI)) {
		/* Braces must be here in case trace evaluates to nothing. */
		trace(TR_BREADHIT, pack(vp, lfs_sb_getbsize(fs)), lbn);
	} else {
		trace(TR_BREADMISS, pack(vp, lfs_sb_getbsize(fs)), lbn);
		curlwp->l_ru.ru_inblock++; /* pay for read */
		bp->b_flags |= B_READ;
		if (bp->b_bcount > bp->b_bufsize)
			panic("lfs_indirtrunc: bad buffer size");
		bp->b_blkno = LFS_FSBTODB(fs, dbn);
		VOP_STRATEGY(vp, bp);
		error = biowait(bp);
	}
	if (error) {
		brelse(bp, 0);
		*countp = *rcountp = 0;
		return (error);
	}

	bap = (int32_t *)bp->b_data;	/* XXX ondisk32 */
	if (lastbn >= 0) {
		copy = lfs_malloc(fs, lfs_sb_getbsize(fs), LFS_NB_IBLOCK);
		memcpy((void *)copy, (void *)bap, lfs_sb_getbsize(fs));
		memset((void *)&bap[last + 1], 0,
		/* XXX ondisk32 */
		  (u_int)(LFS_NINDIR(fs) - (last + 1)) * sizeof (int32_t));
		error = VOP_BWRITE(bp->b_vp, bp);
		if (error)
			allerror = error;
		bap = copy;
	}

	/*
	 * Recursively free totally unused blocks.
	 */
	for (i = LFS_NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last;
	    i--, nlbn += factor) {
		nb = bap[i];
		if (nb == 0)
			continue;
		if (level > SINGLE) {
			error = lfs_indirtrunc(ip, nlbn, nb,
					       (daddr_t)-1, level - 1,
					       &blkcount, &rblkcount,
					       lastsegp, bcp);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
			real_released += rblkcount;
		}
		lfs_blkfree(fs, ip, nb, lfs_sb_getbsize(fs), lastsegp, bcp);
		if (bap[i] > 0)
			real_released += nblocks;
		blocksreleased += nblocks;
	}

	/*
	 * Recursively free last partial block.
	 */
	if (level > SINGLE && lastbn >= 0) {
		last = lastbn % factor;
		nb = bap[i];
		if (nb != 0) {
			error = lfs_indirtrunc(ip, nlbn, nb,
					       last, level - 1, &blkcount,
					       &rblkcount, lastsegp, bcp);
			if (error)
				allerror = error;
			real_released += rblkcount;
			blocksreleased += blkcount;
		}
	}

	if (copy != NULL) {
		lfs_free(fs, copy, LFS_NB_IBLOCK);
	} else {
		mutex_enter(&bufcache_lock);
		if (bp->b_oflags & BO_DELWRI) {
			LFS_UNLOCK_BUF(bp);
			lfs_sb_addavail(fs, lfs_btofsb(fs, bp->b_bcount));
			wakeup(&fs->lfs_availsleep);
		}
		brelsel(bp, BC_INVAL);
		mutex_exit(&bufcache_lock);
	}

	*countp = blocksreleased;
	*rcountp = real_released;
	return (allerror);
}
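
The factor computed at the top of lfs_indirtrunc is the number of data
blocks addressed by one pointer at the current level: NINDIR^(level-1),
where NINDIR is the number of block pointers that fit in one indirect block.
A toy calculation, assuming 8 KB blocks and the 32-bit on-disk pointers the
"XXX ondisk32" comments refer to:

#include <stdint.h>
#include <stdio.h>

#define SINGLE 0
#define DOUBLE 1
#define TRIPLE 2
#define BSIZE  8192
#define NINDIR (BSIZE / sizeof(int32_t))	/* pointers per indirect block */

int
main(void)
{
	int level, i;

	for (level = SINGLE; level <= TRIPLE; level++) {
		long long factor = 1;

		for (i = SINGLE; i < level; i++)
			factor *= NINDIR;
		/* One pointer at this level covers `factor` data blocks. */
		printf("level %d: factor = %lld, whole block spans %lld blocks\n",
		    level, factor, factor * (long long)NINDIR);
	}
	return 0;
}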
Example #6
int
lfs_truncate(struct vnode *ovp, off_t length, int ioflag, kauth_cred_t cred)
{
	daddr_t lastblock;
	struct inode *oip = VTOI(ovp);
	daddr_t bn, lbn, lastiblock[ULFS_NIADDR], indir_lbn[ULFS_NIADDR];
	/* XXX ondisk32 */
	int32_t newblks[ULFS_NDADDR + ULFS_NIADDR];
	struct lfs *fs;
	struct buf *bp;
	int offset, size, level;
	daddr_t count, rcount;
	daddr_t blocksreleased = 0, real_released = 0;
	int i, nblocks;
	int aflags, error, allerror = 0;
	off_t osize;
	long lastseg;
	size_t bc;
	int obufsize, odb;
	int usepc;

	if (ovp->v_type == VCHR || ovp->v_type == VBLK ||
	    ovp->v_type == VFIFO || ovp->v_type == VSOCK) {
		KASSERT(oip->i_size == 0);
		return 0;
	}

	if (length < 0)
		return (EINVAL);

	/*
	 * Just return; do not update the modification times.
	 */
	if (oip->i_size == length) {
		/* still do a uvm_vnp_setsize() as writesize may be larger */
		uvm_vnp_setsize(ovp, length);
		return (0);
	}

	fs = oip->i_lfs;

	if (ovp->v_type == VLNK &&
	    (oip->i_size < fs->um_maxsymlinklen ||
	     (fs->um_maxsymlinklen == 0 &&
	      oip->i_ffs1_blocks == 0))) {
#ifdef DIAGNOSTIC
		if (length != 0)
			panic("lfs_truncate: partial truncate of symlink");
#endif
		memset((char *)SHORTLINK(oip), 0, (u_int)oip->i_size);
		oip->i_size = oip->i_ffs1_size = 0;
		oip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (lfs_update(ovp, NULL, NULL, 0));
	}
	lfs_imtime(fs);
	osize = oip->i_size;
	usepc = (ovp->v_type == VREG && ovp != fs->lfs_ivnode);

	ASSERT_NO_SEGLOCK(fs);
	/*
	 * Lengthen the size of the file. We must ensure that the
	 * last byte of the file is allocated. Since the smallest
	 * value of osize is 0, length will be at least 1.
	 */
	if (osize < length) {
		if (length > fs->um_maxfilesize)
			return (EFBIG);
		aflags = B_CLRBUF;
		if (ioflag & IO_SYNC)
			aflags |= B_SYNC;
		if (usepc) {
			if (lfs_lblkno(fs, osize) < ULFS_NDADDR &&
			    lfs_lblkno(fs, osize) != lfs_lblkno(fs, length) &&
			    lfs_blkroundup(fs, osize) != osize) {
				off_t eob;

				eob = lfs_blkroundup(fs, osize);
				uvm_vnp_setwritesize(ovp, eob);
				error = ulfs_balloc_range(ovp, osize,
				    eob - osize, cred, aflags);
				if (error) {
					(void) lfs_truncate(ovp, osize,
						    ioflag & IO_SYNC, cred);
					return error;
				}
				if (ioflag & IO_SYNC) {
					mutex_enter(ovp->v_interlock);
					VOP_PUTPAGES(ovp,
					    trunc_page(osize & lfs_sb_getbmask(fs)),
					    round_page(eob),
					    PGO_CLEANIT | PGO_SYNCIO);
				}
			}
			uvm_vnp_setwritesize(ovp, length);
			error = ulfs_balloc_range(ovp, length - 1, 1, cred,
						 aflags);
			if (error) {
				(void) lfs_truncate(ovp, osize,
						    ioflag & IO_SYNC, cred);
				return error;
			}
			uvm_vnp_setsize(ovp, length);
			oip->i_flag |= IN_CHANGE | IN_UPDATE;
			KASSERT(ovp->v_size == oip->i_size);
			oip->i_lfs_hiblk = lfs_lblkno(fs, oip->i_size + lfs_sb_getbsize(fs) - 1) - 1;
			return (lfs_update(ovp, NULL, NULL, 0));
		} else {
			error = lfs_reserve(fs, ovp, NULL,
			    lfs_btofsb(fs, (ULFS_NIADDR + 2) << lfs_sb_getbshift(fs)));
			if (error)
				return (error);
			error = lfs_balloc(ovp, length - 1, 1, cred,
					   aflags, &bp);
			lfs_reserve(fs, ovp, NULL,
			    -lfs_btofsb(fs, (ULFS_NIADDR + 2) << lfs_sb_getbshift(fs)));
			if (error)
				return (error);
			oip->i_ffs1_size = oip->i_size = length;
			uvm_vnp_setsize(ovp, length);
			(void) VOP_BWRITE(bp->b_vp, bp);
			oip->i_flag |= IN_CHANGE | IN_UPDATE;
			oip->i_lfs_hiblk = lfs_lblkno(fs, oip->i_size + lfs_sb_getbsize(fs) - 1) - 1;
			return (lfs_update(ovp, NULL, NULL, 0));
		}
	}

	if ((error = lfs_reserve(fs, ovp, NULL,
	    lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs)))) != 0)
		return (error);

	/*
	 * Shorten the size of the file. If the file is not being
	 * truncated to a block boundary, the contents of the
	 * partial block following the end of the file must be
	 * zero'ed in case it ever becomes accessible again because
	 * of subsequent file growth. Directories however are not
	 * zero'ed as they should grow back initialized to empty.
	 */
	offset = lfs_blkoff(fs, length);
	lastseg = -1;
	bc = 0;

	if (ovp != fs->lfs_ivnode)
		lfs_seglock(fs, SEGM_PROT);
	if (offset == 0) {
		oip->i_size = oip->i_ffs1_size = length;
	} else if (!usepc) {
		lbn = lfs_lblkno(fs, length);
		aflags = B_CLRBUF;
		if (ioflag & IO_SYNC)
			aflags |= B_SYNC;
		error = lfs_balloc(ovp, length - 1, 1, cred, aflags, &bp);
		if (error) {
			lfs_reserve(fs, ovp, NULL,
			    -lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs)));
			goto errout;
		}
		obufsize = bp->b_bufsize;
		odb = lfs_btofsb(fs, bp->b_bcount);
		oip->i_size = oip->i_ffs1_size = length;
		size = lfs_blksize(fs, oip, lbn);
		if (ovp->v_type != VDIR)
			memset((char *)bp->b_data + offset, 0,
			       (u_int)(size - offset));
		allocbuf(bp, size, 1);
		if ((bp->b_flags & B_LOCKED) != 0 && bp->b_iodone == NULL) {
			mutex_enter(&lfs_lock);
			locked_queue_bytes -= obufsize - bp->b_bufsize;
			mutex_exit(&lfs_lock);
		}
		if (bp->b_oflags & BO_DELWRI) {
			lfs_sb_addavail(fs, odb - lfs_btofsb(fs, size));
			/* XXX shouldn't this wake up on lfs_availsleep? */
		}
		(void) VOP_BWRITE(bp->b_vp, bp);
	} else { /* vp->v_type == VREG && length < osize && offset != 0 */
		/*
		 * When truncating a regular file down to a non-block-aligned
		 * size, we must zero the part of last block which is past
		 * the new EOF.  We must synchronously flush the zeroed pages
		 * to disk since the new pages will be invalidated as soon
		 * as we inform the VM system of the new, smaller size.
		 * We must do this before acquiring the GLOCK, since fetching
		 * the pages will acquire the GLOCK internally.
		 * So there is a window where another thread could see a whole
		 * zeroed page past EOF, but that's life.
		 */
		daddr_t xlbn;
		voff_t eoz;

		aflags = ioflag & IO_SYNC ? B_SYNC : 0;
		error = ulfs_balloc_range(ovp, length - 1, 1, cred, aflags);
		if (error) {
			lfs_reserve(fs, ovp, NULL,
				    -lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs)));
			goto errout;
		}
		xlbn = lfs_lblkno(fs, length);
		size = lfs_blksize(fs, oip, xlbn);
		eoz = MIN(lfs_lblktosize(fs, xlbn) + size, osize);
		ubc_zerorange(&ovp->v_uobj, length, eoz - length,
		    UBC_UNMAP_FLAG(ovp));
		if (round_page(eoz) > round_page(length)) {
			mutex_enter(ovp->v_interlock);
			error = VOP_PUTPAGES(ovp, round_page(length),
			    round_page(eoz),
			    PGO_CLEANIT | PGO_DEACTIVATE |
			    ((ioflag & IO_SYNC) ? PGO_SYNCIO : 0));
			if (error) {
				lfs_reserve(fs, ovp, NULL,
					    -lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs)));
				goto errout;
			}
		}
	}

	genfs_node_wrlock(ovp);

	oip->i_size = oip->i_ffs1_size = length;
	uvm_vnp_setsize(ovp, length);

	/*
	 * Calculate index into inode's block list of
	 * last direct and indirect blocks (if any)
	 * which we want to keep.  Lastblock is -1 when
	 * the file is truncated to 0.
	 */
	/* Avoid sign overflow - XXX assumes that off_t is a quad_t. */
	if (length > QUAD_MAX - lfs_sb_getbsize(fs))
		lastblock = lfs_lblkno(fs, QUAD_MAX - lfs_sb_getbsize(fs));
	else
		lastblock = lfs_lblkno(fs, length + lfs_sb_getbsize(fs) - 1) - 1;
	lastiblock[SINGLE] = lastblock - ULFS_NDADDR;
	lastiblock[DOUBLE] = lastiblock[SINGLE] - LFS_NINDIR(fs);
	lastiblock[TRIPLE] = lastiblock[DOUBLE] - LFS_NINDIR(fs) * LFS_NINDIR(fs);
	nblocks = lfs_btofsb(fs, lfs_sb_getbsize(fs));
	/*
	 * Record changed file and block pointers before we start
	 * freeing blocks.  lastiblock values are also normalized to -1
	 * for calls to lfs_indirtrunc below.
	 */
	memcpy((void *)newblks, (void *)&oip->i_ffs1_db[0], sizeof newblks);
	for (level = TRIPLE; level >= SINGLE; level--)
		if (lastiblock[level] < 0) {
			newblks[ULFS_NDADDR+level] = 0;
			lastiblock[level] = -1;
		}
	for (i = ULFS_NDADDR - 1; i > lastblock; i--)
		newblks[i] = 0;

	oip->i_size = oip->i_ffs1_size = osize;
	error = lfs_vtruncbuf(ovp, lastblock + 1, false, 0);
	if (error && !allerror)
		allerror = error;

	/*
	 * Indirect blocks first.
	 */
	indir_lbn[SINGLE] = -ULFS_NDADDR;
	indir_lbn[DOUBLE] = indir_lbn[SINGLE] - LFS_NINDIR(fs) - 1;
	indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - LFS_NINDIR(fs) * LFS_NINDIR(fs) - 1;
	for (level = TRIPLE; level >= SINGLE; level--) {
		bn = oip->i_ffs1_ib[level];
		if (bn != 0) {
			error = lfs_indirtrunc(oip, indir_lbn[level],
					       bn, lastiblock[level],
					       level, &count, &rcount,
					       &lastseg, &bc);
			if (error)
				allerror = error;
			real_released += rcount;
			blocksreleased += count;
			if (lastiblock[level] < 0) {
				if (oip->i_ffs1_ib[level] > 0)
					real_released += nblocks;
				blocksreleased += nblocks;
				oip->i_ffs1_ib[level] = 0;
				lfs_blkfree(fs, oip, bn, lfs_sb_getbsize(fs),
					    &lastseg, &bc);
				lfs_deregister_block(ovp, bn);
			}
		}
		if (lastiblock[level] >= 0)
			goto done;
	}

	/*
	 * All whole direct blocks or frags.
	 */
	for (i = ULFS_NDADDR - 1; i > lastblock; i--) {
		long bsize, obsize;

		bn = oip->i_ffs1_db[i];
		if (bn == 0)
			continue;
		bsize = lfs_blksize(fs, oip, i);
		if (oip->i_ffs1_db[i] > 0) {
			/* Check for fragment size changes */
			obsize = oip->i_lfs_fragsize[i];
			real_released += lfs_btofsb(fs, obsize);
			oip->i_lfs_fragsize[i] = 0;
		} else
			obsize = 0;
		blocksreleased += lfs_btofsb(fs, bsize);
		oip->i_ffs1_db[i] = 0;
		lfs_blkfree(fs, oip, bn, obsize, &lastseg, &bc);
		lfs_deregister_block(ovp, bn);
	}
	if (lastblock < 0)
		goto done;

	/*
	 * Finally, look for a change in size of the
	 * last direct block; release any frags.
	 */
	bn = oip->i_ffs1_db[lastblock];
	if (bn != 0) {
		long oldspace, newspace;
#if 0
		long olddspace;
#endif

		/*
		 * Calculate amount of space we're giving
		 * back as old block size minus new block size.
		 */
		oldspace = lfs_blksize(fs, oip, lastblock);
#if 0
		olddspace = oip->i_lfs_fragsize[lastblock];
#endif

		oip->i_size = oip->i_ffs1_size = length;
		newspace = lfs_blksize(fs, oip, lastblock);
		if (newspace == 0)
			panic("itrunc: newspace");
		if (oldspace - newspace > 0) {
			blocksreleased += lfs_btofsb(fs, oldspace - newspace);
		}
#if 0
		if (bn > 0 && olddspace - newspace > 0) {
			/* No segment accounting here, just vnode */
			real_released += lfs_btofsb(fs, olddspace - newspace);
		}
#endif
	}

done:
	/* Finish segment accounting corrections */
	lfs_update_seguse(fs, oip, lastseg, bc);
#ifdef DIAGNOSTIC
	for (level = SINGLE; level <= TRIPLE; level++)
		if ((newblks[ULFS_NDADDR + level] == 0) !=
		    ((oip->i_ffs1_ib[level]) == 0)) {
			panic("lfs itrunc1");
		}
	for (i = 0; i < ULFS_NDADDR; i++)
		if ((newblks[i] == 0) != (oip->i_ffs1_db[i] == 0)) {
			panic("lfs itrunc2");
		}
	if (length == 0 &&
	    (!LIST_EMPTY(&ovp->v_cleanblkhd) || !LIST_EMPTY(&ovp->v_dirtyblkhd)))
		panic("lfs itrunc3");
#endif /* DIAGNOSTIC */
	/*
	 * Put back the real size.
	 */
	oip->i_size = oip->i_ffs1_size = length;
	oip->i_lfs_effnblks -= blocksreleased;
	oip->i_ffs1_blocks -= real_released;
	mutex_enter(&lfs_lock);
	lfs_sb_addbfree(fs, blocksreleased);
	mutex_exit(&lfs_lock);
#ifdef DIAGNOSTIC
	if (oip->i_size == 0 &&
	    (oip->i_ffs1_blocks != 0 || oip->i_lfs_effnblks != 0)) {
		printf("lfs_truncate: truncate to 0 but %d blks/%jd effblks\n",
		       oip->i_ffs1_blocks, (intmax_t)oip->i_lfs_effnblks);
		panic("lfs_truncate: persistent blocks");
	}
#endif

	/*
	 * If we truncated to zero, take us off the paging queue.
	 */
	mutex_enter(&lfs_lock);
	if (oip->i_size == 0 && oip->i_flags & IN_PAGING) {
		oip->i_flags &= ~IN_PAGING;
		TAILQ_REMOVE(&fs->lfs_pchainhd, oip, i_lfs_pchain);
	}
	mutex_exit(&lfs_lock);

	oip->i_flag |= IN_CHANGE;
#if defined(LFS_QUOTA) || defined(LFS_QUOTA2)
	(void) lfs_chkdq(oip, -blocksreleased, NOCRED, 0);
#endif
	lfs_reserve(fs, ovp, NULL,
	    -lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs)));
	genfs_node_unlock(ovp);
  errout:
	oip->i_lfs_hiblk = lfs_lblkno(fs, oip->i_size + lfs_sb_getbsize(fs) - 1) - 1;
	if (ovp != fs->lfs_ivnode)
		lfs_segunlock(fs);
	return (allerror ? allerror : error);
}
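
lfs_truncate decides what to free from the lastblock/lastiblock indices:
lastblock is the last logical block to keep, and each lastiblock[level] is
that index shifted into the address space of the corresponding indirect
block (a negative value means the whole level is released).  A toy
computation with hypothetical NDADDR/NINDIR/BSIZE values:

#include <stdio.h>

#define NDADDR 12	/* direct pointers, as in ULFS_NDADDR */
#define NINDIR 2048	/* pointers per indirect block (8 KB / 4 B) */
#define BSIZE  8192

int
main(void)
{
	long long length = 100 * 1024;	/* truncate to 100 KB */
	long long lastblock, single, dbl, triple;

	/* Last logical block to keep; -1 when truncating to 0. */
	lastblock = (length + BSIZE - 1) / BSIZE - 1;
	single = lastblock - NDADDR;
	dbl    = single - NINDIR;
	triple = dbl - (long long)NINDIR * NINDIR;

	/* A negative index means every block at that level is released. */
	printf("lastblock=%lld single=%lld double=%lld triple=%lld\n",
	    lastblock, single, dbl, triple);
	return 0;
}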