int lfs_bmapv(struct proc *p, fsid_t *fsidp, BLOCK_INFO *blkiov, int blkcnt) { BLOCK_INFO *blkp; IFILE *ifp; struct buf *bp; struct inode *ip = NULL; struct lfs *fs; struct mount *mntp; struct ulfsmount *ump; struct vnode *vp; ino_t lastino; daddr_t v_daddr; int cnt, error; int numrefed = 0; lfs_cleaner_pid = p->p_pid; if ((mntp = vfs_getvfs(fsidp)) == NULL) return (ENOENT); ump = VFSTOULFS(mntp); if ((error = vfs_busy(mntp, NULL)) != 0) return (error); cnt = blkcnt; fs = VFSTOULFS(mntp)->um_lfs; error = 0; /* these were inside the initialization for the for loop */ v_daddr = LFS_UNUSED_DADDR; lastino = LFS_UNUSED_INUM; for (blkp = blkiov; cnt--; ++blkp) { /* * Get the IFILE entry (only once) and see if the file still * exists. */ if (lastino != blkp->bi_inode) { /* * Finish the old file, if there was one. The presence * of a usable vnode in vp is signaled by a valid * v_daddr. */ if (v_daddr != LFS_UNUSED_DADDR) { lfs_vunref(vp); if (VTOI(vp)->i_lfs_iflags & LFSI_BMAP) { mutex_enter(vp->v_interlock); if (vget(vp, LK_NOWAIT) == 0) { if (! vrecycle(vp)) vrele(vp); } } numrefed--; } /* * Start a new file */ lastino = blkp->bi_inode; if (blkp->bi_inode == LFS_IFILE_INUM) v_daddr = fs->lfs_idaddr; else { LFS_IENTRY(ifp, fs, blkp->bi_inode, bp); v_daddr = ifp->if_daddr; brelse(bp, 0); } if (v_daddr == LFS_UNUSED_DADDR) { blkp->bi_daddr = LFS_UNUSED_DADDR; continue; } /* * A regular call to VFS_VGET could deadlock * here. Instead, we try an unlocked access. */ mutex_enter(&ulfs_ihash_lock); vp = ulfs_ihashlookup(ump->um_dev, blkp->bi_inode); if (vp != NULL && !(vp->v_iflag & VI_XLOCK)) { ip = VTOI(vp); mutex_enter(vp->v_interlock); mutex_exit(&ulfs_ihash_lock); if (lfs_vref(vp)) { v_daddr = LFS_UNUSED_DADDR; continue; } numrefed++; } else { mutex_exit(&ulfs_ihash_lock); /* * Don't VFS_VGET if we're being unmounted, * since we hold vfs_busy(). */ if (mntp->mnt_iflag & IMNT_UNMOUNT) { v_daddr = LFS_UNUSED_DADDR; continue; } error = VFS_VGET(mntp, blkp->bi_inode, &vp); if (error) { DLOG((DLOG_CLEAN, "lfs_bmapv: vget ino" "%d failed with %d", blkp->bi_inode,error)); v_daddr = LFS_UNUSED_DADDR; continue; } else { KASSERT(VOP_ISLOCKED(vp)); VTOI(vp)->i_lfs_iflags |= LFSI_BMAP; VOP_UNLOCK(vp); numrefed++; } } ip = VTOI(vp); } else if (v_daddr == LFS_UNUSED_DADDR) { /* * This can only happen if the vnode is dead. * Keep going. Note that we DO NOT set the * bi_addr to anything -- if we failed to get * the vnode, for example, we want to assume * conservatively that all of its blocks *are* * located in the segment in question. * lfs_markv will throw them out if we are * wrong. */ /* blkp->bi_daddr = LFS_UNUSED_DADDR; */ continue; } /* Past this point we are guaranteed that vp, ip are valid. */ if (blkp->bi_lbn == LFS_UNUSED_LBN) { /* * We just want the inode address, which is * conveniently in v_daddr. */ blkp->bi_daddr = v_daddr; } else { daddr_t bi_daddr; /* XXX ondisk32 */ error = VOP_BMAP(vp, blkp->bi_lbn, NULL, &bi_daddr, NULL); if (error) { blkp->bi_daddr = LFS_UNUSED_DADDR; continue; } blkp->bi_daddr = LFS_DBTOFSB(fs, bi_daddr); /* Fill in the block size, too */ if (blkp->bi_lbn >= 0) blkp->bi_size = lfs_blksize(fs, ip, blkp->bi_lbn); else blkp->bi_size = fs->lfs_bsize; } } /* * Finish the old file, if there was one. The presence * of a usable vnode in vp is signaled by a valid v_daddr. */ if (v_daddr != LFS_UNUSED_DADDR) { lfs_vunref(vp); /* Recycle as above. */ if (ip->i_lfs_iflags & LFSI_BMAP) { mutex_enter(vp->v_interlock); if (vget(vp, LK_NOWAIT) == 0) { if (! vrecycle(vp)) vrele(vp); } } numrefed--; } #ifdef DIAGNOSTIC if (numrefed != 0) panic("lfs_bmapv: numrefed=%d", numrefed); #endif vfs_unbusy(mntp, false, NULL); return 0; }
int lfs_markv(struct proc *p, fsid_t *fsidp, BLOCK_INFO *blkiov, int blkcnt) { BLOCK_INFO *blkp; IFILE *ifp; struct buf *bp; struct inode *ip = NULL; struct lfs *fs; struct mount *mntp; struct vnode *vp = NULL; ino_t lastino; daddr_t b_daddr, v_daddr; int cnt, error; int do_again = 0; int numrefed = 0; ino_t maxino; size_t obsize; /* number of blocks/inodes that we have already bwrite'ed */ int nblkwritten, ninowritten; if ((mntp = vfs_getvfs(fsidp)) == NULL) return (ENOENT); fs = VFSTOULFS(mntp)->um_lfs; if (fs->lfs_ronly) return EROFS; maxino = (lfs_fragstoblks(fs, VTOI(fs->lfs_ivnode)->i_ffs1_blocks) - fs->lfs_cleansz - fs->lfs_segtabsz) * fs->lfs_ifpb; cnt = blkcnt; if ((error = vfs_busy(mntp, NULL)) != 0) return (error); /* * This seglock is just to prevent the fact that we might have to sleep * from allowing the possibility that our blocks might become * invalid. * * It is also important to note here that unless we specify SEGM_CKP, * any Ifile blocks that we might be asked to clean will never get * to the disk. */ lfs_seglock(fs, SEGM_CLEAN | SEGM_CKP | SEGM_SYNC); /* Mark blocks/inodes dirty. */ error = 0; /* these were inside the initialization for the for loop */ v_daddr = LFS_UNUSED_DADDR; lastino = LFS_UNUSED_INUM; nblkwritten = ninowritten = 0; for (blkp = blkiov; cnt--; ++blkp) { /* Bounds-check incoming data, avoid panic for failed VGET */ if (blkp->bi_inode <= 0 || blkp->bi_inode >= maxino) { error = EINVAL; goto err3; } /* * Get the IFILE entry (only once) and see if the file still * exists. */ if (lastino != blkp->bi_inode) { /* * Finish the old file, if there was one. The presence * of a usable vnode in vp is signaled by a valid v_daddr. */ if (v_daddr != LFS_UNUSED_DADDR) { lfs_vunref(vp); numrefed--; } /* * Start a new file */ lastino = blkp->bi_inode; if (blkp->bi_inode == LFS_IFILE_INUM) v_daddr = fs->lfs_idaddr; else { LFS_IENTRY(ifp, fs, blkp->bi_inode, bp); /* XXX fix for force write */ v_daddr = ifp->if_daddr; brelse(bp, 0); } if (v_daddr == LFS_UNUSED_DADDR) continue; /* Get the vnode/inode. */ error = lfs_fastvget(mntp, blkp->bi_inode, v_daddr, &vp, (blkp->bi_lbn == LFS_UNUSED_LBN ? blkp->bi_bp : NULL)); if (!error) { numrefed++; } if (error) { DLOG((DLOG_CLEAN, "lfs_markv: lfs_fastvget" " failed with %d (ino %d, segment %d)\n", error, blkp->bi_inode, lfs_dtosn(fs, blkp->bi_daddr))); /* * If we got EAGAIN, that means that the * Inode was locked. This is * recoverable: just clean the rest of * this segment, and let the cleaner try * again with another. (When the * cleaner runs again, this segment will * sort high on the list, since it is * now almost entirely empty.) But, we * still set v_daddr = LFS_UNUSED_ADDR * so as not to test this over and over * again. */ if (error == EAGAIN) { error = 0; do_again++; } #ifdef DIAGNOSTIC else if (error != ENOENT) panic("lfs_markv VFS_VGET FAILED"); #endif /* lastino = LFS_UNUSED_INUM; */ v_daddr = LFS_UNUSED_DADDR; vp = NULL; ip = NULL; continue; } ip = VTOI(vp); ninowritten++; } else if (v_daddr == LFS_UNUSED_DADDR) { /* * This can only happen if the vnode is dead (or * in any case we can't get it...e.g., it is * inlocked). Keep going. */ continue; } /* Past this point we are guaranteed that vp, ip are valid. */ /* Can't clean VU_DIROP directories in case of truncation */ /* XXX - maybe we should mark removed dirs specially? */ if (vp->v_type == VDIR && (vp->v_uflag & VU_DIROP)) { do_again++; continue; } /* If this BLOCK_INFO didn't contain a block, keep going. */ if (blkp->bi_lbn == LFS_UNUSED_LBN) { /* XXX need to make sure that the inode gets written in this case */ /* XXX but only write the inode if it's the right one */ if (blkp->bi_inode != LFS_IFILE_INUM) { LFS_IENTRY(ifp, fs, blkp->bi_inode, bp); if (ifp->if_daddr == blkp->bi_daddr) { mutex_enter(&lfs_lock); LFS_SET_UINO(ip, IN_CLEANING); mutex_exit(&lfs_lock); } brelse(bp, 0); } continue; } b_daddr = 0; if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &b_daddr, NULL) || LFS_DBTOFSB(fs, b_daddr) != blkp->bi_daddr) { if (lfs_dtosn(fs, LFS_DBTOFSB(fs, b_daddr)) == lfs_dtosn(fs, blkp->bi_daddr)) { DLOG((DLOG_CLEAN, "lfs_markv: wrong da same seg: %llx vs %llx\n", (long long)blkp->bi_daddr, (long long)LFS_DBTOFSB(fs, b_daddr))); } do_again++; continue; } /* * Check block sizes. The blocks being cleaned come from * disk, so they should have the same size as their on-disk * counterparts. */ if (blkp->bi_lbn >= 0) obsize = lfs_blksize(fs, ip, blkp->bi_lbn); else obsize = fs->lfs_bsize; /* Check for fragment size change */ if (blkp->bi_lbn >= 0 && blkp->bi_lbn < ULFS_NDADDR) { obsize = ip->i_lfs_fragsize[blkp->bi_lbn]; } if (obsize != blkp->bi_size) { DLOG((DLOG_CLEAN, "lfs_markv: ino %d lbn %lld wrong" " size (%ld != %d), try again\n", blkp->bi_inode, (long long)blkp->bi_lbn, (long) obsize, blkp->bi_size)); do_again++; continue; } /* * If we get to here, then we are keeping the block. If * it is an indirect block, we want to actually put it * in the buffer cache so that it can be updated in the * finish_meta section. If it's not, we need to * allocate a fake buffer so that writeseg can perform * the copyin and write the buffer. */ if (ip->i_number != LFS_IFILE_INUM && blkp->bi_lbn >= 0) { /* Data Block */ bp = lfs_fakebuf(fs, vp, blkp->bi_lbn, blkp->bi_size, blkp->bi_bp); /* Pretend we used bread() to get it */ bp->b_blkno = LFS_FSBTODB(fs, blkp->bi_daddr); } else { /* Indirect block or ifile */ if (blkp->bi_size != fs->lfs_bsize && ip->i_number != LFS_IFILE_INUM) panic("lfs_markv: partial indirect block?" " size=%d\n", blkp->bi_size); bp = getblk(vp, blkp->bi_lbn, blkp->bi_size, 0, 0); if (!(bp->b_oflags & (BO_DONE|BO_DELWRI))) { /* * The block in question was not found * in the cache; i.e., the block that * getblk() returned is empty. So, we * can (and should) copy in the * contents, because we've already * determined that this was the right * version of this block on disk. * * And, it can't have changed underneath * us, because we have the segment lock. */ error = copyin(blkp->bi_bp, bp->b_data, blkp->bi_size); if (error) goto err2; } } if ((error = lfs_bwrite_ext(bp, BW_CLEAN)) != 0) goto err2; nblkwritten++; /* * XXX should account indirect blocks and ifile pages as well */ if (nblkwritten + lfs_lblkno(fs, ninowritten * sizeof (struct ulfs1_dinode)) > LFS_MARKV_MAX_BLOCKS) { DLOG((DLOG_CLEAN, "lfs_markv: writing %d blks %d inos\n", nblkwritten, ninowritten)); lfs_segwrite(mntp, SEGM_CLEAN); nblkwritten = ninowritten = 0; } } /* * Finish the old file, if there was one */ if (v_daddr != LFS_UNUSED_DADDR) { lfs_vunref(vp); numrefed--; } #ifdef DIAGNOSTIC if (numrefed != 0) panic("lfs_markv: numrefed=%d", numrefed); #endif DLOG((DLOG_CLEAN, "lfs_markv: writing %d blks %d inos (check point)\n", nblkwritten, ninowritten)); /* * The last write has to be SEGM_SYNC, because of calling semantics. * It also has to be SEGM_CKP, because otherwise we could write * over the newly cleaned data contained in a checkpoint, and then * we'd be unhappy at recovery time. */ lfs_segwrite(mntp, SEGM_CLEAN | SEGM_CKP | SEGM_SYNC); lfs_segunlock(fs); vfs_unbusy(mntp, false, NULL); if (error) return (error); else if (do_again) return EAGAIN; return 0; err2: DLOG((DLOG_CLEAN, "lfs_markv err2\n")); /* * XXX we're here because copyin() failed. * XXX it means that we can't trust the cleanerd. too bad. * XXX how can we recover from this? */ err3: /* * XXX should do segwrite here anyway? */ if (v_daddr != LFS_UNUSED_DADDR) { lfs_vunref(vp); --numrefed; } lfs_segunlock(fs); vfs_unbusy(mntp, false, NULL); #ifdef DIAGNOSTIC if (numrefed != 0) panic("lfs_markv: numrefed=%d", numrefed); #endif return (error); }
/* VOP_BWRITE ULFS_NIADDR+2 times */ int lfs_balloc(struct vnode *vp, off_t startoffset, int iosize, kauth_cred_t cred, int flags, struct buf **bpp) { int offset; daddr_t daddr, idaddr; struct buf *ibp, *bp; struct inode *ip; struct lfs *fs; struct indir indirs[ULFS_NIADDR+2], *idp; daddr_t lbn, lastblock; int bcount; int error, frags, i, nsize, osize, num; ip = VTOI(vp); fs = ip->i_lfs; offset = lfs_blkoff(fs, startoffset); KASSERT(iosize <= lfs_sb_getbsize(fs)); lbn = lfs_lblkno(fs, startoffset); /* (void)lfs_check(vp, lbn, 0); */ ASSERT_MAYBE_SEGLOCK(fs); /* * Three cases: it's a block beyond the end of file, it's a block in * the file that may or may not have been assigned a disk address or * we're writing an entire block. * * Note, if the daddr is UNWRITTEN, the block already exists in * the cache (it was read or written earlier). If so, make sure * we don't count it as a new block or zero out its contents. If * it did not, make sure we allocate any necessary indirect * blocks. * * If we are writing a block beyond the end of the file, we need to * check if the old last block was a fragment. If it was, we need * to rewrite it. */ if (bpp) *bpp = NULL; /* Check for block beyond end of file and fragment extension needed. */ lastblock = lfs_lblkno(fs, ip->i_size); if (lastblock < ULFS_NDADDR && lastblock < lbn) { osize = lfs_blksize(fs, ip, lastblock); if (osize < lfs_sb_getbsize(fs) && osize > 0) { if ((error = lfs_fragextend(vp, osize, lfs_sb_getbsize(fs), lastblock, (bpp ? &bp : NULL), cred))) return (error); ip->i_size = (lastblock + 1) * lfs_sb_getbsize(fs); lfs_dino_setsize(fs, ip->i_din, ip->i_size); uvm_vnp_setsize(vp, ip->i_size); ip->i_flag |= IN_CHANGE | IN_UPDATE; if (bpp) (void) VOP_BWRITE(bp->b_vp, bp); } } /* * If the block we are writing is a direct block, it's the last * block in the file, and offset + iosize is less than a full * block, we can write one or more fragments. There are two cases: * the block is brand new and we should allocate it the correct * size or it already exists and contains some fragments and * may need to extend it. */ if (lbn < ULFS_NDADDR && lfs_lblkno(fs, ip->i_size) <= lbn) { osize = lfs_blksize(fs, ip, lbn); nsize = lfs_fragroundup(fs, offset + iosize); if (lfs_lblktosize(fs, lbn) >= ip->i_size) { /* Brand new block or fragment */ frags = lfs_numfrags(fs, nsize); if (!ISSPACE(fs, frags, cred)) return ENOSPC; if (bpp) { *bpp = bp = getblk(vp, lbn, nsize, 0, 0); bp->b_blkno = UNWRITTEN; if (flags & B_CLRBUF) clrbuf(bp); } ip->i_lfs_effnblks += frags; mutex_enter(&lfs_lock); lfs_sb_subbfree(fs, frags); mutex_exit(&lfs_lock); lfs_dino_setdb(fs, ip->i_din, lbn, UNWRITTEN); } else { if (nsize <= osize) { /* No need to extend */ if (bpp && (error = bread(vp, lbn, osize, 0, &bp))) return error; } else { /* Extend existing block */ if ((error = lfs_fragextend(vp, osize, nsize, lbn, (bpp ? &bp : NULL), cred))) return error; } if (bpp) *bpp = bp; } return 0; } error = ulfs_bmaparray(vp, lbn, &daddr, &indirs[0], &num, NULL, NULL); if (error) return (error); KASSERT(daddr <= LFS_MAX_DADDR(fs)); /* * Do byte accounting all at once, so we can gracefully fail *before* * we start assigning blocks. */ frags = fs->um_seqinc; bcount = 0; if (daddr == UNASSIGNED) { bcount = frags; } for (i = 1; i < num; ++i) { if (!indirs[i].in_exists) { bcount += frags; } } if (ISSPACE(fs, bcount, cred)) { mutex_enter(&lfs_lock); lfs_sb_subbfree(fs, bcount); mutex_exit(&lfs_lock); ip->i_lfs_effnblks += bcount; } else { return ENOSPC; } if (daddr == UNASSIGNED) { if (num > 0 && lfs_dino_getib(fs, ip->i_din, indirs[0].in_off) == 0) { lfs_dino_setib(fs, ip->i_din, indirs[0].in_off, UNWRITTEN); } /* * Create new indirect blocks if necessary */ if (num > 1) { idaddr = lfs_dino_getib(fs, ip->i_din, indirs[0].in_off); for (i = 1; i < num; ++i) { ibp = getblk(vp, indirs[i].in_lbn, lfs_sb_getbsize(fs), 0,0); if (!indirs[i].in_exists) { clrbuf(ibp); ibp->b_blkno = UNWRITTEN; } else if (!(ibp->b_oflags & (BO_DELWRI | BO_DONE))) { ibp->b_blkno = LFS_FSBTODB(fs, idaddr); ibp->b_flags |= B_READ; VOP_STRATEGY(vp, ibp); biowait(ibp); } /* * This block exists, but the next one may not. * If that is the case mark it UNWRITTEN to keep * the accounting straight. */ /* XXX ondisk32 */ if (((int32_t *)ibp->b_data)[indirs[i].in_off] == 0) ((int32_t *)ibp->b_data)[indirs[i].in_off] = UNWRITTEN; /* XXX ondisk32 */ idaddr = ((int32_t *)ibp->b_data)[indirs[i].in_off]; #ifdef DEBUG if (vp == fs->lfs_ivnode) { LFS_ENTER_LOG("balloc", __FILE__, __LINE__, indirs[i].in_lbn, ibp->b_flags, curproc->p_pid); } #endif if ((error = VOP_BWRITE(ibp->b_vp, ibp))) return error; } } } /* * Get the existing block from the cache, if requested. */ if (bpp) *bpp = bp = getblk(vp, lbn, lfs_blksize(fs, ip, lbn), 0, 0); /* * Do accounting on blocks that represent pages. */ if (!bpp) lfs_register_block(vp, lbn); /* * The block we are writing may be a brand new block * in which case we need to do accounting. * * We can tell a truly new block because ulfs_bmaparray will say * it is UNASSIGNED. Once we allocate it we will assign it the * disk address UNWRITTEN. */ if (daddr == UNASSIGNED) { if (bpp) { if (flags & B_CLRBUF) clrbuf(bp); /* Note the new address */ bp->b_blkno = UNWRITTEN; } switch (num) { case 0: lfs_dino_setdb(fs, ip->i_din, lbn, UNWRITTEN); break; case 1: lfs_dino_setib(fs, ip->i_din, indirs[0].in_off, UNWRITTEN); break; default: idp = &indirs[num - 1]; if (bread(vp, idp->in_lbn, lfs_sb_getbsize(fs), B_MODIFY, &ibp)) panic("lfs_balloc: bread bno %lld", (long long)idp->in_lbn); /* XXX ondisk32 */ ((int32_t *)ibp->b_data)[idp->in_off] = UNWRITTEN; #ifdef DEBUG if (vp == fs->lfs_ivnode) { LFS_ENTER_LOG("balloc", __FILE__, __LINE__, idp->in_lbn, ibp->b_flags, curproc->p_pid); } #endif VOP_BWRITE(ibp->b_vp, ibp); } } else if (bpp && !(bp->b_oflags & (BO_DONE|BO_DELWRI))) { /* * Not a brand new block, also not in the cache; * read it in from disk. */ if (iosize == lfs_sb_getbsize(fs)) /* Optimization: I/O is unnecessary. */ bp->b_blkno = daddr; else { /* * We need to read the block to preserve the * existing bytes. */ bp->b_blkno = daddr; bp->b_flags |= B_READ; VOP_STRATEGY(vp, bp); return (biowait(bp)); } } return (0); }
int lfs_bmapv(struct lwp *l, fsid_t *fsidp, BLOCK_INFO *blkiov, int blkcnt) { BLOCK_INFO *blkp; IFILE *ifp; struct buf *bp; struct inode *ip = NULL; struct lfs *fs; struct mount *mntp; struct ulfsmount *ump; struct vnode *vp; ino_t lastino; daddr_t v_daddr; int cnt, error; int numrefed = 0; error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_LFS, KAUTH_REQ_SYSTEM_LFS_BMAPV, NULL, NULL, NULL); if (error) return (error); if ((mntp = vfs_getvfs(fsidp)) == NULL) return (ENOENT); if ((error = vfs_busy(mntp, NULL)) != 0) return (error); ump = VFSTOULFS(mntp); fs = ump->um_lfs; if (fs->lfs_cleaner_thread == NULL) fs->lfs_cleaner_thread = curlwp; KASSERT(fs->lfs_cleaner_thread == curlwp); cnt = blkcnt; error = 0; /* these were inside the initialization for the for loop */ vp = NULL; v_daddr = LFS_UNUSED_DADDR; lastino = LFS_UNUSED_INUM; for (blkp = blkiov; cnt--; ++blkp) { /* * Get the IFILE entry (only once) and see if the file still * exists. */ if (lastino != blkp->bi_inode) { /* * Finish the old file, if there was one. */ if (vp != NULL) { vput(vp); vp = NULL; numrefed--; } /* * Start a new file */ lastino = blkp->bi_inode; if (blkp->bi_inode == LFS_IFILE_INUM) v_daddr = lfs_sb_getidaddr(fs); else { LFS_IENTRY(ifp, fs, blkp->bi_inode, bp); v_daddr = lfs_if_getdaddr(fs, ifp); brelse(bp, 0); } if (v_daddr == LFS_UNUSED_DADDR) { blkp->bi_daddr = LFS_UNUSED_DADDR; continue; } error = lfs_fastvget(mntp, blkp->bi_inode, NULL, LK_SHARED, &vp); if (error) { DLOG((DLOG_CLEAN, "lfs_bmapv: lfs_fastvget ino" "%d failed with %d", blkp->bi_inode,error)); KASSERT(vp == NULL); continue; } else { KASSERT(VOP_ISLOCKED(vp)); numrefed++; } ip = VTOI(vp); } else if (vp == NULL) { /* * This can only happen if the vnode is dead. * Keep going. Note that we DO NOT set the * bi_addr to anything -- if we failed to get * the vnode, for example, we want to assume * conservatively that all of its blocks *are* * located in the segment in question. * lfs_markv will throw them out if we are * wrong. */ continue; } /* Past this point we are guaranteed that vp, ip are valid. */ if (blkp->bi_lbn == LFS_UNUSED_LBN) { /* * We just want the inode address, which is * conveniently in v_daddr. */ blkp->bi_daddr = v_daddr; } else { daddr_t bi_daddr; error = VOP_BMAP(vp, blkp->bi_lbn, NULL, &bi_daddr, NULL); if (error) { blkp->bi_daddr = LFS_UNUSED_DADDR; continue; } blkp->bi_daddr = LFS_DBTOFSB(fs, bi_daddr); /* Fill in the block size, too */ if (blkp->bi_lbn >= 0) blkp->bi_size = lfs_blksize(fs, ip, blkp->bi_lbn); else blkp->bi_size = lfs_sb_getbsize(fs); } } /* * Finish the old file, if there was one. */ if (vp != NULL) { vput(vp); vp = NULL; numrefed--; } #ifdef DIAGNOSTIC if (numrefed != 0) panic("lfs_bmapv: numrefed=%d", numrefed); #endif vfs_unbusy(mntp, false, NULL); return 0; }
int lfs_truncate(struct vnode *ovp, off_t length, int ioflag, kauth_cred_t cred) { daddr_t lastblock; struct inode *oip = VTOI(ovp); daddr_t bn, lbn, lastiblock[ULFS_NIADDR], indir_lbn[ULFS_NIADDR]; /* XXX ondisk32 */ int32_t newblks[ULFS_NDADDR + ULFS_NIADDR]; struct lfs *fs; struct buf *bp; int offset, size, level; daddr_t count, rcount; daddr_t blocksreleased = 0, real_released = 0; int i, nblocks; int aflags, error, allerror = 0; off_t osize; long lastseg; size_t bc; int obufsize, odb; int usepc; if (ovp->v_type == VCHR || ovp->v_type == VBLK || ovp->v_type == VFIFO || ovp->v_type == VSOCK) { KASSERT(oip->i_size == 0); return 0; } if (length < 0) return (EINVAL); /* * Just return and not update modification times. */ if (oip->i_size == length) { /* still do a uvm_vnp_setsize() as writesize may be larger */ uvm_vnp_setsize(ovp, length); return (0); } fs = oip->i_lfs; if (ovp->v_type == VLNK && (oip->i_size < fs->um_maxsymlinklen || (fs->um_maxsymlinklen == 0 && oip->i_ffs1_blocks == 0))) { #ifdef DIAGNOSTIC if (length != 0) panic("lfs_truncate: partial truncate of symlink"); #endif memset((char *)SHORTLINK(oip), 0, (u_int)oip->i_size); oip->i_size = oip->i_ffs1_size = 0; oip->i_flag |= IN_CHANGE | IN_UPDATE; return (lfs_update(ovp, NULL, NULL, 0)); } if (oip->i_size == length) { oip->i_flag |= IN_CHANGE | IN_UPDATE; return (lfs_update(ovp, NULL, NULL, 0)); } lfs_imtime(fs); osize = oip->i_size; usepc = (ovp->v_type == VREG && ovp != fs->lfs_ivnode); ASSERT_NO_SEGLOCK(fs); /* * Lengthen the size of the file. We must ensure that the * last byte of the file is allocated. Since the smallest * value of osize is 0, length will be at least 1. */ if (osize < length) { if (length > fs->um_maxfilesize) return (EFBIG); aflags = B_CLRBUF; if (ioflag & IO_SYNC) aflags |= B_SYNC; if (usepc) { if (lfs_lblkno(fs, osize) < ULFS_NDADDR && lfs_lblkno(fs, osize) != lfs_lblkno(fs, length) && lfs_blkroundup(fs, osize) != osize) { off_t eob; eob = lfs_blkroundup(fs, osize); uvm_vnp_setwritesize(ovp, eob); error = ulfs_balloc_range(ovp, osize, eob - osize, cred, aflags); if (error) { (void) lfs_truncate(ovp, osize, ioflag & IO_SYNC, cred); return error; } if (ioflag & IO_SYNC) { mutex_enter(ovp->v_interlock); VOP_PUTPAGES(ovp, trunc_page(osize & lfs_sb_getbmask(fs)), round_page(eob), PGO_CLEANIT | PGO_SYNCIO); } } uvm_vnp_setwritesize(ovp, length); error = ulfs_balloc_range(ovp, length - 1, 1, cred, aflags); if (error) { (void) lfs_truncate(ovp, osize, ioflag & IO_SYNC, cred); return error; } uvm_vnp_setsize(ovp, length); oip->i_flag |= IN_CHANGE | IN_UPDATE; KASSERT(ovp->v_size == oip->i_size); oip->i_lfs_hiblk = lfs_lblkno(fs, oip->i_size + lfs_sb_getbsize(fs) - 1) - 1; return (lfs_update(ovp, NULL, NULL, 0)); } else { error = lfs_reserve(fs, ovp, NULL, lfs_btofsb(fs, (ULFS_NIADDR + 2) << lfs_sb_getbshift(fs))); if (error) return (error); error = lfs_balloc(ovp, length - 1, 1, cred, aflags, &bp); lfs_reserve(fs, ovp, NULL, -lfs_btofsb(fs, (ULFS_NIADDR + 2) << lfs_sb_getbshift(fs))); if (error) return (error); oip->i_ffs1_size = oip->i_size = length; uvm_vnp_setsize(ovp, length); (void) VOP_BWRITE(bp->b_vp, bp); oip->i_flag |= IN_CHANGE | IN_UPDATE; oip->i_lfs_hiblk = lfs_lblkno(fs, oip->i_size + lfs_sb_getbsize(fs) - 1) - 1; return (lfs_update(ovp, NULL, NULL, 0)); } } if ((error = lfs_reserve(fs, ovp, NULL, lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs)))) != 0) return (error); /* * Shorten the size of the file. If the file is not being * truncated to a block boundary, the contents of the * partial block following the end of the file must be * zero'ed in case it ever becomes accessible again because * of subsequent file growth. Directories however are not * zero'ed as they should grow back initialized to empty. */ offset = lfs_blkoff(fs, length); lastseg = -1; bc = 0; if (ovp != fs->lfs_ivnode) lfs_seglock(fs, SEGM_PROT); if (offset == 0) { oip->i_size = oip->i_ffs1_size = length; } else if (!usepc) { lbn = lfs_lblkno(fs, length); aflags = B_CLRBUF; if (ioflag & IO_SYNC) aflags |= B_SYNC; error = lfs_balloc(ovp, length - 1, 1, cred, aflags, &bp); if (error) { lfs_reserve(fs, ovp, NULL, -lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs))); goto errout; } obufsize = bp->b_bufsize; odb = lfs_btofsb(fs, bp->b_bcount); oip->i_size = oip->i_ffs1_size = length; size = lfs_blksize(fs, oip, lbn); if (ovp->v_type != VDIR) memset((char *)bp->b_data + offset, 0, (u_int)(size - offset)); allocbuf(bp, size, 1); if ((bp->b_flags & B_LOCKED) != 0 && bp->b_iodone == NULL) { mutex_enter(&lfs_lock); locked_queue_bytes -= obufsize - bp->b_bufsize; mutex_exit(&lfs_lock); } if (bp->b_oflags & BO_DELWRI) { lfs_sb_addavail(fs, odb - lfs_btofsb(fs, size)); /* XXX shouldn't this wake up on lfs_availsleep? */ } (void) VOP_BWRITE(bp->b_vp, bp); } else { /* vp->v_type == VREG && length < osize && offset != 0 */ /* * When truncating a regular file down to a non-block-aligned * size, we must zero the part of last block which is past * the new EOF. We must synchronously flush the zeroed pages * to disk since the new pages will be invalidated as soon * as we inform the VM system of the new, smaller size. * We must do this before acquiring the GLOCK, since fetching * the pages will acquire the GLOCK internally. * So there is a window where another thread could see a whole * zeroed page past EOF, but that's life. */ daddr_t xlbn; voff_t eoz; aflags = ioflag & IO_SYNC ? B_SYNC : 0; error = ulfs_balloc_range(ovp, length - 1, 1, cred, aflags); if (error) { lfs_reserve(fs, ovp, NULL, -lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs))); goto errout; } xlbn = lfs_lblkno(fs, length); size = lfs_blksize(fs, oip, xlbn); eoz = MIN(lfs_lblktosize(fs, xlbn) + size, osize); ubc_zerorange(&ovp->v_uobj, length, eoz - length, UBC_UNMAP_FLAG(ovp)); if (round_page(eoz) > round_page(length)) { mutex_enter(ovp->v_interlock); error = VOP_PUTPAGES(ovp, round_page(length), round_page(eoz), PGO_CLEANIT | PGO_DEACTIVATE | ((ioflag & IO_SYNC) ? PGO_SYNCIO : 0)); if (error) { lfs_reserve(fs, ovp, NULL, -lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs))); goto errout; } } } genfs_node_wrlock(ovp); oip->i_size = oip->i_ffs1_size = length; uvm_vnp_setsize(ovp, length); /* * Calculate index into inode's block list of * last direct and indirect blocks (if any) * which we want to keep. Lastblock is -1 when * the file is truncated to 0. */ /* Avoid sign overflow - XXX assumes that off_t is a quad_t. */ if (length > QUAD_MAX - lfs_sb_getbsize(fs)) lastblock = lfs_lblkno(fs, QUAD_MAX - lfs_sb_getbsize(fs)); else lastblock = lfs_lblkno(fs, length + lfs_sb_getbsize(fs) - 1) - 1; lastiblock[SINGLE] = lastblock - ULFS_NDADDR; lastiblock[DOUBLE] = lastiblock[SINGLE] - LFS_NINDIR(fs); lastiblock[TRIPLE] = lastiblock[DOUBLE] - LFS_NINDIR(fs) * LFS_NINDIR(fs); nblocks = lfs_btofsb(fs, lfs_sb_getbsize(fs)); /* * Record changed file and block pointers before we start * freeing blocks. lastiblock values are also normalized to -1 * for calls to lfs_indirtrunc below. */ memcpy((void *)newblks, (void *)&oip->i_ffs1_db[0], sizeof newblks); for (level = TRIPLE; level >= SINGLE; level--) if (lastiblock[level] < 0) { newblks[ULFS_NDADDR+level] = 0; lastiblock[level] = -1; } for (i = ULFS_NDADDR - 1; i > lastblock; i--) newblks[i] = 0; oip->i_size = oip->i_ffs1_size = osize; error = lfs_vtruncbuf(ovp, lastblock + 1, false, 0); if (error && !allerror) allerror = error; /* * Indirect blocks first. */ indir_lbn[SINGLE] = -ULFS_NDADDR; indir_lbn[DOUBLE] = indir_lbn[SINGLE] - LFS_NINDIR(fs) - 1; indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - LFS_NINDIR(fs) * LFS_NINDIR(fs) - 1; for (level = TRIPLE; level >= SINGLE; level--) { bn = oip->i_ffs1_ib[level]; if (bn != 0) { error = lfs_indirtrunc(oip, indir_lbn[level], bn, lastiblock[level], level, &count, &rcount, &lastseg, &bc); if (error) allerror = error; real_released += rcount; blocksreleased += count; if (lastiblock[level] < 0) { if (oip->i_ffs1_ib[level] > 0) real_released += nblocks; blocksreleased += nblocks; oip->i_ffs1_ib[level] = 0; lfs_blkfree(fs, oip, bn, lfs_sb_getbsize(fs), &lastseg, &bc); lfs_deregister_block(ovp, bn); } } if (lastiblock[level] >= 0) goto done; } /* * All whole direct blocks or frags. */ for (i = ULFS_NDADDR - 1; i > lastblock; i--) { long bsize, obsize; bn = oip->i_ffs1_db[i]; if (bn == 0) continue; bsize = lfs_blksize(fs, oip, i); if (oip->i_ffs1_db[i] > 0) { /* Check for fragment size changes */ obsize = oip->i_lfs_fragsize[i]; real_released += lfs_btofsb(fs, obsize); oip->i_lfs_fragsize[i] = 0; } else obsize = 0; blocksreleased += lfs_btofsb(fs, bsize); oip->i_ffs1_db[i] = 0; lfs_blkfree(fs, oip, bn, obsize, &lastseg, &bc); lfs_deregister_block(ovp, bn); } if (lastblock < 0) goto done; /* * Finally, look for a change in size of the * last direct block; release any frags. */ bn = oip->i_ffs1_db[lastblock]; if (bn != 0) { long oldspace, newspace; #if 0 long olddspace; #endif /* * Calculate amount of space we're giving * back as old block size minus new block size. */ oldspace = lfs_blksize(fs, oip, lastblock); #if 0 olddspace = oip->i_lfs_fragsize[lastblock]; #endif oip->i_size = oip->i_ffs1_size = length; newspace = lfs_blksize(fs, oip, lastblock); if (newspace == 0) panic("itrunc: newspace"); if (oldspace - newspace > 0) { blocksreleased += lfs_btofsb(fs, oldspace - newspace); } #if 0 if (bn > 0 && olddspace - newspace > 0) { /* No segment accounting here, just vnode */ real_released += lfs_btofsb(fs, olddspace - newspace); } #endif } done: /* Finish segment accounting corrections */ lfs_update_seguse(fs, oip, lastseg, bc); #ifdef DIAGNOSTIC for (level = SINGLE; level <= TRIPLE; level++) if ((newblks[ULFS_NDADDR + level] == 0) != ((oip->i_ffs1_ib[level]) == 0)) { panic("lfs itrunc1"); } for (i = 0; i < ULFS_NDADDR; i++) if ((newblks[i] == 0) != (oip->i_ffs1_db[i] == 0)) { panic("lfs itrunc2"); } if (length == 0 && (!LIST_EMPTY(&ovp->v_cleanblkhd) || !LIST_EMPTY(&ovp->v_dirtyblkhd))) panic("lfs itrunc3"); #endif /* DIAGNOSTIC */ /* * Put back the real size. */ oip->i_size = oip->i_ffs1_size = length; oip->i_lfs_effnblks -= blocksreleased; oip->i_ffs1_blocks -= real_released; mutex_enter(&lfs_lock); lfs_sb_addbfree(fs, blocksreleased); mutex_exit(&lfs_lock); #ifdef DIAGNOSTIC if (oip->i_size == 0 && (oip->i_ffs1_blocks != 0 || oip->i_lfs_effnblks != 0)) { printf("lfs_truncate: truncate to 0 but %d blks/%jd effblks\n", oip->i_ffs1_blocks, (intmax_t)oip->i_lfs_effnblks); panic("lfs_truncate: persistent blocks"); } #endif /* * If we truncated to zero, take us off the paging queue. */ mutex_enter(&lfs_lock); if (oip->i_size == 0 && oip->i_flags & IN_PAGING) { oip->i_flags &= ~IN_PAGING; TAILQ_REMOVE(&fs->lfs_pchainhd, oip, i_lfs_pchain); } mutex_exit(&lfs_lock); oip->i_flag |= IN_CHANGE; #if defined(LFS_QUOTA) || defined(LFS_QUOTA2) (void) lfs_chkdq(oip, -blocksreleased, NOCRED, 0); #endif lfs_reserve(fs, ovp, NULL, -lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs))); genfs_node_unlock(ovp); errout: oip->i_lfs_hiblk = lfs_lblkno(fs, oip->i_size + lfs_sb_getbsize(fs) - 1) - 1; if (ovp != fs->lfs_ivnode) lfs_segunlock(fs); return (allerror ? allerror : error); }