/*
 * lfs_balloc: account for, and optionally return a buffer for, the logical
 * block of vp that contains byte offset startoffset.
 *
 * Parameters:
 *	vp		vnode whose block is being set up.
 *	startoffset	byte offset within the file; selects the logical
 *			block (lbn) and the offset within that block.
 *	iosize		number of bytes the caller intends to touch starting
 *			at startoffset; asserted to be no larger than one
 *			file system block.
 *	cred		credential handed to ISSPACE() and lfs_fragextend().
 *	flags		only B_CLRBUF is examined here: when set, the buffer
 *			of a newly accounted block is cleared.
 *	bpp		if non-NULL, *bpp is first set to NULL and on the
 *			success paths receives the buffer for the block.
 *			If NULL, no data buffer is obtained; in the
 *			indirect-capable path the block is instead handed to
 *			lfs_register_block().
 *
 * Returns 0 on success, ENOSPC when ISSPACE() reports insufficient space,
 * or the error returned by lfs_fragextend(), bread(), ulfs_bmaparray(),
 * VOP_BWRITE() (indirect-block loop only) or biowait() (final read only).
 *
 * Blocks that are newly accounted for here are given the placeholder
 * address UNWRITTEN, both in the inode/indirect block pointers and in
 * the returned buffer's b_blkno.
 */
/* VOP_BWRITE ULFS_NIADDR+2 times */
int
lfs_balloc(struct vnode *vp, off_t startoffset, int iosize, kauth_cred_t cred,
    int flags, struct buf **bpp)
{
	int offset;
	daddr_t daddr, idaddr;
	struct buf *ibp, *bp;
	struct inode *ip;
	struct lfs *fs;
	struct indir indirs[ULFS_NIADDR+2], *idp;
	daddr_t lbn, lastblock;
	int bcount;
	int error, frags, i, nsize, osize, num;

	ip = VTOI(vp);
	fs = ip->i_lfs;
	/* Byte offset of startoffset within its logical block. */
	offset = lfs_blkoff(fs, startoffset);
	KASSERT(iosize <= lfs_sb_getbsize(fs));
	/* Logical block number containing startoffset. */
	lbn = lfs_lblkno(fs, startoffset);
	/* (void)lfs_check(vp, lbn, 0); */

	ASSERT_MAYBE_SEGLOCK(fs);

	/*
	 * Three cases: it's a block beyond the end of file, it's a block in
	 * the file that may or may not have been assigned a disk address or
	 * we're writing an entire block.
	 *
	 * Note, if the daddr is UNWRITTEN, the block already exists in
	 * the cache (it was read or written earlier).	If so, make sure
	 * we don't count it as a new block or zero out its contents. If
	 * it did not, make sure we allocate any necessary indirect
	 * blocks.
	 *
	 * If we are writing a block beyond the end of the file, we need to
	 * check if the old last block was a fragment.	If it was, we need
	 * to rewrite it.
	 */

	/* Callers that bail out on error must not see a stale pointer. */
	if (bpp)
		*bpp = NULL;

	/* Check for block beyond end of file and fragment extension needed. */
	lastblock = lfs_lblkno(fs, ip->i_size);
	if (lastblock < ULFS_NDADDR && lastblock < lbn) {
		osize = lfs_blksize(fs, ip, lastblock);
		if (osize < lfs_sb_getbsize(fs) && osize > 0) {
			/*
			 * The old last block is a partial (fragment) direct
			 * block: grow it to a full block before allocating
			 * anything past it.
			 */
			if ((error = lfs_fragextend(vp, osize, lfs_sb_getbsize(fs),
						    lastblock,
						    (bpp ? &bp : NULL), cred)))
				return (error);
			/* The file now ends on the block boundary. */
			ip->i_size = (lastblock + 1) * lfs_sb_getbsize(fs);
			lfs_dino_setsize(fs, ip->i_din, ip->i_size);
			uvm_vnp_setsize(vp, ip->i_size);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			/*
			 * bp is only valid when bpp was non-NULL (see the
			 * lfs_fragextend() call above).
			 * NOTE(review): the VOP_BWRITE() result is
			 * deliberately discarded here.
			 */
			if (bpp)
				(void) VOP_BWRITE(bp->b_vp, bp);
		}
	}

	/*
	 * If the block we are writing is a direct block, it's the last
	 * block in the file, and offset + iosize is less than a full
	 * block, we can write one or more fragments.  There are two cases:
	 * the block is brand new and we should allocate it the correct
	 * size or it already exists and contains some fragments and
	 * may need to extend it.
	 */
	if (lbn < ULFS_NDADDR && lfs_lblkno(fs, ip->i_size) <= lbn) {
		osize = lfs_blksize(fs, ip, lbn);
		/* Size needed to cover the I/O, rounded up to fragments. */
		nsize = lfs_fragroundup(fs, offset + iosize);
		if (lfs_lblktosize(fs, lbn) >= ip->i_size) {
			/* Brand new block or fragment */
			frags = lfs_numfrags(fs, nsize);
			if (!ISSPACE(fs, frags, cred))
				return ENOSPC;
			if (bpp) {
				*bpp = bp = getblk(vp, lbn, nsize, 0, 0);
				bp->b_blkno = UNWRITTEN;
				if (flags & B_CLRBUF)
					clrbuf(bp);
			}
			/* Charge the new fragments to the inode and the fs. */
			ip->i_lfs_effnblks += frags;
			mutex_enter(&lfs_lock);
			lfs_sb_subbfree(fs, frags);
			mutex_exit(&lfs_lock);
			lfs_dino_setdb(fs, ip->i_din, lbn, UNWRITTEN);
		} else {
			if (nsize <= osize) {
				/* No need to extend */
				if (bpp && (error = bread(vp, lbn, osize,
				    0, &bp)))
					return error;
			} else {
				/* Extend existing block */
				if ((error =
				     lfs_fragextend(vp, osize, nsize, lbn,
						    (bpp ? &bp : NULL), cred)))
					return error;
			}
			/* bp was filled in above only when bpp is non-NULL. */
			if (bpp)
				*bpp = bp;
		}
		return 0;
	}

	/*
	 * General case: look up the block's current address and the chain
	 * of indirect blocks (indirs[0..num-1]) that leads to it.
	 */
	error = ulfs_bmaparray(vp, lbn, &daddr, &indirs[0], &num, NULL, NULL);
	if (error)
		return (error);

	KASSERT(daddr <= LFS_MAX_DADDR(fs));

	/*
	 * Do byte accounting all at once, so we can gracefully fail *before*
	 * we start assigning blocks.
	 */
	frags = fs->um_seqinc;
	bcount = 0;
	/* One unit for the data block itself, if it is new... */
	if (daddr == UNASSIGNED) {
		bcount = frags;
	}
	/* ...plus one for every indirect block that does not exist yet. */
	for (i = 1; i < num; ++i) {
		if (!indirs[i].in_exists) {
			bcount += frags;
		}
	}
	if (ISSPACE(fs, bcount, cred)) {
		mutex_enter(&lfs_lock);
		lfs_sb_subbfree(fs, bcount);
		mutex_exit(&lfs_lock);
		ip->i_lfs_effnblks += bcount;
	} else {
		return ENOSPC;
	}
	/*
	 * NOTE(review): the charge taken above is not given back on the
	 * error returns further down in this function.
	 */

	if (daddr == UNASSIGNED) {
		/* Mark a missing top-level indirect pointer as pending. */
		if (num > 0 && lfs_dino_getib(fs, ip->i_din, indirs[0].in_off) == 0) {
			lfs_dino_setib(fs, ip->i_din, indirs[0].in_off, UNWRITTEN);
		}

		/*
		 * Create new indirect blocks if necessary
		 */
		if (num > 1) {
			idaddr = lfs_dino_getib(fs, ip->i_din, indirs[0].in_off);
			for (i = 1; i < num; ++i) {
				ibp = getblk(vp, indirs[i].in_lbn,
				    lfs_sb_getbsize(fs), 0,0);
				if (!indirs[i].in_exists) {
					/* New indirect block: start zeroed. */
					clrbuf(ibp);
					ibp->b_blkno = UNWRITTEN;
				} else if (!(ibp->b_oflags & (BO_DELWRI | BO_DONE))) {
					/*
					 * Exists but is not valid in the
					 * cache: read it from idaddr.
					 * NOTE(review): the biowait() result
					 * is not checked; the buffer contents
					 * are used and written back below
					 * regardless.
					 */
					ibp->b_blkno = LFS_FSBTODB(fs, idaddr);
					ibp->b_flags |= B_READ;
					VOP_STRATEGY(vp, ibp);
					biowait(ibp);
				}
				/*
				 * This block exists, but the next one may not.
				 * If that is the case mark it UNWRITTEN to keep
				 * the accounting straight.
				 */
				/* XXX ondisk32 */
				if (((int32_t *)ibp->b_data)[indirs[i].in_off] == 0)
					((int32_t *)ibp->b_data)[indirs[i].in_off] =
						UNWRITTEN;
				/* XXX ondisk32 */
				/* Address of the next level, for the next pass. */
				idaddr = ((int32_t *)ibp->b_data)[indirs[i].in_off];
#ifdef DEBUG
				if (vp == fs->lfs_ivnode) {
					LFS_ENTER_LOG("balloc", __FILE__,
						__LINE__, indirs[i].in_lbn,
						ibp->b_flags, curproc->p_pid);
				}
#endif
				if ((error = VOP_BWRITE(ibp->b_vp, ibp)))
					return error;
			}
		}
	}


	/*
	 * Get the existing block from the cache, if requested.
	 */
	if (bpp)
		*bpp = bp = getblk(vp, lbn, lfs_blksize(fs, ip, lbn), 0, 0);

	/*
	 * Do accounting on blocks that represent pages.
	 */
	if (!bpp)
		lfs_register_block(vp, lbn);

	/*
	 * The block we are writing may be a brand new block
	 * in which case we need to do accounting.
	 *
	 * We can tell a truly new block because ulfs_bmaparray will say
	 * it is UNASSIGNED.  Once we allocate it we will assign it the
	 * disk address UNWRITTEN.
	 */
	if (daddr == UNASSIGNED) {
		if (bpp) {
			if (flags & B_CLRBUF)
				clrbuf(bp);

			/* Note the new address */
			bp->b_blkno = UNWRITTEN;
		}

		/* Record UNWRITTEN in whatever holds this block's pointer. */
		switch (num) {
		    case 0:
			/* Pointer lives in the inode's direct array. */
			lfs_dino_setdb(fs, ip->i_din, lbn, UNWRITTEN);
			break;
		    case 1:
			/* Pointer lives in the inode's indirect array. */
			lfs_dino_setib(fs, ip->i_din, indirs[0].in_off, UNWRITTEN);
			break;
		    default:
			/* Pointer lives in the last indirect block. */
			idp = &indirs[num - 1];
			if (bread(vp, idp->in_lbn, lfs_sb_getbsize(fs),
				  B_MODIFY, &ibp))
				panic("lfs_balloc: bread bno %lld",
				    (long long)idp->in_lbn);
			/* XXX ondisk32 */
			((int32_t *)ibp->b_data)[idp->in_off] = UNWRITTEN;
#ifdef DEBUG
			if (vp == fs->lfs_ivnode) {
				LFS_ENTER_LOG("balloc", __FILE__,
					__LINE__, idp->in_lbn,
					ibp->b_flags, curproc->p_pid);
			}
#endif
			/*
			 * NOTE(review): unlike the indirect-block loop
			 * above, the VOP_BWRITE() result is ignored here.
			 */
			VOP_BWRITE(ibp->b_vp, ibp);
		}
	} else if (bpp && !(bp->b_oflags & (BO_DONE|BO_DELWRI))) {
		/*
		 * Not a brand new block, also not in the cache;
		 * read it in from disk.
		 */
		if (iosize == lfs_sb_getbsize(fs))
			/* Optimization: I/O is unnecessary. */
			bp->b_blkno = daddr;
		else {
			/*
			 * We need to read the block to preserve the
			 * existing bytes.
			 */
			bp->b_blkno = daddr;
			bp->b_flags |= B_READ;
			VOP_STRATEGY(vp, bp);
			/*
			 * NOTE(review): if biowait() fails, *bpp still
			 * points at bp when the error is returned;
			 * confirm callers expect that.
			 */
			return (biowait(bp));
		}
	}

	return (0);
}
/*
 * lfs_truncate: change the size of the file behind ovp to length bytes.
 *
 * Parameters:
 *	ovp	vnode to resize.
 *	length	new size in bytes; negative values are rejected.
 *	ioflag	only IO_SYNC is examined; it selects synchronous
 *		allocation (B_SYNC) and synchronous page flushing
 *		(PGO_SYNCIO).
 *	cred	credential passed to the block allocation routines.
 *
 * Behaviour, in order of the checks below:
 *  - Character/block devices, FIFOs and sockets: nothing to do, returns 0.
 *  - length < 0: returns EINVAL.
 *  - length equal to the current size: only uvm_vnp_setsize() is called
 *    and 0 is returned; the inode flags are left alone, so timestamps are
 *    not updated.
 *  - Symlink stored in the inode (SHORTLINK): the link text is zeroed and
 *    the size set to 0.
 *  - Growing: returns EFBIG if length exceeds um_maxfilesize; otherwise
 *    the last byte of the new range is allocated via ulfs_balloc_range()
 *    (regular files other than the ifile vnode) or lfs_balloc().
 *  - Shrinking: space is reserved, the tail of the last kept block is
 *    zeroed, buffers past the new end are discarded, indirect and then
 *    direct blocks past the new end are freed, and the inode and
 *    superblock block counts are adjusted.
 *
 * Returns 0 on success.  On the shrink path the value is an error
 * recorded while releasing blocks (allerror) if there is one, otherwise
 * the most recent error value.
 */
int
lfs_truncate(struct vnode *ovp, off_t length, int ioflag, kauth_cred_t cred)
{
	daddr_t lastblock;
	struct inode *oip = VTOI(ovp);
	daddr_t bn, lbn, lastiblock[ULFS_NIADDR], indir_lbn[ULFS_NIADDR];
	/* XXX ondisk32 */
	int32_t newblks[ULFS_NDADDR + ULFS_NIADDR];
	struct lfs *fs;
	struct buf *bp;
	int offset, size, level;
	daddr_t count, rcount;
	daddr_t blocksreleased = 0, real_released = 0;
	int i, nblocks;
	int aflags, error, allerror = 0;
	off_t osize;
	long lastseg;
	size_t bc;
	int obufsize, odb;
	int usepc;

	if (ovp->v_type == VCHR || ovp->v_type == VBLK ||
	    ovp->v_type == VFIFO || ovp->v_type == VSOCK) {
		KASSERT(oip->i_size == 0);
		return 0;
	}

	if (length < 0)
		return (EINVAL);

	/*
	 * Just return and not update modification times.
	 *
	 * This is the only equal-size check in the function: nothing
	 * between here and the grow/shrink decision below modifies
	 * oip->i_size or length without returning.
	 */
	if (oip->i_size == length) {
		/* still do a uvm_vnp_setsize() as writesize may be larger */
		uvm_vnp_setsize(ovp, length);
		return (0);
	}

	fs = oip->i_lfs;

	/* Symlink whose target is stored directly in the inode. */
	if (ovp->v_type == VLNK &&
	    (oip->i_size < fs->um_maxsymlinklen ||
	     (fs->um_maxsymlinklen == 0 &&
	      oip->i_ffs1_blocks == 0))) {
#ifdef DIAGNOSTIC
		if (length != 0)
			panic("lfs_truncate: partial truncate of symlink");
#endif
		memset((char *)SHORTLINK(oip), 0, (u_int)oip->i_size);
		oip->i_size = oip->i_ffs1_size = 0;
		oip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (lfs_update(ovp, NULL, NULL, 0));
	}

	lfs_imtime(fs);
	osize = oip->i_size;
	/* Regular files other than the ifile vnode go through UVM pages. */
	usepc = (ovp->v_type == VREG && ovp != fs->lfs_ivnode);

	ASSERT_NO_SEGLOCK(fs);
	/*
	 * Lengthen the size of the file. We must ensure that the
	 * last byte of the file is allocated. Since the smallest
	 * value of osize is 0, length will be at least 1.
	 */
	if (osize < length) {
		if (length > fs->um_maxfilesize)
			return (EFBIG);
		aflags = B_CLRBUF;
		if (ioflag & IO_SYNC)
			aflags |= B_SYNC;
		if (usepc) {
			/*
			 * The old last block is a direct block, is not the
			 * block that will hold the new end of file, and the
			 * old size is not block aligned: allocate the rest
			 * of that block first.
			 */
			if (lfs_lblkno(fs, osize) < ULFS_NDADDR &&
			    lfs_lblkno(fs, osize) != lfs_lblkno(fs, length) &&
			    lfs_blkroundup(fs, osize) != osize) {
				off_t eob;

				eob = lfs_blkroundup(fs, osize);
				uvm_vnp_setwritesize(ovp, eob);
				error = ulfs_balloc_range(ovp, osize,
				    eob - osize, cred, aflags);
				if (error) {
					/* Undo by truncating back to osize. */
					(void) lfs_truncate(ovp, osize,
						    ioflag & IO_SYNC, cred);
					return error;
				}
				if (ioflag & IO_SYNC) {
					mutex_enter(ovp->v_interlock);
					VOP_PUTPAGES(ovp,
					    trunc_page(osize & lfs_sb_getbmask(fs)),
					    round_page(eob),
					    PGO_CLEANIT | PGO_SYNCIO);
				}
			}
			uvm_vnp_setwritesize(ovp, length);
			/* Allocate the block holding the new last byte. */
			error = ulfs_balloc_range(ovp, length - 1, 1, cred,
						 aflags);
			if (error) {
				/* Undo by truncating back to osize. */
				(void) lfs_truncate(ovp, osize,
						    ioflag & IO_SYNC, cred);
				return error;
			}
			uvm_vnp_setsize(ovp, length);
			oip->i_flag |= IN_CHANGE | IN_UPDATE;
			KASSERT(ovp->v_size == oip->i_size);
			oip->i_lfs_hiblk = lfs_lblkno(fs,
			    oip->i_size + lfs_sb_getbsize(fs) - 1) - 1;
			return (lfs_update(ovp, NULL, NULL, 0));
		} else {
			/*
			 * Buffer-cache path: hold a reservation only for
			 * the duration of the lfs_balloc() call.
			 */
			error = lfs_reserve(fs, ovp, NULL,
			    lfs_btofsb(fs, (ULFS_NIADDR + 2) << lfs_sb_getbshift(fs)));
			if (error)
				return (error);
			error = lfs_balloc(ovp, length - 1, 1, cred,
					   aflags, &bp);
			lfs_reserve(fs, ovp, NULL,
			    -lfs_btofsb(fs, (ULFS_NIADDR + 2) << lfs_sb_getbshift(fs)));
			if (error)
				return (error);
			oip->i_ffs1_size = oip->i_size = length;
			uvm_vnp_setsize(ovp, length);
			(void) VOP_BWRITE(bp->b_vp, bp);
			oip->i_flag |= IN_CHANGE | IN_UPDATE;
			oip->i_lfs_hiblk = lfs_lblkno(fs,
			    oip->i_size + lfs_sb_getbsize(fs) - 1) - 1;
			return (lfs_update(ovp, NULL, NULL, 0));
		}
	}

	/*
	 * From here on the file is being shortened.  Every exit below must
	 * give this reservation back with the matching negative call.
	 */
	if ((error = lfs_reserve(fs, ovp, NULL,
	    lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs)))) != 0)
		return (error);

	/*
	 * Shorten the size of the file. If the file is not being
	 * truncated to a block boundary, the contents of the
	 * partial block following the end of the file must be
	 * zero'ed in case it ever becomes accessible again because
	 * of subsequent file growth. Directories however are not
	 * zero'ed as they should grow back initialized to empty.
	 */
	offset = lfs_blkoff(fs, length);
	lastseg = -1;
	bc = 0;

	/* Released at errout under the same condition. */
	if (ovp != fs->lfs_ivnode)
		lfs_seglock(fs, SEGM_PROT);
	if (offset == 0) {
		/* New end is block aligned: nothing to zero. */
		oip->i_size = oip->i_ffs1_size = length;
	} else if (!usepc) {
		lbn = lfs_lblkno(fs, length);
		aflags = B_CLRBUF;
		if (ioflag & IO_SYNC)
			aflags |= B_SYNC;
		error = lfs_balloc(ovp, length - 1, 1, cred, aflags, &bp);
		if (error) {
			lfs_reserve(fs, ovp, NULL,
			    -lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs)));
			goto errout;
		}
		/* Remember the buffer's size before it is shrunk below. */
		obufsize = bp->b_bufsize;
		odb = lfs_btofsb(fs, bp->b_bcount);
		oip->i_size = oip->i_ffs1_size = length;
		size = lfs_blksize(fs, oip, lbn);
		if (ovp->v_type != VDIR)
			memset((char *)bp->b_data + offset, 0,
			       (u_int)(size - offset));
		allocbuf(bp, size, 1);
		if ((bp->b_flags & B_LOCKED) != 0 && bp->b_iodone == NULL) {
			mutex_enter(&lfs_lock);
			locked_queue_bytes -= obufsize - bp->b_bufsize;
			mutex_exit(&lfs_lock);
		}
		if (bp->b_oflags & BO_DELWRI) {
			lfs_sb_addavail(fs, odb - lfs_btofsb(fs, size));
			/* XXX shouldn't this wake up on lfs_availsleep? */
		}
		(void) VOP_BWRITE(bp->b_vp, bp);
	} else { /* vp->v_type == VREG && length < osize && offset != 0 */
		/*
		 * When truncating a regular file down to a non-block-aligned
		 * size, we must zero the part of last block which is past
		 * the new EOF.  We must synchronously flush the zeroed pages
		 * to disk since the new pages will be invalidated as soon
		 * as we inform the VM system of the new, smaller size.
		 * We must do this before acquiring the GLOCK, since fetching
		 * the pages will acquire the GLOCK internally.
		 * So there is a window where another thread could see a whole
		 * zeroed page past EOF, but that's life.
		 */
		daddr_t xlbn;
		voff_t eoz;

		aflags = ioflag & IO_SYNC ? B_SYNC : 0;
		error = ulfs_balloc_range(ovp, length - 1, 1, cred, aflags);
		if (error) {
			lfs_reserve(fs, ovp, NULL,
			    -lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs)));
			goto errout;
		}
		xlbn = lfs_lblkno(fs, length);
		size = lfs_blksize(fs, oip, xlbn);
		/* End of the range to zero: end of block or old EOF. */
		eoz = MIN(lfs_lblktosize(fs, xlbn) + size, osize);
		ubc_zerorange(&ovp->v_uobj, length, eoz - length,
		    UBC_UNMAP_FLAG(ovp));
		if (round_page(eoz) > round_page(length)) {
			mutex_enter(ovp->v_interlock);
			error = VOP_PUTPAGES(ovp, round_page(length),
			    round_page(eoz),
			    PGO_CLEANIT | PGO_DEACTIVATE |
			    ((ioflag & IO_SYNC) ? PGO_SYNCIO : 0));
			if (error) {
				lfs_reserve(fs, ovp, NULL,
				    -lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs)));
				goto errout;
			}
		}
	}

	/* Paired with genfs_node_unlock() just before errout. */
	genfs_node_wrlock(ovp);

	oip->i_size = oip->i_ffs1_size = length;
	uvm_vnp_setsize(ovp, length);

	/*
	 * Calculate index into inode's block list of
	 * last direct and indirect blocks (if any)
	 * which we want to keep.  Lastblock is -1 when
	 * the file is truncated to 0.
	 */
	/* Avoid sign overflow - XXX assumes that off_t is a quad_t. */
	if (length > QUAD_MAX - lfs_sb_getbsize(fs))
		lastblock = lfs_lblkno(fs, QUAD_MAX - lfs_sb_getbsize(fs));
	else
		lastblock = lfs_lblkno(fs, length + lfs_sb_getbsize(fs) - 1) - 1;
	lastiblock[SINGLE] = lastblock - ULFS_NDADDR;
	lastiblock[DOUBLE] = lastiblock[SINGLE] - LFS_NINDIR(fs);
	lastiblock[TRIPLE] = lastiblock[DOUBLE] - LFS_NINDIR(fs) * LFS_NINDIR(fs);
	/* Size of one full block, in the units lfs_btofsb() produces. */
	nblocks = lfs_btofsb(fs, lfs_sb_getbsize(fs));
	/*
	 * Record changed file and block pointers before we start
	 * freeing blocks.  lastiblock values are also normalized to -1
	 * for calls to lfs_indirtrunc below.
	 */
	memcpy((void *)newblks, (void *)&oip->i_ffs1_db[0], sizeof newblks);
	for (level = TRIPLE; level >= SINGLE; level--)
		if (lastiblock[level] < 0) {
			newblks[ULFS_NDADDR+level] = 0;
			lastiblock[level] = -1;
		}
	for (i = ULFS_NDADDR - 1; i > lastblock; i--)
		newblks[i] = 0;

	/*
	 * Temporarily restore the old size while blocks are released;
	 * the new size is put back further down.
	 */
	oip->i_size = oip->i_ffs1_size = osize;
	error = lfs_vtruncbuf(ovp, lastblock + 1, false, 0);
	if (error && !allerror)
		allerror = error;

	/*
	 * Indirect blocks first.
	 */
	indir_lbn[SINGLE] = -ULFS_NDADDR;
	indir_lbn[DOUBLE] = indir_lbn[SINGLE] - LFS_NINDIR(fs) - 1;
	indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - LFS_NINDIR(fs) * LFS_NINDIR(fs) - 1;
	for (level = TRIPLE; level >= SINGLE; level--) {
		bn = oip->i_ffs1_ib[level];
		if (bn != 0) {
			error = lfs_indirtrunc(oip, indir_lbn[level],
					       bn, lastiblock[level],
					       level, &count, &rcount,
					       &lastseg, &bc);
			if (error)
				allerror = error;
			real_released += rcount;
			blocksreleased += count;
			if (lastiblock[level] < 0) {
				/* Nothing at this level is kept: free it. */
				if (oip->i_ffs1_ib[level] > 0)
					real_released += nblocks;
				blocksreleased += nblocks;
				oip->i_ffs1_ib[level] = 0;
				lfs_blkfree(fs, oip, bn, lfs_sb_getbsize(fs),
					    &lastseg, &bc);
				lfs_deregister_block(ovp, bn);
			}
		}
		/* Part of this level is kept, so lower levels are too. */
		if (lastiblock[level] >= 0)
			goto done;
	}

	/*
	 * All whole direct blocks or frags.
	 */
	for (i = ULFS_NDADDR - 1; i > lastblock; i--) {
		long bsize, obsize;

		bn = oip->i_ffs1_db[i];
		if (bn == 0)
			continue;
		bsize = lfs_blksize(fs, oip, i);
		if (oip->i_ffs1_db[i] > 0) {
			/* Check for fragment size changes */
			obsize = oip->i_lfs_fragsize[i];
			real_released += lfs_btofsb(fs, obsize);
			oip->i_lfs_fragsize[i] = 0;
		} else
			obsize = 0;
		blocksreleased += lfs_btofsb(fs, bsize);
		oip->i_ffs1_db[i] = 0;
		lfs_blkfree(fs, oip, bn, obsize, &lastseg, &bc);
		lfs_deregister_block(ovp, bn);
	}
	if (lastblock < 0)
		goto done;

	/*
	 * Finally, look for a change in size of the
	 * last direct block; release any frags.
	 */
	bn = oip->i_ffs1_db[lastblock];
	if (bn != 0) {
		long oldspace, newspace;
#if 0
		long olddspace;
#endif

		/*
		 * Calculate amount of space we're giving
		 * back as old block size minus new block size.
		 */
		oldspace = lfs_blksize(fs, oip, lastblock);
#if 0
		olddspace = oip->i_lfs_fragsize[lastblock];
#endif

		oip->i_size = oip->i_ffs1_size = length;
		newspace = lfs_blksize(fs, oip, lastblock);
		if (newspace == 0)
			panic("itrunc: newspace");
		if (oldspace - newspace > 0) {
			blocksreleased += lfs_btofsb(fs, oldspace - newspace);
		}
#if 0
		if (bn > 0 && olddspace - newspace > 0) {
			/* No segment accounting here, just vnode */
			real_released += lfs_btofsb(fs, olddspace - newspace);
		}
#endif
	}

done:
	/* Finish segment accounting corrections */
	lfs_update_seguse(fs, oip, lastseg, bc);
#ifdef DIAGNOSTIC
	/* The inode's pointers must match the snapshot taken in newblks. */
	for (level = SINGLE; level <= TRIPLE; level++)
		if ((newblks[ULFS_NDADDR + level] == 0) !=
		    ((oip->i_ffs1_ib[level]) == 0)) {
			panic("lfs itrunc1");
		}
	for (i = 0; i < ULFS_NDADDR; i++)
		if ((newblks[i] == 0) != (oip->i_ffs1_db[i] == 0)) {
			panic("lfs itrunc2");
		}
	if (length == 0 &&
	    (!LIST_EMPTY(&ovp->v_cleanblkhd) || !LIST_EMPTY(&ovp->v_dirtyblkhd)))
		panic("lfs itrunc3");
#endif /* DIAGNOSTIC */
	/*
	 * Put back the real size.
	 */
	oip->i_size = oip->i_ffs1_size = length;
	oip->i_lfs_effnblks -= blocksreleased;
	oip->i_ffs1_blocks -= real_released;
	mutex_enter(&lfs_lock);
	lfs_sb_addbfree(fs, blocksreleased);
	mutex_exit(&lfs_lock);
#ifdef DIAGNOSTIC
	if (oip->i_size == 0 &&
	    (oip->i_ffs1_blocks != 0 || oip->i_lfs_effnblks != 0)) {
		printf("lfs_truncate: truncate to 0 but %d blks/%jd effblks\n",
		       oip->i_ffs1_blocks, (intmax_t)oip->i_lfs_effnblks);
		panic("lfs_truncate: persistent blocks");
	}
#endif

	/*
	 * If we truncated to zero, take us off the paging queue.
	 *
	 * NOTE(review): IN_PAGING is tested and cleared in i_flags here,
	 * while every other flag update in this function uses i_flag;
	 * confirm which field the code that sets IN_PAGING uses.
	 */
	mutex_enter(&lfs_lock);
	if (oip->i_size == 0 && oip->i_flags & IN_PAGING) {
		oip->i_flags &= ~IN_PAGING;
		TAILQ_REMOVE(&fs->lfs_pchainhd, oip, i_lfs_pchain);
	}
	mutex_exit(&lfs_lock);

	oip->i_flag |= IN_CHANGE;
#if defined(LFS_QUOTA) || defined(LFS_QUOTA2)
	(void) lfs_chkdq(oip, -blocksreleased, NOCRED, 0);
#endif
	lfs_reserve(fs, ovp, NULL,
	    -lfs_btofsb(fs, (2 * ULFS_NIADDR + 3) << lfs_sb_getbshift(fs)));
	genfs_node_unlock(ovp);
errout:
	/* Reached on success and from the early shrink-path failures. */
	oip->i_lfs_hiblk = lfs_lblkno(fs, oip->i_size + lfs_sb_getbsize(fs) - 1) - 1;
	if (ovp != fs->lfs_ivnode)
		lfs_segunlock(fs);
	return (allerror ? allerror : error);
}