/*
 * Rewrite an existing directory entry to point at the inode
 * supplied.  The parameters describing the directory entry are
 * set up by a call to namei.
 */
int
ufs_dirrewrite(struct inode *dp, struct inode *oip, ufsino_t newinum,
    int newtype, int isrmdir)
{
	struct buf *bp;
	struct direct *ep;
	struct vnode *vdp = ITOV(dp);
	int error;

	error = UFS_BUFATOFF(dp, (off_t)dp->i_offset, (char **)&ep, &bp);
	if (error)
		return (error);
	ep->d_ino = newinum;
	if (vdp->v_mount->mnt_maxsymlinklen > 0)
		ep->d_type = newtype;
	oip->i_effnlink--;
	if (DOINGSOFTDEP(vdp)) {
		softdep_change_linkcnt(oip, 0);
		softdep_setup_directory_change(bp, dp, oip, newinum, isrmdir);
		bdwrite(bp);
	} else {
		DIP_ADD(oip, nlink, -1);
		oip->i_flag |= IN_CHANGE;
		UFS_WAPBL_UPDATE(oip, MNT_WAIT);
		if (DOINGASYNC(vdp)) {
			bdwrite(bp);
			error = 0;
		} else {
			error = VOP_BWRITE(bp);
		}
	}
	dp->i_flag |= IN_CHANGE | IN_UPDATE;
	UFS_WAPBL_UPDATE(dp, MNT_WAIT);
	return (error);
}
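/*
 * Illustrative sketch (user-space, not kernel code): the effect of
 * ufs_dirrewrite() above, modeled on a block of fixed-size records.  The
 * "demo_" names and the simplified fixed-width layout are assumptions; the
 * real struct direct uses variable-length records located via namei, and
 * the kernel then writes the buffer back with bdwrite()/bwrite().
 */
#include <stdint.h>
#include <string.h>

struct demo_dirent {
	uint32_t d_ino;		/* inode number, 0 means unused slot */
	uint8_t  d_type;	/* file type */
	uint8_t  d_namlen;	/* length of name */
	char     d_name[26];	/* name, NUL-terminated in this model */
};

/* Retarget the named entry at a new inode; 0 on success, -1 if not found. */
static int
demo_dirrewrite(struct demo_dirent *ents, size_t nents, const char *name,
    uint32_t newinum, uint8_t newtype)
{
	size_t i;

	for (i = 0; i < nents; i++) {
		if (ents[i].d_ino != 0 && strcmp(ents[i].d_name, name) == 0) {
			ents[i].d_ino = newinum;	/* point at new inode */
			ents[i].d_type = newtype;	/* update file type */
			return (0);	/* caller flushes the block to disk */
		}
	}
	return (-1);
}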
/*
 * Update the access, modified, and inode change times as specified by the
 * IN_ACCESS, IN_UPDATE, and IN_CHANGE flags respectively.  Write the inode
 * to disk if the IN_MODIFIED flag is set (it may be set initially, or by
 * the timestamp update).  The IN_LAZYMOD flag is set to force a write
 * later if not now.  If we write now, then clear both IN_MODIFIED and
 * IN_LAZYMOD to reflect the presumably successful write, and if waitfor is
 * set, then wait for the write to complete.
 */
int
ffs_update(struct vnode *vp, int waitfor)
{
	struct fs *fs;
	struct buf *bp;
	struct inode *ip;
	int error;

	ufs_itimes(vp);
	ip = VTOI(vp);
	if ((ip->i_flag & IN_MODIFIED) == 0 && waitfor == 0)
		return (0);
	ip->i_flag &= ~(IN_LAZYMOD | IN_MODIFIED);
	fs = ip->i_fs;
	if (fs->fs_ronly)
		return (0);

	/*
	 * The vnode type is usually set to VBAD if an unrecoverable I/O
	 * error has occurred (such as when reading the inode).  Clear the
	 * modified bits but do not write anything out in this case.
	 */
	if (vp->v_type == VBAD)
		return (0);

	/*
	 * Ensure that uid and gid are correct.  This is a temporary
	 * fix until fsck has been changed to do the update.
	 */
	if (fs->fs_inodefmt < FS_44INODEFMT) {		/* XXX */
		ip->i_din.di_ouid = ip->i_uid;		/* XXX */
		ip->i_din.di_ogid = ip->i_gid;		/* XXX */
	}						/* XXX */

	error = bread(ip->i_devvp,
	    fsbtodoff(fs, ino_to_fsba(fs, ip->i_number)),
	    (int)fs->fs_bsize, &bp);
	if (error) {
		brelse(bp);
		return (error);
	}
	if (DOINGSOFTDEP(vp))
		softdep_update_inodeblock(ip, bp, waitfor);
	else if (ip->i_effnlink != ip->i_nlink)
		panic("ffs_update: bad link cnt");
	*((struct ufs1_dinode *)bp->b_data +
	    ino_to_fsbo(fs, ip->i_number)) = ip->i_din;
	if (waitfor && !DOINGASYNC(vp)) {
		return (bwrite(bp));
	} else if (vm_page_count_severe() || buf_dirty_count_severe()) {
		return (bwrite(bp));
	} else {
		if (bp->b_bufsize == fs->fs_bsize)
			bp->b_flags |= B_CLUSTEROK;
		bdwrite(bp);
		return (0);
	}
}
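/*
 * Illustrative sketch (user-space, hypothetical DEMO_* names): the
 * flag-to-timestamp mapping that ufs_itimes()/ffs_update() above rely on.
 * IN_ACCESS, IN_UPDATE and IN_CHANGE request atime/mtime/ctime updates;
 * applying any of them marks the in-core inode IN_MODIFIED so a later
 * ffs_update() pushes it to disk.
 */
#include <time.h>

#define DEMO_IN_ACCESS   0x01	/* access time update requested */
#define DEMO_IN_UPDATE   0x02	/* modification time update requested */
#define DEMO_IN_CHANGE   0x04	/* inode change time update requested */
#define DEMO_IN_MODIFIED 0x08	/* in-core inode differs from disk copy */

struct demo_inode {
	int    i_flag;
	time_t i_atime, i_mtime, i_ctime;
};

static void
demo_itimes(struct demo_inode *ip)
{
	time_t now;

	if ((ip->i_flag &
	    (DEMO_IN_ACCESS | DEMO_IN_UPDATE | DEMO_IN_CHANGE)) == 0)
		return;
	now = time(NULL);
	if (ip->i_flag & DEMO_IN_ACCESS)
		ip->i_atime = now;
	if (ip->i_flag & DEMO_IN_UPDATE)
		ip->i_mtime = now;
	if (ip->i_flag & DEMO_IN_CHANGE)
		ip->i_ctime = now;
	/* Any timestamp change means the on-disk inode is now stale. */
	ip->i_flag |= DEMO_IN_MODIFIED;
	ip->i_flag &= ~(DEMO_IN_ACCESS | DEMO_IN_UPDATE | DEMO_IN_CHANGE);
}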
/* * ffs_balloc(struct vnode *a_vp, ufs_daddr_t a_lbn, int a_size, * struct ucred *a_cred, int a_flags, struct buf *a_bpp) * * Balloc defines the structure of filesystem storage by allocating * the physical blocks on a device given the inode and the logical * block number in a file. * * NOTE: B_CLRBUF - this flag tells balloc to clear invalid portions * of the buffer. However, any dirty bits will override missing * valid bits. This case occurs when writable mmaps are truncated * and then extended. */ int ffs_balloc(struct vop_balloc_args *ap) { struct inode *ip; ufs_daddr_t lbn; int size; struct ucred *cred; int flags; struct fs *fs; ufs_daddr_t nb; struct buf *bp, *nbp, *dbp; struct vnode *vp; struct indir indirs[NIADDR + 2]; ufs_daddr_t newb, *bap, pref; int deallocated, osize, nsize, num, i, error; ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; ufs_daddr_t *lbns_remfree, lbns[NIADDR + 1]; int unwindidx; int seqcount; vp = ap->a_vp; ip = VTOI(vp); fs = ip->i_fs; lbn = lblkno(fs, ap->a_startoffset); size = blkoff(fs, ap->a_startoffset) + ap->a_size; if (size > fs->fs_bsize) panic("ffs_balloc: blk too big"); *ap->a_bpp = NULL; if (lbn < 0) return (EFBIG); cred = ap->a_cred; flags = ap->a_flags; /* * The vnode must be locked for us to be able to safely mess * around with the inode. */ if (vn_islocked(vp) != LK_EXCLUSIVE) { panic("ffs_balloc: vnode %p not exclusively locked!", vp); } /* * If the next write will extend the file into a new block, * and the file is currently composed of a fragment * this fragment has to be extended to be a full block. */ nb = lblkno(fs, ip->i_size); if (nb < NDADDR && nb < lbn) { /* * The filesize prior to this write can fit in direct * blocks (ex. fragmentation is possibly done) * we are now extending the file write beyond * the block which has end of the file prior to this write. */ osize = blksize(fs, ip, nb); /* * osize gives disk allocated size in the last block. It is * either in fragments or a file system block size. */ if (osize < fs->fs_bsize && osize > 0) { /* A few fragments are already allocated, since the * current extends beyond this block allocated the * complete block as fragments are on in last block. */ error = ffs_realloccg(ip, nb, ffs_blkpref(ip, nb, (int)nb, &ip->i_db[0]), osize, (int)fs->fs_bsize, cred, &bp); if (error) return (error); if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, nb, dofftofsb(fs, bp->b_bio2.bio_offset), ip->i_db[nb], fs->fs_bsize, osize, bp); /* adjust the inode size, we just grew */ ip->i_size = smalllblktosize(fs, nb + 1); ip->i_db[nb] = dofftofsb(fs, bp->b_bio2.bio_offset); ip->i_flag |= IN_CHANGE | IN_UPDATE; if (flags & B_SYNC) bwrite(bp); else bawrite(bp); /* bp is already released here */ } } /* * The first NDADDR blocks are direct blocks */ if (lbn < NDADDR) { nb = ip->i_db[lbn]; if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) { error = bread(vp, lblktodoff(fs, lbn), fs->fs_bsize, &bp); if (error) { brelse(bp); return (error); } bp->b_bio2.bio_offset = fsbtodoff(fs, nb); *ap->a_bpp = bp; return (0); } if (nb != 0) { /* * Consider need to reallocate a fragment. */ osize = fragroundup(fs, blkoff(fs, ip->i_size)); nsize = fragroundup(fs, size); if (nsize <= osize) { error = bread(vp, lblktodoff(fs, lbn), osize, &bp); if (error) { brelse(bp); return (error); } bp->b_bio2.bio_offset = fsbtodoff(fs, nb); } else { /* * NOTE: ffs_realloccg() issues a bread(). 
*/ error = ffs_realloccg(ip, lbn, ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]), osize, nsize, cred, &bp); if (error) return (error); if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, lbn, dofftofsb(fs, bp->b_bio2.bio_offset), nb, nsize, osize, bp); } } else { if (ip->i_size < smalllblktosize(fs, lbn + 1)) nsize = fragroundup(fs, size); else nsize = fs->fs_bsize; error = ffs_alloc(ip, lbn, ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]), nsize, cred, &newb); if (error) return (error); bp = getblk(vp, lblktodoff(fs, lbn), nsize, 0, 0); bp->b_bio2.bio_offset = fsbtodoff(fs, newb); if (flags & B_CLRBUF) vfs_bio_clrbuf(bp); if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, lbn, newb, 0, nsize, 0, bp); } ip->i_db[lbn] = dofftofsb(fs, bp->b_bio2.bio_offset); ip->i_flag |= IN_CHANGE | IN_UPDATE; *ap->a_bpp = bp; return (0); } /* * Determine the number of levels of indirection. */ pref = 0; if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0) return(error); #ifdef DIAGNOSTIC if (num < 1) panic ("ffs_balloc: ufs_bmaparray returned indirect block"); #endif /* * Get a handle on the data block buffer before working through * indirect blocks to avoid a deadlock between the VM system holding * a locked VM page and issuing a BMAP (which tries to lock the * indirect blocks), and the filesystem holding a locked indirect * block and then trying to read a data block (which tries to lock * the underlying VM pages). */ dbp = getblk(vp, lblktodoff(fs, lbn), fs->fs_bsize, 0, 0); /* * Setup undo history */ allocib = NULL; allocblk = allociblk; lbns_remfree = lbns; unwindidx = -1; /* * Fetch the first indirect block directly from the inode, allocating * one if necessary. */ --num; nb = ip->i_ib[indirs[0].in_off]; if (nb == 0) { pref = ffs_blkpref(ip, lbn, 0, NULL); /* * If the filesystem has run out of space we can skip the * full fsync/undo of the main [fail] case since no undo * history has been built yet. Hence the goto fail2. */ if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) goto fail2; nb = newb; *allocblk++ = nb; *lbns_remfree++ = indirs[1].in_lbn; bp = getblk(vp, lblktodoff(fs, indirs[1].in_lbn), fs->fs_bsize, 0, 0); bp->b_bio2.bio_offset = fsbtodoff(fs, nb); vfs_bio_clrbuf(bp); if (DOINGSOFTDEP(vp)) { softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off, newb, 0, fs->fs_bsize, 0, bp); bdwrite(bp); } else { /* * Write synchronously so that indirect blocks * never point at garbage. */ if (DOINGASYNC(vp)) bdwrite(bp); else if ((error = bwrite(bp)) != 0) goto fail; } allocib = &ip->i_ib[indirs[0].in_off]; *allocib = nb; ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * Fetch through the indirect blocks, allocating as necessary. */ for (i = 1;;) { error = bread(vp, lblktodoff(fs, indirs[i].in_lbn), (int)fs->fs_bsize, &bp); if (error) { brelse(bp); goto fail; } bap = (ufs_daddr_t *)bp->b_data; nb = bap[indirs[i].in_off]; if (i == num) break; i += 1; if (nb != 0) { bqrelse(bp); continue; } if (pref == 0) pref = ffs_blkpref(ip, lbn, 0, NULL); if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) { brelse(bp); goto fail; } nb = newb; *allocblk++ = nb; *lbns_remfree++ = indirs[i].in_lbn; nbp = getblk(vp, lblktodoff(fs, indirs[i].in_lbn), fs->fs_bsize, 0, 0); nbp->b_bio2.bio_offset = fsbtodoff(fs, nb); vfs_bio_clrbuf(nbp); if (DOINGSOFTDEP(vp)) { softdep_setup_allocindir_meta(nbp, ip, bp, indirs[i - 1].in_off, nb); bdwrite(nbp); } else { /* * Write synchronously so that indirect blocks * never point at garbage. 
*/ if ((error = bwrite(nbp)) != 0) { brelse(bp); goto fail; } } bap[indirs[i - 1].in_off] = nb; if (allocib == NULL && unwindidx < 0) unwindidx = i - 1; /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & B_SYNC) { bwrite(bp); } else { if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } } /* * Get the data block, allocating if necessary. We have already * called getblk() on the data block buffer, dbp. If we have to * allocate it and B_CLRBUF has been set the inference is an intention * to zero out the related disk blocks, so we do not have to issue * a read. Instead we simply call vfs_bio_clrbuf(). If B_CLRBUF is * not set the caller intends to overwrite the entire contents of the * buffer and we don't waste time trying to clean up the contents. * * bp references the current indirect block. When allocating, * the block must be updated. */ if (nb == 0) { pref = ffs_blkpref(ip, lbn, indirs[i].in_off, &bap[0]); error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb); if (error) { brelse(bp); goto fail; } nb = newb; *allocblk++ = nb; *lbns_remfree++ = lbn; dbp->b_bio2.bio_offset = fsbtodoff(fs, nb); if (flags & B_CLRBUF) vfs_bio_clrbuf(dbp); if (DOINGSOFTDEP(vp)) softdep_setup_allocindir_page(ip, lbn, bp, indirs[i].in_off, nb, 0, dbp); bap[indirs[i].in_off] = nb; /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & B_SYNC) { bwrite(bp); } else { if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } *ap->a_bpp = dbp; return (0); } brelse(bp); /* * At this point all related indirect blocks have been allocated * if necessary and released. bp is no longer valid. dbp holds * our getblk()'d data block. * * XXX we previously performed a cluster_read operation here. */ if (flags & B_CLRBUF) { /* * If B_CLRBUF is set we must validate the invalid portions * of the buffer. This typically requires a read-before- * write. The strategy call will fill in bio_offset in that * case. * * If we hit this case we do a cluster read if possible * since nearby data blocks are likely to be accessed soon * too. */ if ((dbp->b_flags & B_CACHE) == 0) { bqrelse(dbp); seqcount = (flags & B_SEQMASK) >> B_SEQSHIFT; if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { error = cluster_read(vp, (off_t)ip->i_size, lblktodoff(fs, lbn), (int)fs->fs_bsize, fs->fs_bsize, seqcount * BKVASIZE, &dbp); } else { error = bread(vp, lblktodoff(fs, lbn), (int)fs->fs_bsize, &dbp); } if (error) goto fail; } else {
/* * Balloc defines the structure of filesystem storage * by allocating the physical blocks on a device given * the inode and the logical block number in a file. * This is the allocation strategy for UFS1. Below is * the allocation strategy for UFS2. */ int ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size, struct ucred *cred, int flags, struct buf **bpp) { struct inode *ip; struct ufs1_dinode *dp; ufs_lbn_t lbn, lastlbn; struct fs *fs; ufs1_daddr_t nb; struct buf *bp, *nbp; struct ufsmount *ump; struct indir indirs[NIADDR + 2]; int deallocated, osize, nsize, num, i, error; ufs2_daddr_t newb; ufs1_daddr_t *bap, pref; ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1]; int unwindidx = -1; int saved_inbdflush; static struct timeval lastfail; static int curfail; int reclaimed; ip = VTOI(vp); dp = ip->i_din1; fs = ip->i_fs; ump = ip->i_ump; lbn = lblkno(fs, startoffset); size = blkoff(fs, startoffset) + size; reclaimed = 0; if (size > fs->fs_bsize) panic("ffs_balloc_ufs1: blk too big"); *bpp = NULL; if (flags & IO_EXT) return (EOPNOTSUPP); if (lbn < 0) return (EFBIG); if (DOINGSOFTDEP(vp)) softdep_prealloc(vp, MNT_WAIT); /* * If the next write will extend the file into a new block, * and the file is currently composed of a fragment * this fragment has to be extended to be a full block. */ lastlbn = lblkno(fs, ip->i_size); if (lastlbn < NDADDR && lastlbn < lbn) { nb = lastlbn; osize = blksize(fs, ip, nb); if (osize < fs->fs_bsize && osize > 0) { UFS_LOCK(ump); error = ffs_realloccg(ip, nb, dp->di_db[nb], ffs_blkpref_ufs1(ip, lastlbn, (int)nb, &dp->di_db[0]), osize, (int)fs->fs_bsize, flags, cred, &bp); if (error) return (error); if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, nb, dbtofsb(fs, bp->b_blkno), dp->di_db[nb], fs->fs_bsize, osize, bp); ip->i_size = smalllblktosize(fs, nb + 1); dp->di_size = ip->i_size; dp->di_db[nb] = dbtofsb(fs, bp->b_blkno); ip->i_flag |= IN_CHANGE | IN_UPDATE; if (flags & IO_SYNC) bwrite(bp); else bawrite(bp); } } /* * The first NDADDR blocks are direct blocks */ if (lbn < NDADDR) { if (flags & BA_METAONLY) panic("ffs_balloc_ufs1: BA_METAONLY for direct block"); nb = dp->di_db[lbn]; if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) { error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp); if (error) { brelse(bp); return (error); } bp->b_blkno = fsbtodb(fs, nb); *bpp = bp; return (0); } if (nb != 0) { /* * Consider need to reallocate a fragment. 
*/ osize = fragroundup(fs, blkoff(fs, ip->i_size)); nsize = fragroundup(fs, size); if (nsize <= osize) { error = bread(vp, lbn, osize, NOCRED, &bp); if (error) { brelse(bp); return (error); } bp->b_blkno = fsbtodb(fs, nb); } else { UFS_LOCK(ump); error = ffs_realloccg(ip, lbn, dp->di_db[lbn], ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]), osize, nsize, flags, cred, &bp); if (error) return (error); if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, lbn, dbtofsb(fs, bp->b_blkno), nb, nsize, osize, bp); } } else { if (ip->i_size < smalllblktosize(fs, lbn + 1)) nsize = fragroundup(fs, size); else nsize = fs->fs_bsize; UFS_LOCK(ump); error = ffs_alloc(ip, lbn, ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]), nsize, flags, cred, &newb); if (error) return (error); bp = getblk(vp, lbn, nsize, 0, 0, 0); bp->b_blkno = fsbtodb(fs, newb); if (flags & BA_CLRBUF) vfs_bio_clrbuf(bp); if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, lbn, newb, 0, nsize, 0, bp); } dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno); ip->i_flag |= IN_CHANGE | IN_UPDATE; *bpp = bp; return (0); } /* * Determine the number of levels of indirection. */ pref = 0; if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0) return(error); #ifdef INVARIANTS if (num < 1) panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block"); #endif saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH); /* * Fetch the first indirect block allocating if necessary. */ --num; nb = dp->di_ib[indirs[0].in_off]; allocib = NULL; allocblk = allociblk; lbns_remfree = lbns; if (nb == 0) { UFS_LOCK(ump); pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0); if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred, &newb)) != 0) { curthread_pflags_restore(saved_inbdflush); return (error); } nb = newb; *allocblk++ = nb; *lbns_remfree++ = indirs[1].in_lbn; bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0); bp->b_blkno = fsbtodb(fs, nb); vfs_bio_clrbuf(bp); if (DOINGSOFTDEP(vp)) { softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off, newb, 0, fs->fs_bsize, 0, bp); bdwrite(bp); } else { /* * Write synchronously so that indirect blocks * never point at garbage. */ if (DOINGASYNC(vp)) bdwrite(bp); else if ((error = bwrite(bp)) != 0) goto fail; } allocib = &dp->di_ib[indirs[0].in_off]; *allocib = nb; ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * Fetch through the indirect blocks, allocating as necessary. */ retry: for (i = 1;;) { error = bread(vp, indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp); if (error) { brelse(bp); goto fail; } bap = (ufs1_daddr_t *)bp->b_data; nb = bap[indirs[i].in_off]; if (i == num) break; i += 1; if (nb != 0) { bqrelse(bp); continue; } UFS_LOCK(ump); if (pref == 0) pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0); if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags | IO_BUFLOCKED, cred, &newb)) != 0) { brelse(bp); if (++reclaimed == 1) { UFS_LOCK(ump); softdep_request_cleanup(fs, vp, cred, FLUSH_BLOCKS_WAIT); UFS_UNLOCK(ump); goto retry; } if (ppsratecheck(&lastfail, &curfail, 1)) { ffs_fserr(fs, ip->i_number, "filesystem full"); uprintf("\n%s: write failed, filesystem " "is full\n", fs->fs_fsmnt); } goto fail; } nb = newb; *allocblk++ = nb; *lbns_remfree++ = indirs[i].in_lbn; nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); vfs_bio_clrbuf(nbp); if (DOINGSOFTDEP(vp)) { softdep_setup_allocindir_meta(nbp, ip, bp, indirs[i - 1].in_off, nb); bdwrite(nbp); } else { /* * Write synchronously so that indirect blocks * never point at garbage. 
*/ if ((error = bwrite(nbp)) != 0) { brelse(bp); goto fail; } } bap[indirs[i - 1].in_off] = nb; if (allocib == NULL && unwindidx < 0) unwindidx = i - 1; /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & IO_SYNC) { bwrite(bp); } else { if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } } /* * If asked only for the indirect block, then return it. */ if (flags & BA_METAONLY) { curthread_pflags_restore(saved_inbdflush); *bpp = bp; return (0); } /* * Get the data block, allocating if necessary. */ if (nb == 0) { UFS_LOCK(ump); pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off, &bap[0]); error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags | IO_BUFLOCKED, cred, &newb); if (error) { brelse(bp); if (++reclaimed == 1) { UFS_LOCK(ump); softdep_request_cleanup(fs, vp, cred, FLUSH_BLOCKS_WAIT); UFS_UNLOCK(ump); goto retry; } if (ppsratecheck(&lastfail, &curfail, 1)) { ffs_fserr(fs, ip->i_number, "filesystem full"); uprintf("\n%s: write failed, filesystem " "is full\n", fs->fs_fsmnt); } goto fail; } nb = newb; *allocblk++ = nb; *lbns_remfree++ = lbn; nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); if (flags & BA_CLRBUF) vfs_bio_clrbuf(nbp); if (DOINGSOFTDEP(vp)) softdep_setup_allocindir_page(ip, lbn, bp, indirs[i].in_off, nb, 0, nbp); bap[indirs[i].in_off] = nb; /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & IO_SYNC) { bwrite(bp); } else { if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } curthread_pflags_restore(saved_inbdflush); *bpp = nbp; return (0); } brelse(bp); if (flags & BA_CLRBUF) { int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT; if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { error = cluster_read(vp, ip->i_size, lbn, (int)fs->fs_bsize, NOCRED, MAXBSIZE, seqcount, &nbp); } else { error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp); } if (error) { brelse(nbp); goto fail; } } else {
/* * Update the access, modified, and inode change times as specified by the * IN_ACCESS, IN_UPDATE, and IN_CHANGE flags respectively. Write the inode * to disk if the IN_MODIFIED flag is set (it may be set initially, or by * the timestamp update). The IN_LAZYMOD flag is set to force a write * later if not now. The IN_LAZYACCESS is set instead of IN_MODIFIED if the fs * is currently being suspended (or is suspended) and vnode has been accessed. * If we write now, then clear IN_MODIFIED, IN_LAZYACCESS and IN_LAZYMOD to * reflect the presumably successful write, and if waitfor is set, then wait * for the write to complete. */ int ffs_update (vnode *vp, int waitfor) { int error = 0; print("HARVEY TODO: %s\n", __func__); #if 0 struct fs *fs; struct buf *bp; struct inode *ip; int flags, error; ASSERT_VOP_ELOCKED(vp, "ffs_update"); ufs_itimes(vp); ip = VTOI(vp); if ((ip->i_flag & IN_MODIFIED) == 0 && waitfor == 0) return (0); ip->i_flag &= ~(IN_LAZYACCESS | IN_LAZYMOD | IN_MODIFIED); fs = ITOFS(ip); if (fs->fs_ronly && ITOUMP(ip)->um_fsckpid == 0) return (0); /* * If we are updating a snapshot and another process is currently * writing the buffer containing the inode for this snapshot then * a deadlock can occur when it tries to check the snapshot to see * if that block needs to be copied. Thus when updating a snapshot * we check to see if the buffer is already locked, and if it is * we drop the snapshot lock until the buffer has been written * and is available to us. We have to grab a reference to the * snapshot vnode to prevent it from being removed while we are * waiting for the buffer. */ flags = 0; if (IS_SNAPSHOT(ip)) flags = GB_LOCK_NOWAIT; loop: error = breadn_flags(ITODEVVP(ip), fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), (int) fs->fs_bsize, 0, 0, 0, NOCRED, flags, &bp); if (error != 0) { if (error != EBUSY) return (error); KASSERT((IS_SNAPSHOT(ip)), ("EBUSY from non-snapshot")); /* * Wait for our inode block to become available. * * Hold a reference to the vnode to protect against * ffs_snapgone(). Since we hold a reference, it can only * get reclaimed (VI_DOOMED flag) in a forcible downgrade * or unmount. For an unmount, the entire filesystem will be * gone, so we cannot attempt to touch anything associated * with it while the vnode is unlocked; all we can do is * pause briefly and try again. If when we relock the vnode * we discover that it has been reclaimed, updating it is no * longer necessary and we can just return an error. */ vref(vp); VOP_UNLOCK(vp, 0); pause("ffsupd", 1); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); vrele(vp); if ((vp->v_iflag & VI_DOOMED) != 0) return (ENOENT); goto loop; } if (DOINGSOFTDEP(vp)) softdep_update_inodeblock(ip, bp, waitfor); else if (ip->i_effnlink != ip->i_nlink) panic("ffs_update: bad link cnt"); if (I_IS_UFS1(ip)) { *((struct ufs1_dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1; /* XXX: FIX? The entropy here is desirable, but the harvesting may be expensive */ random_harvest_queue(&(ip->i_din1), sizeof(ip->i_din1), 1, RANDOM_FS_ATIME); } else { *((struct ufs2_dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2; /* XXX: FIX? The entropy here is desirable, but the harvesting may be expensive */ random_harvest_queue(&(ip->i_din2), sizeof(ip->i_din2), 1, RANDOM_FS_ATIME); } if (waitfor) error = bwrite(bp); else if (vm_page_count_severe() || buf_dirty_count_severe()) { bawrite(bp); error = 0; } else { if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); error = 0; } #endif // 0 return (error); }
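/*
 * Illustrative sketch (hypothetical helper names): how an inode number maps
 * to a (filesystem block, index-within-block) pair, which is what
 * ino_to_fsba() and ino_to_fsbo() compute before ffs_update() copies the
 * dinode into the buffer at that index.  Real FFS derives these from the
 * superblock (inodes per group, inodes per block, cylinder-group layout);
 * this only shows the divide/modulo structure under assumed constants.
 */
#include <stdint.h>

#define DEMO_INOPB 64		/* assumed inodes per filesystem block */

struct demo_inoloc {
	uint64_t blk;		/* filesystem block holding the inode */
	unsigned idx;		/* index of the dinode within that block */
};

static struct demo_inoloc
demo_ino_to_loc(uint64_t itable_start, uint64_t inum)
{
	struct demo_inoloc loc;

	loc.blk = itable_start + inum / DEMO_INOPB;	/* ino_to_fsba analogue */
	loc.idx = (unsigned)(inum % DEMO_INOPB);	/* ino_to_fsbo analogue */
	return (loc);
}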
/* * Balloc defines the structure of file system storage * by allocating the physical blocks on a device given * the inode and the logical block number in a file. */ int ffs1_balloc(struct inode *ip, off_t startoffset, int size, struct ucred *cred, int flags, struct buf **bpp) { daddr_t lbn, nb, newb, pref; struct fs *fs; struct buf *bp, *nbp; struct vnode *vp; struct proc *p; struct indir indirs[NIADDR + 2]; int32_t *bap; int deallocated, osize, nsize, num, i, error; int32_t *allocib, *blkp, *allocblk, allociblk[NIADDR+1]; int unwindidx = -1; vp = ITOV(ip); fs = ip->i_fs; p = curproc; lbn = lblkno(fs, startoffset); size = blkoff(fs, startoffset) + size; if (size > fs->fs_bsize) panic("ffs1_balloc: blk too big"); if (bpp != NULL) *bpp = NULL; if (lbn < 0) return (EFBIG); /* * If the next write will extend the file into a new block, * and the file is currently composed of a fragment * this fragment has to be extended to be a full block. */ nb = lblkno(fs, ip->i_ffs1_size); if (nb < NDADDR && nb < lbn) { osize = blksize(fs, ip, nb); if (osize < fs->fs_bsize && osize > 0) { error = ffs_realloccg(ip, nb, ffs1_blkpref(ip, nb, (int)nb, &ip->i_ffs1_db[0]), osize, (int)fs->fs_bsize, cred, bpp, &newb); if (error) return (error); if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, nb, newb, ip->i_ffs1_db[nb], fs->fs_bsize, osize, bpp ? *bpp : NULL); ip->i_ffs1_size = lblktosize(fs, nb + 1); uvm_vnp_setsize(vp, ip->i_ffs1_size); ip->i_ffs1_db[nb] = newb; ip->i_flag |= IN_CHANGE | IN_UPDATE; if (bpp != NULL) { if (flags & B_SYNC) bwrite(*bpp); else bawrite(*bpp); } } } /* * The first NDADDR blocks are direct blocks */ if (lbn < NDADDR) { nb = ip->i_ffs1_db[lbn]; if (nb != 0 && ip->i_ffs1_size >= lblktosize(fs, lbn + 1)) { /* * The block is an already-allocated direct block * and the file already extends past this block, * thus this must be a whole block. * Just read the block (if requested). */ if (bpp != NULL) { error = bread(vp, lbn, fs->fs_bsize, bpp); if (error) { brelse(*bpp); return (error); } } return (0); } if (nb != 0) { /* * Consider need to reallocate a fragment. */ osize = fragroundup(fs, blkoff(fs, ip->i_ffs1_size)); nsize = fragroundup(fs, size); if (nsize <= osize) { /* * The existing block is already * at least as big as we want. * Just read the block (if requested). */ if (bpp != NULL) { error = bread(vp, lbn, fs->fs_bsize, bpp); if (error) { brelse(*bpp); return (error); } (*bpp)->b_bcount = osize; } return (0); } else { /* * The existing block is smaller than we * want, grow it. */ error = ffs_realloccg(ip, lbn, ffs1_blkpref(ip, lbn, (int)lbn, &ip->i_ffs1_db[0]), osize, nsize, cred, bpp, &newb); if (error) return (error); if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, lbn, newb, nb, nsize, osize, bpp ? *bpp : NULL); } } else { /* * The block was not previously allocated, * allocate a new block or fragment. */ if (ip->i_ffs1_size < lblktosize(fs, lbn + 1)) nsize = fragroundup(fs, size); else nsize = fs->fs_bsize; error = ffs_alloc(ip, lbn, ffs1_blkpref(ip, lbn, (int)lbn, &ip->i_ffs1_db[0]), nsize, cred, &newb); if (error) return (error); if (bpp != NULL) { *bpp = getblk(vp, lbn, fs->fs_bsize, 0, 0); if (nsize < fs->fs_bsize) (*bpp)->b_bcount = nsize; (*bpp)->b_blkno = fsbtodb(fs, newb); if (flags & B_CLRBUF) clrbuf(*bpp); } if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, lbn, newb, 0, nsize, 0, bpp ? *bpp : NULL); } ip->i_ffs1_db[lbn] = newb; ip->i_flag |= IN_CHANGE | IN_UPDATE; return (0); } /* * Determine the number of levels of indirection. 
*/ pref = 0; if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0) return(error); #ifdef DIAGNOSTIC if (num < 1) panic ("ffs1_balloc: ufs_bmaparray returned indirect block"); #endif /* * Fetch the first indirect block allocating if necessary. */ --num; nb = ip->i_ffs1_ib[indirs[0].in_off]; allocib = NULL; allocblk = allociblk; if (nb == 0) { pref = ffs1_blkpref(ip, lbn, -indirs[0].in_off - 1, NULL); error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb); if (error) goto fail; nb = newb; *allocblk++ = nb; bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0); bp->b_blkno = fsbtodb(fs, nb); clrbuf(bp); if (DOINGSOFTDEP(vp)) { softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off, newb, 0, fs->fs_bsize, 0, bp); bdwrite(bp); } else { /* * Write synchronously so that indirect blocks * never point at garbage. */ if ((error = bwrite(bp)) != 0) goto fail; } allocib = &ip->i_ffs1_ib[indirs[0].in_off]; *allocib = nb; ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * Fetch through the indirect blocks, allocating as necessary. */ for (i = 1;;) { error = bread(vp, indirs[i].in_lbn, (int)fs->fs_bsize, &bp); if (error) { brelse(bp); goto fail; } bap = (int32_t *)bp->b_data; nb = bap[indirs[i].in_off]; if (i == num) break; i++; if (nb != 0) { brelse(bp); continue; } if (pref == 0) pref = ffs1_blkpref(ip, lbn, i - num - 1, NULL); error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb); if (error) { brelse(bp); goto fail; } nb = newb; *allocblk++ = nb; nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); clrbuf(nbp); if (DOINGSOFTDEP(vp)) { softdep_setup_allocindir_meta(nbp, ip, bp, indirs[i - 1].in_off, nb); bdwrite(nbp); } else { /* * Write synchronously so that indirect blocks * never point at garbage. */ if ((error = bwrite(nbp)) != 0) { brelse(bp); goto fail; } } bap[indirs[i - 1].in_off] = nb; if (allocib == NULL && unwindidx < 0) unwindidx = i - 1; /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & B_SYNC) { bwrite(bp); } else { bdwrite(bp); } } /* * Get the data block, allocating if necessary. */ if (nb == 0) { pref = ffs1_blkpref(ip, lbn, indirs[i].in_off, &bap[0]); error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb); if (error) { brelse(bp); goto fail; } nb = newb; *allocblk++ = nb; if (bpp != NULL) { nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); if (flags & B_CLRBUF) clrbuf(nbp); *bpp = nbp; } if (DOINGSOFTDEP(vp)) softdep_setup_allocindir_page(ip, lbn, bp, indirs[i].in_off, nb, 0, bpp ? *bpp : NULL); bap[indirs[i].in_off] = nb; /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & B_SYNC) { bwrite(bp); } else { bdwrite(bp); } return (0); } brelse(bp); if (bpp != NULL) { if (flags & B_CLRBUF) { error = bread(vp, lbn, (int)fs->fs_bsize, &nbp); if (error) { brelse(nbp); goto fail; } } else { nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); } *bpp = nbp; } return (0); fail: /* * If we have failed to allocate any blocks, simply return the error. * This is the usual case and avoids the need to fsync the file. */ if (allocblk == allociblk && allocib == NULL && unwindidx == -1) return (error); /* * If we have failed part way through block allocation, we have to * deallocate any indirect blocks that we have allocated. We have to * fsync the file before we start to get rid of all of its * dependencies so that we do not leave them dangling. 
We have to sync * it at the end so that the softdep code does not find any untracked * changes. Although this is really slow, running out of disk space is * not expected to be a common occurrence. The error return from fsync * is ignored as we already have an error to return to the user. */ VOP_FSYNC(vp, p->p_ucred, MNT_WAIT, p); for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) { ffs_blkfree(ip, *blkp, fs->fs_bsize); deallocated += fs->fs_bsize; } if (allocib != NULL) { *allocib = 0; } else if (unwindidx >= 0) { int r; r = bread(vp, indirs[unwindidx].in_lbn, (int)fs->fs_bsize, &bp); if (r) panic("Could not unwind indirect block, error %d", r); bap = (int32_t *)bp->b_data; bap[indirs[unwindidx].in_off] = 0; if (flags & B_SYNC) { bwrite(bp); } else { bdwrite(bp); } } if (deallocated) { /* * Restore user's disk quota because allocation failed. */ (void)ufs_quota_free_blocks(ip, btodb(deallocated), cred); ip->i_ffs1_blocks -= btodb(deallocated); ip->i_flag |= IN_CHANGE | IN_UPDATE; } VOP_FSYNC(vp, p->p_ucred, MNT_WAIT, p); return (error); }
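/*
 * Illustrative sketch (user-space): the undo pattern that ffs1_balloc()
 * above implements with allociblk[]/allocblk.  Every block allocated on the
 * way down is recorded; if a later step fails, everything recorded so far
 * is released so no indirect block is left pointing at half-built state.
 * malloc()/free() stand in for ffs_alloc()/ffs_blkfree() here.
 */
#include <stdlib.h>

#define DEMO_LEVELS 4

static int
demo_alloc_chain(void)
{
	void *undo[DEMO_LEVELS];
	void **undop = undo;
	int i;

	for (i = 0; i < DEMO_LEVELS; i++) {
		void *p = malloc(4096);		/* ffs_alloc() analogue */

		if (p == NULL)
			goto fail;
		*undop++ = p;			/* remember for unwind */
	}
	return (0);				/* all levels allocated */
fail:
	/* Walk the undo history and release the partial allocations. */
	while (undop > undo)
		free(*--undop);
	return (-1);
}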
/* * Write a directory entry after a call to namei, using the parameters * that it left in nameidata. The argument dirp is the new directory * entry contents. Dvp is a pointer to the directory to be written, * which was left locked by namei. Remaining parameters (dp->i_offset, * dp->i_count) indicate how the space for the new entry is to be obtained. * Non-null bp indicates that a directory is being created (for the * soft dependency code). */ int ufs_direnter(struct vnode *dvp, struct vnode *tvp, struct direct *dirp, struct componentname *cnp, struct buf *newdirbp) { struct ucred *cr; struct proc *p; int newentrysize; struct inode *dp; struct buf *bp; u_int dsize; struct direct *ep, *nep; int error, ret, blkoff, loc, spacefree, flags; char *dirbuf; UFS_WAPBL_JLOCK_ASSERT(dvp->v_mount); error = 0; cr = cnp->cn_cred; p = cnp->cn_proc; dp = VTOI(dvp); newentrysize = DIRSIZ(FSFMT(dvp), dirp); if (dp->i_count == 0) { /* * If dp->i_count is 0, then namei could find no * space in the directory. Here, dp->i_offset will * be on a directory block boundary and we will write the * new entry into a fresh block. */ if (dp->i_offset & (DIRBLKSIZ - 1)) panic("ufs_direnter: newblk"); flags = B_CLRBUF; if (!DOINGSOFTDEP(dvp)) flags |= B_SYNC; if ((error = UFS_BUF_ALLOC(dp, (off_t)dp->i_offset, DIRBLKSIZ, cr, flags, &bp)) != 0) { if (DOINGSOFTDEP(dvp) && newdirbp != NULL) bdwrite(newdirbp); return (error); } DIP_ASSIGN(dp, size, dp->i_offset + DIRBLKSIZ); dp->i_flag |= IN_CHANGE | IN_UPDATE; uvm_vnp_setsize(dvp, DIP(dp, size)); dirp->d_reclen = DIRBLKSIZ; blkoff = dp->i_offset & (VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_iosize - 1); memcpy(bp->b_data + blkoff, dirp, newentrysize); #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL) { ufsdirhash_newblk(dp, dp->i_offset); ufsdirhash_add(dp, dirp, dp->i_offset); ufsdirhash_checkblock(dp, (char *)bp->b_data + blkoff, dp->i_offset); } #endif if (DOINGSOFTDEP(dvp)) { /* * Ensure that the entire newly allocated block is a * valid directory so that future growth within the * block does not have to ensure that the block is * written before the inode. */ blkoff += DIRBLKSIZ; while (blkoff < bp->b_bcount) { ((struct direct *) (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ; blkoff += DIRBLKSIZ; } if (softdep_setup_directory_add(bp, dp, dp->i_offset, dirp->d_ino, newdirbp, 1) == 0) { bdwrite(bp); return (UFS_UPDATE(dp, 0)); } /* We have just allocated a directory block in an * indirect block. Rather than tracking when it gets * claimed by the inode, we simply do a VOP_FSYNC * now to ensure that it is there (in case the user * does a future fsync). Note that we have to unlock * the inode for the entry that we just entered, as * the VOP_FSYNC may need to lock other inodes which * can lead to deadlock if we also hold a lock on * the newly entered node. */ if ((error = VOP_BWRITE(bp))) return (error); if (tvp != NULL) VOP_UNLOCK(tvp, 0); error = VOP_FSYNC(dvp, p->p_ucred, MNT_WAIT); if (tvp != NULL) vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); return (error); } error = VOP_BWRITE(bp); ret = UFS_UPDATE(dp, !DOINGSOFTDEP(dvp)); if (error == 0) return (ret); return (error); } /* * If dp->i_count is non-zero, then namei found space for the new * entry in the range dp->i_offset to dp->i_offset + dp->i_count * in the directory. To use this space, we may have to compact * the entries located there, by copying them together towards the * beginning of the block, leaving the free space in one usable * chunk at the end. */ /* * Increase size of directory if entry eats into new space. 
* This should never push the size past a new multiple of * DIRBLKSIZE. * * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. */ if (dp->i_offset + dp->i_count > DIP(dp, size)) { DIP_ASSIGN(dp, size, dp->i_offset + dp->i_count); dp->i_flag |= IN_CHANGE | IN_UPDATE; UFS_WAPBL_UPDATE(dp, MNT_WAIT); } /* * Get the block containing the space for the new directory entry. */ if ((error = UFS_BUFATOFF(dp, (off_t)dp->i_offset, &dirbuf, &bp)) != 0) { if (DOINGSOFTDEP(dvp) && newdirbp != NULL) bdwrite(newdirbp); return (error); } /* * Find space for the new entry. In the simple case, the entry at * offset base will have the space. If it does not, then namei * arranged that compacting the region dp->i_offset to * dp->i_offset + dp->i_count would yield the space. */ ep = (struct direct *)dirbuf; dsize = ep->d_ino ? DIRSIZ(FSFMT(dvp), ep) : 0; spacefree = ep->d_reclen - dsize; for (loc = ep->d_reclen; loc < dp->i_count; ) { nep = (struct direct *)(dirbuf + loc); /* Trim the existing slot (NB: dsize may be zero). */ ep->d_reclen = dsize; ep = (struct direct *)((char *)ep + dsize); /* Read nep->d_reclen now as the memmove() may clobber it. */ loc += nep->d_reclen; if (nep->d_ino == 0) { /* * A mid-block unused entry. Such entries are * never created by the kernel, but fsck_ffs * can create them (and it doesn't fix them). * * Add up the free space, and initialise the * relocated entry since we don't memmove it. */ spacefree += nep->d_reclen; ep->d_ino = 0; dsize = 0; continue; } dsize = DIRSIZ(FSFMT(dvp), nep); spacefree += nep->d_reclen - dsize; #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL) ufsdirhash_move(dp, nep, dp->i_offset + ((char *)nep - dirbuf), dp->i_offset + ((char *)ep - dirbuf)); #endif if (DOINGSOFTDEP(dvp)) softdep_change_directoryentry_offset(dp, dirbuf, (caddr_t)nep, (caddr_t)ep, dsize); else memmove(ep, nep, dsize); } /* * Here, `ep' points to a directory entry containing `dsize' in-use * bytes followed by `spacefree' unused bytes. If ep->d_ino == 0, * then the entry is completely unused (dsize == 0). The value * of ep->d_reclen is always indeterminate. * * Update the pointer fields in the previous entry (if any), * copy in the new entry, and write out the block. */ if (ep->d_ino == 0) { if (spacefree + dsize < newentrysize) panic("ufs_direnter: compact1"); dirp->d_reclen = spacefree + dsize; } else { if (spacefree < newentrysize) panic("ufs_direnter: compact2"); dirp->d_reclen = spacefree; ep->d_reclen = dsize; ep = (struct direct *)((char *)ep + dsize); } #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL && (ep->d_ino == 0 || dirp->d_reclen == spacefree)) ufsdirhash_add(dp, dirp, dp->i_offset + ((char *)ep - dirbuf)); #endif memcpy(ep, dirp, newentrysize); #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL) ufsdirhash_checkblock(dp, dirbuf - (dp->i_offset & (DIRBLKSIZ - 1)), dp->i_offset & ~(DIRBLKSIZ - 1)); #endif if (DOINGSOFTDEP(dvp)) { (void)softdep_setup_directory_add(bp, dp, dp->i_offset + (caddr_t)ep - dirbuf, dirp->d_ino, newdirbp, 0); bdwrite(bp); } else { error = VOP_BWRITE(bp); } dp->i_flag |= IN_CHANGE | IN_UPDATE; /* * If all went well, and the directory can be shortened, proceed * with the truncation. Note that we have to unlock the inode for * the entry that we just entered, as the truncation may need to * lock other inodes which can lead to deadlock if we also hold a * lock on the newly entered node. 
*/ if (error == 0 && dp->i_endoff && dp->i_endoff < DIP(dp, size)) { if (tvp != NULL) VOP_UNLOCK(tvp, 0); #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL) ufsdirhash_dirtrunc(dp, dp->i_endoff); #endif error = UFS_TRUNCATE(dp, (off_t)dp->i_endoff, IO_SYNC, cr); if (tvp != NULL) vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); } UFS_WAPBL_UPDATE(dp, MNT_WAIT); return (error); }
/* * Look up a FFS dinode number to find its incore vnode, otherwise read it * in from disk. If it is in core, wait for the lock bit to clear, then * return the inode locked. Detection and handling of mount points must be * done by the calling routine. */ int ffs_vget(struct mount *mp, ino_t ino, struct vnode **vpp) { struct fs *fs; struct inode *ip; struct ufs1_dinode *dp1; #ifdef FFS2 struct ufs2_dinode *dp2; #endif struct ufsmount *ump; struct buf *bp; struct vnode *vp; dev_t dev; int error; ump = VFSTOUFS(mp); dev = ump->um_dev; retry: if ((*vpp = ufs_ihashget(dev, ino)) != NULL) return (0); /* Allocate a new vnode/inode. */ if ((error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp)) != 0) { *vpp = NULL; return (error); } #ifdef VFSDEBUG vp->v_flag |= VLOCKSWORK; #endif /* XXX - we use the same pool for ffs and mfs */ ip = pool_get(&ffs_ino_pool, PR_WAITOK); bzero((caddr_t)ip, sizeof(struct inode)); lockinit(&ip->i_lock, PINOD, "inode", 0, 0); ip->i_ump = ump; VREF(ip->i_devvp); vp->v_data = ip; ip->i_vnode = vp; ip->i_fs = fs = ump->um_fs; ip->i_dev = dev; ip->i_number = ino; ip->i_vtbl = &ffs_vtbl; /* * Put it onto its hash chain and lock it so that other requests for * this inode will block if they arrive while we are sleeping waiting * for old data structures to be purged or for the contents of the * disk portion of this inode to be read. */ error = ufs_ihashins(ip); if (error) { /* * VOP_INACTIVE will treat this as a stale file * and recycle it quickly */ vrele(vp); if (error == EEXIST) goto retry; return (error); } /* Read in the disk contents for the inode, copy into the inode. */ error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), (int)fs->fs_bsize, NOCRED, &bp); if (error) { /* * The inode does not contain anything useful, so it would * be misleading to leave it on its hash chain. With mode * still zero, it will be unlinked and returned to the free * list by vput(). */ vput(vp); brelse(bp); *vpp = NULL; return (error); } #ifdef FFS2 if (ip->i_ump->um_fstype == UM_UFS2) { ip->i_din2 = pool_get(&ffs_dinode2_pool, PR_WAITOK); dp2 = (struct ufs2_dinode *) bp->b_data + ino_to_fsbo(fs, ino); *ip->i_din2 = *dp2; } else #endif { ip->i_din1 = pool_get(&ffs_dinode1_pool, PR_WAITOK); dp1 = (struct ufs1_dinode *) bp->b_data + ino_to_fsbo(fs, ino); *ip->i_din1 = *dp1; } brelse(bp); if (DOINGSOFTDEP(vp)) softdep_load_inodeblock(ip); else ip->i_effnlink = DIP(ip, nlink); /* * Initialize the vnode from the inode, check for aliases. * Note that the underlying vnode may have changed. */ error = ufs_vinit(mp, ffs_specop_p, FFS_FIFOOPS, &vp); if (error) { vput(vp); *vpp = NULL; return (error); } /* * Set up a generation number for this inode if it does not * already have one. This should only happen on old filesystems. */ if (DIP(ip, gen) == 0) { DIP_ASSIGN(ip, gen, arc4random() & INT_MAX); if (DIP(ip, gen) == 0 || DIP(ip, gen) == -1) DIP_ASSIGN(ip, gen, 1); /* Shouldn't happen */ if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) ip->i_flag |= IN_MODIFIED; } /* * Ensure that uid and gid are correct. This is a temporary * fix until fsck has been changed to do the update. */ if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_inodefmt < FS_44INODEFMT) { ip->i_ffs1_uid = ip->i_din1->di_ouid; ip->i_ffs1_gid = ip->i_din1->di_ogid; } *vpp = vp; return (0); }
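/*
 * Illustrative sketch (user-space, no locking): the lookup-or-load shape of
 * ffs_vget() above -- consult the in-core table first, and only on a miss
 * construct a new entry and fill it from "disk".  The tiny linear table and
 * the demo_* names are assumptions; the kernel version also handles the
 * race where another thread inserts the same inode first (EEXIST -> retry)
 * and reads the real dinode with bread().
 */
#include <stdint.h>
#include <stdlib.h>

struct demo_icache_entry {
	uint64_t ino;
	int	 loaded;
	/* ... on-disk inode fields would follow ... */
};

#define DEMO_CACHE 32
static struct demo_icache_entry *demo_cache[DEMO_CACHE];

static struct demo_icache_entry *
demo_vget(uint64_t ino)
{
	struct demo_icache_entry *ip;
	int i, slot = -1;

	/* Hash-chain lookup analogue: return the in-core copy if present. */
	for (i = 0; i < DEMO_CACHE; i++) {
		if (demo_cache[i] != NULL && demo_cache[i]->ino == ino)
			return (demo_cache[i]);
		if (demo_cache[i] == NULL && slot < 0)
			slot = i;
	}
	if (slot < 0)
		return (NULL);		/* cache full in this toy model */

	/* Miss: allocate, insert, then read the disk copy into it. */
	ip = calloc(1, sizeof(*ip));
	if (ip == NULL)
		return (NULL);
	ip->ino = ino;
	demo_cache[slot] = ip;
	ip->loaded = 1;			/* bread() + copy-in analogue */
	return (ip);
}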
/* * Rmdir system call. */ int ufs_rmdir(void *v) { struct vop_rmdir_args *ap = v; struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; struct inode *ip, *dp; int error; ip = VTOI(vp); dp = VTOI(dvp); /* * No rmdir "." or of mounted on directories. */ if (dp == ip || vp->v_mountedhere != NULL) { if (dp == ip) vrele(dvp); else vput(dvp); vput(vp); return (EINVAL); } /* * Do not remove a directory that is in the process of being renamed. * Verify the directory is empty (and valid). Rmdir ".." will not be * valid since ".." will contain a reference to the current directory * and thus be non-empty. */ error = 0; if (ip->i_flag & IN_RENAME) { error = EINVAL; goto out; } if (ip->i_effnlink != 2 || !ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) { error = ENOTEMPTY; goto out; } if ((DIP(dp, flags) & APPEND) || (DIP(ip, flags) & (IMMUTABLE | APPEND))) { error = EPERM; goto out; } /* * Delete reference to directory before purging * inode. If we crash in between, the directory * will be reattached to lost+found, */ dp->i_effnlink--; ip->i_effnlink--; if (DOINGSOFTDEP(vp)) { softdep_change_linkcnt(dp, 0); softdep_change_linkcnt(ip, 0); } if ((error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1)) != 0) { dp->i_effnlink++; ip->i_effnlink++; if (DOINGSOFTDEP(vp)) { softdep_change_linkcnt(dp, 0); softdep_change_linkcnt(ip, 0); } goto out; } VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK); cache_purge(dvp); /* * Truncate inode. The only stuff left in the directory is "." and * "..". The "." reference is inconsequential since we are quashing * it. The soft dependency code will arrange to do these operations * after the parent directory entry has been deleted on disk, so * when running with that code we avoid doing them now. */ if (!DOINGSOFTDEP(vp)) { int ioflag; DIP_ADD(dp, nlink, -1); dp->i_flag |= IN_CHANGE; DIP_ADD(ip, nlink, -1); ip->i_flag |= IN_CHANGE; ioflag = DOINGASYNC(vp) ? 0 : IO_SYNC; error = UFS_TRUNCATE(ip, (off_t)0, ioflag, cnp->cn_cred); } cache_purge(vp); #ifdef UFS_DIRHASH /* Kill any active hash; i_effnlink == 0, so it will not come back. */ if (ip->i_dirhash != NULL) ufsdirhash_free(ip); #endif out: VN_KNOTE(vp, NOTE_DELETE); vput(dvp); vput(vp); return (error); }
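/*
 * Illustrative sketch: the link-count bookkeeping behind ufs_rmdir() above
 * and ufs_mkdir() below.  An empty directory holds two references to itself
 * ("." plus the entry in its parent) and one back-reference to the parent
 * via "..", so removing it drops the child's count to zero and the parent's
 * by one.  Names here are hypothetical; the kernel splits this between
 * i_effnlink (adjusted immediately) and the on-disk nlink (deferred when
 * soft dependencies are in use).
 */
#include <assert.h>

struct demo_dinode { int nlink; };

/* mkdir: new dir gets "." + its entry in the parent; parent gains "..". */
static void
demo_mkdir_links(struct demo_dinode *parent, struct demo_dinode *child)
{
	child->nlink = 2;
	parent->nlink += 1;
}

/* rmdir of an empty directory reverses exactly that accounting. */
static void
demo_rmdir_links(struct demo_dinode *parent, struct demo_dinode *child)
{
	assert(child->nlink == 2);	/* "empty" check, cf. ufs_dirempty() */
	child->nlink = 0;
	parent->nlink -= 1;
}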
/* * Mkdir system call */ int ufs_mkdir(void *v) { struct vop_mkdir_args *ap = v; struct vnode *dvp = ap->a_dvp; struct vattr *vap = ap->a_vap; struct componentname *cnp = ap->a_cnp; struct inode *ip, *dp; struct vnode *tvp; struct buf *bp; struct direct newdir; struct dirtemplate dirtemplate, *dtp; int error, dmode, blkoff; #ifdef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) panic("ufs_mkdir: no name"); #endif dp = VTOI(dvp); if ((nlink_t) DIP(dp, nlink) >= LINK_MAX) { error = EMLINK; goto out; } dmode = vap->va_mode & 0777; dmode |= IFDIR; /* * Must simulate part of ufs_makeinode here to acquire the inode, * but not have it entered in the parent directory. The entry is * made later after writing "." and ".." entries. */ if ((error = UFS_INODE_ALLOC(dp, dmode, cnp->cn_cred, &tvp)) != 0) goto out; ip = VTOI(tvp); DIP_ASSIGN(ip, uid, cnp->cn_cred->cr_uid); DIP_ASSIGN(ip, gid, DIP(dp, gid)); if ((error = getinoquota(ip)) || (error = ufs_quota_alloc_inode(ip, cnp->cn_cred))) { pool_put(&namei_pool, cnp->cn_pnbuf); UFS_INODE_FREE(ip, ip->i_number, dmode); vput(tvp); vput(dvp); return (error); } ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; DIP_ASSIGN(ip, mode, dmode); tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). */ ip->i_effnlink = 2; DIP_ASSIGN(ip, nlink, 2); if (DOINGSOFTDEP(tvp)) softdep_change_linkcnt(ip, 0); /* * Bump link count in parent directory to reflect work done below. * Should be done before reference is create so cleanup is * possible if we crash. */ dp->i_effnlink++; DIP_ADD(dp, nlink, 1); dp->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(dvp)) softdep_change_linkcnt(dp, 0); if ((error = UFS_UPDATE(dp, !DOINGSOFTDEP(dvp))) != 0) goto bad; /* * Initialize directory with "." and ".." from static template. */ if (dvp->v_mount->mnt_maxsymlinklen > 0) dtp = &mastertemplate; else dtp = (struct dirtemplate *)&omastertemplate; dirtemplate = *dtp; dirtemplate.dot_ino = ip->i_number; dirtemplate.dotdot_ino = dp->i_number; if ((error = UFS_BUF_ALLOC(ip, (off_t)0, DIRBLKSIZ, cnp->cn_cred, B_CLRBUF, &bp)) != 0) goto bad; DIP_ASSIGN(ip, size, DIRBLKSIZ); ip->i_flag |= IN_CHANGE | IN_UPDATE; uvm_vnp_setsize(tvp, DIP(ip, size)); bcopy((caddr_t)&dirtemplate, (caddr_t)bp->b_data, sizeof dirtemplate); if (DOINGSOFTDEP(tvp)) { /* * Ensure that the entire newly allocated block is a * valid directory so that future growth within the * block does not have to ensure that the block is * written before the inode */ blkoff = DIRBLKSIZ; while (blkoff < bp->b_bcount) { ((struct direct *) (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ; blkoff += DIRBLKSIZ; } } if ((error = UFS_UPDATE(ip, !DOINGSOFTDEP(tvp))) != 0) { (void)VOP_BWRITE(bp); goto bad; } /* * Directory set up, now install its entry in the parent directory. * * If we are not doing soft dependencies, then we must write out the * buffer containing the new directory body before entering the new * name in the parent. If we are doing soft dependencies, then the * buffer containing the new directory body will be passed to and * released in the soft dependency code after the code has attached * an appropriate ordering dependency to the buffer which ensures that * the buffer is written before the new name is written in the parent. 
*/ if (!DOINGSOFTDEP(dvp) && ((error = VOP_BWRITE(bp)) != 0)) goto bad; ufs_makedirentry(ip, cnp, &newdir); error = ufs_direnter(dvp, tvp, &newdir, cnp, bp); bad: if (error == 0) { VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK); *ap->a_vpp = tvp; } else { dp->i_effnlink--; DIP_ADD(dp, nlink, -1); dp->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(dvp)) softdep_change_linkcnt(dp, 0); /* * No need to do an explicit VOP_TRUNCATE here, vrele will * do this for us because we set the link count to 0. */ ip->i_effnlink = 0; DIP_ASSIGN(ip, nlink, 0); ip->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(tvp)) softdep_change_linkcnt(ip, 0); vput(tvp); } out: pool_put(&namei_pool, cnp->cn_pnbuf); vput(dvp); return (error); }
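/*
 * Illustrative sketch (user-space, simplified record layout): what the
 * mastertemplate copy in ufs_mkdir() above achieves -- a fresh DIRBLKSIZ
 * block whose only entries are "." (the new directory itself) and ".."
 * (the parent), with ".." owning all remaining space in the block.  The
 * field sizes and DEMO_* constants here are assumptions, not the on-disk
 * struct direct.
 */
#include <stdint.h>
#include <string.h>

#define DEMO_DIRBLKSIZ 512

struct demo_dirhdr {
	uint32_t d_ino;
	uint16_t d_reclen;
	uint8_t  d_type;
	uint8_t  d_namlen;
};

static void
demo_mkdir_template(char *blk, uint32_t self_ino, uint32_t parent_ino)
{
	struct demo_dirhdr dot = { self_ino, 12, 4 /* DT_DIR */, 1 };
	struct demo_dirhdr dotdot =
	    { parent_ino, DEMO_DIRBLKSIZ - 12, 4 /* DT_DIR */, 2 };

	memset(blk, 0, DEMO_DIRBLKSIZ);
	memcpy(blk, &dot, sizeof(dot));
	memcpy(blk + sizeof(dot), ".", 2);	/* name + NUL, fits reclen 12 */
	memcpy(blk + 12, &dotdot, sizeof(dotdot));
	memcpy(blk + 12 + sizeof(dotdot), "..", 3);
}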
/* * Last reference to an inode. If necessary, write or delete it. */ int ufs_inactive(void *v) { struct vop_inactive_args *ap = v; struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); struct proc *p = ap->a_p; mode_t mode; int error = 0; #ifdef DIAGNOSTIC extern int prtactive; if (prtactive && vp->v_usecount != 0) vprint("ffs_inactive: pushing active", vp); #endif /* * Ignore inodes related to stale file handles. */ if (ip->i_din1 == NULL || DIP(ip, mode) == 0) goto out; if (DIP(ip, nlink) <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { if (getinoquota(ip) == 0) (void)ufs_quota_free_inode(ip, NOCRED); error = UFS_TRUNCATE(ip, (off_t)0, IO_EXT | IO_NORMAL, NOCRED); DIP_ASSIGN(ip, rdev, 0); mode = DIP(ip, mode); DIP_ASSIGN(ip, mode, 0); ip->i_flag |= IN_CHANGE | IN_UPDATE; /* * Setting the mode to zero needs to wait for the inode to be * written just as does a change to the link count. So, rather * than creating a new entry point to do the same thing, we * just use softdep_change_linkcnt(). Also, we can't let * softdep co-opt us to help on its worklist, as we may end up * trying to recycle vnodes and getting to this same point a * couple of times, blowing the kernel stack. However, this * could be optimized by checking if we are coming from * vrele(), vput() or vclean() (by checking for VXLOCK) and * just avoiding the co-opt to happen in the last case. */ if (DOINGSOFTDEP(vp)) softdep_change_linkcnt(ip, 1); UFS_INODE_FREE(ip, ip->i_number, mode); } if (ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) { UFS_UPDATE(ip, 0); } out: VOP_UNLOCK(vp, 0, p); /* * If we are done with the inode, reclaim it * so that it can be reused immediately. */ if (ip->i_din1 == NULL || DIP(ip, mode) == 0) vrecycle(vp, p); return (error); }
/* * Last reference to an inode. If necessary, write or delete it. */ int ufs_inactive(void *v) { struct vop_inactive_args /* { struct vnode *a_vp; struct bool *a_recycle; } */ *ap = v; struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); struct mount *transmp; mode_t mode; int error = 0; int logged = 0; UFS_WAPBL_JUNLOCK_ASSERT(vp->v_mount); transmp = vp->v_mount; fstrans_start(transmp, FSTRANS_SHARED); /* * Ignore inodes related to stale file handles. */ if (ip->i_mode == 0) goto out; if (ip->i_ffs_effnlink == 0 && DOINGSOFTDEP(vp)) softdep_releasefile(ip); if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { error = UFS_WAPBL_BEGIN(vp->v_mount); if (error) goto out; logged = 1; #ifdef QUOTA (void)chkiq(ip, -1, NOCRED, 0); #endif #ifdef UFS_EXTATTR ufs_extattr_vnode_inactive(vp, curlwp); #endif if (ip->i_size != 0) { /* * When journaling, only truncate one indirect block * at a time */ if (vp->v_mount->mnt_wapbl) { uint64_t incr = MNINDIR(ip->i_ump) << vp->v_mount->mnt_fs_bshift; /* Power of 2 */ uint64_t base = NDADDR << vp->v_mount->mnt_fs_bshift; while (!error && ip->i_size > base + incr) { /* * round down to next full indirect * block boundary. */ uint64_t nsize = base + ((ip->i_size - base - 1) & ~(incr - 1)); error = UFS_TRUNCATE(vp, nsize, 0, NOCRED); if (error) break; UFS_WAPBL_END(vp->v_mount); error = UFS_WAPBL_BEGIN(vp->v_mount); if (error) goto out; } } if (!error) error = UFS_TRUNCATE(vp, (off_t)0, 0, NOCRED); } /* * Setting the mode to zero needs to wait for the inode * to be written just as does a change to the link count. * So, rather than creating a new entry point to do the * same thing, we just use softdep_change_linkcnt(). */ DIP_ASSIGN(ip, rdev, 0); mode = ip->i_mode; ip->i_mode = 0; DIP_ASSIGN(ip, mode, 0); ip->i_flag |= IN_CHANGE | IN_UPDATE; mutex_enter(&vp->v_interlock); vp->v_iflag |= VI_FREEING; mutex_exit(&vp->v_interlock); if (DOINGSOFTDEP(vp)) softdep_change_linkcnt(ip); UFS_VFREE(vp, ip->i_number, mode); } if (ip->i_flag & (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) { if (!logged++) { int err; err = UFS_WAPBL_BEGIN(vp->v_mount); if (err) goto out; } UFS_UPDATE(vp, NULL, NULL, 0); } if (logged) UFS_WAPBL_END(vp->v_mount); out: /* * If we are done with the inode, reclaim it * so that it can be reused immediately. */ *ap->a_recycle = (ip->i_mode == 0); VOP_UNLOCK(vp, 0); fstrans_done(transmp); return (error); }
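/*
 * Illustrative sketch: the incremental-truncation arithmetic used in the
 * journaled (WAPBL) branch of ufs_inactive() above.  Rather than dropping a
 * large file in one transaction, the size is walked down one indirect-block
 * span at a time, each step landing on an indirect-block boundary.  "base"
 * and "incr" are assumed constants standing in for the NDADDR- and
 * MNINDIR-derived values in the kernel code.
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	const uint64_t base = 12ULL * 4096;	/* bytes mapped by direct blocks */
	const uint64_t incr = 1024ULL * 4096;	/* bytes mapped by one indirect block */
	uint64_t size = 10ULL * 1024 * 1024;	/* current file size */

	while (size > base + incr) {
		/* Round down to the previous full indirect-block boundary. */
		uint64_t nsize = base + ((size - base - 1) & ~(incr - 1));

		printf("truncate to %llu\n", (unsigned long long)nsize);
		size = nsize;			/* UFS_TRUNCATE() analogue */
	}
	printf("final truncate to 0\n");
	return 0;
}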
/* * Balloc defines the structure of file system storage * by allocating the physical blocks on a device given * the inode and the logical block number in a file. * This is the allocation strategy for UFS2. Above is * the allocation strategy for UFS1. */ int ffs_balloc_ufs2(vnode *vp, off_t startoffset, int size, Ucred *cred, int flags, Buf **bpp) { int error = 0; print("HARVEY TODO: %s\n", __func__); #if 0 struct inode *ip; struct ufs2_dinode *dp; ufs_lbn_t lbn, lastlbn; struct fs *fs; struct buf *bp, *nbp; struct ufsmount *ump; struct indir indirs[UFS_NIADDR + 2]; ufs2_daddr_t nb, newb, *bap, pref; ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1]; ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1]; int deallocated, osize, nsize, num, i, error; int unwindidx = -1; int saved_inbdflush; static struct timeval lastfail; static int curfail; int gbflags, reclaimed; ip = VTOI(vp); dp = ip->i_din2; fs = ITOFS(ip); ump = ITOUMP(ip); lbn = lblkno(fs, startoffset); size = blkoff(fs, startoffset) + size; reclaimed = 0; if (size > fs->fs_bsize) panic("ffs_balloc_ufs2: blk too big"); *bpp = nil; if (lbn < 0) return (EFBIG); gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0; if (DOINGSOFTDEP(vp)) softdep_prealloc(vp, MNT_WAIT); /* * Check for allocating external data. */ if (flags & IO_EXT) { if (lbn >= UFS_NXADDR) return (EFBIG); /* * If the next write will extend the data into a new block, * and the data is currently composed of a fragment * this fragment has to be extended to be a full block. */ lastlbn = lblkno(fs, dp->di_extsize); if (lastlbn < lbn) { nb = lastlbn; osize = sblksize(fs, dp->di_extsize, nb); if (osize < fs->fs_bsize && osize > 0) { UFS_LOCK(ump); error = ffs_realloccg(ip, -1 - nb, dp->di_extb[nb], ffs_blkpref_ufs2(ip, lastlbn, (int)nb, &dp->di_extb[0]), osize, (int)fs->fs_bsize, flags, cred, &bp); if (error) return (error); if (DOINGSOFTDEP(vp)) softdep_setup_allocext(ip, nb, dbtofsb(fs, bp->b_blkno), dp->di_extb[nb], fs->fs_bsize, osize, bp); dp->di_extsize = smalllblktosize(fs, nb + 1); dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno); bp->b_xflags |= BX_ALTDATA; ip->i_flag |= IN_CHANGE; if (flags & IO_SYNC) bwrite(bp); else bawrite(bp); } } /* * All blocks are direct blocks */ if (flags & BA_METAONLY) panic("ffs_balloc_ufs2: BA_METAONLY for ext block"); nb = dp->di_extb[lbn]; if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) { error = bread_gb(vp, -1 - lbn, fs->fs_bsize, NOCRED, gbflags, &bp); if (error) { brelse(bp); return (error); } bp->b_blkno = fsbtodb(fs, nb); bp->b_xflags |= BX_ALTDATA; *bpp = bp; return (0); } if (nb != 0) { /* * Consider need to reallocate a fragment. 
*/ osize = fragroundup(fs, blkoff(fs, dp->di_extsize)); nsize = fragroundup(fs, size); if (nsize <= osize) { error = bread_gb(vp, -1 - lbn, osize, NOCRED, gbflags, &bp); if (error) { brelse(bp); return (error); } bp->b_blkno = fsbtodb(fs, nb); bp->b_xflags |= BX_ALTDATA; } else { UFS_LOCK(ump); error = ffs_realloccg(ip, -1 - lbn, dp->di_extb[lbn], ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]), osize, nsize, flags, cred, &bp); if (error) return (error); bp->b_xflags |= BX_ALTDATA; if (DOINGSOFTDEP(vp)) softdep_setup_allocext(ip, lbn, dbtofsb(fs, bp->b_blkno), nb, nsize, osize, bp); } } else { if (dp->di_extsize < smalllblktosize(fs, lbn + 1)) nsize = fragroundup(fs, size); else nsize = fs->fs_bsize; UFS_LOCK(ump); error = ffs_alloc(ip, lbn, ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]), nsize, flags, cred, &newb); if (error) return (error); bp = getblk(vp, -1 - lbn, nsize, 0, 0, gbflags); bp->b_blkno = fsbtodb(fs, newb); bp->b_xflags |= BX_ALTDATA; if (flags & BA_CLRBUF) vfs_bio_clrbuf(bp); if (DOINGSOFTDEP(vp)) softdep_setup_allocext(ip, lbn, newb, 0, nsize, 0, bp); } dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno); ip->i_flag |= IN_CHANGE; *bpp = bp; return (0); } /* * If the next write will extend the file into a new block, * and the file is currently composed of a fragment * this fragment has to be extended to be a full block. */ lastlbn = lblkno(fs, ip->i_size); if (lastlbn < UFS_NDADDR && lastlbn < lbn) { nb = lastlbn; osize = blksize(fs, ip, nb); if (osize < fs->fs_bsize && osize > 0) { UFS_LOCK(ump); error = ffs_realloccg(ip, nb, dp->di_db[nb], ffs_blkpref_ufs2(ip, lastlbn, (int)nb, &dp->di_db[0]), osize, (int)fs->fs_bsize, flags, cred, &bp); if (error) return (error); if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, nb, dbtofsb(fs, bp->b_blkno), dp->di_db[nb], fs->fs_bsize, osize, bp); ip->i_size = smalllblktosize(fs, nb + 1); dp->di_size = ip->i_size; dp->di_db[nb] = dbtofsb(fs, bp->b_blkno); ip->i_flag |= IN_CHANGE | IN_UPDATE; if (flags & IO_SYNC) bwrite(bp); else bawrite(bp); } } /* * The first UFS_NDADDR blocks are direct blocks */ if (lbn < UFS_NDADDR) { if (flags & BA_METAONLY) panic("ffs_balloc_ufs2: BA_METAONLY for direct block"); nb = dp->di_db[lbn]; if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) { error = bread_gb(vp, lbn, fs->fs_bsize, NOCRED, gbflags, &bp); if (error) { brelse(bp); return (error); } bp->b_blkno = fsbtodb(fs, nb); *bpp = bp; return (0); } if (nb != 0) { /* * Consider need to reallocate a fragment. 
*/ osize = fragroundup(fs, blkoff(fs, ip->i_size)); nsize = fragroundup(fs, size); if (nsize <= osize) { error = bread_gb(vp, lbn, osize, NOCRED, gbflags, &bp); if (error) { brelse(bp); return (error); } bp->b_blkno = fsbtodb(fs, nb); } else { UFS_LOCK(ump); error = ffs_realloccg(ip, lbn, dp->di_db[lbn], ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_db[0]), osize, nsize, flags, cred, &bp); if (error) return (error); if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, lbn, dbtofsb(fs, bp->b_blkno), nb, nsize, osize, bp); } } else { if (ip->i_size < smalllblktosize(fs, lbn + 1)) nsize = fragroundup(fs, size); else nsize = fs->fs_bsize; UFS_LOCK(ump); error = ffs_alloc(ip, lbn, ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_db[0]), nsize, flags, cred, &newb); if (error) return (error); bp = getblk(vp, lbn, nsize, 0, 0, gbflags); bp->b_blkno = fsbtodb(fs, newb); if (flags & BA_CLRBUF) vfs_bio_clrbuf(bp); if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, lbn, newb, 0, nsize, 0, bp); } dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno); ip->i_flag |= IN_CHANGE | IN_UPDATE; *bpp = bp; return (0); } /* * Determine the number of levels of indirection. */ pref = 0; if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0) return(error); #ifdef INVARIANTS if (num < 1) panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block"); #endif saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH); /* * Fetch the first indirect block allocating if necessary. */ --num; nb = dp->di_ib[indirs[0].in_off]; allocib = nil; allocblk = allociblk; lbns_remfree = lbns; if (nb == 0) { UFS_LOCK(ump); pref = ffs_blkpref_ufs2(ip, lbn, -indirs[0].in_off - 1, (ufs2_daddr_t *)0); if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred, &newb)) != 0) { curthread_pflags_restore(saved_inbdflush); return (error); } pref = newb + fs->fs_frag; nb = newb; MPASS(allocblk < allociblk + nitems(allociblk)); MPASS(lbns_remfree < lbns + nitems(lbns)); *allocblk++ = nb; *lbns_remfree++ = indirs[1].in_lbn; bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, GB_UNMAPPED); bp->b_blkno = fsbtodb(fs, nb); vfs_bio_clrbuf(bp); if (DOINGSOFTDEP(vp)) { softdep_setup_allocdirect(ip, UFS_NDADDR + indirs[0].in_off, newb, 0, fs->fs_bsize, 0, bp); bdwrite(bp); } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) { if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } else { if ((error = bwrite(bp)) != 0) goto fail; } allocib = &dp->di_ib[indirs[0].in_off]; *allocib = nb; ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * Fetch through the indirect blocks, allocating as necessary. */ retry: for (i = 1;;) { error = bread(vp, indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp); if (error) { brelse(bp); goto fail; } bap = (ufs2_daddr_t *)bp->b_data; nb = bap[indirs[i].in_off]; if (i == num) break; i += 1; if (nb != 0) { bqrelse(bp); continue; } UFS_LOCK(ump); /* * If parent indirect has just been allocated, try to cluster * immediately following it. 
*/ if (pref == 0) pref = ffs_blkpref_ufs2(ip, lbn, i - num - 1, (ufs2_daddr_t *)0); if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags | IO_BUFLOCKED, cred, &newb)) != 0) { brelse(bp); if (DOINGSOFTDEP(vp) && ++reclaimed == 1) { UFS_LOCK(ump); softdep_request_cleanup(fs, vp, cred, FLUSH_BLOCKS_WAIT); UFS_UNLOCK(ump); goto retry; } if (ppsratecheck(&lastfail, &curfail, 1)) { ffs_fserr(fs, ip->i_number, "filesystem full"); uprintf("\n%s: write failed, filesystem " "is full\n", fs->fs_fsmnt); } goto fail; } pref = newb + fs->fs_frag; nb = newb; MPASS(allocblk < allociblk + nitems(allociblk)); MPASS(lbns_remfree < lbns + nitems(lbns)); *allocblk++ = nb; *lbns_remfree++ = indirs[i].in_lbn; nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, GB_UNMAPPED); nbp->b_blkno = fsbtodb(fs, nb); vfs_bio_clrbuf(nbp); if (DOINGSOFTDEP(vp)) { softdep_setup_allocindir_meta(nbp, ip, bp, indirs[i - 1].in_off, nb); bdwrite(nbp); } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) { if (nbp->b_bufsize == fs->fs_bsize) nbp->b_flags |= B_CLUSTEROK; bdwrite(nbp); } else { if ((error = bwrite(nbp)) != 0) { brelse(bp); goto fail; } } bap[indirs[i - 1].in_off] = nb; if (allocib == nil && unwindidx < 0) unwindidx = i - 1; /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & IO_SYNC) { bwrite(bp); } else { if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } } /* * If asked only for the indirect block, then return it. */ if (flags & BA_METAONLY) { curthread_pflags_restore(saved_inbdflush); *bpp = bp; return (0); } /* * Get the data block, allocating if necessary. */ if (nb == 0) { UFS_LOCK(ump); /* * If allocating metadata at the front of the cylinder * group and parent indirect block has just been allocated, * then cluster next to it if it is the first indirect in * the file. Otherwise it has been allocated in the metadata * area, so we want to find our own place out in the data area. */ if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0)) pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off, &bap[0]); error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags | IO_BUFLOCKED, cred, &newb); if (error) { brelse(bp); if (DOINGSOFTDEP(vp) && ++reclaimed == 1) { UFS_LOCK(ump); softdep_request_cleanup(fs, vp, cred, FLUSH_BLOCKS_WAIT); UFS_UNLOCK(ump); goto retry; } if (ppsratecheck(&lastfail, &curfail, 1)) { ffs_fserr(fs, ip->i_number, "filesystem full"); uprintf("\n%s: write failed, filesystem " "is full\n", fs->fs_fsmnt); } goto fail; } nb = newb; MPASS(allocblk < allociblk + nitems(allociblk)); MPASS(lbns_remfree < lbns + nitems(lbns)); *allocblk++ = nb; *lbns_remfree++ = lbn; nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags); nbp->b_blkno = fsbtodb(fs, nb); if (flags & BA_CLRBUF) vfs_bio_clrbuf(nbp); if (DOINGSOFTDEP(vp)) softdep_setup_allocindir_page(ip, lbn, bp, indirs[i].in_off, nb, 0, nbp); bap[indirs[i].in_off] = nb; /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & IO_SYNC) { bwrite(bp); } else { if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } curthread_pflags_restore(saved_inbdflush); *bpp = nbp; return (0); } brelse(bp); /* * If requested clear invalid portions of the buffer. If we * have to do a read-before-write (typical if BA_CLRBUF is set), * try to do some read-ahead in the sequential case to reduce * the number of I/O transactions. 
*/ if (flags & BA_CLRBUF) { int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT; if (seqcount != 0 && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 && !(vm_page_count_severe() || buf_dirty_count_severe())) { error = cluster_read(vp, ip->i_size, lbn, (int)fs->fs_bsize, NOCRED, MAXBSIZE, seqcount, gbflags, &nbp); } else { error = bread_gb(vp, lbn, (int)fs->fs_bsize, NOCRED, gbflags, &nbp); } if (error) { brelse(nbp); goto fail; } } else {
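The fragment-extension and reallocation decisions in ffs_balloc_ufs2() above all come down to the block arithmetic computed at the top of the function: the logical block number, the in-block size of the write, and the fragment-rounded old and new sizes. Below is a minimal user-space model of that arithmetic, assuming a 32K block / 4K fragment geometry; BSIZE, FSIZE and the three helper functions are illustrative stand-ins for the fs_bsize/fs_fsize fields and the lblkno/blkoff/fragroundup macros, not the kernel definitions.

/*
 * Minimal user-space model of the block/fragment arithmetic used by
 * ffs_balloc_ufs2() above.  The sizes are assumptions standing in for
 * superblock fields; the real code reads them from fs.
 */
#include <stdio.h>
#include <stdint.h>

#define BSIZE   32768           /* assumed fs_bsize */
#define FSIZE    4096           /* assumed fs_fsize */

static int64_t lblkno(int64_t off)      { return off / BSIZE; }
static int64_t blkoff(int64_t off)      { return off % BSIZE; }
static int64_t fragroundup(int64_t sz)  { return (sz + FSIZE - 1) & ~(int64_t)(FSIZE - 1); }

int main(void)
{
        int64_t startoffset = 100000;   /* byte offset of the write */
        int     wsize = 3000;           /* bytes being written */

        int64_t lbn  = lblkno(startoffset);
        int64_t size = blkoff(startoffset) + wsize;

        /* the fragment-rounded size decides whether a frag must grow */
        printf("lbn=%lld in-block size=%lld nsize=%lld\n",
            (long long)lbn, (long long)size, (long long)fragroundup(size));
        return 0;
}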
/* * Last reference to an inode. If necessary, write or delete it. */ int ufs_inactive(void *v) { struct vop_inactive_args *ap = v; struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); struct fs *fs = ip->i_fs; struct proc *p = curproc; mode_t mode; int error = 0, logged = 0, truncate_error = 0; #ifdef DIAGNOSTIC extern int prtactive; if (prtactive && vp->v_usecount != 0) vprint("ufs_inactive: pushing active", vp); #endif UFS_WAPBL_JUNLOCK_ASSERT(vp->v_mount); /* * Ignore inodes related to stale file handles. */ if (ip->i_din1 == NULL || DIP(ip, mode) == 0) goto out; if (DIP(ip, nlink) <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { error = UFS_WAPBL_BEGIN(vp->v_mount); if (error) goto out; logged = 1; if (getinoquota(ip) == 0) (void)ufs_quota_free_inode(ip, NOCRED); if (DIP(ip, size) != 0 && vp->v_mount->mnt_wapbl) { /* * When journaling, only truncate one indirect block at * a time. */ uint64_t incr = MNINDIR(ip->i_ump) << fs->fs_bshift; uint64_t base = NDADDR << fs->fs_bshift; while (!error && DIP(ip, size) > base + incr) { /* * round down to next full indirect block * boundary. */ uint64_t nsize = base + ((DIP(ip, size) - base - 1) & ~(incr - 1)); error = UFS_TRUNCATE(ip, nsize, 0, NOCRED); if (error) break; UFS_WAPBL_END(vp->v_mount); error = UFS_WAPBL_BEGIN(vp->v_mount); if (error) goto out; } } if (error == 0) { truncate_error = UFS_TRUNCATE(ip, (off_t)0, 0, NOCRED); /* XXX pedro: remove me */ if (truncate_error) printf("UFS_TRUNCATE()=%d\n", truncate_error); } DIP_ASSIGN(ip, rdev, 0); mode = DIP(ip, mode); DIP_ASSIGN(ip, mode, 0); ip->i_flag |= IN_CHANGE | IN_UPDATE; /* * Setting the mode to zero needs to wait for the inode to be * written just as does a change to the link count. So, rather * than creating a new entry point to do the same thing, we * just use softdep_change_linkcnt(). Also, we can't let * softdep co-opt us to help on its worklist, as we may end up * trying to recycle vnodes and getting to this same point a * couple of times, blowing the kernel stack. However, this * could be optimized by checking if we are coming from * vrele(), vput() or vclean() (by checking for VXLOCK) and * just avoiding the co-opt to happen in the last case. */ if (DOINGSOFTDEP(vp)) softdep_change_linkcnt(ip, 1); UFS_INODE_FREE(ip, ip->i_number, mode); } if (ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) { if (!logged++) { int err; err = UFS_WAPBL_BEGIN(vp->v_mount); if (err) { error = err; goto out; } } UFS_UPDATE(ip, 0); } if (logged) UFS_WAPBL_END(vp->v_mount); out: VOP_UNLOCK(vp, 0); /* * If we are done with the inode, reclaim it * so that it can be reused immediately. */ if (error == 0 && truncate_error == 0 && (ip->i_din1 == NULL || DIP(ip, mode) == 0)) vrecycle(vp, p); return (truncate_error ? truncate_error : error); }
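When journaling (WAPBL) is active, ufs_inactive() above truncates a doomed inode one indirect block's worth of data at a time so that each transaction stays bounded. A small user-space model of the boundary arithmetic in that loop, assuming 32K blocks, 8-byte block pointers and 12 direct blocks (the kernel derives incr and base from MNINDIR, fs_bshift and NDADDR):

/*
 * User-space model of the stepwise truncation loop in ufs_inactive().
 * The geometry below is an assumption used only to make the arithmetic
 * concrete; the kernel derives incr and base from the mount.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t bshift = 15;                    /* 32K blocks */
        uint64_t mnindir = 32768 / 8;            /* pointers per indirect blk */
        uint64_t ndaddr = 12;                    /* direct blocks */

        uint64_t incr = mnindir << bshift;       /* bytes mapped per indirect */
        uint64_t base = ndaddr << bshift;        /* bytes mapped by directs */
        uint64_t size = 3 * incr + base + 777;   /* pretend inode size */

        while (size > base + incr) {
                /* round down to the previous indirect-block boundary */
                uint64_t nsize = base + ((size - base - 1) & ~(incr - 1));
                printf("truncate to %llu\n", (unsigned long long)nsize);
                size = nsize;
        }
        printf("final truncate to 0\n");
        return 0;
}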
/* * Truncate the inode oip to at most length size, freeing the * disk blocks. */ int ffs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred) { struct vnode *ovp; daddr64_t lastblock; daddr64_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR]; daddr64_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR]; struct fs *fs; struct buf *bp; int offset, size, level; long count, nblocks, vflags, blocksreleased = 0; int i, aflags, error, allerror, indirect = 0; off_t osize; extern int num_indirdep; extern int max_indirdep; if (length < 0) return (EINVAL); ovp = ITOV(oip); if (ovp->v_type != VREG && ovp->v_type != VDIR && ovp->v_type != VLNK) return (0); if (DIP(oip, size) == length) return (0); if (ovp->v_type == VLNK && (DIP(oip, size) < ovp->v_mount->mnt_maxsymlinklen || (ovp->v_mount->mnt_maxsymlinklen == 0 && oip->i_din1->di_blocks == 0))) { #ifdef DIAGNOSTIC if (length != 0) panic("ffs_truncate: partial truncate of symlink"); #endif memset(SHORTLINK(oip), 0, (size_t) DIP(oip, size)); DIP_ASSIGN(oip, size, 0); oip->i_flag |= IN_CHANGE | IN_UPDATE; return (UFS_UPDATE(oip, MNT_WAIT)); } if ((error = getinoquota(oip)) != 0) return (error); uvm_vnp_setsize(ovp, length); oip->i_ci.ci_lasta = oip->i_ci.ci_clen = oip->i_ci.ci_cstart = oip->i_ci.ci_lastw = 0; if (DOINGSOFTDEP(ovp)) { if (length > 0 || softdep_slowdown(ovp)) { /* * If a file is only partially truncated, then * we have to clean up the data structures * describing the allocation past the truncation * point. Finding and deallocating those structures * is a lot of work. Since partial truncation occurs * rarely, we solve the problem by syncing the file * so that it will have no data structures left. */ if ((error = VOP_FSYNC(ovp, cred, MNT_WAIT)) != 0) return (error); } else { (void)ufs_quota_free_blocks(oip, DIP(oip, blocks), NOCRED); softdep_setup_freeblocks(oip, length); (void) vinvalbuf(ovp, 0, cred, curproc, 0, 0); oip->i_flag |= IN_CHANGE | IN_UPDATE; return (UFS_UPDATE(oip, 0)); } } fs = oip->i_fs; osize = DIP(oip, size); /* * Lengthen the size of the file. We must ensure that the * last byte of the file is allocated. Since the smallest * value of osize is 0, length will be at least 1. */ if (osize < length) { if (length > fs->fs_maxfilesize) return (EFBIG); aflags = B_CLRBUF; if (flags & IO_SYNC) aflags |= B_SYNC; error = UFS_BUF_ALLOC(oip, length - 1, 1, cred, aflags, &bp); if (error) return (error); if (bp->b_lblkno >= NDADDR) indirect = 1; DIP_ASSIGN(oip, size, length); uvm_vnp_setsize(ovp, length); (void) uvm_vnp_uncache(ovp); if (aflags & B_SYNC) bwrite(bp); else bawrite(bp); oip->i_flag |= IN_CHANGE | IN_UPDATE; error = UFS_UPDATE(oip, MNT_WAIT); if (DOINGSOFTDEP(ovp) && num_indirdep > max_indirdep) if (indirect) { /* * If the number of pending indirect block * dependencies is sufficiently close to the * maximum number of simultaneously mappable * buffers force a sync on the vnode to prevent * buffer cache exhaustion. */ VOP_FSYNC(ovp, curproc->p_ucred, MNT_WAIT); } return (error); } uvm_vnp_setsize(ovp, length); /* * Shorten the size of the file. If the file is not being * truncated to a block boundary, the contents of the * partial block following the end of the file must be * zero'ed in case it ever becomes accessible again because * of subsequent file growth. Directories however are not * zero'ed as they should grow back initialized to empty. 
*/ offset = blkoff(fs, length); if (offset == 0) { DIP_ASSIGN(oip, size, length); } else { lbn = lblkno(fs, length); aflags = B_CLRBUF; if (flags & IO_SYNC) aflags |= B_SYNC; error = UFS_BUF_ALLOC(oip, length - 1, 1, cred, aflags, &bp); if (error) return (error); /* * When we are doing soft updates and the UFS_BALLOC * above fills in a direct block hole with a full sized * block that will be truncated down to a fragment below, * we must flush out the block dependency with an FSYNC * so that we do not get a soft updates inconsistency * when we create the fragment below. */ if (DOINGSOFTDEP(ovp) && lbn < NDADDR && fragroundup(fs, blkoff(fs, length)) < fs->fs_bsize && (error = VOP_FSYNC(ovp, cred, MNT_WAIT)) != 0) return (error); DIP_ASSIGN(oip, size, length); size = blksize(fs, oip, lbn); (void) uvm_vnp_uncache(ovp); if (ovp->v_type != VDIR) bzero((char *)bp->b_data + offset, (u_int)(size - offset)); bp->b_bcount = size; if (aflags & B_SYNC) bwrite(bp); else bawrite(bp); } /* * Calculate index into inode's block list of * last direct and indirect blocks (if any) * which we want to keep. Lastblock is -1 when * the file is truncated to 0. */ lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1; lastiblock[SINGLE] = lastblock - NDADDR; lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs); lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs); nblocks = btodb(fs->fs_bsize); /* * Update file and block pointers on disk before we start freeing * blocks. If we crash before free'ing blocks below, the blocks * will be returned to the free list. lastiblock values are also * normalized to -1 for calls to ffs_indirtrunc below. */ for (level = TRIPLE; level >= SINGLE; level--) { oldblks[NDADDR + level] = DIP(oip, ib[level]); if (lastiblock[level] < 0) { DIP_ASSIGN(oip, ib[level], 0); lastiblock[level] = -1; } } for (i = 0; i < NDADDR; i++) { oldblks[i] = DIP(oip, db[i]); if (i > lastblock) DIP_ASSIGN(oip, db[i], 0); } oip->i_flag |= IN_CHANGE | IN_UPDATE; if ((error = UFS_UPDATE(oip, MNT_WAIT)) != 0) allerror = error; /* * Having written the new inode to disk, save its new configuration * and put back the old block pointers long enough to process them. * Note that we save the new block configuration so we can check it * when we are done. */ for (i = 0; i < NDADDR; i++) { newblks[i] = DIP(oip, db[i]); DIP_ASSIGN(oip, db[i], oldblks[i]); } for (i = 0; i < NIADDR; i++) { newblks[NDADDR + i] = DIP(oip, ib[i]); DIP_ASSIGN(oip, ib[i], oldblks[NDADDR + i]); } DIP_ASSIGN(oip, size, osize); vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA; allerror = vinvalbuf(ovp, vflags, cred, curproc, 0, 0); /* * Indirect blocks first. */ indir_lbn[SINGLE] = -NDADDR; indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1; indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1; for (level = TRIPLE; level >= SINGLE; level--) { bn = DIP(oip, ib[level]); if (bn != 0) { error = ffs_indirtrunc(oip, indir_lbn[level], fsbtodb(fs, bn), lastiblock[level], level, &count); if (error) allerror = error; blocksreleased += count; if (lastiblock[level] < 0) { DIP_ASSIGN(oip, ib[level], 0); ffs_blkfree(oip, bn, fs->fs_bsize); blocksreleased += nblocks; } } if (lastiblock[level] >= 0) goto done; } /* * All whole direct blocks or frags. 
*/ for (i = NDADDR - 1; i > lastblock; i--) { long bsize; bn = DIP(oip, db[i]); if (bn == 0) continue; DIP_ASSIGN(oip, db[i], 0); bsize = blksize(fs, oip, i); ffs_blkfree(oip, bn, bsize); blocksreleased += btodb(bsize); } if (lastblock < 0) goto done; /* * Finally, look for a change in size of the * last direct block; release any frags. */ bn = DIP(oip, db[lastblock]); if (bn != 0) { long oldspace, newspace; /* * Calculate amount of space we're giving * back as old block size minus new block size. */ oldspace = blksize(fs, oip, lastblock); DIP_ASSIGN(oip, size, length); newspace = blksize(fs, oip, lastblock); if (newspace == 0) panic("ffs_truncate: newspace"); if (oldspace - newspace > 0) { /* * Block number of space to be free'd is * the old block # plus the number of frags * required for the storage we're keeping. */ bn += numfrags(fs, newspace); ffs_blkfree(oip, bn, oldspace - newspace); blocksreleased += btodb(oldspace - newspace); } } done: #ifdef DIAGNOSTIC for (level = SINGLE; level <= TRIPLE; level++) if (newblks[NDADDR + level] != DIP(oip, ib[level])) panic("ffs_truncate1"); for (i = 0; i < NDADDR; i++) if (newblks[i] != DIP(oip, db[i])) panic("ffs_truncate2"); #endif /* DIAGNOSTIC */ /* * Put back the real size. */ DIP_ASSIGN(oip, size, length); DIP_ADD(oip, blocks, -blocksreleased); oip->i_flag |= IN_CHANGE; (void)ufs_quota_free_blocks(oip, blocksreleased, NOCRED); return (allerror); }
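Both ffs_truncate() above and the variants that follow begin the freeing pass by working out the last direct block to keep and the corresponding cutoffs for each level of indirection. A standalone model of that computation, assuming 12 direct blocks and 2048 pointers per indirect block (stand-ins for NDADDR and NINDIR(fs)); negative values mean the whole level can be released:

/*
 * Model of the "last block to keep" computation in ffs_truncate().
 * Geometry values are assumptions; the real values come from the
 * superblock via NDADDR and NINDIR(fs).
 */
#include <stdio.h>
#include <stdint.h>

#define SINGLE 0
#define DOUBLE 1
#define TRIPLE 2

int main(void)
{
        int64_t bsize  = 16384;
        int64_t ndaddr = 12;
        int64_t nindir = 2048;
        int64_t length = 5LL * 1024 * 1024 * 1024;      /* new size: 5 GiB */

        /* -1 when the file is truncated to zero */
        int64_t lastblock = (length + bsize - 1) / bsize - 1;
        int64_t lastiblock[3];

        lastiblock[SINGLE] = lastblock - ndaddr;
        lastiblock[DOUBLE] = lastiblock[SINGLE] - nindir;
        lastiblock[TRIPLE] = lastiblock[DOUBLE] - nindir * nindir;

        printf("lastblock=%lld single=%lld double=%lld triple=%lld\n",
            (long long)lastblock, (long long)lastiblock[SINGLE],
            (long long)lastiblock[DOUBLE], (long long)lastiblock[TRIPLE]);
        return 0;
}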
/* * Update the access, modified, and inode change times as specified by the * IN_ACCESS, IN_UPDATE, and IN_CHANGE flags respectively. The IN_MODIFIED * flag is used to specify that the inode needs to be updated but that the * times have already been set. The access and modified times are taken from * the second and third parameters; the inode change time is always taken * from the current time. If waitfor is set, then wait for the disk write * of the inode to complete. */ int ffs_update(struct inode *ip, struct timespec *atime, struct timespec *mtime, int waitfor) { struct vnode *vp; struct fs *fs; struct buf *bp; int error; struct timespec ts; vp = ITOV(ip); if (vp->v_mount->mnt_flag & MNT_RDONLY) { ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE); return (0); } if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && waitfor != MNT_WAIT) return (0); getnanotime(&ts); if (ip->i_flag & IN_ACCESS) { DIP_ASSIGN(ip, atime, atime ? atime->tv_sec : ts.tv_sec); DIP_ASSIGN(ip, atimensec, atime ? atime->tv_nsec : ts.tv_nsec); } if (ip->i_flag & IN_UPDATE) { DIP_ASSIGN(ip, mtime, mtime ? mtime->tv_sec : ts.tv_sec); DIP_ASSIGN(ip, mtimensec, mtime ? mtime->tv_nsec : ts.tv_nsec); ip->i_modrev++; } if (ip->i_flag & IN_CHANGE) { DIP_ASSIGN(ip, ctime, ts.tv_sec); DIP_ASSIGN(ip, ctimensec, ts.tv_nsec); } ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE); fs = ip->i_fs; /* * Ensure that uid and gid are correct. This is a temporary * fix until fsck has been changed to do the update. */ if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_inodefmt < FS_44INODEFMT) { ip->i_din1->di_ouid = ip->i_ffs1_uid; ip->i_din1->di_ogid = ip->i_ffs1_gid; } error = bread(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), (int)fs->fs_bsize, &bp); if (error) { brelse(bp); return (error); } if (DOINGSOFTDEP(vp)) softdep_update_inodeblock(ip, bp, waitfor); else if (ip->i_effnlink != DIP(ip, nlink)) panic("ffs_update: bad link cnt"); #ifdef FFS2 if (ip->i_ump->um_fstype == UM_UFS2) *((struct ufs2_dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2; else #endif *((struct ufs1_dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1; if (waitfor && !DOINGASYNC(vp)) { return (bwrite(bp)); } else { bdwrite(bp); return (0); } }
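The ffs_update() variant above is driven entirely by the IN_ACCESS, IN_UPDATE and IN_CHANGE flags: each flag selects which timestamp is refreshed before the flags are cleared and the dinode is copied into the inode block. A sketch of that flag-to-timestamp mapping with a stripped-down inode; the struct, mini_update() and the flag values are illustrative, not the kernel definitions:

/*
 * Sketch of the flag-to-timestamp mapping applied by ffs_update().
 * Everything here is a user-space stand-in for illustration only.
 */
#include <stdio.h>
#include <time.h>

#define IN_ACCESS 0x01  /* access time needs updating */
#define IN_CHANGE 0x02  /* inode change time needs updating */
#define IN_UPDATE 0x04  /* modification time needs updating */

struct mini_inode {
        time_t atime, mtime, ctime;
        int    flag;
};

static void mini_update(struct mini_inode *ip)
{
        time_t now = time(NULL);

        if (ip->flag & IN_ACCESS)
                ip->atime = now;
        if (ip->flag & IN_UPDATE)
                ip->mtime = now;        /* a write also bumps i_modrev */
        if (ip->flag & IN_CHANGE)
                ip->ctime = now;        /* ctime always uses "now" */
        ip->flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE);
}

int main(void)
{
        struct mini_inode ino = { 0, 0, 0, IN_CHANGE | IN_UPDATE };

        mini_update(&ino);
        printf("atime=%ld mtime=%ld ctime=%ld flags=%d\n",
            (long)ino.atime, (long)ino.mtime, (long)ino.ctime, ino.flag);
        return 0;
}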
/* * Allocate a new inode. */ int ufs_makeinode(int mode, struct vnode *dvp, struct vnode **vpp, struct componentname *cnp) { struct inode *ip, *pdir; struct direct newdir; struct vnode *tvp; int error; pdir = VTOI(dvp); #ifdef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) panic("ufs_makeinode: no name"); #endif *vpp = NULL; if ((mode & IFMT) == 0) mode |= IFREG; if ((error = UFS_INODE_ALLOC(pdir, mode, cnp->cn_cred, &tvp)) != 0) { pool_put(&namei_pool, cnp->cn_pnbuf); vput(dvp); return (error); } ip = VTOI(tvp); DIP_ASSIGN(ip, gid, DIP(pdir, gid)); DIP_ASSIGN(ip, uid, cnp->cn_cred->cr_uid); if ((error = getinoquota(ip)) || (error = ufs_quota_alloc_inode(ip, cnp->cn_cred))) { pool_put(&namei_pool, cnp->cn_pnbuf); UFS_INODE_FREE(ip, ip->i_number, mode); vput(tvp); vput(dvp); return (error); } ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; DIP_ASSIGN(ip, mode, mode); tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). */ ip->i_effnlink = 1; DIP_ASSIGN(ip, nlink, 1); if (DOINGSOFTDEP(tvp)) softdep_change_linkcnt(ip, 0); if ((DIP(ip, mode) & ISGID) && !groupmember(DIP(ip, gid), cnp->cn_cred) && suser_ucred(cnp->cn_cred)) DIP_AND(ip, mode, ~ISGID); /* * Make sure inode goes to disk before directory entry. */ if ((error = UFS_UPDATE(ip, !DOINGSOFTDEP(tvp))) != 0) goto bad; ufs_makedirentry(ip, cnp, &newdir); if ((error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL)) != 0) goto bad; if ((cnp->cn_flags & SAVESTART) == 0) pool_put(&namei_pool, cnp->cn_pnbuf); vput(dvp); *vpp = tvp; return (0); bad: /* * Write error occurred trying to update the inode * or the directory so must deallocate the inode. */ pool_put(&namei_pool, cnp->cn_pnbuf); vput(dvp); ip->i_effnlink = 0; DIP_ASSIGN(ip, nlink, 0); ip->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(tvp)) softdep_change_linkcnt(ip, 0); tvp->v_type = VNON; vput(tvp); return (error); }
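Two small mode fixups in ufs_makeinode() above are easy to miss: a caller that passes no file-type bits gets a regular file, and the setgid bit is stripped when the creating credential is neither a member of the new file's group nor the superuser. A user-space sketch of just those fixups, using the traditional octal values for IFMT, IFREG and ISGID; fixup_mode() is an illustrative helper, not kernel code:

/*
 * Model of the mode fixups performed by ufs_makeinode(): default to a
 * regular file, and strip setgid for unprivileged non-group-members.
 */
#include <stdio.h>

#define IFMT  0170000
#define IFREG 0100000
#define ISGID 0002000

static int fixup_mode(int mode, int in_group, int is_superuser)
{
        if ((mode & IFMT) == 0)
                mode |= IFREG;
        if ((mode & ISGID) && !in_group && !is_superuser)
                mode &= ~ISGID;
        return mode;
}

int main(void)
{
        printf("%o\n", fixup_mode(02644, 0, 0));   /* setgid stripped */
        printf("%o\n", fixup_mode(02644, 1, 0));   /* setgid kept */
        return 0;
}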
/* * Truncate the inode oip to at most length size, freeing the * disk blocks. */ int ffs_truncate(struct vnode *vp, off_t length, int flags, struct ucred *cred) { struct vnode *ovp = vp; ufs_daddr_t lastblock; struct inode *oip; ufs_daddr_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR]; ufs_daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR]; struct fs *fs; struct buf *bp; int offset, size, level; long count, nblocks, blocksreleased = 0; int i; int aflags, error, allerror; off_t osize; oip = VTOI(ovp); fs = oip->i_fs; if (length < 0) return (EINVAL); if (length > fs->fs_maxfilesize) return (EFBIG); if (ovp->v_type == VLNK && (oip->i_size < ovp->v_mount->mnt_maxsymlinklen || oip->i_din.di_blocks == 0)) { #ifdef DIAGNOSTIC if (length != 0) panic("ffs_truncate: partial truncate of symlink"); #endif /* DIAGNOSTIC */ bzero((char *)&oip->i_shortlink, (uint)oip->i_size); oip->i_size = 0; oip->i_flag |= IN_CHANGE | IN_UPDATE; return (ffs_update(ovp, 1)); } if (oip->i_size == length) { oip->i_flag |= IN_CHANGE | IN_UPDATE; return (ffs_update(ovp, 0)); } if (fs->fs_ronly) panic("ffs_truncate: read-only filesystem"); #ifdef QUOTA error = ufs_getinoquota(oip); if (error) return (error); #endif ovp->v_lasta = ovp->v_clen = ovp->v_cstart = ovp->v_lastw = 0; if (DOINGSOFTDEP(ovp)) { if (length > 0 || softdep_slowdown(ovp)) { /* * If a file is only partially truncated, then * we have to clean up the data structures * describing the allocation past the truncation * point. Finding and deallocating those structures * is a lot of work. Since partial truncation occurs * rarely, we solve the problem by syncing the file * so that it will have no data structures left. */ if ((error = VOP_FSYNC(ovp, MNT_WAIT, 0)) != 0) return (error); } else { #ifdef QUOTA (void) ufs_chkdq(oip, -oip->i_blocks, NOCRED, 0); #endif softdep_setup_freeblocks(oip, length); vinvalbuf(ovp, 0, 0, 0); nvnode_pager_setsize(ovp, 0, fs->fs_bsize, 0); oip->i_flag |= IN_CHANGE | IN_UPDATE; return (ffs_update(ovp, 0)); } } osize = oip->i_size; /* * Lengthen the size of the file. We must ensure that the * last byte of the file is allocated. Since the smallest * value of osize is 0, length will be at least 1. * * nvextendbuf() only breads the old buffer. The blocksize * of the new buffer must be specified so it knows how large * to make the VM object. */ if (osize < length) { nvextendbuf(vp, osize, length, blkoffsize(fs, oip, osize), /* oblksize */ blkoffresize(fs, length), /* nblksize */ blkoff(fs, osize), blkoff(fs, length), 0); aflags = B_CLRBUF; if (flags & IO_SYNC) aflags |= B_SYNC; /* BALLOC will reallocate the fragment at the old EOF */ error = VOP_BALLOC(ovp, length - 1, 1, cred, aflags, &bp); if (error) return (error); oip->i_size = length; if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; if (aflags & B_SYNC) bwrite(bp); else bawrite(bp); oip->i_flag |= IN_CHANGE | IN_UPDATE; return (ffs_update(ovp, 1)); } /* * Shorten the size of the file. * * NOTE: The block size specified in nvtruncbuf() is the blocksize * of the buffer containing length prior to any reallocation * of the block. 
*/ allerror = nvtruncbuf(ovp, length, blkoffsize(fs, oip, length), blkoff(fs, length), 0); offset = blkoff(fs, length); if (offset == 0) { oip->i_size = length; } else { lbn = lblkno(fs, length); aflags = B_CLRBUF; if (flags & IO_SYNC) aflags |= B_SYNC; error = VOP_BALLOC(ovp, length - 1, 1, cred, aflags, &bp); if (error) return (error); /* * When we are doing soft updates and the UFS_BALLOC * above fills in a direct block hole with a full sized * block that will be truncated down to a fragment below, * we must flush out the block dependency with an FSYNC * so that we do not get a soft updates inconsistency * when we create the fragment below. * * nvtruncbuf() may have re-dirtied the underlying block * as part of its truncation zeroing code. To avoid a * 'locking against myself' panic in the second fsync we * can simply undirty the bp since the redirtying was * related to areas of the buffer that we are going to * throw away anyway, and we will b*write() the remainder * anyway down below. */ if (DOINGSOFTDEP(ovp) && lbn < NDADDR && fragroundup(fs, blkoff(fs, length)) < fs->fs_bsize) { bundirty(bp); error = VOP_FSYNC(ovp, MNT_WAIT, 0); if (error) { bdwrite(bp); return (error); } } oip->i_size = length; size = blksize(fs, oip, lbn); #if 0 /* remove - nvtruncbuf deals with this */ if (ovp->v_type != VDIR) bzero((char *)bp->b_data + offset, (uint)(size - offset)); #endif /* Kirk's code has reallocbuf(bp, size, 1) here */ allocbuf(bp, size); if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; if (aflags & B_SYNC) bwrite(bp); else bawrite(bp); } /* * Calculate index into inode's block list of * last direct and indirect blocks (if any) * which we want to keep. Lastblock is -1 when * the file is truncated to 0. */ lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1; lastiblock[SINGLE] = lastblock - NDADDR; lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs); lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs); nblocks = btodb(fs->fs_bsize); /* * Update file and block pointers on disk before we start freeing * blocks. If we crash before free'ing blocks below, the blocks * will be returned to the free list. lastiblock values are also * normalized to -1 for calls to ffs_indirtrunc below. */ bcopy((caddr_t)&oip->i_db[0], (caddr_t)oldblks, sizeof oldblks); for (level = TRIPLE; level >= SINGLE; level--) if (lastiblock[level] < 0) { oip->i_ib[level] = 0; lastiblock[level] = -1; } for (i = NDADDR - 1; i > lastblock; i--) oip->i_db[i] = 0; oip->i_flag |= IN_CHANGE | IN_UPDATE; error = ffs_update(ovp, 1); if (error && allerror == 0) allerror = error; /* * Having written the new inode to disk, save its new configuration * and put back the old block pointers long enough to process them. * Note that we save the new block configuration so we can check it * when we are done. */ bcopy((caddr_t)&oip->i_db[0], (caddr_t)newblks, sizeof newblks); bcopy((caddr_t)oldblks, (caddr_t)&oip->i_db[0], sizeof oldblks); oip->i_size = osize; if (error && allerror == 0) allerror = error; /* * Indirect blocks first. 
*/ indir_lbn[SINGLE] = -NDADDR; indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1; indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1; for (level = TRIPLE; level >= SINGLE; level--) { bn = oip->i_ib[level]; if (bn != 0) { error = ffs_indirtrunc(oip, indir_lbn[level], fsbtodb(fs, bn), lastiblock[level], level, &count); if (error) allerror = error; blocksreleased += count; if (lastiblock[level] < 0) { oip->i_ib[level] = 0; ffs_blkfree(oip, bn, fs->fs_bsize); blocksreleased += nblocks; } } if (lastiblock[level] >= 0) goto done; } /* * All whole direct blocks or frags. */ for (i = NDADDR - 1; i > lastblock; i--) { long bsize; bn = oip->i_db[i]; if (bn == 0) continue; oip->i_db[i] = 0; bsize = blksize(fs, oip, i); ffs_blkfree(oip, bn, bsize); blocksreleased += btodb(bsize); } if (lastblock < 0) goto done; /* * Finally, look for a change in size of the * last direct block; release any frags. */ bn = oip->i_db[lastblock]; if (bn != 0) { long oldspace, newspace; /* * Calculate amount of space we're giving * back as old block size minus new block size. */ oldspace = blksize(fs, oip, lastblock); oip->i_size = length; newspace = blksize(fs, oip, lastblock); if (newspace == 0) panic("ffs_truncate: newspace"); if (oldspace - newspace > 0) { /* * Block number of space to be free'd is * the old block # plus the number of frags * required for the storage we're keeping. */ bn += numfrags(fs, newspace); ffs_blkfree(oip, bn, oldspace - newspace); blocksreleased += btodb(oldspace - newspace); } } done: #ifdef DIAGNOSTIC for (level = SINGLE; level <= TRIPLE; level++) if (newblks[NDADDR + level] != oip->i_ib[level]) panic("ffs_truncate1"); for (i = 0; i < NDADDR; i++) if (newblks[i] != oip->i_db[i]) panic("ffs_truncate2"); if (length == 0 && !RB_EMPTY(&ovp->v_rbdirty_tree)) panic("ffs_truncate3"); #endif /* DIAGNOSTIC */ /* * Put back the real size. */ oip->i_size = length; oip->i_blocks -= blocksreleased; if (oip->i_blocks < 0) /* sanity */ oip->i_blocks = 0; oip->i_flag |= IN_CHANGE; #ifdef QUOTA (void) ufs_chkdq(oip, -blocksreleased, NOCRED, 0); #endif return (allerror); }
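The tail of ffs_truncate() above releases only the fragments that the shrunken last direct block no longer needs, computed as the old block size minus the new fragment-rounded size. A model of that accounting, assuming a 16K block / 2K fragment geometry and the usual 512-byte device block; blksize_at() is an illustrative stand-in for the blksize() macro:

/*
 * Model of the trailing-fragment release at the end of ffs_truncate():
 * when the last direct block shrinks from a full block to a fragment,
 * only the tail frags are freed.  Geometry values are assumptions.
 */
#include <stdio.h>

#define BSIZE 16384
#define FSIZE  2048
#define DEV_BSIZE 512

static long blksize_at(long filesize, long lbn)
{
        long off = lbn * (long)BSIZE;

        if (filesize >= off + BSIZE)
                return BSIZE;                      /* full block */
        /* partial last block: round the tail up to a fragment */
        return ((filesize - off) + FSIZE - 1) & ~(long)(FSIZE - 1);
}

int main(void)
{
        long lastblock = 3;
        long oldspace = blksize_at(4 * BSIZE, lastblock);       /* was full */
        long newspace = blksize_at(3 * BSIZE + 5000, lastblock);
        long freed = oldspace - newspace;

        printf("freeing %ld bytes (%ld device blocks), keeping %ld frags\n",
            freed, freed / DEV_BSIZE, newspace / FSIZE);
        return 0;
}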
/* * link vnode call */ int ufs_link(void *v) { struct vop_link_args *ap = v; struct vnode *dvp = ap->a_dvp; struct vnode *vp = ap->a_vp; struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; struct inode *ip; struct direct newdir; int error; #ifdef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) panic("ufs_link: no name"); #endif if (vp->v_type == VDIR) { VOP_ABORTOP(dvp, cnp); error = EPERM; goto out2; } if (dvp->v_mount != vp->v_mount) { VOP_ABORTOP(dvp, cnp); error = EXDEV; goto out2; } if (dvp != vp && (error = vn_lock(vp, LK_EXCLUSIVE, p))) { VOP_ABORTOP(dvp, cnp); goto out2; } ip = VTOI(vp); if ((nlink_t) DIP(ip, nlink) >= LINK_MAX) { VOP_ABORTOP(dvp, cnp); error = EMLINK; goto out1; } if (DIP(ip, flags) & (IMMUTABLE | APPEND)) { VOP_ABORTOP(dvp, cnp); error = EPERM; goto out1; } ip->i_effnlink++; DIP_ADD(ip, nlink, 1); ip->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(vp)) softdep_change_linkcnt(ip, 0); if ((error = UFS_UPDATE(ip, !DOINGSOFTDEP(vp))) == 0) { ufs_makedirentry(ip, cnp, &newdir); error = ufs_direnter(dvp, vp, &newdir, cnp, NULL); } if (error) { ip->i_effnlink--; DIP_ADD(ip, nlink, -1); ip->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(vp)) softdep_change_linkcnt(ip, 0); } pool_put(&namei_pool, cnp->cn_pnbuf); VN_KNOTE(vp, NOTE_LINK); VN_KNOTE(dvp, NOTE_WRITE); out1: if (dvp != vp) VOP_UNLOCK(vp, 0); out2: vput(dvp); return (error); }
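ufs_link() above optimistically bumps both the in-memory effective link count and the on-disk count before inserting the new directory entry, then rolls both back if the insertion fails. A stripped-down model of that bump-then-roll-back pattern; mini_link() and add_dir_entry() are illustrative stand-ins for the real entry points, not kernel interfaces:

/*
 * Sketch of the bump-then-roll-back link count pattern used by
 * ufs_link().  add_dir_entry() stands in for ufs_direnter().
 */
#include <stdio.h>

struct mini_inode {
        int effnlink;   /* in-memory count, softdep's view */
        int nlink;      /* on-disk count */
};

static int add_dir_entry(int should_fail)
{
        return should_fail ? -1 : 0;    /* pretend directory insertion */
}

static int mini_link(struct mini_inode *ip, int should_fail)
{
        ip->effnlink++;
        ip->nlink++;
        if (add_dir_entry(should_fail) != 0) {
                /* undo the optimistic bump so counts stay consistent */
                ip->effnlink--;
                ip->nlink--;
                return -1;
        }
        return 0;
}

int main(void)
{
        struct mini_inode ino = { 1, 1 };

        mini_link(&ino, 1);     /* failure path: counts restored */
        printf("after failed link: nlink=%d\n", ino.nlink);
        mini_link(&ino, 0);     /* success path */
        printf("after link: nlink=%d\n", ino.nlink);
        return 0;
}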
/* * Rename system call. * rename("foo", "bar"); * is essentially * unlink("bar"); * link("foo", "bar"); * unlink("foo"); * but ``atomically''. Can't do full commit without saving state in the * inode on disk which isn't feasible at this time. Best we can do is * always guarantee the target exists. * * Basic algorithm is: * * 1) Bump link count on source while we're linking it to the * target. This also ensure the inode won't be deleted out * from underneath us while we work (it may be truncated by * a concurrent `trunc' or `open' for creation). * 2) Link source to destination. If destination already exists, * delete it first. * 3) Unlink source reference to inode if still around. If a * directory was moved and the parent of the destination * is different from the source, patch the ".." entry in the * directory. */ int ufs_rename(void *v) { struct vop_rename_args *ap = v; struct vnode *tvp = ap->a_tvp; struct vnode *tdvp = ap->a_tdvp; struct vnode *fvp = ap->a_fvp; struct vnode *fdvp = ap->a_fdvp; struct componentname *tcnp = ap->a_tcnp; struct componentname *fcnp = ap->a_fcnp; struct proc *p = fcnp->cn_proc; struct inode *ip, *xp, *dp; struct direct newdir; int doingdirectory = 0, oldparent = 0, newparent = 0; int error = 0; #ifdef DIAGNOSTIC if ((tcnp->cn_flags & HASBUF) == 0 || (fcnp->cn_flags & HASBUF) == 0) panic("ufs_rename: no name"); #endif /* * Check for cross-device rename. */ if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; abortit: VOP_ABORTOP(tdvp, tcnp); if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); VOP_ABORTOP(fdvp, fcnp); vrele(fdvp); vrele(fvp); return (error); } if (tvp && ((DIP(VTOI(tvp), flags) & (IMMUTABLE | APPEND)) || (DIP(VTOI(tdvp), flags) & APPEND))) { error = EPERM; goto abortit; } /* * Check if just deleting a link name or if we've lost a race. * If another process completes the same rename after we've looked * up the source and have blocked looking up the target, then the * source and target inodes may be identical now although the * names were never linked. */ if (fvp == tvp) { if (fvp->v_type == VDIR) { /* * Linked directories are impossible, so we must * have lost the race. Pretend that the rename * completed before the lookup. */ error = ENOENT; goto abortit; } /* Release destination completely. */ VOP_ABORTOP(tdvp, tcnp); vput(tdvp); vput(tvp); /* * Delete source. There is another race now that everything * is unlocked, but this doesn't cause any new complications. * relookup() may find a file that is unrelated to the * original one, or it may fail. Too bad. 
*/ vrele(fvp); fcnp->cn_flags &= ~MODMASK; fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; if ((fcnp->cn_flags & SAVESTART) == 0) panic("ufs_rename: lost from startdir"); fcnp->cn_nameiop = DELETE; if ((error = vfs_relookup(fdvp, &fvp, fcnp)) != 0) return (error); /* relookup did vrele() */ vrele(fdvp); return (VOP_REMOVE(fdvp, fvp, fcnp)); } if ((error = vn_lock(fvp, LK_EXCLUSIVE, p)) != 0) goto abortit; /* fvp, tdvp, tvp now locked */ dp = VTOI(fdvp); ip = VTOI(fvp); if ((nlink_t) DIP(ip, nlink) >= LINK_MAX) { VOP_UNLOCK(fvp, 0); error = EMLINK; goto abortit; } if ((DIP(ip, flags) & (IMMUTABLE | APPEND)) || (DIP(dp, flags) & APPEND)) { VOP_UNLOCK(fvp, 0); error = EPERM; goto abortit; } if ((DIP(ip, mode) & IFMT) == IFDIR) { error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred); if (!error && tvp) error = VOP_ACCESS(tvp, VWRITE, tcnp->cn_cred); if (error) { VOP_UNLOCK(fvp, 0); error = EACCES; goto abortit; } /* * Avoid ".", "..", and aliases of "." for obvious reasons. */ if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || dp == ip || (fcnp->cn_flags & ISDOTDOT) || (tcnp->cn_flags & ISDOTDOT) || (ip->i_flag & IN_RENAME)) { VOP_UNLOCK(fvp, 0); error = EINVAL; goto abortit; } ip->i_flag |= IN_RENAME; oldparent = dp->i_number; doingdirectory = 1; } VN_KNOTE(fdvp, NOTE_WRITE); /* XXX right place? */ /* * When the target exists, both the directory * and target vnodes are returned locked. */ dp = VTOI(tdvp); xp = NULL; if (tvp) xp = VTOI(tvp); /* * 1) Bump link count while we're moving stuff * around. If we crash somewhere before * completing our work, the link count * may be wrong, but correctable. */ ip->i_effnlink++; DIP_ADD(ip, nlink, 1); ip->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(fvp)) softdep_change_linkcnt(ip, 0); if ((error = UFS_UPDATE(ip, !DOINGSOFTDEP(fvp))) != 0) { VOP_UNLOCK(fvp, 0); goto bad; } /* * If ".." must be changed (ie the directory gets a new * parent) then the source directory must not be in the * directory hierarchy above the target, as this would * orphan everything below the source directory. Also * the user must have write permission in the source so * as to be able to change "..". We must repeat the call * to namei, as the parent directory is unlocked by the * call to checkpath(). */ error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred); VOP_UNLOCK(fvp, 0); /* tdvp and tvp locked */ if (oldparent != dp->i_number) newparent = dp->i_number; if (doingdirectory && newparent) { if (error) /* write access check above */ goto bad; if (xp != NULL) vput(tvp); /* * Compensate for the reference ufs_checkpath() loses. */ vref(tdvp); /* Only tdvp is locked */ if ((error = ufs_checkpath(ip, dp, tcnp->cn_cred)) != 0) { vrele(tdvp); goto out; } if ((tcnp->cn_flags & SAVESTART) == 0) panic("ufs_rename: lost to startdir"); if ((error = vfs_relookup(tdvp, &tvp, tcnp)) != 0) goto out; vrele(tdvp); /* relookup() acquired a reference */ dp = VTOI(tdvp); xp = NULL; if (tvp) xp = VTOI(tvp); } /* * 2) If target doesn't exist, link the target * to the source and unlink the source. * Otherwise, rewrite the target directory * entry to reference the source inode and * expunge the original entry's existence. */ if (xp == NULL) { if (dp->i_dev != ip->i_dev) panic("rename: EXDEV"); /* * Account for ".." in new directory. * When source and destination have the same * parent we don't fool with the link count. 
*/ if (doingdirectory && newparent) { if ((nlink_t) DIP(dp, nlink) >= LINK_MAX) { error = EMLINK; goto bad; } dp->i_effnlink++; DIP_ADD(dp, nlink, 1); dp->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(tdvp)) softdep_change_linkcnt(dp, 0); if ((error = UFS_UPDATE(dp, !DOINGSOFTDEP(tdvp))) != 0) { dp->i_effnlink--; DIP_ADD(dp, nlink, -1); dp->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(tdvp)) softdep_change_linkcnt(dp, 0); goto bad; } } ufs_makedirentry(ip, tcnp, &newdir); if ((error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL)) != 0) { if (doingdirectory && newparent) { dp->i_effnlink--; DIP_ADD(dp, nlink, -1); dp->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(tdvp)) softdep_change_linkcnt(dp, 0); (void)UFS_UPDATE(dp, 1); } goto bad; } VN_KNOTE(tdvp, NOTE_WRITE); vput(tdvp); } else { if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev) panic("rename: EXDEV"); /* * Short circuit rename(foo, foo). */ if (xp->i_number == ip->i_number) panic("ufs_rename: same file"); /* * If the parent directory is "sticky", then the user must * own the parent directory, or the destination of the rename, * otherwise the destination may not be changed (except by * root). This implements append-only directories. */ if ((DIP(dp, mode) & S_ISTXT) && tcnp->cn_cred->cr_uid != 0 && tcnp->cn_cred->cr_uid != DIP(dp, uid) && DIP(xp, uid )!= tcnp->cn_cred->cr_uid) { error = EPERM; goto bad; } /* * Target must be empty if a directory and have no links * to it. Also, ensure source and target are compatible * (both directories, or both not directories). */ if ((DIP(xp, mode) & IFMT) == IFDIR) { if (xp->i_effnlink > 2 || !ufs_dirempty(xp, dp->i_number, tcnp->cn_cred)) { error = ENOTEMPTY; goto bad; } if (!doingdirectory) { error = ENOTDIR; goto bad; } cache_purge(tdvp); } else if (doingdirectory) { error = EISDIR; goto bad; } if ((error = ufs_dirrewrite(dp, xp, ip->i_number, IFTODT(DIP(ip, mode)), (doingdirectory && newparent) ? newparent : doingdirectory)) != 0) goto bad; if (doingdirectory) { if (!newparent) { dp->i_effnlink--; if (DOINGSOFTDEP(tdvp)) softdep_change_linkcnt(dp, 0); } xp->i_effnlink--; if (DOINGSOFTDEP(tvp)) softdep_change_linkcnt(xp, 0); } if (doingdirectory && !DOINGSOFTDEP(tvp)) { /* * Truncate inode. The only stuff left in the directory * is "." and "..". The "." reference is inconsequential * since we are quashing it. We have removed the "." * reference and the reference in the parent directory, * but there may be other hard links. The soft * dependency code will arrange to do these operations * after the parent directory entry has been deleted on * disk, so when running with that code we avoid doing * them now. */ if (!newparent) { DIP_ADD(dp, nlink, -1); dp->i_flag |= IN_CHANGE; } DIP_ADD(xp, nlink, -1); xp->i_flag |= IN_CHANGE; if ((error = UFS_TRUNCATE(VTOI(tvp), (off_t)0, IO_SYNC, tcnp->cn_cred)) != 0) goto bad; } VN_KNOTE(tdvp, NOTE_WRITE); vput(tdvp); VN_KNOTE(tvp, NOTE_DELETE); vput(tvp); xp = NULL; } /* * 3) Unlink the source. */ fcnp->cn_flags &= ~MODMASK; fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; if ((fcnp->cn_flags & SAVESTART) == 0) panic("ufs_rename: lost from startdir"); if ((error = vfs_relookup(fdvp, &fvp, fcnp)) != 0) { vrele(ap->a_fvp); return (error); } vrele(fdvp); if (fvp == NULL) { /* * From name has disappeared. */ if (doingdirectory) panic("ufs_rename: lost dir entry"); vrele(ap->a_fvp); return (0); } xp = VTOI(fvp); dp = VTOI(fdvp); /* * Ensure that the directory entry still exists and has not * changed while the new name has been entered. 
If the source is * a file then the entry may have been unlinked or renamed. In * either case there is no further work to be done. If the source * is a directory then it cannot have been rmdir'ed; the IN_RENAME * flag ensures that it cannot be moved by another rename or removed * by a rmdir. */ if (xp != ip) { if (doingdirectory) panic("ufs_rename: lost dir entry"); } else { /* * If the source is a directory with a * new parent, the link count of the old * parent directory must be decremented * and ".." set to point to the new parent. */ if (doingdirectory && newparent) { xp->i_offset = mastertemplate.dot_reclen; ufs_dirrewrite(xp, dp, newparent, DT_DIR, 0); cache_purge(fdvp); } error = ufs_dirremove(fdvp, xp, fcnp->cn_flags, 0); xp->i_flag &= ~IN_RENAME; } VN_KNOTE(fvp, NOTE_RENAME); if (dp) vput(fdvp); if (xp) vput(fvp); vrele(ap->a_fvp); return (error); bad: if (xp) vput(ITOV(xp)); vput(ITOV(dp)); out: vrele(fdvp); if (doingdirectory) ip->i_flag &= ~IN_RENAME; if (vn_lock(fvp, LK_EXCLUSIVE, p) == 0) { ip->i_effnlink--; DIP_ADD(ip, nlink, -1); ip->i_flag |= IN_CHANGE; ip->i_flag &= ~IN_RENAME; if (DOINGSOFTDEP(fvp)) softdep_change_linkcnt(ip, 0); vput(fvp); } else vrele(fvp); return (error); }
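One permission check buried in the target-exists path of ufs_rename() above is the sticky-directory rule: in a directory with S_ISTXT set, only root, the directory's owner, or the target's owner may replace the entry. A small model of just that predicate; the uid values are illustrative only:

/*
 * Model of the sticky-directory check applied in ufs_rename() when the
 * target already exists (the same rule governs unlink).
 */
#include <stdio.h>

#define S_ISTXT 01000

static int sticky_denies(int dir_mode, unsigned dir_uid,
    unsigned target_uid, unsigned cred_uid)
{
        return (dir_mode & S_ISTXT) &&
            cred_uid != 0 &&
            cred_uid != dir_uid &&
            cred_uid != target_uid;
}

int main(void)
{
        /* unprivileged user 1001 vs a /tmp-style directory owned by root */
        printf("%d\n", sticky_denies(01777, 0, 1000, 1001));   /* denied */
        printf("%d\n", sticky_denies(01777, 0, 1001, 1001));   /* allowed */
        return 0;
}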
/* * Truncate the inode ip to at most length size, freeing the * disk blocks. */ int ffs_truncate(vnode *vp, off_t length, int flags, Ucred *cred) { print("HARVEY TODO: %s\n", __func__); #if 0 struct inode *ip; ufs2_daddr_t bn, lbn, lastblock, lastiblock[UFS_NIADDR]; ufs2_daddr_t indir_lbn[UFS_NIADDR], oldblks[UFS_NDADDR + UFS_NIADDR]; ufs2_daddr_t newblks[UFS_NDADDR + UFS_NIADDR]; ufs2_daddr_t count, blocksreleased = 0, datablocks, blkno; struct bufobj *bo; struct fs *fs; struct buf *bp; struct ufsmount *ump; int softdeptrunc, journaltrunc; int needextclean, extblocks; int offset, size, level, nblocks; int i, error, allerror, indiroff, waitforupdate; off_t osize; ip = VTOI(vp); ump = VFSTOUFS(vp->v_mount); fs = ump->um_fs; bo = &vp->v_bufobj; ASSERT_VOP_LOCKED(vp, "ffs_truncate"); if (length < 0) return (EINVAL); if (length > fs->fs_maxfilesize) return (EFBIG); #ifdef QUOTA error = getinoquota(ip); if (error) return (error); #endif /* * Historically clients did not have to specify which data * they were truncating. So, if not specified, we assume * traditional behavior, e.g., just the normal data. */ if ((flags & (IO_EXT | IO_NORMAL)) == 0) flags |= IO_NORMAL; if (!DOINGSOFTDEP(vp) && !DOINGASYNC(vp)) flags |= IO_SYNC; waitforupdate = (flags & IO_SYNC) != 0 || !DOINGASYNC(vp); /* * If we are truncating the extended-attributes, and cannot * do it with soft updates, then do it slowly here. If we are * truncating both the extended attributes and the file contents * (e.g., the file is being unlinked), then pick it off with * soft updates below. */ allerror = 0; needextclean = 0; softdeptrunc = 0; journaltrunc = DOINGSUJ(vp); if (journaltrunc == 0 && DOINGSOFTDEP(vp) && length == 0) softdeptrunc = !softdep_slowdown(vp); extblocks = 0; datablocks = DIP(ip, i_blocks); if (fs->fs_magic == FS_UFS2_MAGIC && ip->i_din2->di_extsize > 0) { extblocks = btodb(fragroundup(fs, ip->i_din2->di_extsize)); datablocks -= extblocks; } if ((flags & IO_EXT) && extblocks > 0) { if (length != 0) panic("ffs_truncate: partial trunc of extdata"); if (softdeptrunc || journaltrunc) { if ((flags & IO_NORMAL) == 0) goto extclean; needextclean = 1; } else { if ((error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0) return (error); #ifdef QUOTA (void) chkdq(ip, -extblocks, NOCRED, 0); #endif vinvalbuf(vp, V_ALT, 0, 0); vn_pages_remove(vp, OFF_TO_IDX(lblktosize(fs, -extblocks)), 0); osize = ip->i_din2->di_extsize; ip->i_din2->di_blocks -= extblocks; ip->i_din2->di_extsize = 0; for (i = 0; i < UFS_NXADDR; i++) { oldblks[i] = ip->i_din2->di_extb[i]; ip->i_din2->di_extb[i] = 0; } ip->i_flag |= IN_CHANGE; if ((error = ffs_update(vp, waitforupdate))) return (error); for (i = 0; i < UFS_NXADDR; i++) { if (oldblks[i] == 0) continue; ffs_blkfree(ump, fs, ITODEVVP(ip), oldblks[i], sblksize(fs, osize, i), ip->i_number, vp->v_type, nil); } } } if ((flags & IO_NORMAL) == 0) return (0); if (vp->v_type == VLNK && (ip->i_size < vp->v_mount->mnt_maxsymlinklen || datablocks == 0)) { #ifdef INVARIANTS if (length != 0) panic("ffs_truncate: partial truncate of symlink"); #endif bzero(SHORTLINK(ip), (uint)ip->i_size); ip->i_size = 0; DIP_SET(ip, i_size, 0); ip->i_flag |= IN_CHANGE | IN_UPDATE; if (needextclean) goto extclean; return (ffs_update(vp, waitforupdate)); } if (ip->i_size == length) { ip->i_flag |= IN_CHANGE | IN_UPDATE; if (needextclean) goto extclean; return (ffs_update(vp, 0)); } if (fs->fs_ronly) panic("ffs_truncate: read-only filesystem"); if (IS_SNAPSHOT(ip)) ffs_snapremove(vp); vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0; 
osize = ip->i_size; /* * Lengthen the size of the file. We must ensure that the * last byte of the file is allocated. Since the smallest * value of osize is 0, length will be at least 1. */ if (osize < length) { vnode_pager_setsize(vp, length); flags |= BA_CLRBUF; error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp); if (error) { vnode_pager_setsize(vp, osize); return (error); } ip->i_size = length; DIP_SET(ip, i_size, length); if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; if (flags & IO_SYNC) bwrite(bp); else if (DOINGASYNC(vp)) bdwrite(bp); else bawrite(bp); ip->i_flag |= IN_CHANGE | IN_UPDATE; return (ffs_update(vp, waitforupdate)); } /* * Lookup block number for a given offset. Zero length files * have no blocks, so return a blkno of -1. */ lbn = lblkno(fs, length - 1); if (length == 0) { blkno = -1; } else if (lbn < UFS_NDADDR) { blkno = DIP(ip, i_db[lbn]); } else { error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), fs->fs_bsize, cred, BA_METAONLY, &bp); if (error) return (error); indiroff = (lbn - UFS_NDADDR) % NINDIR(fs); if (I_IS_UFS1(ip)) blkno = ((ufs1_daddr_t *)(bp->b_data))[indiroff]; else blkno = ((ufs2_daddr_t *)(bp->b_data))[indiroff]; /* * If the block number is non-zero, then the indirect block * must have been previously allocated and need not be written. * If the block number is zero, then we may have allocated * the indirect block and hence need to write it out. */ if (blkno != 0) brelse(bp); else if (flags & IO_SYNC) bwrite(bp); else bdwrite(bp); } /* * If the block number at the new end of the file is zero, * then we must allocate it to ensure that the last block of * the file is allocated. Soft updates does not handle this * case, so here we have to clean up the soft updates data * structures describing the allocation past the truncation * point. Finding and deallocating those structures is a lot of * work. Since partial truncation with a hole at the end occurs * rarely, we solve the problem by syncing the file so that it * will have no soft updates data structures left. */ if (blkno == 0 && (error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0) return (error); if (blkno != 0 && DOINGSOFTDEP(vp)) { if (softdeptrunc == 0 && journaltrunc == 0) { /* * If soft updates cannot handle this truncation, * clean up soft dependency data structures and * fall through to the synchronous truncation. */ if ((error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0) return (error); } else { flags = IO_NORMAL | (needextclean ? IO_EXT: 0); if (journaltrunc) softdep_journal_freeblocks(ip, cred, length, flags); else softdep_setup_freeblocks(ip, length, flags); ASSERT_VOP_LOCKED(vp, "ffs_truncate1"); if (journaltrunc == 0) { ip->i_flag |= IN_CHANGE | IN_UPDATE; error = ffs_update(vp, 0); } return (error); } } /* * Shorten the size of the file. If the last block of the * shortened file is unallocated, we must allocate it. * Additionally, if the file is not being truncated to a * block boundary, the contents of the partial block * following the end of the file must be zero'ed in * case it ever becomes accessible again because of * subsequent file growth. Directories however are not * zero'ed as they should grow back initialized to empty. 
*/ offset = blkoff(fs, length); if (blkno != 0 && offset == 0) { ip->i_size = length; DIP_SET(ip, i_size, length); } else { lbn = lblkno(fs, length); flags |= BA_CLRBUF; error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp); if (error) return (error); /* * When we are doing soft updates and the UFS_BALLOC * above fills in a direct block hole with a full sized * block that will be truncated down to a fragment below, * we must flush out the block dependency with an FSYNC * so that we do not get a soft updates inconsistency * when we create the fragment below. */ if (DOINGSOFTDEP(vp) && lbn < UFS_NDADDR && fragroundup(fs, blkoff(fs, length)) < fs->fs_bsize && (error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0) return (error); ip->i_size = length; DIP_SET(ip, i_size, length); size = blksize(fs, ip, lbn); if (vp->v_type != VDIR && offset != 0) bzero((char *)bp->b_data + offset, (uint)(size - offset)); /* Kirk's code has reallocbuf(bp, size, 1) here */ allocbuf(bp, size); if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; if (flags & IO_SYNC) bwrite(bp); else if (DOINGASYNC(vp)) bdwrite(bp); else bawrite(bp); } /* * Calculate index into inode's block list of * last direct and indirect blocks (if any) * which we want to keep. Lastblock is -1 when * the file is truncated to 0. */ lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1; lastiblock[SINGLE] = lastblock - UFS_NDADDR; lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs); lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs); nblocks = btodb(fs->fs_bsize); /* * Update file and block pointers on disk before we start freeing * blocks. If we crash before free'ing blocks below, the blocks * will be returned to the free list. lastiblock values are also * normalized to -1 for calls to ffs_indirtrunc below. */ for (level = TRIPLE; level >= SINGLE; level--) { oldblks[UFS_NDADDR + level] = DIP(ip, i_ib[level]); if (lastiblock[level] < 0) { DIP_SET(ip, i_ib[level], 0); lastiblock[level] = -1; } } for (i = 0; i < UFS_NDADDR; i++) { oldblks[i] = DIP(ip, i_db[i]); if (i > lastblock) DIP_SET(ip, i_db[i], 0); } ip->i_flag |= IN_CHANGE | IN_UPDATE; allerror = ffs_update(vp, waitforupdate); /* * Having written the new inode to disk, save its new configuration * and put back the old block pointers long enough to process them. * Note that we save the new block configuration so we can check it * when we are done. */ for (i = 0; i < UFS_NDADDR; i++) { newblks[i] = DIP(ip, i_db[i]); DIP_SET(ip, i_db[i], oldblks[i]); } for (i = 0; i < UFS_NIADDR; i++) { newblks[UFS_NDADDR + i] = DIP(ip, i_ib[i]); DIP_SET(ip, i_ib[i], oldblks[UFS_NDADDR + i]); } ip->i_size = osize; DIP_SET(ip, i_size, osize); error = vtruncbuf(vp, cred, length, fs->fs_bsize); if (error && (allerror == 0)) allerror = error; /* * Indirect blocks first. */ indir_lbn[SINGLE] = -UFS_NDADDR; indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1; indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1; for (level = TRIPLE; level >= SINGLE; level--) { bn = DIP(ip, i_ib[level]); if (bn != 0) { error = ffs_indirtrunc(ip, indir_lbn[level], fsbtodb(fs, bn), lastiblock[level], level, &count); if (error) allerror = error; blocksreleased += count; if (lastiblock[level] < 0) { DIP_SET(ip, i_ib[level], 0); ffs_blkfree(ump, fs, ump->um_devvp, bn, fs->fs_bsize, ip->i_number, vp->v_type, nil); blocksreleased += nblocks; } } if (lastiblock[level] >= 0) goto done; } /* * All whole direct blocks or frags. 
*/ for (i = UFS_NDADDR - 1; i > lastblock; i--) { long bsize; bn = DIP(ip, i_db[i]); if (bn == 0) continue; DIP_SET(ip, i_db[i], 0); bsize = blksize(fs, ip, i); ffs_blkfree(ump, fs, ump->um_devvp, bn, bsize, ip->i_number, vp->v_type, nil); blocksreleased += btodb(bsize); } if (lastblock < 0) goto done; /* * Finally, look for a change in size of the * last direct block; release any frags. */ bn = DIP(ip, i_db[lastblock]); if (bn != 0) { long oldspace, newspace; /* * Calculate amount of space we're giving * back as old block size minus new block size. */ oldspace = blksize(fs, ip, lastblock); ip->i_size = length; DIP_SET(ip, i_size, length); newspace = blksize(fs, ip, lastblock); if (newspace == 0) panic("ffs_truncate: newspace"); if (oldspace - newspace > 0) { /* * Block number of space to be free'd is * the old block # plus the number of frags * required for the storage we're keeping. */ bn += numfrags(fs, newspace); ffs_blkfree(ump, fs, ump->um_devvp, bn, oldspace - newspace, ip->i_number, vp->v_type, nil); blocksreleased += btodb(oldspace - newspace); } } done: #ifdef INVARIANTS for (level = SINGLE; level <= TRIPLE; level++) if (newblks[UFS_NDADDR + level] != DIP(ip, i_ib[level])) panic("ffs_truncate1"); for (i = 0; i < UFS_NDADDR; i++) if (newblks[i] != DIP(ip, i_db[i])) panic("ffs_truncate2"); BO_LOCK(bo); if (length == 0 && (fs->fs_magic != FS_UFS2_MAGIC || ip->i_din2->di_extsize == 0) && (bo->bo_dirty.bv_cnt > 0 || bo->bo_clean.bv_cnt > 0)) panic("ffs_truncate3"); BO_UNLOCK(bo); #endif /* INVARIANTS */ /* * Put back the real size. */ ip->i_size = length; DIP_SET(ip, i_size, length); if (DIP(ip, i_blocks) >= blocksreleased) DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - blocksreleased); else /* sanity */ DIP_SET(ip, i_blocks, 0); ip->i_flag |= IN_CHANGE; #ifdef QUOTA (void) chkdq(ip, -blocksreleased, NOCRED, 0); #endif return (allerror); extclean: if (journaltrunc) softdep_journal_freeblocks(ip, cred, length, IO_EXT); else softdep_setup_freeblocks(ip, length, IO_EXT); return (ffs_update(vp, waitforupdate)); #endif // 0 return 0; }
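The truncate variant above needs the block number at the new end of file; blocks beyond the direct range are located by their slot inside an indirect block, (lbn - NDADDR) % NINDIR. A tiny model of that slot computation, assuming 12 direct blocks and 4096 pointers per indirect block (32K blocks with 8-byte pointers); the real code reads both values from the superblock:

/*
 * Model of the single-indirect slot computation used when looking up
 * the block at the new end of file.  Geometry values are assumptions.
 */
#include <stdio.h>

int main(void)
{
        long ndaddr = 12;
        long nindir = 4096;
        long lbn = 5000;                /* logical block at new EOF */

        if (lbn < ndaddr)
                printf("direct block %ld\n", lbn);
        else
                printf("indirect slot %ld\n", (lbn - ndaddr) % nindir);
        return 0;
}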
/* * Remove a directory entry after a call to namei, using * the parameters which it left in nameidata. The entry * dp->i_offset contains the offset into the directory of the * entry to be eliminated. The dp->i_count field contains the * size of the previous record in the directory. If this * is 0, the first entry is being deleted, so we need only * zero the inode number to mark the entry as free. If the * entry is not the first in the directory, we must reclaim * the space of the now empty record by adding the record size * to the size of the previous entry. */ int ufs_dirremove(struct vnode *dvp, struct inode *ip, int flags, int isrmdir) { struct inode *dp; struct direct *ep; struct buf *bp; int error; UFS_WAPBL_JLOCK_ASSERT(dvp->v_mount); dp = VTOI(dvp); if ((error = UFS_BUFATOFF(dp, (off_t)(dp->i_offset - dp->i_count), (char **)&ep, &bp)) != 0) return (error); #ifdef UFS_DIRHASH /* * Remove the dirhash entry. This is complicated by the fact * that `ep' is the previous entry when dp->i_count != 0. */ if (dp->i_dirhash != NULL) ufsdirhash_remove(dp, (dp->i_count == 0) ? ep : (struct direct *)((char *)ep + ep->d_reclen), dp->i_offset); #endif if (dp->i_count == 0) { /* * First entry in block: set d_ino to zero. */ ep->d_ino = 0; } else { /* * Collapse new free space into previous entry. */ ep->d_reclen += dp->i_reclen; } #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL) ufsdirhash_checkblock(dp, (char *)ep - ((dp->i_offset - dp->i_count) & (DIRBLKSIZ - 1)), dp->i_offset & ~(DIRBLKSIZ - 1)); #endif if (DOINGSOFTDEP(dvp)) { if (ip) { ip->i_effnlink--; softdep_change_linkcnt(ip, 0); softdep_setup_remove(bp, dp, ip, isrmdir); } if (softdep_slowdown(dvp)) { error = bwrite(bp); } else { bdwrite(bp); error = 0; } } else { if (ip) { ip->i_effnlink--; DIP_ADD(ip, nlink, -1); ip->i_flag |= IN_CHANGE; UFS_WAPBL_UPDATE(ip, 0); } if (DOINGASYNC(dvp) && dp->i_count != 0) { bdwrite(bp); error = 0; } else error = bwrite(bp); } dp->i_flag |= IN_CHANGE | IN_UPDATE; UFS_WAPBL_UPDATE(dp, 0); return (error); }
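ufs_dirremove() above frees a directory slot in one of two ways: the first entry of a DIRBLKSIZ chunk is freed by zeroing d_ino in place, while any later entry is absorbed into its predecessor by growing the predecessor's d_reclen. A minimal model of that reclaim step; mini_dirent and dirremove() are stand-ins for struct direct and the real routine:

/*
 * Model of the record-reclaim step in ufs_dirremove(): either mark the
 * slot free in place or fold its space into the previous entry.
 */
#include <stdio.h>
#include <stdint.h>

struct mini_dirent {
        uint32_t d_ino;
        uint16_t d_reclen;
};

static void dirremove(struct mini_dirent *prev, struct mini_dirent *victim,
    int victim_is_first)
{
        if (victim_is_first)
                victim->d_ino = 0;                   /* mark slot free */
        else
                prev->d_reclen += victim->d_reclen;  /* absorb the space */
}

int main(void)
{
        struct mini_dirent prev = { 7, 16 }, victim = { 9, 24 };

        dirremove(&prev, &victim, 0);
        printf("prev now spans %u bytes\n", prev.d_reclen);
        return 0;
}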
int ffs2_balloc(struct inode *ip, off_t off, int size, struct ucred *cred, int flags, struct buf **bpp) { daddr_t lbn, lastlbn, nb, newb, *blkp; daddr_t pref, *allocblk, allociblk[NIADDR + 1]; daddr_t *bap, *allocib; int deallocated, osize, nsize, num, i, error, unwindidx, r; struct buf *bp, *nbp; struct indir indirs[NIADDR + 2]; struct fs *fs; struct vnode *vp; struct proc *p; vp = ITOV(ip); fs = ip->i_fs; p = curproc; unwindidx = -1; lbn = lblkno(fs, off); size = blkoff(fs, off) + size; if (size > fs->fs_bsize) panic("ffs2_balloc: block too big"); if (bpp != NULL) *bpp = NULL; if (lbn < 0) return (EFBIG); /* * If the next write will extend the file into a new block, and the * file is currently composed of a fragment, this fragment has to be * extended to be a full block. */ lastlbn = lblkno(fs, ip->i_ffs2_size); if (lastlbn < NDADDR && lastlbn < lbn) { nb = lastlbn; osize = blksize(fs, ip, nb); if (osize < fs->fs_bsize && osize > 0) { error = ffs_realloccg(ip, nb, ffs2_blkpref(ip, lastlbn, nb, &ip->i_ffs2_db[0]), osize, (int) fs->fs_bsize, cred, bpp, &newb); if (error) return (error); if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, nb, newb, ip->i_ffs2_db[nb], fs->fs_bsize, osize, bpp ? *bpp : NULL); ip->i_ffs2_size = lblktosize(fs, nb + 1); uvm_vnp_setsize(vp, ip->i_ffs2_size); ip->i_ffs2_db[nb] = newb; ip->i_flag |= IN_CHANGE | IN_UPDATE; if (bpp) { if (flags & B_SYNC) bwrite(*bpp); else bawrite(*bpp); } } } /* * The first NDADDR blocks are direct. */ if (lbn < NDADDR) { nb = ip->i_ffs2_db[lbn]; if (nb != 0 && ip->i_ffs2_size >= lblktosize(fs, lbn + 1)) { /* * The direct block is already allocated and the file * extends past this block, thus this must be a whole * block. Just read it, if requested. */ if (bpp != NULL) { error = bread(vp, lbn, fs->fs_bsize, bpp); if (error) { brelse(*bpp); return (error); } } return (0); } if (nb != 0) { /* * Consider the need to allocate a fragment. */ osize = fragroundup(fs, blkoff(fs, ip->i_ffs2_size)); nsize = fragroundup(fs, size); if (nsize <= osize) { /* * The existing block is already at least as * big as we want. Just read it, if requested. */ if (bpp != NULL) { error = bread(vp, lbn, fs->fs_bsize, bpp); if (error) { brelse(*bpp); return (error); } (*bpp)->b_bcount = osize; } return (0); } else { /* * The existing block is smaller than we want, * grow it. */ error = ffs_realloccg(ip, lbn, ffs2_blkpref(ip, lbn, (int) lbn, &ip->i_ffs2_db[0]), osize, nsize, cred, bpp, &newb); if (error) return (error); if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, lbn, newb, nb, nsize, osize, bpp ? *bpp : NULL); } } else { /* * The block was not previously allocated, allocate a * new block or fragment. */ if (ip->i_ffs2_size < lblktosize(fs, lbn + 1)) nsize = fragroundup(fs, size); else nsize = fs->fs_bsize; error = ffs_alloc(ip, lbn, ffs2_blkpref(ip, lbn, (int) lbn, &ip->i_ffs2_db[0]), nsize, cred, &newb); if (error) return (error); if (bpp != NULL) { bp = getblk(vp, lbn, fs->fs_bsize, 0, 0); if (nsize < fs->fs_bsize) bp->b_bcount = nsize; bp->b_blkno = fsbtodb(fs, newb); if (flags & B_CLRBUF) clrbuf(bp); *bpp = bp; } if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, lbn, newb, 0, nsize, 0, bpp ? *bpp : NULL); } ip->i_ffs2_db[lbn] = newb; ip->i_flag |= IN_CHANGE | IN_UPDATE; return (0); } /* * Determine the number of levels of indirection. 
*/ pref = 0; error = ufs_getlbns(vp, lbn, indirs, &num); if (error) return (error); #ifdef DIAGNOSTIC if (num < 1) panic("ffs2_balloc: ufs_bmaparray returned indirect block"); #endif /* * Fetch the first indirect block allocating it necessary. */ --num; nb = ip->i_ffs2_ib[indirs[0].in_off]; allocib = NULL; allocblk = allociblk; if (nb == 0) { pref = ffs2_blkpref(ip, lbn, -indirs[0].in_off - 1, NULL); error = ffs_alloc(ip, lbn, pref, (int) fs->fs_bsize, cred, &newb); if (error) goto fail; nb = newb; *allocblk++ = nb; bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0); bp->b_blkno = fsbtodb(fs, nb); clrbuf(bp); if (DOINGSOFTDEP(vp)) { softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off, newb, 0, fs->fs_bsize, 0, bp); bdwrite(bp); } else { /* * Write synchronously so that indirect blocks never * point at garbage. */ error = bwrite(bp); if (error) goto fail; } unwindidx = 0; allocib = &ip->i_ffs2_ib[indirs[0].in_off]; *allocib = nb; ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * Fetch through the indirect blocks, allocating as necessary. */ for (i = 1;;) { error = bread(vp, indirs[i].in_lbn, (int)fs->fs_bsize, &bp); if (error) { brelse(bp); goto fail; } bap = (int64_t *) bp->b_data; nb = bap[indirs[i].in_off]; if (i == num) break; i++; if (nb != 0) { brelse(bp); continue; } if (pref == 0) pref = ffs2_blkpref(ip, lbn, i - num - 1, NULL); error = ffs_alloc(ip, lbn, pref, (int) fs->fs_bsize, cred, &newb); if (error) { brelse(bp); goto fail; } nb = newb; *allocblk++ = nb; nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); clrbuf(nbp); if (DOINGSOFTDEP(vp)) { softdep_setup_allocindir_meta(nbp, ip, bp, indirs[i - 1].in_off, nb); bdwrite(nbp); } else { /* * Write synchronously so that indirect blocks never * point at garbage. */ error = bwrite(nbp); if (error) { brelse(bp); goto fail; } } if (unwindidx < 0) unwindidx = i - 1; bap[indirs[i - 1].in_off] = nb; /* * If required, write synchronously, otherwise use delayed * write. */ if (flags & B_SYNC) bwrite(bp); else bdwrite(bp); } /* * Get the data block, allocating if necessary. */ if (nb == 0) { pref = ffs2_blkpref(ip, lbn, indirs[num].in_off, &bap[0]); error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb); if (error) { brelse(bp); goto fail; } nb = newb; *allocblk++ = nb; if (bpp != NULL) { nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); if (flags & B_CLRBUF) clrbuf(nbp); *bpp = nbp; } if (DOINGSOFTDEP(vp)) softdep_setup_allocindir_page(ip, lbn, bp, indirs[num].in_off, nb, 0, bpp ? *bpp : NULL); bap[indirs[num].in_off] = nb; if (allocib == NULL && unwindidx < 0) unwindidx = i - 1; /* * If required, write synchronously, otherwise use delayed * write. */ if (flags & B_SYNC) bwrite(bp); else bdwrite(bp); return (0); } brelse(bp); if (bpp != NULL) { if (flags & B_CLRBUF) { error = bread(vp, lbn, (int)fs->fs_bsize, &nbp); if (error) { brelse(nbp); goto fail; } } else { nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); clrbuf(nbp); } *bpp = nbp; } return (0); fail: /* * If we have failed to allocate any blocks, simply return the error. * This is the usual case and avoids the need to fsync the file. */ if (allocblk == allociblk && allocib == NULL && unwindidx == -1) return (error); /* * If we have failed part way through block allocation, we have to * deallocate any indirect blocks that we have allocated. We have to * fsync the file before we start to get rid of all of its * dependencies so that we do not leave them dangling. 
We have to sync * it at the end so that the softdep code does not find any untracked * changes. Although this is really slow, running out of disk space is * not expected to be a common occurrence. The error return from fsync * is ignored as we already have an error to return to the user. */ VOP_FSYNC(vp, p->p_ucred, MNT_WAIT, p); if (unwindidx >= 0) { /* * First write out any buffers we've created to resolve their * softdeps. This must be done in reverse order of creation so * that we resolve the dependencies in one pass. * Write the cylinder group buffers for these buffers too. */ for (i = num; i >= unwindidx; i--) { if (i == 0) break; bp = getblk(vp, indirs[i].in_lbn, (int) fs->fs_bsize, 0, 0); if (bp->b_flags & B_DELWRI) { nb = fsbtodb(fs, cgtod(fs, dtog(fs, dbtofsb(fs, bp->b_blkno)))); bwrite(bp); bp = getblk(ip->i_devvp, nb, (int) fs->fs_cgsize, 0, 0); if (bp->b_flags & B_DELWRI) bwrite(bp); else { bp->b_flags |= B_INVAL; brelse(bp); } } else { bp->b_flags |= B_INVAL; brelse(bp); } } if (DOINGSOFTDEP(vp) && unwindidx == 0) { ip->i_flag |= IN_CHANGE | IN_UPDATE; ffs_update(ip, 1); } /* * Now that any dependencies that we created have been * resolved, we can undo the partial allocation. */ if (unwindidx == 0) { *allocib = 0; ip->i_flag |= IN_CHANGE | IN_UPDATE; if (DOINGSOFTDEP(vp)) ffs_update(ip, 1); } else { r = bread(vp, indirs[unwindidx].in_lbn, (int)fs->fs_bsize, &bp); if (r) panic("ffs2_balloc: unwind failed"); bap = (int64_t *) bp->b_data; bap[indirs[unwindidx].in_off] = 0; bwrite(bp); } for (i = unwindidx + 1; i <= num; i++) { bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0, 0); bp->b_flags |= B_INVAL; brelse(bp); } } for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) { ffs_blkfree(ip, *blkp, fs->fs_bsize); deallocated += fs->fs_bsize; } if (deallocated) { /* * Restore user's disk quota because allocation failed. */ (void) ufs_quota_free_blocks(ip, btodb(deallocated), cred); ip->i_ffs2_blocks -= btodb(deallocated); ip->i_flag |= IN_CHANGE | IN_UPDATE; } VOP_FSYNC(vp, p->p_ucred, MNT_WAIT, p); return (error); }
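/*
 * The fail path above depends on every block obtained during this call being
 * remembered in allociblk[] as it is allocated, so that on error each one can
 * be handed back with ffs_blkfree() and the quota and i_ffs2_blocks charges
 * undone.  Below is a minimal, stand-alone sketch of that record-then-unwind
 * pattern; alloc_block(), free_block(), MAXSTEPS and balloc_like() are
 * illustrative stand-ins, not kernel interfaces.
 */
#include <stdio.h>

#define MAXSTEPS	4

static long nextblk = 100;		/* pretend block numbers */

static int
alloc_block(int step, long *out)
{
	if (step == 2)			/* simulate running out of space */
		return (-1);
	*out = nextblk++;
	return (0);
}

static void
free_block(long blkno)
{
	printf("freeing block %ld\n", blkno);
}

static int
balloc_like(void)
{
	long allocated[MAXSTEPS], *tail = allocated, *p;
	int step;

	for (step = 0; step < MAXSTEPS; step++) {
		long b;

		if (alloc_block(step, &b) != 0)
			goto fail;
		*tail++ = b;		/* record it before wiring it in */
	}
	return (0);

fail:
	/* Walk the scratch array and return every block we grabbed. */
	for (p = allocated; p < tail; p++)
		free_block(*p);
	return (-1);
}

int
main(void)
{
	if (balloc_like() != 0)
		printf("allocation failed and was unwound\n");
	return (0);
}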