/* * ffs_balloc(struct vnode *a_vp, ufs_daddr_t a_lbn, int a_size, * struct ucred *a_cred, int a_flags, struct buf *a_bpp) * * Balloc defines the structure of filesystem storage by allocating * the physical blocks on a device given the inode and the logical * block number in a file. * * NOTE: B_CLRBUF - this flag tells balloc to clear invalid portions * of the buffer. However, any dirty bits will override missing * valid bits. This case occurs when writable mmaps are truncated * and then extended. */ int ffs_balloc(struct vop_balloc_args *ap) { struct inode *ip; ufs_daddr_t lbn; int size; struct ucred *cred; int flags; struct fs *fs; ufs_daddr_t nb; struct buf *bp, *nbp, *dbp; struct vnode *vp; struct indir indirs[NIADDR + 2]; ufs_daddr_t newb, *bap, pref; int deallocated, osize, nsize, num, i, error; ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; ufs_daddr_t *lbns_remfree, lbns[NIADDR + 1]; int unwindidx; int seqcount; vp = ap->a_vp; ip = VTOI(vp); fs = ip->i_fs; lbn = lblkno(fs, ap->a_startoffset); size = blkoff(fs, ap->a_startoffset) + ap->a_size; if (size > fs->fs_bsize) panic("ffs_balloc: blk too big"); *ap->a_bpp = NULL; if (lbn < 0) return (EFBIG); cred = ap->a_cred; flags = ap->a_flags; /* * The vnode must be locked for us to be able to safely mess * around with the inode. */ if (vn_islocked(vp) != LK_EXCLUSIVE) { panic("ffs_balloc: vnode %p not exclusively locked!", vp); } /* * If the next write will extend the file into a new block, * and the file is currently composed of a fragment * this fragment has to be extended to be a full block. */ nb = lblkno(fs, ip->i_size); if (nb < NDADDR && nb < lbn) { /* * The filesize prior to this write can fit in direct * blocks (ex. fragmentation is possibly done) * we are now extending the file write beyond * the block which has end of the file prior to this write. */ osize = blksize(fs, ip, nb); /* * osize gives disk allocated size in the last block. It is * either in fragments or a file system block size. */ if (osize < fs->fs_bsize && osize > 0) { /* A few fragments are already allocated, since the * current extends beyond this block allocated the * complete block as fragments are on in last block. */ error = ffs_realloccg(ip, nb, ffs_blkpref(ip, nb, (int)nb, &ip->i_db[0]), osize, (int)fs->fs_bsize, cred, &bp); if (error) return (error); if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, nb, dofftofsb(fs, bp->b_bio2.bio_offset), ip->i_db[nb], fs->fs_bsize, osize, bp); /* adjust the inode size, we just grew */ ip->i_size = smalllblktosize(fs, nb + 1); ip->i_db[nb] = dofftofsb(fs, bp->b_bio2.bio_offset); ip->i_flag |= IN_CHANGE | IN_UPDATE; if (flags & B_SYNC) bwrite(bp); else bawrite(bp); /* bp is already released here */ } } /* * The first NDADDR blocks are direct blocks */ if (lbn < NDADDR) { nb = ip->i_db[lbn]; if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) { error = bread(vp, lblktodoff(fs, lbn), fs->fs_bsize, &bp); if (error) { brelse(bp); return (error); } bp->b_bio2.bio_offset = fsbtodoff(fs, nb); *ap->a_bpp = bp; return (0); } if (nb != 0) { /* * Consider need to reallocate a fragment. */ osize = fragroundup(fs, blkoff(fs, ip->i_size)); nsize = fragroundup(fs, size); if (nsize <= osize) { error = bread(vp, lblktodoff(fs, lbn), osize, &bp); if (error) { brelse(bp); return (error); } bp->b_bio2.bio_offset = fsbtodoff(fs, nb); } else { /* * NOTE: ffs_realloccg() issues a bread(). 
*/ error = ffs_realloccg(ip, lbn, ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]), osize, nsize, cred, &bp); if (error) return (error); if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, lbn, dofftofsb(fs, bp->b_bio2.bio_offset), nb, nsize, osize, bp); } } else { if (ip->i_size < smalllblktosize(fs, lbn + 1)) nsize = fragroundup(fs, size); else nsize = fs->fs_bsize; error = ffs_alloc(ip, lbn, ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]), nsize, cred, &newb); if (error) return (error); bp = getblk(vp, lblktodoff(fs, lbn), nsize, 0, 0); bp->b_bio2.bio_offset = fsbtodoff(fs, newb); if (flags & B_CLRBUF) vfs_bio_clrbuf(bp); if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, lbn, newb, 0, nsize, 0, bp); } ip->i_db[lbn] = dofftofsb(fs, bp->b_bio2.bio_offset); ip->i_flag |= IN_CHANGE | IN_UPDATE; *ap->a_bpp = bp; return (0); } /* * Determine the number of levels of indirection. */ pref = 0; if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0) return(error); #ifdef DIAGNOSTIC if (num < 1) panic ("ffs_balloc: ufs_bmaparray returned indirect block"); #endif /* * Get a handle on the data block buffer before working through * indirect blocks to avoid a deadlock between the VM system holding * a locked VM page and issuing a BMAP (which tries to lock the * indirect blocks), and the filesystem holding a locked indirect * block and then trying to read a data block (which tries to lock * the underlying VM pages). */ dbp = getblk(vp, lblktodoff(fs, lbn), fs->fs_bsize, 0, 0); /* * Setup undo history */ allocib = NULL; allocblk = allociblk; lbns_remfree = lbns; unwindidx = -1; /* * Fetch the first indirect block directly from the inode, allocating * one if necessary. */ --num; nb = ip->i_ib[indirs[0].in_off]; if (nb == 0) { pref = ffs_blkpref(ip, lbn, 0, NULL); /* * If the filesystem has run out of space we can skip the * full fsync/undo of the main [fail] case since no undo * history has been built yet. Hence the goto fail2. */ if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) goto fail2; nb = newb; *allocblk++ = nb; *lbns_remfree++ = indirs[1].in_lbn; bp = getblk(vp, lblktodoff(fs, indirs[1].in_lbn), fs->fs_bsize, 0, 0); bp->b_bio2.bio_offset = fsbtodoff(fs, nb); vfs_bio_clrbuf(bp); if (DOINGSOFTDEP(vp)) { softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off, newb, 0, fs->fs_bsize, 0, bp); bdwrite(bp); } else { /* * Write synchronously so that indirect blocks * never point at garbage. */ if (DOINGASYNC(vp)) bdwrite(bp); else if ((error = bwrite(bp)) != 0) goto fail; } allocib = &ip->i_ib[indirs[0].in_off]; *allocib = nb; ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * Fetch through the indirect blocks, allocating as necessary. */ for (i = 1;;) { error = bread(vp, lblktodoff(fs, indirs[i].in_lbn), (int)fs->fs_bsize, &bp); if (error) { brelse(bp); goto fail; } bap = (ufs_daddr_t *)bp->b_data; nb = bap[indirs[i].in_off]; if (i == num) break; i += 1; if (nb != 0) { bqrelse(bp); continue; } if (pref == 0) pref = ffs_blkpref(ip, lbn, 0, NULL); if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) { brelse(bp); goto fail; } nb = newb; *allocblk++ = nb; *lbns_remfree++ = indirs[i].in_lbn; nbp = getblk(vp, lblktodoff(fs, indirs[i].in_lbn), fs->fs_bsize, 0, 0); nbp->b_bio2.bio_offset = fsbtodoff(fs, nb); vfs_bio_clrbuf(nbp); if (DOINGSOFTDEP(vp)) { softdep_setup_allocindir_meta(nbp, ip, bp, indirs[i - 1].in_off, nb); bdwrite(nbp); } else { /* * Write synchronously so that indirect blocks * never point at garbage. 
*/ if ((error = bwrite(nbp)) != 0) { brelse(bp); goto fail; } } bap[indirs[i - 1].in_off] = nb; if (allocib == NULL && unwindidx < 0) unwindidx = i - 1; /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & B_SYNC) { bwrite(bp); } else { if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } } /* * Get the data block, allocating if necessary. We have already * called getblk() on the data block buffer, dbp. If we have to * allocate it and B_CLRBUF has been set the inference is an intention * to zero out the related disk blocks, so we do not have to issue * a read. Instead we simply call vfs_bio_clrbuf(). If B_CLRBUF is * not set the caller intends to overwrite the entire contents of the * buffer and we don't waste time trying to clean up the contents. * * bp references the current indirect block. When allocating, * the block must be updated. */ if (nb == 0) { pref = ffs_blkpref(ip, lbn, indirs[i].in_off, &bap[0]); error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb); if (error) { brelse(bp); goto fail; } nb = newb; *allocblk++ = nb; *lbns_remfree++ = lbn; dbp->b_bio2.bio_offset = fsbtodoff(fs, nb); if (flags & B_CLRBUF) vfs_bio_clrbuf(dbp); if (DOINGSOFTDEP(vp)) softdep_setup_allocindir_page(ip, lbn, bp, indirs[i].in_off, nb, 0, dbp); bap[indirs[i].in_off] = nb; /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & B_SYNC) { bwrite(bp); } else { if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } *ap->a_bpp = dbp; return (0); } brelse(bp); /* * At this point all related indirect blocks have been allocated * if necessary and released. bp is no longer valid. dbp holds * our getblk()'d data block. * * XXX we previously performed a cluster_read operation here. */ if (flags & B_CLRBUF) { /* * If B_CLRBUF is set we must validate the invalid portions * of the buffer. This typically requires a read-before- * write. The strategy call will fill in bio_offset in that * case. * * If we hit this case we do a cluster read if possible * since nearby data blocks are likely to be accessed soon * too. */ if ((dbp->b_flags & B_CACHE) == 0) { bqrelse(dbp); seqcount = (flags & B_SEQMASK) >> B_SEQSHIFT; if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { error = cluster_read(vp, (off_t)ip->i_size, lblktodoff(fs, lbn), (int)fs->fs_bsize, fs->fs_bsize, seqcount * BKVASIZE, &dbp); } else { error = bread(vp, lblktodoff(fs, lbn), (int)fs->fs_bsize, &dbp); } if (error) goto fail; } else {
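/*
 * Illustrative stand-alone sketch (not part of the original source): the
 * allociblk[]/lbns[] arrays above implement a simple record-then-rollback
 * pattern -- every block allocated during one balloc call is logged so a
 * mid-call failure can free exactly those blocks again.  The helper names
 * sketch_alloc()/sketch_free() are hypothetical stand-ins for
 * ffs_alloc()/ffs_blkfree().
 */
#define SKETCH_MAXALLOC	4	/* plays the role of NIADDR + 1 */

extern int sketch_alloc(long *out);	/* hypothetical allocator */
extern void sketch_free(long blk);	/* hypothetical deallocator */

static int
sketch_balloc_with_undo(int nblocks)
{
	long allocated[SKETCH_MAXALLOC];
	long *tail = allocated, *p;
	long nb;
	int i, error;

	for (i = 0; i < nblocks && i < SKETCH_MAXALLOC; i++) {
		if ((error = sketch_alloc(&nb)) != 0)
			goto fail;
		*tail++ = nb;		/* record for possible rollback */
	}
	return (0);
fail:
	/* Mirrors the real fail path: free everything recorded so far. */
	for (p = allocated; p < tail; p++)
		sketch_free(*p);
	return (error);
}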
/*
 * Find a suitable location for the journal in the filesystem.
 *
 * Our strategy here is to look for a contiguous block of free space
 * at least "logsize" MB in size (plus room for any indirect blocks).
 * We start at the middle of the filesystem and check each cylinder
 * group working outwards.  If "logsize" MB is not available as a
 * single contiguous chunk, then return the address and size of the
 * largest chunk found.
 *
 * XXX
 * At what point should the search be considered a failure?  Is it
 * reasonable to accept the largest extent found even when it is less
 * than a quarter of the requested space?  If the search fails
 * entirely, a block address of "0" is returned to indicate this.
 */
void
wapbl_find_log_start(struct mount *mp, struct vnode *vp, off_t logsize,
    daddr_t *addr, daddr_t *indir_addr, size_t *size)
{
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs = ump->um_fs;
	struct vnode *devvp = ump->um_devvp;
	struct cg *cgp;
	struct buf *bp;
	uint8_t *blksfree;
	daddr_t blkno, best_addr, start_addr;
	daddr_t desired_blks, min_desired_blks;
	daddr_t freeblks, best_blks;
	int bpcg, cg, error, fixedsize, indir_blks, n, s;
#ifdef FFS_EI
	const int needswap = UFS_FSNEEDSWAP(fs);
#endif

	if (logsize == 0) {
		fixedsize = 0;	/* We can adjust the size if tight */
		logsize = lfragtosize(fs, fs->fs_dsize) /
		    UFS_WAPBL_JOURNAL_SCALE;
		DPRINTF("suggested log size = %lld\n", logsize);
		logsize = max(logsize, UFS_WAPBL_MIN_JOURNAL_SIZE);
		logsize = min(logsize, UFS_WAPBL_MAX_JOURNAL_SIZE);
		DPRINTF("adjusted log size = %lld\n", logsize);
	} else {
		fixedsize = 1;
		DPRINTF("fixed log size = %lld\n", logsize);
	}

	desired_blks = logsize / fs->fs_bsize;
	DPRINTF("desired blocks = %lld\n", desired_blks);

	/* add in number of indirect blocks needed */
	indir_blks = 0;
	if (desired_blks >= NDADDR) {
		struct indir indirs[NIADDR + 2];
		int num;

		error = ufs_getlbns(vp, desired_blks, indirs, &num);
		if (error) {
			printf("%s: ufs_getlbns failed, error %d!\n",
			    __func__, error);
			goto bad;
		}

		switch (num) {
		case 2:
			indir_blks = 1;		/* 1st level indirect */
			break;
		case 3:
			indir_blks = 1 +	/* 1st level indirect */
			    1 +			/* 2nd level indirect */
			    indirs[1].in_off + 1; /* extra 1st level indirect */
			break;
		default:
			printf("%s: unexpected numlevels %d from ufs_getlbns\n",
			    __func__, num);
			*size = 0;
			goto bad;
		}
		desired_blks += indir_blks;
	}
	DPRINTF("desired blocks = %lld (including indirect)\n",
	    desired_blks);

	/*
	 * If a specific size wasn't requested, allow for a smaller log
	 * if we're really tight for space...
	 */
	min_desired_blks = desired_blks;
	if (!fixedsize)
		min_desired_blks = desired_blks / 4;

	/* Look at number of blocks per CG.  If it's too small, bail early. */
	bpcg = fragstoblks(fs, fs->fs_fpg);
	if (min_desired_blks > bpcg) {
		printf("ffs_wapbl: cylinder group size of %lld MB "
		    "is not big enough for journal\n",
		    lblktosize(fs, bpcg) / (1024 * 1024));
		goto bad;
	}

	/*
	 * Start with the middle cylinder group, and search outwards in
	 * both directions until we either find the requested log size
	 * or reach the start/end of the file system.  If we reach the
	 * start/end without finding enough space for the full requested
	 * log size, use the largest extent found if it is large enough
	 * to satisfy our minimum size.
	 *
	 * XXX
	 * Can we just use the cluster contigsum stuff (esp on UFS2)
	 * here to simplify this search code?
	 */
	best_addr = 0;
	best_blks = 0;
	for (cg = fs->fs_ncg / 2, s = 0, n = 1;
	    best_blks < desired_blks && cg >= 0 && cg < fs->fs_ncg;
	    s++, n = -n, cg += n * s) {
		DPRINTF("check cg %d of %d\n", cg, fs->fs_ncg);
		error = bread(devvp, fsbtodb(fs, cgtod(fs, cg)),
		    fs->fs_cgsize, &bp);
		if (error) {
			continue;
		}
		cgp = (struct cg *)bp->b_data;
		if (!cg_chkmagic(cgp)) {
			brelse(bp);
			continue;
		}

		blksfree = cg_blksfree(cgp);

		for (blkno = 0; blkno < bpcg;) {
			/* look for next free block */
			/* XXX use scanc() and fragtbl[] here? */
			for (; blkno < bpcg - min_desired_blks; blkno++)
				if (ffs_isblock(fs, blksfree, blkno))
					break;

			/* past end of search space in this CG? */
			if (blkno >= bpcg - min_desired_blks)
				break;

			/* count how many free blocks in this extent */
			start_addr = blkno;
			for (freeblks = 0; blkno < bpcg; blkno++, freeblks++)
				if (!ffs_isblock(fs, blksfree, blkno))
					break;

			if (freeblks > best_blks) {
				best_blks = freeblks;
				best_addr = blkstofrags(fs, start_addr) +
				    cgbase(fs, cg);

				if (freeblks >= desired_blks) {
					DPRINTF("found len %lld"
					    " at offset %lld in cg\n",
					    freeblks, start_addr);
					break;
				}
			}
		}
		brelse(bp);
	}
	DPRINTF("best found len = %lld, wanted %lld"
	    " at addr %lld\n", best_blks, desired_blks, best_addr);

	if (best_blks < min_desired_blks) {
		*addr = 0;
		*indir_addr = 0;
	} else {
		/* put indirect blocks at start, and data blocks after */
		*addr = best_addr + blkstofrags(fs, indir_blks);
		*indir_addr = best_addr;
	}
	*size = min(desired_blks, best_blks) - indir_blks;
	return;

bad:
	*addr = 0;
	*indir_addr = 0;
	*size = 0;
	return;
}
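/*
 * Stand-alone userland sketch (assumed fs_ncg value, illustration only) of
 * the cylinder-group visit order produced by the search loop above: start
 * at the middle group and fan outwards, alternating sides.  For ncg = 8
 * this prints "4 3 5 2 6 1 7 0".
 */
#include <stdio.h>

int
main(void)
{
	int ncg = 8;	/* assumed example value for fs->fs_ncg */
	int cg, s, n;

	for (cg = ncg / 2, s = 0, n = 1;
	    cg >= 0 && cg < ncg;
	    s++, n = -n, cg += n * s)
		printf("%d ", cg);
	printf("\n");
	return (0);
}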
static int ffs_balloc_ufs1(struct inode *ip, off_t offset, int bufsize, struct buf **bpp) { makefs_daddr_t lbn, lastlbn; int size; int32_t nb; struct buf *bp, *nbp; struct fs *fs = ip->i_fs; struct indir indirs[UFS_NIADDR + 2]; makefs_daddr_t newb, pref; int32_t *bap; int osize, nsize, num, i, error; int32_t *allocblk, allociblk[UFS_NIADDR + 1]; int32_t *allocib; const int needswap = UFS_FSNEEDSWAP(fs); lbn = lblkno(fs, offset); size = blkoff(fs, offset) + bufsize; if (bpp != NULL) { *bpp = NULL; } assert(size <= fs->fs_bsize); if (lbn < 0) return (EFBIG); /* * If the next write will extend the file into a new block, * and the file is currently composed of a fragment * this fragment has to be extended to be a full block. */ lastlbn = lblkno(fs, ip->i_ffs1_size); if (lastlbn < UFS_NDADDR && lastlbn < lbn) { nb = lastlbn; osize = blksize(fs, ip, nb); if (osize < fs->fs_bsize && osize > 0) { warnx("need to ffs_realloccg; not supported!"); abort(); } } /* * The first UFS_NDADDR blocks are direct blocks */ if (lbn < UFS_NDADDR) { nb = ufs_rw32(ip->i_ffs1_db[lbn], needswap); if (nb != 0 && ip->i_ffs1_size >= (uint64_t)lblktosize(fs, lbn + 1)) { /* * The block is an already-allocated direct block * and the file already extends past this block, * thus this must be a whole block. * Just read the block (if requested). */ if (bpp != NULL) { error = bread(ip->i_devvp, lbn, fs->fs_bsize, NULL, bpp); if (error) { brelse(*bpp); return (error); } } return (0); } if (nb != 0) { /* * Consider need to reallocate a fragment. */ osize = fragroundup(fs, blkoff(fs, ip->i_ffs1_size)); nsize = fragroundup(fs, size); if (nsize <= osize) { /* * The existing block is already * at least as big as we want. * Just read the block (if requested). */ if (bpp != NULL) { error = bread(ip->i_devvp, lbn, osize, NULL, bpp); if (error) { brelse(*bpp); return (error); } } return 0; } else { warnx("need to ffs_realloccg; not supported!"); abort(); } } else { /* * the block was not previously allocated, * allocate a new block or fragment. */ if (ip->i_ffs1_size < (uint64_t)lblktosize(fs, lbn + 1)) nsize = fragroundup(fs, size); else nsize = fs->fs_bsize; error = ffs_alloc(ip, lbn, ffs_blkpref_ufs1(ip, lbn, (int)lbn, &ip->i_ffs1_db[0]), nsize, &newb); if (error) return (error); if (bpp != NULL) { bp = getblk(ip->i_devvp, lbn, nsize, 0, 0, 0); bp->b_blkno = fsbtodb(fs, newb); clrbuf(bp); *bpp = bp; } } ip->i_ffs1_db[lbn] = ufs_rw32((int32_t)newb, needswap); return (0); } /* * Determine the number of levels of indirection. */ pref = 0; if ((error = ufs_getlbns(ip, lbn, indirs, &num)) != 0) return (error); if (num < 1) { warnx("ffs_balloc: ufs_getlbns returned indirect block"); abort(); } /* * Fetch the first indirect block allocating if necessary. */ --num; nb = ufs_rw32(ip->i_ffs1_ib[indirs[0].in_off], needswap); allocib = NULL; allocblk = allociblk; if (nb == 0) { pref = ffs_blkpref_ufs1(ip, lbn, 0, (int32_t *)0); error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, &newb); if (error) return error; nb = newb; *allocblk++ = nb; bp = getblk(ip->i_devvp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0); bp->b_blkno = fsbtodb(fs, nb); clrbuf(bp); /* * Write synchronously so that indirect blocks * never point at garbage. */ if ((error = bwrite(bp)) != 0) return error; allocib = &ip->i_ffs1_ib[indirs[0].in_off]; *allocib = ufs_rw32((int32_t)nb, needswap); } /* * Fetch through the indirect blocks, allocating as necessary. 
*/ for (i = 1;;) { error = bread(ip->i_devvp, indirs[i].in_lbn, fs->fs_bsize, NULL, &bp); if (error) { brelse(bp); return error; } bap = (int32_t *)bp->b_data; nb = ufs_rw32(bap[indirs[i].in_off], needswap); if (i == num) break; i++; if (nb != 0) { brelse(bp); continue; } if (pref == 0) pref = ffs_blkpref_ufs1(ip, lbn, 0, (int32_t *)0); error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, &newb); if (error) { brelse(bp); return error; } nb = newb; *allocblk++ = nb; nbp = getblk(ip->i_devvp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); clrbuf(nbp); /* * Write synchronously so that indirect blocks * never point at garbage. */ if ((error = bwrite(nbp)) != 0) { brelse(bp); return error; } bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap); bwrite(bp); } /* * Get the data block, allocating if necessary. */ if (nb == 0) { pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, &bap[0]); error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, &newb); if (error) { brelse(bp); return error; } nb = newb; *allocblk++ = nb; if (bpp != NULL) { nbp = getblk(ip->i_devvp, lbn, fs->fs_bsize, 0, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); clrbuf(nbp); *bpp = nbp; } bap[indirs[num].in_off] = ufs_rw32(nb, needswap); /* * If required, write synchronously, otherwise use * delayed write. */ bwrite(bp); return (0); } brelse(bp); if (bpp != NULL) { error = bread(ip->i_devvp, lbn, (int)fs->fs_bsize, NULL, &nbp); if (error) { brelse(nbp); return error; } *bpp = nbp; } return (0); }
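/*
 * Minimal sketch of the byte-order handling used above: makefs builds
 * file systems for targets whose byte order may differ from the host's,
 * so every 32-bit on-disk value passes through ufs_rw32(..., needswap).
 * sketch_rw32() below is a hypothetical stand-in with the same shape.
 */
#include <stdint.h>

static inline uint32_t
sketch_bswap32(uint32_t v)
{
	return ((v & 0x000000ffU) << 24) | ((v & 0x0000ff00U) << 8) |
	    ((v & 0x00ff0000U) >> 8) | ((v & 0xff000000U) >> 24);
}

/* Convert a 32-bit value between host and on-disk order when needed. */
static inline uint32_t
sketch_rw32(uint32_t v, int needswap)
{
	return (needswap ? sketch_bswap32(v) : v);
}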
/* * Balloc defines the structure of file system storage * by allocating the physical blocks on a device given * the inode and the logical block number in a file. */ ffs_balloc( register struct inode *ip, register ufs_daddr_t lbn, int size, kauth_cred_t cred, struct buf **bpp, int flags, int * blk_alloc) { register struct fs *fs; register ufs_daddr_t nb; struct buf *bp, *nbp; struct vnode *vp = ITOV(ip); struct indir indirs[NIADDR + 2]; ufs_daddr_t newb, *bap, pref; int deallocated, osize, nsize, num, i, error; ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; int devBlockSize=0; int alloc_buffer = 1; struct mount *mp=vp->v_mount; #if REV_ENDIAN_FS int rev_endian=(mp->mnt_flag & MNT_REVEND); #endif /* REV_ENDIAN_FS */ *bpp = NULL; if (lbn < 0) return (EFBIG); fs = ip->i_fs; if (flags & B_NOBUFF) alloc_buffer = 0; if (blk_alloc) *blk_alloc = 0; /* * If the next write will extend the file into a new block, * and the file is currently composed of a fragment * this fragment has to be extended to be a full block. */ nb = lblkno(fs, ip->i_size); if (nb < NDADDR && nb < lbn) { /* the filesize prior to this write can fit in direct * blocks (ie. fragmentaion is possibly done) * we are now extending the file write beyond * the block which has end of file prior to this write */ osize = blksize(fs, ip, nb); /* osize gives disk allocated size in the last block. It is * either in fragments or a file system block size */ if (osize < fs->fs_bsize && osize > 0) { /* few fragments are already allocated,since the * current extends beyond this block * allocate the complete block as fragments are only * in last block */ error = ffs_realloccg(ip, nb, ffs_blkpref(ip, nb, (int)nb, &ip->i_db[0]), osize, (int)fs->fs_bsize, cred, &bp); if (error) return (error); /* adjust the inode size we just grew */ /* it is in nb+1 as nb starts from 0 */ ip->i_size = (nb + 1) * fs->fs_bsize; ubc_setsize(vp, (off_t)ip->i_size); ip->i_db[nb] = dbtofsb(fs, (ufs_daddr_t)buf_blkno(bp)); ip->i_flag |= IN_CHANGE | IN_UPDATE; if ((flags & B_SYNC) || (!alloc_buffer)) { if (!alloc_buffer) buf_setflags(bp, B_NOCACHE); buf_bwrite(bp); } else buf_bdwrite(bp); /* note that bp is already released here */ } } /* * The first NDADDR blocks are direct blocks */ if (lbn < NDADDR) { nb = ip->i_db[lbn]; if (nb != 0 && ip->i_size >= (lbn + 1) * fs->fs_bsize) { if (alloc_buffer) { error = (int)buf_bread(vp, (daddr64_t)((unsigned)lbn), fs->fs_bsize, NOCRED, &bp); if (error) { buf_brelse(bp); return (error); } *bpp = bp; } return (0); } if (nb != 0) { /* * Consider need to reallocate a fragment. 
*/ osize = fragroundup(fs, blkoff(fs, ip->i_size)); nsize = fragroundup(fs, size); if (nsize <= osize) { if (alloc_buffer) { error = (int)buf_bread(vp, (daddr64_t)((unsigned)lbn), osize, NOCRED, &bp); if (error) { buf_brelse(bp); return (error); } ip->i_flag |= IN_CHANGE | IN_UPDATE; *bpp = bp; return (0); } else { ip->i_flag |= IN_CHANGE | IN_UPDATE; return (0); } } else { error = ffs_realloccg(ip, lbn, ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]), osize, nsize, cred, &bp); if (error) return (error); ip->i_db[lbn] = dbtofsb(fs, (ufs_daddr_t)buf_blkno(bp)); ip->i_flag |= IN_CHANGE | IN_UPDATE; /* adjust the inode size we just grew */ ip->i_size = (lbn * fs->fs_bsize) + size; ubc_setsize(vp, (off_t)ip->i_size); if (!alloc_buffer) { buf_setflags(bp, B_NOCACHE); if (flags & B_SYNC) buf_bwrite(bp); else buf_bdwrite(bp); } else *bpp = bp; return (0); } } else { if (ip->i_size < (lbn + 1) * fs->fs_bsize) nsize = fragroundup(fs, size); else nsize = fs->fs_bsize; error = ffs_alloc(ip, lbn, ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]), nsize, cred, &newb); if (error) return (error); if (alloc_buffer) { bp = buf_getblk(vp, (daddr64_t)((unsigned)lbn), nsize, 0, 0, BLK_WRITE); buf_setblkno(bp, (daddr64_t)((unsigned)fsbtodb(fs, newb))); if (flags & B_CLRBUF) buf_clear(bp); } ip->i_db[lbn] = newb; ip->i_flag |= IN_CHANGE | IN_UPDATE; if (blk_alloc) { *blk_alloc = nsize; } if (alloc_buffer) *bpp = bp; return (0); } } /* * Determine the number of levels of indirection. */ pref = 0; if (error = ufs_getlbns(vp, lbn, indirs, &num)) return(error); #if DIAGNOSTIC if (num < 1) panic ("ffs_balloc: ufs_bmaparray returned indirect block"); #endif /* * Fetch the first indirect block allocating if necessary. */ --num; nb = ip->i_ib[indirs[0].in_off]; allocib = NULL; allocblk = allociblk; if (nb == 0) { pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0); if (error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) return (error); nb = newb; *allocblk++ = nb; bp = buf_getblk(vp, (daddr64_t)((unsigned)(indirs[1].in_lbn)), fs->fs_bsize, 0, 0, BLK_META); buf_setblkno(bp, (daddr64_t)((unsigned)fsbtodb(fs, nb))); buf_clear(bp); /* * Write synchronously conditional on mount flags. */ if ((vp)->v_mount->mnt_flag & MNT_ASYNC) { error = 0; buf_bdwrite(bp); } else if ((error = buf_bwrite(bp)) != 0) { goto fail; } allocib = &ip->i_ib[indirs[0].in_off]; *allocib = nb; ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * Fetch through the indirect blocks, allocating as necessary. */ for (i = 1;;) { error = (int)buf_meta_bread(vp, (daddr64_t)((unsigned)(indirs[i].in_lbn)), (int)fs->fs_bsize, NOCRED, &bp); if (error) { buf_brelse(bp); goto fail; } bap = (ufs_daddr_t *)buf_dataptr(bp); #if REV_ENDIAN_FS if (rev_endian) nb = OSSwapInt32(bap[indirs[i].in_off]); else { #endif /* REV_ENDIAN_FS */ nb = bap[indirs[i].in_off]; #if REV_ENDIAN_FS } #endif /* REV_ENDIAN_FS */ if (i == num) break; i += 1; if (nb != 0) { buf_brelse(bp); continue; } if (pref == 0) pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0); if (error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) { buf_brelse(bp); goto fail; } nb = newb; *allocblk++ = nb; nbp = buf_getblk(vp, (daddr64_t)((unsigned)(indirs[i].in_lbn)), fs->fs_bsize, 0, 0, BLK_META); buf_setblkno(nbp, (daddr64_t)((unsigned)fsbtodb(fs, nb))); buf_clear(nbp); /* * Write synchronously conditional on mount flags. 
*/ if ((vp)->v_mount->mnt_flag & MNT_ASYNC) { error = 0; buf_bdwrite(nbp); } else if (error = buf_bwrite(nbp)) { buf_brelse(bp); goto fail; } #if REV_ENDIAN_FS if (rev_endian) bap[indirs[i - 1].in_off] = OSSwapInt32(nb); else { #endif /* REV_ENDIAN_FS */ bap[indirs[i - 1].in_off] = nb; #if REV_ENDIAN_FS } #endif /* REV_ENDIAN_FS */ /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & B_SYNC) { buf_bwrite(bp); } else { buf_bdwrite(bp); } } /* * Get the data block, allocating if necessary. */ if (nb == 0) { pref = ffs_blkpref(ip, lbn, indirs[i].in_off, &bap[0]); if (error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) { buf_brelse(bp); goto fail; } nb = newb; *allocblk++ = nb; #if REV_ENDIAN_FS if (rev_endian) bap[indirs[i].in_off] = OSSwapInt32(nb); else { #endif /* REV_ENDIAN_FS */ bap[indirs[i].in_off] = nb; #if REV_ENDIAN_FS } #endif /* REV_ENDIAN_FS */ /* * If required, write synchronously, otherwise use * delayed write. */ if ((flags & B_SYNC)) { buf_bwrite(bp); } else { buf_bdwrite(bp); } if(alloc_buffer ) { nbp = buf_getblk(vp, (daddr64_t)((unsigned)lbn), fs->fs_bsize, 0, 0, BLK_WRITE); buf_setblkno(nbp, (daddr64_t)((unsigned)fsbtodb(fs, nb))); if (flags & B_CLRBUF) buf_clear(nbp); } if (blk_alloc) { *blk_alloc = fs->fs_bsize; } if(alloc_buffer) *bpp = nbp; return (0); } buf_brelse(bp); if (alloc_buffer) { if (flags & B_CLRBUF) { error = (int)buf_bread(vp, (daddr64_t)((unsigned)lbn), (int)fs->fs_bsize, NOCRED, &nbp); if (error) { buf_brelse(nbp); goto fail; } } else { nbp = buf_getblk(vp, (daddr64_t)((unsigned)lbn), fs->fs_bsize, 0, 0, BLK_WRITE); buf_setblkno(nbp, (daddr64_t)((unsigned)fsbtodb(fs, nb))); } *bpp = nbp; } return (0); fail: /* * If we have failed part way through block allocation, we * have to deallocate any indirect blocks that we have allocated. */ for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) { ffs_blkfree(ip, *blkp, fs->fs_bsize); deallocated += fs->fs_bsize; } if (allocib != NULL) *allocib = 0; if (deallocated) { devBlockSize = vfs_devblocksize(mp); #if QUOTA /* * Restore user's disk quota because allocation failed. */ (void) chkdq(ip, (int64_t)-deallocated, cred, FORCE); #endif /* QUOTA */ ip->i_blocks -= btodb(deallocated, devBlockSize); ip->i_flag |= IN_CHANGE | IN_UPDATE; } return (error); }
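/*
 * Stand-alone sketch of the fail-path accounting above (names and the
 * power-of-two assumption are illustrative): the bytes handed back via
 * ffs_blkfree() are converted to device blocks, btodb()-style, before
 * ip->i_blocks is decremented and the quota is restored.
 */
#include <stdint.h>

static int64_t
sketch_btodb(int64_t bytes, int dev_bsize)
{
	/* Device block size is assumed to be a power of two here. */
	return (bytes / dev_bsize);
}

/* Example: 3 freed 8K blocks on a 512-byte device -> 48 device blocks. */
static int64_t
sketch_deallocated_dbs(int nblocks, int fs_bsize, int dev_bsize)
{
	return (sketch_btodb((int64_t)nblocks * fs_bsize, dev_bsize));
}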
/* * Balloc defines the structure of filesystem storage * by allocating the physical blocks on a device given * the inode and the logical block number in a file. * This is the allocation strategy for UFS1. Below is * the allocation strategy for UFS2. */ int ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size, struct ucred *cred, int flags, struct buf **bpp) { struct inode *ip; struct ufs1_dinode *dp; ufs_lbn_t lbn, lastlbn; struct fs *fs; ufs1_daddr_t nb; struct buf *bp, *nbp; struct ufsmount *ump; struct indir indirs[NIADDR + 2]; int deallocated, osize, nsize, num, i, error; ufs2_daddr_t newb; ufs1_daddr_t *bap, pref; ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1]; int unwindidx = -1; int saved_inbdflush; static struct timeval lastfail; static int curfail; int reclaimed; ip = VTOI(vp); dp = ip->i_din1; fs = ip->i_fs; ump = ip->i_ump; lbn = lblkno(fs, startoffset); size = blkoff(fs, startoffset) + size; reclaimed = 0; if (size > fs->fs_bsize) panic("ffs_balloc_ufs1: blk too big"); *bpp = NULL; if (flags & IO_EXT) return (EOPNOTSUPP); if (lbn < 0) return (EFBIG); if (DOINGSOFTDEP(vp)) softdep_prealloc(vp, MNT_WAIT); /* * If the next write will extend the file into a new block, * and the file is currently composed of a fragment * this fragment has to be extended to be a full block. */ lastlbn = lblkno(fs, ip->i_size); if (lastlbn < NDADDR && lastlbn < lbn) { nb = lastlbn; osize = blksize(fs, ip, nb); if (osize < fs->fs_bsize && osize > 0) { UFS_LOCK(ump); error = ffs_realloccg(ip, nb, dp->di_db[nb], ffs_blkpref_ufs1(ip, lastlbn, (int)nb, &dp->di_db[0]), osize, (int)fs->fs_bsize, flags, cred, &bp); if (error) return (error); if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, nb, dbtofsb(fs, bp->b_blkno), dp->di_db[nb], fs->fs_bsize, osize, bp); ip->i_size = smalllblktosize(fs, nb + 1); dp->di_size = ip->i_size; dp->di_db[nb] = dbtofsb(fs, bp->b_blkno); ip->i_flag |= IN_CHANGE | IN_UPDATE; if (flags & IO_SYNC) bwrite(bp); else bawrite(bp); } } /* * The first NDADDR blocks are direct blocks */ if (lbn < NDADDR) { if (flags & BA_METAONLY) panic("ffs_balloc_ufs1: BA_METAONLY for direct block"); nb = dp->di_db[lbn]; if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) { error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp); if (error) { brelse(bp); return (error); } bp->b_blkno = fsbtodb(fs, nb); *bpp = bp; return (0); } if (nb != 0) { /* * Consider need to reallocate a fragment. 
*/ osize = fragroundup(fs, blkoff(fs, ip->i_size)); nsize = fragroundup(fs, size); if (nsize <= osize) { error = bread(vp, lbn, osize, NOCRED, &bp); if (error) { brelse(bp); return (error); } bp->b_blkno = fsbtodb(fs, nb); } else { UFS_LOCK(ump); error = ffs_realloccg(ip, lbn, dp->di_db[lbn], ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]), osize, nsize, flags, cred, &bp); if (error) return (error); if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, lbn, dbtofsb(fs, bp->b_blkno), nb, nsize, osize, bp); } } else { if (ip->i_size < smalllblktosize(fs, lbn + 1)) nsize = fragroundup(fs, size); else nsize = fs->fs_bsize; UFS_LOCK(ump); error = ffs_alloc(ip, lbn, ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]), nsize, flags, cred, &newb); if (error) return (error); bp = getblk(vp, lbn, nsize, 0, 0, 0); bp->b_blkno = fsbtodb(fs, newb); if (flags & BA_CLRBUF) vfs_bio_clrbuf(bp); if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, lbn, newb, 0, nsize, 0, bp); } dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno); ip->i_flag |= IN_CHANGE | IN_UPDATE; *bpp = bp; return (0); } /* * Determine the number of levels of indirection. */ pref = 0; if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0) return(error); #ifdef INVARIANTS if (num < 1) panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block"); #endif saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH); /* * Fetch the first indirect block allocating if necessary. */ --num; nb = dp->di_ib[indirs[0].in_off]; allocib = NULL; allocblk = allociblk; lbns_remfree = lbns; if (nb == 0) { UFS_LOCK(ump); pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0); if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred, &newb)) != 0) { curthread_pflags_restore(saved_inbdflush); return (error); } nb = newb; *allocblk++ = nb; *lbns_remfree++ = indirs[1].in_lbn; bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0); bp->b_blkno = fsbtodb(fs, nb); vfs_bio_clrbuf(bp); if (DOINGSOFTDEP(vp)) { softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off, newb, 0, fs->fs_bsize, 0, bp); bdwrite(bp); } else { /* * Write synchronously so that indirect blocks * never point at garbage. */ if (DOINGASYNC(vp)) bdwrite(bp); else if ((error = bwrite(bp)) != 0) goto fail; } allocib = &dp->di_ib[indirs[0].in_off]; *allocib = nb; ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * Fetch through the indirect blocks, allocating as necessary. */ retry: for (i = 1;;) { error = bread(vp, indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp); if (error) { brelse(bp); goto fail; } bap = (ufs1_daddr_t *)bp->b_data; nb = bap[indirs[i].in_off]; if (i == num) break; i += 1; if (nb != 0) { bqrelse(bp); continue; } UFS_LOCK(ump); if (pref == 0) pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0); if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags | IO_BUFLOCKED, cred, &newb)) != 0) { brelse(bp); if (++reclaimed == 1) { UFS_LOCK(ump); softdep_request_cleanup(fs, vp, cred, FLUSH_BLOCKS_WAIT); UFS_UNLOCK(ump); goto retry; } if (ppsratecheck(&lastfail, &curfail, 1)) { ffs_fserr(fs, ip->i_number, "filesystem full"); uprintf("\n%s: write failed, filesystem " "is full\n", fs->fs_fsmnt); } goto fail; } nb = newb; *allocblk++ = nb; *lbns_remfree++ = indirs[i].in_lbn; nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); vfs_bio_clrbuf(nbp); if (DOINGSOFTDEP(vp)) { softdep_setup_allocindir_meta(nbp, ip, bp, indirs[i - 1].in_off, nb); bdwrite(nbp); } else { /* * Write synchronously so that indirect blocks * never point at garbage. 
*/ if ((error = bwrite(nbp)) != 0) { brelse(bp); goto fail; } } bap[indirs[i - 1].in_off] = nb; if (allocib == NULL && unwindidx < 0) unwindidx = i - 1; /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & IO_SYNC) { bwrite(bp); } else { if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } } /* * If asked only for the indirect block, then return it. */ if (flags & BA_METAONLY) { curthread_pflags_restore(saved_inbdflush); *bpp = bp; return (0); } /* * Get the data block, allocating if necessary. */ if (nb == 0) { UFS_LOCK(ump); pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off, &bap[0]); error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags | IO_BUFLOCKED, cred, &newb); if (error) { brelse(bp); if (++reclaimed == 1) { UFS_LOCK(ump); softdep_request_cleanup(fs, vp, cred, FLUSH_BLOCKS_WAIT); UFS_UNLOCK(ump); goto retry; } if (ppsratecheck(&lastfail, &curfail, 1)) { ffs_fserr(fs, ip->i_number, "filesystem full"); uprintf("\n%s: write failed, filesystem " "is full\n", fs->fs_fsmnt); } goto fail; } nb = newb; *allocblk++ = nb; *lbns_remfree++ = lbn; nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); if (flags & BA_CLRBUF) vfs_bio_clrbuf(nbp); if (DOINGSOFTDEP(vp)) softdep_setup_allocindir_page(ip, lbn, bp, indirs[i].in_off, nb, 0, nbp); bap[indirs[i].in_off] = nb; /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & IO_SYNC) { bwrite(bp); } else { if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } curthread_pflags_restore(saved_inbdflush); *bpp = nbp; return (0); } brelse(bp); if (flags & BA_CLRBUF) { int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT; if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { error = cluster_read(vp, ip->i_size, lbn, (int)fs->fs_bsize, NOCRED, MAXBSIZE, seqcount, &nbp); } else { error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp); } if (error) { brelse(nbp); goto fail; } } else {
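/*
 * Sketch of the ENOSPC retry above (all names hypothetical): on the
 * first allocation failure the caller asks the soft-updates layer to
 * flush dependencies that may be pinning freeable blocks, then retries
 * exactly once before reporting "filesystem full".
 */
#include <errno.h>

extern int sketch_alloc_block(long *out);	/* hypothetical ffs_alloc() */
extern void sketch_request_cleanup(void);	/* hypothetical softdep flush */

static int
sketch_alloc_with_reclaim(long *out)
{
	int error, reclaimed = 0;

retry:
	error = sketch_alloc_block(out);
	if (error == ENOSPC && ++reclaimed == 1) {
		sketch_request_cleanup();	/* may release pinned space */
		goto retry;			/* one retry only */
	}
	return (error);
}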
/* * Balloc defines the structure of file system storage * by allocating the physical blocks on a device given * the inode and the logical block number in a file. * This is the allocation strategy for UFS2. Above is * the allocation strategy for UFS1. */ int ffs_balloc_ufs2(vnode *vp, off_t startoffset, int size, Ucred *cred, int flags, Buf **bpp) { int error = 0; print("HARVEY TODO: %s\n", __func__); #if 0 struct inode *ip; struct ufs2_dinode *dp; ufs_lbn_t lbn, lastlbn; struct fs *fs; struct buf *bp, *nbp; struct ufsmount *ump; struct indir indirs[UFS_NIADDR + 2]; ufs2_daddr_t nb, newb, *bap, pref; ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1]; ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1]; int deallocated, osize, nsize, num, i, error; int unwindidx = -1; int saved_inbdflush; static struct timeval lastfail; static int curfail; int gbflags, reclaimed; ip = VTOI(vp); dp = ip->i_din2; fs = ITOFS(ip); ump = ITOUMP(ip); lbn = lblkno(fs, startoffset); size = blkoff(fs, startoffset) + size; reclaimed = 0; if (size > fs->fs_bsize) panic("ffs_balloc_ufs2: blk too big"); *bpp = nil; if (lbn < 0) return (EFBIG); gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0; if (DOINGSOFTDEP(vp)) softdep_prealloc(vp, MNT_WAIT); /* * Check for allocating external data. */ if (flags & IO_EXT) { if (lbn >= UFS_NXADDR) return (EFBIG); /* * If the next write will extend the data into a new block, * and the data is currently composed of a fragment * this fragment has to be extended to be a full block. */ lastlbn = lblkno(fs, dp->di_extsize); if (lastlbn < lbn) { nb = lastlbn; osize = sblksize(fs, dp->di_extsize, nb); if (osize < fs->fs_bsize && osize > 0) { UFS_LOCK(ump); error = ffs_realloccg(ip, -1 - nb, dp->di_extb[nb], ffs_blkpref_ufs2(ip, lastlbn, (int)nb, &dp->di_extb[0]), osize, (int)fs->fs_bsize, flags, cred, &bp); if (error) return (error); if (DOINGSOFTDEP(vp)) softdep_setup_allocext(ip, nb, dbtofsb(fs, bp->b_blkno), dp->di_extb[nb], fs->fs_bsize, osize, bp); dp->di_extsize = smalllblktosize(fs, nb + 1); dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno); bp->b_xflags |= BX_ALTDATA; ip->i_flag |= IN_CHANGE; if (flags & IO_SYNC) bwrite(bp); else bawrite(bp); } } /* * All blocks are direct blocks */ if (flags & BA_METAONLY) panic("ffs_balloc_ufs2: BA_METAONLY for ext block"); nb = dp->di_extb[lbn]; if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) { error = bread_gb(vp, -1 - lbn, fs->fs_bsize, NOCRED, gbflags, &bp); if (error) { brelse(bp); return (error); } bp->b_blkno = fsbtodb(fs, nb); bp->b_xflags |= BX_ALTDATA; *bpp = bp; return (0); } if (nb != 0) { /* * Consider need to reallocate a fragment. 
*/ osize = fragroundup(fs, blkoff(fs, dp->di_extsize)); nsize = fragroundup(fs, size); if (nsize <= osize) { error = bread_gb(vp, -1 - lbn, osize, NOCRED, gbflags, &bp); if (error) { brelse(bp); return (error); } bp->b_blkno = fsbtodb(fs, nb); bp->b_xflags |= BX_ALTDATA; } else { UFS_LOCK(ump); error = ffs_realloccg(ip, -1 - lbn, dp->di_extb[lbn], ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]), osize, nsize, flags, cred, &bp); if (error) return (error); bp->b_xflags |= BX_ALTDATA; if (DOINGSOFTDEP(vp)) softdep_setup_allocext(ip, lbn, dbtofsb(fs, bp->b_blkno), nb, nsize, osize, bp); } } else { if (dp->di_extsize < smalllblktosize(fs, lbn + 1)) nsize = fragroundup(fs, size); else nsize = fs->fs_bsize; UFS_LOCK(ump); error = ffs_alloc(ip, lbn, ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]), nsize, flags, cred, &newb); if (error) return (error); bp = getblk(vp, -1 - lbn, nsize, 0, 0, gbflags); bp->b_blkno = fsbtodb(fs, newb); bp->b_xflags |= BX_ALTDATA; if (flags & BA_CLRBUF) vfs_bio_clrbuf(bp); if (DOINGSOFTDEP(vp)) softdep_setup_allocext(ip, lbn, newb, 0, nsize, 0, bp); } dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno); ip->i_flag |= IN_CHANGE; *bpp = bp; return (0); } /* * If the next write will extend the file into a new block, * and the file is currently composed of a fragment * this fragment has to be extended to be a full block. */ lastlbn = lblkno(fs, ip->i_size); if (lastlbn < UFS_NDADDR && lastlbn < lbn) { nb = lastlbn; osize = blksize(fs, ip, nb); if (osize < fs->fs_bsize && osize > 0) { UFS_LOCK(ump); error = ffs_realloccg(ip, nb, dp->di_db[nb], ffs_blkpref_ufs2(ip, lastlbn, (int)nb, &dp->di_db[0]), osize, (int)fs->fs_bsize, flags, cred, &bp); if (error) return (error); if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, nb, dbtofsb(fs, bp->b_blkno), dp->di_db[nb], fs->fs_bsize, osize, bp); ip->i_size = smalllblktosize(fs, nb + 1); dp->di_size = ip->i_size; dp->di_db[nb] = dbtofsb(fs, bp->b_blkno); ip->i_flag |= IN_CHANGE | IN_UPDATE; if (flags & IO_SYNC) bwrite(bp); else bawrite(bp); } } /* * The first UFS_NDADDR blocks are direct blocks */ if (lbn < UFS_NDADDR) { if (flags & BA_METAONLY) panic("ffs_balloc_ufs2: BA_METAONLY for direct block"); nb = dp->di_db[lbn]; if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) { error = bread_gb(vp, lbn, fs->fs_bsize, NOCRED, gbflags, &bp); if (error) { brelse(bp); return (error); } bp->b_blkno = fsbtodb(fs, nb); *bpp = bp; return (0); } if (nb != 0) { /* * Consider need to reallocate a fragment. 
*/ osize = fragroundup(fs, blkoff(fs, ip->i_size)); nsize = fragroundup(fs, size); if (nsize <= osize) { error = bread_gb(vp, lbn, osize, NOCRED, gbflags, &bp); if (error) { brelse(bp); return (error); } bp->b_blkno = fsbtodb(fs, nb); } else { UFS_LOCK(ump); error = ffs_realloccg(ip, lbn, dp->di_db[lbn], ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_db[0]), osize, nsize, flags, cred, &bp); if (error) return (error); if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, lbn, dbtofsb(fs, bp->b_blkno), nb, nsize, osize, bp); } } else { if (ip->i_size < smalllblktosize(fs, lbn + 1)) nsize = fragroundup(fs, size); else nsize = fs->fs_bsize; UFS_LOCK(ump); error = ffs_alloc(ip, lbn, ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_db[0]), nsize, flags, cred, &newb); if (error) return (error); bp = getblk(vp, lbn, nsize, 0, 0, gbflags); bp->b_blkno = fsbtodb(fs, newb); if (flags & BA_CLRBUF) vfs_bio_clrbuf(bp); if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, lbn, newb, 0, nsize, 0, bp); } dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno); ip->i_flag |= IN_CHANGE | IN_UPDATE; *bpp = bp; return (0); } /* * Determine the number of levels of indirection. */ pref = 0; if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0) return(error); #ifdef INVARIANTS if (num < 1) panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block"); #endif saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH); /* * Fetch the first indirect block allocating if necessary. */ --num; nb = dp->di_ib[indirs[0].in_off]; allocib = nil; allocblk = allociblk; lbns_remfree = lbns; if (nb == 0) { UFS_LOCK(ump); pref = ffs_blkpref_ufs2(ip, lbn, -indirs[0].in_off - 1, (ufs2_daddr_t *)0); if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred, &newb)) != 0) { curthread_pflags_restore(saved_inbdflush); return (error); } pref = newb + fs->fs_frag; nb = newb; MPASS(allocblk < allociblk + nitems(allociblk)); MPASS(lbns_remfree < lbns + nitems(lbns)); *allocblk++ = nb; *lbns_remfree++ = indirs[1].in_lbn; bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, GB_UNMAPPED); bp->b_blkno = fsbtodb(fs, nb); vfs_bio_clrbuf(bp); if (DOINGSOFTDEP(vp)) { softdep_setup_allocdirect(ip, UFS_NDADDR + indirs[0].in_off, newb, 0, fs->fs_bsize, 0, bp); bdwrite(bp); } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) { if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } else { if ((error = bwrite(bp)) != 0) goto fail; } allocib = &dp->di_ib[indirs[0].in_off]; *allocib = nb; ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * Fetch through the indirect blocks, allocating as necessary. */ retry: for (i = 1;;) { error = bread(vp, indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp); if (error) { brelse(bp); goto fail; } bap = (ufs2_daddr_t *)bp->b_data; nb = bap[indirs[i].in_off]; if (i == num) break; i += 1; if (nb != 0) { bqrelse(bp); continue; } UFS_LOCK(ump); /* * If parent indirect has just been allocated, try to cluster * immediately following it. 
*/ if (pref == 0) pref = ffs_blkpref_ufs2(ip, lbn, i - num - 1, (ufs2_daddr_t *)0); if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags | IO_BUFLOCKED, cred, &newb)) != 0) { brelse(bp); if (DOINGSOFTDEP(vp) && ++reclaimed == 1) { UFS_LOCK(ump); softdep_request_cleanup(fs, vp, cred, FLUSH_BLOCKS_WAIT); UFS_UNLOCK(ump); goto retry; } if (ppsratecheck(&lastfail, &curfail, 1)) { ffs_fserr(fs, ip->i_number, "filesystem full"); uprintf("\n%s: write failed, filesystem " "is full\n", fs->fs_fsmnt); } goto fail; } pref = newb + fs->fs_frag; nb = newb; MPASS(allocblk < allociblk + nitems(allociblk)); MPASS(lbns_remfree < lbns + nitems(lbns)); *allocblk++ = nb; *lbns_remfree++ = indirs[i].in_lbn; nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, GB_UNMAPPED); nbp->b_blkno = fsbtodb(fs, nb); vfs_bio_clrbuf(nbp); if (DOINGSOFTDEP(vp)) { softdep_setup_allocindir_meta(nbp, ip, bp, indirs[i - 1].in_off, nb); bdwrite(nbp); } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) { if (nbp->b_bufsize == fs->fs_bsize) nbp->b_flags |= B_CLUSTEROK; bdwrite(nbp); } else { if ((error = bwrite(nbp)) != 0) { brelse(bp); goto fail; } } bap[indirs[i - 1].in_off] = nb; if (allocib == nil && unwindidx < 0) unwindidx = i - 1; /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & IO_SYNC) { bwrite(bp); } else { if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } } /* * If asked only for the indirect block, then return it. */ if (flags & BA_METAONLY) { curthread_pflags_restore(saved_inbdflush); *bpp = bp; return (0); } /* * Get the data block, allocating if necessary. */ if (nb == 0) { UFS_LOCK(ump); /* * If allocating metadata at the front of the cylinder * group and parent indirect block has just been allocated, * then cluster next to it if it is the first indirect in * the file. Otherwise it has been allocated in the metadata * area, so we want to find our own place out in the data area. */ if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0)) pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off, &bap[0]); error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags | IO_BUFLOCKED, cred, &newb); if (error) { brelse(bp); if (DOINGSOFTDEP(vp) && ++reclaimed == 1) { UFS_LOCK(ump); softdep_request_cleanup(fs, vp, cred, FLUSH_BLOCKS_WAIT); UFS_UNLOCK(ump); goto retry; } if (ppsratecheck(&lastfail, &curfail, 1)) { ffs_fserr(fs, ip->i_number, "filesystem full"); uprintf("\n%s: write failed, filesystem " "is full\n", fs->fs_fsmnt); } goto fail; } nb = newb; MPASS(allocblk < allociblk + nitems(allociblk)); MPASS(lbns_remfree < lbns + nitems(lbns)); *allocblk++ = nb; *lbns_remfree++ = lbn; nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags); nbp->b_blkno = fsbtodb(fs, nb); if (flags & BA_CLRBUF) vfs_bio_clrbuf(nbp); if (DOINGSOFTDEP(vp)) softdep_setup_allocindir_page(ip, lbn, bp, indirs[i].in_off, nb, 0, nbp); bap[indirs[i].in_off] = nb; /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & IO_SYNC) { bwrite(bp); } else { if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } curthread_pflags_restore(saved_inbdflush); *bpp = nbp; return (0); } brelse(bp); /* * If requested clear invalid portions of the buffer. If we * have to do a read-before-write (typical if BA_CLRBUF is set), * try to do some read-ahead in the sequential case to reduce * the number of I/O transactions. 
*/ if (flags & BA_CLRBUF) { int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT; if (seqcount != 0 && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 && !(vm_page_count_severe() || buf_dirty_count_severe())) { error = cluster_read(vp, ip->i_size, lbn, (int)fs->fs_bsize, NOCRED, MAXBSIZE, seqcount, gbflags, &nbp); } else { error = bread_gb(vp, lbn, (int)fs->fs_bsize, NOCRED, gbflags, &nbp); } if (error) { brelse(nbp); goto fail; } } else {
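/*
 * Sketch of the sequential-hint decoding in the read-before-write path
 * above.  The mask/shift values are assumptions standing in for
 * BA_SEQMASK/BA_SEQSHIFT; the decision mirrors the code: cluster only
 * when the hint is nonzero and clustering is enabled on the mount.
 */
#define SK_SEQSHIFT	24			/* assumed bit position */
#define SK_SEQMASK	(0x7f << SK_SEQSHIFT)	/* assumed field width */

static int
sketch_should_cluster(int flags, int mnt_noclusterr)
{
	int seqcount = (flags & SK_SEQMASK) >> SK_SEQSHIFT;

	return (seqcount != 0 && !mnt_noclusterr);
}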
/*
 * ffs_blkalloc allocates a disk block for ffs_pageout(); as a consequence
 * it does no buf_breads (those could lead to deadlock, as the page may
 * already be marked busy while it is being paged out).  Also important to
 * note is that we are not growing the file in pageouts, so ip->i_size
 * cannot increase by this call due to the way UBC works.
 * This code is derived from ffs_balloc; many cases handled in ffs_balloc
 * are not applicable here.
 * Do not call with the B_CLRBUF flag, as this should only be called
 * from pageouts.
 */
ffs_blkalloc(
	struct inode *ip,
	ufs_daddr_t lbn,
	int size,
	kauth_cred_t cred,
	int flags)
{
	register struct fs *fs;
	register ufs_daddr_t nb;
	struct buf *bp, *nbp;
	struct vnode *vp = ITOV(ip);
	struct indir indirs[NIADDR + 2];
	ufs_daddr_t newb, *bap, pref;
	int deallocated, osize, nsize, num, i, error;
	ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
	int devBlockSize = 0;
	struct mount *mp = vp->v_mount;
#if REV_ENDIAN_FS
	int rev_endian = (mp->mnt_flag & MNT_REVEND);
#endif /* REV_ENDIAN_FS */

	fs = ip->i_fs;

	if (size > fs->fs_bsize)
		panic("ffs_blkalloc: too large for allocation");

	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */
	nb = lblkno(fs, ip->i_size);
	if (nb < NDADDR && nb < lbn) {
		panic("ffs_blkalloc(): cannot extend file: i_size %d, lbn %d",
		    ip->i_size, lbn);
	}

	/*
	 * The first NDADDR blocks are direct blocks
	 */
	if (lbn < NDADDR) {
		nb = ip->i_db[lbn];
		if (nb != 0 && ip->i_size >= (lbn + 1) * fs->fs_bsize) {
			/* TBD: trivial case; the block is already allocated */
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize > osize) {
				panic("ffs_allocblk: trying to extend a fragment");
			}
			return (0);
		} else {
			if (ip->i_size < (lbn + 1) * fs->fs_bsize)
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]),
			    nsize, cred, &newb);
			if (error)
				return (error);
			ip->i_db[lbn] = newb;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			return (0);
		}
	}

	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if (error = ufs_getlbns(vp, lbn, indirs, &num))
		return (error);

	if (num == 0) {
		panic("ffs_blkalloc: file with direct blocks only");
	}

	/*
	 * Fetch the first indirect block allocating if necessary.
	 */
	--num;
	nb = ip->i_ib[indirs[0].in_off];
	allocib = NULL;
	allocblk = allociblk;
	if (nb == 0) {
		pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
		if (error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    cred, &newb))
			return (error);
		nb = newb;
		*allocblk++ = nb;
		bp = buf_getblk(vp, (daddr64_t)((unsigned)(indirs[1].in_lbn)),
		    fs->fs_bsize, 0, 0, BLK_META);
		buf_setblkno(bp, (daddr64_t)((unsigned)fsbtodb(fs, nb)));
		buf_clear(bp);
		/*
		 * Write synchronously conditional on mount flags.
		 */
		if ((vp)->v_mount->mnt_flag & MNT_ASYNC) {
			error = 0;
			buf_bdwrite(bp);
		} else if (error = buf_bwrite(bp)) {
			goto fail;
		}
		allocib = &ip->i_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Fetch through the indirect blocks, allocating as necessary.
*/ for (i = 1;;) { error = (int)buf_meta_bread(vp, (daddr64_t)((unsigned)(indirs[i].in_lbn)), (int)fs->fs_bsize, NOCRED, &bp); if (error) { buf_brelse(bp); goto fail; } bap = (ufs_daddr_t *)buf_dataptr(bp); #if REV_ENDIAN_FS if (rev_endian) nb = OSSwapInt32(bap[indirs[i].in_off]); else { #endif /* REV_ENDIAN_FS */ nb = bap[indirs[i].in_off]; #if REV_ENDIAN_FS } #endif /* REV_ENDIAN_FS */ if (i == num) break; i += 1; if (nb != 0) { buf_brelse(bp); continue; } if (pref == 0) pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0); if (error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) { buf_brelse(bp); goto fail; } nb = newb; *allocblk++ = nb; nbp = buf_getblk(vp, (daddr64_t)((unsigned)(indirs[i].in_lbn)), fs->fs_bsize, 0, 0, BLK_META); buf_setblkno(nbp, (daddr64_t)((unsigned)fsbtodb(fs, nb))); buf_clear(nbp); /* * Write synchronously conditional on mount flags. */ if ((vp)->v_mount->mnt_flag & MNT_ASYNC) { error = 0; buf_bdwrite(nbp); } else if (error = buf_bwrite(nbp)) { buf_brelse(bp); goto fail; } #if REV_ENDIAN_FS if (rev_endian) bap[indirs[i - 1].in_off] = OSSwapInt32(nb); else { #endif /* REV_ENDIAN_FS */ bap[indirs[i - 1].in_off] = nb; #if REV_ENDIAN_FS } #endif /* REV_ENDIAN_FS */ /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & B_SYNC) { buf_bwrite(bp); } else { buf_bdwrite(bp); } } /* * Get the data block, allocating if necessary. */ if (nb == 0) { pref = ffs_blkpref(ip, lbn, indirs[i].in_off, &bap[0]); if (error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) { buf_brelse(bp); goto fail; } nb = newb; *allocblk++ = nb; #if REV_ENDIAN_FS if (rev_endian) bap[indirs[i].in_off] = OSSwapInt32(nb); else { #endif /* REV_ENDIAN_FS */ bap[indirs[i].in_off] = nb; #if REV_ENDIAN_FS } #endif /* REV_ENDIAN_FS */ /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & B_SYNC) { buf_bwrite(bp); } else { buf_bdwrite(bp); } return (0); } buf_brelse(bp); return (0); fail: /* * If we have failed part way through block allocation, we * have to deallocate any indirect blocks that we have allocated. */ for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) { ffs_blkfree(ip, *blkp, fs->fs_bsize); deallocated += fs->fs_bsize; } if (allocib != NULL) *allocib = 0; if (deallocated) { devBlockSize = vfs_devblocksize(mp); #if QUOTA /* * Restore user's disk quota because allocation failed. */ (void) chkdq(ip, (int64_t)-deallocated, cred, FORCE); #endif /* QUOTA */ ip->i_blocks -= btodb(deallocated, devBlockSize); ip->i_flag |= IN_CHANGE | IN_UPDATE; } return (error); }
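/*
 * Stand-alone sketch of the constraint stated in the ffs_blkalloc()
 * comment above: the pageout path may never read a buffer, so the only
 * decisions it can make are "already fully allocated" or "allocate a
 * fresh block"; extending a fragment would require a read and panics
 * instead.  Names here are illustrative.
 */
static int
sketch_pageout_action(long existing_blkno, long long file_size,
    long long block_end_off)
{
	if (existing_blkno != 0 && file_size >= block_end_off)
		return (0);	/* trivial case: block already allocated */
	if (existing_blkno != 0)
		return (1);	/* fragment present: must not grow it here */
	return (2);		/* unallocated: safe to allocate, no read */
}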
/* * Balloc defines the structure of file system storage * by allocating the physical blocks on a device given * the inode and the logical block number in a file. */ int ext2fs_balloc(struct inode *ip, daddr_t bn, int size, kauth_cred_t cred, struct buf **bpp, int flags) { struct m_ext2fs *fs; daddr_t nb; struct buf *bp, *nbp; struct vnode *vp = ITOV(ip); struct indir indirs[EXT2FS_NIADDR + 2]; daddr_t newb, lbn, pref; int32_t *bap; /* XXX ondisk32 */ int num, i, error; u_int deallocated; daddr_t *blkp, *allocblk, allociblk[EXT2FS_NIADDR + 1]; int32_t *allocib; /* XXX ondisk32 */ int unwindidx = -1; UVMHIST_FUNC("ext2fs_balloc"); UVMHIST_CALLED(ubchist); UVMHIST_LOG(ubchist, "bn 0x%x", bn,0,0,0); if (bpp != NULL) { *bpp = NULL; } if (bn < 0) return (EFBIG); fs = ip->i_e2fs; lbn = bn; /* * The first EXT2FS_NDADDR blocks are direct blocks */ if (bn < EXT2FS_NDADDR) { /* XXX ondisk32 */ nb = fs2h32(ip->i_e2fs_blocks[bn]); if (nb != 0) { /* * the block is already allocated, just read it. */ if (bpp != NULL) { error = bread(vp, bn, fs->e2fs_bsize, NOCRED, B_MODIFY, &bp); if (error) { return (error); } *bpp = bp; } return (0); } /* * allocate a new direct block. */ error = ext2fs_alloc(ip, bn, ext2fs_blkpref(ip, bn, bn, &ip->i_e2fs_blocks[0]), cred, &newb); if (error) return (error); ip->i_e2fs_last_lblk = lbn; ip->i_e2fs_last_blk = newb; /* XXX ondisk32 */ ip->i_e2fs_blocks[bn] = h2fs32((int32_t)newb); ip->i_flag |= IN_CHANGE | IN_UPDATE; if (bpp != NULL) { bp = getblk(vp, bn, fs->e2fs_bsize, 0, 0); bp->b_blkno = EXT2_FSBTODB(fs, newb); if (flags & B_CLRBUF) clrbuf(bp); *bpp = bp; } return (0); } /* * Determine the number of levels of indirection. */ pref = 0; if ((error = ufs_getlbns(vp, bn, indirs, &num)) != 0) return(error); #ifdef DIAGNOSTIC if (num < 1) panic ("ext2fs_balloc: ufs_getlbns returned indirect block\n"); #endif /* * Fetch the first indirect block allocating if necessary. */ --num; /* XXX ondisk32 */ nb = fs2h32(ip->i_e2fs_blocks[EXT2FS_NDADDR + indirs[0].in_off]); allocib = NULL; allocblk = allociblk; if (nb == 0) { pref = ext2fs_blkpref(ip, lbn, 0, (int32_t *)0); error = ext2fs_alloc(ip, lbn, pref, cred, &newb); if (error) return (error); nb = newb; *allocblk++ = nb; ip->i_e2fs_last_blk = newb; bp = getblk(vp, indirs[1].in_lbn, fs->e2fs_bsize, 0, 0); bp->b_blkno = EXT2_FSBTODB(fs, newb); clrbuf(bp); /* * Write synchronously so that indirect blocks * never point at garbage. */ if ((error = bwrite(bp)) != 0) goto fail; unwindidx = 0; allocib = &ip->i_e2fs_blocks[EXT2FS_NDADDR + indirs[0].in_off]; /* XXX ondisk32 */ *allocib = h2fs32((int32_t)newb); ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * Fetch through the indirect blocks, allocating as necessary. */ for (i = 1;;) { error = bread(vp, indirs[i].in_lbn, (int)fs->e2fs_bsize, NOCRED, 0, &bp); if (error) { goto fail; } bap = (int32_t *)bp->b_data; /* XXX ondisk32 */ nb = fs2h32(bap[indirs[i].in_off]); if (i == num) break; i++; if (nb != 0) { brelse(bp, 0); continue; } pref = ext2fs_blkpref(ip, lbn, 0, (int32_t *)0); error = ext2fs_alloc(ip, lbn, pref, cred, &newb); if (error) { brelse(bp, 0); goto fail; } nb = newb; *allocblk++ = nb; ip->i_e2fs_last_blk = newb; nbp = getblk(vp, indirs[i].in_lbn, fs->e2fs_bsize, 0, 0); nbp->b_blkno = EXT2_FSBTODB(fs, nb); clrbuf(nbp); /* * Write synchronously so that indirect blocks * never point at garbage. 
*/ if ((error = bwrite(nbp)) != 0) { brelse(bp, 0); goto fail; } if (unwindidx < 0) unwindidx = i - 1; /* XXX ondisk32 */ bap[indirs[i - 1].in_off] = h2fs32((int32_t)nb); /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & B_SYNC) { bwrite(bp); } else { bdwrite(bp); } } /* * Get the data block, allocating if necessary. */ if (nb == 0) { pref = ext2fs_blkpref(ip, lbn, indirs[num].in_off, &bap[0]); error = ext2fs_alloc(ip, lbn, pref, cred, &newb); if (error) { brelse(bp, 0); goto fail; } nb = newb; *allocblk++ = nb; ip->i_e2fs_last_lblk = lbn; ip->i_e2fs_last_blk = newb; /* XXX ondisk32 */ bap[indirs[num].in_off] = h2fs32((int32_t)nb); /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & B_SYNC) { bwrite(bp); } else { bdwrite(bp); } if (bpp != NULL) { nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0); nbp->b_blkno = EXT2_FSBTODB(fs, nb); if (flags & B_CLRBUF) clrbuf(nbp); *bpp = nbp; } return (0); } brelse(bp, 0); if (bpp != NULL) { if (flags & B_CLRBUF) { error = bread(vp, lbn, (int)fs->e2fs_bsize, NOCRED, B_MODIFY, &nbp); if (error) { goto fail; } } else { nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0); nbp->b_blkno = EXT2_FSBTODB(fs, nb); } *bpp = nbp; } return (0); fail: /* * If we have failed part way through block allocation, we * have to deallocate any indirect blocks that we have allocated. */ for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) { ext2fs_blkfree(ip, *blkp); deallocated += fs->e2fs_bsize; } if (unwindidx >= 0) { if (unwindidx == 0) { *allocib = 0; } else { int r; r = bread(vp, indirs[unwindidx].in_lbn, (int)fs->e2fs_bsize, NOCRED, B_MODIFY, &bp); if (r) { panic("Could not unwind indirect block, error %d", r); } else { bap = (int32_t *)bp->b_data; /* XXX ondisk32 */ bap[indirs[unwindidx].in_off] = 0; if (flags & B_SYNC) bwrite(bp); else bdwrite(bp); } } for (i = unwindidx + 1; i <= num; i++) { bp = getblk(vp, indirs[i].in_lbn, (int)fs->e2fs_bsize, 0, 0); brelse(bp, BC_INVAL); } } if (deallocated) { ext2fs_setnblock(ip, ext2fs_nblock(ip) - btodb(deallocated)); ip->i_e2fs_flags |= IN_CHANGE | IN_UPDATE; } return error; }
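/*
 * Ordering sketch (illustrative, hypothetical helper names): in all of
 * the balloc variants above, a newly allocated indirect block is written
 * synchronously *before* its parent is updated to point at it, so a
 * crash between the two steps leaves at worst an unreferenced block,
 * never an on-disk pointer to garbage.
 */
extern int sketch_write_sync(long child_blk);	   /* hypothetical bwrite() */
extern void sketch_write_delayed(long parent_blk); /* hypothetical bdwrite() */

static int
sketch_link_child(long parent_blk, long *parent_entry, long child_blk)
{
	int error;

	if ((error = sketch_write_sync(child_blk)) != 0)
		return (error);		/* child never linked on failure */
	*parent_entry = child_blk;	/* safe: child is already on disk */
	sketch_write_delayed(parent_blk); /* parent may be written lazily */
	return (0);
}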
/* * Indirect blocks are now on the vnode for the file. They are given negative * logical block numbers. Indirect blocks are addressed by the negative * address of the first data block to which they point. Double indirect blocks * are addressed by one less than the address of the first indirect block to * which they point. Triple indirect blocks are addressed by one less than * the address of the first double indirect block to which they point. * * ufs_bmaparray does the bmap conversion, and if requested returns the * array of logical blocks which must be traversed to get to a block. * Each entry contains the offset into that block that gets you to the * next block and the disk address of the block (if it is assigned). */ int ufs_bmaparray(struct vnode *vp, daddr64_t bn, daddr64_t *bnp, struct indir *ap, int *nump, int *runp) { struct inode *ip; struct buf *bp; struct ufsmount *ump; struct mount *mp; struct vnode *devvp; struct indir a[NIADDR+1], *xap; daddr64_t daddr, metalbn; int error, maxrun = 0, num; ip = VTOI(vp); mp = vp->v_mount; ump = VFSTOUFS(mp); #ifdef DIAGNOSTIC if ((ap != NULL && nump == NULL) || (ap == NULL && nump != NULL)) panic("ufs_bmaparray: invalid arguments"); #endif if (runp) { /* * XXX * If MAXBSIZE is the largest transfer the disks can handle, * we probably want maxrun to be 1 block less so that we * don't create a block larger than the device can handle. */ *runp = 0; maxrun = MAXBSIZE / mp->mnt_stat.f_iosize - 1; } xap = ap == NULL ? a : ap; if (!nump) nump = &num; if ((error = ufs_getlbns(vp, bn, xap, nump)) != 0) return (error); num = *nump; if (num == 0) { *bnp = blkptrtodb(ump, DIP(ip, db[bn])); if (*bnp == 0) *bnp = -1; else if (runp) for (++bn; bn < NDADDR && *runp < maxrun && is_sequential(ump, DIP(ip, db[bn - 1]), DIP(ip, db[bn])); ++bn, ++*runp); return (0); } /* Get disk address out of indirect block array */ daddr = DIP(ip, ib[xap->in_off]); devvp = VFSTOUFS(vp->v_mount)->um_devvp; for (bp = NULL, ++xap; --num; ++xap) { /* * Exit the loop if there is no disk address assigned yet and * the indirect block isn't in the cache, or if we were * looking for an indirect block and we've found it. */ metalbn = xap->in_lbn; if ((daddr == 0 && !incore(vp, metalbn)) || metalbn == bn) break; /* * If we get here, we've either got the block in the cache * or we have a disk address for it, go fetch it. */ if (bp) brelse(bp); xap->in_exists = 1; bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0); if (bp->b_flags & (B_DONE | B_DELWRI)) { ; } #ifdef DIAGNOSTIC else if (!daddr) panic("ufs_bmaparray: indirect block not in cache"); #endif else { bp->b_blkno = blkptrtodb(ump, daddr); bp->b_flags |= B_READ; bcstats.pendingreads++; bcstats.numreads++; VOP_STRATEGY(bp); curproc->p_ru.ru_inblock++; /* XXX */ if ((error = biowait(bp)) != 0) { brelse(bp); return (error); } } #ifdef FFS2 if (ip->i_ump->um_fstype == UM_UFS2) { daddr = ((int64_t *)bp->b_data)[xap->in_off]; if (num == 1 && daddr && runp) for (bn = xap->in_off + 1; bn < MNINDIR(ump) && *runp < maxrun && is_sequential(ump, ((int64_t *)bp->b_data)[bn - 1], ((int64_t *)bp->b_data)[bn]); ++bn, ++*runp); continue; } #endif /* FFS2 */ daddr = ((int32_t *)bp->b_data)[xap->in_off]; if (num == 1 && daddr && runp) for (bn = xap->in_off + 1; bn < MNINDIR(ump) && *runp < maxrun && is_sequential(ump, ((int32_t *)bp->b_data)[bn - 1], ((int32_t *)bp->b_data)[bn]); ++bn, ++*runp); } if (bp) brelse(bp); daddr = blkptrtodb(ump, daddr); *bnp = daddr == 0 ? -1 : daddr; return (0); }
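/*
 * Worked example of the negative-lbn addressing described above; a
 * sketch, not part of the original source.  With "ndaddr" direct
 * blocks and "nindir" pointers per indirect block, the first single
 * indirect block maps data blocks [ndaddr, ndaddr + nindir), so its
 * lbn is -ndaddr; the double indirect block is addressed by one less
 * than the lbn of the first indirect block it points to.  The
 * standalone form and example_ names are assumptions for illustration.
 */
static void
example_metalbn(int64_t ndaddr, int64_t nindir,
    int64_t *single_lbn, int64_t *double_lbn)
{
	/* lbn of the single indirect block: negative of first data lbn */
	*single_lbn = -ndaddr;
	/*
	 * The first indirect block reached through the double indirect
	 * block maps data starting at ndaddr + nindir, so that indirect
	 * block would sit at -(ndaddr + nindir); the double indirect
	 * block is one less than that.
	 */
	*double_lbn = -(ndaddr + nindir) - 1;
	/* e.g. ndaddr 12, nindir 2048: lbns -12 and -2061 */
}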
int ffs_fsync(void *v) { struct vop_fsync_args /* { struct vnode *a_vp; kauth_cred_t a_cred; int a_flags; off_t a_offlo; off_t a_offhi; struct lwp *a_l; } */ *ap = v; struct buf *bp; int num, error, i; struct indir ia[NIADDR + 1]; int bsize; daddr_t blk_high; struct vnode *vp; struct mount *mp; vp = ap->a_vp; mp = vp->v_mount; fstrans_start(mp, FSTRANS_LAZY); if ((ap->a_offlo == 0 && ap->a_offhi == 0) || (vp->v_type != VREG)) { error = ffs_full_fsync(vp, ap->a_flags); goto out; } bsize = mp->mnt_stat.f_iosize; blk_high = ap->a_offhi / bsize; if (ap->a_offhi % bsize != 0) blk_high++; /* * First, flush all pages in range. */ mutex_enter(vp->v_interlock); error = VOP_PUTPAGES(vp, trunc_page(ap->a_offlo), round_page(ap->a_offhi), PGO_CLEANIT | ((ap->a_flags & FSYNC_WAIT) ? PGO_SYNCIO : 0)); if (error) { goto out; } #ifdef WAPBL KASSERT(vp->v_type == VREG); if (mp->mnt_wapbl) { /* * Don't bother writing out metadata if the syncer is * making the request. We will let the sync vnode * write it out in a single burst through a call to * VFS_SYNC(). */ if ((ap->a_flags & (FSYNC_DATAONLY | FSYNC_LAZY)) != 0) { fstrans_done(mp); return 0; } error = 0; if (vp->v_tag == VT_UFS && VTOI(vp)->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE | IN_MODIFY | IN_MODIFIED | IN_ACCESSED)) { error = UFS_WAPBL_BEGIN(mp); if (error) { fstrans_done(mp); return error; } error = ffs_update(vp, NULL, NULL, UPDATE_CLOSE | ((ap->a_flags & FSYNC_WAIT) ? UPDATE_WAIT : 0)); UFS_WAPBL_END(mp); } if (error || (ap->a_flags & FSYNC_NOLOG) != 0) { fstrans_done(mp); return error; } error = wapbl_flush(mp->mnt_wapbl, 0); fstrans_done(mp); return error; } #endif /* WAPBL */ /* * Then, flush indirect blocks. */ if (blk_high >= NDADDR) { error = ufs_getlbns(vp, blk_high, ia, &num); if (error) goto out; mutex_enter(&bufcache_lock); for (i = 0; i < num; i++) { if ((bp = incore(vp, ia[i].in_lbn)) == NULL) continue; if ((bp->b_cflags & BC_BUSY) != 0 || (bp->b_oflags & BO_DELWRI) == 0) continue; bp->b_cflags |= BC_BUSY | BC_VFLUSH; mutex_exit(&bufcache_lock); bawrite(bp); mutex_enter(&bufcache_lock); } mutex_exit(&bufcache_lock); } if (ap->a_flags & FSYNC_WAIT) { mutex_enter(vp->v_interlock); while (vp->v_numoutput > 0) cv_wait(&vp->v_cv, vp->v_interlock); mutex_exit(vp->v_interlock); } error = ffs_update(vp, NULL, NULL, UPDATE_CLOSE | (((ap->a_flags & (FSYNC_WAIT | FSYNC_DATAONLY)) == FSYNC_WAIT) ? UPDATE_WAIT : 0)); if (error == 0 && ap->a_flags & FSYNC_CACHE) { int l = 0; VOP_IOCTL(VTOI(vp)->i_devvp, DIOCCACHESYNC, &l, FWRITE, curlwp->l_cred); } out: fstrans_done(mp); return error; }
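/*
 * Sketch of the rounding ffs_fsync() applies above to turn the byte
 * offset a_offhi into the highest logical block whose indirect chain
 * may need flushing: integer division, rounded up when the offset is
 * not block aligned.  The standalone form and names are assumptions
 * for illustration.
 */
static int64_t
example_blk_high(int64_t offhi, int bsize)
{
	int64_t blk_high = offhi / bsize;

	/* a partial trailing block still occupies a whole block */
	if (offhi % bsize != 0)
		blk_high++;
	return (blk_high);
}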
int ufs_bmaparray( vnode *vp, ufs2_daddr_t bn, ufs2_daddr_t *bnp, Buf *nbp, int *runp, int *runb) { Buf *bp; Indir a[UFS_NIADDR+1], *ap; ufs2_daddr_t daddr; ufs_lbn_t metalbn; int error, num, maxrun = 0; int *nump; inode *ip = vp->data; MountPoint *mp = vp->mount; ufsmount *ump = mp->mnt_data; if (runp) { maxrun = mp->mnt_iosize_max / mp->mnt_stat.f_iosize - 1; *runp = 0; } if (runb) { *runb = 0; } ap = a; nump = &num; error = ufs_getlbns(vp, bn, ap, nump); if (error) return error; num = *nump; if (num == 0) { if (bn >= 0 && bn < UFS_NDADDR) { *bnp = blkptrtodb(ump, ip->din2->di_db[bn]); } else if (bn < 0 && bn >= -UFS_NXADDR) { *bnp = blkptrtodb(ump, ip->din2->di_extb[-1 - bn]); if (*bnp == 0) *bnp = -1; if (nbp == nil) panic("ufs_bmaparray: mapping ext data"); // TODO HARVEY Mark ALTDATA? //nbp->b_xflags |= BX_ALTDATA; return (0); } else { panic("ufs_bmaparray: blkno out of range"); } /* * Since this is FFS independent code, we are out of * scope for the definitions of BLK_NOCOPY and * BLK_SNAP, but we do know that they will fall in * the range 1..um_seqinc, so we use that test and * return a request for a zeroed out buffer if attempts * are made to read a BLK_NOCOPY or BLK_SNAP block. */ if ((ip->i_flags & SF_SNAPSHOT) && ip->din2->di_db[bn] > 0 && ip->din2->di_db[bn] < ump->um_seqinc) { *bnp = -1; } else if (*bnp == 0) { if (ip->i_flags & SF_SNAPSHOT) *bnp = blkptrtodb(ump, bn * ump->um_seqinc); else *bnp = -1; } else if (runp) { ufs2_daddr_t bnb = bn; for (++bn; bn < UFS_NDADDR && *runp < maxrun && is_sequential(ump, ip->din2->di_db[bn - 1], ip->din2->di_db[bn]); ++bn, ++*runp) ; bn = bnb; if (runb && (bn > 0)) { for (--bn; (bn >= 0) && (*runb < maxrun) && is_sequential(ump, ip->din2->di_db[bn], ip->din2->di_db[bn+1]); --bn, ++*runb) ; } } return (0); } /* Get disk address out of indirect block array */ daddr = ip->din2->di_ib[ap->in_off]; for (bp = nil, ++ap; --num; ++ap) { /* * Exit the loop if there is no disk address assigned yet and * the indirect block isn't in the cache, or if we were * looking for an indirect block and we've found it. */ metalbn = ap->in_lbn; // TODO HARVEY Going to have to revisit this when we implement // writing, so we can read writes before they've been flushed // to disk. //if ((daddr == 0 && !incore(&vp->v_bufobj, metalbn)) || metalbn == bn) // break; if (daddr == 0 || metalbn == bn) break; /* * If we get here, we've either got the block in the cache * or we have a disk address for it, go fetch it. 
*/ if (bp) releasebuf(bp); bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0); // TODO HARVEY Revisit when we manage a cache of Bufs /*if ((bp->b_flags & B_CACHE) == 0) { #ifdef INVARIANTS if (!daddr) panic("ufs_bmaparray: indirect block not in cache"); #endif bp->b_blkno = blkptrtodb(ump, daddr); bp->b_iocmd = BIO_READ; bp->b_flags &= ~B_INVAL; bp->b_ioflags &= ~BIO_ERROR; vfs_busy_pages(bp, 0); bp->b_iooffset = dbtob(bp->b_blkno); ffs_geom_strategy(bp); curthread->td_ru.ru_inblock++; error = bufwait(bp); if (error) { brelse(bp); return (error); } }*/ daddr = ((ufs2_daddr_t *)bp->data)[ap->in_off]; if (num == 1 && daddr && runp) { for (bn = ap->in_off + 1; bn < ump->um_nindir && *runp < maxrun && is_sequential(ump, ((ufs2_daddr_t *)bp->data)[bn - 1], ((ufs2_daddr_t *)bp->data)[bn]); ++bn, ++*runp); bn = ap->in_off; if (runb && bn) { for (--bn; bn >= 0 && *runb < maxrun && is_sequential(ump, ((ufs2_daddr_t *)bp->data)[bn], ((ufs2_daddr_t *)bp->data)[bn + 1]); --bn, ++*runb); } } } if (bp) releasebuf(bp); /* * Since this is FFS independent code, we are out of scope for the * definitions of BLK_NOCOPY and BLK_SNAP, but we do know that they * will fall in the range 1..um_seqinc, so we use that test and * return a request for a zeroed out buffer if attempts are made * to read a BLK_NOCOPY or BLK_SNAP block. */ if ((ip->i_flags & SF_SNAPSHOT) && daddr > 0 && daddr < ump->um_seqinc) { *bnp = -1; return (0); } *bnp = blkptrtodb(ump, daddr); if (*bnp == 0) { if (ip->i_flags & SF_SNAPSHOT) *bnp = blkptrtodb(ump, bn * ump->um_seqinc); else *bnp = -1; } return 0; }
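/*
 * Sketch of the read-ahead run detection shared by the bmap variants
 * above.  In the real tree is_sequential() is a macro testing whether
 * the next block pointer is physically contiguous with the previous
 * one (it equals the previous pointer plus um_seqinc, the number of
 * disk blocks per filesystem block).  The standalone form and its
 * example_ names are assumptions for illustration.
 */
static int
example_count_run(const int64_t *blks, int nblks, int64_t seqinc, int maxrun)
{
	int i, run = 0;

	/* count physically contiguous successors, capped at maxrun */
	for (i = 1; i < nblks && run < maxrun; i++) {
		if (blks[i] == 0 || blks[i] != blks[i - 1] + seqinc)
			break;
		run++;
	}
	return (run);
}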
/* * Balloc defines the structure of file system storage * by allocating the physical blocks on a device given * the inode and the logical block number in a file. */ int ffs1_balloc(struct inode *ip, off_t startoffset, int size, struct ucred *cred, int flags, struct buf **bpp) { daddr_t lbn, nb, newb, pref; struct fs *fs; struct buf *bp, *nbp; struct vnode *vp; struct proc *p; struct indir indirs[NIADDR + 2]; int32_t *bap; int deallocated, osize, nsize, num, i, error; int32_t *allocib, *blkp, *allocblk, allociblk[NIADDR+1]; int unwindidx = -1; vp = ITOV(ip); fs = ip->i_fs; p = curproc; lbn = lblkno(fs, startoffset); size = blkoff(fs, startoffset) + size; if (size > fs->fs_bsize) panic("ffs1_balloc: blk too big"); if (bpp != NULL) *bpp = NULL; if (lbn < 0) return (EFBIG); /* * If the next write will extend the file into a new block, * and the file is currently composed of a fragment * this fragment has to be extended to be a full block. */ nb = lblkno(fs, ip->i_ffs1_size); if (nb < NDADDR && nb < lbn) { osize = blksize(fs, ip, nb); if (osize < fs->fs_bsize && osize > 0) { error = ffs_realloccg(ip, nb, ffs1_blkpref(ip, nb, (int)nb, &ip->i_ffs1_db[0]), osize, (int)fs->fs_bsize, cred, bpp, &newb); if (error) return (error); if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, nb, newb, ip->i_ffs1_db[nb], fs->fs_bsize, osize, bpp ? *bpp : NULL); ip->i_ffs1_size = lblktosize(fs, nb + 1); uvm_vnp_setsize(vp, ip->i_ffs1_size); ip->i_ffs1_db[nb] = newb; ip->i_flag |= IN_CHANGE | IN_UPDATE; if (bpp != NULL) { if (flags & B_SYNC) bwrite(*bpp); else bawrite(*bpp); } } } /* * The first NDADDR blocks are direct blocks */ if (lbn < NDADDR) { nb = ip->i_ffs1_db[lbn]; if (nb != 0 && ip->i_ffs1_size >= lblktosize(fs, lbn + 1)) { /* * The block is an already-allocated direct block * and the file already extends past this block, * thus this must be a whole block. * Just read the block (if requested). */ if (bpp != NULL) { error = bread(vp, lbn, fs->fs_bsize, bpp); if (error) { brelse(*bpp); return (error); } } return (0); } if (nb != 0) { /* * Consider need to reallocate a fragment. */ osize = fragroundup(fs, blkoff(fs, ip->i_ffs1_size)); nsize = fragroundup(fs, size); if (nsize <= osize) { /* * The existing block is already * at least as big as we want. * Just read the block (if requested). */ if (bpp != NULL) { error = bread(vp, lbn, fs->fs_bsize, bpp); if (error) { brelse(*bpp); return (error); } (*bpp)->b_bcount = osize; } return (0); } else { /* * The existing block is smaller than we * want, grow it. */ error = ffs_realloccg(ip, lbn, ffs1_blkpref(ip, lbn, (int)lbn, &ip->i_ffs1_db[0]), osize, nsize, cred, bpp, &newb); if (error) return (error); if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, lbn, newb, nb, nsize, osize, bpp ? *bpp : NULL); } } else { /* * The block was not previously allocated, * allocate a new block or fragment. */ if (ip->i_ffs1_size < lblktosize(fs, lbn + 1)) nsize = fragroundup(fs, size); else nsize = fs->fs_bsize; error = ffs_alloc(ip, lbn, ffs1_blkpref(ip, lbn, (int)lbn, &ip->i_ffs1_db[0]), nsize, cred, &newb); if (error) return (error); if (bpp != NULL) { *bpp = getblk(vp, lbn, fs->fs_bsize, 0, 0); if (nsize < fs->fs_bsize) (*bpp)->b_bcount = nsize; (*bpp)->b_blkno = fsbtodb(fs, newb); if (flags & B_CLRBUF) clrbuf(*bpp); } if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, lbn, newb, 0, nsize, 0, bpp ? *bpp : NULL); } ip->i_ffs1_db[lbn] = newb; ip->i_flag |= IN_CHANGE | IN_UPDATE; return (0); } /* * Determine the number of levels of indirection. 
*/ pref = 0; if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0) return(error); #ifdef DIAGNOSTIC if (num < 1) panic ("ffs1_balloc: ufs_bmaparray returned indirect block"); #endif /* * Fetch the first indirect block allocating if necessary. */ --num; nb = ip->i_ffs1_ib[indirs[0].in_off]; allocib = NULL; allocblk = allociblk; if (nb == 0) { pref = ffs1_blkpref(ip, lbn, -indirs[0].in_off - 1, NULL); error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb); if (error) goto fail; nb = newb; *allocblk++ = nb; bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0); bp->b_blkno = fsbtodb(fs, nb); clrbuf(bp); if (DOINGSOFTDEP(vp)) { softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off, newb, 0, fs->fs_bsize, 0, bp); bdwrite(bp); } else { /* * Write synchronously so that indirect blocks * never point at garbage. */ if ((error = bwrite(bp)) != 0) goto fail; } allocib = &ip->i_ffs1_ib[indirs[0].in_off]; *allocib = nb; ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * Fetch through the indirect blocks, allocating as necessary. */ for (i = 1;;) { error = bread(vp, indirs[i].in_lbn, (int)fs->fs_bsize, &bp); if (error) { brelse(bp); goto fail; } bap = (int32_t *)bp->b_data; nb = bap[indirs[i].in_off]; if (i == num) break; i++; if (nb != 0) { brelse(bp); continue; } if (pref == 0) pref = ffs1_blkpref(ip, lbn, i - num - 1, NULL); error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb); if (error) { brelse(bp); goto fail; } nb = newb; *allocblk++ = nb; nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); clrbuf(nbp); if (DOINGSOFTDEP(vp)) { softdep_setup_allocindir_meta(nbp, ip, bp, indirs[i - 1].in_off, nb); bdwrite(nbp); } else { /* * Write synchronously so that indirect blocks * never point at garbage. */ if ((error = bwrite(nbp)) != 0) { brelse(bp); goto fail; } } bap[indirs[i - 1].in_off] = nb; if (allocib == NULL && unwindidx < 0) unwindidx = i - 1; /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & B_SYNC) { bwrite(bp); } else { bdwrite(bp); } } /* * Get the data block, allocating if necessary. */ if (nb == 0) { pref = ffs1_blkpref(ip, lbn, indirs[i].in_off, &bap[0]); error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb); if (error) { brelse(bp); goto fail; } nb = newb; *allocblk++ = nb; if (bpp != NULL) { nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); if (flags & B_CLRBUF) clrbuf(nbp); *bpp = nbp; } if (DOINGSOFTDEP(vp)) softdep_setup_allocindir_page(ip, lbn, bp, indirs[i].in_off, nb, 0, bpp ? *bpp : NULL); bap[indirs[i].in_off] = nb; /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & B_SYNC) { bwrite(bp); } else { bdwrite(bp); } return (0); } brelse(bp); if (bpp != NULL) { if (flags & B_CLRBUF) { error = bread(vp, lbn, (int)fs->fs_bsize, &nbp); if (error) { brelse(nbp); goto fail; } } else { nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); } *bpp = nbp; } return (0); fail: /* * If we have failed to allocate any blocks, simply return the error. * This is the usual case and avoids the need to fsync the file. */ if (allocblk == allociblk && allocib == NULL && unwindidx == -1) return (error); /* * If we have failed part way through block allocation, we have to * deallocate any indirect blocks that we have allocated. We have to * fsync the file before we start to get rid of all of its * dependencies so that we do not leave them dangling. 
We have to sync * it at the end so that the softdep code does not find any untracked * changes. Although this is really slow, running out of disk space is * not expected to be a common occurrence. The error return from fsync * is ignored as we already have an error to return to the user. */ VOP_FSYNC(vp, p->p_ucred, MNT_WAIT, p); for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) { ffs_blkfree(ip, *blkp, fs->fs_bsize); deallocated += fs->fs_bsize; } if (allocib != NULL) { *allocib = 0; } else if (unwindidx >= 0) { int r; r = bread(vp, indirs[unwindidx].in_lbn, (int)fs->fs_bsize, &bp); if (r) panic("Could not unwind indirect block, error %d", r); bap = (int32_t *)bp->b_data; bap[indirs[unwindidx].in_off] = 0; if (flags & B_SYNC) { bwrite(bp); } else { bdwrite(bp); } } if (deallocated) { /* * Restore user's disk quota because allocation failed. */ (void)ufs_quota_free_blocks(ip, btodb(deallocated), cred); ip->i_ffs1_blocks -= btodb(deallocated); ip->i_flag |= IN_CHANGE | IN_UPDATE; } VOP_FSYNC(vp, p->p_ucred, MNT_WAIT, p); return (error); }
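/*
 * Sketch of the undo bookkeeping used by the balloc variants above:
 * every block allocated on the way down is recorded (allociblk[] in
 * the real code, at most one entry per level of indirection plus the
 * data block) so that the fail: path can free exactly the blocks this
 * call created.  The standalone form, the struct, and the function
 * pointer are assumptions for illustration.
 */
struct example_undo {
	int64_t	blks[4];	/* NIADDR indirect blocks + data block */
	int	n;
};

static void
example_record_alloc(struct example_undo *u, int64_t blkno)
{
	u->blks[u->n++] = blkno;
}

static void
example_unwind(struct example_undo *u, void (*blkfree)(int64_t))
{
	int i;

	/* free only what this invocation allocated */
	for (i = 0; i < u->n; i++)
		blkfree(u->blks[i]);
	u->n = 0;
}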
int ffs2_balloc(struct inode *ip, off_t off, int size, struct ucred *cred, int flags, struct buf **bpp) { daddr_t lbn, lastlbn, nb, newb, *blkp; daddr_t pref, *allocblk, allociblk[NIADDR + 1]; daddr_t *bap, *allocib; int deallocated, osize, nsize, num, i, error, unwindidx, r; struct buf *bp, *nbp; struct indir indirs[NIADDR + 2]; struct fs *fs; struct vnode *vp; struct proc *p; vp = ITOV(ip); fs = ip->i_fs; p = curproc; unwindidx = -1; lbn = lblkno(fs, off); size = blkoff(fs, off) + size; if (size > fs->fs_bsize) panic("ffs2_balloc: block too big"); if (bpp != NULL) *bpp = NULL; if (lbn < 0) return (EFBIG); /* * If the next write will extend the file into a new block, and the * file is currently composed of a fragment, this fragment has to be * extended to be a full block. */ lastlbn = lblkno(fs, ip->i_ffs2_size); if (lastlbn < NDADDR && lastlbn < lbn) { nb = lastlbn; osize = blksize(fs, ip, nb); if (osize < fs->fs_bsize && osize > 0) { error = ffs_realloccg(ip, nb, ffs2_blkpref(ip, lastlbn, nb, &ip->i_ffs2_db[0]), osize, (int) fs->fs_bsize, cred, bpp, &newb); if (error) return (error); if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, nb, newb, ip->i_ffs2_db[nb], fs->fs_bsize, osize, bpp ? *bpp : NULL); ip->i_ffs2_size = lblktosize(fs, nb + 1); uvm_vnp_setsize(vp, ip->i_ffs2_size); ip->i_ffs2_db[nb] = newb; ip->i_flag |= IN_CHANGE | IN_UPDATE; if (bpp) { if (flags & B_SYNC) bwrite(*bpp); else bawrite(*bpp); } } } /* * The first NDADDR blocks are direct. */ if (lbn < NDADDR) { nb = ip->i_ffs2_db[lbn]; if (nb != 0 && ip->i_ffs2_size >= lblktosize(fs, lbn + 1)) { /* * The direct block is already allocated and the file * extends past this block, thus this must be a whole * block. Just read it, if requested. */ if (bpp != NULL) { error = bread(vp, lbn, fs->fs_bsize, bpp); if (error) { brelse(*bpp); return (error); } } return (0); } if (nb != 0) { /* * Consider the need to allocate a fragment. */ osize = fragroundup(fs, blkoff(fs, ip->i_ffs2_size)); nsize = fragroundup(fs, size); if (nsize <= osize) { /* * The existing block is already at least as * big as we want. Just read it, if requested. */ if (bpp != NULL) { error = bread(vp, lbn, fs->fs_bsize, bpp); if (error) { brelse(*bpp); return (error); } (*bpp)->b_bcount = osize; } return (0); } else { /* * The existing block is smaller than we want, * grow it. */ error = ffs_realloccg(ip, lbn, ffs2_blkpref(ip, lbn, (int) lbn, &ip->i_ffs2_db[0]), osize, nsize, cred, bpp, &newb); if (error) return (error); if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, lbn, newb, nb, nsize, osize, bpp ? *bpp : NULL); } } else { /* * The block was not previously allocated, allocate a * new block or fragment. */ if (ip->i_ffs2_size < lblktosize(fs, lbn + 1)) nsize = fragroundup(fs, size); else nsize = fs->fs_bsize; error = ffs_alloc(ip, lbn, ffs2_blkpref(ip, lbn, (int) lbn, &ip->i_ffs2_db[0]), nsize, cred, &newb); if (error) return (error); if (bpp != NULL) { bp = getblk(vp, lbn, fs->fs_bsize, 0, 0); if (nsize < fs->fs_bsize) bp->b_bcount = nsize; bp->b_blkno = fsbtodb(fs, newb); if (flags & B_CLRBUF) clrbuf(bp); *bpp = bp; } if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, lbn, newb, 0, nsize, 0, bpp ? *bpp : NULL); } ip->i_ffs2_db[lbn] = newb; ip->i_flag |= IN_CHANGE | IN_UPDATE; return (0); } /* * Determine the number of levels of indirection. 
*/ pref = 0; error = ufs_getlbns(vp, lbn, indirs, &num); if (error) return (error); #ifdef DIAGNOSTIC if (num < 1) panic("ffs2_balloc: ufs_bmaparray returned indirect block"); #endif /* * Fetch the first indirect block allocating if necessary. */ --num; nb = ip->i_ffs2_ib[indirs[0].in_off]; allocib = NULL; allocblk = allociblk; if (nb == 0) { pref = ffs2_blkpref(ip, lbn, -indirs[0].in_off - 1, NULL); error = ffs_alloc(ip, lbn, pref, (int) fs->fs_bsize, cred, &newb); if (error) goto fail; nb = newb; *allocblk++ = nb; bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0); bp->b_blkno = fsbtodb(fs, nb); clrbuf(bp); if (DOINGSOFTDEP(vp)) { softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off, newb, 0, fs->fs_bsize, 0, bp); bdwrite(bp); } else { /* * Write synchronously so that indirect blocks never * point at garbage. */ error = bwrite(bp); if (error) goto fail; } unwindidx = 0; allocib = &ip->i_ffs2_ib[indirs[0].in_off]; *allocib = nb; ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * Fetch through the indirect blocks, allocating as necessary. */ for (i = 1;;) { error = bread(vp, indirs[i].in_lbn, (int)fs->fs_bsize, &bp); if (error) { brelse(bp); goto fail; } bap = (int64_t *) bp->b_data; nb = bap[indirs[i].in_off]; if (i == num) break; i++; if (nb != 0) { brelse(bp); continue; } if (pref == 0) pref = ffs2_blkpref(ip, lbn, i - num - 1, NULL); error = ffs_alloc(ip, lbn, pref, (int) fs->fs_bsize, cred, &newb); if (error) { brelse(bp); goto fail; } nb = newb; *allocblk++ = nb; nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); clrbuf(nbp); if (DOINGSOFTDEP(vp)) { softdep_setup_allocindir_meta(nbp, ip, bp, indirs[i - 1].in_off, nb); bdwrite(nbp); } else { /* * Write synchronously so that indirect blocks never * point at garbage. */ error = bwrite(nbp); if (error) { brelse(bp); goto fail; } } if (unwindidx < 0) unwindidx = i - 1; bap[indirs[i - 1].in_off] = nb; /* * If required, write synchronously, otherwise use delayed * write. */ if (flags & B_SYNC) bwrite(bp); else bdwrite(bp); } /* * Get the data block, allocating if necessary. */ if (nb == 0) { pref = ffs2_blkpref(ip, lbn, indirs[num].in_off, &bap[0]); error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb); if (error) { brelse(bp); goto fail; } nb = newb; *allocblk++ = nb; if (bpp != NULL) { nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); if (flags & B_CLRBUF) clrbuf(nbp); *bpp = nbp; } if (DOINGSOFTDEP(vp)) softdep_setup_allocindir_page(ip, lbn, bp, indirs[num].in_off, nb, 0, bpp ? *bpp : NULL); bap[indirs[num].in_off] = nb; if (allocib == NULL && unwindidx < 0) unwindidx = i - 1; /* * If required, write synchronously, otherwise use delayed * write. */ if (flags & B_SYNC) bwrite(bp); else bdwrite(bp); return (0); } brelse(bp); if (bpp != NULL) { if (flags & B_CLRBUF) { error = bread(vp, lbn, (int)fs->fs_bsize, &nbp); if (error) { brelse(nbp); goto fail; } } else { nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); clrbuf(nbp); } *bpp = nbp; } return (0); fail: /* * If we have failed to allocate any blocks, simply return the error. * This is the usual case and avoids the need to fsync the file. */ if (allocblk == allociblk && allocib == NULL && unwindidx == -1) return (error); /* * If we have failed part way through block allocation, we have to * deallocate any indirect blocks that we have allocated. We have to * fsync the file before we start to get rid of all of its * dependencies so that we do not leave them dangling. 
We have to sync * it at the end so that the softdep code does not find any untracked * changes. Although this is really slow, running out of disk space is * not expected to be a common occurrence. The error return from fsync * is ignored as we already have an error to return to the user. */ VOP_FSYNC(vp, p->p_ucred, MNT_WAIT, p); if (unwindidx >= 0) { /* * First write out any buffers we've created to resolve their * softdeps. This must be done in reverse order of creation so * that we resolve the dependencies in one pass. * Write the cylinder group buffers for these buffers too. */ for (i = num; i >= unwindidx; i--) { if (i == 0) break; bp = getblk(vp, indirs[i].in_lbn, (int) fs->fs_bsize, 0, 0); if (bp->b_flags & B_DELWRI) { nb = fsbtodb(fs, cgtod(fs, dtog(fs, dbtofsb(fs, bp->b_blkno)))); bwrite(bp); bp = getblk(ip->i_devvp, nb, (int) fs->fs_cgsize, 0, 0); if (bp->b_flags & B_DELWRI) bwrite(bp); else { bp->b_flags |= B_INVAL; brelse(bp); } } else { bp->b_flags |= B_INVAL; brelse(bp); } } if (DOINGSOFTDEP(vp) && unwindidx == 0) { ip->i_flag |= IN_CHANGE | IN_UPDATE; ffs_update(ip, 1); } /* * Now that any dependencies that we created have been * resolved, we can undo the partial allocation. */ if (unwindidx == 0) { *allocib = 0; ip->i_flag |= IN_CHANGE | IN_UPDATE; if (DOINGSOFTDEP(vp)) ffs_update(ip, 1); } else { r = bread(vp, indirs[unwindidx].in_lbn, (int)fs->fs_bsize, &bp); if (r) panic("ffs2_balloc: unwind failed"); bap = (int64_t *) bp->b_data; bap[indirs[unwindidx].in_off] = 0; bwrite(bp); } for (i = unwindidx + 1; i <= num; i++) { bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0, 0); bp->b_flags |= B_INVAL; brelse(bp); } } for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) { ffs_blkfree(ip, *blkp, fs->fs_bsize); deallocated += fs->fs_bsize; } if (deallocated) { /* * Restore user's disk quota because allocation failed. */ (void) ufs_quota_free_blocks(ip, btodb(deallocated), cred); ip->i_ffs2_blocks -= btodb(deallocated); ip->i_flag |= IN_CHANGE | IN_UPDATE; } VOP_FSYNC(vp, p->p_ucred, MNT_WAIT, p); return (error); }
int
ext2fs_bmaparray(struct vnode *vp, daddr_t bn, daddr_t *bnp, struct indir *ap,
    int *nump, int *runp)
{
	struct inode *ip;
	struct buf *bp, *cbp;
	struct ufsmount *ump;	/* XXX ufsmount is not ported yet; never set */
	struct mount *mp;
	struct indir a[NIADDR+1], *xap;
	daddr_t daddr;
	daddr_t metalbn;
	int error, maxrun = 0, num;

	ip = VTOI(vp);
	/* XXX vp->v_mount is not available in this port yet. */
	mp = EXT2_SIMPLE_FILE_SYSTEM_PRIVATE_DATA_FROM_THIS(vp->Filesystem);
	/* XXX ump = ip->i_ump; ufsmount is not available in this port yet. */

#ifdef DIAGNOSTIC
	if ((ap != NULL && nump == NULL) || (ap == NULL && nump != NULL))
		panic("ext2fs_bmaparray: invalid arguments");
#endif

	if (runp) {
		/*
		 * XXX
		 * If MAXBSIZE is the largest transfer the disks can handle,
		 * we probably want maxrun to be 1 block less so that we
		 * don't create a block larger than the device can handle.
		 */
		*runp = 0;
		/* XXX mnt_stat.f_iosize is not available in this port;
		 * the filesystem block size stands in for it. */
		maxrun = MAXBSIZE / mp->fs->e2fs_bsize - 1;
	}

	if (bn >= 0 && bn < NDADDR) {
		/* XXX ondisk32 */
		*bnp = blkptrtodb(ump, fs2h32(ip->i_e2fs_blocks[bn]));
		if (*bnp == 0)
			*bnp = -1;
		else if (runp)
			/* XXX ondisk32 */
			for (++bn; bn < NDADDR && *runp < maxrun &&
			    is_sequential(ump,
			    (daddr_t)fs2h32(ip->i_e2fs_blocks[bn - 1]),
			    (daddr_t)fs2h32(ip->i_e2fs_blocks[bn]));
			    ++bn, ++*runp);
		return (0);
	}

	xap = ap == NULL ? a : ap;
	if (!nump)
		nump = &num;
	if ((error = ufs_getlbns(vp, bn, xap, nump)) != 0)
		return (error);

	num = *nump;

	/* Get disk address out of indirect block array */
	/* XXX ondisk32 */
	daddr = fs2h32(ip->i_e2fs_blocks[NDADDR + xap->in_off]);

#ifdef DIAGNOSTIC
	if (num > NIADDR + 1 || num < 1) {
		printf("ext2fs_bmaparray: num=%d\n", num);
		panic("ext2fs_bmaparray: num");
	}
#endif
	for (bp = NULL, ++xap; --num; ++xap) {
		/*
		 * Exit the loop if there is no disk address assigned yet and
		 * the indirect block isn't in the cache, or if we were
		 * looking for an indirect block and we've found it.
		 */
		metalbn = xap->in_lbn;
		if (metalbn == bn)
			break;
		if (daddr == 0) {
			mutex_enter(&bufcache_lock);
			cbp = incore(vp, metalbn);
			mutex_exit(&bufcache_lock);
			if (cbp == NULL)
				break;
		}

		/*
		 * If we get here, we've either got the block in the cache
		 * or we have a disk address for it, go fetch it.
		 */
		if (bp)
			brelse(bp, 0);
		xap->in_exists = 1;
		/* XXX the block size stands in for mnt_stat.f_iosize here. */
		bp = getblk(vp, metalbn, mp->fs->e2fs_bsize, 0, 0);
		if (bp == NULL) {
			/*
			 * getblk() above returns NULL only iff we are
			 * pagedaemon.  See the implementation of getblk
			 * for detail.
			 */
			return (ENOMEM);
		}
		if (bp->b_oflags & (BO_DONE | BO_DELWRI)) {
			trace(TR_BREADHIT, pack(vp, size), metalbn);
		}
#ifdef DIAGNOSTIC
		else if (!daddr)
			panic("ext2fs_bmaparray: indirect block not in cache");
#endif
		else {
			trace(TR_BREADMISS, pack(vp, size), metalbn);
			bp->b_blkno = blkptrtodb(ump, daddr);
			bp->b_flags |= B_READ;
			VOP_STRATEGY(vp, bp);
			/* XXX curlwp->l_ru.ru_inblock++; not available here */
			if ((error = biowait(bp)) != 0) {
				brelse(bp, 0);
				return (error);
			}
		}
		/* XXX ondisk32 */
		daddr = fs2h32(((int32_t *)bp->b_data)[xap->in_off]);
		if (num == 1 && daddr && runp)
			/* XXX ondisk32 */
			for (bn = xap->in_off + 1;
			    bn < MNINDIR(ump) && *runp < maxrun &&
			    is_sequential(ump,
			    ((int32_t *)bp->b_data)[bn - 1],
			    ((int32_t *)bp->b_data)[bn]);
			    ++bn, ++*runp);
	}
	if (bp)
		brelse(bp, 0);

	daddr = blkptrtodb(ump, daddr);
	*bnp = daddr == 0 ? -1 : daddr;
	return (0);
}
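/*
 * Sketch of the fs2h32()/h2fs32() conversions applied to every
 * on-disk block pointer in the ext2fs code above: ext2 stores 32-bit
 * fields little-endian, so they are byte-swapped on big-endian hosts
 * and identity conversions on little-endian ones.  This portable,
 * standalone form is an assumption for illustration.
 */
static uint32_t
example_fs2h32(uint32_t v)
{
	const unsigned char *p = (const unsigned char *)&v;

	/* assemble the little-endian bytes into host order */
	return ((uint32_t)p[0] | (uint32_t)p[1] << 8 |
	    (uint32_t)p[2] << 16 | (uint32_t)p[3] << 24);
}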