/* ARGSUSED */
static int
ext2_read(struct vop_read_args *ap)
{
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	FS *fs;
	struct buf *bp;
	daddr_t lbn, nextlbn;
	off_t nextloffset;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error, orig_resid;
	int seqcount = ap->a_ioflag >> 16;

	vp = ap->a_vp;
	ip = VTOI(vp);
	uio = ap->a_uio;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("ext2_read: mode");
	if (vp->v_type == VLNK) {
		if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
			panic("ext2_read: short symlink");
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("ext2_read: type %d", vp->v_type);
#endif
	fs = ip->I_FS;
#if 0
	if ((u_quad_t)uio->uio_offset > fs->fs_maxfilesize)
		return (EFBIG);
#endif

	orig_resid = uio->uio_resid;
	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;
		nextloffset = lblktodoff(fs, nextlbn);
		size = BLKSIZE(fs, ip, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		xfersize = fs->s_frag_size - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (nextloffset >= ip->i_size) {
			error = bread(vp, lblktodoff(fs, lbn), size, &bp);
		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			error = cluster_read(vp, (off_t)ip->i_size,
					     lblktodoff(fs, lbn), size,
					     uio->uio_resid,
					     (ap->a_ioflag >> 16) * BKVASIZE,
					     &bp);
		} else if (seqcount > 1) {
/*
 * Return buffer with the contents of block "offset" from the beginning of
 * vnode "vp".  If "res" is non-zero, fill it in with a pointer to the
 * remaining space in the vnode.
 */
int
ffs_blkatoff(struct vnode *vp, off_t uoffset, char **res, struct buf **bpp)
{
	struct inode *ip;
	struct fs *fs;
	struct buf *bp;
	ufs_daddr_t lbn;
	int bsize, error;

	ip = VTOI(vp);
	fs = ip->i_fs;
	lbn = lblkno(fs, uoffset);
	bsize = blksize(fs, ip, lbn);

	*bpp = NULL;
	error = bread(vp, lblktodoff(fs, lbn), bsize, &bp);
	if (error) {
		brelse(bp);
		return (error);
	}
	if (res)
		*res = (char *)bp->b_data + blkoff(fs, uoffset);
	*bpp = bp;
	return (0);
}
/*
 * Return buffer with the contents of block "offset" from the beginning of
 * directory vnode "vp".  If "res" is non-zero, fill it in with a pointer to
 * the remaining space in the directory.
 */
int
ext2_blkatoff(struct vnode *vp, off_t offset, char **res, struct buf **bpp)
{
	struct inode *ip;
	struct ext2_sb_info *fs;
	struct buf *bp;
	daddr_t lbn;
	int bsize, error;

	ip = VTOI(vp);
	fs = ip->i_e2fs;
	lbn = lblkno(fs, offset);
	bsize = blksize(fs, ip, lbn);

	*bpp = NULL;
	if ((error = bread(vp, lblktodoff(fs, lbn), bsize, &bp)) != 0) {
		brelse(bp);
		return (error);
	}
	if (res)
		*res = (char *)bp->b_data + blkoff(fs, offset);
	*bpp = bp;
	return (0);
}
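/*
 * A minimal usage sketch (hypothetical caller, not part of the original
 * sources) showing how the *_blkatoff() helpers above are typically
 * consumed: ask for the buffer covering a byte offset within a directory,
 * use the returned pointer into that buffer, then release the buffer.
 * The names example_peek_dirblock, "dvp" and "doff" are assumptions made
 * for illustration only.
 */
static int
example_peek_dirblock(struct vnode *dvp, off_t doff)
{
	struct buf *bp;
	char *data;
	int error;

	error = ext2_blkatoff(dvp, doff, &data, &bp);
	if (error)
		return (error);
	/* "data" now points at byte "doff" within the block held by "bp". */
	/* ... interpret the directory entry at "data" here ... */
	brelse(bp);		/* release the buffer when finished */
	return (0);
}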
/*
 * Release blocks associated with the inode ip and stored in the indirect
 * block bn.  Blocks are free'd in LIFO order up to (but not including)
 * lastbn.  If level is greater than SINGLE, the block is an indirect block
 * and recursive calls to indirtrunc must be used to cleanse other indirect
 * blocks.
 *
 * NB: triple indirect blocks are untested.
 */
static int
ffs_indirtrunc(struct inode *ip, ufs_daddr_t lbn, ufs_daddr_t dbn,
	       ufs_daddr_t lastbn, int level, long *countp)
{
	int i;
	struct buf *bp;
	struct fs *fs = ip->i_fs;
	ufs_daddr_t *bap;
	struct vnode *vp;
	ufs_daddr_t *copy = NULL, nb, nlbn, last;
	long blkcount, factor;
	int nblocks, blocksreleased = 0;
	int error = 0, allerror = 0;

	/*
	 * Calculate index in current block of last
	 * block to be kept.  -1 indicates the entire
	 * block so we need not calculate the index.
	 */
	factor = 1;
	for (i = SINGLE; i < level; i++)
		factor *= NINDIR(fs);
	last = lastbn;
	if (lastbn > 0)
		last /= factor;
	nblocks = btodb(fs->fs_bsize);

	/*
	 * Get buffer of block pointers, zero those entries corresponding
	 * to blocks to be free'd, and update the on-disk copy first.  Since
	 * double (triple) indirect blocks are freed before single (double)
	 * indirect blocks, calls to bmap on these blocks will fail.  However,
	 * we already have the on-disk address, so we have to set the
	 * bio_offset field explicitly instead of letting bread do everything
	 * for us.
	 */
	vp = ITOV(ip);
	bp = getblk(vp, lblktodoff(fs, lbn), (int)fs->fs_bsize, 0, 0);
	if ((bp->b_flags & B_CACHE) == 0) {
		bp->b_flags &= ~(B_ERROR | B_INVAL);
		bp->b_cmd = BUF_CMD_READ;
		if (bp->b_bcount > bp->b_bufsize)
			panic("ffs_indirtrunc: bad buffer size");
		/*
		 * BIO is bio2 which chains back to bio1.  We wait
		 * on bio1.
		 */
		bp->b_bio2.bio_offset = dbtodoff(fs, dbn);
		bp->b_bio1.bio_done = biodone_sync;
		bp->b_bio1.bio_flags |= BIO_SYNC;
		vfs_busy_pages(vp, bp);
		/*
		 * Access the block device layer using the device vnode
		 * and the translated block number (bio2) instead of the
		 * file vnode (vp) and logical block number (bio1).
		 *
		 * Even though we are bypassing the vnode layer, we still
		 * want the vnode state to indicate that an I/O on its behalf
		 * is in progress.
		 */
		bio_start_transaction(&bp->b_bio1, &vp->v_track_read);
		vn_strategy(ip->i_devvp, &bp->b_bio2);
		error = biowait(&bp->b_bio1, "biord");
	}
	if (error) {
		brelse(bp);
		*countp = 0;
		return (error);
	}

	bap = (ufs_daddr_t *)bp->b_data;
	if (lastbn != -1) {
		copy = kmalloc(fs->fs_bsize, M_TEMP, M_WAITOK);
		bcopy((caddr_t)bap, (caddr_t)copy, (uint)fs->fs_bsize);
		bzero((caddr_t)&bap[last + 1],
		      (uint)(NINDIR(fs) - (last + 1)) * sizeof(ufs_daddr_t));
		if (DOINGASYNC(vp)) {
			bawrite(bp);
		} else {
			error = bwrite(bp);
			if (error)
				allerror = error;
		}
		bap = copy;
	}

	/*
	 * Recursively free totally unused blocks.
	 */
	for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last;
	     i--, nlbn += factor) {
		nb = bap[i];
		if (nb == 0)
			continue;
		if (level > SINGLE) {
			if ((error = ffs_indirtrunc(ip, nlbn,
			    fsbtodb(fs, nb), (ufs_daddr_t)-1, level - 1,
			    &blkcount)) != 0)
				allerror = error;
			blocksreleased += blkcount;
		}
		ffs_blkfree(ip, nb, fs->fs_bsize);
		blocksreleased += nblocks;
	}

	/*
	 * Recursively free last partial block.
	 */
	if (level > SINGLE && lastbn >= 0) {
		last = lastbn % factor;
		nb = bap[i];
		if (nb != 0) {
			error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb),
					       last, level - 1, &blkcount);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
		}
	}
	if (copy != NULL) {
		kfree(copy, M_TEMP);
	} else {
		bp->b_flags |= B_INVAL | B_NOCACHE;
		brelse(bp);
	}

	*countp = blocksreleased;
	return (allerror);
}
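/*
 * A small worked sketch (not part of the original sources) of the "factor"
 * and "last" computation used by ffs_indirtrunc() above, assuming the usual
 * SINGLE < DOUBLE < TRIPLE ordering of the indirection-level constants.
 * At the DOUBLE level each pointer slot covers NINDIR(fs) data blocks, at
 * the TRIPLE level NINDIR(fs) * NINDIR(fs), so dividing lastbn by the factor
 * yields the index of the last slot that must be preserved in this block.
 */
static long
example_last_kept_index(long lastbn, int level, long nindir)
{
	long factor = 1;
	int i;

	for (i = SINGLE; i < level; i++)
		factor *= nindir;
	/* -1 means "free the whole block", so it is passed through as-is. */
	return (lastbn > 0 ? lastbn / factor : lastbn);
}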
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (nextloffset >= ip->i_size) {
			error = bread(vp, lblktodoff(fs, lbn), size, &bp);
		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			error = cluster_read(vp, (off_t)ip->i_size,
					     lblktodoff(fs, lbn), size,
					     uio->uio_resid,
					     (ap->a_ioflag >> 16) * BKVASIZE,
					     &bp);
		} else if (seqcount > 1) {
			int nextsize = BLKSIZE(fs, ip, nextlbn);
			error = breadn(vp, lblktodoff(fs, lbn), size,
				       &nextloffset, &nextsize, 1, &bp);
		} else {
			error = bread(vp, lblktodoff(fs, lbn), size, &bp);
		}
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
/*
 * ffs_balloc(struct vnode *a_vp, ufs_daddr_t a_lbn, int a_size,
 *	      struct ucred *a_cred, int a_flags, struct buf *a_bpp)
 *
 * Balloc defines the structure of filesystem storage by allocating
 * the physical blocks on a device given the inode and the logical
 * block number in a file.
 *
 * NOTE: B_CLRBUF - this flag tells balloc to clear invalid portions
 *	 of the buffer.  However, any dirty bits will override missing
 *	 valid bits.  This case occurs when writable mmaps are truncated
 *	 and then extended.
 */
int
ffs_balloc(struct vop_balloc_args *ap)
{
	struct inode *ip;
	ufs_daddr_t lbn;
	int size;
	struct ucred *cred;
	int flags;
	struct fs *fs;
	ufs_daddr_t nb;
	struct buf *bp, *nbp, *dbp;
	struct vnode *vp;
	struct indir indirs[NIADDR + 2];
	ufs_daddr_t newb, *bap, pref;
	int deallocated, osize, nsize, num, i, error;
	ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
	ufs_daddr_t *lbns_remfree, lbns[NIADDR + 1];
	int unwindidx;
	int seqcount;

	vp = ap->a_vp;
	ip = VTOI(vp);
	fs = ip->i_fs;
	lbn = lblkno(fs, ap->a_startoffset);
	size = blkoff(fs, ap->a_startoffset) + ap->a_size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc: blk too big");
	*ap->a_bpp = NULL;
	if (lbn < 0)
		return (EFBIG);
	cred = ap->a_cred;
	flags = ap->a_flags;

	/*
	 * The vnode must be locked for us to be able to safely mess
	 * around with the inode.
	 */
	if (vn_islocked(vp) != LK_EXCLUSIVE) {
		panic("ffs_balloc: vnode %p not exclusively locked!", vp);
	}

	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment,
	 * this fragment has to be extended to be a full block.
	 */
	nb = lblkno(fs, ip->i_size);
	if (nb < NDADDR && nb < lbn) {
		/*
		 * The file size prior to this write fits in the direct
		 * blocks (i.e. the last block may be a fragment), and we
		 * are now extending the file beyond the block that held
		 * the end of file prior to this write.
		 */
		osize = blksize(fs, ip, nb);
		/*
		 * osize gives the disk-allocated size of the last block.
		 * It is either in fragments or a full filesystem block.
		 */
		if (osize < fs->fs_bsize && osize > 0) {
			/*
			 * A few fragments are already allocated; since the
			 * write extends beyond this block, reallocate it as
			 * a complete block.
			 */
			error = ffs_realloccg(ip, nb,
				ffs_blkpref(ip, nb, (int)nb, &ip->i_db[0]),
				osize, (int)fs->fs_bsize, cred, &bp);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb,
				    dofftofsb(fs, bp->b_bio2.bio_offset),
				    ip->i_db[nb], fs->fs_bsize, osize, bp);
			/* adjust the inode size, we just grew */
			ip->i_size = smalllblktosize(fs, nb + 1);
			ip->i_db[nb] = dofftofsb(fs, bp->b_bio2.bio_offset);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (flags & B_SYNC)
				bwrite(bp);
			else
				bawrite(bp);
			/* bp is already released here */
		}
	}

	/*
	 * The first NDADDR blocks are direct blocks.
	 */
	if (lbn < NDADDR) {
		nb = ip->i_db[lbn];
		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
			error = bread(vp, lblktodoff(fs, lbn), fs->fs_bsize,
				      &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_bio2.bio_offset = fsbtodoff(fs, nb);
			*ap->a_bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider the need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				error = bread(vp, lblktodoff(fs, lbn),
					      osize, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_bio2.bio_offset = fsbtodoff(fs, nb);
			} else {
				/*
				 * NOTE: ffs_realloccg() issues a bread().
				 */
				error = ffs_realloccg(ip, lbn,
				    ffs_blkpref(ip, lbn, (int)lbn,
					&ip->i_db[0]),
				    osize, nsize, cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    dofftofsb(fs,
						bp->b_bio2.bio_offset),
					    nb, nsize, osize, bp);
			}
		} else {
			if (ip->i_size < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			error = ffs_alloc(ip, lbn,
				ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]),
				nsize, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, lblktodoff(fs, lbn), nsize, 0, 0);
			bp->b_bio2.bio_offset = fsbtodoff(fs, newb);
			if (flags & B_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, lbn, newb, 0,
							  nsize, 0, bp);
		}
		ip->i_db[lbn] = dofftofsb(fs, bp->b_bio2.bio_offset);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*ap->a_bpp = bp;
		return (0);
	}

	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return (error);
#ifdef DIAGNOSTIC
	if (num < 1)
		panic("ffs_balloc: ufs_bmaparray returned indirect block");
#endif
	/*
	 * Get a handle on the data block buffer before working through
	 * indirect blocks to avoid a deadlock between the VM system holding
	 * a locked VM page and issuing a BMAP (which tries to lock the
	 * indirect blocks), and the filesystem holding a locked indirect
	 * block and then trying to read a data block (which tries to lock
	 * the underlying VM pages).
	 */
	dbp = getblk(vp, lblktodoff(fs, lbn), fs->fs_bsize, 0, 0);

	/*
	 * Setup undo history
	 */
	allocib = NULL;
	allocblk = allociblk;
	lbns_remfree = lbns;
	unwindidx = -1;

	/*
	 * Fetch the first indirect block directly from the inode, allocating
	 * one if necessary.
	 */
	--num;
	nb = ip->i_ib[indirs[0].in_off];
	if (nb == 0) {
		pref = ffs_blkpref(ip, lbn, 0, NULL);
		/*
		 * If the filesystem has run out of space we can skip the
		 * full fsync/undo of the main [fail] case since no undo
		 * history has been built yet.  Hence the goto fail2.
		 */
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
				       cred, &newb)) != 0)
			goto fail2;
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[1].in_lbn;
		bp = getblk(vp, lblktodoff(fs, indirs[1].in_lbn),
			    fs->fs_bsize, 0, 0);
		bp->b_bio2.bio_offset = fsbtodoff(fs, nb);
		vfs_bio_clrbuf(bp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip,
			    NDADDR + indirs[0].in_off, newb, 0,
			    fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if (DOINGASYNC(vp))
				bdwrite(bp);
			else if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		allocib = &ip->i_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */
	for (i = 1;;) {
		error = bread(vp, lblktodoff(fs, indirs[i].in_lbn),
			      (int)fs->fs_bsize, &bp);
		if (error) {
			brelse(bp);
			goto fail;
		}
		bap = (ufs_daddr_t *)bp->b_data;
		nb = bap[indirs[i].in_off];
		if (i == num)
			break;
		i += 1;
		if (nb != 0) {
			bqrelse(bp);
			continue;
		}
		if (pref == 0)
			pref = ffs_blkpref(ip, lbn, 0, NULL);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
				       cred, &newb)) != 0) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[i].in_lbn;
		nbp = getblk(vp, lblktodoff(fs, indirs[i].in_lbn),
			     fs->fs_bsize, 0, 0);
		nbp->b_bio2.bio_offset = fsbtodoff(fs, nb);
		vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if ((error = bwrite(nbp)) != 0) {
				brelse(bp);
				goto fail;
			}
		}
		bap[indirs[i - 1].in_off] = nb;
		if (allocib == NULL && unwindidx < 0)
			unwindidx = i - 1;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
	}

	/*
	 * Get the data block, allocating if necessary.  We have already
	 * called getblk() on the data block buffer, dbp.  If we have to
	 * allocate it and B_CLRBUF has been set the inference is an intention
	 * to zero out the related disk blocks, so we do not have to issue
	 * a read.  Instead we simply call vfs_bio_clrbuf().  If B_CLRBUF is
	 * not set the caller intends to overwrite the entire contents of the
	 * buffer and we don't waste time trying to clean up the contents.
	 *
	 * bp references the current indirect block.  When allocating,
	 * the block must be updated.
	 */
	if (nb == 0) {
		pref = ffs_blkpref(ip, lbn, indirs[i].in_off, &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
				  cred, &newb);
		if (error) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = lbn;
		dbp->b_bio2.bio_offset = fsbtodoff(fs, nb);
		if (flags & B_CLRBUF)
			vfs_bio_clrbuf(dbp);
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[i].in_off, nb, 0, dbp);
		bap[indirs[i].in_off] = nb;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		*ap->a_bpp = dbp;
		return (0);
	}
	brelse(bp);

	/*
	 * At this point all related indirect blocks have been allocated
	 * if necessary and released.  bp is no longer valid.  dbp holds
	 * our getblk()'d data block.
	 *
	 * XXX we previously performed a cluster_read operation here.
	 */
	if (flags & B_CLRBUF) {
		/*
		 * If B_CLRBUF is set we must validate the invalid portions
		 * of the buffer.  This typically requires a read-before-
		 * write.  The strategy call will fill in bio_offset in that
		 * case.
		 *
		 * If we hit this case we do a cluster read if possible
		 * since nearby data blocks are likely to be accessed soon
		 * too.
		 */
		if ((dbp->b_flags & B_CACHE) == 0) {
			bqrelse(dbp);
			seqcount = (flags & B_SEQMASK) >> B_SEQSHIFT;
			if (seqcount &&
			    (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
				error = cluster_read(vp, (off_t)ip->i_size,
						     lblktodoff(fs, lbn),
						     (int)fs->fs_bsize,
						     fs->fs_bsize,
						     seqcount * BKVASIZE,
						     &dbp);
			} else {
				error = bread(vp, lblktodoff(fs, lbn),
					      (int)fs->fs_bsize, &dbp);
			}
			if (error)
				goto fail;
		} else {
/*
 * Return buffer with the contents of block "offset" from the beginning of
 * vnode "vp".  If "res" is non-zero, fill it in with a pointer to the
 * remaining space in the vnode.
 *
 * This version includes a read-ahead optimization.
 */
int
ffs_blkatoff_ra(struct vnode *vp, off_t uoffset, char **res, struct buf **bpp,
		int seqcount)
{
	struct inode *ip;
	struct fs *fs;
	struct buf *bp;
	ufs_daddr_t lbn;
	ufs_daddr_t nextlbn;
	off_t base_loffset;
	off_t next_loffset;
	int bsize, error;
	int nextbsize;

	ip = VTOI(vp);
	fs = ip->i_fs;
	lbn = lblkno(fs, uoffset);
	base_loffset = lblktodoff(fs, lbn);
	bsize = blksize(fs, ip, lbn);

	nextlbn = lbn + 1;
	next_loffset = lblktodoff(fs, nextlbn);

	*bpp = NULL;

	if (next_loffset >= ip->i_size) {
		/*
		 * Do not do read-ahead if this is the last block;
		 * bsize might represent a fragment.
		 */
		error = bread(vp, base_loffset, bsize, &bp);
	} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
		/*
		 * Try to cluster if we are allowed to.
		 */
		error = cluster_read(vp, (off_t)ip->i_size,
				     base_loffset, bsize,
				     bsize, seqcount * BKVASIZE, &bp);
	} else if (seqcount > 1) {
		/*
		 * Fake read-ahead of the next block.
		 */
		nextbsize = blksize(fs, ip, nextlbn);
		error = breadn(vp, base_loffset, bsize,
			       &next_loffset, &nextbsize, 1, &bp);
	} else {
		/*
		 * Failing all of the above, just read what the
		 * user asked for.  Interestingly, the same as
		 * the first option above.
		 */
		error = bread(vp, base_loffset, bsize, &bp);
	}
	if (error) {
		brelse(bp);
		return (error);
	}
	if (res)
		*res = (char *)bp->b_data + (int)(uoffset - base_loffset);
	*bpp = bp;
	return (0);
}
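/*
 * A minimal sketch (hypothetical caller, not part of the original sources)
 * of how the read-ahead variant above might be driven from a read path:
 * the sequential-access hint is taken from the upper bits of a_ioflag, the
 * same convention ext2_read() uses earlier in this section.
 */
static int
example_read_block(struct vop_read_args *ap, char **datap, struct buf **bpp)
{
	int seqcount = ap->a_ioflag >> 16;	/* sequential access hint */

	return (ffs_blkatoff_ra(ap->a_vp, ap->a_uio->uio_offset,
				datap, bpp, seqcount));
}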
/*
 * ext2_reallocblks(struct vnode *a_vp, struct cluster_save *a_buflist)
 */
int
ext2_reallocblks(struct vop_reallocblks_args *ap)
{
#ifndef FANCY_REALLOC
	/* kprintf("ext2_reallocblks not implemented\n"); */
	return (ENOSPC);
#else
	struct ext2_sb_info *fs;
	struct inode *ip;
	struct vnode *vp;
	struct buf *sbp, *ebp;
	daddr_t *bap, *sbap, *ebap;
	struct cluster_save *buflist;
	daddr_t start_lbn, end_lbn, soff, eoff, newblk, blkno;
	struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
	int i, len, start_lvl, end_lvl, pref, ssize;

	vp = ap->a_vp;
	ip = VTOI(vp);
	fs = ip->i_e2fs;
#ifdef UNKLAR
	if (fs->fs_contigsumsize <= 0)
		return (ENOSPC);
#endif
	buflist = ap->a_buflist;
	len = buflist->bs_nchildren;
	start_lbn = lblkno(fs, buflist->bs_children[0]->b_loffset);
	end_lbn = start_lbn + len - 1;
#ifdef DIAGNOSTIC
	for (i = 1; i < len; i++) {
		if (buflist->bs_children[i]->b_loffset !=
		    lblktodoff(fs, start_lbn) + lblktodoff(fs, i))
			panic("ext2_reallocblks: non-cluster");
	}
#endif
	/*
	 * If the latest allocation is in a new block group, assume that
	 * the filesystem has decided to move and do not force it back to
	 * the previous block group.
	 */
	if (dtog(fs, dofftofsb(fs, buflist->bs_children[0]->b_bio2.bio_offset)) !=
	    dtog(fs, dofftofsb(fs, buflist->bs_children[len - 1]->b_bio2.bio_offset)))
		return (ENOSPC);
	if (ext2_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
	    ext2_getlbns(vp, end_lbn, end_ap, &end_lvl))
		return (ENOSPC);

	/*
	 * Get the starting offset and block map for the first block.
	 */
	if (start_lvl == 0) {
		sbap = &ip->i_db[0];
		soff = start_lbn;
	} else {
		idp = &start_ap[start_lvl - 1];
		if (bread(vp, lblktodoff(fs, idp->in_lbn),
			  (int)fs->s_blocksize, &sbp)) {
			brelse(sbp);
			return (ENOSPC);
		}
		sbap = (daddr_t *)sbp->b_data;
		soff = idp->in_off;
	}

	/*
	 * Find the preferred location for the cluster.
	 */
	pref = ext2_blkpref(ip, start_lbn, soff, sbap);

	/*
	 * If the block range spans two block maps, get the second map.
	 */
	if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
		ssize = len;
	} else {
#ifdef DIAGNOSTIC
		if (start_ap[start_lvl - 1].in_lbn == idp->in_lbn)
			panic("ext2_reallocblk: start == end");
#endif
		ssize = len - (idp->in_off + 1);
		if (bread(vp, lblktodoff(fs, idp->in_lbn),
			  (int)fs->s_blocksize, &ebp))
			goto fail;
		ebap = (daddr_t *)ebp->b_data;
	}

	/*
	 * Search the block map looking for an allocation of the desired size.
	 */
	if ((newblk = (daddr_t)ext2_hashalloc(ip, dtog(fs, pref), (long)pref,
	    len, (u_long (*)())ext2_clusteralloc)) == 0)
		goto fail;

	/*
	 * We have found a new contiguous block.
	 *
	 * First we have to replace the old block pointers with the new
	 * block pointers in the inode and indirect blocks associated
	 * with the file.
	 */
	blkno = newblk;
	for (bap = &sbap[soff], i = 0; i < len;
	     i++, blkno += fs->s_frags_per_block) {
		if (i == ssize)
			bap = ebap;
#ifdef DIAGNOSTIC
		if (buflist->bs_children[i]->b_bio2.bio_offset !=
		    fsbtodoff(fs, *bap))
			panic("ext2_reallocblks: alloc mismatch");
#endif
		*bap++ = blkno;
	}

	/*
	 * Next we must write out the modified inode and indirect blocks.
	 * For strict correctness, the writes should be synchronous since
	 * the old block values may have been written to disk.  In practice
	 * they are almost never written, but if we are concerned about
	 * strict correctness, the `doasyncfree' flag should be set to zero.
	 *
	 * The test on `doasyncfree' should be changed to test a flag
	 * that shows whether the associated buffers and inodes have
	 * been written.  The flag should be set when the cluster is
	 * started and cleared whenever the buffer or inode is flushed.
	 * We can then check below to see if it is set, and do the
	 * synchronous write only when it has been cleared.
	 */
	if (sbap != &ip->i_db[0]) {
		if (doasyncfree)
			bdwrite(sbp);
		else
			bwrite(sbp);
	} else {
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		if (!doasyncfree)
			EXT2_UPDATE(vp, 1);
	}
	if (ssize < len) {
		if (doasyncfree)
			bdwrite(ebp);
		else
			bwrite(ebp);
	}

	/*
	 * Last, free the old blocks and assign the new blocks to the buffers.
	 */
	for (blkno = newblk, i = 0; i < len;
	     i++, blkno += fs->s_frags_per_block) {
		ext2_blkfree(ip,
		    dofftofsb(fs, buflist->bs_children[i]->b_bio2.bio_offset),
		    fs->s_blocksize);
		buflist->bs_children[i]->b_bio2.bio_offset =
		    fsbtodoff(fs, blkno);
	}
	return (0);

fail:
	if (ssize < len)
		brelse(ebp);
	if (sbap != &ip->i_db[0])
		brelse(sbp);
	return (ENOSPC);
#endif /* FANCY_REALLOC */
}
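/*
 * A minimal sketch (not part of the original sources) of the cluster
 * contiguity invariant that ext2_reallocblks() asserts under DIAGNOSTIC
 * above: each buffer in the cluster_save list must begin exactly one
 * logical block after its predecessor.  The helper name is an assumption
 * made for illustration only.
 */
static int
example_cluster_is_contiguous(struct cluster_save *buflist,
			      struct ext2_sb_info *fs)
{
	int i;

	for (i = 1; i < buflist->bs_nchildren; i++) {
		if (buflist->bs_children[i]->b_loffset !=
		    buflist->bs_children[i - 1]->b_loffset +
		    lblktodoff(fs, 1))
			return (0);	/* not a contiguous cluster */
	}
	return (1);
}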
static int
ext2_indirtrunc(struct inode *ip, daddr_t lbn, off_t doffset, daddr_t lastbn,
		int level, long *countp)
{
	int i;
	struct buf *bp;
	struct ext2_sb_info *fs = ip->i_e2fs;
	daddr_t *bap;
	struct vnode *vp;
	daddr_t *copy, nb, nlbn, last;
	long blkcount, factor;
	int nblocks, blocksreleased = 0;
	int error = 0, allerror = 0;

	/*
	 * Calculate index in current block of last
	 * block to be kept.  -1 indicates the entire
	 * block so we need not calculate the index.
	 */
	factor = 1;
	for (i = SINGLE; i < level; i++)
		factor *= NINDIR(fs);
	last = lastbn;
	if (lastbn > 0)
		last /= factor;
	nblocks = btodb(fs->s_blocksize);

	/*
	 * Get buffer of block pointers, zero those entries corresponding
	 * to blocks to be free'd, and update the on-disk copy first.  Since
	 * double (triple) indirect blocks are freed before single (double)
	 * indirect blocks, calls to bmap on these blocks will fail.  However,
	 * we already have the on-disk address, so we have to set the
	 * bio_offset field explicitly instead of letting bread do everything
	 * for us.
	 */
	vp = ITOV(ip);
	bp = getblk(vp, lblktodoff(fs, lbn), (int)fs->s_blocksize, 0, 0);
	if ((bp->b_flags & B_CACHE) == 0) {
		bp->b_flags &= ~(B_ERROR | B_INVAL);
		bp->b_cmd = BUF_CMD_READ;
		if (bp->b_bcount > bp->b_bufsize)
			panic("ext2_indirtrunc: bad buffer size");
		bp->b_bio2.bio_offset = doffset;
		bp->b_bio1.bio_done = biodone_sync;
		bp->b_bio1.bio_flags |= BIO_SYNC;
		vfs_busy_pages(bp->b_vp, bp);
		vn_strategy(vp, &bp->b_bio1);
		error = biowait(&bp->b_bio1, "biord");
	}
	if (error) {
		brelse(bp);
		*countp = 0;
		return (error);
	}

	bap = (daddr_t *)bp->b_data;
	MALLOC(copy, daddr_t *, fs->s_blocksize, M_TEMP, M_WAITOK);
	bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->s_blocksize);
	bzero((caddr_t)&bap[last + 1],
	      (u_int)(NINDIR(fs) - (last + 1)) * sizeof(daddr_t));
	if (last == -1)
		bp->b_flags |= B_INVAL;
	error = bwrite(bp);
	if (error)
		allerror = error;
	bap = copy;

	/*
	 * Recursively free totally unused blocks.
	 */
	for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last;
	     i--, nlbn += factor) {
		nb = bap[i];
		if (nb == 0)
			continue;
		if (level > SINGLE) {
			if ((error = ext2_indirtrunc(ip, nlbn,
			    fsbtodoff(fs, nb), (daddr_t)-1, level - 1,
			    &blkcount)) != 0)
				allerror = error;
			blocksreleased += blkcount;
		}
		ext2_blkfree(ip, nb, fs->s_blocksize);
		blocksreleased += nblocks;
	}

	/*
	 * Recursively free last partial block.
	 */
	if (level > SINGLE && lastbn >= 0) {
		last = lastbn % factor;
		nb = bap[i];
		if (nb != 0) {
			error = ext2_indirtrunc(ip, nlbn, fsbtodoff(fs, nb),
						last, level - 1, &blkcount);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
		}
	}
	FREE(copy, M_TEMP);
	*countp = blocksreleased;
	return (allerror);
}
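/*
 * A tiny numeric sketch (not part of the original sources) of the block
 * accounting used by both indirtrunc() variants above: every freed
 * filesystem block contributes btodb(blocksize) DEV_BSIZE-sized device
 * blocks to the running "blocksreleased" total that is handed back to the
 * caller through *countp.
 */
static long
example_blocks_released(long fsblocks_freed, long blocksize)
{
	return (fsblocks_freed * btodb(blocksize));
}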
/*
 * Indirect blocks are now on the vnode for the file.  They are given negative
 * logical block numbers.  Indirect blocks are addressed by the negative
 * address of the first data block to which they point.  Double indirect blocks
 * are addressed by one less than the address of the first indirect block to
 * which they point.  Triple indirect blocks are addressed by one less than
 * the address of the first double indirect block to which they point.
 *
 * ext2_bmaparray does the bmap conversion, and if requested returns the
 * array of logical blocks which must be traversed to get to a block.
 * Each entry contains the offset into that block that gets you to the
 * next block and the disk address of the block (if it is assigned).
 */
static int
ext2_bmaparray(struct vnode *vp, ext2_daddr_t bn, ext2_daddr_t *bnp,
	       struct indir *ap, int *nump, int *runp, int *runb)
{
	struct inode *ip;
	struct buf *bp;
	struct ext2_mount *ump;
	struct mount *mp;
	struct ext2_sb_info *fs;
	struct indir a[NIADDR + 1], *xap;
	ext2_daddr_t daddr;
	long metalbn;
	int error, maxrun, num;

	ip = VTOI(vp);
	mp = vp->v_mount;
	ump = VFSTOEXT2(mp);
	fs = ip->i_e2fs;
#ifdef DIAGNOSTIC
	if ((ap != NULL && nump == NULL) || (ap == NULL && nump != NULL))
		panic("ext2_bmaparray: invalid arguments");
#endif

	if (runp) {
		*runp = 0;
	}
	if (runb) {
		*runb = 0;
	}

	maxrun = mp->mnt_iosize_max / mp->mnt_stat.f_iosize - 1;

	xap = ap == NULL ? a : ap;
	if (!nump)
		nump = &num;
	error = ext2_getlbns(vp, bn, xap, nump);
	if (error)
		return (error);

	num = *nump;
	if (num == 0) {
		*bnp = blkptrtodb(ump, ip->i_db[bn]);
		if (*bnp == 0) {
			*bnp = -1;
		} else if (runp) {
			daddr_t bnb = bn;

			for (++bn; bn < NDADDR && *runp < maxrun &&
			     is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]);
			     ++bn, ++*runp)
				;
			bn = bnb;
			if (runb && (bn > 0)) {
				for (--bn; (bn >= 0) && (*runb < maxrun) &&
				     is_sequential(ump, ip->i_db[bn],
						   ip->i_db[bn + 1]);
				     --bn, ++*runb)
					;
			}
		}
		return (0);
	}

	/* Get disk address out of indirect block array */
	daddr = ip->i_ib[xap->in_off];

	for (bp = NULL, ++xap; --num; ++xap) {
		/*
		 * Exit the loop if there is no disk address assigned yet and
		 * the indirect block isn't in the cache, or if we were
		 * looking for an indirect block and we've found it.
		 */
		metalbn = xap->in_lbn;
		if ((daddr == 0 &&
		     !findblk(vp, dbtodoff(fs, metalbn), FINDBLK_TEST)) ||
		    metalbn == bn) {
			break;
		}
		/*
		 * If we get here, we've either got the block in the cache
		 * or we have a disk address for it, go fetch it.
		 */
		if (bp)
			bqrelse(bp);

		xap->in_exists = 1;
		bp = getblk(vp, lblktodoff(fs, metalbn),
			    mp->mnt_stat.f_iosize, 0, 0);
		if ((bp->b_flags & B_CACHE) == 0) {
#ifdef DIAGNOSTIC
			if (!daddr)
				panic("ext2_bmaparray: indirect block not in cache");
#endif
			/*
			 * This runs through ext2_strategy using bio2 to
			 * cache the disk offset, then comes back through
			 * bio1.
			 * So we want to wait on bio1.
			 */
			bp->b_bio1.bio_done = biodone_sync;
			bp->b_bio1.bio_flags |= BIO_SYNC;
			bp->b_bio2.bio_offset = fsbtodoff(fs, daddr);
			bp->b_flags &= ~(B_INVAL | B_ERROR);
			bp->b_cmd = BUF_CMD_READ;
			vfs_busy_pages(bp->b_vp, bp);
			vn_strategy(bp->b_vp, &bp->b_bio1);
			error = biowait(&bp->b_bio1, "biord");
			if (error) {
				brelse(bp);
				return (error);
			}
		}

		daddr = ((ext2_daddr_t *)bp->b_data)[xap->in_off];
		if (num == 1 && daddr && runp) {
			for (bn = xap->in_off + 1;
			     bn < MNINDIR(ump) && *runp < maxrun &&
			     is_sequential(ump,
				 ((ext2_daddr_t *)bp->b_data)[bn - 1],
				 ((ext2_daddr_t *)bp->b_data)[bn]);
			     ++bn, ++*runp)
				;
			bn = xap->in_off;
			if (runb && bn) {
				for (--bn; bn >= 0 && *runb < maxrun &&
				     is_sequential(ump,
					 ((daddr_t *)bp->b_data)[bn],
					 ((daddr_t *)bp->b_data)[bn + 1]);
				     --bn, ++*runb)
					;
			}
		}
	}
	if (bp)
		bqrelse(bp);

	daddr = blkptrtodb(ump, daddr);
	*bnp = daddr == 0 ? -1 : daddr;
	return (0);
}
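/*
 * A minimal sketch (an assumed wrapper, not the original ext2_bmap()) of how
 * ext2_bmaparray() is typically consumed: translate a single logical block
 * number into a physical block number, optionally learning how many
 * contiguous blocks follow (runp) or precede (runb) it.  Passing NULL for
 * the indir array and count asks only for the translation.
 */
static int
example_bmap(struct vnode *vp, ext2_daddr_t lbn, ext2_daddr_t *pbnp,
	     int *runp, int *runb)
{
	return (ext2_bmaparray(vp, lbn, pbnp, NULL, NULL, runp, runb));
}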