static int ext2_ext_balloc(struct inode *ip, uint32_t lbn, int size, struct ucred *cred, struct buf **bpp, int flags) { struct m_ext2fs *fs; struct buf *bp = NULL; struct vnode *vp = ITOV(ip); daddr_t newblk; int osize, nsize, blks, error, allocated; fs = ip->i_e2fs; blks = howmany(size, fs->e2fs_bsize); error = ext4_ext_get_blocks(ip, lbn, blks, cred, NULL, &allocated, &newblk); if (error) return (error); if (allocated) { if (ip->i_size < (lbn + 1) * fs->e2fs_bsize) nsize = fragroundup(fs, size); else nsize = fs->e2fs_bsize; bp = getblk(vp, lbn, nsize, 0, 0, 0); if(!bp) return (EIO); bp->b_blkno = fsbtodb(fs, newblk); if (flags & BA_CLRBUF) vfs_bio_clrbuf(bp); } else { if (ip->i_size >= (lbn + 1) * fs->e2fs_bsize) { error = bread(vp, lbn, fs->e2fs_bsize, NOCRED, &bp); if (error) { brelse(bp); return (error); } bp->b_blkno = fsbtodb(fs, newblk); *bpp = bp; return (0); } /* * Consider need to reallocate a fragment. */ osize = fragroundup(fs, blkoff(fs, ip->i_size)); nsize = fragroundup(fs, size); if (nsize <= osize) error = bread(vp, lbn, osize, NOCRED, &bp); else error = bread(vp, lbn, fs->e2fs_bsize, NOCRED, &bp); if (error) { brelse(bp); return (error); } bp->b_blkno = fsbtodb(fs, newblk); } *bpp = bp; return (error); }
/* * Read data to a buf, including read-ahead if we find this to be beneficial. * cluster_read replaces bread. */ int cluster_read(struct vnode *vp, u_quad_t filesize, daddr_t lblkno, long size, struct ucred *cred, long totread, int seqcount, int gbflags, struct buf **bpp) { struct buf *bp, *rbp, *reqbp; struct bufobj *bo; daddr_t blkno, origblkno; int maxra, racluster; int error, ncontig; int i; error = 0; bo = &vp->v_bufobj; if (!unmapped_buf_allowed) gbflags &= ~GB_UNMAPPED; /* * Try to limit the amount of read-ahead by a few * ad-hoc parameters. This needs work!!! */ racluster = vp->v_mount->mnt_iosize_max / size; maxra = seqcount; maxra = min(read_max, maxra); maxra = min(nbuf/8, maxra); if (((u_quad_t)(lblkno + maxra + 1) * size) > filesize) maxra = (filesize / size) - lblkno; /* * get the requested block */ *bpp = reqbp = bp = getblk(vp, lblkno, size, 0, 0, gbflags); origblkno = lblkno; /* * if it is in the cache, then check to see if the reads have been * sequential. If they have, then try some read-ahead, otherwise * back-off on prospective read-aheads. */ if (bp->b_flags & B_CACHE) { if (!seqcount) { return 0; } else if ((bp->b_flags & B_RAM) == 0) { return 0; } else { bp->b_flags &= ~B_RAM; BO_RLOCK(bo); for (i = 1; i < maxra; i++) { /* * Stop if the buffer does not exist or it * is invalid (about to go away?) */ rbp = gbincore(&vp->v_bufobj, lblkno+i); if (rbp == NULL || (rbp->b_flags & B_INVAL)) break; /* * Set another read-ahead mark so we know * to check again. (If we can lock the * buffer without waiting) */ if ((((i % racluster) == (racluster - 1)) || (i == (maxra - 1))) && (0 == BUF_LOCK(rbp, LK_EXCLUSIVE | LK_NOWAIT, NULL))) { rbp->b_flags |= B_RAM; BUF_UNLOCK(rbp); } } BO_RUNLOCK(bo); if (i >= maxra) { return 0; } lblkno += i; } reqbp = bp = NULL; /* * If it isn't in the cache, then get a chunk from * disk if sequential, otherwise just get the block. */ } else { off_t firstread = bp->b_offset; int nblks; long minread; KASSERT(bp->b_offset != NOOFFSET, ("cluster_read: no buffer offset")); ncontig = 0; /* * Adjust totread if needed */ minread = read_min * size; if (minread > totread) totread = minread; /* * Compute the total number of blocks that we should read * synchronously. */ if (firstread + totread > filesize) totread = filesize - firstread; nblks = howmany(totread, size); if (nblks > racluster) nblks = racluster; /* * Now compute the number of contiguous blocks. */ if (nblks > 1) { error = VOP_BMAP(vp, lblkno, NULL, &blkno, &ncontig, NULL); /* * If this failed to map just do the original block. */ if (error || blkno == -1) ncontig = 0; } /* * If we have contiguous data available do a cluster * otherwise just read the requested block. */ if (ncontig) { /* Account for our first block. */ ncontig = min(ncontig + 1, nblks); if (ncontig < nblks) nblks = ncontig; bp = cluster_rbuild(vp, filesize, lblkno, blkno, size, nblks, gbflags, bp); lblkno += (bp->b_bufsize / size); } else { bp->b_flags |= B_RAM; bp->b_iocmd = BIO_READ; lblkno += 1; } } /* * handle the synchronous read so that it is available ASAP. */ if (bp) { if ((bp->b_flags & B_CLUSTER) == 0) { vfs_busy_pages(bp, 0); } bp->b_flags &= ~B_INVAL; bp->b_ioflags &= ~BIO_ERROR; if ((bp->b_flags & B_ASYNC) || bp->b_iodone != NULL) BUF_KERNPROC(bp); bp->b_iooffset = dbtob(bp->b_blkno); bstrategy(bp); curthread->td_ru.ru_inblock++; } /* * If we have been doing sequential I/O, then do some read-ahead. */ while (lblkno < (origblkno + maxra)) { error = VOP_BMAP(vp, lblkno, NULL, &blkno, &ncontig, NULL); if (error) break; if (blkno == -1) break; /* * We could throttle ncontig here by maxra but we might as * well read the data if it is contiguous. We're throttled * by racluster anyway. */ if (ncontig) { ncontig = min(ncontig + 1, racluster); rbp = cluster_rbuild(vp, filesize, lblkno, blkno, size, ncontig, gbflags, NULL); lblkno += (rbp->b_bufsize / size); if (rbp->b_flags & B_DELWRI) { bqrelse(rbp); continue; } } else { rbp = getblk(vp, lblkno, size, 0, 0, gbflags); lblkno += 1; if (rbp->b_flags & B_DELWRI) { bqrelse(rbp); continue; } rbp->b_flags |= B_ASYNC | B_RAM; rbp->b_iocmd = BIO_READ; rbp->b_blkno = blkno; } if (rbp->b_flags & B_CACHE) { rbp->b_flags &= ~B_ASYNC; bqrelse(rbp); continue; } if ((rbp->b_flags & B_CLUSTER) == 0) { vfs_busy_pages(rbp, 0); } rbp->b_flags &= ~B_INVAL; rbp->b_ioflags &= ~BIO_ERROR; if ((rbp->b_flags & B_ASYNC) || rbp->b_iodone != NULL) BUF_KERNPROC(rbp); rbp->b_iooffset = dbtob(rbp->b_blkno); bstrategy(rbp); curthread->td_ru.ru_inblock++; } if (reqbp) return (bufwait(reqbp)); else return (error); }
/* * Check the super block and its summary info. */ static int checksb(int listerr) { caddr_t err; /* * When the fs check is successfully completed, the alternate super * block at sblk.b_bno will be overwritten by ckfini() with the * repaired super block. */ sblk.b_bno = bflag ? bflag : (SBOFF / dev_bsize); sblk.b_size = SBSIZE; /* * Sanity-check some of the values we are going to use later * in allocation requests. */ if (sblock.fs_cstotal.cs_ndir < 1 || sblock.fs_cstotal.cs_ndir > sblock.fs_ncg * sblock.fs_ipg) { if (verbose) (void) printf( "Found %d directories, should be between 1 and %d inclusive.\n", sblock.fs_cstotal.cs_ndir, sblock.fs_ncg * sblock.fs_ipg); err = "NUMBER OF DIRECTORIES OUT OF RANGE"; goto failedsb; } if (sblock.fs_nrpos <= 0 || sblock.fs_postbloff < 0 || sblock.fs_cpc < 0 || (sblock.fs_postbloff + (sblock.fs_nrpos * sblock.fs_cpc * sizeof (short))) > sblock.fs_sbsize) { err = "ROTATIONAL POSITION TABLE SIZE OUT OF RANGE"; goto failedsb; } if (sblock.fs_cssize != fragroundup(&sblock, sblock.fs_ncg * sizeof (struct csum))) { err = "SIZE OF CYLINDER GROUP SUMMARY AREA WRONG"; goto failedsb; } if (sblock.fs_inopb != (sblock.fs_bsize / sizeof (struct dinode))) { err = "INOPB NONSENSICAL RELATIVE TO BSIZE"; goto failedsb; } if (sblock.fs_bsize > MAXBSIZE) { err = "BLOCK SIZE LARGER THAN MAXIMUM SUPPORTED"; goto failedsb; } if (sblock.fs_bsize != (sblock.fs_frag * sblock.fs_fsize)) { err = "FRAGS PER BLOCK OR FRAG SIZE WRONG"; goto failedsb; } if (sblock.fs_dsize >= sblock.fs_size) { err = "NUMBER OF DATA BLOCKS OUT OF RANGE"; goto failedsb; } #if 0 if (sblock.fs_size > (sblock.fs_nsect * sblock.fs_ntrak * sblock.fs_ncyl)) { err = "FILESYSTEM SIZE LARGER THAN DEVICE"; goto failedsb; } #endif /* * Check that the number of inodes per group isn't less than or * equal to zero. Also makes sure it isn't more than the * maximum number mkfs enforces. */ if (sblock.fs_ipg <= 0 || sblock.fs_ipg > MAXIpG) { err = "INODES PER GROUP OUT OF RANGE"; goto failedsb; } if (sblock.fs_cgsize > sblock.fs_bsize) { err = "CG HEADER LARGER THAN ONE BLOCK"; goto failedsb; } /* * Set all possible fields that could differ, then do check * of whole super block against an alternate super block. * When an alternate super-block is specified this check is skipped. */ (void) getblk(&asblk, cgsblock(&sblock, sblock.fs_ncg - 1), (size_t)sblock.fs_sbsize); if (asblk.b_errs != 0) { brelse(&asblk); return (0); } if (bflag != 0) { /* * Invalidate clean flag and state information. * Note that we couldn't return until after the * above getblk(), because we're going to want to * update asblk when everything's done. */ sblock.fs_clean = FSACTIVE; sblock.fs_state = (long)sblock.fs_time; sblock.fs_reclaim = 0; sbdirty(); havesb = 1; return (1); } altsblock.fs_link = sblock.fs_link; altsblock.fs_rolled = sblock.fs_rolled; altsblock.fs_time = sblock.fs_time; altsblock.fs_state = sblock.fs_state; altsblock.fs_cstotal = sblock.fs_cstotal; altsblock.fs_cgrotor = sblock.fs_cgrotor; altsblock.fs_fmod = sblock.fs_fmod; altsblock.fs_clean = sblock.fs_clean; altsblock.fs_ronly = sblock.fs_ronly; altsblock.fs_flags = sblock.fs_flags; altsblock.fs_maxcontig = sblock.fs_maxcontig; altsblock.fs_minfree = sblock.fs_minfree; altsblock.fs_optim = sblock.fs_optim; altsblock.fs_rotdelay = sblock.fs_rotdelay; altsblock.fs_maxbpg = sblock.fs_maxbpg; altsblock.fs_logbno = sblock.fs_logbno; altsblock.fs_reclaim = sblock.fs_reclaim; altsblock.fs_si = sblock.fs_si; (void) memmove((void *)altsblock.fs_fsmnt, (void *)sblock.fs_fsmnt, sizeof (sblock.fs_fsmnt)); /* * The following should not have to be copied. */ (void) memmove((void *)altsblock.fs_u.fs_csp_pad, (void *)sblock.fs_u.fs_csp_pad, sizeof (sblock.fs_u.fs_csp_pad)); altsblock.fs_fsbtodb = sblock.fs_fsbtodb; altsblock.fs_npsect = sblock.fs_npsect; altsblock.fs_nrpos = sblock.fs_nrpos; if (memcmp((void *)&sblock, (void *)&altsblock, (size_t)sblock.fs_sbsize) != 0) { err = "BAD VALUES IN SUPER BLOCK"; goto failedsb; } havesb = 1; return (1); failedsb: badsb(listerr, err); return (0); }
/* * Indirect blocks are now on the vnode for the file. They are given negative * logical block numbers. Indirect blocks are addressed by the negative * address of the first data block to which they point. Double indirect blocks * are addressed by one less than the address of the first indirect block to * which they point. Triple indirect blocks are addressed by one less than * the address of the first double indirect block to which they point. * * ufs_bmaparray does the bmap conversion, and if requested returns the * array of logical blocks which must be traversed to get to a block. * Each entry contains the offset into that block that gets you to the * next block and the disk address of the block (if it is assigned). */ int ufs_bmaparray(struct vnode *vp, daddr64_t bn, daddr64_t *bnp, struct indir *ap, int *nump, int *runp) { struct inode *ip; struct buf *bp; struct ufsmount *ump; struct mount *mp; struct vnode *devvp; struct indir a[NIADDR+1], *xap; daddr64_t daddr, metalbn; int error, maxrun = 0, num; ip = VTOI(vp); mp = vp->v_mount; ump = VFSTOUFS(mp); #ifdef DIAGNOSTIC if ((ap != NULL && nump == NULL) || (ap == NULL && nump != NULL)) panic("ufs_bmaparray: invalid arguments"); #endif if (runp) { /* * XXX * If MAXBSIZE is the largest transfer the disks can handle, * we probably want maxrun to be 1 block less so that we * don't create a block larger than the device can handle. */ *runp = 0; maxrun = MAXBSIZE / mp->mnt_stat.f_iosize - 1; } xap = ap == NULL ? a : ap; if (!nump) nump = # if ((error = ufs_getlbns(vp, bn, xap, nump)) != 0) return (error); num = *nump; if (num == 0) { *bnp = blkptrtodb(ump, DIP(ip, db[bn])); if (*bnp == 0) *bnp = -1; else if (runp) for (++bn; bn < NDADDR && *runp < maxrun && is_sequential(ump, DIP(ip, db[bn - 1]), DIP(ip, db[bn])); ++bn, ++*runp); return (0); } /* Get disk address out of indirect block array */ daddr = DIP(ip, ib[xap->in_off]); devvp = VFSTOUFS(vp->v_mount)->um_devvp; for (bp = NULL, ++xap; --num; ++xap) { /* * Exit the loop if there is no disk address assigned yet and * the indirect block isn't in the cache, or if we were * looking for an indirect block and we've found it. */ metalbn = xap->in_lbn; if ((daddr == 0 && !incore(vp, metalbn)) || metalbn == bn) break; /* * If we get here, we've either got the block in the cache * or we have a disk address for it, go fetch it. */ if (bp) brelse(bp); xap->in_exists = 1; bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0); if (bp->b_flags & (B_DONE | B_DELWRI)) { ; } #ifdef DIAGNOSTIC else if (!daddr) panic("ufs_bmaparray: indirect block not in cache"); #endif else { bp->b_blkno = blkptrtodb(ump, daddr); bp->b_flags |= B_READ; bcstats.pendingreads++; bcstats.numreads++; VOP_STRATEGY(bp); curproc->p_ru.ru_inblock++; /* XXX */ if ((error = biowait(bp)) != 0) { brelse(bp); return (error); } } #ifdef FFS2 if (ip->i_ump->um_fstype == UM_UFS2) { daddr = ((int64_t *)bp->b_data)[xap->in_off]; if (num == 1 && daddr && runp) for (bn = xap->in_off + 1; bn < MNINDIR(ump) && *runp < maxrun && is_sequential(ump, ((int64_t *)bp->b_data)[bn - 1], ((int64_t *)bp->b_data)[bn]); ++bn, ++*runp); continue; } #endif /* FFS2 */ daddr = ((int32_t *)bp->b_data)[xap->in_off]; if (num == 1 && daddr && runp) for (bn = xap->in_off + 1; bn < MNINDIR(ump) && *runp < maxrun && is_sequential(ump, ((int32_t *)bp->b_data)[bn - 1], ((int32_t *)bp->b_data)[bn]); ++bn, ++*runp); } if (bp) brelse(bp); daddr = blkptrtodb(ump, daddr); *bnp = daddr == 0 ? -1 : daddr; return (0); }
/* * Balloc defines the structure of file system storage * by allocating the physical blocks on a device given * the inode and the logical block number in a file. * This is the allocation strategy for UFS2. Above is * the allocation strategy for UFS1. */ int ffs_balloc_ufs2(vnode *vp, off_t startoffset, int size, Ucred *cred, int flags, Buf **bpp) { int error = 0; print("HARVEY TODO: %s\n", __func__); #if 0 struct inode *ip; struct ufs2_dinode *dp; ufs_lbn_t lbn, lastlbn; struct fs *fs; struct buf *bp, *nbp; struct ufsmount *ump; struct indir indirs[UFS_NIADDR + 2]; ufs2_daddr_t nb, newb, *bap, pref; ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1]; ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1]; int deallocated, osize, nsize, num, i, error; int unwindidx = -1; int saved_inbdflush; static struct timeval lastfail; static int curfail; int gbflags, reclaimed; ip = VTOI(vp); dp = ip->i_din2; fs = ITOFS(ip); ump = ITOUMP(ip); lbn = lblkno(fs, startoffset); size = blkoff(fs, startoffset) + size; reclaimed = 0; if (size > fs->fs_bsize) panic("ffs_balloc_ufs2: blk too big"); *bpp = nil; if (lbn < 0) return (EFBIG); gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0; if (DOINGSOFTDEP(vp)) softdep_prealloc(vp, MNT_WAIT); /* * Check for allocating external data. */ if (flags & IO_EXT) { if (lbn >= UFS_NXADDR) return (EFBIG); /* * If the next write will extend the data into a new block, * and the data is currently composed of a fragment * this fragment has to be extended to be a full block. */ lastlbn = lblkno(fs, dp->di_extsize); if (lastlbn < lbn) { nb = lastlbn; osize = sblksize(fs, dp->di_extsize, nb); if (osize < fs->fs_bsize && osize > 0) { UFS_LOCK(ump); error = ffs_realloccg(ip, -1 - nb, dp->di_extb[nb], ffs_blkpref_ufs2(ip, lastlbn, (int)nb, &dp->di_extb[0]), osize, (int)fs->fs_bsize, flags, cred, &bp); if (error) return (error); if (DOINGSOFTDEP(vp)) softdep_setup_allocext(ip, nb, dbtofsb(fs, bp->b_blkno), dp->di_extb[nb], fs->fs_bsize, osize, bp); dp->di_extsize = smalllblktosize(fs, nb + 1); dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno); bp->b_xflags |= BX_ALTDATA; ip->i_flag |= IN_CHANGE; if (flags & IO_SYNC) bwrite(bp); else bawrite(bp); } } /* * All blocks are direct blocks */ if (flags & BA_METAONLY) panic("ffs_balloc_ufs2: BA_METAONLY for ext block"); nb = dp->di_extb[lbn]; if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) { error = bread_gb(vp, -1 - lbn, fs->fs_bsize, NOCRED, gbflags, &bp); if (error) { brelse(bp); return (error); } bp->b_blkno = fsbtodb(fs, nb); bp->b_xflags |= BX_ALTDATA; *bpp = bp; return (0); } if (nb != 0) { /* * Consider need to reallocate a fragment. */ osize = fragroundup(fs, blkoff(fs, dp->di_extsize)); nsize = fragroundup(fs, size); if (nsize <= osize) { error = bread_gb(vp, -1 - lbn, osize, NOCRED, gbflags, &bp); if (error) { brelse(bp); return (error); } bp->b_blkno = fsbtodb(fs, nb); bp->b_xflags |= BX_ALTDATA; } else { UFS_LOCK(ump); error = ffs_realloccg(ip, -1 - lbn, dp->di_extb[lbn], ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]), osize, nsize, flags, cred, &bp); if (error) return (error); bp->b_xflags |= BX_ALTDATA; if (DOINGSOFTDEP(vp)) softdep_setup_allocext(ip, lbn, dbtofsb(fs, bp->b_blkno), nb, nsize, osize, bp); } } else { if (dp->di_extsize < smalllblktosize(fs, lbn + 1)) nsize = fragroundup(fs, size); else nsize = fs->fs_bsize; UFS_LOCK(ump); error = ffs_alloc(ip, lbn, ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]), nsize, flags, cred, &newb); if (error) return (error); bp = getblk(vp, -1 - lbn, nsize, 0, 0, gbflags); bp->b_blkno = fsbtodb(fs, newb); bp->b_xflags |= BX_ALTDATA; if (flags & BA_CLRBUF) vfs_bio_clrbuf(bp); if (DOINGSOFTDEP(vp)) softdep_setup_allocext(ip, lbn, newb, 0, nsize, 0, bp); } dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno); ip->i_flag |= IN_CHANGE; *bpp = bp; return (0); } /* * If the next write will extend the file into a new block, * and the file is currently composed of a fragment * this fragment has to be extended to be a full block. */ lastlbn = lblkno(fs, ip->i_size); if (lastlbn < UFS_NDADDR && lastlbn < lbn) { nb = lastlbn; osize = blksize(fs, ip, nb); if (osize < fs->fs_bsize && osize > 0) { UFS_LOCK(ump); error = ffs_realloccg(ip, nb, dp->di_db[nb], ffs_blkpref_ufs2(ip, lastlbn, (int)nb, &dp->di_db[0]), osize, (int)fs->fs_bsize, flags, cred, &bp); if (error) return (error); if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, nb, dbtofsb(fs, bp->b_blkno), dp->di_db[nb], fs->fs_bsize, osize, bp); ip->i_size = smalllblktosize(fs, nb + 1); dp->di_size = ip->i_size; dp->di_db[nb] = dbtofsb(fs, bp->b_blkno); ip->i_flag |= IN_CHANGE | IN_UPDATE; if (flags & IO_SYNC) bwrite(bp); else bawrite(bp); } } /* * The first UFS_NDADDR blocks are direct blocks */ if (lbn < UFS_NDADDR) { if (flags & BA_METAONLY) panic("ffs_balloc_ufs2: BA_METAONLY for direct block"); nb = dp->di_db[lbn]; if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) { error = bread_gb(vp, lbn, fs->fs_bsize, NOCRED, gbflags, &bp); if (error) { brelse(bp); return (error); } bp->b_blkno = fsbtodb(fs, nb); *bpp = bp; return (0); } if (nb != 0) { /* * Consider need to reallocate a fragment. */ osize = fragroundup(fs, blkoff(fs, ip->i_size)); nsize = fragroundup(fs, size); if (nsize <= osize) { error = bread_gb(vp, lbn, osize, NOCRED, gbflags, &bp); if (error) { brelse(bp); return (error); } bp->b_blkno = fsbtodb(fs, nb); } else { UFS_LOCK(ump); error = ffs_realloccg(ip, lbn, dp->di_db[lbn], ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_db[0]), osize, nsize, flags, cred, &bp); if (error) return (error); if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, lbn, dbtofsb(fs, bp->b_blkno), nb, nsize, osize, bp); } } else { if (ip->i_size < smalllblktosize(fs, lbn + 1)) nsize = fragroundup(fs, size); else nsize = fs->fs_bsize; UFS_LOCK(ump); error = ffs_alloc(ip, lbn, ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_db[0]), nsize, flags, cred, &newb); if (error) return (error); bp = getblk(vp, lbn, nsize, 0, 0, gbflags); bp->b_blkno = fsbtodb(fs, newb); if (flags & BA_CLRBUF) vfs_bio_clrbuf(bp); if (DOINGSOFTDEP(vp)) softdep_setup_allocdirect(ip, lbn, newb, 0, nsize, 0, bp); } dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno); ip->i_flag |= IN_CHANGE | IN_UPDATE; *bpp = bp; return (0); } /* * Determine the number of levels of indirection. */ pref = 0; if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0) return(error); #ifdef INVARIANTS if (num < 1) panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block"); #endif saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH); /* * Fetch the first indirect block allocating if necessary. */ --num; nb = dp->di_ib[indirs[0].in_off]; allocib = nil; allocblk = allociblk; lbns_remfree = lbns; if (nb == 0) { UFS_LOCK(ump); pref = ffs_blkpref_ufs2(ip, lbn, -indirs[0].in_off - 1, (ufs2_daddr_t *)0); if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred, &newb)) != 0) { curthread_pflags_restore(saved_inbdflush); return (error); } pref = newb + fs->fs_frag; nb = newb; MPASS(allocblk < allociblk + nitems(allociblk)); MPASS(lbns_remfree < lbns + nitems(lbns)); *allocblk++ = nb; *lbns_remfree++ = indirs[1].in_lbn; bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, GB_UNMAPPED); bp->b_blkno = fsbtodb(fs, nb); vfs_bio_clrbuf(bp); if (DOINGSOFTDEP(vp)) { softdep_setup_allocdirect(ip, UFS_NDADDR + indirs[0].in_off, newb, 0, fs->fs_bsize, 0, bp); bdwrite(bp); } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) { if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } else { if ((error = bwrite(bp)) != 0) goto fail; } allocib = &dp->di_ib[indirs[0].in_off]; *allocib = nb; ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * Fetch through the indirect blocks, allocating as necessary. */ retry: for (i = 1;;) { error = bread(vp, indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp); if (error) { brelse(bp); goto fail; } bap = (ufs2_daddr_t *)bp->b_data; nb = bap[indirs[i].in_off]; if (i == num) break; i += 1; if (nb != 0) { bqrelse(bp); continue; } UFS_LOCK(ump); /* * If parent indirect has just been allocated, try to cluster * immediately following it. */ if (pref == 0) pref = ffs_blkpref_ufs2(ip, lbn, i - num - 1, (ufs2_daddr_t *)0); if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags | IO_BUFLOCKED, cred, &newb)) != 0) { brelse(bp); if (DOINGSOFTDEP(vp) && ++reclaimed == 1) { UFS_LOCK(ump); softdep_request_cleanup(fs, vp, cred, FLUSH_BLOCKS_WAIT); UFS_UNLOCK(ump); goto retry; } if (ppsratecheck(&lastfail, &curfail, 1)) { ffs_fserr(fs, ip->i_number, "filesystem full"); uprintf("\n%s: write failed, filesystem " "is full\n", fs->fs_fsmnt); } goto fail; } pref = newb + fs->fs_frag; nb = newb; MPASS(allocblk < allociblk + nitems(allociblk)); MPASS(lbns_remfree < lbns + nitems(lbns)); *allocblk++ = nb; *lbns_remfree++ = indirs[i].in_lbn; nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, GB_UNMAPPED); nbp->b_blkno = fsbtodb(fs, nb); vfs_bio_clrbuf(nbp); if (DOINGSOFTDEP(vp)) { softdep_setup_allocindir_meta(nbp, ip, bp, indirs[i - 1].in_off, nb); bdwrite(nbp); } else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) { if (nbp->b_bufsize == fs->fs_bsize) nbp->b_flags |= B_CLUSTEROK; bdwrite(nbp); } else { if ((error = bwrite(nbp)) != 0) { brelse(bp); goto fail; } } bap[indirs[i - 1].in_off] = nb; if (allocib == nil && unwindidx < 0) unwindidx = i - 1; /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & IO_SYNC) { bwrite(bp); } else { if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } } /* * If asked only for the indirect block, then return it. */ if (flags & BA_METAONLY) { curthread_pflags_restore(saved_inbdflush); *bpp = bp; return (0); } /* * Get the data block, allocating if necessary. */ if (nb == 0) { UFS_LOCK(ump); /* * If allocating metadata at the front of the cylinder * group and parent indirect block has just been allocated, * then cluster next to it if it is the first indirect in * the file. Otherwise it has been allocated in the metadata * area, so we want to find our own place out in the data area. */ if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0)) pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off, &bap[0]); error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags | IO_BUFLOCKED, cred, &newb); if (error) { brelse(bp); if (DOINGSOFTDEP(vp) && ++reclaimed == 1) { UFS_LOCK(ump); softdep_request_cleanup(fs, vp, cred, FLUSH_BLOCKS_WAIT); UFS_UNLOCK(ump); goto retry; } if (ppsratecheck(&lastfail, &curfail, 1)) { ffs_fserr(fs, ip->i_number, "filesystem full"); uprintf("\n%s: write failed, filesystem " "is full\n", fs->fs_fsmnt); } goto fail; } nb = newb; MPASS(allocblk < allociblk + nitems(allociblk)); MPASS(lbns_remfree < lbns + nitems(lbns)); *allocblk++ = nb; *lbns_remfree++ = lbn; nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags); nbp->b_blkno = fsbtodb(fs, nb); if (flags & BA_CLRBUF) vfs_bio_clrbuf(nbp); if (DOINGSOFTDEP(vp)) softdep_setup_allocindir_page(ip, lbn, bp, indirs[i].in_off, nb, 0, nbp); bap[indirs[i].in_off] = nb; /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & IO_SYNC) { bwrite(bp); } else { if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } curthread_pflags_restore(saved_inbdflush); *bpp = nbp; return (0); } brelse(bp); /* * If requested clear invalid portions of the buffer. If we * have to do a read-before-write (typical if BA_CLRBUF is set), * try to do some read-ahead in the sequential case to reduce * the number of I/O transactions. */ if (flags & BA_CLRBUF) { int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT; if (seqcount != 0 && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 && !(vm_page_count_severe() || buf_dirty_count_severe())) { error = cluster_read(vp, ip->i_size, lbn, (int)fs->fs_bsize, NOCRED, MAXBSIZE, seqcount, gbflags, &nbp); } else { error = bread_gb(vp, lbn, (int)fs->fs_bsize, NOCRED, gbflags, &nbp); } if (error) { brelse(nbp); goto fail; } } else {
/* * Balloc defines the structure of file system storage * by allocating the physical blocks on a device given * the inode and the logical block number in a file. */ int ext2fs_balloc(struct inode *ip, daddr_t bn, int size, kauth_cred_t cred, struct buf **bpp, int flags) { struct m_ext2fs *fs; daddr_t nb; struct buf *bp, *nbp; struct vnode *vp = ITOV(ip); struct indir indirs[EXT2FS_NIADDR + 2]; daddr_t newb, lbn, pref; int32_t *bap; /* XXX ondisk32 */ int num, i, error; u_int deallocated; daddr_t *blkp, *allocblk, allociblk[EXT2FS_NIADDR + 1]; int32_t *allocib; /* XXX ondisk32 */ int unwindidx = -1; UVMHIST_FUNC("ext2fs_balloc"); UVMHIST_CALLED(ubchist); UVMHIST_LOG(ubchist, "bn 0x%x", bn,0,0,0); if (bpp != NULL) { *bpp = NULL; } if (bn < 0) return (EFBIG); fs = ip->i_e2fs; lbn = bn; /* * The first EXT2FS_NDADDR blocks are direct blocks */ if (bn < EXT2FS_NDADDR) { /* XXX ondisk32 */ nb = fs2h32(ip->i_e2fs_blocks[bn]); if (nb != 0) { /* * the block is already allocated, just read it. */ if (bpp != NULL) { error = bread(vp, bn, fs->e2fs_bsize, NOCRED, B_MODIFY, &bp); if (error) { return (error); } *bpp = bp; } return (0); } /* * allocate a new direct block. */ error = ext2fs_alloc(ip, bn, ext2fs_blkpref(ip, bn, bn, &ip->i_e2fs_blocks[0]), cred, &newb); if (error) return (error); ip->i_e2fs_last_lblk = lbn; ip->i_e2fs_last_blk = newb; /* XXX ondisk32 */ ip->i_e2fs_blocks[bn] = h2fs32((int32_t)newb); ip->i_flag |= IN_CHANGE | IN_UPDATE; if (bpp != NULL) { bp = getblk(vp, bn, fs->e2fs_bsize, 0, 0); bp->b_blkno = EXT2_FSBTODB(fs, newb); if (flags & B_CLRBUF) clrbuf(bp); *bpp = bp; } return (0); } /* * Determine the number of levels of indirection. */ pref = 0; if ((error = ufs_getlbns(vp, bn, indirs, &num)) != 0) return(error); #ifdef DIAGNOSTIC if (num < 1) panic ("ext2fs_balloc: ufs_getlbns returned indirect block\n"); #endif /* * Fetch the first indirect block allocating if necessary. */ --num; /* XXX ondisk32 */ nb = fs2h32(ip->i_e2fs_blocks[EXT2FS_NDADDR + indirs[0].in_off]); allocib = NULL; allocblk = allociblk; if (nb == 0) { pref = ext2fs_blkpref(ip, lbn, 0, (int32_t *)0); error = ext2fs_alloc(ip, lbn, pref, cred, &newb); if (error) return (error); nb = newb; *allocblk++ = nb; ip->i_e2fs_last_blk = newb; bp = getblk(vp, indirs[1].in_lbn, fs->e2fs_bsize, 0, 0); bp->b_blkno = EXT2_FSBTODB(fs, newb); clrbuf(bp); /* * Write synchronously so that indirect blocks * never point at garbage. */ if ((error = bwrite(bp)) != 0) goto fail; unwindidx = 0; allocib = &ip->i_e2fs_blocks[EXT2FS_NDADDR + indirs[0].in_off]; /* XXX ondisk32 */ *allocib = h2fs32((int32_t)newb); ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * Fetch through the indirect blocks, allocating as necessary. */ for (i = 1;;) { error = bread(vp, indirs[i].in_lbn, (int)fs->e2fs_bsize, NOCRED, 0, &bp); if (error) { goto fail; } bap = (int32_t *)bp->b_data; /* XXX ondisk32 */ nb = fs2h32(bap[indirs[i].in_off]); if (i == num) break; i++; if (nb != 0) { brelse(bp, 0); continue; } pref = ext2fs_blkpref(ip, lbn, 0, (int32_t *)0); error = ext2fs_alloc(ip, lbn, pref, cred, &newb); if (error) { brelse(bp, 0); goto fail; } nb = newb; *allocblk++ = nb; ip->i_e2fs_last_blk = newb; nbp = getblk(vp, indirs[i].in_lbn, fs->e2fs_bsize, 0, 0); nbp->b_blkno = EXT2_FSBTODB(fs, nb); clrbuf(nbp); /* * Write synchronously so that indirect blocks * never point at garbage. */ if ((error = bwrite(nbp)) != 0) { brelse(bp, 0); goto fail; } if (unwindidx < 0) unwindidx = i - 1; /* XXX ondisk32 */ bap[indirs[i - 1].in_off] = h2fs32((int32_t)nb); /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & B_SYNC) { bwrite(bp); } else { bdwrite(bp); } } /* * Get the data block, allocating if necessary. */ if (nb == 0) { pref = ext2fs_blkpref(ip, lbn, indirs[num].in_off, &bap[0]); error = ext2fs_alloc(ip, lbn, pref, cred, &newb); if (error) { brelse(bp, 0); goto fail; } nb = newb; *allocblk++ = nb; ip->i_e2fs_last_lblk = lbn; ip->i_e2fs_last_blk = newb; /* XXX ondisk32 */ bap[indirs[num].in_off] = h2fs32((int32_t)nb); /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & B_SYNC) { bwrite(bp); } else { bdwrite(bp); } if (bpp != NULL) { nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0); nbp->b_blkno = EXT2_FSBTODB(fs, nb); if (flags & B_CLRBUF) clrbuf(nbp); *bpp = nbp; } return (0); } brelse(bp, 0); if (bpp != NULL) { if (flags & B_CLRBUF) { error = bread(vp, lbn, (int)fs->e2fs_bsize, NOCRED, B_MODIFY, &nbp); if (error) { goto fail; } } else { nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0); nbp->b_blkno = EXT2_FSBTODB(fs, nb); } *bpp = nbp; } return (0); fail: /* * If we have failed part way through block allocation, we * have to deallocate any indirect blocks that we have allocated. */ for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) { ext2fs_blkfree(ip, *blkp); deallocated += fs->e2fs_bsize; } if (unwindidx >= 0) { if (unwindidx == 0) { *allocib = 0; } else { int r; r = bread(vp, indirs[unwindidx].in_lbn, (int)fs->e2fs_bsize, NOCRED, B_MODIFY, &bp); if (r) { panic("Could not unwind indirect block, error %d", r); } else { bap = (int32_t *)bp->b_data; /* XXX ondisk32 */ bap[indirs[unwindidx].in_off] = 0; if (flags & B_SYNC) bwrite(bp); else bdwrite(bp); } } for (i = unwindidx + 1; i <= num; i++) { bp = getblk(vp, indirs[i].in_lbn, (int)fs->e2fs_bsize, 0, 0); brelse(bp, BC_INVAL); } } if (deallocated) { ext2fs_setnblock(ip, ext2fs_nblock(ip) - btodb(deallocated)); ip->i_e2fs_flags |= IN_CHANGE | IN_UPDATE; } return error; }
static int prclin(void) /* Build up line; return 1 for eof */ { uint8_t *pbptr; /* -> xxprev->bdata[bchars] for speed */ int inindent = 040000; /* Inside indenting w/s, mask matches mode */ /* */ for (pbchars = linpos = 0, pbptr = xxprev->bdata; pbchars < BUFMAX; linpos++) { if (state != 0 && state != 1) { /* Else assume state 2 */ if (!(linpos % tabsiz)) /* Reached tabstop (multiple of 8) */ state = 0; /* Fall through */ else goto endswitch; /* switch(state) case 2 */ } /* if(state!=0&&state!=1) */ if (!bytes && getblk()) /* Eof, getblk writes data */ return 1; thisch = *bufpos++; bytes--; /* When reading in binary, try to guess good places to break "lines" * (which are arbitrary anyway) unless requested not to (for w/f to use * the least amount of memory). (LATER) * We also try not to break strings of printable characters. */ if (binary) { if (thisch == '\n' || (thisch == 0 && pbchars > BLKCAP / 2) || (pbchars >= BLKCAP - 2 && (prevch < ' ' || prevch > '\176'))) { *pbptr++ = prevch = thisch; pbchars++; /* Store thisch */ return 0; /* Finish this line */ } /* if(thisch=='\n'||(thisch==0&&... */ } /* if(binary) */ else /* For Ascii files, now process significant characters: Newline, Return & Tab */ { if (inindent && thisch != '\t' && thisch != ' ') inindent = 0; if (thisch < ' ' || state) /* One of the following tests might pass */ { if (thisch == '\r') { /* If doing a DOS read and already holding on to a Cr, store it and stay in * state 1. Else, move to state 1 and store nothing */ if (xxmode & 1 && state != 1) /* DOS read, not holding Cr */ { state = 1; /* Holding a Cr */ continue; /* for(pbchars=linpos... (don't store Cr) */ } /* if(xxmode&1) */ linpos = -1; /* Back at line start */ goto endswitch; /* switch(state) cases 0 & 1 */ } /* if(thisch=='\r') */ if (thisch == '\n') { if (state == 1) /* Holding Cr */ state = 0; /* Discard Cr */ if (state != 0) /* Unexpected */ { visbel(); printf("\r\nUnexpected state %d at Nl\r\n", state); } /* if (state != 0) */ return 0; /* Have line */ } /* if(thisch=='\n') */ else if (state == 1) /* Holding on to Cr 2B stored now */ { bytes++, bufpos--; /* Put back char just read */ state = 0; thisch = '\r'; linpos = -1; /* Back at line start */ } /* else if(state==1) */ /* Only check for tab character after return has been checked for */ if (thisch == '\t' || thisch == '\b') { /* +tr || +l in ldg w/s */ if ((xxmode & 4) || (xxmode & 040000 & inindent)) { if (thisch == '\t') { thisch = ' '; /* Store spaces */ state = 2; /* Until on next 8-char boundary */ } /* if(thisch=='\t' */ else if (linpos > 0) /* thisch must be '\b' */ linpos -= 2; } /* if(xxmode&4) */ } /* if(thisch=='\t'||thisch=='\b') */ } /* if(thisch<' '||state) */ } /* if(binary) else */ endswitch: *pbptr++ = prevch = thisch; pbchars++; } /* for(pbchars=linpos=0,pbptr=xxprev->bdata;pbchars<BUFMAX;linpos++) */ /* * Dropped out of loop: we have a full line. If the file is binary, this * is the norm, just write away the chars read. * Otherwise warn user, drain chars until EOL... */ state = 0; /* No longer holding on to Cr */ if (!binary && lngwrn) { /* If the very next ch is Nl, we haven't actually misssed anything, */ /* so defer outputting a warning here. */ bool tried_next_ch = false; for (;;) { if (!bytes && getblk()) { if (!tried_next_ch) puts("Long line found!\r"); return 1; } /* if (!bytes && getblk()) */ thisch = *bufpos++; bytes--; if (thisch == '\n') /* Eol */ break; if (!tried_next_ch) { tried_next_ch = true; puts("Long line found!\r"); } /* if (!tried_next_ch) */ } /* for(;;) */ } /* if(!binary&&lngwrn) */ return 0; } /* static int prclin(void) */
errcode_t journal_find_head(journal_t *journal) { unsigned int next_commit_ID; blk64_t next_log_block, head_block; int err; journal_superblock_t *sb; journal_header_t *tmp; struct buffer_head *bh; unsigned int sequence; int blocktype; /* * First thing is to establish what we expect to find in the log * (in terms of transaction IDs), and where (in terms of log * block offsets): query the superblock. */ sb = journal->j_superblock; next_commit_ID = ext2fs_be32_to_cpu(sb->s_sequence); next_log_block = ext2fs_be32_to_cpu(sb->s_start); head_block = next_log_block; if (next_log_block == 0) return 0; bh = getblk(journal->j_dev, 0, journal->j_blocksize); if (bh == NULL) return ENOMEM; /* * Now we walk through the log, transaction by transaction, * making sure that each transaction has a commit block in the * expected place. Each complete transaction gets replayed back * into the main filesystem. */ while (1) { dbg_printf("Scanning for sequence ID %u at %lu/%lu\n", next_commit_ID, (unsigned long)next_log_block, journal->j_last); /* Skip over each chunk of the transaction looking * either the next descriptor block or the final commit * record. */ err = journal_bmap(journal, next_log_block, &bh->b_blocknr); if (err) goto err; mark_buffer_uptodate(bh, 0); ll_rw_block(READ, 1, &bh); err = bh->b_err; if (err) goto err; next_log_block++; wrap(journal, next_log_block); /* What kind of buffer is it? * * If it is a descriptor block, check that it has the * expected sequence number. Otherwise, we're all done * here. */ tmp = (journal_header_t *)bh->b_data; if (tmp->h_magic != ext2fs_cpu_to_be32(JFS_MAGIC_NUMBER)) { dbg_printf("JBD2: wrong magic 0x%x\n", tmp->h_magic); goto err; } blocktype = ext2fs_be32_to_cpu(tmp->h_blocktype); sequence = ext2fs_be32_to_cpu(tmp->h_sequence); dbg_printf("Found magic %d, sequence %d\n", blocktype, sequence); if (sequence != next_commit_ID) { dbg_printf("JBD2: Wrong sequence %d (wanted %d)\n", sequence, next_commit_ID); goto err; } /* OK, we have a valid descriptor block which matches * all of the sequence number checks. What are we going * to do with it? That depends on the pass... */ switch (blocktype) { case JFS_DESCRIPTOR_BLOCK: next_log_block += count_tags(journal, bh->b_data); wrap(journal, next_log_block); continue; case JFS_COMMIT_BLOCK: head_block = next_log_block; next_commit_ID++; continue; case JFS_REVOKE_BLOCK: continue; default: dbg_printf("Unrecognised magic %d, end of scan.\n", blocktype); err = -EINVAL; goto err; } } err: if (err == 0) { dbg_printf("head seq=%d blk=%llu\n", next_commit_ID, head_block); journal->j_transaction_sequence = next_commit_ID; journal->j_head = head_block; } brelse(bh); return err; }
static errcode_t journal_commit_trans(journal_transaction_t *trans) { struct buffer_head *bh, *cbh = NULL; struct commit_header *commit; #ifdef HAVE_SYS_TIME_H struct timeval tv; #endif errcode_t err; JOURNAL_CHECK_TRANS_MAGIC(trans); if ((trans->flags & J_TRANS_COMMITTED) || !(trans->flags & J_TRANS_OPEN)) return EXT2_ET_INVALID_ARGUMENT; bh = getblk(trans->journal->j_dev, 0, trans->journal->j_blocksize); if (bh == NULL) return ENOMEM; /* write the descriptor block header */ commit = (struct commit_header *)bh->b_data; commit->h_magic = ext2fs_cpu_to_be32(JFS_MAGIC_NUMBER); commit->h_blocktype = ext2fs_cpu_to_be32(JFS_COMMIT_BLOCK); commit->h_sequence = ext2fs_cpu_to_be32(trans->tid); if (JFS_HAS_COMPAT_FEATURE(trans->journal, JFS_FEATURE_COMPAT_CHECKSUM)) { __u32 csum_v1 = ~0; blk64_t cblk; cbh = getblk(trans->journal->j_dev, 0, trans->journal->j_blocksize); if (cbh == NULL) { err = ENOMEM; goto error; } for (cblk = trans->start; cblk < trans->block; cblk++) { err = journal_bmap(trans->journal, cblk, &cbh->b_blocknr); if (err) goto error; mark_buffer_uptodate(cbh, 0); ll_rw_block(READ, 1, &cbh); err = cbh->b_err; if (err) goto error; csum_v1 = ext2fs_crc32_be(csum_v1, (unsigned char const *)cbh->b_data, cbh->b_size); } commit->h_chksum_type = JFS_CRC32_CHKSUM; commit->h_chksum_size = JFS_CRC32_CHKSUM_SIZE; commit->h_chksum[0] = ext2fs_cpu_to_be32(csum_v1); } else { commit->h_chksum_type = 0; commit->h_chksum_size = 0; commit->h_chksum[0] = 0; } #ifdef HAVE_SYS_TIME_H gettimeofday(&tv, NULL); commit->h_commit_sec = ext2fs_cpu_to_be32(tv.tv_sec); commit->h_commit_nsec = ext2fs_cpu_to_be32(tv.tv_usec * 1000); #else commit->h_commit_sec = 0; commit->h_commit_nsec = 0; #endif /* Write block */ jbd2_commit_block_csum_set(trans->journal, bh); err = journal_bmap(trans->journal, trans->block, &bh->b_blocknr); if (err) goto error; dbg_printf("Writing commit block at %llu:%llu\n", trans->block, bh->b_blocknr); mark_buffer_dirty(bh); ll_rw_block(WRITE, 1, &bh); err = bh->b_err; if (err) goto error; trans->flags |= J_TRANS_COMMITTED; trans->flags &= ~J_TRANS_OPEN; trans->block++; trans->fs->super->s_feature_incompat |= EXT3_FEATURE_INCOMPAT_RECOVER; ext2fs_mark_super_dirty(trans->fs); error: if (cbh) brelse(cbh); brelse(bh); return err; }
static errcode_t journal_add_revoke_to_trans(journal_transaction_t *trans, blk64_t *revoke_list, size_t revoke_len) { journal_revoke_header_t *jrb; void *buf; size_t i, offset; blk64_t curr_blk; int sz, csum_size = 0; struct buffer_head *bh; errcode_t err; JOURNAL_CHECK_TRANS_MAGIC(trans); if ((trans->flags & J_TRANS_COMMITTED) || !(trans->flags & J_TRANS_OPEN)) return EXT2_ET_INVALID_ARGUMENT; if (revoke_len == 0) return 0; /* Do we need to leave space at the end for a checksum? */ if (journal_has_csum_v2or3(trans->journal)) csum_size = sizeof(struct journal_revoke_tail); curr_blk = trans->block; bh = getblk(trans->journal->j_dev, curr_blk, trans->journal->j_blocksize); if (bh == NULL) return ENOMEM; jrb = buf = bh->b_data; jrb->r_header.h_magic = ext2fs_cpu_to_be32(JFS_MAGIC_NUMBER); jrb->r_header.h_blocktype = ext2fs_cpu_to_be32(JFS_REVOKE_BLOCK); jrb->r_header.h_sequence = ext2fs_cpu_to_be32(trans->tid); offset = sizeof(*jrb); if (JFS_HAS_INCOMPAT_FEATURE(trans->journal, JFS_FEATURE_INCOMPAT_64BIT)) sz = 8; else sz = 4; for (i = 0; i < revoke_len; i++) { /* Block full, write to journal */ if (offset + sz > trans->journal->j_blocksize - csum_size) { jrb->r_count = ext2fs_cpu_to_be32(offset); jbd2_revoke_csum_set(trans->journal, bh); err = journal_bmap(trans->journal, curr_blk, &bh->b_blocknr); if (err) goto error; dbg_printf("Writing revoke block at %llu:%llu\n", curr_blk, bh->b_blocknr); mark_buffer_dirty(bh); ll_rw_block(WRITE, 1, &bh); err = bh->b_err; if (err) goto error; offset = sizeof(*jrb); curr_blk++; } if (revoke_list[i] >= ext2fs_blocks_count(trans->journal->j_fs_dev->k_fs->super)) { err = EXT2_ET_BAD_BLOCK_NUM; goto error; } if (JFS_HAS_INCOMPAT_FEATURE(trans->journal, JFS_FEATURE_INCOMPAT_64BIT)) *((__u64 *)(&((char *)buf)[offset])) = ext2fs_cpu_to_be64(revoke_list[i]); else *((__u32 *)(&((char *)buf)[offset])) = ext2fs_cpu_to_be32(revoke_list[i]); offset += sz; } if (offset > 0) { jrb->r_count = ext2fs_cpu_to_be32(offset); jbd2_revoke_csum_set(trans->journal, bh); err = journal_bmap(trans->journal, curr_blk, &bh->b_blocknr); if (err) goto error; dbg_printf("Writing revoke block at %llu:%llu\n", curr_blk, bh->b_blocknr); mark_buffer_dirty(bh); ll_rw_block(WRITE, 1, &bh); err = bh->b_err; if (err) goto error; curr_blk++; } error: trans->block = curr_blk; brelse(bh); return err; }
static errcode_t journal_add_blocks_to_trans(journal_transaction_t *trans, blk64_t *block_list, size_t block_len, FILE *fp) { blk64_t curr_blk, jdb_blk; size_t i, j; int csum_size = 0; journal_header_t *jdb; journal_block_tag_t *jdbt; int tag_bytes; void *buf = NULL, *jdb_buf = NULL; struct buffer_head *bh = NULL, *data_bh; errcode_t err; JOURNAL_CHECK_TRANS_MAGIC(trans); if ((trans->flags & J_TRANS_COMMITTED) || !(trans->flags & J_TRANS_OPEN)) return EXT2_ET_INVALID_ARGUMENT; if (block_len == 0) return 0; /* Do we need to leave space at the end for a checksum? */ if (journal_has_csum_v2or3(trans->journal)) csum_size = sizeof(struct journal_block_tail); curr_blk = jdb_blk = trans->block; data_bh = getblk(trans->journal->j_dev, curr_blk, trans->journal->j_blocksize); if (data_bh == NULL) return ENOMEM; buf = data_bh->b_data; /* write the descriptor block header */ bh = getblk(trans->journal->j_dev, curr_blk, trans->journal->j_blocksize); if (bh == NULL) { err = ENOMEM; goto error; } jdb = jdb_buf = bh->b_data; jdb->h_magic = ext2fs_cpu_to_be32(JFS_MAGIC_NUMBER); jdb->h_blocktype = ext2fs_cpu_to_be32(JFS_DESCRIPTOR_BLOCK); jdb->h_sequence = ext2fs_cpu_to_be32(trans->tid); jdbt = (journal_block_tag_t *)(jdb + 1); curr_blk++; for (i = 0; i < block_len; i++) { j = fread(data_bh->b_data, trans->journal->j_blocksize, 1, fp); if (j != 1) { err = errno; goto error; } tag_bytes = journal_tag_bytes(trans->journal); /* No space left in descriptor block, write it out */ if ((char *)jdbt + tag_bytes > (char *)jdb_buf + trans->journal->j_blocksize - csum_size) { jbd2_descr_block_csum_set(trans->journal, bh); err = journal_bmap(trans->journal, jdb_blk, &bh->b_blocknr); if (err) goto error; dbg_printf("Writing descriptor block at %llu:%llu\n", jdb_blk, bh->b_blocknr); mark_buffer_dirty(bh); ll_rw_block(WRITE, 1, &bh); err = bh->b_err; if (err) goto error; jdbt = (journal_block_tag_t *)(jdb + 1); jdb_blk = curr_blk; curr_blk++; } if (block_list[i] >= ext2fs_blocks_count(trans->journal->j_fs_dev->k_fs->super)) { err = EXT2_ET_BAD_BLOCK_NUM; goto error; } /* Fill out the block tag */ jdbt->t_blocknr = ext2fs_cpu_to_be32(block_list[i] & 0xFFFFFFFF); jdbt->t_flags = 0; if (jdbt != (journal_block_tag_t *)(jdb + 1)) jdbt->t_flags |= ext2fs_cpu_to_be16(JFS_FLAG_SAME_UUID); else { memcpy(jdbt + tag_bytes, trans->journal->j_superblock->s_uuid, sizeof(trans->journal->j_superblock->s_uuid)); tag_bytes += 16; } if (i == block_len - 1) jdbt->t_flags |= ext2fs_cpu_to_be16(JFS_FLAG_LAST_TAG); if (*((__u32 *)buf) == ext2fs_cpu_to_be32(JFS_MAGIC_NUMBER)) { *((__u32 *)buf) = 0; jdbt->t_flags |= ext2fs_cpu_to_be16(JFS_FLAG_ESCAPE); } if (JFS_HAS_INCOMPAT_FEATURE(trans->journal, JFS_FEATURE_INCOMPAT_64BIT)) jdbt->t_blocknr_high = ext2fs_cpu_to_be32(block_list[i] >> 32); jbd2_block_tag_csum_set(trans->journal, jdbt, data_bh, trans->tid); /* Write the data block */ err = journal_bmap(trans->journal, curr_blk, &data_bh->b_blocknr); if (err) goto error; dbg_printf("Writing data block %llu at %llu:%llu tag %d\n", block_list[i], curr_blk, data_bh->b_blocknr, tag_bytes); mark_buffer_dirty(data_bh); ll_rw_block(WRITE, 1, &data_bh); err = data_bh->b_err; if (err) goto error; curr_blk++; jdbt = (journal_block_tag_t *)(((char *)jdbt) + tag_bytes); } /* Write out the last descriptor block */ if (jdbt != (journal_block_tag_t *)(jdb + 1)) { jbd2_descr_block_csum_set(trans->journal, bh); err = journal_bmap(trans->journal, jdb_blk, &bh->b_blocknr); if (err) goto error; dbg_printf("Writing descriptor block at %llu:%llu\n", jdb_blk, bh->b_blocknr); mark_buffer_dirty(bh); ll_rw_block(WRITE, 1, &bh); err = bh->b_err; if (err) goto error; } error: trans->block = curr_blk; if (bh) brelse(bh); brelse(data_bh); return err; }
int xfs_write_file(xfs_inode_t *xip, struct uio *uio, int ioflag) { struct buf *bp; //struct thread *td; daddr_t lbn; off_t osize = 0; off_t offset= 0; int blkoffset, error, resid, xfersize; int fsblocksize; int seqcount; xfs_iomap_t iomap; int maps = 1; xfs_vnode_t *xvp = XFS_ITOV(xip); struct vnode *vp = xvp->v_vnode; xfs_mount_t *mp = (&xip->i_iocore)->io_mount; seqcount = ioflag >> IO_SEQSHIFT; memset(&iomap,0,sizeof(xfs_iomap_t)); /* * Maybe this should be above the vnode op call, but so long as * file servers have no limits, I don't think it matters. */ #if 0 td = uio->uio_td; if (vn_rlimit_fsize(vp, uio, uio->uio_td)) return (EFBIG); #endif resid = uio->uio_resid; offset = uio->uio_offset; osize = xip->i_d.di_size; /* xfs bmap wants bytes for both offset and size */ XVOP_BMAP(xvp, uio->uio_offset, uio->uio_resid, BMAPI_WRITE|BMAPI_DIRECT, &iomap, &maps, error); if(error) { printf("XVOP_BMAP failed\n"); goto error; } for (error = 0; uio->uio_resid > 0;) { lbn = XFS_B_TO_FSBT(mp, offset); blkoffset = XFS_B_FSB_OFFSET(mp, offset); xfersize = mp->m_sb.sb_blocksize - blkoffset; fsblocksize = mp->m_sb.sb_blocksize; if (uio->uio_resid < xfersize) xfersize = uio->uio_resid; /* * getblk sets buf by blkno * bo->bo_bsize * bo_bsize is set from the mnt point fsize * so we call getblk in the case using fsblocks * not basic blocks */ bp = getblk(vp, lbn, fsblocksize, 0, 0, 0); if(!bp) { printf("getblk failed\n"); error = EINVAL; break; } if (!(bp->b_flags & B_CACHE) && fsblocksize > xfersize) vfs_bio_clrbuf(bp); if (offset + xfersize > xip->i_d.di_size) { xip->i_d.di_size = offset + xfersize; vnode_pager_setsize(vp, offset + fsblocksize); } /* move the offset for the next itteration of the loop */ offset += xfersize; error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio); if ((ioflag & IO_VMIO) && (LIST_FIRST(&bp->b_dep) == NULL)) /* in ext2fs? */ bp->b_flags |= B_RELBUF; /* force to full direct for now */ bp->b_flags |= B_DIRECT; /* and sync ... the delay path is not pushing data out */ ioflag |= IO_SYNC; if (ioflag & IO_SYNC) { (void)bwrite(bp); } else if (0 /* RMC xfersize + blkoffset == fs->s_frag_size */) { if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) { bp->b_flags |= B_CLUSTEROK; cluster_write(vp, bp, osize, seqcount); } else { bawrite(bp); } } else { bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } if (error || xfersize == 0) break; } /* * If we successfully wrote any data, and we are not the superuser * we clear the setuid and setgid bits as a precaution against * tampering. */ #if 0 if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0) ip->i_mode &= ~(ISUID | ISGID); #endif if (error) { if (ioflag & IO_UNIT) { #if 0 (void)ext2_truncate(vp, osize, ioflag & IO_SYNC, ap->a_cred, uio->uio_td); #endif uio->uio_offset -= resid - uio->uio_resid; uio->uio_resid = resid; } } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) { /* Update the vnode here? */ } error: return error; }
/* * Release blocks associated with the inode ip and stored in the indirect * block bn. Blocks are free'd in LIFO order up to (but not including) * lastbn. If level is greater than SINGLE, the block is an indirect block * and recursive calls to indirtrunc must be used to cleanse other indirect * blocks. * * NB: triple indirect blocks are untested. */ static int lfs_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn, daddr_t lastbn, int level, daddr_t *countp, daddr_t *rcountp, long *lastsegp, size_t *bcp) { int i; struct buf *bp; struct lfs *fs = ip->i_lfs; int32_t *bap; /* XXX ondisk32 */ struct vnode *vp; daddr_t nb, nlbn, last; int32_t *copy = NULL; /* XXX ondisk32 */ daddr_t blkcount, rblkcount, factor; int nblocks; daddr_t blocksreleased = 0, real_released = 0; int error = 0, allerror = 0; ASSERT_SEGLOCK(fs); /* * Calculate index in current block of last * block to be kept. -1 indicates the entire * block so we need not calculate the index. */ factor = 1; for (i = SINGLE; i < level; i++) factor *= LFS_NINDIR(fs); last = lastbn; if (lastbn > 0) last /= factor; nblocks = lfs_btofsb(fs, lfs_sb_getbsize(fs)); /* * Get buffer of block pointers, zero those entries corresponding * to blocks to be free'd, and update on disk copy first. Since * double(triple) indirect before single(double) indirect, calls * to bmap on these blocks will fail. However, we already have * the on disk address, so we have to set the b_blkno field * explicitly instead of letting bread do everything for us. */ vp = ITOV(ip); bp = getblk(vp, lbn, lfs_sb_getbsize(fs), 0, 0); if (bp->b_oflags & (BO_DONE | BO_DELWRI)) { /* Braces must be here in case trace evaluates to nothing. */ trace(TR_BREADHIT, pack(vp, lfs_sb_getbsize(fs)), lbn); } else { trace(TR_BREADMISS, pack(vp, lfs_sb_getbsize(fs)), lbn); curlwp->l_ru.ru_inblock++; /* pay for read */ bp->b_flags |= B_READ; if (bp->b_bcount > bp->b_bufsize) panic("lfs_indirtrunc: bad buffer size"); bp->b_blkno = LFS_FSBTODB(fs, dbn); VOP_STRATEGY(vp, bp); error = biowait(bp); } if (error) { brelse(bp, 0); *countp = *rcountp = 0; return (error); } bap = (int32_t *)bp->b_data; /* XXX ondisk32 */ if (lastbn >= 0) { copy = lfs_malloc(fs, lfs_sb_getbsize(fs), LFS_NB_IBLOCK); memcpy((void *)copy, (void *)bap, lfs_sb_getbsize(fs)); memset((void *)&bap[last + 1], 0, /* XXX ondisk32 */ (u_int)(LFS_NINDIR(fs) - (last + 1)) * sizeof (int32_t)); error = VOP_BWRITE(bp->b_vp, bp); if (error) allerror = error; bap = copy; } /* * Recursively free totally unused blocks. */ for (i = LFS_NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last; i--, nlbn += factor) { nb = bap[i]; if (nb == 0) continue; if (level > SINGLE) { error = lfs_indirtrunc(ip, nlbn, nb, (daddr_t)-1, level - 1, &blkcount, &rblkcount, lastsegp, bcp); if (error) allerror = error; blocksreleased += blkcount; real_released += rblkcount; } lfs_blkfree(fs, ip, nb, lfs_sb_getbsize(fs), lastsegp, bcp); if (bap[i] > 0) real_released += nblocks; blocksreleased += nblocks; } /* * Recursively free last partial block. */ if (level > SINGLE && lastbn >= 0) { last = lastbn % factor; nb = bap[i]; if (nb != 0) { error = lfs_indirtrunc(ip, nlbn, nb, last, level - 1, &blkcount, &rblkcount, lastsegp, bcp); if (error) allerror = error; real_released += rblkcount; blocksreleased += blkcount; } } if (copy != NULL) { lfs_free(fs, copy, LFS_NB_IBLOCK); } else { mutex_enter(&bufcache_lock); if (bp->b_oflags & BO_DELWRI) { LFS_UNLOCK_BUF(bp); lfs_sb_addavail(fs, lfs_btofsb(fs, bp->b_bcount)); wakeup(&fs->lfs_availsleep); } brelsel(bp, BC_INVAL); mutex_exit(&bufcache_lock); } *countp = blocksreleased; *rcountp = real_released; return (allerror); }
static int trunc_indirect(struct inode * inode, int offset, unsigned long * p) { int i, tmp; struct buffer_head * bh; struct buffer_head * ind_bh; unsigned long * ind; int retry = 0; #define INDIRECT_BLOCK (DIRECT_BLOCK-offset) tmp = *p; if (!tmp) return 0; ind_bh = bread(inode->i_dev, tmp, BLOCK_SIZE); if (tmp != *p) { brelse(ind_bh); return 1; } if (!ind_bh) { *p = 0; return 0; } repeat: for (i = INDIRECT_BLOCK ; i < 256 ; i++) { if (i < 0) i = 0; if (i < INDIRECT_BLOCK) goto repeat; ind = i+(unsigned long *) ind_bh->b_data; tmp = *ind; if (!tmp) continue; bh = getblk(inode->i_dev,tmp,BLOCK_SIZE); if (i < INDIRECT_BLOCK) { brelse(bh); goto repeat; } if ((bh && bh->b_count != 1) || tmp != *ind) { retry = 1; brelse(bh); continue; } *ind = 0; mark_buffer_dirty(ind_bh, 1); brelse(bh); ext_free_block(inode->i_sb,tmp); } ind = (unsigned long *) ind_bh->b_data; for (i = 0; i < 256; i++) if (*(ind++)) break; if (i >= 256) if (ind_bh->b_count != 1) retry = 1; else { tmp = *p; *p = 0; inode->i_dirt = 1; ext_free_block(inode->i_sb,tmp); } brelse(ind_bh); return retry; }
static errcode_t ext2fs_get_journal(ext2_filsys fs, journal_t **ret_journal) { struct process_block_struct pb; struct ext2_super_block *sb = fs->super; struct ext2_super_block jsuper; struct buffer_head *bh; struct inode *j_inode = NULL; struct kdev_s *dev_fs = NULL, *dev_journal; const char *journal_name = 0; journal_t *journal = NULL; errcode_t retval = 0; io_manager io_ptr = 0; unsigned long long start = 0; int ext_journal = 0; int tried_backup_jnl = 0; retval = ext2fs_get_memzero(sizeof(journal_t), &journal); if (retval) return retval; retval = ext2fs_get_memzero(2 * sizeof(struct kdev_s), &dev_fs); if (retval) goto errout; dev_journal = dev_fs+1; dev_fs->k_fs = dev_journal->k_fs = fs; dev_fs->k_dev = K_DEV_FS; dev_journal->k_dev = K_DEV_JOURNAL; journal->j_dev = dev_journal; journal->j_fs_dev = dev_fs; journal->j_inode = NULL; journal->j_blocksize = fs->blocksize; if (uuid_is_null(sb->s_journal_uuid)) { if (!sb->s_journal_inum) { retval = EXT2_ET_BAD_INODE_NUM; goto errout; } retval = ext2fs_get_memzero(sizeof(*j_inode), &j_inode); if (retval) goto errout; j_inode->i_fs = fs; j_inode->i_ino = sb->s_journal_inum; retval = ext2fs_read_inode(fs, sb->s_journal_inum, &j_inode->i_ext2); if (retval) { try_backup_journal: if (sb->s_jnl_backup_type != EXT3_JNL_BACKUP_BLOCKS || tried_backup_jnl) goto errout; memset(&j_inode->i_ext2, 0, sizeof(struct ext2_inode)); memcpy(&j_inode->i_ext2.i_block[0], sb->s_jnl_blocks, EXT2_N_BLOCKS*4); j_inode->i_ext2.i_size_high = sb->s_jnl_blocks[15]; j_inode->i_ext2.i_size = sb->s_jnl_blocks[16]; j_inode->i_ext2.i_links_count = 1; j_inode->i_ext2.i_mode = LINUX_S_IFREG | 0600; tried_backup_jnl++; } if (!j_inode->i_ext2.i_links_count || !LINUX_S_ISREG(j_inode->i_ext2.i_mode)) { retval = EXT2_ET_NO_JOURNAL; goto try_backup_journal; } if (EXT2_I_SIZE(&j_inode->i_ext2) / journal->j_blocksize < JFS_MIN_JOURNAL_BLOCKS) { retval = EXT2_ET_JOURNAL_TOO_SMALL; goto try_backup_journal; } pb.last_block = -1; retval = ext2fs_block_iterate3(fs, j_inode->i_ino, BLOCK_FLAG_HOLE, 0, process_journal_block, &pb); if ((pb.last_block + 1) * fs->blocksize < (int) EXT2_I_SIZE(&j_inode->i_ext2)) { retval = EXT2_ET_JOURNAL_TOO_SMALL; goto try_backup_journal; } if (tried_backup_jnl && (fs->flags & EXT2_FLAG_RW)) { retval = ext2fs_write_inode(fs, sb->s_journal_inum, &j_inode->i_ext2); if (retval) goto errout; } journal->j_maxlen = EXT2_I_SIZE(&j_inode->i_ext2) / journal->j_blocksize; #ifdef USE_INODE_IO retval = ext2fs_inode_io_intern2(fs, sb->s_journal_inum, &j_inode->i_ext2, &journal_name); if (retval) goto errout; io_ptr = inode_io_manager; #else journal->j_inode = j_inode; fs->journal_io = fs->io; retval = (errcode_t)journal_bmap(journal, 0, &start); if (retval) goto errout; #endif } else { ext_journal = 1; if (!fs->journal_name) { char uuid[37]; blkid_cache blkid; blkid_get_cache(&blkid, NULL); uuid_unparse(sb->s_journal_uuid, uuid); fs->journal_name = blkid_get_devname(blkid, "UUID", uuid); if (!fs->journal_name) fs->journal_name = blkid_devno_to_devname(sb->s_journal_dev); blkid_put_cache(blkid); } journal_name = fs->journal_name; if (!journal_name) { retval = EXT2_ET_LOAD_EXT_JOURNAL; goto errout; } jfs_debug(1, "Using journal file %s\n", journal_name); io_ptr = unix_io_manager; } #if 0 test_io_backing_manager = io_ptr; io_ptr = test_io_manager; #endif #ifndef USE_INODE_IO if (ext_journal) #endif { retval = io_ptr->open(journal_name, fs->flags & EXT2_FLAG_RW, &fs->journal_io); } if (retval) goto errout; io_channel_set_blksize(fs->journal_io, fs->blocksize); if (ext_journal) { blk64_t maxlen; start = ext2fs_journal_sb_start(fs->blocksize) - 1; bh = getblk(dev_journal, start, fs->blocksize); if (!bh) { retval = EXT2_ET_NO_MEMORY; goto errout; } ll_rw_block(READ, 1, &bh); retval = bh->b_err; if (retval) { brelse(bh); goto errout; } memcpy(&jsuper, start ? bh->b_data : bh->b_data + SUPERBLOCK_OFFSET, sizeof(jsuper)); #ifdef WORDS_BIGENDIAN if (jsuper.s_magic == ext2fs_swab16(EXT2_SUPER_MAGIC)) ext2fs_swap_super(&jsuper); #endif if (jsuper.s_magic != EXT2_SUPER_MAGIC || !ext2fs_has_feature_journal_dev(&jsuper)) { retval = EXT2_ET_LOAD_EXT_JOURNAL; brelse(bh); goto errout; } /* Make sure the journal UUID is correct */ if (memcmp(jsuper.s_uuid, fs->super->s_journal_uuid, sizeof(jsuper.s_uuid))) { retval = EXT2_ET_LOAD_EXT_JOURNAL; brelse(bh); goto errout; } /* Check the superblock checksum */ if (ext2fs_has_feature_metadata_csum(&jsuper)) { struct struct_ext2_filsys fsx; struct ext2_super_block superx; void *p; p = start ? bh->b_data : bh->b_data + SUPERBLOCK_OFFSET; memcpy(&fsx, fs, sizeof(fsx)); memcpy(&superx, fs->super, sizeof(superx)); fsx.super = &superx; ext2fs_set_feature_metadata_csum(fsx.super); if (!ext2fs_superblock_csum_verify(&fsx, p)) { retval = EXT2_ET_LOAD_EXT_JOURNAL; brelse(bh); goto errout; } } brelse(bh); maxlen = ext2fs_blocks_count(&jsuper); journal->j_maxlen = (maxlen < 1ULL << 32) ? maxlen : (1ULL << 32) - 1; start++; } bh = getblk(dev_journal, start, journal->j_blocksize); if (!bh) { retval = EXT2_ET_NO_MEMORY; goto errout; } journal->j_sb_buffer = bh; journal->j_superblock = (journal_superblock_t *)bh->b_data; #ifdef USE_INODE_IO if (j_inode) ext2fs_free_mem(&j_inode); #endif *ret_journal = journal; return 0; errout: if (dev_fs) ext2fs_free_mem(&dev_fs); if (j_inode) ext2fs_free_mem(&j_inode); if (journal) ext2fs_free_mem(&journal); return retval; }
/* * hpfs_write(struct vnode *a_vp, struct uio *a_uio, int a_ioflag, * struct ucred *a_cred) */ static int hpfs_write(struct vop_write_args *ap) { struct vnode *vp = ap->a_vp; struct hpfsnode *hp = VTOHP(vp); struct uio *uio = ap->a_uio; struct buf *bp; u_int xfersz, towrite; u_int off; daddr_t lbn, bn; int runl; int error = 0; dprintf(("hpfs_write(0x%x, off: %d resid: %ld, segflg: %d):\n", hp->h_no, (u_int32_t)uio->uio_offset, uio->uio_resid, uio->uio_segflg)); if (ap->a_ioflag & IO_APPEND) { dprintf(("hpfs_write: APPEND mode\n")); uio->uio_offset = hp->h_fn.fn_size; } if (uio->uio_offset + uio->uio_resid > hp->h_fn.fn_size) { error = hpfs_extend (hp, uio->uio_offset + uio->uio_resid); if (error) { kprintf("hpfs_write: hpfs_extend FAILED %d\n", error); return (error); } } while (uio->uio_resid) { lbn = uio->uio_offset >> DEV_BSHIFT; off = uio->uio_offset & (DEV_BSIZE - 1); dprintf(("hpfs_write: resid: 0x%lx lbn: 0x%x off: 0x%x\n", uio->uio_resid, lbn, off)); error = hpfs_hpbmap(hp, lbn, &bn, &runl); if (error) return (error); towrite = szmin(off + uio->uio_resid, min(DFLTPHYS, (runl+1)*DEV_BSIZE)); xfersz = (towrite + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); dprintf(("hpfs_write: bn: 0x%x (0x%x) towrite: 0x%x (0x%x)\n", bn, runl, towrite, xfersz)); /* * We do not have to issue a read-before-write if the xfer * size does not cover the whole block. * * In the UIO_NOCOPY case, however, we are not overwriting * anything and must do a read-before-write to fill in * any missing pieces. */ if (off == 0 && towrite == xfersz && uio->uio_segflg != UIO_NOCOPY) { bp = getblk(hp->h_devvp, dbtodoff(bn), xfersz, 0, 0); clrbuf(bp); } else { error = bread(hp->h_devvp, dbtodoff(bn), xfersz, &bp); if (error) { brelse(bp); return (error); } } error = uiomove(bp->b_data + off, (size_t)(towrite - off), uio); if(error) { brelse(bp); return (error); } if (ap->a_ioflag & IO_SYNC) bwrite(bp); else bawrite(bp); } dprintf(("hpfs_write: successful\n")); return (0); }
static errcode_t e2fsck_get_journal(e2fsck_t ctx, journal_t **ret_journal) { struct process_block_struct pb; struct ext2_super_block *sb = ctx->fs->super; struct ext2_super_block jsuper; struct problem_context pctx; struct buffer_head *bh; struct inode *j_inode = NULL; struct kdev_s *dev_fs = NULL, *dev_journal; const char *journal_name = 0; journal_t *journal = NULL; errcode_t retval = 0; io_manager io_ptr = 0; unsigned long long start = 0; int ext_journal = 0; int tried_backup_jnl = 0; clear_problem_context(&pctx); journal = e2fsck_allocate_memory(ctx, sizeof(journal_t), "journal"); if (!journal) { return EXT2_ET_NO_MEMORY; } dev_fs = e2fsck_allocate_memory(ctx, 2*sizeof(struct kdev_s), "kdev"); if (!dev_fs) { retval = EXT2_ET_NO_MEMORY; goto errout; } dev_journal = dev_fs+1; dev_fs->k_ctx = dev_journal->k_ctx = ctx; dev_fs->k_dev = K_DEV_FS; dev_journal->k_dev = K_DEV_JOURNAL; journal->j_dev = dev_journal; journal->j_fs_dev = dev_fs; journal->j_inode = NULL; journal->j_blocksize = ctx->fs->blocksize; if (uuid_is_null(sb->s_journal_uuid)) { if (!sb->s_journal_inum) { retval = EXT2_ET_BAD_INODE_NUM; goto errout; } j_inode = e2fsck_allocate_memory(ctx, sizeof(*j_inode), "journal inode"); if (!j_inode) { retval = EXT2_ET_NO_MEMORY; goto errout; } j_inode->i_ctx = ctx; j_inode->i_ino = sb->s_journal_inum; if ((retval = ext2fs_read_inode(ctx->fs, sb->s_journal_inum, &j_inode->i_ext2))) { try_backup_journal: if (sb->s_jnl_backup_type != EXT3_JNL_BACKUP_BLOCKS || tried_backup_jnl) goto errout; memset(&j_inode->i_ext2, 0, sizeof(struct ext2_inode)); memcpy(&j_inode->i_ext2.i_block[0], sb->s_jnl_blocks, EXT2_N_BLOCKS*4); j_inode->i_ext2.i_size_high = sb->s_jnl_blocks[15]; j_inode->i_ext2.i_size = sb->s_jnl_blocks[16]; j_inode->i_ext2.i_links_count = 1; j_inode->i_ext2.i_mode = LINUX_S_IFREG | 0600; e2fsck_use_inode_shortcuts(ctx, 1); ctx->stashed_ino = j_inode->i_ino; ctx->stashed_inode = &j_inode->i_ext2; tried_backup_jnl++; } if (!j_inode->i_ext2.i_links_count || !LINUX_S_ISREG(j_inode->i_ext2.i_mode)) { retval = EXT2_ET_NO_JOURNAL; goto try_backup_journal; } if (EXT2_I_SIZE(&j_inode->i_ext2) / journal->j_blocksize < JFS_MIN_JOURNAL_BLOCKS) { retval = EXT2_ET_JOURNAL_TOO_SMALL; goto try_backup_journal; } pb.last_block = -1; retval = ext2fs_block_iterate3(ctx->fs, j_inode->i_ino, BLOCK_FLAG_HOLE, 0, process_journal_block, &pb); if ((pb.last_block + 1) * ctx->fs->blocksize < (int) EXT2_I_SIZE(&j_inode->i_ext2)) { retval = EXT2_ET_JOURNAL_TOO_SMALL; goto try_backup_journal; } if (tried_backup_jnl && !(ctx->options & E2F_OPT_READONLY)) { retval = ext2fs_write_inode(ctx->fs, sb->s_journal_inum, &j_inode->i_ext2); if (retval) goto errout; } journal->j_maxlen = EXT2_I_SIZE(&j_inode->i_ext2) / journal->j_blocksize; #ifdef USE_INODE_IO retval = ext2fs_inode_io_intern2(ctx->fs, sb->s_journal_inum, &j_inode->i_ext2, &journal_name); if (retval) goto errout; io_ptr = inode_io_manager; #else journal->j_inode = j_inode; ctx->journal_io = ctx->fs->io; if ((retval = (errcode_t) journal_bmap(journal, 0, &start)) != 0) goto errout; #endif } else { ext_journal = 1; if (!ctx->journal_name) { char uuid[37]; uuid_unparse(sb->s_journal_uuid, uuid); ctx->journal_name = blkid_get_devname(ctx->blkid, "UUID", uuid); if (!ctx->journal_name) ctx->journal_name = blkid_devno_to_devname(sb->s_journal_dev); } journal_name = ctx->journal_name; if (!journal_name) { fix_problem(ctx, PR_0_CANT_FIND_JOURNAL, &pctx); retval = EXT2_ET_LOAD_EXT_JOURNAL; goto errout; } jfs_debug(1, "Using journal file %s\n", journal_name); io_ptr = unix_io_manager; } #if 0 test_io_backing_manager = io_ptr; io_ptr = test_io_manager; #endif #ifndef USE_INODE_IO if (ext_journal) #endif { int flags = IO_FLAG_RW; if (!(ctx->mount_flags & EXT2_MF_ISROOT && ctx->mount_flags & EXT2_MF_READONLY)) flags |= IO_FLAG_EXCLUSIVE; if ((ctx->mount_flags & EXT2_MF_READONLY) && (ctx->options & E2F_OPT_FORCE)) flags &= ~IO_FLAG_EXCLUSIVE; retval = io_ptr->open(journal_name, flags, &ctx->journal_io); } if (retval) goto errout; io_channel_set_blksize(ctx->journal_io, ctx->fs->blocksize); if (ext_journal) { blk64_t maxlen; start = ext2fs_journal_sb_start(ctx->fs->blocksize) - 1; bh = getblk(dev_journal, start, ctx->fs->blocksize); if (!bh) { retval = EXT2_ET_NO_MEMORY; goto errout; } ll_rw_block(READ, 1, &bh); if ((retval = bh->b_err) != 0) { brelse(bh); goto errout; } memcpy(&jsuper, start ? bh->b_data : bh->b_data + SUPERBLOCK_OFFSET, sizeof(jsuper)); #ifdef WORDS_BIGENDIAN if (jsuper.s_magic == ext2fs_swab16(EXT2_SUPER_MAGIC)) ext2fs_swap_super(&jsuper); #endif if (jsuper.s_magic != EXT2_SUPER_MAGIC || !(jsuper.s_feature_incompat & EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) { fix_problem(ctx, PR_0_EXT_JOURNAL_BAD_SUPER, &pctx); retval = EXT2_ET_LOAD_EXT_JOURNAL; brelse(bh); goto errout; } /* Make sure the journal UUID is correct */ if (memcmp(jsuper.s_uuid, ctx->fs->super->s_journal_uuid, sizeof(jsuper.s_uuid))) { fix_problem(ctx, PR_0_JOURNAL_BAD_UUID, &pctx); retval = EXT2_ET_LOAD_EXT_JOURNAL; brelse(bh); goto errout; } /* Check the superblock checksum */ if (jsuper.s_feature_ro_compat & EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) { struct struct_ext2_filsys fsx; struct ext2_super_block superx; void *p; p = start ? bh->b_data : bh->b_data + SUPERBLOCK_OFFSET; memcpy(&fsx, ctx->fs, sizeof(fsx)); memcpy(&superx, ctx->fs->super, sizeof(superx)); fsx.super = &superx; fsx.super->s_feature_ro_compat |= EXT4_FEATURE_RO_COMPAT_METADATA_CSUM; if (!ext2fs_superblock_csum_verify(&fsx, p) && fix_problem(ctx, PR_0_EXT_JOURNAL_SUPER_CSUM_INVALID, &pctx)) { ext2fs_superblock_csum_set(&fsx, p); mark_buffer_dirty(bh); } } brelse(bh); maxlen = ext2fs_blocks_count(&jsuper); journal->j_maxlen = (maxlen < 1ULL << 32) ? maxlen : (1ULL << 32) - 1; start++; } if (!(bh = getblk(dev_journal, start, journal->j_blocksize))) { retval = EXT2_ET_NO_MEMORY; goto errout; } journal->j_sb_buffer = bh; journal->j_superblock = (journal_superblock_t *)bh->b_data; #ifdef USE_INODE_IO if (j_inode) ext2fs_free_mem(&j_inode); #endif *ret_journal = journal; e2fsck_use_inode_shortcuts(ctx, 0); return 0; errout: e2fsck_use_inode_shortcuts(ctx, 0); if (dev_fs) ext2fs_free_mem(&dev_fs); if (j_inode) ext2fs_free_mem(&j_inode); if (journal) ext2fs_free_mem(&journal); return retval; }
void pass1(void) { ino_t inumber, inosused, ninosused; size_t inospace; struct inostat *info; int c; struct inodesc idesc; daddr_t i, cgd; u_int8_t *cp; /* * Set file system reserved blocks in used block map. */ for (c = 0; c < sblock.fs_ncg; c++) { cgd = cgdmin(&sblock, c); if (c == 0) i = cgbase(&sblock, c); else i = cgsblock(&sblock, c); for (; i < cgd; i++) setbmap(i); } i = sblock.fs_csaddr; cgd = i + howmany(sblock.fs_cssize, sblock.fs_fsize); for (; i < cgd; i++) setbmap(i); /* * Find all allocated blocks. */ memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = ADDR; idesc.id_func = pass1check; n_files = n_blks = 0; info_inumber = 0; info_fn = pass1_info; for (c = 0; c < sblock.fs_ncg; c++) { inumber = c * sblock.fs_ipg; setinodebuf(inumber); getblk(&cgblk, cgtod(&sblock, c), sblock.fs_cgsize); if (sblock.fs_magic == FS_UFS2_MAGIC) { inosused = cgrp.cg_initediblk; if (inosused > sblock.fs_ipg) inosused = sblock.fs_ipg; } else inosused = sblock.fs_ipg; /* * If we are using soft updates, then we can trust the * cylinder group inode allocation maps to tell us which * inodes are allocated. We will scan the used inode map * to find the inodes that are really in use, and then * read only those inodes in from disk. */ if (preen && usedsoftdep) { cp = &cg_inosused(&cgrp)[(inosused - 1) / CHAR_BIT]; for ( ; inosused > 0; inosused -= CHAR_BIT, cp--) { if (*cp == 0) continue; for (i = 1 << (CHAR_BIT - 1); i > 0; i >>= 1) { if (*cp & i) break; inosused--; } break; } if (inosused < 0) inosused = 0; } /* * Allocate inoinfo structures for the allocated inodes. */ inostathead[c].il_numalloced = inosused; if (inosused == 0) { inostathead[c].il_stat = 0; continue; } info = calloc((unsigned)inosused, sizeof(struct inostat)); inospace = (unsigned)inosused * sizeof(struct inostat); if (info == NULL) errexit("cannot alloc %zu bytes for inoinfo", inospace); inostathead[c].il_stat = info; /* * Scan the allocated inodes. */ for (i = 0; i < inosused; i++, inumber++) { info_inumber = inumber; if (inumber < ROOTINO) { (void)getnextinode(inumber); continue; } checkinode(inumber, &idesc); } lastino += 1; if (inosused < sblock.fs_ipg || inumber == lastino) continue; /* * If we were not able to determine in advance which inodes * were in use, then reduce the size of the inoinfo structure * to the size necessary to describe the inodes that we * really found. */ if (lastino < (c * sblock.fs_ipg)) ninosused = 0; else ninosused = lastino - (c * sblock.fs_ipg); inostathead[c].il_numalloced = ninosused; if (ninosused == 0) { free(inostathead[c].il_stat); inostathead[c].il_stat = 0; continue; } if (ninosused != inosused) { struct inostat *ninfo; size_t ninospace; ninfo = reallocarray(info, ninosused, sizeof(*ninfo)); if (ninfo == NULL) { pfatal("too many inodes %llu, or out of memory\n", (unsigned long long)ninosused); exit(8); } ninospace = ninosused * sizeof(*ninfo); if (ninosused > inosused) memset(&ninfo[inosused], 0, ninospace - inospace); inostathead[c].il_stat = ninfo; } }
int main() { u16 i,iblk; char *loc; u32 *l; GD *gp; INODE *ip; prints("What image: "); gets(kstr); prints("\r\n"); // Read group descriptor block and find inode table getblk(2, buf1); gp = (GD *)buf1; iblk = (u16)gp->bg_inode_table; getblk((u16)iblk, buf1); // read first inode block ip = (INODE *)buf1 + 1; // ip->root inode #2 ( / directory) // Read through directory entries looking for /boot i = findentry((u16)ip->i_block[0], "boot"); // get inode ip = getinode(iblk, i); i = findentry((u16)ip->i_block[0], kstr); // Get inode ip = getinode(iblk, i); // get single indirect blocks getblk((u16)ip->i_block[12], buf2); // Copy file into memory prints("Loading: "); setes(0x1000); loc = 0; i = 0; // Get first 12 blocks while(i < 12 && ip->i_block[i] != 0) { getblk((u16)ip->i_block[i], loc); i++; loc += 1024; putc('.'); } l = buf2; i = 0; while(i < 256 && l[i] > 0) { getblk((u16)(l[i]), loc); i++; loc += 1024; putc('+'); } prints("\n\rReady?\n\r"); getc(); return 1; }
/* * Indirect blocks are now on the vnode for the file. They are given negative * logical block numbers. Indirect blocks are addressed by the negative * address of the first data block to which they point. Double indirect blocks * are addressed by one less than the address of the first indirect block to * which they point. Triple indirect blocks are addressed by one less than * the address of the first double indirect block to which they point. * * ext2_bmaparray does the bmap conversion, and if requested returns the * array of logical blocks which must be traversed to get to a block. * Each entry contains the offset into that block that gets you to the * next block and the disk address of the block (if it is assigned). */ static int ext2_bmaparray(struct vnode *vp, ext2_daddr_t bn, ext2_daddr_t *bnp, struct indir *ap, int *nump, int *runp, int *runb) { struct inode *ip; struct buf *bp; struct ext2_mount *ump; struct mount *mp; struct ext2_sb_info *fs; struct indir a[NIADDR+1], *xap; ext2_daddr_t daddr; long metalbn; int error, maxrun, num; ip = VTOI(vp); mp = vp->v_mount; ump = VFSTOEXT2(mp); fs = ip->i_e2fs; #ifdef DIAGNOSTIC if ((ap != NULL && nump == NULL) || (ap == NULL && nump != NULL)) panic("ext2_bmaparray: invalid arguments"); #endif if (runp) { *runp = 0; } if (runb) { *runb = 0; } maxrun = mp->mnt_iosize_max / mp->mnt_stat.f_iosize - 1; xap = ap == NULL ? a : ap; if (!nump) nump = # error = ext2_getlbns(vp, bn, xap, nump); if (error) return (error); num = *nump; if (num == 0) { *bnp = blkptrtodb(ump, ip->i_db[bn]); if (*bnp == 0) *bnp = -1; else if (runp) { daddr_t bnb = bn; for (++bn; bn < NDADDR && *runp < maxrun && is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]); ++bn, ++*runp); bn = bnb; if (runb && (bn > 0)) { for (--bn; (bn >= 0) && (*runb < maxrun) && is_sequential(ump, ip->i_db[bn], ip->i_db[bn+1]); --bn, ++*runb); } } return (0); } /* Get disk address out of indirect block array */ daddr = ip->i_ib[xap->in_off]; for (bp = NULL, ++xap; --num; ++xap) { /* * Exit the loop if there is no disk address assigned yet and * the indirect block isn't in the cache, or if we were * looking for an indirect block and we've found it. */ metalbn = xap->in_lbn; if ((daddr == 0 && !findblk(vp, dbtodoff(fs, metalbn), FINDBLK_TEST)) || metalbn == bn) { break; } /* * If we get here, we've either got the block in the cache * or we have a disk address for it, go fetch it. */ if (bp) bqrelse(bp); xap->in_exists = 1; bp = getblk(vp, lblktodoff(fs, metalbn), mp->mnt_stat.f_iosize, 0, 0); if ((bp->b_flags & B_CACHE) == 0) { #ifdef DIAGNOSTIC if (!daddr) panic("ext2_bmaparray: indirect block not in cache"); #endif /* * This runs through ext2_strategy using bio2 to * cache the disk offset, then comes back through * bio1. So we want to wait on bio1 */ bp->b_bio1.bio_done = biodone_sync; bp->b_bio1.bio_flags |= BIO_SYNC; bp->b_bio2.bio_offset = fsbtodoff(fs, daddr); bp->b_flags &= ~(B_INVAL|B_ERROR); bp->b_cmd = BUF_CMD_READ; vfs_busy_pages(bp->b_vp, bp); vn_strategy(bp->b_vp, &bp->b_bio1); error = biowait(&bp->b_bio1, "biord"); if (error) { brelse(bp); return (error); } } daddr = ((ext2_daddr_t *)bp->b_data)[xap->in_off]; if (num == 1 && daddr && runp) { for (bn = xap->in_off + 1; bn < MNINDIR(ump) && *runp < maxrun && is_sequential(ump, ((ext2_daddr_t *)bp->b_data)[bn - 1], ((ext2_daddr_t *)bp->b_data)[bn]); ++bn, ++*runp); bn = xap->in_off; if (runb && bn) { for(--bn; bn >= 0 && *runb < maxrun && is_sequential(ump, ((daddr_t *)bp->b_data)[bn], ((daddr_t *)bp->b_data)[bn+1]); --bn, ++*runb); } } } if (bp) bqrelse(bp); daddr = blkptrtodb(ump, daddr); *bnp = daddr == 0 ? -1 : daddr; return (0); }
/* VOP_BWRITE ULFS_NIADDR+2 times */ int lfs_balloc(struct vnode *vp, off_t startoffset, int iosize, kauth_cred_t cred, int flags, struct buf **bpp) { int offset; daddr_t daddr, idaddr; struct buf *ibp, *bp; struct inode *ip; struct lfs *fs; struct indir indirs[ULFS_NIADDR+2], *idp; daddr_t lbn, lastblock; int bcount; int error, frags, i, nsize, osize, num; ip = VTOI(vp); fs = ip->i_lfs; offset = lfs_blkoff(fs, startoffset); KASSERT(iosize <= lfs_sb_getbsize(fs)); lbn = lfs_lblkno(fs, startoffset); /* (void)lfs_check(vp, lbn, 0); */ ASSERT_MAYBE_SEGLOCK(fs); /* * Three cases: it's a block beyond the end of file, it's a block in * the file that may or may not have been assigned a disk address or * we're writing an entire block. * * Note, if the daddr is UNWRITTEN, the block already exists in * the cache (it was read or written earlier). If so, make sure * we don't count it as a new block or zero out its contents. If * it did not, make sure we allocate any necessary indirect * blocks. * * If we are writing a block beyond the end of the file, we need to * check if the old last block was a fragment. If it was, we need * to rewrite it. */ if (bpp) *bpp = NULL; /* Check for block beyond end of file and fragment extension needed. */ lastblock = lfs_lblkno(fs, ip->i_size); if (lastblock < ULFS_NDADDR && lastblock < lbn) { osize = lfs_blksize(fs, ip, lastblock); if (osize < lfs_sb_getbsize(fs) && osize > 0) { if ((error = lfs_fragextend(vp, osize, lfs_sb_getbsize(fs), lastblock, (bpp ? &bp : NULL), cred))) return (error); ip->i_size = (lastblock + 1) * lfs_sb_getbsize(fs); lfs_dino_setsize(fs, ip->i_din, ip->i_size); uvm_vnp_setsize(vp, ip->i_size); ip->i_flag |= IN_CHANGE | IN_UPDATE; if (bpp) (void) VOP_BWRITE(bp->b_vp, bp); } } /* * If the block we are writing is a direct block, it's the last * block in the file, and offset + iosize is less than a full * block, we can write one or more fragments. There are two cases: * the block is brand new and we should allocate it the correct * size or it already exists and contains some fragments and * may need to extend it. */ if (lbn < ULFS_NDADDR && lfs_lblkno(fs, ip->i_size) <= lbn) { osize = lfs_blksize(fs, ip, lbn); nsize = lfs_fragroundup(fs, offset + iosize); if (lfs_lblktosize(fs, lbn) >= ip->i_size) { /* Brand new block or fragment */ frags = lfs_numfrags(fs, nsize); if (!ISSPACE(fs, frags, cred)) return ENOSPC; if (bpp) { *bpp = bp = getblk(vp, lbn, nsize, 0, 0); bp->b_blkno = UNWRITTEN; if (flags & B_CLRBUF) clrbuf(bp); } ip->i_lfs_effnblks += frags; mutex_enter(&lfs_lock); lfs_sb_subbfree(fs, frags); mutex_exit(&lfs_lock); lfs_dino_setdb(fs, ip->i_din, lbn, UNWRITTEN); } else { if (nsize <= osize) { /* No need to extend */ if (bpp && (error = bread(vp, lbn, osize, 0, &bp))) return error; } else { /* Extend existing block */ if ((error = lfs_fragextend(vp, osize, nsize, lbn, (bpp ? &bp : NULL), cred))) return error; } if (bpp) *bpp = bp; } return 0; } error = ulfs_bmaparray(vp, lbn, &daddr, &indirs[0], &num, NULL, NULL); if (error) return (error); KASSERT(daddr <= LFS_MAX_DADDR(fs)); /* * Do byte accounting all at once, so we can gracefully fail *before* * we start assigning blocks. */ frags = fs->um_seqinc; bcount = 0; if (daddr == UNASSIGNED) { bcount = frags; } for (i = 1; i < num; ++i) { if (!indirs[i].in_exists) { bcount += frags; } } if (ISSPACE(fs, bcount, cred)) { mutex_enter(&lfs_lock); lfs_sb_subbfree(fs, bcount); mutex_exit(&lfs_lock); ip->i_lfs_effnblks += bcount; } else { return ENOSPC; } if (daddr == UNASSIGNED) { if (num > 0 && lfs_dino_getib(fs, ip->i_din, indirs[0].in_off) == 0) { lfs_dino_setib(fs, ip->i_din, indirs[0].in_off, UNWRITTEN); } /* * Create new indirect blocks if necessary */ if (num > 1) { idaddr = lfs_dino_getib(fs, ip->i_din, indirs[0].in_off); for (i = 1; i < num; ++i) { ibp = getblk(vp, indirs[i].in_lbn, lfs_sb_getbsize(fs), 0,0); if (!indirs[i].in_exists) { clrbuf(ibp); ibp->b_blkno = UNWRITTEN; } else if (!(ibp->b_oflags & (BO_DELWRI | BO_DONE))) { ibp->b_blkno = LFS_FSBTODB(fs, idaddr); ibp->b_flags |= B_READ; VOP_STRATEGY(vp, ibp); biowait(ibp); } /* * This block exists, but the next one may not. * If that is the case mark it UNWRITTEN to keep * the accounting straight. */ /* XXX ondisk32 */ if (((int32_t *)ibp->b_data)[indirs[i].in_off] == 0) ((int32_t *)ibp->b_data)[indirs[i].in_off] = UNWRITTEN; /* XXX ondisk32 */ idaddr = ((int32_t *)ibp->b_data)[indirs[i].in_off]; #ifdef DEBUG if (vp == fs->lfs_ivnode) { LFS_ENTER_LOG("balloc", __FILE__, __LINE__, indirs[i].in_lbn, ibp->b_flags, curproc->p_pid); } #endif if ((error = VOP_BWRITE(ibp->b_vp, ibp))) return error; } } } /* * Get the existing block from the cache, if requested. */ if (bpp) *bpp = bp = getblk(vp, lbn, lfs_blksize(fs, ip, lbn), 0, 0); /* * Do accounting on blocks that represent pages. */ if (!bpp) lfs_register_block(vp, lbn); /* * The block we are writing may be a brand new block * in which case we need to do accounting. * * We can tell a truly new block because ulfs_bmaparray will say * it is UNASSIGNED. Once we allocate it we will assign it the * disk address UNWRITTEN. */ if (daddr == UNASSIGNED) { if (bpp) { if (flags & B_CLRBUF) clrbuf(bp); /* Note the new address */ bp->b_blkno = UNWRITTEN; } switch (num) { case 0: lfs_dino_setdb(fs, ip->i_din, lbn, UNWRITTEN); break; case 1: lfs_dino_setib(fs, ip->i_din, indirs[0].in_off, UNWRITTEN); break; default: idp = &indirs[num - 1]; if (bread(vp, idp->in_lbn, lfs_sb_getbsize(fs), B_MODIFY, &ibp)) panic("lfs_balloc: bread bno %lld", (long long)idp->in_lbn); /* XXX ondisk32 */ ((int32_t *)ibp->b_data)[idp->in_off] = UNWRITTEN; #ifdef DEBUG if (vp == fs->lfs_ivnode) { LFS_ENTER_LOG("balloc", __FILE__, __LINE__, idp->in_lbn, ibp->b_flags, curproc->p_pid); } #endif VOP_BWRITE(ibp->b_vp, ibp); } } else if (bpp && !(bp->b_oflags & (BO_DONE|BO_DELWRI))) { /* * Not a brand new block, also not in the cache; * read it in from disk. */ if (iosize == lfs_sb_getbsize(fs)) /* Optimization: I/O is unnecessary. */ bp->b_blkno = daddr; else { /* * We need to read the block to preserve the * existing bytes. */ bp->b_blkno = daddr; bp->b_flags |= B_READ; VOP_STRATEGY(vp, bp); return (biowait(bp)); } } return (0); }
static int do_one_pass(journal_t *journal, struct recovery_info *info, enum passtype pass) { unsigned int first_commit_ID, next_commit_ID; unsigned long next_log_block; int err, success = 0; journal_superblock_t * sb; journal_header_t * tmp; struct buffer_head * bh; unsigned int sequence; int blocktype; /* Precompute the maximum metadata descriptors in a descriptor block */ int MAX_BLOCKS_PER_DESC; MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t)) / sizeof(journal_block_tag_t)); /* * First thing is to establish what we expect to find in the log * (in terms of transaction IDs), and where (in terms of log * block offsets): query the superblock. */ sb = journal->j_superblock; next_commit_ID = ntohl(sb->s_sequence); next_log_block = ntohl(sb->s_start); first_commit_ID = next_commit_ID; if (pass == PASS_SCAN) info->start_transaction = first_commit_ID; jfs_debug(1, "Starting recovery pass %d\n", pass); /* * Now we walk through the log, transaction by transaction, * making sure that each transaction has a commit block in the * expected place. Each complete transaction gets replayed back * into the main filesystem. */ while (1) { int flags; char * tagp; journal_block_tag_t * tag; struct buffer_head * obh; struct buffer_head * nbh; /* If we already know where to stop the log traversal, * check right now that we haven't gone past the end of * the log. */ if (pass != PASS_SCAN) if (tid_geq(next_commit_ID, info->end_transaction)) break; jfs_debug(2, "Scanning for sequence ID %u at %lu/%lu\n", next_commit_ID, next_log_block, journal->j_last); /* Skip over each chunk of the transaction looking * either the next descriptor block or the final commit * record. */ jfs_debug(3, "JFS: checking block %ld\n", next_log_block); err = jread(&bh, journal, next_log_block); if (err) goto failed; next_log_block++; wrap(journal, next_log_block); /* What kind of buffer is it? * * If it is a descriptor block, check that it has the * expected sequence number. Otherwise, we're all done * here. */ tmp = (journal_header_t *) bh->b_data; if (tmp->h_magic != htonl(JFS_MAGIC_NUMBER)) { brelse(bh); break; } blocktype = ntohl(tmp->h_blocktype); sequence = ntohl(tmp->h_sequence); jfs_debug(3, "Found magic %d, sequence %d\n", blocktype, sequence); if (sequence != next_commit_ID) { brelse(bh); break; } /* OK, we have a valid descriptor block which matches * all of the sequence number checks. What are we going * to do with it? That depends on the pass... */ switch(blocktype) { case JFS_DESCRIPTOR_BLOCK: /* If it is a valid descriptor block, replay it * in pass REPLAY; otherwise, just skip over the * blocks it describes. */ if (pass != PASS_REPLAY) { next_log_block += count_tags(bh, journal->j_blocksize); wrap(journal, next_log_block); brelse(bh); continue; } /* A descriptor block: we can now write all of * the data blocks. Yay, useful work is finally * getting done here! */ tagp = &bh->b_data[sizeof(journal_header_t)]; while ((tagp - bh->b_data +sizeof(journal_block_tag_t)) <= journal->j_blocksize) { unsigned long io_block; tag = (journal_block_tag_t *) tagp; flags = ntohl(tag->t_flags); io_block = next_log_block++; wrap(journal, next_log_block); err = jread(&obh, journal, io_block); if (err) { /* Recover what we can, but * report failure at the end. */ success = err; printk (KERN_ERR "JFS: IO error %d recovering " "block %ld in log\n", err, io_block); } else { unsigned long blocknr; J_ASSERT(obh != NULL); blocknr = ntohl(tag->t_blocknr); /* If the block has been * revoked, then we're all done * here. */ if (journal_test_revoke (journal, blocknr, next_commit_ID)) { brelse(obh); ++info->nr_revoke_hits; goto skip_write; } /* Find a buffer for the new * data being restored */ nbh = getblk(journal->j_dev, blocknr, journal->j_blocksize); if (nbh == NULL) { printk(KERN_ERR "JFS: Out of memory " "during recovery.\n"); err = -ENOMEM; brelse(bh); brelse(obh); goto failed; } memcpy(nbh->b_data, obh->b_data, journal->j_blocksize); if (flags & JFS_FLAG_ESCAPE) { * ((unsigned int *) bh->b_data) = htonl(JFS_MAGIC_NUMBER); } mark_buffer_dirty(nbh, 1); mark_buffer_uptodate(nbh, 1); ++info->nr_replays; /* ll_rw_block(WRITE, 1, &nbh); */ brelse(obh); brelse(nbh); } skip_write: tagp += sizeof(journal_block_tag_t); if (!(flags & JFS_FLAG_SAME_UUID)) tagp += 16; if (flags & JFS_FLAG_LAST_TAG) break; } brelse(bh); continue; case JFS_COMMIT_BLOCK: /* Found an expected commit block: not much to * do other than move on to the next sequence * number. */ brelse(bh); next_commit_ID++; continue; case JFS_REVOKE_BLOCK: /* If we aren't in the REVOKE pass, then we can * just skip over this block. */ if (pass != PASS_REVOKE) { brelse(bh); continue; } err = scan_revoke_records(journal, bh, next_commit_ID, info); brelse(bh); if (err) goto failed; continue; default: jfs_debug(3, "Unrecognised magic %d, end of scan.\n", blocktype); goto done; } } done: /* * We broke out of the log scan loop: either we came to the * known end of the log or we found an unexpected block in the * log. If the latter happened, then we know that the "current" * transaction marks the end of the valid log. */ if (pass == PASS_SCAN) info->end_transaction = next_commit_ID; else { /* It's really bad news if different passes end up at * different places (but possible due to IO errors). */ if (info->end_transaction != next_commit_ID) { printk (KERN_ERR "JFS: recovery pass %d ended at " "transaction %u, expected %u\n", pass, next_commit_ID, info->end_transaction); if (!success) success = -EIO; } } return success; failed: return err; }
/* * Release blocks associated with the inode ip and stored in the indirect * block bn. Blocks are free'd in LIFO order up to (but not including) * lastbn. If level is greater than SINGLE, the block is an indirect block * and recursive calls to indirtrunc must be used to cleanse other indirect * blocks. * * NB: triple indirect blocks are untested. */ int ffs_indirtrunc(struct inode *ip, daddr64_t lbn, daddr64_t dbn, daddr64_t lastbn, int level, long *countp) { int i; struct buf *bp; struct fs *fs = ip->i_fs; struct vnode *vp; void *copy = NULL; daddr64_t nb, nlbn, last; long blkcount, factor; int nblocks, blocksreleased = 0; int error = 0, allerror = 0; int32_t *bap1 = NULL; #ifdef FFS2 int64_t *bap2 = NULL; #endif /* * Calculate index in current block of last * block to be kept. -1 indicates the entire * block so we need not calculate the index. */ factor = 1; for (i = SINGLE; i < level; i++) factor *= NINDIR(fs); last = lastbn; if (lastbn > 0) last /= factor; nblocks = btodb(fs->fs_bsize); /* * Get buffer of block pointers, zero those entries corresponding * to blocks to be free'd, and update on disk copy first. Since * double(triple) indirect before single(double) indirect, calls * to bmap on these blocks will fail. However, we already have * the on disk address, so we have to set the b_blkno field * explicitly instead of letting bread do everything for us. */ vp = ITOV(ip); bp = getblk(vp, lbn, (int)fs->fs_bsize, 0, 0); if (!(bp->b_flags & (B_DONE | B_DELWRI))) { curproc->p_stats->p_ru.ru_inblock++; /* pay for read */ bcstats.pendingreads++; bcstats.numreads++; bp->b_flags |= B_READ; if (bp->b_bcount > bp->b_bufsize) panic("ffs_indirtrunc: bad buffer size"); bp->b_blkno = dbn; VOP_STRATEGY(bp); error = biowait(bp); } if (error) { brelse(bp); *countp = 0; return (error); } #ifdef FFS2 if (ip->i_ump->um_fstype == UM_UFS2) bap2 = (int64_t *)bp->b_data; else #endif bap1 = (int32_t *)bp->b_data; if (lastbn != -1) { copy = malloc(fs->fs_bsize, M_TEMP, M_WAITOK); bcopy(bp->b_data, copy, (u_int) fs->fs_bsize); for (i = last + 1; i < NINDIR(fs); i++) BAP_ASSIGN(ip, i, 0); if (!DOINGASYNC(vp)) { error = bwrite(bp); if (error) allerror = error; } else { bawrite(bp); } #ifdef FFS2 if (ip->i_ump->um_fstype == UM_UFS2) bap2 = (int64_t *)copy; else #endif bap1 = (int32_t *)copy; } /* * Recursively free totally unused blocks. */ for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last; i--, nlbn += factor) { nb = BAP(ip, i); if (nb == 0) continue; if (level > SINGLE) { error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb), (daddr64_t)-1, level - 1, &blkcount); if (error) allerror = error; blocksreleased += blkcount; } ffs_blkfree(ip, nb, fs->fs_bsize); blocksreleased += nblocks; } /* * Recursively free last partial block. */ if (level > SINGLE && lastbn >= 0) { last = lastbn % factor; nb = BAP(ip, i); if (nb != 0) { error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb), last, level - 1, &blkcount); if (error) allerror = error; blocksreleased += blkcount; } } if (copy != NULL) { free(copy, M_TEMP); } else { bp->b_flags |= B_INVAL; brelse(bp); } *countp = blocksreleased; return (allerror); }
/* * Read in the super block and its summary info. */ int readsb(int listerr) { off_t super; int bad, ret; struct fs *fs; super = bflag ? bflag * dev_bsize : -1; readcnt[sblk.b_type]++; if ((ret = sbget(fsreadfd, &fs, super)) != 0) { switch (ret) { case EINVAL: fprintf(stderr, "The previous newfs operation " "on this volume did not complete.\nYou must " "complete newfs before using this volume.\n"); exit(11); case ENOENT: if (bflag) fprintf(stderr, "%jd is not a file system " "superblock\n", super / dev_bsize); else fprintf(stderr, "Cannot find file system " "superblock\n"); return (0); case EIO: default: fprintf(stderr, "I/O error reading %jd\n", super / dev_bsize); return (0); } } memcpy(&sblock, fs, fs->fs_sbsize); free(fs); /* * Compute block size that the file system is based on, * according to fsbtodb, and adjust superblock block number * so we can tell if this is an alternate later. */ dev_bsize = sblock.fs_fsize / fsbtodb(&sblock, 1); sblk.b_bno = sblock.fs_sblockactualloc / dev_bsize; sblk.b_size = SBLOCKSIZE; /* * Compare all fields that should not differ in alternate super block. * When an alternate super-block is specified this check is skipped. */ if (bflag) goto out; getblk(&asblk, cgsblock(&sblock, sblock.fs_ncg - 1), sblock.fs_sbsize); if (asblk.b_errs) return (0); bad = 0; #define CHK(x, y) \ if (altsblock.x != sblock.x) { \ bad++; \ if (listerr && debug) \ printf("SUPER BLOCK VS ALTERNATE MISMATCH %s: " y " vs " y "\n", \ #x, (intmax_t)sblock.x, (intmax_t)altsblock.x); \ } CHK(fs_sblkno, "%jd"); CHK(fs_cblkno, "%jd"); CHK(fs_iblkno, "%jd"); CHK(fs_dblkno, "%jd"); CHK(fs_ncg, "%jd"); CHK(fs_bsize, "%jd"); CHK(fs_fsize, "%jd"); CHK(fs_frag, "%jd"); CHK(fs_bmask, "%#jx"); CHK(fs_fmask, "%#jx"); CHK(fs_bshift, "%jd"); CHK(fs_fshift, "%jd"); CHK(fs_fragshift, "%jd"); CHK(fs_fsbtodb, "%jd"); CHK(fs_sbsize, "%jd"); CHK(fs_nindir, "%jd"); CHK(fs_inopb, "%jd"); CHK(fs_cssize, "%jd"); CHK(fs_ipg, "%jd"); CHK(fs_fpg, "%jd"); CHK(fs_magic, "%#jx"); #undef CHK if (bad) { if (listerr == 0) return (0); if (preen) printf("%s: ", cdevname); printf( "VALUES IN SUPER BLOCK LSB=%jd DISAGREE WITH THOSE IN\n" "LAST ALTERNATE LSB=%jd\n", sblk.b_bno, asblk.b_bno); if (reply("IGNORE ALTERNATE SUPER BLOCK") == 0) return (0); } out: /* * If not yet done, update UFS1 superblock with new wider fields. */ if (sblock.fs_magic == FS_UFS1_MAGIC && sblock.fs_maxbsize != sblock.fs_bsize) { sblock.fs_maxbsize = sblock.fs_bsize; sblock.fs_time = sblock.fs_old_time; sblock.fs_size = sblock.fs_old_size; sblock.fs_dsize = sblock.fs_old_dsize; sblock.fs_csaddr = sblock.fs_old_csaddr; sblock.fs_cstotal.cs_ndir = sblock.fs_old_cstotal.cs_ndir; sblock.fs_cstotal.cs_nbfree = sblock.fs_old_cstotal.cs_nbfree; sblock.fs_cstotal.cs_nifree = sblock.fs_old_cstotal.cs_nifree; sblock.fs_cstotal.cs_nffree = sblock.fs_old_cstotal.cs_nffree; } havesb = 1; return (1); }
/* * If blocks are contiguous on disk, use this to provide clustered * read ahead. We will read as many blocks as possible sequentially * and then parcel them up into logical blocks in the buffer hash table. */ static struct buf * cluster_rbuild(struct vnode *vp, u_quad_t filesize, daddr_t lbn, daddr_t blkno, long size, int run, int gbflags, struct buf *fbp) { struct buf *bp, *tbp; daddr_t bn; off_t off; long tinc, tsize; int i, inc, j, k, toff; KASSERT(size == vp->v_mount->mnt_stat.f_iosize, ("cluster_rbuild: size %ld != f_iosize %jd\n", size, (intmax_t)vp->v_mount->mnt_stat.f_iosize)); /* * avoid a division */ while ((u_quad_t) size * (lbn + run) > filesize) { --run; } if (fbp) { tbp = fbp; tbp->b_iocmd = BIO_READ; } else { tbp = getblk(vp, lbn, size, 0, 0, gbflags); if (tbp->b_flags & B_CACHE) return tbp; tbp->b_flags |= B_ASYNC | B_RAM; tbp->b_iocmd = BIO_READ; } tbp->b_blkno = blkno; if( (tbp->b_flags & B_MALLOC) || ((tbp->b_flags & B_VMIO) == 0) || (run <= 1) ) return tbp; bp = trypbuf(&cluster_pbuf_freecnt); if (bp == 0) return tbp; /* * We are synthesizing a buffer out of vm_page_t's, but * if the block size is not page aligned then the starting * address may not be either. Inherit the b_data offset * from the original buffer. */ bp->b_flags = B_ASYNC | B_CLUSTER | B_VMIO; if ((gbflags & GB_UNMAPPED) != 0) { bp->b_data = unmapped_buf; } else { bp->b_data = (char *)((vm_offset_t)bp->b_data | ((vm_offset_t)tbp->b_data & PAGE_MASK)); } bp->b_iocmd = BIO_READ; bp->b_iodone = cluster_callback; bp->b_blkno = blkno; bp->b_lblkno = lbn; bp->b_offset = tbp->b_offset; KASSERT(bp->b_offset != NOOFFSET, ("cluster_rbuild: no buffer offset")); pbgetvp(vp, bp); TAILQ_INIT(&bp->b_cluster.cluster_head); bp->b_bcount = 0; bp->b_bufsize = 0; bp->b_npages = 0; inc = btodb(size); for (bn = blkno, i = 0; i < run; ++i, bn += inc) { if (i == 0) { VM_OBJECT_WLOCK(tbp->b_bufobj->bo_object); vfs_drain_busy_pages(tbp); vm_object_pip_add(tbp->b_bufobj->bo_object, tbp->b_npages); for (k = 0; k < tbp->b_npages; k++) vm_page_sbusy(tbp->b_pages[k]); VM_OBJECT_WUNLOCK(tbp->b_bufobj->bo_object); } else { if ((bp->b_npages * PAGE_SIZE) + round_page(size) > vp->v_mount->mnt_iosize_max) { break; } tbp = getblk(vp, lbn + i, size, 0, 0, GB_LOCK_NOWAIT | (gbflags & GB_UNMAPPED)); /* Don't wait around for locked bufs. */ if (tbp == NULL) break; /* * Stop scanning if the buffer is fully valid * (marked B_CACHE), or locked (may be doing a * background write), or if the buffer is not * VMIO backed. The clustering code can only deal * with VMIO-backed buffers. The bo lock is not * required for the BKGRDINPROG check since it * can not be set without the buf lock. */ if ((tbp->b_vflags & BV_BKGRDINPROG) || (tbp->b_flags & B_CACHE) || (tbp->b_flags & B_VMIO) == 0) { bqrelse(tbp); break; } /* * The buffer must be completely invalid in order to * take part in the cluster. If it is partially valid * then we stop. */ off = tbp->b_offset; tsize = size; VM_OBJECT_WLOCK(tbp->b_bufobj->bo_object); for (j = 0; tsize > 0; j++) { toff = off & PAGE_MASK; tinc = tsize; if (toff + tinc > PAGE_SIZE) tinc = PAGE_SIZE - toff; VM_OBJECT_ASSERT_WLOCKED(tbp->b_pages[j]->object); if ((tbp->b_pages[j]->valid & vm_page_bits(toff, tinc)) != 0) break; if (vm_page_xbusied(tbp->b_pages[j])) break; vm_object_pip_add(tbp->b_bufobj->bo_object, 1); vm_page_sbusy(tbp->b_pages[j]); off += tinc; tsize -= tinc; } if (tsize > 0) { clean_sbusy: vm_object_pip_add(tbp->b_bufobj->bo_object, -j); for (k = 0; k < j; k++) vm_page_sunbusy(tbp->b_pages[k]); VM_OBJECT_WUNLOCK(tbp->b_bufobj->bo_object); bqrelse(tbp); break; } VM_OBJECT_WUNLOCK(tbp->b_bufobj->bo_object); /* * Set a read-ahead mark as appropriate */ if ((fbp && (i == 1)) || (i == (run - 1))) tbp->b_flags |= B_RAM; /* * Set the buffer up for an async read (XXX should * we do this only if we do not wind up brelse()ing?). * Set the block number if it isn't set, otherwise * if it is make sure it matches the block number we * expect. */ tbp->b_flags |= B_ASYNC; tbp->b_iocmd = BIO_READ; if (tbp->b_blkno == tbp->b_lblkno) { tbp->b_blkno = bn; } else if (tbp->b_blkno != bn) { VM_OBJECT_WLOCK(tbp->b_bufobj->bo_object); goto clean_sbusy; } } /* * XXX fbp from caller may not be B_ASYNC, but we are going * to biodone() it in cluster_callback() anyway */ BUF_KERNPROC(tbp); TAILQ_INSERT_TAIL(&bp->b_cluster.cluster_head, tbp, b_cluster.cluster_entry); VM_OBJECT_WLOCK(tbp->b_bufobj->bo_object); for (j = 0; j < tbp->b_npages; j += 1) { vm_page_t m; m = tbp->b_pages[j]; if ((bp->b_npages == 0) || (bp->b_pages[bp->b_npages-1] != m)) { bp->b_pages[bp->b_npages] = m; bp->b_npages++; } if (m->valid == VM_PAGE_BITS_ALL) tbp->b_pages[j] = bogus_page; } VM_OBJECT_WUNLOCK(tbp->b_bufobj->bo_object); /* * Don't inherit tbp->b_bufsize as it may be larger due to * a non-page-aligned size. Instead just aggregate using * 'size'. */ if (tbp->b_bcount != size) printf("warning: tbp->b_bcount wrong %ld vs %ld\n", tbp->b_bcount, size); if (tbp->b_bufsize != size) printf("warning: tbp->b_bufsize wrong %ld vs %ld\n", tbp->b_bufsize, size); bp->b_bcount += size; bp->b_bufsize += size; } /* * Fully valid pages in the cluster are already good and do not need * to be re-read from disk. Replace the page with bogus_page */ VM_OBJECT_WLOCK(bp->b_bufobj->bo_object); for (j = 0; j < bp->b_npages; j++) { VM_OBJECT_ASSERT_WLOCKED(bp->b_pages[j]->object); if (bp->b_pages[j]->valid == VM_PAGE_BITS_ALL) bp->b_pages[j] = bogus_page; } VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object); if (bp->b_bufsize > bp->b_kvasize) panic("cluster_rbuild: b_bufsize(%ld) > b_kvasize(%d)\n", bp->b_bufsize, bp->b_kvasize); if (buf_mapped(bp)) { pmap_qenter(trunc_page((vm_offset_t) bp->b_data), (vm_page_t *)bp->b_pages, bp->b_npages); } return (bp); }
static errcode_t e2fsck_get_journal(e2fsck_t ctx, journal_t **ret_journal) { struct process_block_struct pb; struct ext2_super_block *sb = ctx->fs->super; struct ext2_super_block jsuper; struct problem_context pctx; struct buffer_head *bh; struct inode *j_inode = NULL; struct kdev_s *dev_fs = NULL, *dev_journal; const char *journal_name = 0; journal_t *journal = NULL; errcode_t retval = 0; io_manager io_ptr = 0; unsigned long start = 0; int ext_journal = 0; int tried_backup_jnl = 0; clear_problem_context(&pctx); journal = e2fsck_allocate_memory(ctx, sizeof(journal_t), "journal"); if (!journal) { return EXT2_ET_NO_MEMORY; } dev_fs = e2fsck_allocate_memory(ctx, 2*sizeof(struct kdev_s), "kdev"); if (!dev_fs) { retval = EXT2_ET_NO_MEMORY; goto errout; } dev_journal = dev_fs+1; dev_fs->k_ctx = dev_journal->k_ctx = ctx; dev_fs->k_dev = K_DEV_FS; dev_journal->k_dev = K_DEV_JOURNAL; journal->j_dev = dev_journal; journal->j_fs_dev = dev_fs; journal->j_inode = NULL; journal->j_blocksize = ctx->fs->blocksize; if (uuid_is_null(sb->s_journal_uuid)) { if (!sb->s_journal_inum) { retval = EXT2_ET_BAD_INODE_NUM; goto errout; } j_inode = e2fsck_allocate_memory(ctx, sizeof(*j_inode), "journal inode"); if (!j_inode) { retval = EXT2_ET_NO_MEMORY; goto errout; } j_inode->i_ctx = ctx; j_inode->i_ino = sb->s_journal_inum; if ((retval = ext2fs_read_inode(ctx->fs, sb->s_journal_inum, &j_inode->i_ext2))) { try_backup_journal: if (sb->s_jnl_backup_type != EXT3_JNL_BACKUP_BLOCKS || tried_backup_jnl) goto errout; memset(&j_inode->i_ext2, 0, sizeof(struct ext2_inode)); memcpy(&j_inode->i_ext2.i_block[0], sb->s_jnl_blocks, EXT2_N_BLOCKS*4); j_inode->i_ext2.i_size = sb->s_jnl_blocks[16]; j_inode->i_ext2.i_links_count = 1; j_inode->i_ext2.i_mode = LINUX_S_IFREG | 0600; e2fsck_use_inode_shortcuts(ctx, 1); ctx->stashed_ino = j_inode->i_ino; ctx->stashed_inode = &j_inode->i_ext2; tried_backup_jnl++; } if (!j_inode->i_ext2.i_links_count || !LINUX_S_ISREG(j_inode->i_ext2.i_mode)) { retval = EXT2_ET_NO_JOURNAL; goto try_backup_journal; } if (j_inode->i_ext2.i_size / journal->j_blocksize < JFS_MIN_JOURNAL_BLOCKS) { retval = EXT2_ET_JOURNAL_TOO_SMALL; goto try_backup_journal; } pb.last_block = -1; retval = ext2fs_block_iterate2(ctx->fs, j_inode->i_ino, BLOCK_FLAG_HOLE, 0, process_journal_block, &pb); if ((pb.last_block+1) * ctx->fs->blocksize < j_inode->i_ext2.i_size) { retval = EXT2_ET_JOURNAL_TOO_SMALL; goto try_backup_journal; } if (tried_backup_jnl && !(ctx->options & E2F_OPT_READONLY)) { retval = ext2fs_write_inode(ctx->fs, sb->s_journal_inum, &j_inode->i_ext2); if (retval) goto errout; } journal->j_maxlen = j_inode->i_ext2.i_size / journal->j_blocksize; #ifdef USE_INODE_IO retval = ext2fs_inode_io_intern2(ctx->fs, sb->s_journal_inum, &j_inode->i_ext2, &journal_name); if (retval) goto errout; io_ptr = inode_io_manager; #else journal->j_inode = j_inode; ctx->journal_io = ctx->fs->io; if ((retval = journal_bmap(journal, 0, &start)) != 0) goto errout; #endif } else { ext_journal = 1; if (!ctx->journal_name) { char uuid[37]; uuid_unparse(sb->s_journal_uuid, uuid); ctx->journal_name = blkid_get_devname(ctx->blkid, "UUID", uuid); if (!ctx->journal_name) ctx->journal_name = blkid_devno_to_devname(sb->s_journal_dev); } journal_name = ctx->journal_name; if (!journal_name) { fix_problem(ctx, PR_0_CANT_FIND_JOURNAL, &pctx); retval = EXT2_ET_LOAD_EXT_JOURNAL; goto errout; } jfs_debug(1, "Using journal file %s\n", journal_name); io_ptr = unix_io_manager; } #if 0 test_io_backing_manager = io_ptr; io_ptr = test_io_manager; #endif #ifndef USE_INODE_IO if (ext_journal) #endif retval = io_ptr->open(journal_name, IO_FLAG_RW, &ctx->journal_io); if (retval) goto errout; io_channel_set_blksize(ctx->journal_io, ctx->fs->blocksize); if (ext_journal) { if (ctx->fs->blocksize == 1024) start = 1; bh = getblk(dev_journal, start, ctx->fs->blocksize); if (!bh) { retval = EXT2_ET_NO_MEMORY; goto errout; } ll_rw_block(READ, 1, &bh); if ((retval = bh->b_err) != 0) { brelse(bh); goto errout; } memcpy(&jsuper, start ? bh->b_data : bh->b_data + 1024, sizeof(jsuper)); brelse(bh); #ifdef EXT2FS_ENABLE_SWAPFS if (jsuper.s_magic == ext2fs_swab16(EXT2_SUPER_MAGIC)) ext2fs_swap_super(&jsuper); #endif if (jsuper.s_magic != EXT2_SUPER_MAGIC || !(jsuper.s_feature_incompat & EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) { fix_problem(ctx, PR_0_EXT_JOURNAL_BAD_SUPER, &pctx); retval = EXT2_ET_LOAD_EXT_JOURNAL; goto errout; } /* Make sure the journal UUID is correct */ if (memcmp(jsuper.s_uuid, ctx->fs->super->s_journal_uuid, sizeof(jsuper.s_uuid))) { fix_problem(ctx, PR_0_JOURNAL_BAD_UUID, &pctx); retval = EXT2_ET_LOAD_EXT_JOURNAL; goto errout; } journal->j_maxlen = jsuper.s_blocks_count; start++; } if (!(bh = getblk(dev_journal, start, journal->j_blocksize))) { retval = EXT2_ET_NO_MEMORY; goto errout; } journal->j_sb_buffer = bh; journal->j_superblock = (journal_superblock_t *)bh->b_data; #ifdef USE_INODE_IO if (j_inode) ext2fs_free_mem(&j_inode); #endif *ret_journal = journal; e2fsck_use_inode_shortcuts(ctx, 0); return 0; errout: e2fsck_use_inode_shortcuts(ctx, 0); if (dev_fs) ext2fs_free_mem(&dev_fs); if (j_inode) ext2fs_free_mem(&j_inode); if (journal) ext2fs_free_mem(&journal); return retval; }
/* * Write a superblock and associated information back to disk. */ int ffs_sbupdate(struct ufsmount *mp, int waitfor) { struct fs *dfs, *fs = mp->um_fs; struct buf *bp; int blks; caddr_t space; int i, size, error, allerror = 0; /* * First write back the summary information. */ blks = howmany(fs->fs_cssize, fs->fs_fsize); space = (caddr_t)fs->fs_csp; for (i = 0; i < blks; i += fs->fs_frag) { size = fs->fs_bsize; if (i + fs->fs_frag > blks) size = (blks - i) * fs->fs_fsize; bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i), size, 0, 0); memcpy(bp->b_data, space, size); space += size; if (waitfor != MNT_WAIT) bawrite(bp); else if ((error = bwrite(bp))) allerror = error; } /* * Now write back the superblock itself. If any errors occurred * up to this point, then fail so that the superblock avoids * being written out as clean. */ if (allerror) { return (allerror); } bp = getblk(mp->um_devvp, fs->fs_sblockloc >> (fs->fs_fshift - fs->fs_fsbtodb), (int)fs->fs_sbsize, 0, 0); fs->fs_fmod = 0; fs->fs_time = time_second; memcpy(bp->b_data, fs, fs->fs_sbsize); /* Restore compatibility to old file systems. XXX */ dfs = (struct fs *)bp->b_data; /* XXX */ if (fs->fs_postblformat == FS_42POSTBLFMT) /* XXX */ dfs->fs_nrpos = -1; /* XXX */ if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */ int32_t *lp, tmp; /* XXX */ /* XXX */ lp = (int32_t *)&dfs->fs_qbmask; /* XXX */ tmp = lp[4]; /* XXX */ for (i = 4; i > 0; i--) /* XXX */ lp[i] = lp[i-1]; /* XXX */ lp[0] = tmp; /* XXX */ } /* XXX */ dfs->fs_maxfilesize = mp->um_savedmaxfilesize; /* XXX */ ffs1_compat_write(dfs, mp); if (waitfor != MNT_WAIT) bawrite(bp); else if ((error = bwrite(bp))) allerror = error; return (allerror); }
// // set up start journal block and journal size // make journal unreplayable by kernel replay routine // void reset_journal (struct super_block * s) { int i ; struct buffer_head *bh ; int done = 0; int len; int start; /* first block of journal */ s->u.reiserfs_sb.s_rs->s_journal_block = get_journal_start (s); start = s->u.reiserfs_sb.s_rs->s_journal_block; /* journal size */ s->u.reiserfs_sb.s_rs->s_orig_journal_size = get_journal_size (s); len = s->u.reiserfs_sb.s_rs->s_orig_journal_size + 1; printf ("Resetting journal - "); fflush (stdout); for (i = 0 ; i < len ; i++) { print_how_far (&done, len); bh = getblk (s->s_dev, start + i, s->s_blocksize) ; memset(bh->b_data, 0, s->s_blocksize) ; mark_buffer_dirty(bh,0) ; mark_buffer_uptodate(bh,0) ; bwrite (bh); brelse(bh) ; } printf ("\n"); fflush (stdout); #if 0 /* need better way to make journal unreplayable */ /* have journal_read to replay nothing: look for first non-desc block and set j_first_unflushed_offset to it */ { int offset; struct buffer_head * bh, *jh_bh; struct reiserfs_journal_header * j_head; struct reiserfs_journal_desc * desc; jh_bh = bread (s->s_dev, s->u.reiserfs_sb.s_rs->s_journal_block + s->u.reiserfs_sb.s_rs->s_orig_journal_size, s->s_blocksize); j_head = (struct reiserfs_journal_header *)(jh_bh->b_data); for (offset = 0; offset < s->u.reiserfs_sb.s_rs->s_orig_journal_size; offset ++) { bh = bread (s->s_dev, s->u.reiserfs_sb.s_rs->s_journal_block + offset, s->s_blocksize); desc = (struct reiserfs_journal_desc *)((bh)->b_data); if (memcmp(desc->j_magic, JOURNAL_DESC_MAGIC, 8)) { /* not desc block found */ j_head->j_first_unflushed_offset = offset; brelse (bh); break; } brelse (bh); } mark_buffer_uptodate (jh_bh, 1); mark_buffer_dirty (jh_bh, 1); bwrite (jh_bh); brelse (jh_bh); } #endif }
void pass5(void) { int c, blk, frags, basesize, sumsize, mapsize, cssize; int inomapsize, blkmapsize; struct fs *fs = sblock; daddr_t dbase, dmax; daddr_t d; long i, j, k; struct csum *cs; struct csum_total cstotal; struct inodesc idesc[4]; char buf[MAXBSIZE]; struct cg *newcg = (struct cg *)buf; struct ocg *ocg = (struct ocg *)buf; struct cg *cg = cgrp, *ncg; struct inostat *info; u_int32_t ncgsize; inoinfo(WINO)->ino_state = USTATE; memset(newcg, 0, (size_t)fs->fs_cgsize); newcg->cg_niblk = fs->fs_ipg; if (cvtlevel >= 3) { if (fs->fs_maxcontig < 2 && fs->fs_contigsumsize > 0) { if (preen) pwarn("DELETING CLUSTERING MAPS\n"); if (preen || reply("DELETE CLUSTERING MAPS")) { fs->fs_contigsumsize = 0; doinglevel1 = 1; sbdirty(); } } if (fs->fs_maxcontig > 1) { const char *doit = NULL; if (fs->fs_contigsumsize < 1) { doit = "CREAT"; } else if (fs->fs_contigsumsize < fs->fs_maxcontig && fs->fs_contigsumsize < FS_MAXCONTIG) { doit = "EXPAND"; } if (doit) { i = fs->fs_contigsumsize; fs->fs_contigsumsize = MIN(fs->fs_maxcontig, FS_MAXCONTIG); if (CGSIZE(fs) > fs->fs_bsize) { pwarn("CANNOT %s CLUSTER MAPS\n", doit); fs->fs_contigsumsize = i; } else if (preen || reply("CREATE CLUSTER MAPS")) { if (preen) pwarn("%sING CLUSTER MAPS\n", doit); ncgsize = fragroundup(fs, CGSIZE(fs)); ncg = realloc(cgrp, ncgsize); if (ncg == NULL) errexit( "cannot reallocate cg space"); cg = cgrp = ncg; fs->fs_cgsize = ncgsize; doinglevel1 = 1; sbdirty(); } } } } basesize = &newcg->cg_space[0] - (u_char *)(&newcg->cg_firstfield); cssize = (u_char *)&cstotal.cs_spare[0] - (u_char *)&cstotal.cs_ndir; sumsize = 0; if (is_ufs2) { newcg->cg_iusedoff = basesize; } else { /* * We reserve the space for the old rotation summary * tables for the benefit of old kernels, but do not * maintain them in modern kernels. In time, they can * go away. */ newcg->cg_old_btotoff = basesize; newcg->cg_old_boff = newcg->cg_old_btotoff + fs->fs_old_cpg * sizeof(int32_t); newcg->cg_iusedoff = newcg->cg_old_boff + fs->fs_old_cpg * fs->fs_old_nrpos * sizeof(u_int16_t); memset(&newcg->cg_space[0], 0, newcg->cg_iusedoff - basesize); } inomapsize = howmany(fs->fs_ipg, CHAR_BIT); newcg->cg_freeoff = newcg->cg_iusedoff + inomapsize; blkmapsize = howmany(fs->fs_fpg, CHAR_BIT); newcg->cg_nextfreeoff = newcg->cg_freeoff + blkmapsize; if (fs->fs_contigsumsize > 0) { newcg->cg_clustersumoff = newcg->cg_nextfreeoff - sizeof(u_int32_t); if (isappleufs) { /* Apple PR2216969 gives rationale for this change. * I believe they were mistaken, but we need to * duplicate it for compatibility. -- [email protected] */ newcg->cg_clustersumoff += sizeof(u_int32_t); } newcg->cg_clustersumoff = roundup(newcg->cg_clustersumoff, sizeof(u_int32_t)); newcg->cg_clusteroff = newcg->cg_clustersumoff + (fs->fs_contigsumsize + 1) * sizeof(u_int32_t); newcg->cg_nextfreeoff = newcg->cg_clusteroff + howmany(fragstoblks(fs, fs->fs_fpg), CHAR_BIT); } newcg->cg_magic = CG_MAGIC; mapsize = newcg->cg_nextfreeoff - newcg->cg_iusedoff; if (!is_ufs2 && ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0)) { switch ((int)fs->fs_old_postblformat) { case FS_42POSTBLFMT: basesize = (char *)(&ocg->cg_btot[0]) - (char *)(&ocg->cg_firstfield); sumsize = &ocg->cg_iused[0] - (u_int8_t *)(&ocg->cg_btot[0]); mapsize = &ocg->cg_free[howmany(fs->fs_fpg, NBBY)] - (u_char *)&ocg->cg_iused[0]; blkmapsize = howmany(fs->fs_fpg, NBBY); inomapsize = &ocg->cg_free[0] - (u_char *)&ocg->cg_iused[0]; ocg->cg_magic = CG_MAGIC; newcg->cg_magic = 0; break; case FS_DYNAMICPOSTBLFMT: sumsize = newcg->cg_iusedoff - newcg->cg_old_btotoff; break; default: errexit("UNKNOWN ROTATIONAL TABLE FORMAT %d", fs->fs_old_postblformat); } } memset(&idesc[0], 0, sizeof idesc); for (i = 0; i < 4; i++) { idesc[i].id_type = ADDR; if (!is_ufs2 && doinglevel2) idesc[i].id_fix = FIX; } memset(&cstotal, 0, sizeof(struct csum_total)); dmax = blknum(fs, fs->fs_size + fs->fs_frag - 1); for (d = fs->fs_size; d < dmax; d++) setbmap(d); for (c = 0; c < fs->fs_ncg; c++) { if (got_siginfo) { fprintf(stderr, "%s: phase 5: cyl group %d of %d (%d%%)\n", cdevname(), c, fs->fs_ncg, c * 100 / fs->fs_ncg); got_siginfo = 0; } #ifdef PROGRESS progress_bar(cdevname(), preen ? NULL : "phase 5", c, fs->fs_ncg); #endif /* PROGRESS */ getblk(&cgblk, cgtod(fs, c), fs->fs_cgsize); memcpy(cg, cgblk.b_un.b_cg, fs->fs_cgsize); if((doswap && !needswap) || (!doswap && needswap)) ffs_cg_swap(cgblk.b_un.b_cg, cg, sblock); if (!doinglevel1 && !cg_chkmagic(cg, 0)) pfatal("CG %d: PASS5: BAD MAGIC NUMBER\n", c); if(doswap) cgdirty(); /* * While we have the disk head where we want it, * write back the superblock to the spare at this * cylinder group. */ if ((cvtlevel && sblk.b_dirty) || doswap) { bwrite(fswritefd, sblk.b_un.b_buf, fsbtodb(sblock, cgsblock(sblock, c)), sblock->fs_sbsize); } else { /* * Read in the current alternate superblock, * and compare it to the master. If it's * wrong, fix it up. */ getblk(&asblk, cgsblock(sblock, c), sblock->fs_sbsize); if (asblk.b_errs) pfatal("CG %d: UNABLE TO READ ALTERNATE " "SUPERBLK\n", c); else { memmove(altsblock, asblk.b_un.b_fs, sblock->fs_sbsize); if (needswap) ffs_sb_swap(asblk.b_un.b_fs, altsblock); } sb_oldfscompat_write(sblock, sblocksave); if ((asblk.b_errs || cmpsblks(sblock, altsblock)) && dofix(&idesc[3], "ALTERNATE SUPERBLK(S) ARE INCORRECT")) { bwrite(fswritefd, sblk.b_un.b_buf, fsbtodb(sblock, cgsblock(sblock, c)), sblock->fs_sbsize); } sb_oldfscompat_read(sblock, 0); } dbase = cgbase(fs, c); dmax = dbase + fs->fs_fpg; if (dmax > fs->fs_size) dmax = fs->fs_size; if (is_ufs2 || (fs->fs_old_flags & FS_FLAGS_UPDATED)) newcg->cg_time = cg->cg_time; newcg->cg_old_time = cg->cg_old_time; newcg->cg_cgx = c; newcg->cg_ndblk = dmax - dbase; if (!is_ufs2) { if (c == fs->fs_ncg - 1) { /* Avoid fighting old fsck for this value. Its never used * outside of this check anyway. */ if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) newcg->cg_old_ncyl = fs->fs_old_ncyl % fs->fs_old_cpg; else newcg->cg_old_ncyl = howmany(newcg->cg_ndblk, fs->fs_fpg / fs->fs_old_cpg); } else newcg->cg_old_ncyl = fs->fs_old_cpg; newcg->cg_old_niblk = fs->fs_ipg; newcg->cg_niblk = 0; } if (fs->fs_contigsumsize > 0) newcg->cg_nclusterblks = newcg->cg_ndblk / fs->fs_frag; newcg->cg_cs.cs_ndir = 0; newcg->cg_cs.cs_nffree = 0; newcg->cg_cs.cs_nbfree = 0; newcg->cg_cs.cs_nifree = fs->fs_ipg; if (cg->cg_rotor >= 0 && cg->cg_rotor < newcg->cg_ndblk) newcg->cg_rotor = cg->cg_rotor; else newcg->cg_rotor = 0; if (cg->cg_frotor >= 0 && cg->cg_frotor < newcg->cg_ndblk) newcg->cg_frotor = cg->cg_frotor; else newcg->cg_frotor = 0; if (cg->cg_irotor >= 0 && cg->cg_irotor < fs->fs_ipg) newcg->cg_irotor = cg->cg_irotor; else newcg->cg_irotor = 0; if (!is_ufs2) { newcg->cg_initediblk = 0; } else { if ((unsigned)cg->cg_initediblk > fs->fs_ipg) newcg->cg_initediblk = fs->fs_ipg; else newcg->cg_initediblk = cg->cg_initediblk; } memset(&newcg->cg_frsum[0], 0, sizeof newcg->cg_frsum); memset(&old_cg_blktot(newcg, 0)[0], 0, (size_t)(sumsize)); memset(cg_inosused(newcg, 0), 0, (size_t)(mapsize)); if (!is_ufs2 && ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) && fs->fs_old_postblformat == FS_42POSTBLFMT) ocg->cg_magic = CG_MAGIC; j = fs->fs_ipg * c; for (i = 0; i < fs->fs_ipg; j++, i++) { info = inoinfo(j); switch (info->ino_state) { case USTATE: break; case DSTATE: case DCLEAR: case DFOUND: newcg->cg_cs.cs_ndir++; /* fall through */ case FSTATE: case FCLEAR: newcg->cg_cs.cs_nifree--; setbit(cg_inosused(newcg, 0), i); break; default: if (j < ROOTINO) break; errexit("BAD STATE %d FOR INODE I=%ld", info->ino_state, (long)j); } } if (c == 0) for (i = 0; i < ROOTINO; i++) { setbit(cg_inosused(newcg, 0), i); newcg->cg_cs.cs_nifree--; } for (i = 0, d = dbase; d < dmax; d += fs->fs_frag, i += fs->fs_frag) { frags = 0; for (j = 0; j < fs->fs_frag; j++) { if (testbmap(d + j)) continue; setbit(cg_blksfree(newcg, 0), i + j); frags++; } if (frags == fs->fs_frag) { newcg->cg_cs.cs_nbfree++; if (sumsize) { j = old_cbtocylno(fs, i); old_cg_blktot(newcg, 0)[j]++; old_cg_blks(fs, newcg, j, 0)[old_cbtorpos(fs, i)]++; } if (fs->fs_contigsumsize > 0) setbit(cg_clustersfree(newcg, 0), fragstoblks(fs, i)); } else if (frags > 0) { newcg->cg_cs.cs_nffree += frags; blk = blkmap(fs, cg_blksfree(newcg, 0), i); ffs_fragacct(fs, blk, newcg->cg_frsum, 1, 0); } } if (fs->fs_contigsumsize > 0) { int32_t *sump = cg_clustersum(newcg, 0); u_char *mapp = cg_clustersfree(newcg, 0); int map = *mapp++; int bit = 1; int run = 0; for (i = 0; i < newcg->cg_nclusterblks; i++) { if ((map & bit) != 0) { run++; } else if (run != 0) { if (run > fs->fs_contigsumsize) run = fs->fs_contigsumsize; sump[run]++; run = 0; } if ((i & (NBBY - 1)) != (NBBY - 1)) { bit <<= 1; } else { map = *mapp++; bit = 1; } } if (run != 0) { if (run > fs->fs_contigsumsize) run = fs->fs_contigsumsize; sump[run]++; } } cstotal.cs_nffree += newcg->cg_cs.cs_nffree; cstotal.cs_nbfree += newcg->cg_cs.cs_nbfree; cstotal.cs_nifree += newcg->cg_cs.cs_nifree; cstotal.cs_ndir += newcg->cg_cs.cs_ndir; cs = &fs->fs_cs(fs, c); if (memcmp(&newcg->cg_cs, cs, sizeof *cs) != 0) { if (debug) { printf("cg %d: nffree: %d/%d nbfree %d/%d" " nifree %d/%d ndir %d/%d\n", c, cs->cs_nffree,newcg->cg_cs.cs_nffree, cs->cs_nbfree,newcg->cg_cs.cs_nbfree, cs->cs_nifree,newcg->cg_cs.cs_nifree, cs->cs_ndir,newcg->cg_cs.cs_ndir); } if (dofix(&idesc[0], "FREE BLK COUNT(S) WRONG IN SUPERBLK")) { memmove(cs, &newcg->cg_cs, sizeof *cs); sbdirty(); } else markclean = 0; } if (doinglevel1) { memmove(cg, newcg, (size_t)fs->fs_cgsize); cgdirty(); continue; } if ((memcmp(newcg, cg, basesize) != 0) || (memcmp(&old_cg_blktot(newcg, 0)[0], &old_cg_blktot(cg, 0)[0], sumsize) != 0)) { if (dofix(&idesc[2], "SUMMARY INFORMATION BAD")) { memmove(cg, newcg, (size_t)basesize); memmove(&old_cg_blktot(cg, 0)[0], &old_cg_blktot(newcg, 0)[0], (size_t)sumsize); cgdirty(); } else markclean = 0; } if (usedsoftdep) { for (i = 0; i < inomapsize; i++) { j = cg_inosused(newcg, 0)[i]; if ((cg_inosused(cg, 0)[i] & j) == j) continue; for (k = 0; k < NBBY; k++) { if ((j & (1 << k)) == 0) continue; if (cg_inosused(cg, 0)[i] & (1 << k)) continue; pwarn("ALLOCATED INODE %ld " "MARKED FREE\n", c * fs->fs_ipg + i * 8 + k); } } for (i = 0; i < blkmapsize; i++) { j = cg_blksfree(cg, 0)[i]; if ((cg_blksfree(newcg, 0)[i] & j) == j) continue; for (k = 0; k < NBBY; k++) { if ((j & (1 << k)) == 0) continue; if (cg_inosused(cg, 0)[i] & (1 << k)) continue; pwarn("ALLOCATED FRAG %ld " "MARKED FREE\n", c * fs->fs_fpg + i * 8 + k); } } } if (memcmp(cg_inosused(newcg, 0), cg_inosused(cg, 0), mapsize) != 0 && dofix(&idesc[1], "BLK(S) MISSING IN BIT MAPS")) { memmove(cg_inosused(cg, 0), cg_inosused(newcg, 0), (size_t)mapsize); cgdirty(); } } if (memcmp(&cstotal, &fs->fs_cstotal, cssize) != 0) { if (debug) { printf("total: nffree: %lld/%lld nbfree %lld/%lld" " nifree %lld/%lld ndir %lld/%lld\n", (long long int)fs->fs_cstotal.cs_nffree, (long long int)cstotal.cs_nffree, (long long int)fs->fs_cstotal.cs_nbfree, (long long int)cstotal.cs_nbfree, (long long int)fs->fs_cstotal.cs_nifree, (long long int)cstotal.cs_nifree, (long long int)fs->fs_cstotal.cs_ndir, (long long int)cstotal.cs_ndir); } if (dofix(&idesc[0], "FREE BLK COUNT(S) WRONG IN SUPERBLK")) { memmove(&fs->fs_cstotal, &cstotal, sizeof cstotal); fs->fs_ronly = 0; fs->fs_fmod = 0; sbdirty(); } else markclean = 0; } #ifdef PROGRESS if (!preen) progress_done(); #endif /* PROGRESS */ }
/* * Balloc defines the structure of filesystem storage * by allocating the physical blocks on a device given * the inode and the logical block number in a file. */ int ext2_balloc(struct inode *ip, e2fs_lbn_t lbn, int size, struct ucred *cred, struct buf **bpp, int flags) { struct m_ext2fs *fs; struct ext2mount *ump; struct buf *bp, *nbp; struct vnode *vp = ITOV(ip); struct indir indirs[EXT2_NIADDR + 2]; e4fs_daddr_t nb, newb; e2fs_daddr_t *bap, pref; int osize, nsize, num, i, error; *bpp = NULL; if (lbn < 0) return (EFBIG); fs = ip->i_e2fs; ump = ip->i_ump; /* * check if this is a sequential block allocation. * If so, increment next_alloc fields to allow ext2_blkpref * to make a good guess */ if (lbn == ip->i_next_alloc_block + 1) { ip->i_next_alloc_block++; ip->i_next_alloc_goal++; } if (ip->i_flag & IN_E4EXTENTS) return (ext2_ext_balloc(ip, lbn, size, cred, bpp, flags)); /* * The first EXT2_NDADDR blocks are direct blocks */ if (lbn < EXT2_NDADDR) { nb = ip->i_db[lbn]; /* * no new block is to be allocated, and no need to expand * the file */ if (nb != 0 && ip->i_size >= (lbn + 1) * fs->e2fs_bsize) { error = bread(vp, lbn, fs->e2fs_bsize, NOCRED, &bp); if (error) { brelse(bp); return (error); } bp->b_blkno = fsbtodb(fs, nb); *bpp = bp; return (0); } if (nb != 0) { /* * Consider need to reallocate a fragment. */ osize = fragroundup(fs, blkoff(fs, ip->i_size)); nsize = fragroundup(fs, size); if (nsize <= osize) { error = bread(vp, lbn, osize, NOCRED, &bp); if (error) { brelse(bp); return (error); } bp->b_blkno = fsbtodb(fs, nb); } else { /* * Godmar thinks: this shouldn't happen w/o * fragments */ printf("nsize %d(%d) > osize %d(%d) nb %d\n", (int)nsize, (int)size, (int)osize, (int)ip->i_size, (int)nb); panic( "ext2_balloc: Something is terribly wrong"); /* * please note there haven't been any changes from here on - * FFS seems to work. */ } } else { if (ip->i_size < (lbn + 1) * fs->e2fs_bsize) nsize = fragroundup(fs, size); else nsize = fs->e2fs_bsize; EXT2_LOCK(ump); error = ext2_alloc(ip, lbn, ext2_blkpref(ip, lbn, (int)lbn, &ip->i_db[0], 0), nsize, cred, &newb); if (error) return (error); /* * If the newly allocated block exceeds 32-bit limit, * we can not use it in file block maps. */ if (newb > UINT_MAX) return (EFBIG); bp = getblk(vp, lbn, nsize, 0, 0, 0); bp->b_blkno = fsbtodb(fs, newb); if (flags & BA_CLRBUF) vfs_bio_clrbuf(bp); } ip->i_db[lbn] = dbtofsb(fs, bp->b_blkno); ip->i_flag |= IN_CHANGE | IN_UPDATE; *bpp = bp; return (0); } /* * Determine the number of levels of indirection. */ pref = 0; if ((error = ext2_getlbns(vp, lbn, indirs, &num)) != 0) return (error); #ifdef INVARIANTS if (num < 1) panic("ext2_balloc: ext2_getlbns returned indirect block"); #endif /* * Fetch the first indirect block allocating if necessary. */ --num; nb = ip->i_ib[indirs[0].in_off]; if (nb == 0) { EXT2_LOCK(ump); pref = ext2_blkpref(ip, lbn, indirs[0].in_off + EXT2_NDIR_BLOCKS, &ip->i_db[0], 0); if ((error = ext2_alloc(ip, lbn, pref, fs->e2fs_bsize, cred, &newb))) return (error); if (newb > UINT_MAX) return (EFBIG); nb = newb; bp = getblk(vp, indirs[1].in_lbn, fs->e2fs_bsize, 0, 0, 0); bp->b_blkno = fsbtodb(fs, newb); vfs_bio_clrbuf(bp); /* * Write synchronously so that indirect blocks * never point at garbage. */ if ((error = bwrite(bp)) != 0) { ext2_blkfree(ip, nb, fs->e2fs_bsize); return (error); } ip->i_ib[indirs[0].in_off] = newb; ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * Fetch through the indirect blocks, allocating as necessary. */ for (i = 1;;) { error = bread(vp, indirs[i].in_lbn, (int)fs->e2fs_bsize, NOCRED, &bp); if (error) { brelse(bp); return (error); } bap = (e2fs_daddr_t *)bp->b_data; nb = bap[indirs[i].in_off]; if (i == num) break; i += 1; if (nb != 0) { bqrelse(bp); continue; } EXT2_LOCK(ump); if (pref == 0) pref = ext2_blkpref(ip, lbn, indirs[i].in_off, bap, bp->b_lblkno); error = ext2_alloc(ip, lbn, pref, (int)fs->e2fs_bsize, cred, &newb); if (error) { brelse(bp); return (error); } if (newb > UINT_MAX) return (EFBIG); nb = newb; nbp = getblk(vp, indirs[i].in_lbn, fs->e2fs_bsize, 0, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); vfs_bio_clrbuf(nbp); /* * Write synchronously so that indirect blocks * never point at garbage. */ if ((error = bwrite(nbp)) != 0) { ext2_blkfree(ip, nb, fs->e2fs_bsize); EXT2_UNLOCK(ump); brelse(bp); return (error); } bap[indirs[i - 1].in_off] = nb; /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & IO_SYNC) { bwrite(bp); } else { if (bp->b_bufsize == fs->e2fs_bsize) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } } /* * Get the data block, allocating if necessary. */ if (nb == 0) { EXT2_LOCK(ump); pref = ext2_blkpref(ip, lbn, indirs[i].in_off, &bap[0], bp->b_lblkno); if ((error = ext2_alloc(ip, lbn, pref, (int)fs->e2fs_bsize, cred, &newb)) != 0) { brelse(bp); return (error); } if (newb > UINT_MAX) return (EFBIG); nb = newb; nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); if (flags & BA_CLRBUF) vfs_bio_clrbuf(nbp); bap[indirs[i].in_off] = nb; /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & IO_SYNC) { bwrite(bp); } else { if (bp->b_bufsize == fs->e2fs_bsize) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } *bpp = nbp; return (0); } brelse(bp); if (flags & BA_CLRBUF) { int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT; if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { error = cluster_read(vp, ip->i_size, lbn, (int)fs->e2fs_bsize, NOCRED, MAXBSIZE, seqcount, 0, &nbp); } else { error = bread(vp, lbn, (int)fs->e2fs_bsize, NOCRED, &nbp); } if (error) { brelse(nbp); return (error); } } else {