/*
 * Check that a filesystem-wide inode number points neither outside the
 * filesystem nor at static AG metadata.
 *
 * Returns true only if the AG number decoded from @ino is below the
 * superblock AG count, the decoded (agno, agino) pair re-encodes to exactly
 * @ino, and xfs_verify_agino() accepts the AG-relative inode number.
 */
bool
xfs_verify_ino(
	struct xfs_mount	*mp,
	xfs_ino_t		ino)
{
	xfs_agnumber_t		ag = XFS_INO_TO_AGNO(mp, ino);
	xfs_agino_t		ag_ino = XFS_INO_TO_AGINO(mp, ino);

	/*
	 * The checks run left to right and stop at the first failure, so the
	 * AG-relative verifier only ever sees an AG number that exists and an
	 * inode number that round-trips.
	 */
	return ag < mp->m_sb.sb_agcount &&
	       XFS_AGINO_TO_INO(mp, ag, ag_ino) == ino &&
	       xfs_verify_agino(mp, ag, ag_ino);
}
/* * If we are doing readahead on an inode buffer, we might be in log recovery * reading an inode allocation buffer that hasn't yet been replayed, and hence * has not had the inode cores stamped into it. Hence for readahead, the buffer * may be potentially invalid. * * If the readahead buffer is invalid, we need to mark it with an error and * clear the DONE status of the buffer so that a followup read will re-read it * from disk. We don't report the error otherwise to avoid warnings during log * recovery and we don't get unnecssary panics on debug kernels. We use EIO here * because all we want to do is say readahead failed; there is no-one to report * the error to, so this will distinguish it from a non-ra verifier failure. * Changes to this readahead error behavour also need to be reflected in * xfs_dquot_buf_readahead_verify(). */ static void xfs_inode_buf_verify( struct xfs_buf *bp, bool readahead) { struct xfs_mount *mp = bp->b_target->bt_mount; xfs_agnumber_t agno; int i; int ni; /* * Validate the magic number and version of every inode in the buffer */ agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp)); ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock; for (i = 0; i < ni; i++) { int di_ok; xfs_dinode_t *dip; xfs_agino_t unlinked_ino; dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog)); unlinked_ino = be32_to_cpu(dip->di_next_unlinked); di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) && xfs_dinode_good_version(mp, dip->di_version) && (unlinked_ino == NULLAGINO || xfs_verify_agino(mp, agno, unlinked_ino)); if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP))) { if (readahead) { bp->b_flags &= ~XBF_DONE; xfs_buf_ioerror(bp, -EIO); return; } #ifdef DEBUG xfs_alert(mp, "bad inode magic/vsn daddr %lld #%d (magic=%x)", (unsigned long long)bp->b_bn, i, be16_to_cpu(dip->di_magic)); #endif xfs_buf_verifier_error(bp, -EFSCORRUPTED, __func__, dip, sizeof(*dip), NULL); return; } } }
/* * Figure out how many blocks to reserve for an AG repair. We calculate the * worst case estimate for the number of blocks we'd need to rebuild one of * any type of per-AG btree. */ xfs_extlen_t xrep_calc_ag_resblks( struct xfs_scrub *sc) { struct xfs_mount *mp = sc->mp; struct xfs_scrub_metadata *sm = sc->sm; struct xfs_perag *pag; struct xfs_buf *bp; xfs_agino_t icount = NULLAGINO; xfs_extlen_t aglen = NULLAGBLOCK; xfs_extlen_t usedlen; xfs_extlen_t freelen; xfs_extlen_t bnobt_sz; xfs_extlen_t inobt_sz; xfs_extlen_t rmapbt_sz; xfs_extlen_t refcbt_sz; int error; if (!(sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)) return 0; pag = xfs_perag_get(mp, sm->sm_agno); if (pag->pagi_init) { /* Use in-core icount if possible. */ icount = pag->pagi_count; } else { /* Try to get the actual counters from disk. */ error = xfs_ialloc_read_agi(mp, NULL, sm->sm_agno, &bp); if (!error) { icount = pag->pagi_count; xfs_buf_relse(bp); } } /* Now grab the block counters from the AGF. */ error = xfs_alloc_read_agf(mp, NULL, sm->sm_agno, 0, &bp); if (!error) { aglen = be32_to_cpu(XFS_BUF_TO_AGF(bp)->agf_length); freelen = be32_to_cpu(XFS_BUF_TO_AGF(bp)->agf_freeblks); usedlen = aglen - freelen; xfs_buf_relse(bp); } xfs_perag_put(pag); /* If the icount is impossible, make some worst-case assumptions. */ if (icount == NULLAGINO || !xfs_verify_agino(mp, sm->sm_agno, icount)) { xfs_agino_t first, last; xfs_agino_range(mp, sm->sm_agno, &first, &last); icount = last - first + 1; } /* If the block counts are impossible, make worst-case assumptions. */ if (aglen == NULLAGBLOCK || aglen != xfs_ag_block_count(mp, sm->sm_agno) || freelen >= aglen) { aglen = xfs_ag_block_count(mp, sm->sm_agno); freelen = aglen; usedlen = aglen; } trace_xrep_calc_ag_resblks(mp, sm->sm_agno, icount, aglen, freelen, usedlen); /* * Figure out how many blocks we'd need worst case to rebuild * each type of btree. Note that we can only rebuild the * bnobt/cntbt or inobt/finobt as pairs. 
*/ bnobt_sz = 2 * xfs_allocbt_calc_size(mp, freelen); if (xfs_sb_version_hassparseinodes(&mp->m_sb)) inobt_sz = xfs_iallocbt_calc_size(mp, icount / XFS_INODES_PER_HOLEMASK_BIT); else inobt_sz = xfs_iallocbt_calc_size(mp, icount / XFS_INODES_PER_CHUNK); if (xfs_sb_version_hasfinobt(&mp->m_sb)) inobt_sz *= 2; if (xfs_sb_version_hasreflink(&mp->m_sb)) refcbt_sz = xfs_refcountbt_calc_size(mp, usedlen); else refcbt_sz = 0; if (xfs_sb_version_hasrmapbt(&mp->m_sb)) { /* * Guess how many blocks we need to rebuild the rmapbt. * For non-reflink filesystems we can't have more records than * used blocks. However, with reflink it's possible to have * more than one rmap record per AG block. We don't know how * many rmaps there could be in the AG, so we start off with * what we hope is an generous over-estimation. */ if (xfs_sb_version_hasreflink(&mp->m_sb)) rmapbt_sz = xfs_rmapbt_calc_size(mp, (unsigned long long)aglen * 2); else rmapbt_sz = xfs_rmapbt_calc_size(mp, usedlen); } else { rmapbt_sz = 0; } trace_xrep_calc_ag_resblks_btsize(mp, sm->sm_agno, bnobt_sz, inobt_sz, rmapbt_sz, refcbt_sz); return max(max(bnobt_sz, inobt_sz), max(rmapbt_sz, refcbt_sz)); }
/* Scrub an inobt/finobt record. */ STATIC int xfs_scrub_iallocbt_rec( struct xfs_scrub_btree *bs, union xfs_btree_rec *rec) { struct xfs_mount *mp = bs->cur->bc_mp; struct xfs_inobt_rec_incore irec; uint64_t holes; xfs_agnumber_t agno = bs->cur->bc_private.a.agno; xfs_agino_t agino; xfs_agblock_t agbno; xfs_extlen_t len; int holecount; int i; int error = 0; unsigned int real_freecount; uint16_t holemask; xfs_inobt_btrec_to_irec(mp, rec, &irec); if (irec.ir_count > XFS_INODES_PER_CHUNK || irec.ir_freecount > XFS_INODES_PER_CHUNK) xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); real_freecount = irec.ir_freecount + (XFS_INODES_PER_CHUNK - irec.ir_count); if (real_freecount != xfs_scrub_iallocbt_freecount(irec.ir_free)) xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); agino = irec.ir_startino; /* Record has to be properly aligned within the AG. */ if (!xfs_verify_agino(mp, agno, agino) || !xfs_verify_agino(mp, agno, agino + XFS_INODES_PER_CHUNK - 1)) { xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); goto out; } /* Make sure this record is aligned to cluster and inoalignmnt size. */ agbno = XFS_AGINO_TO_AGBNO(mp, irec.ir_startino); if ((agbno & (xfs_ialloc_cluster_alignment(mp) - 1)) || (agbno & (xfs_icluster_size_fsb(mp) - 1))) xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); /* Handle non-sparse inodes */ if (!xfs_inobt_issparse(irec.ir_holemask)) { len = XFS_B_TO_FSB(mp, XFS_INODES_PER_CHUNK * mp->m_sb.sb_inodesize); if (irec.ir_count != XFS_INODES_PER_CHUNK) xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); if (!xfs_scrub_iallocbt_chunk(bs, &irec, agino, len)) goto out; goto check_freemask; } /* Check each chunk of a sparse inode cluster. 
*/ holemask = irec.ir_holemask; holecount = 0; len = XFS_B_TO_FSB(mp, XFS_INODES_PER_HOLEMASK_BIT * mp->m_sb.sb_inodesize); holes = ~xfs_inobt_irec_to_allocmask(&irec); if ((holes & irec.ir_free) != holes || irec.ir_freecount > irec.ir_count) xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); for (i = 0; i < XFS_INOBT_HOLEMASK_BITS; i++) { if (holemask & 1) holecount += XFS_INODES_PER_HOLEMASK_BIT; else if (!xfs_scrub_iallocbt_chunk(bs, &irec, agino, len)) break; holemask >>= 1; agino += XFS_INODES_PER_HOLEMASK_BIT; } if (holecount > XFS_INODES_PER_CHUNK || holecount + irec.ir_count != XFS_INODES_PER_CHUNK) xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0); check_freemask: error = xfs_scrub_iallocbt_check_freemask(bs, &irec); if (error) goto out; out: return error; }