/* * Add a reference to an extent in the rmap btree. */ int xfs_rmap_alloc( struct xfs_trans *tp, struct xfs_buf *agbp, xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len, struct xfs_owner_info *oinfo) { struct xfs_mount *mp = tp->t_mountp; struct xfs_btree_cur *cur; int error; if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) return 0; cur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno); error = xfs_rmap_map(cur, bno, len, false, oinfo); if (error) goto out_error; xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); return 0; out_error: xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); return error; }
/* Dispose of every block of every extent in the bitmap. */ int xrep_reap_extents( struct xfs_scrub *sc, struct xfs_bitmap *bitmap, const struct xfs_owner_info *oinfo, enum xfs_ag_resv_type type) { struct xfs_bitmap_range *bmr; struct xfs_bitmap_range *n; xfs_fsblock_t fsbno; int error = 0; ASSERT(xfs_sb_version_hasrmapbt(&sc->mp->m_sb)); for_each_xfs_bitmap_block(fsbno, bmr, n, bitmap) { ASSERT(sc->ip != NULL || XFS_FSB_TO_AGNO(sc->mp, fsbno) == sc->sa.agno); trace_xrep_dispose_btree_extent(sc->mp, XFS_FSB_TO_AGNO(sc->mp, fsbno), XFS_FSB_TO_AGBNO(sc->mp, fsbno), 1); error = xrep_reap_block(sc, fsbno, oinfo, type); if (error) goto out; }
/* * Per-extent log reservation for the btree changes involved in freeing or * allocating an extent. In classic XFS there were two trees that will be * modified (bnobt + cntbt). With rmap enabled, there are three trees * (rmapbt). With reflink, there are four trees (refcountbt). The number of * blocks reserved is based on the formula: * * num trees * ((2 blocks/level * max depth) - 1) * * Keep in mind that max depth is calculated separately for each type of tree. */ uint xfs_allocfree_log_count( struct xfs_mount *mp, uint num_ops) { uint blocks; blocks = num_ops * 2 * (2 * mp->m_ag_maxlevels - 1); if (xfs_sb_version_hasrmapbt(&mp->m_sb)) blocks += num_ops * (2 * mp->m_rmap_maxlevels - 1); if (xfs_sb_version_hasreflink(&mp->m_sb)) blocks += num_ops * (2 * mp->m_refc_maxlevels - 1); return blocks; }
/* * Do we have enough reserve in this AG to handle a reflink? The refcount * btree already reserved all the space it needs, but the rmap btree can grow * infinitely, so we won't allow more reflinks when the AG is down to the * btree reserves. */ static int xfs_reflink_ag_has_free_space( struct xfs_mount *mp, xfs_agnumber_t agno) { struct xfs_perag *pag; int error = 0; if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) return 0; pag = xfs_perag_get(mp, agno); if (xfs_ag_resv_critical(pag, XFS_AG_RESV_AGFL) || xfs_ag_resv_critical(pag, XFS_AG_RESV_METADATA)) error = -ENOSPC; xfs_perag_put(pag); return error; }
static void xfs_agfblock_init( struct xfs_mount *mp, struct xfs_buf *bp, struct aghdr_init_data *id) { struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); xfs_extlen_t tmpsize; agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC); agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION); agf->agf_seqno = cpu_to_be32(id->agno); agf->agf_length = cpu_to_be32(id->agsize); agf->agf_roots[XFS_BTNUM_BNOi] = cpu_to_be32(XFS_BNO_BLOCK(mp)); agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp)); agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1); agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1); if (xfs_sb_version_hasrmapbt(&mp->m_sb)) { agf->agf_roots[XFS_BTNUM_RMAPi] = cpu_to_be32(XFS_RMAP_BLOCK(mp)); agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1); agf->agf_rmap_blocks = cpu_to_be32(1); } agf->agf_flfirst = cpu_to_be32(1); agf->agf_fllast = 0; agf->agf_flcount = 0; tmpsize = id->agsize - mp->m_ag_prealloc_blocks; agf->agf_freeblks = cpu_to_be32(tmpsize); agf->agf_longest = cpu_to_be32(tmpsize); if (xfs_sb_version_hascrc(&mp->m_sb)) uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid); if (xfs_sb_version_hasreflink(&mp->m_sb)) { agf->agf_refcount_root = cpu_to_be32( xfs_refc_block(mp)); agf->agf_refcount_level = cpu_to_be32(1); agf->agf_refcount_blocks = cpu_to_be32(1); } }
/* * Get filesystem's extents as described in head, and format for * output. Calls formatter to fill the user's buffer until all * extents are mapped, until the passed-in head->fmh_count slots have * been filled, or until the formatter short-circuits the loop, if it * is tracking filled-in extents on its own. * * Key to Confusion * ---------------- * There are multiple levels of keys and counters at work here: * xfs_fsmap_head.fmh_keys -- low and high fsmap keys passed in; * these reflect fs-wide sector addrs. * dkeys -- fmh_keys used to query each device; * these are fmh_keys but w/ the low key * bumped up by fmr_length. * xfs_getfsmap_info.next_daddr -- next disk addr we expect to see; this * is how we detect gaps in the fsmap records and report them. * xfs_getfsmap_info.low/high -- per-AG low/high keys computed from * dkeys; used to query the metadata. */ int xfs_getfsmap( struct xfs_mount *mp, struct xfs_fsmap_head *head, xfs_fsmap_format_t formatter, void *arg) { struct xfs_trans *tp = NULL; struct xfs_fsmap dkeys[2]; /* per-dev keys */ struct xfs_getfsmap_dev handlers[XFS_GETFSMAP_DEVS]; struct xfs_getfsmap_info info = { NULL }; bool use_rmap; int i; int error = 0; if (head->fmh_iflags & ~FMH_IF_VALID) return -EINVAL; if (!xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[0]) || !xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[1])) return -EINVAL; use_rmap = capable(CAP_SYS_ADMIN) && xfs_sb_version_hasrmapbt(&mp->m_sb); head->fmh_entries = 0; /* Set up our device handlers. */ memset(handlers, 0, sizeof(handlers)); handlers[0].dev = new_encode_dev(mp->m_ddev_targp->bt_dev); if (use_rmap) handlers[0].fn = xfs_getfsmap_datadev_rmapbt; else handlers[0].fn = xfs_getfsmap_datadev_bnobt; if (mp->m_logdev_targp != mp->m_ddev_targp) { handlers[1].dev = new_encode_dev(mp->m_logdev_targp->bt_dev); handlers[1].fn = xfs_getfsmap_logdev; } #ifdef CONFIG_XFS_RT if (mp->m_rtdev_targp) { handlers[2].dev = new_encode_dev(mp->m_rtdev_targp->bt_dev); handlers[2].fn = xfs_getfsmap_rtdev_rtbitmap; } #endif /* CONFIG_XFS_RT */ xfs_sort(handlers, XFS_GETFSMAP_DEVS, sizeof(struct xfs_getfsmap_dev), xfs_getfsmap_dev_compare); /* * To continue where we left off, we allow userspace to use the * last mapping from a previous call as the low key of the next. * This is identified by a non-zero length in the low key. We * have to increment the low key in this scenario to ensure we * don't return the same mapping again, and instead return the * very next mapping. * * If the low key mapping refers to file data, the same physical * blocks could be mapped to several other files/offsets. * According to rmapbt record ordering, the minimal next * possible record for the block range is the next starting * offset in the same inode. Therefore, bump the file offset to * continue the search appropriately. For all other low key * mapping types (attr blocks, metadata), bump the physical * offset as there can be no other mapping for the same physical * block range. */ dkeys[0] = head->fmh_keys[0]; if (dkeys[0].fmr_flags & (FMR_OF_SPECIAL_OWNER | FMR_OF_EXTENT_MAP)) { dkeys[0].fmr_physical += dkeys[0].fmr_length; dkeys[0].fmr_owner = 0; if (dkeys[0].fmr_offset) return -EINVAL; } else dkeys[0].fmr_offset += dkeys[0].fmr_length; dkeys[0].fmr_length = 0; memset(&dkeys[1], 0xFF, sizeof(struct xfs_fsmap)); if (!xfs_getfsmap_check_keys(dkeys, &head->fmh_keys[1])) return -EINVAL; info.next_daddr = head->fmh_keys[0].fmr_physical + head->fmh_keys[0].fmr_length; info.formatter = formatter; info.format_arg = arg; info.head = head; /* For each device we support... */ for (i = 0; i < XFS_GETFSMAP_DEVS; i++) { /* Is this device within the range the user asked for? */ if (!handlers[i].fn) continue; if (head->fmh_keys[0].fmr_device > handlers[i].dev) continue; if (head->fmh_keys[1].fmr_device < handlers[i].dev) break; /* * If this device number matches the high key, we have * to pass the high key to the handler to limit the * query results. If the device number exceeds the * low key, zero out the low key so that we get * everything from the beginning. */ if (handlers[i].dev == head->fmh_keys[1].fmr_device) dkeys[1] = head->fmh_keys[1]; if (handlers[i].dev > head->fmh_keys[0].fmr_device) memset(&dkeys[0], 0, sizeof(struct xfs_fsmap)); error = xfs_trans_alloc_empty(mp, &tp); if (error) break; info.dev = handlers[i].dev; info.last = false; info.agno = NULLAGNUMBER; error = handlers[i].fn(tp, dkeys, &info); if (error) break; xfs_trans_cancel(tp); tp = NULL; info.next_daddr = 0; } if (tp) xfs_trans_cancel(tp); head->fmh_oflags = FMH_OF_DEV_T; return error; }
/* * Don't defer an rmap if we aren't an rmap filesystem. */ static bool xfs_rmap_update_is_needed( struct xfs_mount *mp) { return xfs_sb_version_hasrmapbt(&mp->m_sb); }
/* * Figure out how many blocks to reserve for an AG repair. We calculate the * worst case estimate for the number of blocks we'd need to rebuild one of * any type of per-AG btree. */ xfs_extlen_t xrep_calc_ag_resblks( struct xfs_scrub *sc) { struct xfs_mount *mp = sc->mp; struct xfs_scrub_metadata *sm = sc->sm; struct xfs_perag *pag; struct xfs_buf *bp; xfs_agino_t icount = NULLAGINO; xfs_extlen_t aglen = NULLAGBLOCK; xfs_extlen_t usedlen; xfs_extlen_t freelen; xfs_extlen_t bnobt_sz; xfs_extlen_t inobt_sz; xfs_extlen_t rmapbt_sz; xfs_extlen_t refcbt_sz; int error; if (!(sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)) return 0; pag = xfs_perag_get(mp, sm->sm_agno); if (pag->pagi_init) { /* Use in-core icount if possible. */ icount = pag->pagi_count; } else { /* Try to get the actual counters from disk. */ error = xfs_ialloc_read_agi(mp, NULL, sm->sm_agno, &bp); if (!error) { icount = pag->pagi_count; xfs_buf_relse(bp); } } /* Now grab the block counters from the AGF. */ error = xfs_alloc_read_agf(mp, NULL, sm->sm_agno, 0, &bp); if (!error) { aglen = be32_to_cpu(XFS_BUF_TO_AGF(bp)->agf_length); freelen = be32_to_cpu(XFS_BUF_TO_AGF(bp)->agf_freeblks); usedlen = aglen - freelen; xfs_buf_relse(bp); } xfs_perag_put(pag); /* If the icount is impossible, make some worst-case assumptions. */ if (icount == NULLAGINO || !xfs_verify_agino(mp, sm->sm_agno, icount)) { xfs_agino_t first, last; xfs_agino_range(mp, sm->sm_agno, &first, &last); icount = last - first + 1; } /* If the block counts are impossible, make worst-case assumptions. */ if (aglen == NULLAGBLOCK || aglen != xfs_ag_block_count(mp, sm->sm_agno) || freelen >= aglen) { aglen = xfs_ag_block_count(mp, sm->sm_agno); freelen = aglen; usedlen = aglen; } trace_xrep_calc_ag_resblks(mp, sm->sm_agno, icount, aglen, freelen, usedlen); /* * Figure out how many blocks we'd need worst case to rebuild * each type of btree. Note that we can only rebuild the * bnobt/cntbt or inobt/finobt as pairs. */ bnobt_sz = 2 * xfs_allocbt_calc_size(mp, freelen); if (xfs_sb_version_hassparseinodes(&mp->m_sb)) inobt_sz = xfs_iallocbt_calc_size(mp, icount / XFS_INODES_PER_HOLEMASK_BIT); else inobt_sz = xfs_iallocbt_calc_size(mp, icount / XFS_INODES_PER_CHUNK); if (xfs_sb_version_hasfinobt(&mp->m_sb)) inobt_sz *= 2; if (xfs_sb_version_hasreflink(&mp->m_sb)) refcbt_sz = xfs_refcountbt_calc_size(mp, usedlen); else refcbt_sz = 0; if (xfs_sb_version_hasrmapbt(&mp->m_sb)) { /* * Guess how many blocks we need to rebuild the rmapbt. * For non-reflink filesystems we can't have more records than * used blocks. However, with reflink it's possible to have * more than one rmap record per AG block. We don't know how * many rmaps there could be in the AG, so we start off with * what we hope is an generous over-estimation. */ if (xfs_sb_version_hasreflink(&mp->m_sb)) rmapbt_sz = xfs_rmapbt_calc_size(mp, (unsigned long long)aglen * 2); else rmapbt_sz = xfs_rmapbt_calc_size(mp, usedlen); } else { rmapbt_sz = 0; } trace_xrep_calc_ag_resblks_btsize(mp, sm->sm_agno, bnobt_sz, inobt_sz, rmapbt_sz, refcbt_sz); return max(max(bnobt_sz, inobt_sz), max(rmapbt_sz, refcbt_sz)); }