/* Dispose of every block of every extent in the bitmap. */
int
xrep_reap_extents(
	struct xfs_scrub		*sc,
	struct xfs_bitmap		*bitmap,
	const struct xfs_owner_info	*oinfo,
	enum xfs_ag_resv_type		type)
{
	struct xfs_bitmap_range		*bmr;
	struct xfs_bitmap_range		*n;
	xfs_fsblock_t			fsbno;
	int				error = 0;

	ASSERT(xfs_sb_version_hasrmapbt(&sc->mp->m_sb));

	for_each_xfs_bitmap_block(fsbno, bmr, n, bitmap) {
		ASSERT(sc->ip != NULL ||
		       XFS_FSB_TO_AGNO(sc->mp, fsbno) == sc->sa.agno);
		trace_xrep_dispose_btree_extent(sc->mp,
				XFS_FSB_TO_AGNO(sc->mp, fsbno),
				XFS_FSB_TO_AGBNO(sc->mp, fsbno), 1);

		error = xrep_reap_block(sc, fsbno, oinfo, type);
		if (error)
			goto out;
	}

out:
	return error;
}
/* Initialize a new AG btree root block with zero entries. */
int
xrep_init_btblock(
	struct xfs_scrub		*sc,
	xfs_fsblock_t			fsb,
	struct xfs_buf			**bpp,
	xfs_btnum_t			btnum,
	const struct xfs_buf_ops	*ops)
{
	struct xfs_trans		*tp = sc->tp;
	struct xfs_mount		*mp = sc->mp;
	struct xfs_buf			*bp;

	trace_xrep_init_btblock(mp, XFS_FSB_TO_AGNO(mp, fsb),
			XFS_FSB_TO_AGBNO(mp, fsb), btnum);

	ASSERT(XFS_FSB_TO_AGNO(mp, fsb) == sc->sa.agno);
	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
			XFS_FSB_TO_DADDR(mp, fsb), XFS_FSB_TO_BB(mp, 1), 0);
	xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
	xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.agno, 0);
	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_BTREE_BUF);
	xfs_trans_log_buf(tp, bp, 0, bp->b_length);
	bp->b_ops = ops;
	*bpp = bp;

	return 0;
}
int
xfs_ioc_trim(
	struct xfs_mount		*mp,
	struct fstrim_range __user	*urange)
{
	struct request_queue	*q = mp->m_ddev_targp->bt_bdev->bd_disk->queue;
	unsigned int		granularity = q->limits.discard_granularity;
	struct fstrim_range	range;
	xfs_fsblock_t		start, len, minlen;
	xfs_agnumber_t		start_agno, end_agno, agno;
	__uint64_t		blocks_trimmed = 0;
	int			error, last_error = 0;

	if (!capable(CAP_SYS_ADMIN))
		return -XFS_ERROR(EPERM);
	if (!blk_queue_discard(q))
		return -XFS_ERROR(EOPNOTSUPP);
	if (copy_from_user(&range, urange, sizeof(range)))
		return -XFS_ERROR(EFAULT);

	/*
	 * Truncating down the len isn't actually quite correct, but using
	 * XFS_B_TO_FSB would mean we trivially get overflows for values
	 * of ULLONG_MAX or slightly lower.  And ULLONG_MAX is the default
	 * used by the fstrim application.  In the end it really doesn't
	 * matter as trimming blocks is an advisory interface.
	 */
	start = XFS_B_TO_FSBT(mp, range.start);
	len = XFS_B_TO_FSBT(mp, range.len);
	minlen = XFS_B_TO_FSB(mp, max_t(u64, granularity, range.minlen));

	start_agno = XFS_FSB_TO_AGNO(mp, start);
	if (start_agno >= mp->m_sb.sb_agcount)
		return -XFS_ERROR(EINVAL);

	end_agno = XFS_FSB_TO_AGNO(mp, start + len);
	if (end_agno >= mp->m_sb.sb_agcount)
		end_agno = mp->m_sb.sb_agcount - 1;

	for (agno = start_agno; agno <= end_agno; agno++) {
		error = -xfs_trim_extents(mp, agno, start, len, minlen,
					  &blocks_trimmed);
		if (error)
			last_error = error;
	}

	if (last_error)
		return last_error;

	range.len = XFS_FSB_TO_B(mp, blocks_trimmed);
	if (copy_to_user(urange, &range, sizeof(range)))
		return -XFS_ERROR(EFAULT);
	return 0;
}
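The function above is the XFS backend for the generic FITRIM ioctl, so it can be driven from userspace on any file within the filesystem. A minimal sketch of such a caller (illustrative only; the mount point path is made up, while FITRIM and struct fstrim_range come from linux/fs.h):

/* Illustrative userspace FITRIM caller; not part of the XFS source above. */
#include <fcntl.h>
#include <limits.h>
#include <linux/fs.h>		/* struct fstrim_range, FITRIM */
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

int
main(void)
{
	struct fstrim_range	range;
	int			fd = open("/mnt", O_RDONLY);	/* made-up path */

	if (fd < 0)
		return 1;
	memset(&range, 0, sizeof(range));
	range.len = ULLONG_MAX;			/* fstrim's default: whole fs */
	if (ioctl(fd, FITRIM, &range) < 0)	/* needs CAP_SYS_ADMIN */
		perror("FITRIM");
	else
		printf("trimmed %llu bytes\n", (unsigned long long)range.len);
	close(fd);
	return 0;
}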
/* Sort refcount intents by AG. */
static int
xfs_refcount_update_diff_items(
	void				*priv,
	struct list_head		*a,
	struct list_head		*b)
{
	struct xfs_mount		*mp = priv;
	struct xfs_refcount_intent	*ra;
	struct xfs_refcount_intent	*rb;

	ra = container_of(a, struct xfs_refcount_intent, ri_list);
	rb = container_of(b, struct xfs_refcount_intent, ri_list);
	return XFS_FSB_TO_AGNO(mp, ra->ri_startblock) -
		XFS_FSB_TO_AGNO(mp, rb->ri_startblock);
}
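This comparator is consumed by the deferred-ops machinery, which sorts each pending-work list so that intents are processed in ascending AG order and AG buffers are always locked in a consistent order. A minimal sketch of that wiring, assuming the kernel's list_sort() and the comparator above (the helper name is hypothetical):

/*
 * Sketch (assumed wiring, simplified from the deferral code): sort a list
 * of pending refcount intents so work proceeds in AG order.
 */
#include <linux/list_sort.h>

static void
sort_refcount_intents(
	struct xfs_mount	*mp,
	struct list_head	*work_list)
{
	/* priv is passed through to the comparator as 'mp' */
	list_sort(mp, work_list, xfs_refcount_update_diff_items);
}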
/*
 * Record a rmap intent; the list is kept sorted first by AG and then by
 * increasing age.
 */
static int
__xfs_rmap_add(
	struct xfs_mount		*mp,
	struct xfs_defer_ops		*dfops,
	enum xfs_rmap_intent_type	type,
	__uint64_t			owner,
	int				whichfork,
	struct xfs_bmbt_irec		*bmap)
{
	struct xfs_rmap_intent		*ri;

	trace_xfs_rmap_defer(mp, XFS_FSB_TO_AGNO(mp, bmap->br_startblock),
			type,
			XFS_FSB_TO_AGBNO(mp, bmap->br_startblock),
			owner, whichfork,
			bmap->br_startoff,
			bmap->br_blockcount,
			bmap->br_state);

	ri = kmem_alloc(sizeof(struct xfs_rmap_intent), KM_SLEEP | KM_NOFS);
	INIT_LIST_HEAD(&ri->ri_list);
	ri->ri_type = type;
	ri->ri_owner = owner;
	ri->ri_whichfork = whichfork;
	ri->ri_bmap = *bmap;

	xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_RMAP, &ri->ri_list);
	return 0;
}
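Callers normally reach __xfs_rmap_add() through thin per-operation wrappers rather than directly, one per intent type. A sketch of one such wrapper, reproduced from memory of the kernel's pattern (treat xfs_rmap_update_is_needed() and the exact shape as assumptions):

/*
 * Sketch of a typical wrapper around __xfs_rmap_add(): queue a deferred
 * "map" rmap update for a file fork mapping.  xfs_rmap_update_is_needed()
 * is assumed to test whether the fs has an rmapbt to maintain at all.
 */
int
xfs_rmap_map_extent(
	struct xfs_mount	*mp,
	struct xfs_defer_ops	*dfops,
	struct xfs_inode	*ip,
	int			whichfork,
	struct xfs_bmbt_irec	*PREV)
{
	if (!xfs_rmap_update_is_needed(mp, whichfork))
		return 0;

	return __xfs_rmap_add(mp, dfops, XFS_RMAP_MAP, ip->i_ino,
			whichfork, PREV);
}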
/*
 * Trim the mapping to the next block where there's a change in the
 * shared/unshared status.  More specifically, this means that we
 * find the lowest-numbered extent of shared blocks that coincides with
 * the given block mapping.  If the shared extent overlaps the start of
 * the mapping, trim the mapping to the end of the shared extent.  If
 * the shared region intersects the mapping, trim the mapping to the
 * start of the shared extent.  If there are no shared regions that
 * overlap, just return the original extent.
 */
int
xfs_reflink_trim_around_shared(
	struct xfs_inode	*ip,
	struct xfs_bmbt_irec	*irec,
	bool			*shared,
	bool			*trimmed)
{
	xfs_agnumber_t		agno;
	xfs_agblock_t		agbno;
	xfs_extlen_t		aglen;
	xfs_agblock_t		fbno;
	xfs_extlen_t		flen;
	int			error = 0;

	/* Holes, unwritten, and delalloc extents cannot be shared */
	if (!xfs_is_reflink_inode(ip) || !xfs_bmap_is_real_extent(irec)) {
		*shared = false;
		return 0;
	}

	trace_xfs_reflink_trim_around_shared(ip, irec);

	agno = XFS_FSB_TO_AGNO(ip->i_mount, irec->br_startblock);
	agbno = XFS_FSB_TO_AGBNO(ip->i_mount, irec->br_startblock);
	aglen = irec->br_blockcount;

	error = xfs_reflink_find_shared(ip->i_mount, NULL, agno, agbno,
			aglen, &fbno, &flen, true);
	if (error)
		return error;

	*shared = *trimmed = false;
	if (fbno == NULLAGBLOCK) {
		/* No shared blocks at all. */
		return 0;
	} else if (fbno == agbno) {
		/*
		 * The start of this extent is shared.  Truncate the
		 * mapping at the end of the shared region so that a
		 * subsequent iteration starts at the start of the
		 * unshared region.
		 */
		irec->br_blockcount = flen;
		*shared = true;
		if (flen != aglen)
			*trimmed = true;
		return 0;
	} else {
		/*
		 * There's a shared extent midway through this extent.
		 * Truncate the mapping at the start of the shared
		 * extent so that a subsequent iteration starts at the
		 * start of the shared region.
		 */
		irec->br_blockcount = fbno - agbno;
		*trimmed = true;
		return 0;
	}
}
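To make the three branches concrete, here is a standalone sketch of the trimming arithmetic with made-up block numbers (not from the source; it only mirrors the length and flag updates above):

/*
 * Standalone sketch of the trimming arithmetic: a mapping starting at
 * agbno=100 with len=50, tested against the shared run (fbno, flen)
 * that the refcount lookup would return.  Mirrors the three branches.
 */
#include <stdbool.h>
#include <stdio.h>

#define NULLAGBLOCK	((unsigned)-1)

static void
trim_demo(unsigned agbno, unsigned aglen, unsigned fbno, unsigned flen)
{
	bool		shared = false, trimmed = false;
	unsigned	len = aglen;

	if (fbno == NULLAGBLOCK) {
		/* nothing shared: keep the whole mapping */
	} else if (fbno == agbno) {
		len = flen;		/* keep only the shared prefix */
		shared = true;
		trimmed = (flen != aglen);
	} else {
		len = fbno - agbno;	/* stop where the shared run begins */
		trimmed = true;
	}
	printf("len=%u shared=%d trimmed=%d\n", len, shared, trimmed);
}

int
main(void)
{
	trim_demo(100, 50, NULLAGBLOCK, 0);	/* len=50 shared=0 trimmed=0 */
	trim_demo(100, 50, 100, 20);		/* len=20 shared=1 trimmed=1 */
	trim_demo(100, 50, 130, 10);		/* len=30 shared=0 trimmed=1 */
	return 0;
}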
/*
 * Verify that an FS block number pointer neither points outside the
 * filesystem nor points at static AG metadata.
 */
bool
xfs_verify_fsbno(
	struct xfs_mount	*mp,
	xfs_fsblock_t		fsbno)
{
	xfs_agnumber_t		agno = XFS_FSB_TO_AGNO(mp, fsbno);

	if (agno >= mp->m_sb.sb_agcount)
		return false;
	return xfs_verify_agbno(mp, agno, XFS_FSB_TO_AGBNO(mp, fsbno));
}
/* Does this inode need the reflink flag? */
int
xfs_reflink_inode_has_shared_extents(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	bool			*has_shared)
{
	struct xfs_bmbt_irec	got;
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp;
	xfs_agnumber_t		agno;
	xfs_agblock_t		agbno;
	xfs_extlen_t		aglen;
	xfs_agblock_t		rbno;
	xfs_extlen_t		rlen;
	xfs_extnum_t		idx;
	bool			found;
	int			error;

	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(tp, ip, XFS_DATA_FORK);
		if (error)
			return error;
	}

	*has_shared = false;
	found = xfs_iext_lookup_extent(ip, ifp, 0, &idx, &got);
	while (found) {
		if (isnullstartblock(got.br_startblock) ||
		    got.br_state != XFS_EXT_NORM)
			goto next;
		agno = XFS_FSB_TO_AGNO(mp, got.br_startblock);
		agbno = XFS_FSB_TO_AGBNO(mp, got.br_startblock);
		aglen = got.br_blockcount;

		error = xfs_reflink_find_shared(mp, tp, agno, agbno, aglen,
				&rbno, &rlen, false);
		if (error)
			return error;
		/* Is there still a shared block here? */
		if (rbno != NULLAGBLOCK) {
			*has_shared = true;
			return 0;
		}
next:
		found = xfs_iext_get_extent(ifp, ++idx, &got);
	}

	return 0;
}
/*
 * Count fsblocks of the given fork.
 */
int						/* error */
xfs_bmap_count_blocks(
	xfs_trans_t		*tp,		/* transaction pointer */
	xfs_inode_t		*ip,		/* incore inode */
	int			whichfork,	/* data or attr fork */
	int			*count)		/* out: count of blocks */
{
	struct xfs_btree_block	*block;	/* current btree block */
	xfs_fsblock_t		bno;	/* block # of "block" */
	xfs_ifork_t		*ifp;	/* fork structure */
	int			level;	/* btree level, for checking */
	xfs_mount_t		*mp;	/* file system mount structure */
	__be64			*pp;	/* pointer to block address */

	bno = NULLFSBLOCK;
	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS) {
		xfs_bmap_count_leaves(ifp, 0,
			ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t),
			count);
		return 0;
	}

	/*
	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
	 */
	block = ifp->if_broot;
	level = be16_to_cpu(block->bb_level);
	ASSERT(level > 0);
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
	bno = be64_to_cpu(*pp);
	ASSERT(bno != NULLDFSBNO);
	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);

	if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) {
		XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW,
				 mp);
		return XFS_ERROR(EFSCORRUPTED);
	}

	return 0;
}
void
reset_bmaps(xfs_mount_t *mp)
{
	xfs_agnumber_t	agno;
	xfs_agblock_t	ag_size;
	int		ag_hdr_block;

	ag_hdr_block = howmany(4 * mp->m_sb.sb_sectsize, mp->m_sb.sb_blocksize);
	ag_size = mp->m_sb.sb_agblocks;

	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
		if (agno == mp->m_sb.sb_agcount - 1)
			ag_size = (xfs_extlen_t)(mp->m_sb.sb_dblocks -
				   (xfs_drfsbno_t)mp->m_sb.sb_agblocks * agno);
#ifdef BTREE_STATS
		if (btree_find(ag_bmap[agno], 0, NULL)) {
			printf("ag_bmap[%d] btree stats:\n", agno);
			btree_print_stats(ag_bmap[agno], stdout);
		}
#endif
		/*
		 * We always insert an item for the first block having a
		 * given state.  So the code below means:
		 *
		 *	block 0..ag_hdr_block-1:	XR_E_INUSE_FS
		 *	ag_hdr_block..ag_size:		XR_E_UNKNOWN
		 *	ag_size...			XR_E_BAD_STATE
		 */
		btree_clear(ag_bmap[agno]);
		btree_insert(ag_bmap[agno], 0, &states[XR_E_INUSE_FS]);
		btree_insert(ag_bmap[agno],
				ag_hdr_block, &states[XR_E_UNKNOWN]);
		btree_insert(ag_bmap[agno], ag_size, &states[XR_E_BAD_STATE]);
	}

	if (mp->m_sb.sb_logstart != 0) {
		set_bmap_ext(XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart),
			     XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart),
			     mp->m_sb.sb_logblocks, XR_E_INUSE_FS);
	}

	reset_rt_bmap();
}
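The per-AG block map is run-length encoded: each btree key marks the first block of a run and maps to that run's state, which is why three inserts describe an entire AG. A standalone sketch of the lookup rule (illustrative only; xfs_repair's real accessors operate on its btree):

/*
 * Standalone sketch of the run-length encoding used above: each entry
 * records the first block of a run and that run's state.  A block's
 * state is the entry with the largest start <= the block number.
 */
#include <stdio.h>

struct run {
	unsigned long	start;	/* first block of the run */
	int		state;	/* XR_E_* state of the run */
};

static int
run_state(const struct run *runs, int nruns, unsigned long bno)
{
	int	i, state = -1;

	for (i = 0; i < nruns && runs[i].start <= bno; i++)
		state = runs[i].state;	/* remember last run we passed */
	return state;
}

int
main(void)
{
	/* mirrors: 0 -> INUSE_FS(=1), hdr -> UNKNOWN(=0), ag_size -> BAD(=2) */
	struct run	map[] = { { 0, 1 }, { 4, 0 }, { 1000, 2 } };

	printf("%d %d %d\n",
		run_state(map, 3, 2),		/* 1: fs header blocks */
		run_state(map, 3, 500),		/* 0: unknown */
		run_state(map, 3, 1500));	/* 2: past the AG end */
	return 0;
}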
static void
scan_lbtree(
	xfs_fsblock_t	root,
	int		nlevels,
	scan_lbtree_f_t	func,
	extmap_t	**extmapp,
	typnm_t		btype)
{
	push_cur();
	set_cur(&typtab[btype], XFS_FSB_TO_DADDR(mp, root), blkbb, DB_RING_IGN,
		NULL);
	if (iocur_top->data == NULL) {
		dbprintf(_("can't read btree block %u/%u\n"),
			XFS_FSB_TO_AGNO(mp, root),
			XFS_FSB_TO_AGBNO(mp, root));
		pop_cur();	/* don't leak the cursor we pushed */
		return;
	}
	(*func)(iocur_top->data, nlevels - 1, extmapp, btype);
	pop_cur();
}
static int
bmap_f(
	int		argc,
	char		**argv)
{
	int		afork = 0;
	bmap_ext_t	be;
	int		c;
	xfs_dfiloff_t	co, cosave;
	int		dfork = 0;
	xfs_dinode_t	*dip;
	xfs_dfiloff_t	eo;
	xfs_dfilblks_t	len;
	int		nex;
	char		*p;
	int		whichfork;

	if (iocur_top->ino == NULLFSINO) {
		dbprintf(_("no current inode\n"));
		return 0;
	}
	optind = 0;
	if (argc) while ((c = getopt(argc, argv, "ad")) != EOF) {
		switch (c) {
		case 'a':
			afork = 1;
			break;
		case 'd':
			dfork = 1;
			break;
		default:
			dbprintf(_("bad option for bmap command\n"));
			return 0;
		}
	}
	if (afork + dfork == 0) {
		push_cur();
		set_cur_inode(iocur_top->ino);
		dip = iocur_top->data;
		if (be32_to_cpu(dip->di_core.di_nextents))
			dfork = 1;
		if (be16_to_cpu(dip->di_core.di_anextents))
			afork = 1;
		pop_cur();
	}
	if (optind < argc) {
		co = (xfs_dfiloff_t)strtoull(argv[optind], &p, 0);
		if (*p != '\0') {
			dbprintf(_("bad block number for bmap %s\n"),
				argv[optind]);
			return 0;
		}
		optind++;
		if (optind < argc) {
			len = (xfs_dfilblks_t)strtoull(argv[optind], &p, 0);
			if (*p != '\0') {
				dbprintf(_("bad len for bmap %s\n"),
					argv[optind]);
				return 0;
			}
			eo = co + len - 1;
		} else
			eo = co;
	} else {
		co = 0;
		eo = -1;
	}
	cosave = co;
	for (whichfork = XFS_DATA_FORK;
	     whichfork <= XFS_ATTR_FORK;
	     whichfork++) {
		if (whichfork == XFS_DATA_FORK && !dfork)
			continue;
		if (whichfork == XFS_ATTR_FORK && !afork)
			continue;
		for (;;) {
			nex = 1;
			bmap(co, eo - co + 1, whichfork, &nex, &be);
			if (nex == 0)
				break;
			dbprintf(_("%s offset %lld startblock %llu (%u/%u) "
				 "count %llu flag %u\n"),
				whichfork == XFS_DATA_FORK ?
					_("data") : _("attr"),
				be.startoff, be.startblock,
				XFS_FSB_TO_AGNO(mp, be.startblock),
				XFS_FSB_TO_AGBNO(mp, be.startblock),
				be.blockcount, be.flag);
			co = be.startoff + be.blockcount;
		}
		co = cosave;
	}
	return 0;
}
xfs_agnumber_t
xfs_fsb_to_agno(xfs_mount_t *mp, xfs_fsblock_t fsbno)
{
	return XFS_FSB_TO_AGNO(mp, fsbno);
}
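For context, the macro being wrapped splits a data-device fsblock number into an AG index in the high bits and an AG-relative block in the low sb_agblklog bits; XFS_FSB_TO_AGBNO masks off the same low bits. A standalone sketch of that decomposition (the shift/mask form is paraphrased from memory of xfs_format.h, so treat the exact spelling as an assumption):

/*
 * Sketch of the fsbno encoding XFS_FSB_TO_AGNO/XFS_FSB_TO_AGBNO unpack:
 * agno lives above sb_agblklog, the AG-relative block below it.
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint8_t		agblklog = 16;	/* log2 of max blocks per AG */
	uint64_t	fsbno = ((uint64_t)3 << agblklog) | 0x1234;

	printf("agno=%llu agbno=%llu\n",
		(unsigned long long)(fsbno >> agblklog),
		(unsigned long long)(fsbno & ((1ULL << agblklog) - 1)));
	/* prints: agno=3 agbno=4660 */
	return 0;
}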
/* Execute a getfsmap query against the regular data device. */
STATIC int
__xfs_getfsmap_datadev(
	struct xfs_trans		*tp,
	struct xfs_fsmap		*keys,
	struct xfs_getfsmap_info	*info,
	int				(*query_fn)(struct xfs_trans *,
						    struct xfs_getfsmap_info *,
						    struct xfs_btree_cur **,
						    void *),
	void				*priv)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_btree_cur		*bt_cur = NULL;
	xfs_fsblock_t			start_fsb;
	xfs_fsblock_t			end_fsb;
	xfs_agnumber_t			start_ag;
	xfs_agnumber_t			end_ag;
	xfs_daddr_t			eofs;
	int				error = 0;

	eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
	if (keys[0].fmr_physical >= eofs)
		return 0;
	if (keys[1].fmr_physical >= eofs)
		keys[1].fmr_physical = eofs - 1;
	start_fsb = XFS_DADDR_TO_FSB(mp, keys[0].fmr_physical);
	end_fsb = XFS_DADDR_TO_FSB(mp, keys[1].fmr_physical);

	/*
	 * Convert the fsmap low/high keys to AG based keys.  Initialize
	 * low to the fsmap low key and max out the high key to the end
	 * of the AG.
	 */
	info->low.rm_startblock = XFS_FSB_TO_AGBNO(mp, start_fsb);
	info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset);
	error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]);
	if (error)
		return error;
	info->low.rm_blockcount = 0;
	xfs_getfsmap_set_irec_flags(&info->low, &keys[0]);

	info->high.rm_startblock = -1U;
	info->high.rm_owner = ULLONG_MAX;
	info->high.rm_offset = ULLONG_MAX;
	info->high.rm_blockcount = 0;
	info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS;

	start_ag = XFS_FSB_TO_AGNO(mp, start_fsb);
	end_ag = XFS_FSB_TO_AGNO(mp, end_fsb);

	/* Query each AG */
	for (info->agno = start_ag; info->agno <= end_ag; info->agno++) {
		/*
		 * Set the AG high key from the fsmap high key if this
		 * is the last AG that we're querying.
		 */
		if (info->agno == end_ag) {
			info->high.rm_startblock = XFS_FSB_TO_AGBNO(mp,
					end_fsb);
			info->high.rm_offset = XFS_BB_TO_FSBT(mp,
					keys[1].fmr_offset);
			error = xfs_fsmap_owner_to_rmap(&info->high, &keys[1]);
			if (error)
				goto err;
			xfs_getfsmap_set_irec_flags(&info->high, &keys[1]);
		}

		if (bt_cur) {
			xfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR);
			bt_cur = NULL;
			xfs_trans_brelse(tp, info->agf_bp);
			info->agf_bp = NULL;
		}

		error = xfs_alloc_read_agf(mp, tp, info->agno, 0,
				&info->agf_bp);
		if (error)
			goto err;

		trace_xfs_fsmap_low_key(mp, info->dev, info->agno,
				&info->low);
		trace_xfs_fsmap_high_key(mp, info->dev, info->agno,
				&info->high);

		error = query_fn(tp, info, &bt_cur, priv);
		if (error)
			goto err;

		/*
		 * Set the AG low key to the start of the AG prior to
		 * moving on to the next AG.
		 */
		if (info->agno == start_ag) {
			info->low.rm_startblock = 0;
			info->low.rm_owner = 0;
			info->low.rm_offset = 0;
			info->low.rm_flags = 0;
		}
	}

	/* Report any gap at the end of the AG */
	info->last = true;
	error = query_fn(tp, info, &bt_cur, priv);
	if (error)
		goto err;

err:
	if (bt_cur)
		xfs_btree_del_cursor(bt_cur, error < 0 ? XFS_BTREE_ERROR :
							 XFS_BTREE_NOERROR);
	if (info->agf_bp) {
		xfs_trans_brelse(tp, info->agf_bp);
		info->agf_bp = NULL;
	}

	return error;
}
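Userspace reaches this code through the FS_IOC_GETFSMAP ioctl. A minimal sketch of a caller (illustrative only; the mount point path is made up, and since the kernel rejects nonzero reserved fields, the high key's fields are set individually rather than memset to all-ones):

/* Illustrative userspace GETFSMAP caller; not part of the kernel source. */
#include <fcntl.h>
#include <limits.h>
#include <linux/fsmap.h>	/* struct fsmap_head, FS_IOC_GETFSMAP */
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <unistd.h>

#define NR_RECS	128

int
main(void)
{
	struct fsmap_head	*head;
	unsigned int		i;
	int			fd = open("/mnt", O_RDONLY);	/* made-up path */

	if (fd < 0)
		return 1;
	head = calloc(1, sizeof(*head) + NR_RECS * sizeof(struct fsmap));
	if (!head)
		return 1;
	head->fmh_count = NR_RECS;
	/* keys[0] is already zero; max out keys[1] to cover everything */
	head->fmh_keys[1].fmr_device = UINT_MAX;
	head->fmh_keys[1].fmr_physical = ULLONG_MAX;
	head->fmh_keys[1].fmr_owner = ULLONG_MAX;
	head->fmh_keys[1].fmr_offset = ULLONG_MAX;
	if (ioctl(fd, FS_IOC_GETFSMAP, head) < 0) {
		perror("FS_IOC_GETFSMAP");
		return 1;
	}
	for (i = 0; i < head->fmh_entries; i++)
		printf("phys %llu len %llu owner %llu\n",
			(unsigned long long)head->fmh_recs[i].fmr_physical,
			(unsigned long long)head->fmh_recs[i].fmr_length,
			(unsigned long long)head->fmh_recs[i].fmr_owner);
	free(head);
	close(fd);
	return 0;
}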
/*
 * Start processing for a leaf or fuller btree.
 * A leaf directory is one where the attribute fork is too big for
 * the inode but is small enough to fit into one btree block
 * outside the inode.  This code is modelled after process_leaf_dir_block.
 *
 * returns 0 if things are ok, 1 if bad (attributes needs to be junked)
 * repair is set, if anything was changed, but attributes can live thru it
 */
static int
process_longform_attr(
	xfs_mount_t	*mp,
	xfs_ino_t	ino,
	xfs_dinode_t	*dip,
	blkmap_t	*blkmap,
	int		*repair)	/* out - 1 if something was fixed */
{
	xfs_attr_leafblock_t	*leaf;
	xfs_fsblock_t	bno;
	xfs_buf_t	*bp;
	xfs_dahash_t	next_hashval;
	int		repairlinks = 0;
	struct xfs_attr3_icleaf_hdr leafhdr;
	int		error;

	*repair = 0;

	bno = blkmap_get(blkmap, 0);

	if (bno == NULLFSBLOCK) {
		if (dip->di_aformat == XFS_DINODE_FMT_EXTENTS &&
				be16_to_cpu(dip->di_anextents) == 0)
			return(0); /* the kernel can handle this state */
		do_warn(
	_("block 0 of inode %" PRIu64 " attribute fork is missing\n"),
			ino);
		return(1);
	}
	/* FIX FOR bug 653709 -- EKN */
	/* note: agno == agcount is also out of range, hence >= */
	if (XFS_FSB_TO_AGNO(mp, bno) >= mp->m_sb.sb_agcount) {
		do_warn(
	_("agno of attribute fork of inode %" PRIu64 " out of regular partition\n"),
			ino);
		return(1);
	}

	bp = libxfs_readbuf(mp->m_dev, XFS_FSB_TO_DADDR(mp, bno),
				XFS_FSB_TO_BB(mp, 1), 0, &xfs_da3_node_buf_ops);
	if (!bp) {
		do_warn(
	_("can't read block 0 of inode %" PRIu64 " attribute fork\n"),
			ino);
		return(1);
	}
	if (bp->b_error == -EFSBADCRC)
		(*repair)++;

	/* is this block sane? */
	if (__check_attr_header(mp, bp, ino)) {
		*repair = 0;
		libxfs_putbuf(bp);
		return 1;
	}

	/* verify leaf block */
	leaf = bp->b_addr;
	xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf);

	/*
	 * check sibling pointers in leaf block or root block 0 before
	 * we have to release the btree block
	 */
	if (leafhdr.forw != 0 || leafhdr.back != 0) {
		if (!no_modify) {
			do_warn(
	_("clearing forw/back pointers in block 0 for attributes in inode %" PRIu64 "\n"),
				ino);
			repairlinks = 1;
			leafhdr.forw = 0;
			leafhdr.back = 0;
			xfs_attr3_leaf_hdr_to_disk(mp->m_attr_geo,
						   leaf, &leafhdr);
		} else {
			do_warn(
	_("would clear forw/back pointers in block 0 for attributes in inode %" PRIu64 "\n"),
				ino);
		}
	}

	/*
	 * use magic number to tell us what type of attribute this is.
	 * it's possible to have a node or leaf attribute in either an
	 * extent format or btree format attribute fork.
	 */
	switch (leafhdr.magic) {
	case XFS_ATTR_LEAF_MAGIC:	/* leaf-form attribute */
	case XFS_ATTR3_LEAF_MAGIC:
		if (process_leaf_attr_block(mp, leaf, 0, ino, blkmap,
				0, &next_hashval, repair)) {
			*repair = 0;
			/* the block is bad.  lose the attribute fork. */
			libxfs_putbuf(bp);
			return(1);
		}
		*repair = *repair || repairlinks;
		break;
	case XFS_DA_NODE_MAGIC:		/* btree-form attribute */
	case XFS_DA3_NODE_MAGIC:
		/* must do this now, to release block 0 before the traversal */
		if ((*repair || repairlinks) && !no_modify) {
			*repair = 1;
			libxfs_writebuf(bp, 0);
		} else
			libxfs_putbuf(bp);
		error = process_node_attr(mp, ino, dip, blkmap); /* + repair */
		if (error)
			*repair = 0;
		return error;
	default:
		do_warn(
	_("bad attribute leaf magic # %#x for dir ino %" PRIu64 "\n"),
			be16_to_cpu(leaf->hdr.info.magic), ino);
		libxfs_putbuf(bp);
		*repair = 0;
		return(1);
	}

	if (*repair && !no_modify)
		libxfs_writebuf(bp, 0);
	else
		libxfs_putbuf(bp);

	return(0);	/* repair may be set */
}
/* Dispose of a single block. */
STATIC int
xrep_reap_block(
	struct xfs_scrub		*sc,
	xfs_fsblock_t			fsbno,
	const struct xfs_owner_info	*oinfo,
	enum xfs_ag_resv_type		resv)
{
	struct xfs_btree_cur		*cur;
	struct xfs_buf			*agf_bp = NULL;
	xfs_agnumber_t			agno;
	xfs_agblock_t			agbno;
	bool				has_other_rmap;
	int				error;

	agno = XFS_FSB_TO_AGNO(sc->mp, fsbno);
	agbno = XFS_FSB_TO_AGBNO(sc->mp, fsbno);

	/*
	 * If we are repairing per-inode metadata, we need to read in the AGF
	 * buffer.  Otherwise, we're repairing a per-AG structure, so reuse
	 * the AGF buffer that the setup functions already grabbed.
	 */
	if (sc->ip) {
		error = xfs_alloc_read_agf(sc->mp, sc->tp, agno, 0, &agf_bp);
		if (error)
			return error;
		if (!agf_bp)
			return -ENOMEM;
	} else {
		agf_bp = sc->sa.agf_bp;
	}
	cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf_bp, agno);

	/* Can we find any other rmappings? */
	error = xfs_rmap_has_other_keys(cur, agbno, 1, oinfo,
			&has_other_rmap);
	xfs_btree_del_cursor(cur, error);
	if (error)
		goto out_free;

	/*
	 * If there are other rmappings, this block is cross linked and must
	 * not be freed.  Remove the reverse mapping and move on.  Otherwise,
	 * we were the only owner of the block, so free the extent, which
	 * will also remove the rmap.
	 *
	 * XXX: XFS doesn't support detecting the case where a single block
	 * metadata structure is crosslinked with a multi-block structure
	 * because the buffer cache doesn't detect aliasing problems, so we
	 * can't fix 100% of crosslinking problems (yet).  The verifiers will
	 * blow on writeout, the filesystem will shut down, and the admin gets
	 * to run xfs_repair.
	 */
	if (has_other_rmap)
		error = xfs_rmap_free(sc->tp, agf_bp, agno, agbno, 1, oinfo);
	else if (resv == XFS_AG_RESV_AGFL)
		error = xrep_put_freelist(sc, agbno);
	else
		error = xfs_free_extent(sc->tp, fsbno, 1, oinfo, resv);
	if (agf_bp != sc->sa.agf_bp)
		xfs_trans_brelse(sc->tp, agf_bp);
	if (error)
		return error;

	if (sc->ip)
		return xfs_trans_roll_inode(&sc->tp, sc->ip);
	return xrep_roll_ag_trans(sc);

out_free:
	if (agf_bp != sc->sa.agf_bp)
		xfs_trans_brelse(sc->tp, agf_bp);
	return error;
}
/*
 * The user wants to preemptively CoW all shared blocks in this file,
 * which enables us to turn off the reflink flag.  Iterate all
 * extents which are not prealloc/delalloc to see which ranges are
 * mentioned in the refcount tree, then read those blocks into the
 * pagecache, dirty them, fsync them back out, and then we can update
 * the inode flag.  What happens if we run out of memory? :)
 */
STATIC int
xfs_reflink_dirty_extents(
	struct xfs_inode	*ip,
	xfs_fileoff_t		fbno,
	xfs_filblks_t		end,
	xfs_off_t		isize)
{
	struct xfs_mount	*mp = ip->i_mount;
	xfs_agnumber_t		agno;
	xfs_agblock_t		agbno;
	xfs_extlen_t		aglen;
	xfs_agblock_t		rbno;
	xfs_extlen_t		rlen;
	xfs_off_t		fpos;
	xfs_off_t		flen;
	struct xfs_bmbt_irec	map[2];
	int			nmaps;
	int			error = 0;

	while (end - fbno > 0) {
		nmaps = 1;
		/*
		 * Look for extents in the file.  Skip holes, delalloc, or
		 * unwritten extents; they can't be reflinked.
		 */
		error = xfs_bmapi_read(ip, fbno, end - fbno, map, &nmaps, 0);
		if (error)
			goto out;
		if (nmaps == 0)
			break;
		if (!xfs_bmap_is_real_extent(&map[0]))
			goto next;

		map[1] = map[0];
		while (map[1].br_blockcount) {
			agno = XFS_FSB_TO_AGNO(mp, map[1].br_startblock);
			agbno = XFS_FSB_TO_AGBNO(mp, map[1].br_startblock);
			aglen = map[1].br_blockcount;

			error = xfs_reflink_find_shared(mp, NULL, agno, agbno,
					aglen, &rbno, &rlen, true);
			if (error)
				goto out;
			if (rbno == NULLAGBLOCK)
				break;

			/* Dirty the pages */
			xfs_iunlock(ip, XFS_ILOCK_EXCL);
			fpos = XFS_FSB_TO_B(mp, map[1].br_startoff +
					(rbno - agbno));
			flen = XFS_FSB_TO_B(mp, rlen);
			if (fpos + flen > isize)
				flen = isize - fpos;
			error = iomap_file_dirty(VFS_I(ip), fpos, flen,
					&xfs_iomap_ops);
			xfs_ilock(ip, XFS_ILOCK_EXCL);
			if (error)
				goto out;

			map[1].br_blockcount -= (rbno - agbno + rlen);
			map[1].br_startoff += (rbno - agbno + rlen);
			map[1].br_startblock += (rbno - agbno + rlen);
		}

next:
		fbno = map[0].br_startoff + map[0].br_blockcount;
	}
out:
	return error;
}
/*
 * Unmap a range of blocks from a file, then map other blocks into the hole.
 * The range to unmap is (destoff : destoff + srcioff + irec->br_blockcount).
 * The extent irec is mapped into dest at irec->br_startoff.
 */
STATIC int
xfs_reflink_remap_extent(
	struct xfs_inode	*ip,
	struct xfs_bmbt_irec	*irec,
	xfs_fileoff_t		destoff,
	xfs_off_t		new_isize)
{
	struct xfs_mount	*mp = ip->i_mount;
	bool			real_extent = xfs_bmap_is_real_extent(irec);
	struct xfs_trans	*tp;
	xfs_fsblock_t		firstfsb;
	unsigned int		resblks;
	struct xfs_defer_ops	dfops;
	struct xfs_bmbt_irec	uirec;
	xfs_filblks_t		rlen;
	xfs_filblks_t		unmap_len;
	xfs_off_t		newlen;
	int			error;

	unmap_len = irec->br_startoff + irec->br_blockcount - destoff;
	trace_xfs_reflink_punch_range(ip, destoff, unmap_len);

	/* No reflinking if we're low on space */
	if (real_extent) {
		error = xfs_reflink_ag_has_free_space(mp,
				XFS_FSB_TO_AGNO(mp, irec->br_startblock));
		if (error)
			goto out;
	}

	/* Start a rolling transaction to switch the mappings */
	resblks = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK);
	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
	if (error)
		goto out;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, 0);

	/* If we're not just clearing space, then do we have enough quota? */
	if (real_extent) {
		error = xfs_trans_reserve_quota_nblks(tp, ip,
				irec->br_blockcount, 0, XFS_QMOPT_RES_REGBLKS);
		if (error)
			goto out_cancel;
	}

	trace_xfs_reflink_remap(ip, irec->br_startoff,
			irec->br_blockcount, irec->br_startblock);

	/* Unmap the old blocks in the data fork. */
	rlen = unmap_len;
	while (rlen) {
		xfs_defer_init(&dfops, &firstfsb);
		error = __xfs_bunmapi(tp, ip, destoff, &rlen, 0, 1,
				&firstfsb, &dfops);
		if (error)
			goto out_defer;

		/*
		 * Trim the extent to whatever got unmapped.
		 * Remember, bunmapi works backwards.
		 */
		uirec.br_startblock = irec->br_startblock + rlen;
		uirec.br_startoff = irec->br_startoff + rlen;
		uirec.br_blockcount = unmap_len - rlen;
		unmap_len = rlen;

		/* If this isn't a real mapping, we're done. */
		if (!real_extent || uirec.br_blockcount == 0)
			goto next_extent;

		trace_xfs_reflink_remap(ip, uirec.br_startoff,
				uirec.br_blockcount, uirec.br_startblock);

		/* Update the refcount tree */
		error = xfs_refcount_increase_extent(mp, &dfops, &uirec);
		if (error)
			goto out_defer;

		/* Map the new blocks into the data fork. */
		error = xfs_bmap_map_extent(mp, &dfops, ip, &uirec);
		if (error)
			goto out_defer;

		/* Update quota accounting. */
		xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT,
				uirec.br_blockcount);

		/* Update dest isize if needed. */
		newlen = XFS_FSB_TO_B(mp,
				uirec.br_startoff + uirec.br_blockcount);
		newlen = min_t(xfs_off_t, newlen, new_isize);
		if (newlen > i_size_read(VFS_I(ip))) {
			trace_xfs_reflink_update_inode_size(ip, newlen);
			i_size_write(VFS_I(ip), newlen);
			ip->i_d.di_size = newlen;
			xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
		}

next_extent:
		/* Process all the deferred stuff. */
		xfs_defer_ijoin(&dfops, ip);
		error = xfs_defer_finish(&tp, &dfops);
		if (error)
			goto out_defer;
	}

	error = xfs_trans_commit(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	if (error)
		goto out;
	return 0;

out_defer:
	xfs_defer_cancel(&dfops);
out_cancel:
	xfs_trans_cancel(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
out:
	trace_xfs_reflink_remap_extent_error(ip, error, _RET_IP_);
	return error;
}
/* Clear the inode reflink flag if there are no shared extents. */
int
xfs_reflink_clear_inode_flag(
	struct xfs_inode	*ip,
	struct xfs_trans	**tpp)
{
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		fbno;
	xfs_filblks_t		end;
	xfs_agnumber_t		agno;
	xfs_agblock_t		agbno;
	xfs_extlen_t		aglen;
	xfs_agblock_t		rbno;
	xfs_extlen_t		rlen;
	struct xfs_bmbt_irec	map;
	int			nmaps;
	int			error = 0;

	ASSERT(xfs_is_reflink_inode(ip));

	fbno = 0;
	end = XFS_B_TO_FSB(mp, i_size_read(VFS_I(ip)));
	while (end - fbno > 0) {
		nmaps = 1;
		/*
		 * Look for extents in the file.  Skip holes, delalloc, or
		 * unwritten extents; they can't be reflinked.
		 */
		error = xfs_bmapi_read(ip, fbno, end - fbno, &map, &nmaps, 0);
		if (error)
			return error;
		if (nmaps == 0)
			break;
		if (!xfs_bmap_is_real_extent(&map))
			goto next;

		agno = XFS_FSB_TO_AGNO(mp, map.br_startblock);
		agbno = XFS_FSB_TO_AGBNO(mp, map.br_startblock);
		aglen = map.br_blockcount;

		error = xfs_reflink_find_shared(mp, agno, agbno, aglen,
				&rbno, &rlen, false);
		if (error)
			return error;
		/* Is there still a shared block here? */
		if (rbno != NULLAGBLOCK)
			return 0;
next:
		fbno = map.br_startoff + map.br_blockcount;
	}

	/*
	 * We didn't find any shared blocks so turn off the reflink flag.
	 * First, get rid of any leftover CoW mappings.
	 */
	error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF, true);
	if (error)
		return error;

	/* Clear the inode flag. */
	trace_xfs_reflink_unset_inode_flag(ip);
	ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
	xfs_inode_clear_cowblocks_tag(ip);
	xfs_trans_ijoin(*tpp, ip, 0);
	xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);

	return error;
}
/*
 * Process one of the deferred rmap operations.  We pass back the
 * btree cursor to maintain our lock on the rmapbt between calls.
 * This saves time and eliminates a buffer deadlock between the
 * superblock and the AGF because we'll always grab them in the same
 * order.
 */
int
xfs_rmap_finish_one(
	struct xfs_trans		*tp,
	enum xfs_rmap_intent_type	type,
	__uint64_t			owner,
	int				whichfork,
	xfs_fileoff_t			startoff,
	xfs_fsblock_t			startblock,
	xfs_filblks_t			blockcount,
	xfs_exntst_t			state,
	struct xfs_btree_cur		**pcur)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_btree_cur		*rcur;
	struct xfs_buf			*agbp = NULL;
	int				error = 0;
	xfs_agnumber_t			agno;
	struct xfs_owner_info		oinfo;
	xfs_agblock_t			bno;
	bool				unwritten;

	agno = XFS_FSB_TO_AGNO(mp, startblock);
	ASSERT(agno != NULLAGNUMBER);
	bno = XFS_FSB_TO_AGBNO(mp, startblock);

	trace_xfs_rmap_deferred(mp, agno, type, bno, owner, whichfork,
			startoff, blockcount, state);

	if (XFS_TEST_ERROR(false, mp,
			XFS_ERRTAG_RMAP_FINISH_ONE,
			XFS_RANDOM_RMAP_FINISH_ONE))
		return -EIO;

	/*
	 * If we haven't gotten a cursor or the cursor AG doesn't match
	 * the startblock, get one now.
	 */
	rcur = *pcur;
	if (rcur != NULL && rcur->bc_private.a.agno != agno) {
		xfs_rmap_finish_one_cleanup(tp, rcur, 0);
		rcur = NULL;
		*pcur = NULL;
	}
	if (rcur == NULL) {
		/*
		 * Refresh the freelist before we start changing the
		 * rmapbt, because a shape change could cause us to
		 * allocate blocks.
		 */
		error = xfs_free_extent_fix_freelist(tp, agno, &agbp);
		if (error)
			return error;
		if (!agbp)
			return -EFSCORRUPTED;

		rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno);
		if (!rcur) {
			error = -ENOMEM;
			goto out_cur;
		}
	}
	*pcur = rcur;

	xfs_rmap_ino_owner(&oinfo, owner, whichfork, startoff);
	unwritten = state == XFS_EXT_UNWRITTEN;
	bno = XFS_FSB_TO_AGBNO(rcur->bc_mp, startblock);

	switch (type) {
	case XFS_RMAP_ALLOC:
	case XFS_RMAP_MAP:
		error = xfs_rmap_map(rcur, bno, blockcount, unwritten, &oinfo);
		break;
	case XFS_RMAP_FREE:
	case XFS_RMAP_UNMAP:
		error = xfs_rmap_unmap(rcur, bno, blockcount, unwritten,
				&oinfo);
		break;
	case XFS_RMAP_CONVERT:
		error = xfs_rmap_convert(rcur, bno, blockcount, !unwritten,
				&oinfo);
		break;
	default:
		ASSERT(0);
		error = -EFSCORRUPTED;
	}
	return error;

out_cur:
	xfs_trans_brelse(tp, agbp);
	return error;
}