int
xfs_readfile_btree(xfs_inode_t *ip, void *buffer, off_t offset, size_t len,
		   int *last_extent)
{
	xfs_extnum_t		nextents;
	xfs_extnum_t		extent;
	xfs_ifork_t		*dp;
	xfs_bmbt_rec_host_t	*ep;
	xfs_bmbt_irec_t		rec;
	xfs_mount_t		*mp = ip->i_mount;	/* filesystem mount point */
	xfs_fsize_t		size = ip->i_d.di_size;
	int			error;

	if (offset >= size)
		return 0;
	if (offset + len > size)
		len = size - offset;

	dp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
	if (!(dp->if_flags & XFS_IFEXTENTS) &&
	    (error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK)))
		return error;

	nextents = XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK);
	for (extent = 0; extent < nextents; extent++) {
		ep = xfs_iext_get_ext(dp, extent);
		xfs_bmbt_get_all(ep, &rec);
		if (extent_overlaps_buffer(mp, rec, offset, len)) {
			error = copy_extent_to_buffer(mp, rec, buffer,
						      offset, len);
			if (error)
				return error;
		}
	}
	return len;
}
/*
 * Find the CoW reservation for a given byte offset of a file.
 */
bool
xfs_reflink_find_cow_mapping(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	struct xfs_bmbt_irec	*imap)
{
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
	xfs_fileoff_t		offset_fsb;
	struct xfs_bmbt_irec	got;
	xfs_extnum_t		idx;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
	ASSERT(xfs_is_reflink_inode(ip));

	offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got))
		return false;
	if (got.br_startoff > offset_fsb)
		return false;

	trace_xfs_reflink_find_cow_mapping(ip, offset, 1, XFS_IO_OVERWRITE,
			&got);
	*imap = got;
	return true;
}
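A minimal sketch of how a writeback-style caller might consult this lookup to decide whether I/O at a byte offset should target the CoW fork. The wrapper function and its locking discipline are illustrative assumptions, not part of the source above; only xfs_reflink_find_cow_mapping() itself is taken from it.

/*
 * Illustrative only: decide whether I/O at "offset" should be steered
 * to the CoW reservation.  Assumes the ILOCK is not yet held.
 */
static bool
example_use_cow_mapping(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	struct xfs_bmbt_irec	*imap)
{
	bool			is_cow = false;

	xfs_ilock(ip, XFS_ILOCK_SHARED);
	if (xfs_is_reflink_inode(ip))
		is_cow = xfs_reflink_find_cow_mapping(ip, offset, imap);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	return is_cow;	/* true: write into the CoW staging extent */
}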
/*
 * Given a block number in a fork, return the next valid block number
 * (not a hole).
 * If this is the last block number then NULLFILEOFF is returned.
 *
 * This was originally in the kernel, but only used in xfs_repair.
 */
int
libxfs_bmap_next_offset(
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_inode_t	*ip,		/* incore inode */
	xfs_fileoff_t	*bnop,		/* current block */
	int		whichfork)	/* data or attr fork */
{
	xfs_fileoff_t	bno;		/* current block */
	int		eof;		/* hit end of file */
	int		error;		/* error return value */
	xfs_bmbt_irec_t	got;		/* current extent value */
	xfs_ifork_t	*ifp;		/* inode fork pointer */
	xfs_extnum_t	lastx;		/* last extent used */
	xfs_bmbt_irec_t	prev;		/* previous extent value */

	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
		return XFS_ERROR(EIO);
	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
		*bnop = NULLFILEOFF;
		return 0;
	}
	ifp = XFS_IFORK_PTR(ip, whichfork);
	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
	    (error = xfs_iread_extents(tp, ip, whichfork)))
		return error;
	bno = *bnop + 1;
	xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, &prev);
	if (eof)
		*bnop = NULLFILEOFF;
	else
		*bnop = got.br_startoff < bno ? bno : got.br_startoff;
	return 0;
}
/*
 * Verify the in-memory consistency of an inline symlink data fork. This
 * does not do on-disk format checks.
 */
xfs_failaddr_t
xfs_symlink_shortform_verify(
	struct xfs_inode	*ip)
{
	char			*sfp;
	char			*endp;
	struct xfs_ifork	*ifp;
	int			size;

	ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL);
	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
	sfp = (char *)ifp->if_u1.if_data;
	size = ifp->if_bytes;
	endp = sfp + size;

	/*
	 * Zero length symlinks should never occur in memory as they are
	 * never allowed to exist on disk.
	 */
	if (!size)
		return __this_address;

	/* No negative sizes or overly long symlink targets. */
	if (size < 0 || size > XFS_SYMLINK_MAXLEN)
		return __this_address;

	/* No NULLs in the target either. */
	if (memchr(sfp, 0, size - 1))
		return __this_address;

	/*
	 * We /did/ null-terminate the buffer, right?  Check the last byte
	 * inside the buffer; endp itself points one past the end, so
	 * dereferencing it would read out of bounds.
	 */
	if (*(endp - 1) != 0)
		return __this_address;
	return NULL;
}
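For context, a hedged sketch of how a verifier like this is typically consumed: the returned failure address feeds the standard corruption-report helper. The wrapper below is hypothetical; xfs_inode_verifier_error() is assumed to be available with its usual (ip, error, name, buf, bufsz, failaddr) signature.

/*
 * Illustrative caller: report a corrupt inline symlink.  Assumes the
 * xfs_inode_verifier_error() helper; not taken from the source above.
 */
static int
example_check_inline_symlink(
	struct xfs_inode	*ip)
{
	xfs_failaddr_t		fa;

	fa = xfs_symlink_shortform_verify(ip);
	if (fa) {
		xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__,
				ip->i_df.if_u1.if_data, ip->i_df.if_bytes,
				fa);
		return -EFSCORRUPTED;
	}
	return 0;
}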
/*
 * Trim an extent to end at the next CoW reservation past offset_fsb.
 */
void
xfs_reflink_trim_irec_to_next_cow(
	struct xfs_inode	*ip,
	xfs_fileoff_t		offset_fsb,
	struct xfs_bmbt_irec	*imap)
{
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
	struct xfs_bmbt_irec	got;
	xfs_extnum_t		idx;

	if (!xfs_is_reflink_inode(ip))
		return;

	/* Find the extent in the CoW fork. */
	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got))
		return;

	/* This is the extent before; try sliding up one. */
	if (got.br_startoff < offset_fsb) {
		if (!xfs_iext_get_extent(ifp, idx + 1, &got))
			return;
	}

	if (got.br_startoff >= imap->br_startoff + imap->br_blockcount)
		return;

	imap->br_blockcount = got.br_startoff - imap->br_startoff;
	trace_xfs_reflink_trim_irec(ip, imap);
}
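A short sketch of the intended use: after mapping a plain overwrite during writeback, clip the mapping so it does not extend past the next CoW reservation. The wrapper and the assumption that imap came from a prior data-fork lookup are illustrative, not from the source above.

/*
 * Illustrative only: clip an overwrite mapping at the next CoW
 * reservation.  @imap is assumed to come from a data-fork lookup.
 */
static void
example_trim_overwrite(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	struct xfs_bmbt_irec	*imap)
{
	xfs_fileoff_t		offset_fsb;

	offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
	xfs_reflink_trim_irec_to_next_cow(ip, offset_fsb, imap);
}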
/* Convert all of the unwritten CoW extents in a file's range to real ones. */
int
xfs_reflink_convert_cow(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		count)
{
	struct xfs_bmbt_irec	got;
	struct xfs_defer_ops	dfops;
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, offset + count);
	xfs_extnum_t		idx;
	bool			found;
	int			error = 0;

	xfs_ilock(ip, XFS_ILOCK_EXCL);

	/* Convert all the extents to real from unwritten. */
	for (found = xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got);
	     found && got.br_startoff < end_fsb;
	     found = xfs_iext_get_extent(ifp, ++idx, &got)) {
		error = xfs_reflink_convert_cow_extent(ip, &got, offset_fsb,
				end_fsb - offset_fsb, &dfops);
		if (error)
			break;
	}

	/* Finish up. */
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}
/*
 * Trim the passed in imap to the next shared/unshared extent boundary, and
 * if imap->br_startoff points to a shared extent reserve space for it in the
 * COW fork.  In this case *shared is set to true, else to false.
 *
 * Note that imap will always contain the block numbers for the existing blocks
 * in the data fork, as the upper layers need them for read-modify-write
 * operations.
 */
int
xfs_reflink_reserve_cow(
	struct xfs_inode	*ip,
	struct xfs_bmbt_irec	*imap,
	bool			*shared)
{
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
	struct xfs_bmbt_irec	got;
	int			error = 0;
	bool			eof = false, trimmed;
	xfs_extnum_t		idx;

	/*
	 * Search the COW fork extent list first.  This serves two purposes:
	 * first, it implements the speculative preallocation using cowextsize,
	 * so that we also unshare blocks adjacent to shared blocks instead
	 * of just the shared blocks themselves.  Second, the lookup in the
	 * extent list is generally faster than going out to the shared extent
	 * tree.
	 */
	if (!xfs_iext_lookup_extent(ip, ifp, imap->br_startoff, &idx, &got))
		eof = true;
	if (!eof && got.br_startoff <= imap->br_startoff) {
		trace_xfs_reflink_cow_found(ip, imap);
		xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);

		*shared = true;
		return 0;
	}

	/* Trim the mapping to the nearest shared extent boundary. */
	error = xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed);
	if (error)
		return error;

	/* Not shared?  Just report the (potentially capped) extent. */
	if (!*shared)
		return 0;

	/*
	 * Fork all the shared blocks from our write offset until the end of
	 * the extent.
	 */
	error = xfs_qm_dqattach_locked(ip, 0);
	if (error)
		return error;

	error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff,
			imap->br_blockcount, 0, &got, &idx, eof);
	if (error == -ENOSPC || error == -EDQUOT)
		trace_xfs_reflink_cow_enospc(ip, imap);
	if (error)
		return error;

	trace_xfs_reflink_cow_alloc(ip, &got);
	return 0;
}
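A hedged sketch of a write-path caller: it hands the data-fork mapping for the write range to the reservation routine and then branches on whether the range turned out to be shared. The wrapper is hypothetical; only the xfs_reflink_reserve_cow() call and its locking requirement (ILOCK held, since it calls xfs_qm_dqattach_locked()) come from the source.

/*
 * Illustrative write-path caller.  Assumes the ILOCK is already held
 * in exclusive mode and @imap holds the data-fork mapping.
 */
static int
example_prepare_write(
	struct xfs_inode	*ip,
	struct xfs_bmbt_irec	*imap)
{
	bool			shared = false;
	int			error;

	error = xfs_reflink_reserve_cow(ip, imap, &shared);
	if (error)
		return error;

	/* If "shared" is set, the write must go through the CoW fork. */
	return 0;
}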
/*
 * Automatic CoW Reservation Freeing
 *
 * These functions automatically garbage collect leftover CoW reservations
 * that were made on behalf of a cowextsize hint when we start to run out
 * of quota or when the reservations sit around for too long.  If the file
 * has dirty pages or is undergoing writeback, its CoW reservations will
 * be retained.
 *
 * The actual garbage collection piggybacks off the same code that runs
 * the speculative EOF preallocation garbage collector.
 */
STATIC int
xfs_inode_free_cowblocks(
	struct xfs_inode	*ip,
	int			flags,
	void			*args)
{
	int			ret;
	struct xfs_eofblocks	*eofb = args;
	int			match;
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);

	/*
	 * Just clear the tag if we have an empty cow fork or none at all.  It's
	 * possible the inode was fully unshared since it was originally tagged.
	 */
	if (!xfs_is_reflink_inode(ip) || !ifp->if_bytes) {
		trace_xfs_inode_free_cowblocks_invalid(ip);
		xfs_inode_clear_cowblocks_tag(ip);
		return 0;
	}

	/*
	 * If the mapping is dirty or under writeback we cannot touch the
	 * CoW fork.  Leave it alone if we're in the midst of a directio.
	 */
	if ((VFS_I(ip)->i_state & I_DIRTY_PAGES) ||
	    mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) ||
	    mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) ||
	    atomic_read(&VFS_I(ip)->i_dio_count))
		return 0;

	if (eofb) {
		if (eofb->eof_flags & XFS_EOF_FLAGS_UNION)
			match = xfs_inode_match_id_union(ip, eofb);
		else
			match = xfs_inode_match_id(ip, eofb);
		if (!match)
			return 0;

		/* skip the inode if the file size is too small */
		if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE &&
		    XFS_ISIZE(ip) < eofb->eof_min_file_size)
			return 0;
	}

	/* Free the CoW blocks */
	xfs_ilock(ip, XFS_IOLOCK_EXCL);
	xfs_ilock(ip, XFS_MMAPLOCK_EXCL);

	ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false);

	xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
	xfs_iunlock(ip, XFS_IOLOCK_EXCL);

	return ret;
}
/* Does this inode need the reflink flag? */
int
xfs_reflink_inode_has_shared_extents(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	bool			*has_shared)
{
	struct xfs_bmbt_irec	got;
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp;
	xfs_agnumber_t		agno;
	xfs_agblock_t		agbno;
	xfs_extlen_t		aglen;
	xfs_agblock_t		rbno;
	xfs_extlen_t		rlen;
	xfs_extnum_t		idx;
	bool			found;
	int			error;

	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(tp, ip, XFS_DATA_FORK);
		if (error)
			return error;
	}

	*has_shared = false;
	found = xfs_iext_lookup_extent(ip, ifp, 0, &idx, &got);
	while (found) {
		if (isnullstartblock(got.br_startblock) ||
		    got.br_state != XFS_EXT_NORM)
			goto next;
		agno = XFS_FSB_TO_AGNO(mp, got.br_startblock);
		agbno = XFS_FSB_TO_AGBNO(mp, got.br_startblock);
		aglen = got.br_blockcount;

		error = xfs_reflink_find_shared(mp, tp, agno, agbno, aglen,
				&rbno, &rlen, false);
		if (error)
			return error;
		/* Is there still a shared block here? */
		if (rbno != NULLAGBLOCK) {
			*has_shared = true;
			return 0;
		}
next:
		found = xfs_iext_get_extent(ifp, ++idx, &got);
	}

	return 0;
}
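A sketch modeled on the reflink-flag clearing path: if no shared extents remain, the inode's reflink flag can be dropped and the core logged. The wrapper name is hypothetical; the flag and logging helpers are the standard ones, but the exact call site is an assumption.

/*
 * Illustrative only: clear the reflink iflag when nothing is shared
 * any more.  Assumes @ip is joined to @tp with the ILOCK held.
 */
static int
example_maybe_clear_reflink(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip)
{
	bool			needs_flag;
	int			error;

	error = xfs_reflink_inode_has_shared_extents(tp, ip, &needs_flag);
	if (error || needs_flag)
		return error;

	ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	return 0;
}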
int
xfs_readlink_local(xfs_inode_t *ip, void *buffer, off_t offset, size_t len,
		   int *last_extent)
{
	xfs_ifork_t	*dp;
	xfs_fsize_t	size = ip->i_d.di_size;

	dp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);

	if (size - offset <= 0)
		return 0;
	if (size - offset < len)
		len = size - offset;

	memcpy(buffer, dp->if_u1.if_data + offset, len);
	return len;
}
/*
 * Automatic CoW Reservation Freeing
 *
 * These functions automatically garbage collect leftover CoW reservations
 * that were made on behalf of a cowextsize hint when we start to run out
 * of quota or when the reservations sit around for too long.  If the file
 * has dirty pages or is undergoing writeback, its CoW reservations will
 * be retained.
 *
 * The actual garbage collection piggybacks off the same code that runs
 * the speculative EOF preallocation garbage collector.
 */
STATIC int
xfs_inode_free_cowblocks(
	struct xfs_inode	*ip,
	int			flags,
	void			*args)
{
	struct xfs_eofblocks	*eofb = args;
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
	int			match;
	int			ret = 0;

	if (!xfs_prep_free_cowblocks(ip, ifp))
		return 0;

	if (eofb) {
		if (eofb->eof_flags & XFS_EOF_FLAGS_UNION)
			match = xfs_inode_match_id_union(ip, eofb);
		else
			match = xfs_inode_match_id(ip, eofb);
		if (!match)
			return 0;

		/* skip the inode if the file size is too small */
		if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE &&
		    XFS_ISIZE(ip) < eofb->eof_min_file_size)
			return 0;
	}

	/* Free the CoW blocks */
	xfs_ilock(ip, XFS_IOLOCK_EXCL);
	xfs_ilock(ip, XFS_MMAPLOCK_EXCL);

	/*
	 * Check again, nobody else should be able to dirty blocks or change
	 * the reflink iflag now that we have the first two locks held.
	 */
	if (xfs_prep_free_cowblocks(ip, ifp))
		ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false);

	xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
	xfs_iunlock(ip, XFS_IOLOCK_EXCL);

	return ret;
}
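The per-inode callback above is normally driven by a tagged walk of the in-core inode cache. A rough sketch follows; the iterator name and signature are assumptions based on the icache iterators of this code's era, not taken from the source.

/*
 * Illustrative only: drive xfs_inode_free_cowblocks() over every inode
 * tagged as holding CoW blocks.  Iterator helper is an assumption.
 */
int
example_icache_free_cowblocks(
	struct xfs_mount	*mp,
	struct xfs_eofblocks	*eofb)
{
	return xfs_inode_ag_iterator_tag(mp, xfs_inode_free_cowblocks, 0,
			eofb, XFS_ICI_COWBLOCKS_TAG);
}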
/*
 * Count fsblocks of the given fork.
 */
int						/* error */
xfs_bmap_count_blocks(
	xfs_trans_t		*tp,		/* transaction pointer */
	xfs_inode_t		*ip,		/* incore inode */
	int			whichfork,	/* data or attr fork */
	int			*count)		/* out: count of blocks */
{
	struct xfs_btree_block	*block;	/* current btree block */
	xfs_fsblock_t		bno;	/* block # of "block" */
	xfs_ifork_t		*ifp;	/* fork structure */
	int			level;	/* btree level, for checking */
	xfs_mount_t		*mp;	/* file system mount structure */
	__be64			*pp;	/* pointer to block address */

	bno = NULLFSBLOCK;
	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS) {
		xfs_bmap_count_leaves(ifp, 0,
			ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t),
			count);
		return 0;
	}

	/*
	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
	 */
	block = ifp->if_broot;
	level = be16_to_cpu(block->bb_level);
	ASSERT(level > 0);
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
	bno = be64_to_cpu(*pp);
	ASSERT(bno != NULLDFSBNO);
	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);

	if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) {
		XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW,
				 mp);
		return XFS_ERROR(EFSCORRUPTED);
	}

	return 0;
}
/*
 * returns 1 for success, 0 if we failed to map the extent.
 */
STATIC int
xfs_getbmapx_fix_eof_hole(
	xfs_inode_t		*ip,		/* xfs incore inode pointer */
	struct getbmapx		*out,		/* output structure */
	int			prealloced,	/* this is a file with
						 * preallocated data space */
	__int64_t		end,		/* last block requested */
	xfs_fsblock_t		startblock)
{
	__int64_t		fixlen;
	xfs_mount_t		*mp;		/* file system mount point */
	xfs_ifork_t		*ifp;		/* inode fork pointer */
	xfs_extnum_t		lastx;		/* last extent pointer */
	xfs_fileoff_t		fileblock;

	if (startblock == HOLESTARTBLOCK) {
		mp = ip->i_mount;
		out->bmv_block = -1;
		fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
		fixlen -= out->bmv_offset;
		if (prealloced && out->bmv_offset + out->bmv_length == end) {
			/* Came to hole at EOF. Trim it. */
			if (fixlen <= 0)
				return 0;
			out->bmv_length = fixlen;
		}
	} else {
		if (startblock == DELAYSTARTBLOCK)
			out->bmv_block = -2;
		else
			out->bmv_block = xfs_fsb_to_db(ip, startblock);
		fileblock = XFS_BB_TO_FSB(ip->i_mount, out->bmv_offset);
		ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
		if (xfs_iext_bno_to_ext(ifp, fileblock, &lastx) &&
		    (lastx == (ifp->if_bytes /
			       (uint)sizeof(xfs_bmbt_rec_t)) - 1))
			out->bmv_oflags |= BMV_OF_LAST;
	}

	return 1;
}
STATIC int
xfs_qm_get_rtblks(
	xfs_inode_t	*ip,
	xfs_qcnt_t	*O_rtblks)
{
	xfs_filblks_t	rtblks;			/* total rt blks */
	xfs_extnum_t	idx;			/* extent record index */
	xfs_ifork_t	*ifp;			/* inode fork pointer */
	xfs_extnum_t	nextents;		/* number of extent entries */
	int		error;

	ASSERT(XFS_IS_REALTIME_INODE(ip));
	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK)))
			return error;
	}
	rtblks = 0;
	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
	for (idx = 0; idx < nextents; idx++)
		rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx));
	*O_rtblks = (xfs_qcnt_t)rtblks;
	return 0;
}
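A quotacheck-style sketch of the caller: split an inode's total block count into realtime and data-device portions, since the two are billed to different quota counters. The wrapper is hypothetical; the pattern mirrors how the rtblks count is consumed.

/*
 * Illustrative only: separate data-device and realtime block counts
 * for quota accounting.
 */
static int
example_count_inode_blocks(
	struct xfs_inode	*ip,
	xfs_qcnt_t		*nblks,
	xfs_qcnt_t		*rtblks)
{
	int			error;

	*rtblks = 0;
	if (XFS_IS_REALTIME_INODE(ip)) {
		error = xfs_qm_get_rtblks(ip, rtblks);
		if (error)
			return error;
	}

	/* Everything not on the rt device counts against regular quota. */
	*nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - *rtblks;
	return 0;
}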
/*
 * Cancel CoW reservations for some block range of an inode.
 *
 * If cancel_real is true this function cancels all COW fork extents for the
 * inode; if cancel_real is false, real extents are not cleared.
 */
int
xfs_reflink_cancel_cow_blocks(
	struct xfs_inode		*ip,
	struct xfs_trans		**tpp,
	xfs_fileoff_t			offset_fsb,
	xfs_fileoff_t			end_fsb,
	bool				cancel_real)
{
	struct xfs_ifork		*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
	struct xfs_bmbt_irec		got, del;
	xfs_extnum_t			idx;
	xfs_fsblock_t			firstfsb;
	struct xfs_defer_ops		dfops;
	int				error = 0;

	if (!xfs_is_reflink_inode(ip))
		return 0;
	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got))
		return 0;

	while (got.br_startoff < end_fsb) {
		del = got;
		xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb);
		trace_xfs_reflink_cancel_cow(ip, &del);

		if (isnullstartblock(del.br_startblock)) {
			error = xfs_bmap_del_extent_delay(ip, XFS_COW_FORK,
					&idx, &got, &del);
			if (error)
				break;
		} else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) {
			xfs_trans_ijoin(*tpp, ip, 0);
			xfs_defer_init(&dfops, &firstfsb);

			/* Free the CoW orphan record. */
			error = xfs_refcount_free_cow_extent(ip->i_mount,
					&dfops, del.br_startblock,
					del.br_blockcount);
			if (error)
				break;

			xfs_bmap_add_free(ip->i_mount, &dfops,
					del.br_startblock, del.br_blockcount,
					NULL);

			/* Update quota accounting */
			xfs_trans_mod_dquot_byino(*tpp, ip,
					XFS_TRANS_DQ_BCOUNT,
					-(long)del.br_blockcount);

			/* Roll the transaction */
			xfs_defer_ijoin(&dfops, ip);
			error = xfs_defer_finish(tpp, &dfops);
			if (error) {
				xfs_defer_cancel(&dfops);
				break;
			}

			/* Remove the mapping from the CoW fork. */
			xfs_bmap_del_extent_cow(ip, &idx, &got, &del);
		}

		if (!xfs_iext_get_extent(ifp, ++idx, &got))
			break;
	}

	/* clear tag if cow fork is emptied */
	if (!ifp->if_bytes)
		xfs_inode_clear_cowblocks_tag(ip);

	return error;
}
/*
 * Remap part of the CoW fork into the data fork.
 *
 * We aim to remap the range starting at @offset_fsb and ending at @end_fsb
 * into the data fork; this function will remap what it can (at the end of the
 * range) and update @end_fsb appropriately.  Each remap gets its own
 * transaction because we can end up merging and splitting bmbt blocks for
 * every remap operation and we'd like to keep the block reservation
 * requirements as low as possible.
 */
STATIC int
xfs_reflink_end_cow_extent(
	struct xfs_inode	*ip,
	xfs_fileoff_t		offset_fsb,
	xfs_fileoff_t		*end_fsb)
{
	struct xfs_bmbt_irec	got, del;
	struct xfs_iext_cursor	icur;
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
	xfs_filblks_t		rlen;
	unsigned int		resblks;
	int			error;

	/* No COW extents?  That's easy! */
	if (ifp->if_bytes == 0) {
		*end_fsb = offset_fsb;
		return 0;
	}

	resblks = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0,
			XFS_TRANS_RESERVE | XFS_TRANS_NOFS, &tp);
	if (error)
		return error;

	/*
	 * Lock the inode.  We have to ijoin without automatic unlock because
	 * the lead transaction is the refcountbt record deletion; the data
	 * fork update follows as a deferred log item.
	 */
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, 0);

	/*
	 * In case of racing, overlapping AIO writes no COW extents might be
	 * left by the time I/O completes for the loser of the race.  In that
	 * case we are done.
	 */
	if (!xfs_iext_lookup_extent_before(ip, ifp, end_fsb, &icur, &got) ||
	    got.br_startoff + got.br_blockcount <= offset_fsb) {
		*end_fsb = offset_fsb;
		goto out_cancel;
	}

	/*
	 * Structure copy @got into @del, then trim @del to the range that we
	 * were asked to remap.  We preserve @got for the eventual CoW fork
	 * deletion; from now on @del represents the mapping that we're
	 * actually remapping.
	 */
	del = got;
	xfs_trim_extent(&del, offset_fsb, *end_fsb - offset_fsb);

	ASSERT(del.br_blockcount > 0);

	/*
	 * Only remap real extents that contain data.  With AIO, speculative
	 * preallocations can leak into the range we are called upon, and we
	 * need to skip them.
	 */
	if (!xfs_bmap_is_real_extent(&got)) {
		*end_fsb = del.br_startoff;
		goto out_cancel;
	}

	/* Unmap the old blocks in the data fork. */
	rlen = del.br_blockcount;
	error = __xfs_bunmapi(tp, ip, del.br_startoff, &rlen, 0, 1);
	if (error)
		goto out_cancel;

	/* Trim the extent to whatever got unmapped. */
	xfs_trim_extent(&del, del.br_startoff + rlen,
			del.br_blockcount - rlen);
	trace_xfs_reflink_cow_remap(ip, &del);

	/* Free the CoW orphan record. */
	error = xfs_refcount_free_cow_extent(tp, del.br_startblock,
			del.br_blockcount);
	if (error)
		goto out_cancel;

	/* Map the new blocks into the data fork. */
	error = xfs_bmap_map_extent(tp, ip, &del);
	if (error)
		goto out_cancel;

	/* Charge this new data fork mapping to the on-disk quota. */
	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_DELBCOUNT,
			(long)del.br_blockcount);

	/* Remove the mapping from the CoW fork. */
	xfs_bmap_del_extent_cow(ip, &icur, &got, &del);

	error = xfs_trans_commit(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	if (error)
		return error;

	/* Update the caller about how much progress we made. */
	*end_fsb = del.br_startoff;
	return 0;

out_cancel:
	xfs_trans_cancel(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}
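Because each call remaps at most one extent in its own transaction and walks *end_fsb backwards, the natural caller is a loop. A hedged sketch of that loop, modeled on the per-extent design described in the header comment above; the wrapper name is hypothetical.

/*
 * Illustrative only: remap one extent per transaction until the whole
 * range [offset, offset + count) has been processed.
 */
static int
example_end_cow(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		count)
{
	xfs_fileoff_t		offset_fsb;
	xfs_fileoff_t		end_fsb;
	int			error = 0;

	offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
	end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count);

	/* Each call shrinks end_fsb until the range is fully remapped. */
	while (end_fsb > offset_fsb && !error)
		error = xfs_reflink_end_cow_extent(ip, offset_fsb, &end_fsb);

	return error;
}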
/*
 * Cancel CoW reservations for some block range of an inode.
 *
 * If cancel_real is true this function cancels all COW fork extents for the
 * inode; if cancel_real is false, real extents are not cleared.
 */
int
xfs_reflink_cancel_cow_blocks(
	struct xfs_inode		*ip,
	struct xfs_trans		**tpp,
	xfs_fileoff_t			offset_fsb,
	xfs_fileoff_t			end_fsb,
	bool				cancel_real)
{
	struct xfs_ifork		*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
	struct xfs_bmbt_irec		got, del;
	struct xfs_iext_cursor		icur;
	xfs_fsblock_t			firstfsb;
	struct xfs_defer_ops		dfops;
	int				error = 0;

	if (!xfs_is_reflink_inode(ip))
		return 0;
	if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got))
		return 0;

	/* Walk backwards until we're out of the I/O range... */
	while (got.br_startoff + got.br_blockcount > offset_fsb) {
		del = got;
		xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb);

		/* Extent delete may have bumped ext forward */
		if (!del.br_blockcount) {
			xfs_iext_prev(ifp, &icur);
			goto next_extent;
		}

		trace_xfs_reflink_cancel_cow(ip, &del);

		if (isnullstartblock(del.br_startblock)) {
			error = xfs_bmap_del_extent_delay(ip, XFS_COW_FORK,
					&icur, &got, &del);
			if (error)
				break;
		} else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) {
			xfs_trans_ijoin(*tpp, ip, 0);
			xfs_defer_init(&dfops, &firstfsb);

			/* Free the CoW orphan record. */
			error = xfs_refcount_free_cow_extent(ip->i_mount,
					&dfops, del.br_startblock,
					del.br_blockcount);
			if (error)
				break;

			xfs_bmap_add_free(ip->i_mount, &dfops,
					del.br_startblock, del.br_blockcount,
					NULL);

			/* Update quota accounting */
			xfs_trans_mod_dquot_byino(*tpp, ip,
					XFS_TRANS_DQ_BCOUNT,
					-(long)del.br_blockcount);

			/* Roll the transaction */
			xfs_defer_ijoin(&dfops, ip);
			error = xfs_defer_finish(tpp, &dfops);
			if (error) {
				xfs_defer_cancel(&dfops);
				break;
			}

			/* Remove the mapping from the CoW fork. */
			xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
		} else {
			/* Didn't do anything, push cursor back. */
			xfs_iext_prev(ifp, &icur);
		}
next_extent:
		if (!xfs_iext_get_extent(ifp, &icur, &got))
			break;
	}

	/* clear tag if cow fork is emptied */
	if (!ifp->if_bytes)
		xfs_inode_clear_cowblocks_tag(ip);

	return error;
}
/* Trivial wrapper so the XFS_IFORK_PTR() macro can be used as a function. */
xfs_ifork_t *
xfs_ifork_ptr(xfs_inode_t *ip, int w)
{
	return XFS_IFORK_PTR(ip, w);
}
/*
 * Convert the shortform directory to block form.
 */
int						/* error */
xfs_dir2_sf_to_block(
	xfs_da_args_t		*args)		/* operation arguments */
{
	xfs_dir2_db_t		blkno;		/* dir-relative block # (0) */
	xfs_dir2_data_hdr_t	*hdr;		/* block header */
	xfs_dir2_leaf_entry_t	*blp;		/* block leaf entries */
	struct xfs_buf		*bp;		/* block buffer */
	xfs_dir2_block_tail_t	*btp;		/* block tail pointer */
	xfs_dir2_data_entry_t	*dep;		/* data entry pointer */
	xfs_inode_t		*dp;		/* incore directory inode */
	int			dummy;		/* trash */
	xfs_dir2_data_unused_t	*dup;		/* unused entry pointer */
	int			endoffset;	/* end of data objects */
	int			error;		/* error return value */
	int			i;		/* index */
	xfs_mount_t		*mp;		/* filesystem mount point */
	int			needlog;	/* need to log block header */
	int			needscan;	/* need to scan block freespc */
	int			newoffset;	/* offset from current entry */
	int			offset;		/* target block offset */
	xfs_dir2_sf_entry_t	*sfep;		/* sf entry pointer */
	xfs_dir2_sf_hdr_t	*oldsfp;	/* old shortform header */
	xfs_dir2_sf_hdr_t	*sfp;		/* shortform header */
	__be16			*tagp;		/* end of data entry */
	xfs_trans_t		*tp;		/* transaction pointer */
	struct xfs_name		name;
	struct xfs_ifork	*ifp;

	trace_xfs_dir2_sf_to_block(args);

	dp = args->dp;
	tp = args->trans;
	mp = dp->i_mount;
	ifp = XFS_IFORK_PTR(dp, XFS_DATA_FORK);
	ASSERT(ifp->if_flags & XFS_IFINLINE);
	/*
	 * Bomb out if the shortform directory is way too short.
	 */
	if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
		ASSERT(XFS_FORCED_SHUTDOWN(mp));
		return -EIO;
	}

	oldsfp = (xfs_dir2_sf_hdr_t *)ifp->if_u1.if_data;

	ASSERT(ifp->if_bytes == dp->i_d.di_size);
	ASSERT(ifp->if_u1.if_data != NULL);
	ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(oldsfp->i8count));
	ASSERT(dp->i_d.di_nextents == 0);

	/*
	 * Copy the directory into a temporary buffer.
	 * Then pitch the incore inode data so we can make extents.
	 */
	sfp = kmem_alloc(ifp->if_bytes, KM_SLEEP);
	memcpy(sfp, oldsfp, ifp->if_bytes);

	xfs_idata_realloc(dp, -ifp->if_bytes, XFS_DATA_FORK);
	xfs_bmap_local_to_extents_empty(dp, XFS_DATA_FORK);
	dp->i_d.di_size = 0;

	/*
	 * Add block 0 to the inode.
	 */
	error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno);
	if (error) {
		kmem_free(sfp);
		return error;
	}
	/*
	 * Initialize the data block, then convert it to block format.
	 */
	error = xfs_dir3_data_init(args, blkno, &bp);
	if (error) {
		kmem_free(sfp);
		return error;
	}
	xfs_dir3_block_init(mp, tp, bp, dp);
	hdr = bp->b_addr;

	/*
	 * Compute size of block "tail" area.
	 */
	i = (uint)sizeof(*btp) +
	    (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t);

	/*
	 * The whole thing is initialized to free by the init routine.
	 * Say we're using the leaf and tail area.
	 */
	dup = dp->d_ops->data_unused_p(hdr);
	needlog = needscan = 0;
	xfs_dir2_data_use_free(args, bp, dup, args->geo->blksize - i,
			       i, &needlog, &needscan);
	ASSERT(needscan == 0);
	/*
	 * Fill in the tail.
	 */
	btp = xfs_dir2_block_tail_p(args->geo, hdr);
	btp->count = cpu_to_be32(sfp->count + 2);	/* ., .. */
	btp->stale = 0;
	blp = xfs_dir2_block_leaf_p(btp);
	endoffset = (uint)((char *)blp - (char *)hdr);
	/*
	 * Remove the freespace, we'll manage it.
	 */
	xfs_dir2_data_use_free(args, bp, dup,
		(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
		be16_to_cpu(dup->length), &needlog, &needscan);
	/*
	 * Create entry for .
	 */
	dep = dp->d_ops->data_dot_entry_p(hdr);
	dep->inumber = cpu_to_be64(dp->i_ino);
	dep->namelen = 1;
	dep->name[0] = '.';
	dp->d_ops->data_put_ftype(dep, XFS_DIR3_FT_DIR);
	tagp = dp->d_ops->data_entry_tag_p(dep);
	*tagp = cpu_to_be16((char *)dep - (char *)hdr);
	xfs_dir2_data_log_entry(args, bp, dep);
	blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot);
	blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(
				(char *)dep - (char *)hdr));
	/*
	 * Create entry for ..
	 */
	dep = dp->d_ops->data_dotdot_entry_p(hdr);
	dep->inumber = cpu_to_be64(dp->d_ops->sf_get_parent_ino(sfp));
	dep->namelen = 2;
	dep->name[0] = dep->name[1] = '.';
	dp->d_ops->data_put_ftype(dep, XFS_DIR3_FT_DIR);
	tagp = dp->d_ops->data_entry_tag_p(dep);
	*tagp = cpu_to_be16((char *)dep - (char *)hdr);
	xfs_dir2_data_log_entry(args, bp, dep);
	blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot);
	blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(
				(char *)dep - (char *)hdr));
	offset = dp->d_ops->data_first_offset;
	/*
	 * Loop over existing entries, stuff them in.
	 */
	i = 0;
	if (!sfp->count)
		sfep = NULL;
	else
		sfep = xfs_dir2_sf_firstentry(sfp);
	/*
	 * Need to preserve the existing offset values in the sf directory.
	 * Insert holes (unused entries) where necessary.
	 */
	while (offset < endoffset) {
		/*
		 * sfep is null when we reach the end of the list.
		 */
		if (sfep == NULL)
			newoffset = endoffset;
		else
			newoffset = xfs_dir2_sf_get_offset(sfep);
		/*
		 * There should be a hole here, make one.
		 */
		if (offset < newoffset) {
			dup = (xfs_dir2_data_unused_t *)((char *)hdr + offset);
			dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
			dup->length = cpu_to_be16(newoffset - offset);
			*xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16(
				((char *)dup - (char *)hdr));
			xfs_dir2_data_log_unused(args, bp, dup);
			xfs_dir2_data_freeinsert(hdr,
					dp->d_ops->data_bestfree_p(hdr),
					dup, &dummy);
			offset += be16_to_cpu(dup->length);
			continue;
		}
		/*
		 * Copy a real entry.
		 */
		dep = (xfs_dir2_data_entry_t *)((char *)hdr + newoffset);
		dep->inumber = cpu_to_be64(dp->d_ops->sf_get_ino(sfp, sfep));
		dep->namelen = sfep->namelen;
		dp->d_ops->data_put_ftype(dep, dp->d_ops->sf_get_ftype(sfep));
		memcpy(dep->name, sfep->name, dep->namelen);
		tagp = dp->d_ops->data_entry_tag_p(dep);
		*tagp = cpu_to_be16((char *)dep - (char *)hdr);
		xfs_dir2_data_log_entry(args, bp, dep);
		name.name = sfep->name;
		name.len = sfep->namelen;
		blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops->
							hashname(&name));
		blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(
						(char *)dep - (char *)hdr));
		offset = (int)((char *)(tagp + 1) - (char *)hdr);
		if (++i == sfp->count)
			sfep = NULL;
		else
			sfep = dp->d_ops->sf_nextentry(sfp, sfep);
	}
	/* Done with the temporary buffer */
	kmem_free(sfp);
	/*
	 * Sort the leaf entries by hash value.
	 */
	xfs_sort(blp, be32_to_cpu(btp->count), sizeof(*blp),
		 xfs_dir2_block_sort);
	/*
	 * Log the leaf entry area and tail.
	 * Already logged the header in data_init, ignore needlog.
	 */
	ASSERT(needscan == 0);
	xfs_dir2_block_log_leaf(tp, bp, 0, be32_to_cpu(btp->count) - 1);
	xfs_dir2_block_log_tail(tp, bp);
	xfs_dir3_data_check(dp, bp);
	return 0;
}
/*
 * Cancel CoW reservations for some block range of an inode.
 *
 * If cancel_real is true this function cancels all COW fork extents for the
 * inode; if cancel_real is false, real extents are not cleared.
 *
 * Caller must have already joined the inode to the current transaction. The
 * inode will be joined to the transaction returned to the caller.
 */
int
xfs_reflink_cancel_cow_blocks(
	struct xfs_inode		*ip,
	struct xfs_trans		**tpp,
	xfs_fileoff_t			offset_fsb,
	xfs_fileoff_t			end_fsb,
	bool				cancel_real)
{
	struct xfs_ifork		*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
	struct xfs_bmbt_irec		got, del;
	struct xfs_iext_cursor		icur;
	int				error = 0;

	if (!xfs_inode_has_cow_data(ip))
		return 0;
	if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got))
		return 0;

	/* Walk backwards until we're out of the I/O range... */
	while (got.br_startoff + got.br_blockcount > offset_fsb) {
		del = got;
		xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb);

		/* Extent delete may have bumped ext forward */
		if (!del.br_blockcount) {
			xfs_iext_prev(ifp, &icur);
			goto next_extent;
		}

		trace_xfs_reflink_cancel_cow(ip, &del);

		if (isnullstartblock(del.br_startblock)) {
			error = xfs_bmap_del_extent_delay(ip, XFS_COW_FORK,
					&icur, &got, &del);
			if (error)
				break;
		} else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) {
			ASSERT((*tpp)->t_firstblock == NULLFSBLOCK);

			/* Free the CoW orphan record. */
			error = xfs_refcount_free_cow_extent(*tpp,
					del.br_startblock, del.br_blockcount);
			if (error)
				break;

			xfs_bmap_add_free(*tpp, del.br_startblock,
					  del.br_blockcount, NULL);

			/* Roll the transaction */
			error = xfs_defer_finish(tpp);
			if (error)
				break;

			/* Remove the mapping from the CoW fork. */
			xfs_bmap_del_extent_cow(ip, &icur, &got, &del);

			/* Remove the quota reservation */
			error = xfs_trans_reserve_quota_nblks(NULL, ip,
					-(long)del.br_blockcount, 0,
					XFS_QMOPT_RES_REGBLKS);
			if (error)
				break;
		} else {
			/* Didn't do anything, push cursor back. */
			xfs_iext_prev(ifp, &icur);
		}
next_extent:
		if (!xfs_iext_get_extent(ifp, &icur, &got))
			break;
	}

	/* clear tag if cow fork is emptied */
	if (!ifp->if_bytes)
		xfs_inode_clear_cowblocks_tag(ip);
	return error;
}
/*
 * Remap parts of a file's data fork after a successful CoW.
 */
int
xfs_reflink_end_cow(
	struct xfs_inode		*ip,
	xfs_off_t			offset,
	xfs_off_t			count)
{
	struct xfs_ifork		*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
	struct xfs_bmbt_irec		got, del;
	struct xfs_trans		*tp;
	xfs_fileoff_t			offset_fsb;
	xfs_fileoff_t			end_fsb;
	xfs_fsblock_t			firstfsb;
	struct xfs_defer_ops		dfops;
	int				error;
	unsigned int			resblks;
	xfs_filblks_t			rlen;
	xfs_extnum_t			idx;

	trace_xfs_reflink_end_cow(ip, offset, count);

	/* No COW extents?  That's easy! */
	if (ifp->if_bytes == 0)
		return 0;

	offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
	end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count);

	/*
	 * Start a rolling transaction to switch the mappings.  We're
	 * unlikely ever to have to remap 16T worth of single-block
	 * extents, so just cap the worst case extent count to 2^32-1.
	 * Stick a warning in just in case, and avoid 64-bit division.
	 */
	BUILD_BUG_ON(MAX_RW_COUNT > UINT_MAX);
	if (end_fsb - offset_fsb > UINT_MAX) {
		error = -EFSCORRUPTED;
		xfs_force_shutdown(ip->i_mount, SHUTDOWN_CORRUPT_INCORE);
		ASSERT(0);
		goto out;
	}
	resblks = XFS_NEXTENTADD_SPACE_RES(ip->i_mount,
			(unsigned int)(end_fsb - offset_fsb),
			XFS_DATA_FORK);
	error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write,
			resblks, 0, 0, &tp);
	if (error)
		goto out;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, 0);

	/* If there is a hole at end_fsb - 1 go to the previous extent */
	if (!xfs_iext_lookup_extent(ip, ifp, end_fsb - 1, &idx, &got) ||
	    got.br_startoff > end_fsb) {
		/*
		 * In case of racing, overlapping AIO writes no COW extents
		 * might be left by the time I/O completes for the loser of
		 * the race.  In that case we are done.
		 */
		if (idx <= 0)
			goto out_cancel;
		xfs_iext_get_extent(ifp, --idx, &got);
	}

	/* Walk backwards until we're out of the I/O range... */
	while (got.br_startoff + got.br_blockcount > offset_fsb) {
		del = got;
		xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb);

		/* Extent delete may have bumped idx forward */
		if (!del.br_blockcount) {
			idx--;
			goto next_extent;
		}

		ASSERT(!isnullstartblock(got.br_startblock));

		/*
		 * Don't remap unwritten extents; these are
		 * speculatively preallocated CoW extents that have been
		 * allocated but have not yet been involved in a write.
		 */
		if (got.br_state == XFS_EXT_UNWRITTEN) {
			idx--;
			goto next_extent;
		}

		/* Unmap the old blocks in the data fork. */
		xfs_defer_init(&dfops, &firstfsb);
		rlen = del.br_blockcount;
		error = __xfs_bunmapi(tp, ip, del.br_startoff, &rlen, 0, 1,
				&firstfsb, &dfops);
		if (error)
			goto out_defer;

		/* Trim the extent to whatever got unmapped. */
		if (rlen) {
			xfs_trim_extent(&del, del.br_startoff + rlen,
					del.br_blockcount - rlen);
		}
		trace_xfs_reflink_cow_remap(ip, &del);

		/* Free the CoW orphan record. */
		error = xfs_refcount_free_cow_extent(tp->t_mountp, &dfops,
				del.br_startblock, del.br_blockcount);
		if (error)
			goto out_defer;

		/* Map the new blocks into the data fork. */
		error = xfs_bmap_map_extent(tp->t_mountp, &dfops, ip, &del);
		if (error)
			goto out_defer;

		/* Remove the mapping from the CoW fork. */
		xfs_bmap_del_extent_cow(ip, &idx, &got, &del);

		xfs_defer_ijoin(&dfops, ip);
		error = xfs_defer_finish(&tp, &dfops);
		if (error)
			goto out_defer;
next_extent:
		if (!xfs_iext_get_extent(ifp, idx, &got))
			break;
	}

	error = xfs_trans_commit(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	if (error)
		goto out;
	return 0;

out_defer:
	xfs_defer_cancel(&dfops);
out_cancel:
	xfs_trans_cancel(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
out:
	trace_xfs_reflink_end_cow_error(ip, error, _RET_IP_);
	return error;
}
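Finally, a hedged sketch of where this remapping is triggered from: CoW remap work is normally kicked off at write I/O completion, once the data has safely landed in the staging extents. The wrapper is hypothetical; only xfs_reflink_end_cow() comes from the source above.

/*
 * Illustrative only: at write-ioend time, remap the CoW staging
 * extents that now hold the freshly written data.
 */
static int
example_end_cow_ioend(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	size_t			size)
{
	return xfs_reflink_end_cow(ip, offset, size);
}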