/*
 * Link a range of blocks from one file to another.
 *
 * @file_in/@pos_in:	source file and byte offset
 * @file_out/@pos_out:	destination file and byte offset
 * @len:		number of bytes to share; may be adjusted by
 *			vfs_clone_file_prep_inodes()
 * @is_dedupe:		forwarded to the VFS prep helper and to
 *			xfs_reflink_update_dest()
 *
 * Returns -EOPNOTSUPP if the superblock lacks the reflink feature, -EIO if
 * the mount has been forcibly shut down, -EINVAL if either inode is realtime
 * or DAX, and otherwise the status of the first step that stops the
 * operation (0 when the VFS prep helper returns 0) or the result of
 * xfs_reflink_update_dest().
 *
 * All locks taken here are dropped again before returning.
 */
int
xfs_reflink_remap_range(
	struct file		*file_in,
	loff_t			pos_in,
	struct file		*file_out,
	loff_t			pos_out,
	u64			len,
	bool			is_dedupe)
{
	struct inode		*inode_in = file_inode(file_in);
	struct inode		*inode_out = file_inode(file_out);
	struct xfs_inode	*src = XFS_I(inode_in);
	struct xfs_inode	*dest = XFS_I(inode_out);
	struct xfs_mount	*mp = src->i_mount;
	bool			same_inode = (inode_in == inode_out);
	xfs_fileoff_t		src_fsb;
	xfs_fileoff_t		dst_fsb;
	xfs_filblks_t		nr_fsb;
	xfs_extlen_t		cowextsize = 0;
	ssize_t			ret;

	/* Bail out before taking any locks if reflink cannot work at all. */
	if (!xfs_sb_version_hasreflink(&mp->m_sb))
		return -EOPNOTSUPP;
	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	/*
	 * Lock both files against IO.  When source and destination are the
	 * same inode, the mmap lock must only be taken once.
	 */
	lock_two_nondirectories(inode_in, inode_out);
	if (!same_inode)
		xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL);
	else
		xfs_ilock(src, XFS_MMAPLOCK_EXCL);

	/*
	 * Check file eligibility: realtime inodes are never reflinked, and
	 * DAX file data is not shared for now.
	 */
	if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest) ||
	    IS_DAX(inode_in) || IS_DAX(inode_out)) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/* A result of zero or less ends the operation here. */
	ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out,
			&len, is_dedupe);
	if (ret <= 0)
		goto out_unlock;

	trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);

	/* Set flags on both inodes, then remap the blocks. */
	ret = xfs_reflink_set_inode_flag(src, dest);
	if (ret)
		goto out_unlock;

	/* Convert the byte range into filesystem block units. */
	src_fsb = XFS_B_TO_FSBT(mp, pos_in);
	dst_fsb = XFS_B_TO_FSBT(mp, pos_out);
	nr_fsb = XFS_B_TO_FSB(mp, len);

	ret = xfs_reflink_remap_blocks(src, src_fsb, dest, dst_fsb, nr_fsb,
			pos_out + len);
	if (ret)
		goto out_unlock;

	/* Zap any page cache for the destination file's range. */
	truncate_inode_pages_range(&inode_out->i_data, pos_out,
				   PAGE_ALIGN(pos_out + len) - 1);

	/*
	 * Carry the cowextsize hint from src to dest only when the entire
	 * source file is being shared over the entire destination file, the
	 * source has a cowextsize hint, and the destination has none.
	 * Otherwise a hint of zero is passed on.
	 */
	if (pos_in == 0 && len == i_size_read(inode_in) &&
	    (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) &&
	    pos_out == 0 && len >= i_size_read(inode_out) &&
	    !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE))
		cowextsize = src->i_d.di_cowextsize;

	ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
			is_dedupe);

out_unlock:
	/* Drop the mmap lock(s) first, then the locks taken on the inodes. */
	xfs_iunlock(src, XFS_MMAPLOCK_EXCL);
	if (!same_inode)
		xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
	unlock_two_nondirectories(inode_in, inode_out);

	if (ret)
		trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
	return ret;
}
/*
 * Prepare two files for range cloning.  Upon a successful return both inodes
 * will have the iolock and mmaplock held, the page cache of the out file will
 * be truncated, and any leases on the out file will have been broken.  This
 * function borrows heavily from xfs_file_aio_write_checks.
 *
 * The VFS allows partial EOF blocks to "match" for dedupe even though it
 * hasn't checked that the bytes beyond EOF physically match.  Hence we cannot
 * use the EOF block in the source dedupe range because it's not a complete
 * block match, and so can introduce a corruption into the file that has its
 * block replaced.
 *
 * In similar fashion, the VFS file cloning also allows partial EOF blocks to
 * be "block aligned" for the purposes of cloning entire files.  However, if
 * the source file range includes the EOF block and it lands within the
 * existing EOF of the destination file, then we can expose stale data from
 * beyond the source file EOF in the destination file.
 *
 * XFS doesn't support partial block sharing, so in both cases we have to
 * check these cases ourselves.  For dedupe, we can simply round the length to
 * dedupe down to the previous whole block and ignore the partial EOF block.
 * While this means we can't dedupe the last block of a file, this is an
 * acceptable tradeoff for simplicity of implementation.
 *
 * For cloning, we want to share the partial EOF block if it is also the new
 * EOF block of the destination file.  If the partial EOF block lies inside
 * the existing destination EOF, then we have to abort the clone to avoid
 * exposing stale data in the destination file.  Hence we reject these clone
 * attempts with -EINVAL in this case.
 *
 * NOTE(review): none of the EOF-block checks described above are open-coded
 * in this function; presumably generic_remap_file_range_prep() performs them.
 * Confirm against that helper.
 *
 * Returns 1 when the caller should go ahead with the remap (locks held), or
 * the failing step's status after the locks have been released through
 * xfs_reflink_remap_unlock(); that includes the generic prep helper's
 * non-negative result when *len comes back as zero.  If the initial iolock
 * helper fails, its error is returned without calling
 * xfs_reflink_remap_unlock().
 */
int
xfs_reflink_remap_prep(
	struct file		*file_in,
	loff_t			pos_in,
	struct file		*file_out,
	loff_t			pos_out,
	loff_t			*len,
	unsigned int		remap_flags)
{
	struct inode		*inode_in = file_inode(file_in);
	struct inode		*inode_out = file_inode(file_out);
	struct xfs_inode	*src = XFS_I(inode_in);
	struct xfs_inode	*dest = XFS_I(inode_out);
	loff_t			flush_pos;
	loff_t			flush_len;
	ssize_t			ret;

	/* Lock both files against IO */
	ret = xfs_iolock_two_inodes_and_break_layout(inode_in, inode_out);
	if (ret)
		return ret;

	/*
	 * A single inode gets one exclusive mmap lock; for two inodes the
	 * source is locked shared and the destination exclusive.
	 */
	if (inode_in != inode_out)
		xfs_lock_two_inodes(src, XFS_MMAPLOCK_SHARED, dest,
				XFS_MMAPLOCK_EXCL);
	else
		xfs_ilock(src, XFS_MMAPLOCK_EXCL);

	/*
	 * Check file eligibility: realtime inodes are never reflinked, and
	 * DAX file data is not shared for now.
	 */
	if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest) ||
	    IS_DAX(inode_in) || IS_DAX(inode_out)) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/* Stop on error, or when there is nothing left to remap. */
	ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out,
			len, remap_flags);
	if (ret < 0 || *len == 0)
		goto out_unlock;

	/* Attach dquots to dest inode before changing block map */
	ret = xfs_qm_dqattach(dest);
	if (ret)
		goto out_unlock;

	/*
	 * Zero existing post-eof speculative preallocations in the destination
	 * file.
	 */
	ret = xfs_reflink_zero_posteof(dest, pos_out);
	if (ret)
		goto out_unlock;

	/* Set the inode flags ahead of the block remap. */
	ret = xfs_reflink_set_inode_flag(src, dest);
	if (ret)
		goto out_unlock;

	/*
	 * If pos_out > EOF, we may have dirtied blocks between EOF and
	 * pos_out.  In that case, extend the flush and unmap so that it
	 * starts at EOF and still ends at the end of the copy length.
	 */
	flush_pos = pos_out;
	flush_len = *len;
	if (pos_out > XFS_ISIZE(dest)) {
		flush_len += pos_out - XFS_ISIZE(dest);
		flush_pos = XFS_ISIZE(dest);
	}

	ret = xfs_flush_unmap_range(dest, flush_pos, flush_len);
	if (!ret)
		return 1;

out_unlock:
	xfs_reflink_remap_unlock(file_in, file_out);
	return ret;
}