Beispiel #1
0
/*
 * Ensure the reflink bit is set in both inodes.
 */
STATIC int
xfs_reflink_set_inode_flag(
	struct xfs_inode	*src,
	struct xfs_inode	*dest)
{
	struct xfs_mount	*mp = src->i_mount;
	int			error;
	struct xfs_trans	*tp;

	if (xfs_is_reflink_inode(src) && xfs_is_reflink_inode(dest))
		return 0;

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
	if (error)
		goto out_error;

	/* Lock both files against IO */
	if (src->i_ino == dest->i_ino)
		xfs_ilock(src, XFS_ILOCK_EXCL);
	else
		xfs_lock_two_inodes(src, dest, XFS_ILOCK_EXCL);

	if (!xfs_is_reflink_inode(src)) {
		trace_xfs_reflink_set_inode_flag(src);
		xfs_trans_ijoin(tp, src, XFS_ILOCK_EXCL);
		src->i_d.di_flags2 |= XFS_DIFLAG2_REFLINK;
		xfs_trans_log_inode(tp, src, XFS_ILOG_CORE);
		xfs_ifork_init_cow(src);
	} else
		xfs_iunlock(src, XFS_ILOCK_EXCL);

	if (src->i_ino == dest->i_ino)
		goto commit_flags;

	if (!xfs_is_reflink_inode(dest)) {
		trace_xfs_reflink_set_inode_flag(dest);
		xfs_trans_ijoin(tp, dest, XFS_ILOCK_EXCL);
		dest->i_d.di_flags2 |= XFS_DIFLAG2_REFLINK;
		xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE);
		xfs_ifork_init_cow(dest);
	} else
		xfs_iunlock(dest, XFS_ILOCK_EXCL);

commit_flags:
	error = xfs_trans_commit(tp);
	if (error)
		goto out_error;
	return error;

out_error:
	trace_xfs_reflink_set_inode_flag_error(dest, error, _RET_IP_);
	return error;
}
Beispiel #2
0
/* Clear the inode reflink flag if there are no shared extents. */
int
xfs_reflink_clear_inode_flag(
	struct xfs_inode	*ip,
	struct xfs_trans	**tpp)
{
	bool			needs_flag;
	int			error = 0;

	ASSERT(xfs_is_reflink_inode(ip));

	error = xfs_reflink_inode_has_shared_extents(*tpp, ip, &needs_flag);
	if (error || needs_flag)
		return error;

	/*
	 * We didn't find any shared blocks so turn off the reflink flag.
	 * First, get rid of any leftover CoW mappings.
	 */
	error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF, true);
	if (error)
		return error;

	/* Clear the inode flag. */
	trace_xfs_reflink_unset_inode_flag(ip);
	ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
	xfs_inode_clear_cowblocks_tag(ip);
	xfs_trans_ijoin(*tpp, ip, 0);
	xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);

	return error;
}
Beispiel #3
0
/*
 * Find the CoW reservation for a given byte offset of a file.
 */
bool
xfs_reflink_find_cow_mapping(
	struct xfs_inode		*ip,
	xfs_off_t			offset,
	struct xfs_bmbt_irec		*imap)
{
	struct xfs_ifork		*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
	xfs_fileoff_t			offset_fsb;
	struct xfs_bmbt_irec		got;
	xfs_extnum_t			idx;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
	ASSERT(xfs_is_reflink_inode(ip));

	offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got))
		return false;
	if (got.br_startoff > offset_fsb)
		return false;

	trace_xfs_reflink_find_cow_mapping(ip, offset, 1, XFS_IO_OVERWRITE,
			&got);
	*imap = got;
	return true;
}
Beispiel #4
0
/*
 * Trim an extent to end at the next CoW reservation past offset_fsb.
 */
void
xfs_reflink_trim_irec_to_next_cow(
	struct xfs_inode		*ip,
	xfs_fileoff_t			offset_fsb,
	struct xfs_bmbt_irec		*imap)
{
	struct xfs_ifork		*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
	struct xfs_bmbt_irec		got;
	xfs_extnum_t			idx;

	if (!xfs_is_reflink_inode(ip))
		return;

	/* Find the extent in the CoW fork. */
	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got))
		return;

	/* This is the extent before; try sliding up one. */
	if (got.br_startoff < offset_fsb) {
		if (!xfs_iext_get_extent(ifp, idx + 1, &got))
			return;
	}

	if (got.br_startoff >= imap->br_startoff + imap->br_blockcount)
		return;

	imap->br_blockcount = got.br_startoff - imap->br_startoff;
	trace_xfs_reflink_trim_irec(ip, imap);
}
Beispiel #5
0
STATIC void
xfs_diflags_to_iflags(
	struct inode		*inode,
	struct xfs_inode	*ip)
{
	uint16_t		flags = ip->i_d.di_flags;

	inode->i_flags &= ~(S_IMMUTABLE | S_APPEND | S_SYNC |
			    S_NOATIME | S_DAX);

	if (flags & XFS_DIFLAG_IMMUTABLE)
		inode->i_flags |= S_IMMUTABLE;
	if (flags & XFS_DIFLAG_APPEND)
		inode->i_flags |= S_APPEND;
	if (flags & XFS_DIFLAG_SYNC)
		inode->i_flags |= S_SYNC;
	if (flags & XFS_DIFLAG_NOATIME)
		inode->i_flags |= S_NOATIME;
	if (S_ISREG(inode->i_mode) &&
	    ip->i_mount->m_sb.sb_blocksize == PAGE_SIZE &&
	    !xfs_is_reflink_inode(ip) &&
	    (ip->i_mount->m_flags & XFS_MOUNT_DAX ||
	     ip->i_d.di_flags2 & XFS_DIFLAG2_DAX))
		inode->i_flags |= S_DAX;
}
Beispiel #6
0
/*
 * Set ourselves up to free CoW blocks from this file.  If it's already clean
 * then we can bail out quickly, but otherwise we must back off if the file
 * is undergoing some kind of write.
 */
static bool
xfs_prep_free_cowblocks(
	struct xfs_inode	*ip,
	struct xfs_ifork	*ifp)
{
	/*
	 * Just clear the tag if we have an empty cow fork or none at all. It's
	 * possible the inode was fully unshared since it was originally tagged.
	 */
	if (!xfs_is_reflink_inode(ip) || !ifp->if_bytes) {
		trace_xfs_inode_free_cowblocks_invalid(ip);
		xfs_inode_clear_cowblocks_tag(ip);
		return false;
	}

	/*
	 * If the mapping is dirty or under writeback we cannot touch the
	 * CoW fork.  Leave it alone if we're in the midst of a directio.
	 */
	if ((VFS_I(ip)->i_state & I_DIRTY_PAGES) ||
	    mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) ||
	    mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) ||
	    atomic_read(&VFS_I(ip)->i_dio_count))
		return false;

	return true;
}
Beispiel #7
0
/*
 * Trim the mapping to the next block where there's a change in the
 * shared/unshared status.  More specifically, this means that we
 * find the lowest-numbered extent of shared blocks that coincides with
 * the given block mapping.  If the shared extent overlaps the start of
 * the mapping, trim the mapping to the end of the shared extent.  If
 * the shared region intersects the mapping, trim the mapping to the
 * start of the shared extent.  If there are no shared regions that
 * overlap, just return the original extent.
 */
int
xfs_reflink_trim_around_shared(
	struct xfs_inode	*ip,
	struct xfs_bmbt_irec	*irec,
	bool			*shared,
	bool			*trimmed)
{
	xfs_agnumber_t		agno;
	xfs_agblock_t		agbno;
	xfs_extlen_t		aglen;
	xfs_agblock_t		fbno;
	xfs_extlen_t		flen;
	int			error = 0;

	/* Holes, unwritten, and delalloc extents cannot be shared */
	if (!xfs_is_reflink_inode(ip) || !xfs_bmap_is_real_extent(irec)) {
		*shared = false;
		return 0;
	}

	trace_xfs_reflink_trim_around_shared(ip, irec);

	agno = XFS_FSB_TO_AGNO(ip->i_mount, irec->br_startblock);
	agbno = XFS_FSB_TO_AGBNO(ip->i_mount, irec->br_startblock);
	aglen = irec->br_blockcount;

	error = xfs_reflink_find_shared(ip->i_mount, NULL, agno, agbno,
			aglen, &fbno, &flen, true);
	if (error)
		return error;

	*shared = *trimmed = false;
	if (fbno == NULLAGBLOCK) {
		/* No shared blocks at all. */
		return 0;
	} else if (fbno == agbno) {
		/*
		 * The start of this extent is shared.  Truncate the
		 * mapping at the end of the shared region so that a
		 * subsequent iteration starts at the start of the
		 * unshared region.
		 */
		irec->br_blockcount = flen;
		*shared = true;
		if (flen != aglen)
			*trimmed = true;
		return 0;
	} else {
		/*
		 * There's a shared extent midway through this extent.
		 * Truncate the mapping at the start of the shared
		 * extent so that a subsequent iteration starts at the
		 * start of the shared region.
		 */
		irec->br_blockcount = fbno - agbno;
		*trimmed = true;
		return 0;
	}
}
Beispiel #8
0
static int
xfs_ioctl_setattr_xflags(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	struct fsxattr		*fa)
{
	struct xfs_mount	*mp = ip->i_mount;

	/* Can't change realtime flag if any extents are allocated. */
	if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
	    XFS_IS_REALTIME_INODE(ip) != (fa->fsx_xflags & FS_XFLAG_REALTIME))
		return -EINVAL;

	/* If realtime flag is set then must have realtime device */
	if (fa->fsx_xflags & FS_XFLAG_REALTIME) {
		if (mp->m_sb.sb_rblocks == 0 || mp->m_sb.sb_rextsize == 0 ||
		    (ip->i_d.di_extsize % mp->m_sb.sb_rextsize))
			return -EINVAL;
	}

	/* Clear reflink if we are actually able to set the rt flag. */
	if ((fa->fsx_xflags & FS_XFLAG_REALTIME) && xfs_is_reflink_inode(ip))
		ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;

	/* Don't allow us to set DAX mode for a reflinked file for now. */
	if ((fa->fsx_xflags & FS_XFLAG_DAX) && xfs_is_reflink_inode(ip))
		return -EINVAL;

	/*
	 * Can't modify an immutable/append-only file unless
	 * we have appropriate permission.
	 */
	if (((ip->i_d.di_flags & (XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND)) ||
	     (fa->fsx_xflags & (FS_XFLAG_IMMUTABLE | FS_XFLAG_APPEND))) &&
	    !capable(CAP_LINUX_IMMUTABLE))
		return -EPERM;

	xfs_set_diflags(ip, fa->fsx_xflags);
	xfs_diflags_to_linux(ip);
	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	XFS_STATS_INC(mp, xs_ig_attrchg);
	return 0;
}
Beispiel #9
0
/*
 * Automatic CoW Reservation Freeing
 *
 * These functions automatically garbage collect leftover CoW reservations
 * that were made on behalf of a cowextsize hint when we start to run out
 * of quota or when the reservations sit around for too long.  If the file
 * has dirty pages or is undergoing writeback, its CoW reservations will
 * be retained.
 *
 * The actual garbage collection piggybacks off the same code that runs
 * the speculative EOF preallocation garbage collector.
 */
STATIC int
xfs_inode_free_cowblocks(
	struct xfs_inode	*ip,
	int			flags,
	void			*args)
{
	int ret;
	struct xfs_eofblocks *eofb = args;
	int match;
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);

	/*
	 * Just clear the tag if we have an empty cow fork or none at all. It's
	 * possible the inode was fully unshared since it was originally tagged.
	 */
	if (!xfs_is_reflink_inode(ip) || !ifp->if_bytes) {
		trace_xfs_inode_free_cowblocks_invalid(ip);
		xfs_inode_clear_cowblocks_tag(ip);
		return 0;
	}

	/*
	 * If the mapping is dirty or under writeback we cannot touch the
	 * CoW fork.  Leave it alone if we're in the midst of a directio.
	 */
	if ((VFS_I(ip)->i_state & I_DIRTY_PAGES) ||
	    mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) ||
	    mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) ||
	    atomic_read(&VFS_I(ip)->i_dio_count))
		return 0;

	if (eofb) {
		if (eofb->eof_flags & XFS_EOF_FLAGS_UNION)
			match = xfs_inode_match_id_union(ip, eofb);
		else
			match = xfs_inode_match_id(ip, eofb);
		if (!match)
			return 0;

		/* skip the inode if the file size is too small */
		if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE &&
		    XFS_ISIZE(ip) < eofb->eof_min_file_size)
			return 0;
	}

	/* Free the CoW blocks */
	xfs_ilock(ip, XFS_IOLOCK_EXCL);
	xfs_ilock(ip, XFS_MMAPLOCK_EXCL);

	ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false);

	xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
	xfs_iunlock(ip, XFS_IOLOCK_EXCL);

	return ret;
}
Beispiel #10
0
/*
 * If the reflink iflag disagrees with a scan for shared data fork extents,
 * either flag an error (shared extents w/ no flag) or a preen (flag set w/o
 * any shared extents).  We already checked for reflink iflag set on a non
 * reflink filesystem.
 */
static void
xchk_inode_check_reflink_iflag(
	struct xfs_scrub	*sc,
	xfs_ino_t		ino)
{
	struct xfs_mount	*mp = sc->mp;
	bool			has_shared;
	int			error;

	if (!xfs_sb_version_hasreflink(&mp->m_sb))
		return;

	error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip,
			&has_shared);
	if (!xchk_xref_process_error(sc, XFS_INO_TO_AGNO(mp, ino),
			XFS_INO_TO_AGBNO(mp, ino), &error))
		return;
	if (xfs_is_reflink_inode(sc->ip) && !has_shared)
		xchk_ino_set_preen(sc, ino);
	else if (!xfs_is_reflink_inode(sc->ip) && has_shared)
		xchk_ino_set_corrupt(sc, ino);
}
Beispiel #11
0
/*
 * Cancel CoW reservations for some byte range of an inode.
 *
 * If cancel_real is true this function cancels all COW fork extents for the
 * inode; if cancel_real is false, real extents are not cleared.
 */
int
xfs_reflink_cancel_cow_range(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		count,
	bool			cancel_real)
{
	struct xfs_trans	*tp;
	xfs_fileoff_t		offset_fsb;
	xfs_fileoff_t		end_fsb;
	int			error;

	trace_xfs_reflink_cancel_cow_range(ip, offset, count);
	ASSERT(xfs_is_reflink_inode(ip));

	offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
	if (count == NULLFILEOFF)
		end_fsb = NULLFILEOFF;
	else
		end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count);

	/* Start a rolling transaction to remove the mappings */
	error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write,
			0, 0, 0, &tp);
	if (error)
		goto out;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, 0);

	/* Scrape out the old CoW reservations */
	error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb,
			cancel_real);
	if (error)
		goto out_cancel;

	error = xfs_trans_commit(tp);

	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;

out_cancel:
	xfs_trans_cancel(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
out:
	trace_xfs_reflink_cancel_cow_range_error(ip, error, _RET_IP_);
	return error;
}
Beispiel #12
0
/*
 * Pre-COW all shared blocks within a given byte range of a file and turn off
 * the reflink flag if we unshare all of the file's blocks.
 */
int
xfs_reflink_unshare(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		len)
{
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		fbno;
	xfs_filblks_t		end;
	xfs_off_t		isize;
	int			error;

	if (!xfs_is_reflink_inode(ip))
		return 0;

	trace_xfs_reflink_unshare(ip, offset, len);

	inode_dio_wait(VFS_I(ip));

	/* Try to CoW the selected ranges */
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	fbno = XFS_B_TO_FSBT(mp, offset);
	isize = i_size_read(VFS_I(ip));
	end = XFS_B_TO_FSB(mp, offset + len);
	error = xfs_reflink_dirty_extents(ip, fbno, end, isize);
	if (error)
		goto out_unlock;
	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	/* Wait for the IO to finish */
	error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
	if (error)
		goto out;

	/* Turn off the reflink flag if possible. */
	error = xfs_reflink_try_clear_inode_flag(ip);
	if (error)
		goto out;

	return 0;

out_unlock:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
out:
	trace_xfs_reflink_unshare_error(ip, error, _RET_IP_);
	return error;
}
Beispiel #13
0
/*
 * Cancel CoW reservations for some block range of an inode.
 *
 * If cancel_real is true this function cancels all COW fork extents for the
 * inode; if cancel_real is false, real extents are not cleared.
 */
int
xfs_reflink_cancel_cow_blocks(
	struct xfs_inode		*ip,
	struct xfs_trans		**tpp,
	xfs_fileoff_t			offset_fsb,
	xfs_fileoff_t			end_fsb,
	bool				cancel_real)
{
	struct xfs_ifork		*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
	struct xfs_bmbt_irec		got, del;
	xfs_extnum_t			idx;
	xfs_fsblock_t			firstfsb;
	struct xfs_defer_ops		dfops;
	int				error = 0;

	if (!xfs_is_reflink_inode(ip))
		return 0;
	if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got))
		return 0;

	while (got.br_startoff < end_fsb) {
		del = got;
		xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb);
		trace_xfs_reflink_cancel_cow(ip, &del);

		if (isnullstartblock(del.br_startblock)) {
			error = xfs_bmap_del_extent_delay(ip, XFS_COW_FORK,
					&idx, &got, &del);
			if (error)
				break;
		} else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) {
			xfs_trans_ijoin(*tpp, ip, 0);
			xfs_defer_init(&dfops, &firstfsb);

			/* Free the CoW orphan record. */
			error = xfs_refcount_free_cow_extent(ip->i_mount,
					&dfops, del.br_startblock,
					del.br_blockcount);
			if (error)
				break;

			xfs_bmap_add_free(ip->i_mount, &dfops,
					del.br_startblock, del.br_blockcount,
					NULL);

			/* Update quota accounting */
			xfs_trans_mod_dquot_byino(*tpp, ip, XFS_TRANS_DQ_BCOUNT,
					-(long)del.br_blockcount);

			/* Roll the transaction */
			xfs_defer_ijoin(&dfops, ip);
			error = xfs_defer_finish(tpp, &dfops);
			if (error) {
				xfs_defer_cancel(&dfops);
				break;
			}

			/* Remove the mapping from the CoW fork. */
			xfs_bmap_del_extent_cow(ip, &idx, &got, &del);
		}

		if (!xfs_iext_get_extent(ifp, ++idx, &got))
			break;
	}

	/* clear tag if cow fork is emptied */
	if (!ifp->if_bytes)
		xfs_inode_clear_cowblocks_tag(ip);

	return error;
}
Beispiel #14
0
/* Allocate all CoW reservations covering a range of blocks in a file. */
int
xfs_reflink_allocate_cow(
	struct xfs_inode	*ip,
	struct xfs_bmbt_irec	*imap,
	bool			*shared,
	uint			*lockmode)
{
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		offset_fsb = imap->br_startoff;
	xfs_filblks_t		count_fsb = imap->br_blockcount;
	struct xfs_bmbt_irec	got;
	struct xfs_defer_ops	dfops;
	struct xfs_trans	*tp = NULL;
	xfs_fsblock_t		first_block;
	int			nimaps, error = 0;
	bool			trimmed;
	xfs_filblks_t		resaligned;
	xfs_extlen_t		resblks = 0;
	xfs_extnum_t		idx;

retry:
	ASSERT(xfs_is_reflink_inode(ip));
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));

	/*
	 * Even if the extent is not shared we might have a preallocation for
	 * it in the COW fork.  If so use it.
	 */
	if (xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &idx, &got) &&
	    got.br_startoff <= offset_fsb) {
		*shared = true;

		/* If we have a real allocation in the COW fork we're done. */
		if (!isnullstartblock(got.br_startblock)) {
			xfs_trim_extent(&got, offset_fsb, count_fsb);
			*imap = got;
			goto convert;
		}

		xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);
	} else {
		error = xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed);
		if (error || !*shared)
			goto out;
	}

	if (!tp) {
		resaligned = xfs_aligned_fsb_count(imap->br_startoff,
			imap->br_blockcount, xfs_get_cowextsz_hint(ip));
		resblks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);

		xfs_iunlock(ip, *lockmode);
		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
		*lockmode = XFS_ILOCK_EXCL;
		xfs_ilock(ip, *lockmode);

		if (error)
			return error;

		error = xfs_qm_dqattach_locked(ip, 0);
		if (error)
			goto out;
		goto retry;
	}

	error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0,
			XFS_QMOPT_RES_REGBLKS);
	if (error)
		goto out;

	xfs_trans_ijoin(tp, ip, 0);

	xfs_defer_init(&dfops, &first_block);
	nimaps = 1;

	/* Allocate the entire reservation as unwritten blocks. */
	error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount,
			XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, &first_block,
			resblks, imap, &nimaps, &dfops);
	if (error)
		goto out_bmap_cancel;

	/* Finish up. */
	error = xfs_defer_finish(&tp, &dfops);
	if (error)
		goto out_bmap_cancel;

	error = xfs_trans_commit(tp);
	if (error)
		return error;
convert:
	return xfs_reflink_convert_cow_extent(ip, imap, offset_fsb, count_fsb,
			&dfops);
out_bmap_cancel:
	xfs_defer_cancel(&dfops);
	xfs_trans_unreserve_quota_nblks(tp, ip, (long)resblks, 0,
			XFS_QMOPT_RES_REGBLKS);
out:
	if (tp)
		xfs_trans_cancel(tp);
	return error;
}
Beispiel #15
0
/*
 * xfs_file_dio_aio_write - handle direct IO writes
 *
 * Lock the inode appropriately to prepare for and issue a direct IO write.
 * By separating it from the buffered write path we remove all the tricky to
 * follow locking changes and looping.
 *
 * If there are cached pages or we're extending the file, we need IOLOCK_EXCL
 * until we're sure the bytes at the new EOF have been zeroed and/or the cached
 * pages are flushed out.
 *
 * In most cases the direct IO writes will be done holding IOLOCK_SHARED
 * allowing them to be done in parallel with reads and other direct IO writes.
 * However, if the IO is not aligned to filesystem blocks, the direct IO layer
 * needs to do sub-block zeroing and that requires serialisation against other
 * direct IOs to the same block. In this case we need to serialise the
 * submission of the unaligned IOs so that we don't get racing block zeroing in
 * the dio layer.  To avoid the problem with aio, we also need to wait for
 * outstanding IOs to complete so that unwritten extent conversion is completed
 * before we try to map the overlapping block. This is currently implemented by
 * hitting it with a big hammer (i.e. inode_dio_wait()).
 *
 * Returns with locks held indicated by @iolock and errors indicated by
 * negative return values.
 */
STATIC ssize_t
xfs_file_dio_aio_write(
	struct kiocb		*iocb,
	struct iov_iter		*from)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	ssize_t			ret = 0;
	int			unaligned_io = 0;
	int			iolock;
	size_t			count = iov_iter_count(from);
	loff_t			end;
	struct iov_iter		data;
	struct xfs_buftarg	*target = XFS_IS_REALTIME_INODE(ip) ?
					mp->m_rtdev_targp : mp->m_ddev_targp;

	/* DIO must be aligned to device logical sector size */
	if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
		return -EINVAL;

	/* "unaligned" here means not aligned to a filesystem block */
	if ((iocb->ki_pos & mp->m_blockmask) ||
	    ((iocb->ki_pos + count) & mp->m_blockmask))
		unaligned_io = 1;

	/*
	 * We don't need to take an exclusive lock unless there page cache needs
	 * to be invalidated or unaligned IO is being executed. We don't need to
	 * consider the EOF extension case here because
	 * xfs_file_aio_write_checks() will relock the inode as necessary for
	 * EOF zeroing cases and fill out the new inode size as appropriate.
	 */
	if (unaligned_io || mapping->nrpages)
		iolock = XFS_IOLOCK_EXCL;
	else
		iolock = XFS_IOLOCK_SHARED;
	xfs_rw_ilock(ip, iolock);

	/*
	 * Recheck if there are cached pages that need invalidate after we got
	 * the iolock to protect against other threads adding new pages while
	 * we were waiting for the iolock.
	 */
	if (mapping->nrpages && iolock == XFS_IOLOCK_SHARED) {
		xfs_rw_iunlock(ip, iolock);
		iolock = XFS_IOLOCK_EXCL;
		xfs_rw_ilock(ip, iolock);
	}

	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
	if (ret)
		goto out;
	count = iov_iter_count(from);
	end = iocb->ki_pos + count - 1;

	/*
	 * See xfs_file_dio_aio_read() for why we do a full-file flush here.
	 */
	if (mapping->nrpages) {
		ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
		if (ret)
			goto out;
		/*
		 * Invalidate whole pages. This can return an error if we fail
		 * to invalidate a page, but this should never happen on XFS.
		 * Warn if it does fail.
		 */
		ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping);
		WARN_ON_ONCE(ret);
		ret = 0;
	}

	/*
	 * If we are doing unaligned IO, wait for all other IO to drain,
	 * otherwise demote the lock if we had to flush cached pages
	 */
	if (unaligned_io)
		inode_dio_wait(inode);
	else if (iolock == XFS_IOLOCK_EXCL) {
		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
		iolock = XFS_IOLOCK_SHARED;
	}

	trace_xfs_file_direct_write(ip, count, iocb->ki_pos);

	/* If this is a block-aligned directio CoW, remap immediately. */
	if (xfs_is_reflink_inode(ip) && !unaligned_io) {
		ret = xfs_reflink_allocate_cow_range(ip, iocb->ki_pos, count);
		if (ret)
			goto out;
	}

	data = *from;
	ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
			xfs_get_blocks_direct, xfs_end_io_direct_write,
			NULL, DIO_ASYNC_EXTEND);

	/* see generic_file_direct_write() for why this is necessary */
	if (mapping->nrpages) {
		invalidate_inode_pages2_range(mapping,
					      iocb->ki_pos >> PAGE_SHIFT,
					      end >> PAGE_SHIFT);
	}
Beispiel #16
0
/*
 * Cancel CoW reservations for some block range of an inode.
 *
 * If cancel_real is true this function cancels all COW fork extents for the
 * inode; if cancel_real is false, real extents are not cleared.
 */
int
xfs_reflink_cancel_cow_blocks(
	struct xfs_inode		*ip,
	struct xfs_trans		**tpp,
	xfs_fileoff_t			offset_fsb,
	xfs_fileoff_t			end_fsb,
	bool				cancel_real)
{
	struct xfs_ifork		*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
	struct xfs_bmbt_irec		got, del;
	struct xfs_iext_cursor		icur;
	xfs_fsblock_t			firstfsb;
	struct xfs_defer_ops		dfops;
	int				error = 0;

	if (!xfs_is_reflink_inode(ip))
		return 0;
	if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got))
		return 0;

	/* Walk backwards until we're out of the I/O range... */
	while (got.br_startoff + got.br_blockcount > offset_fsb) {
		del = got;
		xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb);

		/* Extent delete may have bumped ext forward */
		if (!del.br_blockcount) {
			xfs_iext_prev(ifp, &icur);
			goto next_extent;
		}

		trace_xfs_reflink_cancel_cow(ip, &del);

		if (isnullstartblock(del.br_startblock)) {
			error = xfs_bmap_del_extent_delay(ip, XFS_COW_FORK,
					&icur, &got, &del);
			if (error)
				break;
		} else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) {
			xfs_trans_ijoin(*tpp, ip, 0);
			xfs_defer_init(&dfops, &firstfsb);

			/* Free the CoW orphan record. */
			error = xfs_refcount_free_cow_extent(ip->i_mount,
					&dfops, del.br_startblock,
					del.br_blockcount);
			if (error)
				break;

			xfs_bmap_add_free(ip->i_mount, &dfops,
					del.br_startblock, del.br_blockcount,
					NULL);

			/* Update quota accounting */
			xfs_trans_mod_dquot_byino(*tpp, ip, XFS_TRANS_DQ_BCOUNT,
					-(long)del.br_blockcount);

			/* Roll the transaction */
			xfs_defer_ijoin(&dfops, ip);
			error = xfs_defer_finish(tpp, &dfops);
			if (error) {
				xfs_defer_cancel(&dfops);
				break;
			}

			/* Remove the mapping from the CoW fork. */
			xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
		} else {
			/* Didn't do anything, push cursor back. */
			xfs_iext_prev(ifp, &icur);
		}
next_extent:
		if (!xfs_iext_get_extent(ifp, &icur, &got))
			break;
	}

	/* clear tag if cow fork is emptied */
	if (!ifp->if_bytes)
		xfs_inode_clear_cowblocks_tag(ip);

	return error;
}
Beispiel #17
0
/* Clear the inode reflink flag if there are no shared extents. */
int
xfs_reflink_clear_inode_flag(
	struct xfs_inode	*ip,
	struct xfs_trans	**tpp)
{
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		fbno;
	xfs_filblks_t		end;
	xfs_agnumber_t		agno;
	xfs_agblock_t		agbno;
	xfs_extlen_t		aglen;
	xfs_agblock_t		rbno;
	xfs_extlen_t		rlen;
	struct xfs_bmbt_irec	map;
	int			nmaps;
	int			error = 0;

	ASSERT(xfs_is_reflink_inode(ip));

	fbno = 0;
	end = XFS_B_TO_FSB(mp, i_size_read(VFS_I(ip)));
	while (end - fbno > 0) {
		nmaps = 1;
		/*
		 * Look for extents in the file.  Skip holes, delalloc, or
		 * unwritten extents; they can't be reflinked.
		 */
		error = xfs_bmapi_read(ip, fbno, end - fbno, &map, &nmaps, 0);
		if (error)
			return error;
		if (nmaps == 0)
			break;
		if (!xfs_bmap_is_real_extent(&map))
			goto next;

		agno = XFS_FSB_TO_AGNO(mp, map.br_startblock);
		agbno = XFS_FSB_TO_AGBNO(mp, map.br_startblock);
		aglen = map.br_blockcount;

		error = xfs_reflink_find_shared(mp, agno, agbno, aglen,
				&rbno, &rlen, false);
		if (error)
			return error;
		/* Is there still a shared block here? */
		if (rbno != NULLAGBLOCK)
			return 0;
next:
		fbno = map.br_startoff + map.br_blockcount;
	}

	/*
	 * We didn't find any shared blocks so turn off the reflink flag.
	 * First, get rid of any leftover CoW mappings.
	 */
	error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF, true);
	if (error)
		return error;

	/* Clear the inode flag. */
	trace_xfs_reflink_unset_inode_flag(ip);
	ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
	xfs_inode_clear_cowblocks_tag(ip);
	xfs_trans_ijoin(*tpp, ip, 0);
	xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);

	return error;
}
Beispiel #18
0
/* Allocate all CoW reservations covering a range of blocks in a file. */
int
xfs_reflink_allocate_cow(
	struct xfs_inode	*ip,
	struct xfs_bmbt_irec	*imap,
	bool			*shared,
	uint			*lockmode,
	bool			convert_now)
{
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		offset_fsb = imap->br_startoff;
	xfs_filblks_t		count_fsb = imap->br_blockcount;
	struct xfs_trans	*tp;
	int			nimaps, error = 0;
	bool			found;
	xfs_filblks_t		resaligned;
	xfs_extlen_t		resblks = 0;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
	if (!ip->i_cowfp) {
		ASSERT(!xfs_is_reflink_inode(ip));
		xfs_ifork_init_cow(ip);
	}

	error = xfs_find_trim_cow_extent(ip, imap, shared, &found);
	if (error || !*shared)
		return error;
	if (found)
		goto convert;

	resaligned = xfs_aligned_fsb_count(imap->br_startoff,
		imap->br_blockcount, xfs_get_cowextsz_hint(ip));
	resblks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);

	xfs_iunlock(ip, *lockmode);
	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
	*lockmode = XFS_ILOCK_EXCL;
	xfs_ilock(ip, *lockmode);

	if (error)
		return error;

	error = xfs_qm_dqattach_locked(ip, false);
	if (error)
		goto out_trans_cancel;

	/*
	 * Check for an overlapping extent again now that we dropped the ilock.
	 */
	error = xfs_find_trim_cow_extent(ip, imap, shared, &found);
	if (error || !*shared)
		goto out_trans_cancel;
	if (found) {
		xfs_trans_cancel(tp);
		goto convert;
	}

	error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0,
			XFS_QMOPT_RES_REGBLKS);
	if (error)
		goto out_trans_cancel;

	xfs_trans_ijoin(tp, ip, 0);

	/* Allocate the entire reservation as unwritten blocks. */
	nimaps = 1;
	error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount,
			XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC,
			resblks, imap, &nimaps);
	if (error)
		goto out_unreserve;

	xfs_inode_set_cowblocks_tag(ip);
	error = xfs_trans_commit(tp);
	if (error)
		return error;

	/*
	 * Allocation succeeded but the requested range was not even partially
	 * satisfied?  Bail out!
	 */
	if (nimaps == 0)
		return -ENOSPC;
convert:
	xfs_trim_extent(imap, offset_fsb, count_fsb);
	/*
	 * COW fork extents are supposed to remain unwritten until we're ready
	 * to initiate a disk write.  For direct I/O we are going to write the
	 * data and need the conversion, but for buffered writes we're done.
	 */
	if (!convert_now || imap->br_state == XFS_EXT_NORM)
		return 0;
	trace_xfs_reflink_convert_cow(ip, imap);
	return xfs_reflink_convert_cow_locked(ip, offset_fsb, count_fsb);

out_unreserve:
	xfs_trans_unreserve_quota_nblks(tp, ip, (long)resblks, 0,
			XFS_QMOPT_RES_REGBLKS);
out_trans_cancel:
	xfs_trans_cancel(tp);
	return error;
}
Beispiel #19
0
/*
 * xfs_file_dio_aio_write - handle direct IO writes
 *
 * Lock the inode appropriately to prepare for and issue a direct IO write.
 * By separating it from the buffered write path we remove all the tricky to
 * follow locking changes and looping.
 *
 * If there are cached pages or we're extending the file, we need IOLOCK_EXCL
 * until we're sure the bytes at the new EOF have been zeroed and/or the cached
 * pages are flushed out.
 *
 * In most cases the direct IO writes will be done holding IOLOCK_SHARED
 * allowing them to be done in parallel with reads and other direct IO writes.
 * However, if the IO is not aligned to filesystem blocks, the direct IO layer
 * needs to do sub-block zeroing and that requires serialisation against other
 * direct IOs to the same block. In this case we need to serialise the
 * submission of the unaligned IOs so that we don't get racing block zeroing in
 * the dio layer.  To avoid the problem with aio, we also need to wait for
 * outstanding IOs to complete so that unwritten extent conversion is completed
 * before we try to map the overlapping block. This is currently implemented by
 * hitting it with a big hammer (i.e. inode_dio_wait()).
 *
 * Returns with locks held indicated by @iolock and errors indicated by
 * negative return values.
 */
STATIC ssize_t
xfs_file_dio_aio_write(
	struct kiocb		*iocb,
	struct iov_iter		*from)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	ssize_t			ret = 0;
	int			unaligned_io = 0;
	int			iolock;
	size_t			count = iov_iter_count(from);
	struct xfs_buftarg      *target = XFS_IS_REALTIME_INODE(ip) ?
					mp->m_rtdev_targp : mp->m_ddev_targp;

	/* DIO must be aligned to device logical sector size */
	if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
		return -EINVAL;

	/*
	 * Don't take the exclusive iolock here unless the I/O is unaligned to
	 * the file system block size.  We don't need to consider the EOF
	 * extension case here because xfs_file_aio_write_checks() will relock
	 * the inode as necessary for EOF zeroing cases and fill out the new
	 * inode size as appropriate.
	 */
	if ((iocb->ki_pos & mp->m_blockmask) ||
	    ((iocb->ki_pos + count) & mp->m_blockmask)) {
		unaligned_io = 1;

		/*
		 * We can't properly handle unaligned direct I/O to reflink
		 * files yet, as we can't unshare a partial block.
		 */
		if (xfs_is_reflink_inode(ip)) {
			trace_xfs_reflink_bounce_dio_write(ip, iocb->ki_pos, count);
			return -EREMCHG;
		}
		iolock = XFS_IOLOCK_EXCL;
	} else {
		iolock = XFS_IOLOCK_SHARED;
	}

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!xfs_ilock_nowait(ip, iolock))
			return -EAGAIN;
	} else {
		xfs_ilock(ip, iolock);
	}

	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
	if (ret)
		goto out;
	count = iov_iter_count(from);

	/*
	 * If we are doing unaligned IO, wait for all other IO to drain,
	 * otherwise demote the lock if we had to take the exclusive lock
	 * for other reasons in xfs_file_aio_write_checks.
	 */
	if (unaligned_io) {
		/* If we are going to wait for other DIO to finish, bail */
		if (iocb->ki_flags & IOCB_NOWAIT) {
			if (atomic_read(&inode->i_dio_count))
				return -EAGAIN;
		} else {
			inode_dio_wait(inode);
		}
	} else if (iolock == XFS_IOLOCK_EXCL) {
		xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
		iolock = XFS_IOLOCK_SHARED;
	}

	trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
	ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io);
out:
	xfs_iunlock(ip, iolock);

	/*
	 * No fallback to buffered IO on errors for XFS, direct IO will either
	 * complete fully or fail.
	 */
	ASSERT(ret < 0 || ret == count);
	return ret;
}
Beispiel #20
0
/*
 * Get a layout for the pNFS client.
 */
int
xfs_fs_map_blocks(
	struct inode		*inode,
	loff_t			offset,
	u64			length,
	struct iomap		*iomap,
	bool			write,
	u32			*device_generation)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_bmbt_irec	imap;
	xfs_fileoff_t		offset_fsb, end_fsb;
	loff_t			limit;
	int			bmapi_flags = XFS_BMAPI_ENTIRE;
	int			nimaps = 1;
	uint			lock_flags;
	int			error = 0;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	/*
	 * We can't export inodes residing on the realtime device.  The realtime
	 * device doesn't have a UUID to identify it, so the client has no way
	 * to find it.
	 */
	if (XFS_IS_REALTIME_INODE(ip))
		return -ENXIO;

	/*
	 * The pNFS block layout spec actually supports reflink like
	 * functionality, but the Linux pNFS server doesn't implement it yet.
	 */
	if (xfs_is_reflink_inode(ip))
		return -ENXIO;

	/*
	 * Lock out any other I/O before we flush and invalidate the pagecache,
	 * and then hand out a layout to the remote system.  This is very
	 * similar to direct I/O, except that the synchronization is much more
	 * complicated.  See the comment near xfs_break_layouts for a detailed
	 * explanation.
	 */
	xfs_ilock(ip, XFS_IOLOCK_EXCL);

	error = -EINVAL;
	limit = mp->m_super->s_maxbytes;
	if (!write)
		limit = max(limit, round_up(i_size_read(inode),
				     inode->i_sb->s_blocksize));
	if (offset > limit)
		goto out_unlock;
	if (offset > limit - length)
		length = limit - offset;

	error = filemap_write_and_wait(inode->i_mapping);
	if (error)
		goto out_unlock;
	error = invalidate_inode_pages2(inode->i_mapping);
	if (WARN_ON_ONCE(error))
		return error;

	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + length);
	offset_fsb = XFS_B_TO_FSBT(mp, offset);

	lock_flags = xfs_ilock_data_map_shared(ip);
	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
				&imap, &nimaps, bmapi_flags);
	xfs_iunlock(ip, lock_flags);

	if (error)
		goto out_unlock;

	if (write) {
		enum xfs_prealloc_flags	flags = 0;

		ASSERT(imap.br_startblock != DELAYSTARTBLOCK);

		if (!nimaps || imap.br_startblock == HOLESTARTBLOCK) {
			/*
			 * xfs_iomap_write_direct() expects to take ownership of
			 * the shared ilock.
			 */
			xfs_ilock(ip, XFS_ILOCK_SHARED);
			error = xfs_iomap_write_direct(ip, offset, length,
						       &imap, nimaps);
			if (error)
				goto out_unlock;

			/*
			 * Ensure the next transaction is committed
			 * synchronously so that the blocks allocated and
			 * handed out to the client are guaranteed to be
			 * present even after a server crash.
			 */
			flags |= XFS_PREALLOC_SET | XFS_PREALLOC_SYNC;
		}

		error = xfs_update_prealloc_flags(ip, flags);
		if (error)
			goto out_unlock;
	}
	xfs_iunlock(ip, XFS_IOLOCK_EXCL);

	xfs_bmbt_to_iomap(ip, iomap, &imap);
	*device_generation = mp->m_generation;
	return error;
out_unlock:
	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
	return error;
}