STATIC void
xfs_fstrm_free_func(
	unsigned long	ino,
	void		*data)
{
	fstrm_item_t	*item  = (fstrm_item_t *)data;
	xfs_inode_t	*ip = item->ip;

	ASSERT(ip->i_ino == ino);

	xfs_iflags_clear(ip, XFS_IFILESTREAM);

	
	/* Drop the reference taken on the AG when the item was added. */
	xfs_filestream_put_ag(ip->i_mount, item->ag);


	TRACE_FREE(ip->i_mount, ip, item->pip, item->ag,
		xfs_filestream_peek_ag(ip->i_mount, item->ag));

	IRELE(ip);

	if (item->pip)
		IRELE(item->pip);

	
	/* Finally, free the memory allocated for the item. */
	kmem_zone_free(item_zone, item);
}
Example #2
STATIC int
xfs_file_fsync(
    struct file		*file,
    loff_t			start,
    loff_t			end,
    int			datasync)
{
    struct inode		*inode = file->f_mapping->host;
    struct xfs_inode	*ip = XFS_I(inode);
    struct xfs_mount	*mp = ip->i_mount;
    int			error = 0;
    int			log_flushed = 0;
    xfs_lsn_t		lsn = 0;

    trace_xfs_file_fsync(ip);

    error = filemap_write_and_wait_range(inode->i_mapping, start, end);
    if (error)
        return error;

    if (XFS_FORCED_SHUTDOWN(mp))
        return -XFS_ERROR(EIO);

    xfs_iflags_clear(ip, XFS_ITRUNCATED);

    if (mp->m_flags & XFS_MOUNT_BARRIER) {
        /*
         * If we have an RT and/or log subvolume we need to make sure
         * to flush the write cache the device used for file data
         * first.  This is to ensure newly written file data make
         * it to disk before logging the new inode size in case of
         * an extending write.
         */
        if (XFS_IS_REALTIME_INODE(ip))
            xfs_blkdev_issue_flush(mp->m_rtdev_targp);
        else if (mp->m_logdev_targp != mp->m_ddev_targp)
            xfs_blkdev_issue_flush(mp->m_ddev_targp);
    }

    xfs_ilock(ip, XFS_ILOCK_SHARED);
    if (xfs_ipincount(ip)) {
        if (!datasync ||
                (ip->i_itemp->ili_fields & ~XFS_ILOG_TIMESTAMP))
            lsn = ip->i_itemp->ili_last_lsn;
    }
    xfs_iunlock(ip, XFS_ILOCK_SHARED);

    if (lsn)
        error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);

    if ((mp->m_flags & XFS_MOUNT_BARRIER) &&
            mp->m_logdev_targp == mp->m_ddev_targp &&
            !XFS_IS_REALTIME_INODE(ip) &&
            !log_flushed)
        xfs_blkdev_issue_flush(mp->m_ddev_targp);

    return -error;
}
Example #3
/*
 * We ignore the datasync flag here because a datasync is effectively
 * identical to an fsync. That is, datasync implies that we need to write
 * only the metadata needed to be able to access the data that is written
 * if we crash after the call completes. Hence if we are writing beyond
 * EOF we have to log the inode size change as well, which makes it a
 * full fsync. If we don't write beyond EOF, the inode core will be
 * clean in memory and so we don't need to log the inode, just like
 * fsync.
 */
STATIC int
xfs_file_fsync(
	struct file	*filp,
	struct dentry	*dentry,
	int		datasync)
{
	xfs_iflags_clear(XFS_I(dentry->d_inode), XFS_ITRUNCATED);
	return -xfs_fsync(XFS_I(dentry->d_inode));
}
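The comment above is easier to see from user space: an extending write changes the inode size, so fdatasync() must make the new size durable as well and ends up doing the same work as fsync(), while an in-place overwrite of already-allocated blocks does not. A minimal sketch of the two cases, assuming a hypothetical scratch path and ignoring short-write handling:

#include <fcntl.h>
#include <unistd.h>

/* Illustrative only: contrast an extending write with an in-place
 * overwrite.  "/tmp/scratch" is a placeholder path. */
int example_datasync_cases(void)
{
	char	buf[4096] = { 0 };
	int	fd = open("/tmp/scratch", O_RDWR | O_CREAT, 0644);

	if (fd < 0)
		return -1;

	/* Extending write: the on-disk inode size changes, so
	 * fdatasync() must log the inode too (a full fsync). */
	write(fd, buf, sizeof(buf));
	fdatasync(fd);

	/* Overwrite within the existing size: no size change, so only
	 * the data needs to reach disk. */
	pwrite(fd, buf, sizeof(buf), 0);
	fdatasync(fd);

	close(fd);
	return 0;
}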
Example #4
STATIC int
xfs_file_fsync(
	struct file	*filp,
	struct dentry	*dentry,
	int		datasync)
{
	int		flags = FSYNC_WAIT;

	if (datasync)
		flags |= FSYNC_DATA;
	xfs_iflags_clear(XFS_I(dentry->d_inode), XFS_ITRUNCATED);
	return -xfs_fsync(XFS_I(dentry->d_inode), flags,
			(xfs_off_t)0, (xfs_off_t)-1);
}
Example #5
/*
 * Revalidate the Linux inode from the XFS inode.
 * Note: i_size _not_ updated; we must hold the inode
 * semaphore when doing that - callers responsibility.
 */
int
vn_revalidate(
    bhv_vnode_t        *vp)
{
    struct inode        *inode = vn_to_inode(vp);
    struct xfs_inode    *ip = XFS_I(inode);
    struct xfs_mount    *mp = ip->i_mount;
    unsigned long        xflags;

    xfs_itrace_entry(ip);

    if (XFS_FORCED_SHUTDOWN(mp))
        return -EIO;

    xfs_ilock(ip, XFS_ILOCK_SHARED);
    inode->i_mode        = ip->i_d.di_mode;
    inode->i_uid        = ip->i_d.di_uid;
    inode->i_gid        = ip->i_d.di_gid;
    inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
    inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
    inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
    inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;

    xflags = xfs_ip2xflags(ip);
    if (xflags & XFS_XFLAG_IMMUTABLE)
        inode->i_flags |= S_IMMUTABLE;
    else
        inode->i_flags &= ~S_IMMUTABLE;
    if (xflags & XFS_XFLAG_APPEND)
        inode->i_flags |= S_APPEND;
    else
        inode->i_flags &= ~S_APPEND;
    if (xflags & XFS_XFLAG_SYNC)
        inode->i_flags |= S_SYNC;
    else
        inode->i_flags &= ~S_SYNC;
    if (xflags & XFS_XFLAG_NOATIME)
        inode->i_flags |= S_NOATIME;
    else
        inode->i_flags &= ~S_NOATIME;
    xfs_iunlock(ip, XFS_ILOCK_SHARED);

    xfs_iflags_clear(ip, XFS_IMODIFIED);
    return 0;
}
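The per-flag translation above (XFS_XFLAG_* to the VFS S_* inode flags) is written as four explicit if/else pairs; the same mapping can be expressed table-driven. A sketch under the same flag definitions, with the array and helper name purely illustrative:

/* Hypothetical table-driven variant of the xflags -> i_flags mapping
 * in vn_revalidate() above; the logic is unchanged. */
static const struct {
	unsigned long	xflag;
	unsigned int	iflag;
} example_xflag_map[] = {
	{ XFS_XFLAG_IMMUTABLE,	S_IMMUTABLE },
	{ XFS_XFLAG_APPEND,	S_APPEND },
	{ XFS_XFLAG_SYNC,	S_SYNC },
	{ XFS_XFLAG_NOATIME,	S_NOATIME },
};

static void
example_xflags_to_iflags(
	struct inode	*inode,
	unsigned long	xflags)
{
	int		i;

	for (i = 0; i < ARRAY_SIZE(example_xflag_map); i++) {
		if (xflags & example_xflag_map[i].xflag)
			inode->i_flags |= example_xflag_map[i].iflag;
		else
			inode->i_flags &= ~example_xflag_map[i].iflag;
	}
}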
Example #6
int
xfs_flushinval_pages(
	xfs_inode_t	*ip,
	xfs_off_t	first,
	xfs_off_t	last,
	int		fiopt)
{
	struct address_space *mapping = ip->i_vnode->i_mapping;
	int		ret = 0;

	if (mapping->nrpages) {
		xfs_iflags_clear(ip, XFS_ITRUNCATED);
		ret = filemap_write_and_wait(mapping);
		if (!ret)
			truncate_inode_pages(mapping, first);
	}
	return ret;
}
int
xfs_flushinval_pages(
	xfs_inode_t	*ip,
	xfs_off_t	first,
	xfs_off_t	last,
	int		fiopt)
{
	struct address_space *mapping = VFS_I(ip)->i_mapping;
	int		ret = 0;

	trace_xfs_pagecache_inval(ip, first, last);

	xfs_iflags_clear(ip, XFS_ITRUNCATED);
	ret = filemap_write_and_wait_range(mapping, first,
				last == -1 ? LLONG_MAX : last);
	if (!ret)
		truncate_inode_pages_range(mapping, first, last);
	return -ret;
}
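Both variants above take a byte range: passing first = 0 and last = -1 covers the whole file (the newer variant maps -1 to LLONG_MAX), and the fiopt argument is not consulted by either version shown. An illustrative caller follows; the wrapper name and the FI_NONE flag value are assumptions for this sketch, not taken from the listings above.

/* Hypothetical wrapper: write back and invalidate an inode's entire
 * page cache.  fiopt is ignored by both versions of
 * xfs_flushinval_pages() shown above. */
STATIC int
example_flushinval_whole_file(
	xfs_inode_t	*ip)
{
	return xfs_flushinval_pages(ip, 0, -1, FI_NONE);
}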
Example #8
/* xfs_fstrm_free_func(): callback for freeing cached stream items. */
STATIC void
xfs_fstrm_free_func(
	unsigned long	ino,
	void		*data)
{
	fstrm_item_t	*item  = (fstrm_item_t *)data;
	xfs_inode_t	*ip = item->ip;
	int ref;

	ASSERT(ip->i_ino == ino);

	xfs_iflags_clear(ip, XFS_IFILESTREAM);

	/* Drop the reference taken on the AG when the item was added. */
	ref = xfs_filestream_put_ag(ip->i_mount, item->ag);

	ASSERT(ref >= 0);
	TRACE_FREE(ip->i_mount, ip, item->pip, item->ag,
		xfs_filestream_peek_ag(ip->i_mount, item->ag));

	/*
	 * _xfs_filestream_update_ag() always takes a reference on the inode
	 * itself, whether it's a file or a directory.  Release it here.
	 * This can result in the inode being freed and so we must
	 * not hold any inode locks when freeing filestreams objects
	 * otherwise we can deadlock here.
	 */
	IRELE(ip);

	/*
	 * In the case of a regular file, _xfs_filestream_update_ag() also
	 * takes a ref on the parent inode to keep it in-core.  Release that
	 * too.
	 */
	if (item->pip)
		IRELE(item->pip);

	/* Finally, free the memory allocated for the item. */
	kmem_zone_free(item_zone, item);
}
int
xfs_flush_pages(
	xfs_inode_t	*ip,
	xfs_off_t	first,
	xfs_off_t	last,
	uint64_t	flags,
	int		fiopt)
{
	struct address_space *mapping = VFS_I(ip)->i_mapping;
	int		ret = 0;
	int		ret2;

	xfs_iflags_clear(ip, XFS_ITRUNCATED);
	ret = -filemap_fdatawrite_range(mapping, first,
				last == -1 ? LLONG_MAX : last);
	if (flags & XBF_ASYNC)
		return ret;
	ret2 = xfs_wait_on_pages(ip, first, last);
	if (!ret)
		ret = ret2;
	return ret;
}
Example #10
int
xfs_flush_pages(
	xfs_inode_t	*ip,
	xfs_off_t	first,
	xfs_off_t	last,
	uint64_t	flags,
	int		fiopt)
{
	struct address_space *mapping = VFS_I(ip)->i_mapping;
	int		ret = 0;
	int		ret2;

	if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
		xfs_iflags_clear(ip, XFS_ITRUNCATED);
		ret = -filemap_fdatawrite(mapping);
	}
	if (flags & XBF_ASYNC)
		return ret;
	ret2 = xfs_wait_on_pages(ip, first, last);
	if (!ret)
		ret = ret2;
	return ret;
}
Example #11
/*
 * Initialize the Linux inode, set up the operation vectors and
 * unlock the inode.
 *
 * When reading existing inodes from disk this is called directly
 * from xfs_iget, when creating a new inode it is called from
 * xfs_ialloc after setting up the inode.
 *
 * We are always called with an uninitialised linux inode here.
 * We need to initialise the necessary fields and take a reference
 * on it.
 */
void
xfs_setup_inode(
	struct xfs_inode	*ip)
{
	struct inode		*inode = &ip->i_vnode;

	inode->i_ino = ip->i_ino;
	inode->i_state = I_NEW;

	inode_sb_list_add(inode);
	insert_inode_hash(inode);

	inode->i_mode	= ip->i_d.di_mode;
	inode->i_nlink	= ip->i_d.di_nlink;
	inode->i_uid	= ip->i_d.di_uid;
	inode->i_gid	= ip->i_d.di_gid;

	switch (inode->i_mode & S_IFMT) {
	case S_IFBLK:
	case S_IFCHR:
		inode->i_rdev =
			MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
			      sysv_minor(ip->i_df.if_u2.if_rdev));
		break;
	default:
		inode->i_rdev = 0;
		break;
	}

	inode->i_generation = ip->i_d.di_gen;
	i_size_write(inode, ip->i_d.di_size);
	inode->i_atime.tv_sec	= ip->i_d.di_atime.t_sec;
	inode->i_atime.tv_nsec	= ip->i_d.di_atime.t_nsec;
	inode->i_mtime.tv_sec	= ip->i_d.di_mtime.t_sec;
	inode->i_mtime.tv_nsec	= ip->i_d.di_mtime.t_nsec;
	inode->i_ctime.tv_sec	= ip->i_d.di_ctime.t_sec;
	inode->i_ctime.tv_nsec	= ip->i_d.di_ctime.t_nsec;
	xfs_diflags_to_iflags(inode, ip);

	switch (inode->i_mode & S_IFMT) {
	case S_IFREG:
		inode->i_op = &xfs_inode_operations;
		inode->i_fop = &xfs_file_operations;
		inode->i_mapping->a_ops = &xfs_address_space_operations;
		break;
	case S_IFDIR:
		if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
			inode->i_op = &xfs_dir_ci_inode_operations;
		else
			inode->i_op = &xfs_dir_inode_operations;
		inode->i_fop = &xfs_dir_file_operations;
		break;
	case S_IFLNK:
		inode->i_op = &xfs_symlink_inode_operations;
		if (!(ip->i_df.if_flags & XFS_IFINLINE))
			inode->i_mapping->a_ops = &xfs_address_space_operations;
		break;
	default:
		inode->i_op = &xfs_inode_operations;
		init_special_inode(inode, inode->i_mode, inode->i_rdev);
		break;
	}

	xfs_iflags_clear(ip, XFS_INEW);
	barrier();

	unlock_new_inode(inode);
}
Example #12
/*
 * Initialize the Linux inode, set up the operation vectors and
 * unlock the inode.
 *
 * When reading existing inodes from disk this is called directly
 * from xfs_iget, when creating a new inode it is called from
 * xfs_ialloc after setting up the inode.
 *
 * We are always called with an uninitialised linux inode here.
 * We need to initialise the necessary fields and take a reference
 * on it.
 */
void
xfs_setup_inode(
	struct xfs_inode	*ip)
{
	struct inode		*inode = &ip->i_vnode;
	gfp_t			gfp_mask;

	inode->i_ino = ip->i_ino;
	inode->i_state = I_NEW;

	inode_sb_list_add(inode);
	/* make the inode look hashed for the writeback code */
	hlist_add_fake(&inode->i_hash);

	inode->i_mode	= ip->i_d.di_mode;
	set_nlink(inode, ip->i_d.di_nlink);
	inode->i_uid    = xfs_uid_to_kuid(ip->i_d.di_uid);
	inode->i_gid    = xfs_gid_to_kgid(ip->i_d.di_gid);

	switch (inode->i_mode & S_IFMT) {
	case S_IFBLK:
	case S_IFCHR:
		inode->i_rdev =
			MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
			      sysv_minor(ip->i_df.if_u2.if_rdev));
		break;
	default:
		inode->i_rdev = 0;
		break;
	}

	inode->i_generation = ip->i_d.di_gen;
	i_size_write(inode, ip->i_d.di_size);
	inode->i_atime.tv_sec	= ip->i_d.di_atime.t_sec;
	inode->i_atime.tv_nsec	= ip->i_d.di_atime.t_nsec;
	inode->i_mtime.tv_sec	= ip->i_d.di_mtime.t_sec;
	inode->i_mtime.tv_nsec	= ip->i_d.di_mtime.t_nsec;
	inode->i_ctime.tv_sec	= ip->i_d.di_ctime.t_sec;
	inode->i_ctime.tv_nsec	= ip->i_d.di_ctime.t_nsec;
	xfs_diflags_to_iflags(inode, ip);

	ip->d_ops = ip->i_mount->m_nondir_inode_ops;
	lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class);
	switch (inode->i_mode & S_IFMT) {
	case S_IFREG:
		inode->i_op = &xfs_inode_operations;
		inode->i_fop = &xfs_file_operations;
		inode->i_mapping->a_ops = &xfs_address_space_operations;
		break;
	case S_IFDIR:
		lockdep_set_class(&ip->i_lock.mr_lock, &xfs_dir_ilock_class);
		if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
			inode->i_op = &xfs_dir_ci_inode_operations;
		else
			inode->i_op = &xfs_dir_inode_operations;
		inode->i_fop = &xfs_dir_file_operations;
		ip->d_ops = ip->i_mount->m_dir_inode_ops;
		break;
	case S_IFLNK:
		inode->i_op = &xfs_symlink_inode_operations;
		if (!(ip->i_df.if_flags & XFS_IFINLINE))
			inode->i_mapping->a_ops = &xfs_address_space_operations;
		break;
	default:
		inode->i_op = &xfs_inode_operations;
		init_special_inode(inode, inode->i_mode, inode->i_rdev);
		break;
	}

	/*
	 * Ensure all page cache allocations are done from GFP_NOFS context to
	 * prevent direct reclaim recursion back into the filesystem and blowing
	 * stacks or deadlocking.
	 */
	gfp_mask = mapping_gfp_mask(inode->i_mapping);
	mapping_set_gfp_mask(inode->i_mapping, (gfp_mask & ~(__GFP_FS)));

	/*
	 * If there is no attribute fork no ACL can exist on this inode,
	 * and it can't have any file capabilities attached to it either.
	 */
	if (!XFS_IFORK_Q(ip)) {
		inode_has_no_xattr(inode);
		cache_no_acl(inode);
	}

	xfs_iflags_clear(ip, XFS_INEW);
	barrier();

	unlock_new_inode(inode);
}
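The GFP-mask manipulation near the end of the function above is a self-contained idiom: clearing __GFP_FS on the inode's mapping means page cache allocations for that file can never recurse into filesystem reclaim. A minimal sketch isolating it, using only the mapping accessors already shown above; the helper name is hypothetical:

/* Hypothetical helper: force all page cache allocations for a mapping
 * into GFP_NOFS context, exactly as xfs_setup_inode() does above. */
static inline void
example_mapping_set_nofs(
	struct address_space	*mapping)
{
	gfp_t			gfp_mask = mapping_gfp_mask(mapping);

	mapping_set_gfp_mask(mapping, gfp_mask & ~__GFP_FS);
}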
Example #13
STATIC int
xfs_file_fsync(
	struct file		*file,
	loff_t			start,
	loff_t			end,
	int			datasync)
{
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	int			error = 0;
	int			log_flushed = 0;
	xfs_lsn_t		lsn = 0;

	trace_xfs_file_fsync(ip);

	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (error)
		return error;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	xfs_iflags_clear(ip, XFS_ITRUNCATED);

	if (mp->m_flags & XFS_MOUNT_BARRIER) {
		/*
		 * If we have an RT and/or log subvolume we need to make sure
		 * to flush the write cache the device used for file data
		 * first.  This is to ensure newly written file data make
		 * it to disk before logging the new inode size in case of
		 * an extending write.
		 */
		if (XFS_IS_REALTIME_INODE(ip))
			xfs_blkdev_issue_flush(mp->m_rtdev_targp);
		else if (mp->m_logdev_targp != mp->m_ddev_targp)
			xfs_blkdev_issue_flush(mp->m_ddev_targp);
	}

	/*
	 * All metadata updates are logged, which means that we just have
	 * to flush the log up to the latest LSN that touched the inode.
	 */
	xfs_ilock(ip, XFS_ILOCK_SHARED);
	if (xfs_ipincount(ip)) {
		if (!datasync ||
		    (ip->i_itemp->ili_fields & ~XFS_ILOG_TIMESTAMP))
			lsn = ip->i_itemp->ili_last_lsn;
	}
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	if (lsn)
		error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);

	/*
	 * If we only have a single device, and the log force above was
	 * a no-op we might have to flush the data device cache here.
	 * This can only happen for fdatasync/O_DSYNC if we were overwriting
	 * an already allocated file and thus do not have any metadata to
	 * commit.
	 */
	if ((mp->m_flags & XFS_MOUNT_BARRIER) &&
	    mp->m_logdev_targp == mp->m_ddev_targp &&
	    !XFS_IS_REALTIME_INODE(ip) &&
	    !log_flushed)
		xfs_blkdev_issue_flush(mp->m_ddev_targp);

	return error;
}
Example #14
STATIC int
xfs_file_fsync(
	struct file		*file,
	struct dentry		*dentry,
	int			datasync)
{
	struct xfs_inode	*ip = XFS_I(dentry->d_inode);
	struct xfs_trans	*tp;
	int			error = 0;
	int			log_flushed = 0;

	xfs_itrace_entry(ip);

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return -XFS_ERROR(EIO);

	xfs_iflags_clear(ip, XFS_ITRUNCATED);

	/*
	 * We always need to make sure that the required inode state is safe on
	 * disk.  The inode might be clean but we still might need to force the
	 * log because of committed transactions that haven't hit the disk yet.
	 * Likewise, there could be unflushed non-transactional changes to the
	 * inode core that have to go to disk and this requires us to issue
	 * a synchronous transaction to capture these changes correctly.
	 *
	 * This code relies on the assumption that if the i_update_core field
	 * of the inode is clear and the inode is unpinned then it is clean
	 * and no action is required.
	 */
	xfs_ilock(ip, XFS_ILOCK_SHARED);

	/*
	 * First check if the VFS inode is marked dirty.  All the dirtying
	 * of non-transactional updates now goes through mark_inode_dirty*,
	 * which allows us to distinguish between pure timestamp updates
	 * and i_size updates which need to be caught for fdatasync.
	 * After that also check for the dirty state in the XFS inode, which
	 * might get cleared when the inode gets written out via the AIL
	 * or xfs_iflush_cluster.
	 */
	if (((dentry->d_inode->i_state & I_DIRTY_DATASYNC) ||
	    ((dentry->d_inode->i_state & I_DIRTY_SYNC) && !datasync)) &&
	    ip->i_update_core) {
		/*
		 * Kick off a transaction to log the inode core to get the
		 * updates.  The sync transaction will also force the log.
		 */
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
		tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS);
		error = xfs_trans_reserve(tp, 0,
				XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0);
		if (error) {
			xfs_trans_cancel(tp, 0);
			return -error;
		}
		xfs_ilock(ip, XFS_ILOCK_EXCL);

		/*
		 * Note - it's possible that we might have pushed ourselves out
		 * of the way during trans_reserve which would flush the inode.
		 * But there's no guarantee that the inode buffer has actually
		 * gone out yet (it's delwri).	Plus the buffer could be pinned
		 * anyway if it's part of an inode in another recent
		 * transaction.	 So we play it safe and fire off the
		 * transaction anyway.
		 */
		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
		xfs_trans_ihold(tp, ip);
		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
		xfs_trans_set_sync(tp);
		error = _xfs_trans_commit(tp, 0, &log_flushed);

		xfs_iunlock(ip, XFS_ILOCK_EXCL);
	} else {
		/*
		 * Timestamps/size haven't changed since last inode flush or
		 * inode transaction commit.  That means either nothing got
		 * written or a transaction committed which caught the updates.
		 * If the latter happened and the transaction hasn't hit the
		 * disk yet, the inode will be still be pinned.  If it is,
		 * force the log.
		 */
		if (xfs_ipincount(ip)) {
			error = _xfs_log_force_lsn(ip->i_mount,
					ip->i_itemp->ili_last_lsn,
					XFS_LOG_SYNC, &log_flushed);
		}
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
	}

	if (ip->i_mount->m_flags & XFS_MOUNT_BARRIER) {
		/*
		 * If the log write didn't issue an ordered tag we need
		 * to flush the disk cache for the data device now.
		 */
		if (!log_flushed)
			xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp);

		/*
		 * If this inode is on the RT dev we need to flush that
		 * cache as well.
		 */
		if (XFS_IS_REALTIME_INODE(ip))
			xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp);
	}

	return -error;
}
Example #15
/*
 * Inodes in different states need to be treated differently, and the return
 * value of xfs_iflush is not sufficient to get this right. The following table
 * lists the inode states and the reclaim actions necessary for non-blocking
 * reclaim:
 *
 *
 *	inode state	     iflush ret		required action
 *      ---------------      ----------         ---------------
 *	bad			-		reclaim
 *	shutdown		EIO		unpin and reclaim
 *	clean, unpinned		0		reclaim
 *	stale, unpinned		0		reclaim
 *	clean, pinned(*)	0		requeue
 *	stale, pinned		EAGAIN		requeue
 *	dirty, delwri ok	0		requeue
 *	dirty, delwri blocked	EAGAIN		requeue
 *	dirty, sync flush	0		reclaim
 *
 * (*) dgc: I don't think the clean, pinned state is possible but it gets
 * handled anyway given the order of checks implemented.
 *
 * As can be seen from the table, the return value of xfs_iflush() is not
 * sufficient to correctly decide the reclaim action here. The checks in
 * xfs_iflush() might look like duplicates, but they are not.
 *
 * Also, because we get the flush lock first, we know that any inode that has
 * been flushed delwri has had the flush completed by the time we check that
 * the inode is clean. The clean inode check needs to be done before flushing
 * the inode delwri otherwise we would loop forever requeuing clean inodes as
 * we cannot tell apart a successful delwri flush and a clean inode from the
 * return value of xfs_iflush().
 *
 * Note that because the inode is flushed delayed write by background
 * writeback, the flush lock may already be held here and waiting on it can
 * result in very long latencies. Hence for sync reclaims, where we wait on the
 * flush lock, the caller should push out delayed write inodes first before
 * trying to reclaim them to minimise the amount of time spent waiting. For
	 * background reclaim, we just requeue the inode for the next pass.
 *
 * Hence the order of actions after gaining the locks should be:
 *	bad		=> reclaim
 *	shutdown	=> unpin and reclaim
 *	pinned, delwri	=> requeue
 *	pinned, sync	=> unpin
 *	stale		=> reclaim
 *	clean		=> reclaim
 *	dirty, delwri	=> flush and requeue
 *	dirty, sync	=> flush, wait and reclaim
 */
STATIC int
xfs_reclaim_inode(
	struct xfs_inode	*ip,
	struct xfs_perag	*pag,
	int			sync_mode)
{
	int	error;

restart:
	error = 0;
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	if (!xfs_iflock_nowait(ip)) {
		if (!(sync_mode & SYNC_WAIT))
			goto out;

		/*
		 * If we only have a single dirty inode in a cluster there is
		 * a fair chance that the AIL push may have pushed it into
		 * the buffer, but xfsbufd won't touch it until 30 seconds
		 * from now, and thus we will lock up here.
		 *
		 * Promote the inode buffer to the front of the delwri list
		 * and wake up xfsbufd now.
		 */
		xfs_promote_inode(ip);
		xfs_iflock(ip);
	}

	if (is_bad_inode(VFS_I(ip)))
		goto reclaim;
	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
		xfs_iunpin_wait(ip);
		goto reclaim;
	}
	if (xfs_ipincount(ip)) {
		if (!(sync_mode & SYNC_WAIT)) {
			xfs_ifunlock(ip);
			goto out;
		}
		xfs_iunpin_wait(ip);
	}
	if (xfs_iflags_test(ip, XFS_ISTALE))
		goto reclaim;
	if (xfs_inode_clean(ip))
		goto reclaim;

	/*
	 * Now we have an inode that needs flushing.
	 *
	 * We do a nonblocking flush here even if we are doing a SYNC_WAIT
	 * reclaim as we can deadlock with inode cluster removal.
	 * xfs_ifree_cluster() can lock the inode buffer before it locks the
	 * ip->i_lock, and we are doing the exact opposite here. As a result,
	 * doing a blocking xfs_itobp() to get the cluster buffer will result
	 * in an ABBA deadlock with xfs_ifree_cluster().
	 *
	 * As xfs_ifree_cluster() must gather all inodes that are active in the
	 * cache to mark them stale, if we hit this case we don't actually want
	 * to do IO here - we want the inode marked stale so we can simply
	 * reclaim it. Hence if we get an EAGAIN error on a SYNC_WAIT flush,
	 * just unlock the inode, back off and try again. Hopefully the next
	 * pass through will see the stale flag set on the inode.
	 */
	error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode);
	if (sync_mode & SYNC_WAIT) {
		if (error == EAGAIN) {
			xfs_iunlock(ip, XFS_ILOCK_EXCL);
			/* backoff longer than in xfs_ifree_cluster */
			delay(2);
			goto restart;
		}
		xfs_iflock(ip);
		goto reclaim;
	}

	/*
	 * When we have to flush an inode but don't have SYNC_WAIT set, we
	 * flush the inode out using a delwri buffer and wait for the next
	 * call into reclaim to find it in a clean state instead of waiting for
	 * it now. We also don't return errors here - if the error is transient
	 * then the next reclaim pass will flush the inode, and if the error
	 * is permanent then the next sync reclaim will reclaim the inode and
	 * pass on the error.
	 */
	if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
		xfs_warn(ip->i_mount,
			"inode 0x%llx background reclaim flush failed with %d",
			(long long)ip->i_ino, error);
	}
out:
	xfs_iflags_clear(ip, XFS_IRECLAIM);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	/*
	 * We could return EAGAIN here to make reclaim rescan the inode tree in
	 * a short while. However, this just burns CPU time scanning the tree
	 * waiting for IO to complete and xfssyncd never goes back to the idle
	 * state. Instead, return 0 to let the next scheduled background reclaim
	 * attempt to reclaim the inode again.
	 */
	return 0;

reclaim:
	xfs_ifunlock(ip);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	XFS_STATS_INC(xs_ig_reclaims);
	/*
	 * Remove the inode from the per-AG radix tree.
	 *
	 * Because radix_tree_delete won't complain even if the item was never
	 * added to the tree, assert that it's been there before to catch
	 * problems with the inode lifetime early on.
	 */
	spin_lock(&pag->pag_ici_lock);
	if (!radix_tree_delete(&pag->pag_ici_root,
				XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
		ASSERT(0);
	__xfs_inode_clear_reclaim(pag, ip);
	spin_unlock(&pag->pag_ici_lock);

	/*
	 * Here we do an (almost) spurious inode lock in order to coordinate
	 * with inode cache radix tree lookups.  This is because the lookup
	 * can reference the inodes in the cache without taking references.
	 *
	 * We make that OK here by ensuring that we wait until the inode is
	 * unlocked after the lookup before we go ahead and free it.  We get
	 * both the ilock and the iolock because the code may need to drop the
	 * ilock one but will still hold the iolock.
	 */
	xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
	xfs_qm_dqdetach(ip);
	xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);

	xfs_inode_free(ip);
	return error;

}
Example #16
/*
 * Inodes in different states need to be treated differently. The following
 * table lists the inode states and the reclaim actions necessary:
 *
 *	inode state	     iflush ret		required action
 *      ---------------      ----------         ---------------
 *	bad			-		reclaim
 *	shutdown		EIO		unpin and reclaim
 *	clean, unpinned		0		reclaim
 *	stale, unpinned		0		reclaim
 *	clean, pinned(*)	0		requeue
 *	stale, pinned		EAGAIN		requeue
 *	dirty, async		-		requeue
 *	dirty, sync		0		reclaim
 *
 * (*) dgc: I don't think the clean, pinned state is possible but it gets
 * handled anyway given the order of checks implemented.
 *
 * Also, because we get the flush lock first, we know that any inode that has
 * been flushed delwri has had the flush completed by the time we check that
 * the inode is clean.
 *
 * Note that because the inode is flushed delayed write by AIL pushing, the
 * flush lock may already be held here and waiting on it can result in very
 * long latencies.  Hence for sync reclaims, where we wait on the flush lock,
 * the caller should push the AIL first before trying to reclaim inodes to
	 * minimise the amount of time spent waiting.  For background reclaim, we only
 * bother to reclaim clean inodes anyway.
 *
 * Hence the order of actions after gaining the locks should be:
 *	bad		=> reclaim
 *	shutdown	=> unpin and reclaim
 *	pinned, async	=> requeue
 *	pinned, sync	=> unpin
 *	stale		=> reclaim
 *	clean		=> reclaim
 *	dirty, async	=> requeue
 *	dirty, sync	=> flush, wait and reclaim
 */
STATIC int
xfs_reclaim_inode(
	struct xfs_inode	*ip,
	struct xfs_perag	*pag,
	int			sync_mode)
{
	struct xfs_buf		*bp = NULL;
	xfs_ino_t		ino = ip->i_ino; /* for radix_tree_delete */
	int			error;

restart:
	error = 0;
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	if (!xfs_iflock_nowait(ip)) {
		if (!(sync_mode & SYNC_WAIT))
			goto out;
		xfs_iflock(ip);
	}

	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
		xfs_iunpin_wait(ip);
		/* xfs_iflush_abort() drops the flush lock */
		xfs_iflush_abort(ip, false);
		goto reclaim;
	}
	if (xfs_ipincount(ip)) {
		if (!(sync_mode & SYNC_WAIT))
			goto out_ifunlock;
		xfs_iunpin_wait(ip);
	}
	if (xfs_iflags_test(ip, XFS_ISTALE) || xfs_inode_clean(ip)) {
		xfs_ifunlock(ip);
		goto reclaim;
	}

	/*
	 * Never flush out dirty data during non-blocking reclaim, as it would
	 * just contend with AIL pushing trying to do the same job.
	 */
	if (!(sync_mode & SYNC_WAIT))
		goto out_ifunlock;

	/*
	 * Now we have an inode that needs flushing.
	 *
	 * Note that xfs_iflush will never block on the inode buffer lock, as
	 * xfs_ifree_cluster() can lock the inode buffer before it locks the
	 * ip->i_lock, and we are doing the exact opposite here.  As a result,
	 * doing a blocking xfs_imap_to_bp() to get the cluster buffer would
	 * result in an ABBA deadlock with xfs_ifree_cluster().
	 *
	 * As xfs_ifree_cluster() must gather all inodes that are active in the
	 * cache to mark them stale, if we hit this case we don't actually want
	 * to do IO here - we want the inode marked stale so we can simply
	 * reclaim it.  Hence if we get an EAGAIN error here,  just unlock the
	 * inode, back off and try again.  Hopefully the next pass through will
	 * see the stale flag set on the inode.
	 */
	error = xfs_iflush(ip, &bp);
	if (error == -EAGAIN) {
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		/* backoff longer than in xfs_ifree_cluster */
		delay(2);
		goto restart;
	}

	if (!error) {
		error = xfs_bwrite(bp);
		xfs_buf_relse(bp);
	}

reclaim:
	ASSERT(!xfs_isiflocked(ip));

	/*
	 * Because we use RCU freeing we need to ensure the inode always appears
	 * to be reclaimed with an invalid inode number when in the free state.
	 * We do this as early as possible under the ILOCK so that
	 * xfs_iflush_cluster() can be guaranteed to detect races with us here.
	 * By doing this, we guarantee that once xfs_iflush_cluster has locked
	 * XFS_ILOCK that it will see either a valid, flushable inode that will
	 * serialise correctly, or it will see a clean (and invalid) inode that
	 * it can skip.
	 */
	spin_lock(&ip->i_flags_lock);
	ip->i_flags = XFS_IRECLAIM;
	ip->i_ino = 0;
	spin_unlock(&ip->i_flags_lock);

	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	XFS_STATS_INC(ip->i_mount, xs_ig_reclaims);
	/*
	 * Remove the inode from the per-AG radix tree.
	 *
	 * Because radix_tree_delete won't complain even if the item was never
	 * added to the tree, assert that it's been there before to catch
	 * problems with the inode lifetime early on.
	 */
	spin_lock(&pag->pag_ici_lock);
	if (!radix_tree_delete(&pag->pag_ici_root,
				XFS_INO_TO_AGINO(ip->i_mount, ino)))
		ASSERT(0);
	xfs_perag_clear_reclaim_tag(pag);
	spin_unlock(&pag->pag_ici_lock);

	/*
	 * Here we do an (almost) spurious inode lock in order to coordinate
	 * with inode cache radix tree lookups.  This is because the lookup
	 * can reference the inodes in the cache without taking references.
	 *
	 * We make that OK here by ensuring that we wait until the inode is
	 * unlocked after the lookup before we go ahead and free it.
	 */
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_qm_dqdetach(ip);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	__xfs_inode_free(ip);
	return error;

out_ifunlock:
	xfs_ifunlock(ip);
out:
	xfs_iflags_clear(ip, XFS_IRECLAIM);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	/*
	 * We could return -EAGAIN here to make reclaim rescan the inode tree in
	 * a short while. However, this just burns CPU time scanning the tree
	 * waiting for IO to complete and the reclaim work never goes back to
	 * the idle state. Instead, return 0 to let the next scheduled
	 * background reclaim attempt to reclaim the inode again.
	 */
	return 0;
}
Example #17
/*
 * Initialize the Linux inode, set up the operation vectors and
 * unlock the inode.
 *
 * When reading existing inodes from disk this is called directly
 * from xfs_iget, when creating a new inode it is called from
 * xfs_ialloc after setting up the inode.
 *
 * We are always called with an uninitialised linux inode here.
 * We need to initialise the necessary fields and take a reference
 * on it.
 */
void
xfs_setup_inode(
	struct xfs_inode	*ip)
{
	struct inode		*inode = &ip->i_vnode;
	gfp_t			gfp_mask;

	inode->i_ino = ip->i_ino;
	inode->i_state = I_NEW|I_LOCK;
	inode_add_to_lists(ip->i_mount->m_super, inode);

	inode->i_mode	= ip->i_d.di_mode;
	inode->i_nlink	= ip->i_d.di_nlink;
	inode->i_uid	= ip->i_d.di_uid;
	inode->i_gid	= ip->i_d.di_gid;

	switch (inode->i_mode & S_IFMT) {
	case S_IFBLK:
	case S_IFCHR:
		inode->i_rdev =
			MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
			      sysv_minor(ip->i_df.if_u2.if_rdev));
		break;
	default:
		inode->i_rdev = 0;
		break;
	}

	inode->i_generation = ip->i_d.di_gen;
	i_size_write(inode, ip->i_d.di_size);
	inode->i_atime.tv_sec	= ip->i_d.di_atime.t_sec;
	inode->i_atime.tv_nsec	= ip->i_d.di_atime.t_nsec;
	inode->i_mtime.tv_sec	= ip->i_d.di_mtime.t_sec;
	inode->i_mtime.tv_nsec	= ip->i_d.di_mtime.t_nsec;
	inode->i_ctime.tv_sec	= ip->i_d.di_ctime.t_sec;
	inode->i_ctime.tv_nsec	= ip->i_d.di_ctime.t_nsec;
	xfs_diflags_to_iflags(inode, ip);

	switch (inode->i_mode & S_IFMT) {
	case S_IFREG:
		inode->i_op = &xfs_inode_operations;
		inode->i_fop = &xfs_file_operations;
		inode->i_mapping->a_ops = &xfs_address_space_operations;
		break;
	case S_IFDIR:
		if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
			inode->i_op = &xfs_dir_ci_inode_operations;
		else
			inode->i_op = &xfs_dir_inode_operations;
		inode->i_fop = &xfs_dir_file_operations;
		break;
	case S_IFLNK:
		inode->i_op = &xfs_symlink_inode_operations;
		if (!(ip->i_df.if_flags & XFS_IFINLINE))
			inode->i_mapping->a_ops = &xfs_address_space_operations;
		break;
	default:
		inode->i_op = &xfs_inode_operations;
		init_special_inode(inode, inode->i_mode, inode->i_rdev);
		break;
	}

	/*
	 * Ensure all page cache allocations are done from GFP_NOFS context to
	 * prevent direct reclaim recursion back into the filesystem and blowing
	 * stacks or deadlocking.
	 */
	gfp_mask = mapping_gfp_mask(inode->i_mapping);
	mapping_set_gfp_mask(inode->i_mapping, (gfp_mask & ~(__GFP_FS)));

	xfs_iflags_clear(ip, XFS_INEW);
	barrier();

	unlock_new_inode(inode);
}
/*
 * Initialize the Linux inode, set up the operation vectors and
 * unlock the inode.
 *
 * When reading existing inodes from disk this is called directly
 * from xfs_iget, when creating a new inode it is called from
 * xfs_ialloc after setting up the inode.
 *
 * We are always called with an uninitialised linux inode here.
 * We need to initialise the necessary fields and take a reference
 * on it.
 */
void
xfs_setup_inode(
	struct xfs_inode	*ip)
{
	struct inode		*inode = &ip->i_vnode;

	inode->i_ino = ip->i_ino;
	inode->i_state = I_NEW;

	inode_sb_list_add(inode);
	/* make the inode look hashed for the writeback code */
	hlist_add_fake(&inode->i_hash);

	inode->i_mode	= ip->i_d.di_mode;
	inode->i_nlink	= ip->i_d.di_nlink;
	inode->i_uid	= ip->i_d.di_uid;
	inode->i_gid	= ip->i_d.di_gid;

	switch (inode->i_mode & S_IFMT) {
	case S_IFBLK:
	case S_IFCHR:
		inode->i_rdev =
			MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
			      sysv_minor(ip->i_df.if_u2.if_rdev));
		break;
	default:
		inode->i_rdev = 0;
		break;
	}

	inode->i_generation = ip->i_d.di_gen;
	i_size_write(inode, ip->i_d.di_size);
	inode->i_atime.tv_sec	= ip->i_d.di_atime.t_sec;
	inode->i_atime.tv_nsec	= ip->i_d.di_atime.t_nsec;
	inode->i_mtime.tv_sec	= ip->i_d.di_mtime.t_sec;
	inode->i_mtime.tv_nsec	= ip->i_d.di_mtime.t_nsec;
	inode->i_ctime.tv_sec	= ip->i_d.di_ctime.t_sec;
	inode->i_ctime.tv_nsec	= ip->i_d.di_ctime.t_nsec;
	xfs_diflags_to_iflags(inode, ip);

	switch (inode->i_mode & S_IFMT) {
	case S_IFREG:
		inode->i_op = &xfs_inode_operations;
		inode->i_fop = &xfs_file_operations;
		inode->i_mapping->a_ops = &xfs_address_space_operations;
		break;
	case S_IFDIR:
		if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
			inode->i_op = &xfs_dir_ci_inode_operations;
		else
			inode->i_op = &xfs_dir_inode_operations;
		inode->i_fop = &xfs_dir_file_operations;
		break;
	case S_IFLNK:
		inode->i_op = &xfs_symlink_inode_operations;
		if (!(ip->i_df.if_flags & XFS_IFINLINE))
			inode->i_mapping->a_ops = &xfs_address_space_operations;
		break;
	default:
		inode->i_op = &xfs_inode_operations;
		init_special_inode(inode, inode->i_mode, inode->i_rdev);
		break;
	}

	/*
	 * If there is no attribute fork no ACL can exist on this inode,
	 * and it can't have any file capabilities attached to it either.
	 */
	if (!XFS_IFORK_Q(ip)) {
		inode_has_no_xattr(inode);
		cache_no_acl(inode);
	}

	xfs_iflags_clear(ip, XFS_INEW);
	barrier();

	unlock_new_inode(inode);
}
Example #19
STATIC int
xfs_file_fsync(
	struct file		*file,
	loff_t			start,
	loff_t			end,
	int			datasync)
{
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	int			error = 0;
	int			log_flushed = 0;
	xfs_lsn_t		lsn = 0;

	trace_xfs_file_fsync(ip);

	error = file_write_and_wait_range(file, start, end);
	if (error)
		return error;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	xfs_iflags_clear(ip, XFS_ITRUNCATED);

	/*
	 * If we have an RT and/or log subvolume we need to make sure to flush
	 * the write cache the device used for file data first.  This is to
	 * ensure newly written file data make it to disk before logging the new
	 * inode size in case of an extending write.
	 */
	if (XFS_IS_REALTIME_INODE(ip))
		xfs_blkdev_issue_flush(mp->m_rtdev_targp);
	else if (mp->m_logdev_targp != mp->m_ddev_targp)
		xfs_blkdev_issue_flush(mp->m_ddev_targp);

	/*
	 * All metadata updates are logged, which means that we just have to
	 * flush the log up to the latest LSN that touched the inode. If we have
	 * concurrent fsync/fdatasync() calls, we need them to all block on the
	 * log force before we clear the ili_fsync_fields field. This ensures
	 * that we don't get a racing sync operation that does not wait for the
	 * metadata to hit the journal before returning. If we race with
	 * clearing the ili_fsync_fields, then all that will happen is the log
	 * force will do nothing as the lsn will already be on disk. We can't
	 * race with setting ili_fsync_fields because that is done under
	 * XFS_ILOCK_EXCL, and that can't happen because we hold the lock shared
	 * until after the ili_fsync_fields is cleared.
	 */
	xfs_ilock(ip, XFS_ILOCK_SHARED);
	if (xfs_ipincount(ip)) {
		if (!datasync ||
		    (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
			lsn = ip->i_itemp->ili_last_lsn;
	}

	if (lsn) {
		error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
		ip->i_itemp->ili_fsync_fields = 0;
	}
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	/*
	 * If we only have a single device, and the log force above was
	 * a no-op we might have to flush the data device cache here.
	 * This can only happen for fdatasync/O_DSYNC if we were overwriting
	 * an already allocated file and thus do not have any metadata to
	 * commit.
	 */
	if (!log_flushed && !XFS_IS_REALTIME_INODE(ip) &&
	    mp->m_logdev_targp == mp->m_ddev_targp)
		xfs_blkdev_issue_flush(mp->m_ddev_targp);

	return error;
}
Example #20
STATIC int
xfs_reclaim_inode(
	struct xfs_inode	*ip,
	struct xfs_perag	*pag,
	int			sync_mode)
{
	int	error;

restart:
	error = 0;
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	if (!xfs_iflock_nowait(ip)) {
		if (!(sync_mode & SYNC_WAIT))
			goto out;

		xfs_promote_inode(ip);
		xfs_iflock(ip);
	}

	if (is_bad_inode(VFS_I(ip)))
		goto reclaim;
	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
		xfs_iunpin_wait(ip);
		goto reclaim;
	}
	if (xfs_ipincount(ip)) {
		if (!(sync_mode & SYNC_WAIT)) {
			xfs_ifunlock(ip);
			goto out;
		}
		xfs_iunpin_wait(ip);
	}
	if (xfs_iflags_test(ip, XFS_ISTALE))
		goto reclaim;
	if (xfs_inode_clean(ip))
		goto reclaim;

	error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode);
	if (sync_mode & SYNC_WAIT) {
		if (error == EAGAIN) {
			xfs_iunlock(ip, XFS_ILOCK_EXCL);
			
			/* backoff longer than in xfs_ifree_cluster */
			delay(2);
			goto restart;
		}
		xfs_iflock(ip);
		goto reclaim;
	}

	if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
		xfs_warn(ip->i_mount,
			"inode 0x%llx background reclaim flush failed with %d",
			(long long)ip->i_ino, error);
	}
out:
	xfs_iflags_clear(ip, XFS_IRECLAIM);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return 0;

reclaim:
	xfs_ifunlock(ip);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	XFS_STATS_INC(xs_ig_reclaims);
	spin_lock(&pag->pag_ici_lock);
	if (!radix_tree_delete(&pag->pag_ici_root,
				XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
		ASSERT(0);
	__xfs_inode_clear_reclaim(pag, ip);
	spin_unlock(&pag->pag_ici_lock);

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_qm_dqdetach(ip);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	xfs_inode_free(ip);

	return error;
}
Example #21
STATIC int
xfs_file_fsync(
	struct file		*file,
	loff_t			start,
	loff_t			end,
	int			datasync)
{
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	int			error = 0;
	int			log_flushed = 0;

	trace_xfs_file_fsync(ip);

	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (error)
		return error;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -XFS_ERROR(EIO);

	xfs_iflags_clear(ip, XFS_ITRUNCATED);

	xfs_ilock(ip, XFS_IOLOCK_SHARED);
	xfs_ioend_wait(ip);
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);

	if (mp->m_flags & XFS_MOUNT_BARRIER) {
		/*
		 * If we have an RT and/or log subvolume we need to make sure
		 * to flush the write cache the device used for file data
		 * first.  This is to ensure newly written file data make
		 * it to disk before logging the new inode size in case of
		 * an extending write.
		 */
		if (XFS_IS_REALTIME_INODE(ip))
			xfs_blkdev_issue_flush(mp->m_rtdev_targp);
		else if (mp->m_logdev_targp != mp->m_ddev_targp)
			xfs_blkdev_issue_flush(mp->m_ddev_targp);
	}

	/*
	 * We always need to make sure that the required inode state is safe on
	 * disk.  The inode might be clean but we still might need to force the
	 * log because of committed transactions that haven't hit the disk yet.
	 * Likewise, there could be unflushed non-transactional changes to the
	 * inode core that have to go to disk and this requires us to issue
	 * a synchronous transaction to capture these changes correctly.
	 *
	 * This code relies on the assumption that if the i_update_core field
	 * of the inode is clear and the inode is unpinned then it is clean
	 * and no action is required.
	 */
	xfs_ilock(ip, XFS_ILOCK_SHARED);

	/*
	 * First check if the VFS inode is marked dirty.  All the dirtying
	 * of non-transactional updates now goes through mark_inode_dirty*,
	 * which allows us to distinguish between pure timestamp updates
	 * and i_size updates which need to be caught for fdatasync.
	 * After that also check for the dirty state in the XFS inode, which
	 * might get cleared when the inode gets written out via the AIL
	 * or xfs_iflush_cluster.
	 */
	if (((inode->i_state & I_DIRTY_DATASYNC) ||
	    ((inode->i_state & I_DIRTY_SYNC) && !datasync)) &&
	    ip->i_update_core) {
		/*
		 * Kick off a transaction to log the inode core to get the
		 * updates.  The sync transaction will also force the log.
		 */
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
		tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
		error = xfs_trans_reserve(tp, 0,
				XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
		if (error) {
			xfs_trans_cancel(tp, 0);
			return -error;
		}
		xfs_ilock(ip, XFS_ILOCK_EXCL);

		/*
		 * Note - it's possible that we might have pushed ourselves out
		 * of the way during trans_reserve which would flush the inode.
		 * But there's no guarantee that the inode buffer has actually
		 * gone out yet (it's delwri).	Plus the buffer could be pinned
		 * anyway if it's part of an inode in another recent
		 * transaction.	 So we play it safe and fire off the
		 * transaction anyway.
		 */
		xfs_trans_ijoin(tp, ip);
		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
		xfs_trans_set_sync(tp);
		error = _xfs_trans_commit(tp, 0, &log_flushed);

		xfs_iunlock(ip, XFS_ILOCK_EXCL);
	} else {
		/*
		 * Timestamps/size haven't changed since last inode flush or
		 * inode transaction commit.  That means either nothing got
		 * written or a transaction committed which caught the updates.
		 * If the latter happened and the transaction hasn't hit the
		 * disk yet, the inode will be still be pinned.  If it is,
		 * force the log.
		 */
		if (xfs_ipincount(ip)) {
			error = _xfs_log_force_lsn(mp,
					ip->i_itemp->ili_last_lsn,
					XFS_LOG_SYNC, &log_flushed);
		}
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
	}

	/*
	 * If we only have a single device, and the log force above was
	 * a no-op we might have to flush the data device cache here.
	 * This can only happen for fdatasync/O_DSYNC if we were overwriting
	 * an already allocated file and thus do not have any metadata to
	 * commit.
	 */
	if ((mp->m_flags & XFS_MOUNT_BARRIER) &&
	    mp->m_logdev_targp == mp->m_ddev_targp &&
	    !XFS_IS_REALTIME_INODE(ip) &&
	    !log_flushed)
		xfs_blkdev_issue_flush(mp->m_ddev_targp);

	return -error;
}