STATIC int xfs_file_fsync( struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; int error = 0; int log_flushed = 0; xfs_lsn_t lsn = 0; trace_xfs_file_fsync(ip); error = filemap_write_and_wait_range(inode->i_mapping, start, end); if (error) return error; if (XFS_FORCED_SHUTDOWN(mp)) return -XFS_ERROR(EIO); xfs_iflags_clear(ip, XFS_ITRUNCATED); if (mp->m_flags & XFS_MOUNT_BARRIER) { /* * If we have an RT and/or log subvolume we need to make sure * to flush the write cache the device used for file data * first. This is to ensure newly written file data make * it to disk before logging the new inode size in case of * an extending write. */ if (XFS_IS_REALTIME_INODE(ip)) xfs_blkdev_issue_flush(mp->m_rtdev_targp); else if (mp->m_logdev_targp != mp->m_ddev_targp) xfs_blkdev_issue_flush(mp->m_ddev_targp); } xfs_ilock(ip, XFS_ILOCK_SHARED); if (xfs_ipincount(ip)) { if (!datasync || (ip->i_itemp->ili_fields & ~XFS_ILOG_TIMESTAMP)) lsn = ip->i_itemp->ili_last_lsn; } xfs_iunlock(ip, XFS_ILOCK_SHARED); if (lsn) error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed); if ((mp->m_flags & XFS_MOUNT_BARRIER) && mp->m_logdev_targp == mp->m_ddev_targp && !XFS_IS_REALTIME_INODE(ip) && !log_flushed) xfs_blkdev_issue_flush(mp->m_ddev_targp); return -error; }
STATIC int xfs_fs_nfs_commit_metadata( struct inode *inode) { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; xfs_lsn_t lsn = 0; xfs_ilock(ip, XFS_ILOCK_SHARED); if (xfs_ipincount(ip)) lsn = ip->i_itemp->ili_last_lsn; xfs_iunlock(ip, XFS_ILOCK_SHARED); if (!lsn) return 0; return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL); }
STATIC int xfs_fs_nfs_commit_metadata( struct inode *inode) { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; int error = 0; xfs_ilock(ip, XFS_ILOCK_SHARED); if (xfs_ipincount(ip)) { error = _xfs_log_force_lsn(mp, ip->i_itemp->ili_last_lsn, XFS_LOG_SYNC, NULL); } xfs_iunlock(ip, XFS_ILOCK_SHARED); return error; }
/* * Fsync operations on directories are much simpler than on regular files, * as there is no file data to flush, and thus also no need for explicit * cache flush operations, and there are no non-transaction metadata updates * on directories either. */ STATIC int xfs_dir_fsync( struct file *file, loff_t start, loff_t end, int datasync) { struct xfs_inode *ip = XFS_I(file->f_mapping->host); struct xfs_mount *mp = ip->i_mount; xfs_lsn_t lsn = 0; trace_xfs_dir_fsync(ip); xfs_ilock(ip, XFS_ILOCK_SHARED); if (xfs_ipincount(ip)) lsn = ip->i_itemp->ili_last_lsn; xfs_iunlock(ip, XFS_ILOCK_SHARED); if (!lsn) return 0; return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL); }
STATIC int xfs_file_fsync( struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; int error = 0; int log_flushed = 0; xfs_lsn_t lsn = 0; trace_xfs_file_fsync(ip); error = file_write_and_wait_range(file, start, end); if (error) return error; if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; xfs_iflags_clear(ip, XFS_ITRUNCATED); /* * If we have an RT and/or log subvolume we need to make sure to flush * the write cache the device used for file data first. This is to * ensure newly written file data make it to disk before logging the new * inode size in case of an extending write. */ if (XFS_IS_REALTIME_INODE(ip)) xfs_blkdev_issue_flush(mp->m_rtdev_targp); else if (mp->m_logdev_targp != mp->m_ddev_targp) xfs_blkdev_issue_flush(mp->m_ddev_targp); /* * All metadata updates are logged, which means that we just have to * flush the log up to the latest LSN that touched the inode. If we have * concurrent fsync/fdatasync() calls, we need them to all block on the * log force before we clear the ili_fsync_fields field. This ensures * that we don't get a racing sync operation that does not wait for the * metadata to hit the journal before returning. If we race with * clearing the ili_fsync_fields, then all that will happen is the log * force will do nothing as the lsn will already be on disk. We can't * race with setting ili_fsync_fields because that is done under * XFS_ILOCK_EXCL, and that can't happen because we hold the lock shared * until after the ili_fsync_fields is cleared. */ xfs_ilock(ip, XFS_ILOCK_SHARED); if (xfs_ipincount(ip)) { if (!datasync || (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP)) lsn = ip->i_itemp->ili_last_lsn; } if (lsn) { error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed); ip->i_itemp->ili_fsync_fields = 0; } xfs_iunlock(ip, XFS_ILOCK_SHARED); /* * If we only have a single device, and the log force about was * a no-op we might have to flush the data device cache here. * This can only happen for fdatasync/O_DSYNC if we were overwriting * an already allocated file and thus do not have any metadata to * commit. */ if (!log_flushed && !XFS_IS_REALTIME_INODE(ip) && mp->m_logdev_targp == mp->m_ddev_targp) xfs_blkdev_issue_flush(mp->m_ddev_targp); return error; }
STATIC int xfs_file_fsync( struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; int error = 0; int log_flushed = 0; xfs_lsn_t lsn = 0; trace_xfs_file_fsync(ip); error = filemap_write_and_wait_range(inode->i_mapping, start, end); if (error) return error; if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; xfs_iflags_clear(ip, XFS_ITRUNCATED); if (mp->m_flags & XFS_MOUNT_BARRIER) { /* * If we have an RT and/or log subvolume we need to make sure * to flush the write cache the device used for file data * first. This is to ensure newly written file data make * it to disk before logging the new inode size in case of * an extending write. */ if (XFS_IS_REALTIME_INODE(ip)) xfs_blkdev_issue_flush(mp->m_rtdev_targp); else if (mp->m_logdev_targp != mp->m_ddev_targp) xfs_blkdev_issue_flush(mp->m_ddev_targp); } /* * All metadata updates are logged, which means that we just have * to flush the log up to the latest LSN that touched the inode. */ xfs_ilock(ip, XFS_ILOCK_SHARED); if (xfs_ipincount(ip)) { if (!datasync || (ip->i_itemp->ili_fields & ~XFS_ILOG_TIMESTAMP)) lsn = ip->i_itemp->ili_last_lsn; } xfs_iunlock(ip, XFS_ILOCK_SHARED); if (lsn) error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed); /* * If we only have a single device, and the log force about was * a no-op we might have to flush the data device cache here. * This can only happen for fdatasync/O_DSYNC if we were overwriting * an already allocated file and thus do not have any metadata to * commit. */ if ((mp->m_flags & XFS_MOUNT_BARRIER) && mp->m_logdev_targp == mp->m_ddev_targp && !XFS_IS_REALTIME_INODE(ip) && !log_flushed) xfs_blkdev_issue_flush(mp->m_ddev_targp); return error; }
STATIC int xfs_file_fsync( struct file *file, struct dentry *dentry, int datasync) { struct xfs_inode *ip = XFS_I(dentry->d_inode); struct xfs_trans *tp; int error = 0; int log_flushed = 0; xfs_itrace_entry(ip); if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return -XFS_ERROR(EIO); xfs_iflags_clear(ip, XFS_ITRUNCATED); /* * We always need to make sure that the required inode state is safe on * disk. The inode might be clean but we still might need to force the * log because of committed transactions that haven't hit the disk yet. * Likewise, there could be unflushed non-transactional changes to the * inode core that have to go to disk and this requires us to issue * a synchronous transaction to capture these changes correctly. * * This code relies on the assumption that if the i_update_core field * of the inode is clear and the inode is unpinned then it is clean * and no action is required. */ xfs_ilock(ip, XFS_ILOCK_SHARED); /* * First check if the VFS inode is marked dirty. All the dirtying * of non-transactional updates no goes through mark_inode_dirty*, * which allows us to distinguish beteeen pure timestamp updates * and i_size updates which need to be caught for fdatasync. * After that also theck for the dirty state in the XFS inode, which * might gets cleared when the inode gets written out via the AIL * or xfs_iflush_cluster. */ if (((dentry->d_inode->i_state & I_DIRTY_DATASYNC) || ((dentry->d_inode->i_state & I_DIRTY_SYNC) && !datasync)) && ip->i_update_core) { /* * Kick off a transaction to log the inode core to get the * updates. The sync transaction will also force the log. */ xfs_iunlock(ip, XFS_ILOCK_SHARED); tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS); error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0); if (error) { xfs_trans_cancel(tp, 0); return -error; } xfs_ilock(ip, XFS_ILOCK_EXCL); /* * Note - it's possible that we might have pushed ourselves out * of the way during trans_reserve which would flush the inode. * But there's no guarantee that the inode buffer has actually * gone out yet (it's delwri). Plus the buffer could be pinned * anyway if it's part of an inode in another recent * transaction. So we play it safe and fire off the * transaction anyway. */ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, ip); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); xfs_trans_set_sync(tp); error = _xfs_trans_commit(tp, 0, &log_flushed); xfs_iunlock(ip, XFS_ILOCK_EXCL); } else { /* * Timestamps/size haven't changed since last inode flush or * inode transaction commit. That means either nothing got * written or a transaction committed which caught the updates. * If the latter happened and the transaction hasn't hit the * disk yet, the inode will be still be pinned. If it is, * force the log. */ if (xfs_ipincount(ip)) { error = _xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, XFS_LOG_SYNC, &log_flushed); } xfs_iunlock(ip, XFS_ILOCK_SHARED); } if (ip->i_mount->m_flags & XFS_MOUNT_BARRIER) { /* * If the log write didn't issue an ordered tag we need * to flush the disk cache for the data device now. */ if (!log_flushed) xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp); /* * If this inode is on the RT dev we need to flush that * cache as well. */ if (XFS_IS_REALTIME_INODE(ip)) xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp); } return -error; }
STATIC int xfs_file_fsync( struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; int error = 0; int log_flushed = 0; trace_xfs_file_fsync(ip); error = filemap_write_and_wait_range(inode->i_mapping, start, end); if (error) return error; if (XFS_FORCED_SHUTDOWN(mp)) return -XFS_ERROR(EIO); xfs_iflags_clear(ip, XFS_ITRUNCATED); xfs_ilock(ip, XFS_IOLOCK_SHARED); xfs_ioend_wait(ip); xfs_iunlock(ip, XFS_IOLOCK_SHARED); if (mp->m_flags & XFS_MOUNT_BARRIER) { /* * If we have an RT and/or log subvolume we need to make sure * to flush the write cache the device used for file data * first. This is to ensure newly written file data make * it to disk before logging the new inode size in case of * an extending write. */ if (XFS_IS_REALTIME_INODE(ip)) xfs_blkdev_issue_flush(mp->m_rtdev_targp); else if (mp->m_logdev_targp != mp->m_ddev_targp) xfs_blkdev_issue_flush(mp->m_ddev_targp); } /* * We always need to make sure that the required inode state is safe on * disk. The inode might be clean but we still might need to force the * log because of committed transactions that haven't hit the disk yet. * Likewise, there could be unflushed non-transactional changes to the * inode core that have to go to disk and this requires us to issue * a synchronous transaction to capture these changes correctly. * * This code relies on the assumption that if the i_update_core field * of the inode is clear and the inode is unpinned then it is clean * and no action is required. */ xfs_ilock(ip, XFS_ILOCK_SHARED); /* * First check if the VFS inode is marked dirty. All the dirtying * of non-transactional updates no goes through mark_inode_dirty*, * which allows us to distinguish beteeen pure timestamp updates * and i_size updates which need to be caught for fdatasync. * After that also theck for the dirty state in the XFS inode, which * might gets cleared when the inode gets written out via the AIL * or xfs_iflush_cluster. */ if (((inode->i_state & I_DIRTY_DATASYNC) || ((inode->i_state & I_DIRTY_SYNC) && !datasync)) && ip->i_update_core) { /* * Kick off a transaction to log the inode core to get the * updates. The sync transaction will also force the log. */ xfs_iunlock(ip, XFS_ILOCK_SHARED); tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); if (error) { xfs_trans_cancel(tp, 0); return -error; } xfs_ilock(ip, XFS_ILOCK_EXCL); /* * Note - it's possible that we might have pushed ourselves out * of the way during trans_reserve which would flush the inode. * But there's no guarantee that the inode buffer has actually * gone out yet (it's delwri). Plus the buffer could be pinned * anyway if it's part of an inode in another recent * transaction. So we play it safe and fire off the * transaction anyway. */ xfs_trans_ijoin(tp, ip); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); xfs_trans_set_sync(tp); error = _xfs_trans_commit(tp, 0, &log_flushed); xfs_iunlock(ip, XFS_ILOCK_EXCL); } else { /* * Timestamps/size haven't changed since last inode flush or * inode transaction commit. That means either nothing got * written or a transaction committed which caught the updates. * If the latter happened and the transaction hasn't hit the * disk yet, the inode will be still be pinned. If it is, * force the log. */ if (xfs_ipincount(ip)) { error = _xfs_log_force_lsn(mp, ip->i_itemp->ili_last_lsn, XFS_LOG_SYNC, &log_flushed); } xfs_iunlock(ip, XFS_ILOCK_SHARED); } /* * If we only have a single device, and the log force about was * a no-op we might have to flush the data device cache here. * This can only happen for fdatasync/O_DSYNC if we were overwriting * an already allocated file and thus do not have any metadata to * commit. */ if ((mp->m_flags & XFS_MOUNT_BARRIER) && mp->m_logdev_targp == mp->m_ddev_targp && !XFS_IS_REALTIME_INODE(ip) && !log_flushed) xfs_blkdev_issue_flush(mp->m_ddev_targp); return -error; }
/*ARGSUSED*/ int _xfs_trans_commit( xfs_trans_t *tp, uint flags, int *log_flushed) { xfs_log_iovec_t *log_vector; int nvec; xfs_mount_t *mp; xfs_lsn_t commit_lsn; /* REFERENCED */ int error; int log_flags; int sync; #define XFS_TRANS_LOGVEC_COUNT 16 xfs_log_iovec_t log_vector_fast[XFS_TRANS_LOGVEC_COUNT]; struct xlog_in_core *commit_iclog; int shutdown; commit_lsn = -1; /* * Determine whether this commit is releasing a permanent * log reservation or not. */ if (flags & XFS_TRANS_RELEASE_LOG_RES) { ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); log_flags = XFS_LOG_REL_PERM_RESERV; } else { log_flags = 0; } mp = tp->t_mountp; /* * If there is nothing to be logged by the transaction, * then unlock all of the items associated with the * transaction and free the transaction structure. * Also make sure to return any reserved blocks to * the free pool. */ shut_us_down: shutdown = XFS_FORCED_SHUTDOWN(mp) ? EIO : 0; if (!(tp->t_flags & XFS_TRANS_DIRTY) || shutdown) { xfs_trans_unreserve_and_mod_sb(tp); /* * It is indeed possible for the transaction to be * not dirty but the dqinfo portion to be. All that * means is that we have some (non-persistent) quota * reservations that need to be unreserved. */ xfs_trans_unreserve_and_mod_dquots(tp); if (tp->t_ticket) { commit_lsn = xfs_log_done(mp, tp->t_ticket, NULL, log_flags); if (commit_lsn == -1 && !shutdown) shutdown = XFS_ERROR(EIO); } current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); xfs_trans_free_items(tp, shutdown? XFS_TRANS_ABORT : 0); xfs_trans_free_busy(tp); xfs_trans_free(tp); XFS_STATS_INC(xs_trans_empty); return (shutdown); } ASSERT(tp->t_ticket != NULL); /* * If we need to update the superblock, then do it now. */ if (tp->t_flags & XFS_TRANS_SB_DIRTY) xfs_trans_apply_sb_deltas(tp); xfs_trans_apply_dquot_deltas(tp); /* * Ask each log item how many log_vector entries it will * need so we can figure out how many to allocate. * Try to avoid the kmem_alloc() call in the common case * by using a vector from the stack when it fits. */ nvec = xfs_trans_count_vecs(tp); if (nvec == 0) { xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); goto shut_us_down; } else if (nvec <= XFS_TRANS_LOGVEC_COUNT) { log_vector = log_vector_fast; } else { log_vector = (xfs_log_iovec_t *)kmem_alloc(nvec * sizeof(xfs_log_iovec_t), KM_SLEEP); } /* * Fill in the log_vector and pin the logged items, and * then write the transaction to the log. */ xfs_trans_fill_vecs(tp, log_vector); error = xfs_log_write(mp, log_vector, nvec, tp->t_ticket, &(tp->t_lsn)); /* * The transaction is committed incore here, and can go out to disk * at any time after this call. However, all the items associated * with the transaction are still locked and pinned in memory. */ commit_lsn = xfs_log_done(mp, tp->t_ticket, &commit_iclog, log_flags); tp->t_commit_lsn = commit_lsn; if (nvec > XFS_TRANS_LOGVEC_COUNT) { kmem_free(log_vector); } /* * If we got a log write error. Unpin the logitems that we * had pinned, clean up, free trans structure, and return error. */ if (error || commit_lsn == -1) { current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); xfs_trans_uncommit(tp, flags|XFS_TRANS_ABORT); return XFS_ERROR(EIO); } /* * Once the transaction has committed, unused * reservations need to be released and changes to * the superblock need to be reflected in the in-core * version. Do that now. */ xfs_trans_unreserve_and_mod_sb(tp); sync = tp->t_flags & XFS_TRANS_SYNC; /* * Tell the LM to call the transaction completion routine * when the log write with LSN commit_lsn completes (e.g. * when the transaction commit really hits the on-disk log). * After this call we cannot reference tp, because the call * can happen at any time and the call will free the transaction * structure pointed to by tp. The only case where we call * the completion routine (xfs_trans_committed) directly is * if the log is turned off on a debug kernel or we're * running in simulation mode (the log is explicitly turned * off). */ tp->t_logcb.cb_func = (void(*)(void*, int))xfs_trans_committed; tp->t_logcb.cb_arg = tp; /* * We need to pass the iclog buffer which was used for the * transaction commit record into this function, and attach * the callback to it. The callback must be attached before * the items are unlocked to avoid racing with other threads * waiting for an item to unlock. */ shutdown = xfs_log_notify(mp, commit_iclog, &(tp->t_logcb)); /* * Mark this thread as no longer being in a transaction */ current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); /* * Once all the items of the transaction have been copied * to the in core log and the callback is attached, the * items can be unlocked. * * This will free descriptors pointing to items which were * not logged since there is nothing more to do with them. * For items which were logged, we will keep pointers to them * so they can be unpinned after the transaction commits to disk. * This will also stamp each modified meta-data item with * the commit lsn of this transaction for dependency tracking * purposes. */ xfs_trans_unlock_items(tp, commit_lsn); /* * If we detected a log error earlier, finish committing * the transaction now (unpin log items, etc). * * Order is critical here, to avoid using the transaction * pointer after its been freed (by xfs_trans_committed * either here now, or as a callback). We cannot do this * step inside xfs_log_notify as was done earlier because * of this issue. */ if (shutdown) xfs_trans_committed(tp, XFS_LI_ABORTED); /* * Now that the xfs_trans_committed callback has been attached, * and the items are released we can finally allow the iclog to * go to disk. */ error = xfs_log_release_iclog(mp, commit_iclog); /* * If the transaction needs to be synchronous, then force the * log out now and wait for it. */ if (sync) { if (!error) { error = _xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, log_flushed); } XFS_STATS_INC(xs_trans_sync); } else { XFS_STATS_INC(xs_trans_async); } return (error); }