/* * We ignore the datasync flag here because a datasync is effectively * identical to an fsync. That is, datasync implies that we need to write * only the metadata needed to be able to access the data that is written * if we crash after the call completes. Hence if we are writing beyond * EOF we have to log the inode size change as well, which makes it a * full fsync. If we don't write beyond EOF, the inode core will be * clean in memory and so we don't need to log the inode, just like * fsync. */ STATIC int xfs_file_fsync( struct file *filp, struct dentry *dentry, int datasync) { xfs_iflags_clear(XFS_I(dentry->d_inode), XFS_ITRUNCATED); return -xfs_fsync(XFS_I(dentry->d_inode)); }
STATIC int xfs_file_fsync( struct file *filp, struct dentry *dentry, int datasync) { int flags = FSYNC_WAIT; if (datasync) flags |= FSYNC_DATA; xfs_iflags_clear(XFS_I(dentry->d_inode), XFS_ITRUNCATED); return -xfs_fsync(XFS_I(dentry->d_inode), flags, (xfs_off_t)0, (xfs_off_t)-1); }
/* * Notes about direct IO locking for write: * * If there are cached pages or we're extending the file, we need IOLOCK_EXCL * until we're sure the bytes at the new EOF have been zeroed and/or the cached * pages are flushed out. * * In most cases the direct IO writes will be done holding IOLOCK_SHARED * allowing them to be done in parallel with reads and other direct IO writes. * However, if the IO is not aligned to filesystem blocks, the direct IO layer * needs to do sub-block zeroing and that requires serialisation against other * direct IOs to the same block. In this case we need to serialise the * submission of the unaligned IOs so that we don't get racing block zeroing in * the dio layer. To avoid the problem with aio, we also need to wait for * outstanding IOs to complete so that unwritten extent conversion is completed * before we try to map the overlapping block. This is currently implemented by * hitting it with a big hammer (i.e. xfs_ioend_wait()). */ ssize_t /* bytes written, or (-) error */ xfs_write( struct xfs_inode *xip, struct kiocb *iocb, const struct iovec *iovp, unsigned int nsegs, loff_t *offset, int ioflags) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; unsigned long segs = nsegs; xfs_mount_t *mp; ssize_t ret = 0; xfs_fsize_t isize, new_size; int iolock; int eventsent = 0; size_t ocount = 0, count; loff_t pos; int need_i_mutex; int unaligned_io = 0; XFS_STATS_INC(xs_write_calls); ret = generic_segment_checks(iovp, &segs, &ocount, VERIFY_READ); if (ret) return ret; count = ocount; pos = *offset; if (count == 0) return 0; mp = xip->i_mount; xfs_wait_for_freeze(mp, SB_FREEZE_WRITE); if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; if ((ioflags & IO_ISDIRECT) && ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask))) unaligned_io = 1; relock: if ((ioflags & IO_ISDIRECT) && !unaligned_io) { iolock = XFS_IOLOCK_SHARED; need_i_mutex = 0; } else { iolock = XFS_IOLOCK_EXCL; need_i_mutex = 1; mutex_lock(&inode->i_mutex); } xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); start: ret = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); if (ret) { xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); goto out_unlock_mutex; } if ((DM_EVENT_ENABLED(xip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS) && !eventsent)) { int dmflags = FILP_DELAY_FLAG(file); if (need_i_mutex) dmflags |= DM_FLAGS_IMUX; xfs_iunlock(xip, XFS_ILOCK_EXCL); ret = -XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, xip, pos, count, dmflags, &iolock); if (ret) goto out_unlock_internal; xfs_ilock(xip, XFS_ILOCK_EXCL); eventsent = 1; /* * The iolock was dropped and reacquired in XFS_SEND_DATA * so we have to recheck the size when appending. * We will only "goto start;" once, since having sent the * event prevents another call to XFS_SEND_DATA, which is * what allows the size to change in the first place. */ if ((file->f_flags & O_APPEND) && pos != xip->i_size) goto start; } if (ioflags & IO_ISDIRECT) { xfs_buftarg_t *target = XFS_IS_REALTIME_INODE(xip) ? mp->m_rtdev_targp : mp->m_ddev_targp; if ((pos & target->bt_smask) || (count & target->bt_smask)) { xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); return XFS_ERROR(-EINVAL); } if (!need_i_mutex && (mapping->nrpages || pos > xip->i_size)) { xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); iolock = XFS_IOLOCK_EXCL; need_i_mutex = 1; mutex_lock(&inode->i_mutex); xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); goto start; } } new_size = pos + count; if (new_size > xip->i_size) xip->i_new_size = new_size; if (likely(!(ioflags & IO_INVIS))) file_update_time(file); /* * If the offset is beyond the size of the file, we have a couple * of things to do. First, if there is already space allocated * we need to either create holes or zero the disk or ... * * If there is a page where the previous size lands, we need * to zero it out up to the new size. */ if (pos > xip->i_size) { ret = -xfs_zero_eof(xip, pos, xip->i_size); if (ret) { xfs_iunlock(xip, XFS_ILOCK_EXCL); goto out_unlock_internal; } } xfs_iunlock(xip, XFS_ILOCK_EXCL); /* * If we're writing the file then make sure to clear the * setuid and setgid bits if the process is not being run * by root. This keeps people from modifying setuid and * setgid binaries. */ if (((xip->i_d.di_mode & S_ISUID) || ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))) && !capable(CAP_FSETID)) { ret = -xfs_write_clear_setuid(xip); if (likely(!ret)) ret = file_remove_suid(file); if (unlikely(ret)) { goto out_unlock_internal; } } /* We can write back this queue in page reclaim */ current->backing_dev_info = mapping->backing_dev_info; if ((ioflags & IO_ISDIRECT)) { if (mapping->nrpages) { WARN_ON(need_i_mutex == 0); ret = -xfs_flushinval_pages(xip, (pos & PAGE_CACHE_MASK), -1, FI_REMAPF_LOCKED); if (ret) goto out_unlock_internal; } /* * If we are doing unaligned IO, wait for all other IO to drain, * otherwise demote the lock if we had to flush cached pages */ if (unaligned_io) xfs_ioend_wait(xip); else if (need_i_mutex) { /* demote the lock now the cached pages are gone */ xfs_ilock_demote(xip, XFS_IOLOCK_EXCL); mutex_unlock(&inode->i_mutex); iolock = XFS_IOLOCK_SHARED; need_i_mutex = 0; } trace_xfs_file_direct_write(xip, count, *offset, ioflags); ret = generic_file_direct_write(iocb, iovp, &segs, pos, offset, count, ocount); /* * direct-io write to a hole: fall through to buffered I/O * for completing the rest of the request. */ if (ret >= 0 && ret != count) { XFS_STATS_ADD(xs_write_bytes, ret); pos += ret; count -= ret; ioflags &= ~IO_ISDIRECT; xfs_iunlock(xip, iolock); if (need_i_mutex) mutex_unlock(&inode->i_mutex); goto relock; } } else { int enospc = 0; write_retry: trace_xfs_file_buffered_write(xip, count, *offset, ioflags); ret = generic_file_buffered_write(iocb, iovp, segs, pos, offset, count, ret); /* * if we just got an ENOSPC, flush the inode now we * aren't holding any page locks and retry *once* */ if (ret == -ENOSPC && !enospc) { ret = -xfs_flush_pages(xip, 0, -1, 0, FI_NONE); if (ret) goto out_unlock_internal; enospc = 1; goto write_retry; } } current->backing_dev_info = NULL; isize = i_size_read(inode); if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize)) *offset = isize; if (*offset > xip->i_size) { xfs_ilock(xip, XFS_ILOCK_EXCL); if (*offset > xip->i_size) xip->i_size = *offset; xfs_iunlock(xip, XFS_ILOCK_EXCL); } if (ret == -ENOSPC && DM_EVENT_ENABLED(xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) { xfs_iunlock(xip, iolock); if (need_i_mutex) mutex_unlock(&inode->i_mutex); ret = -XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, xip, DM_RIGHT_NULL, xip, DM_RIGHT_NULL, NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */ if (need_i_mutex) mutex_lock(&inode->i_mutex); xfs_ilock(xip, iolock); if (ret) goto out_unlock_internal; goto start; } if (ret <= 0) goto out_unlock_internal; XFS_STATS_ADD(xs_write_bytes, ret); /* Handle various SYNC-type writes */ if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { loff_t end = pos + ret - 1; int error, error2; xfs_iunlock(xip, iolock); if (need_i_mutex) mutex_unlock(&inode->i_mutex); error = filemap_write_and_wait_range(mapping, pos, end); if (need_i_mutex) mutex_lock(&inode->i_mutex); xfs_ilock(xip, iolock); error2 = -xfs_fsync(xip); if (error) ret = error; else if (error2) ret = error2; } out_unlock_internal: if (xip->i_new_size) { xfs_ilock(xip, XFS_ILOCK_EXCL); xip->i_new_size = 0; /* * If this was a direct or synchronous I/O that failed (such * as ENOSPC) then part of the I/O may have been written to * disk before the error occured. In this case the on-disk * file size may have been adjusted beyond the in-memory file * size and now needs to be truncated back. */ if (xip->i_d.di_size > xip->i_size) xip->i_d.di_size = xip->i_size; xfs_iunlock(xip, XFS_ILOCK_EXCL); } xfs_iunlock(xip, iolock); out_unlock_mutex: if (need_i_mutex) mutex_unlock(&inode->i_mutex); return ret; }