/*
 * Apply attributes received from the userspace server to @inode.
 *
 * The update is dropped when @attr_version is non-zero and older than the
 * version recorded in the fuse inode, or while FUSE_I_SIZE_UNSTABLE is set.
 * For a regular file whose size changed, the page cache is truncated and
 * then invalidated once fc->lock has been released.
 */
void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
			    u64 attr_valid, u64 attr_version)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	loff_t prev_size;
	bool skip;

	spin_lock(&fc->lock);
	skip = (attr_version != 0 && fi->attr_version > attr_version) ||
	       test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
	if (skip) {
		spin_unlock(&fc->lock);
		return;
	}

	fuse_change_attributes_common(inode, attr, attr_valid);

	/* Remember the size we had before overwriting it. */
	prev_size = inode->i_size;
	i_size_write(inode, attr->size);
	spin_unlock(&fc->lock);

	/* Only regular files with a changed size need their cache fixed up. */
	if (!S_ISREG(inode->i_mode) || prev_size == attr->size)
		return;

	truncate_pagecache(inode, prev_size, attr->size);
	invalidate_inode_pages2(inode->i_mapping);
}
/*
 * Apply attributes received from the userspace server to @inode.
 *
 * The update is dropped when @attr_version is non-zero and older than the
 * version recorded in the fuse inode.  For a regular file whose size
 * changed, the mapping is truncated first if the file shrank, and the page
 * cache is invalidated in either case.
 */
void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
			    u64 attr_valid, u64 attr_version)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	loff_t prev_size;

	spin_lock(&fc->lock);
	if (attr_version != 0 && fi->attr_version > attr_version) {
		/* A newer update has already been applied. */
		spin_unlock(&fc->lock);
		return;
	}

	fuse_change_attributes_common(inode, attr, attr_valid);

	prev_size = inode->i_size;
	i_size_write(inode, attr->size);
	spin_unlock(&fc->lock);

	if (!S_ISREG(inode->i_mode) || prev_size == attr->size)
		return;

	/* Shrinking: drop the pages beyond the new end of file. */
	if (attr->size < prev_size)
		fuse_truncate(inode->i_mapping, attr->size);

	invalidate_inode_pages2(inode->i_mapping);
}
static int nfsio_prepare_snapshot(struct ploop_io * io, struct ploop_snapdata *sd) { int err; struct file * file = io->files.file; file = dentry_open(dget(F_DENTRY(file)), mntget(F_MNT(file)), O_RDONLY|O_LARGEFILE, current_cred()); if (IS_ERR(file)) return PTR_ERR(file); /* Sanity checks */ if (io->files.mapping != file->f_mapping || io->files.inode != file->f_mapping->host) { fput(file); return -EINVAL; } err = invalidate_inode_pages2(file->f_mapping); if (err) { fput(file); return err; } sd->file = file; return 0; }
static int nfsio_open(struct ploop_io * io) { struct ploop_delta * delta = container_of(io, struct ploop_delta, io); struct file * file = io->files.file; int err = 0; if (file == NULL) return -EBADF; err = invalidate_inode_pages2(file->f_mapping); if (err) return err; io->files.mapping = file->f_mapping; io->files.inode = io->files.mapping->host; io->files.bdev = NULL; if (!(delta->flags & PLOOP_FMT_RDONLY)) { io->fsync_thread = kthread_create(nfsio_fsync_thread, io, "nfsio_commit%d", delta->plo->index); if (IS_ERR(io->fsync_thread)) { err = PTR_ERR(io->fsync_thread); io->fsync_thread = NULL; goto out; } wake_up_process(io->fsync_thread); } out: return err; }
/* * mark the data attached to an inode as obsolete due to a write on the server * - might also want to ditch all the outstanding writes and dirty pages */ void afs_zap_data(struct afs_vnode *vnode) { _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode); /* nuke all the non-dirty pages that aren't locked, mapped or being * written back in a regular file and completely discard the pages in a * directory or symlink */ if (S_ISREG(vnode->vfs_inode.i_mode)) invalidate_remote_inode(&vnode->vfs_inode); else invalidate_inode_pages2(vnode->vfs_inode.i_mapping); }
/*
 * Apply attributes received from the userspace server to @inode.
 *
 * The update is dropped when @attr_version is non-zero and older than the
 * version recorded in the fuse inode, or while FUSE_I_SIZE_UNSTABLE is set.
 * With the writeback cache enabled, the size of a regular file is left
 * alone and no page-cache work is done.  Otherwise a regular file's cache is
 * invalidated when the size changed, or, in auto_inval_data mode, when the
 * mtime changed.
 */
void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
			    u64 attr_valid, u64 attr_version)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	bool is_wb = fc->writeback_cache;
	struct timespec prev_mtime;
	loff_t prev_size;

	spin_lock(&fc->lock);
	if ((attr_version != 0 && fi->attr_version > attr_version) ||
	    test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) {
		spin_unlock(&fc->lock);
		return;
	}

	/* Snapshot mtime and size before they are overwritten. */
	prev_mtime = inode->i_mtime;
	fuse_change_attributes_common(inode, attr, attr_valid);
	prev_size = inode->i_size;

	/*
	 * With writeback_cache enabled, cached writes beyond EOF extend the
	 * local i_size without keeping the userspace server in sync, so the
	 * attr->size coming from the server can be stale and is not trusted.
	 */
	if (!is_wb || !S_ISREG(inode->i_mode))
		i_size_write(inode, attr->size);
	spin_unlock(&fc->lock);

	if (is_wb || !S_ISREG(inode->i_mode))
		return;

	if (prev_size != attr->size) {
		truncate_pagecache(inode, attr->size);
	} else {
		struct timespec new_mtime = {
			.tv_sec = attr->mtime,
			.tv_nsec = attr->mtimensec,
		};

		/*
		 * Same size: only auto inval mode goes on to compare mtime,
		 * and only a changed mtime triggers invalidation.
		 */
		if (!fc->auto_inval_data)
			return;
		if (timespec_equal(&prev_mtime, &new_mtime))
			return;
	}

	invalidate_inode_pages2(inode->i_mapping);
}
/*
 * v9fs_file_write - write @count bytes of user data to a 9P file
 * @filp:   file being written
 * @data:   user buffer holding the data
 * @count:  number of bytes to write
 * @offset: file position; advanced by every byte the server accepted
 *
 * The data is sent in chunks of at most (maxdata - V9FS_IOHDRSZ) bytes,
 * further limited by the fid's iounit when that is non-zero.  The loop stops
 * at the first short write.
 *
 * Returns the number of bytes written (including the bytes of a final short
 * write, so that the return value always matches the advance of *@offset),
 * or a negative error code when a write request fails.  Cached pages of the
 * inode are invalidated whenever data may have reached the server.
 */
static ssize_t
v9fs_file_write(struct file *filp, const char __user * data,
		size_t count, loff_t * offset)
{
	struct inode *inode = filp->f_path.dentry->d_inode;
	struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
	struct v9fs_fid *v9fid = filp->private_data;
	struct v9fs_fcall *fcall = NULL;
	int fid = v9fid->fid;
	int result = -EIO;
	int rsize = 0;
	int total = 0;

	dprintk(DEBUG_VFS, "data %p count %d offset %x\n", data, (int)count,
		(int)*offset);

	rsize = v9ses->maxdata - V9FS_IOHDRSZ;
	if (v9fid->iounit != 0 && rsize > v9fid->iounit)
		rsize = v9fid->iounit;

	do {
		if (count < rsize)
			rsize = count;

		result = v9fs_t_write(v9ses, fid, *offset, rsize, data, &fcall);
		if (result < 0) {
			PRINT_FCALL_ERROR("error while writing", fcall);
			kfree(fcall);
			/*
			 * Earlier chunks already reached the server, so the
			 * cached pages are stale even though we report an
			 * error.
			 */
			if (total)
				invalidate_inode_pages2(inode->i_mapping);
			return result;
		}

		kfree(fcall);
		fcall = NULL;

		/*
		 * Account for the accepted bytes before testing for a short
		 * write: *offset and the returned total must stay in step,
		 * otherwise the caller would resend data at the wrong
		 * position.
		 */
		*offset += result;
		count -= result;
		data += result;
		total += result;

		if (result != rsize) {
			eprintk(KERN_ERR,
				"short write: v9fs_t_write returned %d\n",
				result);
			break;
		}
	} while (count);

	invalidate_inode_pages2(inode->i_mapping);
	return total;
}
/* * Turn off the cache with regard to a per-inode cookie if opened for writing, * invalidating all the pages in the page cache relating to the associated * inode to clear the per-page caching. */ static void nfs_fscache_disable_inode_cookie(struct inode *inode) { clear_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags); if (NFS_I(inode)->fscache) { dfprintk(FSCACHE, "NFS: nfsi 0x%p turning cache off\n", NFS_I(inode)); /* Need to invalidate any mapped pages that were read in before * turning off the cache. */ if (inode->i_mapping && inode->i_mapping->nrpages) invalidate_inode_pages2(inode->i_mapping); nfs_fscache_zap_inode_cookie(inode); } }
/*
 * Apply attributes received from the userspace server to @inode.
 *
 * The update is dropped when @attr_version is non-zero and older than the
 * version recorded in the fuse inode, or while FUSE_I_SIZE_UNSTABLE is set.
 * A regular file's page cache is invalidated when the size changed (after
 * truncating it), or, in auto_inval_data mode, when the mtime changed.
 */
void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
			    u64 attr_valid, u64 attr_version)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct timespec prev_mtime;
	loff_t prev_size;

	spin_lock(&fc->lock);
	if ((attr_version != 0 && fi->attr_version > attr_version) ||
	    test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) {
		spin_unlock(&fc->lock);
		return;
	}

	/* Snapshot mtime and size before they are overwritten. */
	prev_mtime = inode->i_mtime;
	fuse_change_attributes_common(inode, attr, attr_valid);

	prev_size = inode->i_size;
	i_size_write(inode, attr->size);
	spin_unlock(&fc->lock);

	if (!S_ISREG(inode->i_mode))
		return;

	if (prev_size != attr->size) {
		truncate_pagecache(inode, prev_size, attr->size);
	} else {
		struct timespec new_mtime = {
			.tv_sec = attr->mtime,
			.tv_nsec = attr->mtimensec,
		};

		/*
		 * Same size: only auto inval mode goes on to compare mtime,
		 * and only a changed mtime triggers invalidation.
		 */
		if (!fc->auto_inval_data)
			return;
		if (timespec_equal(&prev_mtime, &new_mtime))
			return;
	}

	invalidate_inode_pages2(inode->i_mapping);
}
/* * If we are changing DAX flags, we have to ensure the file is clean and any * cached objects in the address space are invalidated and removed. This * requires us to lock out other IO and page faults similar to a truncate * operation. The locks need to be held until the transaction has been committed * so that the cache invalidation is atomic with respect to the DAX flag * manipulation. */ static int xfs_ioctl_setattr_dax_invalidate( struct xfs_inode *ip, struct fsxattr *fa, int *join_flags) { struct inode *inode = VFS_I(ip); int error; *join_flags = 0; /* * It is only valid to set the DAX flag on regular files and * directories on filesystems where the block size is equal to the page * size. On directories it serves as an inherit hint. */ if (fa->fsx_xflags & FS_XFLAG_DAX) { if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) return -EINVAL; if (ip->i_mount->m_sb.sb_blocksize != PAGE_SIZE) return -EINVAL; } /* If the DAX state is not changing, we have nothing to do here. */ if ((fa->fsx_xflags & FS_XFLAG_DAX) && IS_DAX(inode)) return 0; if (!(fa->fsx_xflags & FS_XFLAG_DAX) && !IS_DAX(inode)) return 0; /* lock, flush and invalidate mapping in preparation for flag change */ xfs_ilock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL); error = filemap_write_and_wait(inode->i_mapping); if (error) goto out_unlock; error = invalidate_inode_pages2(inode->i_mapping); if (error) goto out_unlock; *join_flags = XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL; return 0; out_unlock: xfs_iunlock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL); return error; }
/*
 * Switch @io over to the file handle prepared in @sd and release the handle
 * it used before.  The swap and the best-effort page-cache invalidation are
 * done under the sysfs mutex; afterwards the commit thread, if any, is
 * stopped.  Always returns 0.
 */
static int nfsio_complete_snapshot(struct ploop_io * io, struct ploop_snapdata *sd)
{
	struct file *prev = io->files.file;

	mutex_lock(&io->plo->sysfs_mutex);
	io->files.file = sd->file;
	sd->file = NULL;
	/* Result deliberately ignored. */
	(void)invalidate_inode_pages2(io->files.mapping);
	mutex_unlock(&io->plo->sysfs_mutex);

	if (io->fsync_thread != NULL) {
		kthread_stop(io->fsync_thread);
		io->fsync_thread = NULL;
	}

	fput(prev);
	return 0;
}
static int nfsio_prepare_merge(struct ploop_io * io, struct ploop_snapdata *sd) { int err; struct file * file = io->files.file; file = dentry_open(dget(F_DENTRY(file)), mntget(F_MNT(file)), O_RDWR|O_LARGEFILE, current_cred()); if (IS_ERR(file)) return PTR_ERR(file); /* Sanity checks */ if (io->files.mapping != file->f_mapping || io->files.inode != file->f_mapping->host || io->files.bdev != file->f_mapping->host->i_sb->s_bdev) { fput(file); return -EINVAL; } err = invalidate_inode_pages2(file->f_mapping); if (err) { fput(file); return err; } if (io->fsync_thread == NULL) { io->fsync_thread = kthread_create(nfsio_fsync_thread, io, "nfsio_commit%d", io->plo->index); if (IS_ERR(io->fsync_thread)) { io->fsync_thread = NULL; fput(file); return -ENOMEM; } wake_up_process(io->fsync_thread); } sd->file = file; return 0; }
static void xen_update_blkif_status(struct xen_blkif *blkif) { int err; char name[TASK_COMM_LEN]; /* Not ready to connect? */ if (!blkif->irq || !blkif->vbd.bdev) return; /* Already connected? */ if (blkif->be->dev->state == XenbusStateConnected) return; /* Attempt to connect: exit if we fail to. */ connect(blkif->be); if (blkif->be->dev->state != XenbusStateConnected) return; err = blkback_name(blkif, name); if (err) { xenbus_dev_error(blkif->be->dev, err, "get blkback dev name"); return; } err = filemap_write_and_wait(blkif->vbd.bdev->bd_inode->i_mapping); if (err) { xenbus_dev_error(blkif->be->dev, err, "block flush"); return; } invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping); blkif->xenblkd = kthread_run(xen_blkif_schedule, blkif, "%s", name); if (IS_ERR(blkif->xenblkd)) { err = PTR_ERR(blkif->xenblkd); blkif->xenblkd = NULL; xenbus_dev_error(blkif->be->dev, err, "start xenblkd"); } }
/*
 * Read from an XFS file into the iterator @to.
 *
 * Direct I/O on a non-DAX inode must be aligned to the logical sector size
 * of the target device; a misaligned request returns 0 when it starts
 * exactly at EOF and -EINVAL otherwise.  The request is clamped to
 * s_maxbytes.  Returns the result of generic_file_read_iter(), -EIO on a
 * shut-down filesystem, or the error from flushing cached pages ahead of a
 * direct read.
 */
STATIC ssize_t
xfs_file_read_iter(
	struct kiocb		*iocb,
	struct iov_iter		*to)
{
	struct file		*file = iocb->ki_filp;
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	size_t			size = iov_iter_count(to);
	loff_t			pos = iocb->ki_pos;
	xfs_fsize_t		n;
	ssize_t			ret;
	int			ioflags = 0;

	XFS_STATS_INC(mp, xs_read_calls);

	if (unlikely(iocb->ki_flags & IOCB_DIRECT))
		ioflags |= XFS_IO_ISDIRECT;
	if (file->f_mode & FMODE_NOCMTIME)
		ioflags |= XFS_IO_INVIS;

	if ((ioflags & XFS_IO_ISDIRECT) && !IS_DAX(inode)) {
		xfs_buftarg_t	*target;

		if (XFS_IS_REALTIME_INODE(ip))
			target = mp->m_rtdev_targp;
		else
			target = mp->m_ddev_targp;

		/* DIO must be aligned to device logical sector size */
		if ((pos | size) & target->bt_logical_sectormask)
			return pos == i_size_read(inode) ? 0 : -EINVAL;
	}

	n = mp->m_super->s_maxbytes - pos;
	if (n <= 0 || size == 0)
		return 0;
	if (n < size)
		size = n;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	/*
	 * Taking the IO lock exclusively for every direct read would
	 * serialise all new concurrent reads behind IO already in flight,
	 * because in-flight IO holds the lock shared.  Exclusive access is
	 * only needed to blow away the page cache, so start shared and only
	 * upgrade when cached pages exist.  The common direct IO case of an
	 * empty page cache then proceeds concurrently.
	 */
	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
	if ((ioflags & XFS_IO_ISDIRECT) && inode->i_mapping->nrpages) {
		xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
		xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);

		/*
		 * The generic dio code flushes only the range of the
		 * particular I/O.  Since an exclusive lock is taken here the
		 * sequence is considerably more expensive, with a noticeable
		 * cost for any file that has cached pages, even outside the
		 * range of this I/O.
		 *
		 * So amortize the cost of the lock against a full file flush
		 * and reduce the chance of repeated iolock cycles later.
		 * nrpages is rechecked because the lock was dropped above.
		 */
		if (inode->i_mapping->nrpages) {
			int	error;

			error = filemap_write_and_wait(inode->i_mapping);
			if (error) {
				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
				return error;
			}

			/*
			 * Invalidate whole pages.  A failure to invalidate a
			 * page is reported as an error, which should never
			 * happen on XFS; warn if it does and carry on.
			 */
			error = invalidate_inode_pages2(inode->i_mapping);
			WARN_ON_ONCE(error);
		}
		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
	}

	trace_xfs_file_read(ip, size, pos, ioflags);

	ret = generic_file_read_iter(iocb, to);
	if (ret > 0)
		XFS_STATS_ADD(mp, xs_read_bytes, ret);

	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
	return ret;
}
static int rbio_submit(struct ploop_io * io, struct nfs_read_data * nreq, const struct rpc_call_ops * cb) { struct nfs_open_context *ctx = nfs_file_open_context(io->files.file); struct inode *inode = io->files.inode; struct rpc_task *task; struct rpc_message msg = { .rpc_cred = ctx->cred, }; struct rpc_task_setup task_setup_data = { .rpc_client = NFS_CLIENT(inode), .rpc_message = &msg, .callback_ops = cb, #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,25) .workqueue = nfsio_workqueue, #endif .flags = RPC_TASK_ASYNC, }; nreq->res.count = nreq->args.count; nreq->header->cred = msg.rpc_cred; nreq->args.context = ctx; task_setup_data.task = &nreq->task; task_setup_data.callback_data = nreq; msg.rpc_argp = &nreq->args; msg.rpc_resp = &nreq->res; NFS_PROTO(inode)->read_setup(nreq, &msg); task = rpc_run_task(&task_setup_data); if (unlikely(IS_ERR(task))) return PTR_ERR(task); rpc_put_task(task); return 0; } #else static int rbio_submit(struct ploop_io * io, struct nfs_read_data * nreq, const struct rpc_call_ops * cb) { struct nfs_open_context *ctx = nfs_file_open_context(io->files.file); struct inode *inode = io->files.inode; nreq->res.count = nreq->args.count; nreq->cred = ctx->cred; nreq->args.context = ctx; rpc_init_task(&nreq->task, NFS_CLIENT(inode), RPC_TASK_ASYNC, cb, nreq); NFS_PROTO(inode)->read_setup(nreq); nreq->task.tk_cookie = (unsigned long) inode; lock_kernel(); rpc_execute(&nreq->task); unlock_kernel(); return 0; } #endif static void nfsio_submit_read(struct ploop_io *io, struct ploop_request * preq, struct bio_list *sbl, iblock_t iblk, unsigned int size) { struct inode *inode = io->files.inode; size_t rsize = NFS_SERVER(inode)->rsize; struct nfs_read_data *nreq = NULL; loff_t pos; unsigned int prev_end; struct bio * b; ploop_prepare_io_request(preq); pos = sbl->head->bi_sector; pos = ((loff_t)iblk << preq->plo->cluster_log) | (pos & ((1<<preq->plo->cluster_log) - 1)); pos <<= 9; prev_end = PAGE_SIZE; for (b = sbl->head; b != NULL; b = b->bi_next) { int 
bv_idx; for (bv_idx = 0; bv_idx < b->bi_vcnt; bv_idx++) { struct bio_vec * bv = &b->bi_io_vec[bv_idx]; if (nreq && nreq->args.count + bv->bv_len <= rsize) { if (nreq->pages.pagevec[nreq->pages.npages-1] == bv->bv_page && prev_end == bv->bv_offset) { nreq->args.count += bv->bv_len; pos += bv->bv_len; prev_end += bv->bv_len; continue; } if (nreq->pages.npages < MAX_NBIO_PAGES && bv->bv_offset == 0 && prev_end == PAGE_SIZE) { nreq->args.count += bv->bv_len; nreq->pages.pagevec[nreq->pages.npages] = bv->bv_page; nreq->pages.npages++; pos += bv->bv_len; prev_end = bv->bv_offset + bv->bv_len; continue; } } if (nreq) { int err; atomic_inc(&preq->io_count); err = rbio_submit(io, nreq, &nfsio_read_ops); if (err) { PLOOP_REQ_SET_ERROR(preq, err); ploop_complete_io_request(preq); goto out; } } nreq = rbio_init(pos, bv->bv_page, bv->bv_offset, bv->bv_len, preq, inode); if (nreq == NULL) { PLOOP_REQ_SET_ERROR(preq, -ENOMEM); goto out; } pos += bv->bv_len; prev_end = bv->bv_offset + bv->bv_len; } } if (nreq) { int err; atomic_inc(&preq->io_count); err = rbio_submit(io, nreq, &nfsio_read_ops); if (err) { PLOOP_REQ_SET_ERROR(preq, err); ploop_complete_io_request(preq); goto out; } } out: ploop_complete_io_request(preq); } static void nfsio_write_result(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; struct nfs_writeargs *argp = &data->args; struct nfs_writeres *resp = &data->res; int status; status = NFS_PROTO(data->header->inode)->write_done(task, data); if (status != 0) return; if (task->tk_status >= 0 && resp->count < argp->count) task->tk_status = -EIO; } static void nfsio_write_release(void *calldata) { struct nfs_write_data *nreq = calldata; struct ploop_request *preq = (struct ploop_request *) nreq->header->req; int status = nreq->task.tk_status; if (unlikely(status < 0)) PLOOP_REQ_SET_ERROR(preq, status); if (!preq->error && nreq->res.verf->committed != NFS_FILE_SYNC) { if (!test_and_set_bit(PLOOP_REQ_UNSTABLE, &preq->state)) 
memcpy(&preq->verf, &nreq->res.verf->verifier, 8); } nfsio_complete_io_request(preq); nfsio_wbio_release(calldata); } static const struct rpc_call_ops nfsio_write_ops = { .rpc_call_done = nfsio_write_result, .rpc_release = nfsio_write_release, }; static struct nfs_write_data * wbio_init(loff_t pos, struct page * page, unsigned int off, unsigned int len, void * priv, struct inode * inode) { struct nfs_write_data * nreq; nreq = nfsio_wbio_alloc(MAX_NBIO_PAGES); if (unlikely(nreq == NULL)) return NULL; nreq->args.offset = pos; nreq->args.pgbase = off; nreq->args.count = len; nreq->pages.pagevec[0] = page; nreq->pages.npages = 1; nreq->header->req = priv; nreq->header->inode = inode; nreq->args.fh = NFS_FH(inode); nreq->args.pages = nreq->pages.pagevec; nreq->args.stable = NFS_UNSTABLE; nreq->res.fattr = &nreq->fattr; nreq->res.verf = &nreq->verf; return nreq; } #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) static int wbio_submit(struct ploop_io * io, struct nfs_write_data *nreq, const struct rpc_call_ops * cb) { struct nfs_open_context *ctx = nfs_file_open_context(io->files.file); struct inode *inode = io->files.inode; struct rpc_task *task; struct rpc_message msg = { .rpc_cred = ctx->cred, }; struct rpc_task_setup task_setup_data = { .rpc_client = NFS_CLIENT(inode), .rpc_message = &msg, .callback_ops = cb, #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,25) .workqueue = nfsio_workqueue, #endif .flags = RPC_TASK_ASYNC, }; if (verify_bounce(nreq)) return -ENOMEM; nreq->res.count = nreq->args.count; nreq->args.context = ctx; nreq->header->cred = msg.rpc_cred; task_setup_data.task = &nreq->task; task_setup_data.callback_data = nreq; msg.rpc_argp = &nreq->args; msg.rpc_resp = &nreq->res; NFS_PROTO(inode)->write_setup(nreq, &msg); task = rpc_run_task(&task_setup_data); if (unlikely(IS_ERR(task))) return PTR_ERR(task); rpc_put_task(task); return 0; } #else static int wbio_submit(struct ploop_io * io, struct nfs_write_data *nreq, const struct rpc_call_ops * cb) { struct 
nfs_open_context *ctx = nfs_file_open_context(io->files.file); struct inode *inode = io->files.inode; if (verify_bounce(nreq)) return -ENOMEM; nreq->res.count = nreq->args.count; nreq->args.context = ctx; nreq->cred = ctx->cred; rpc_init_task(&nreq->task, NFS_CLIENT(inode), RPC_TASK_ASYNC, cb, nreq); NFS_PROTO(inode)->write_setup(nreq, NFS_UNSTABLE); nreq->task.tk_priority = RPC_PRIORITY_NORMAL; nreq->task.tk_cookie = (unsigned long) inode; lock_kernel(); rpc_execute(&nreq->task); unlock_kernel(); return 0; } #endif static void nfsio_submit_write(struct ploop_io *io, struct ploop_request * preq, struct bio_list *sbl, iblock_t iblk, unsigned int size) { struct inode *inode = io->files.inode; size_t wsize = NFS_SERVER(inode)->wsize; struct nfs_write_data *nreq = NULL; loff_t pos; struct bio * b; unsigned int prev_end; nfsio_prepare_io_request(preq); pos = sbl->head->bi_sector; pos = ((loff_t)iblk << preq->plo->cluster_log) | (pos & ((1<<preq->plo->cluster_log) - 1)); ploop_prepare_tracker(preq, pos); pos <<= 9; prev_end = PAGE_SIZE; for (b = sbl->head; b != NULL; b = b->bi_next) { int bv_idx; for (bv_idx = 0; bv_idx < b->bi_vcnt; bv_idx++) { struct bio_vec * bv = &b->bi_io_vec[bv_idx]; if (nreq && nreq->args.count + bv->bv_len <= wsize) { if (nreq->pages.pagevec[nreq->pages.npages-1] == bv->bv_page && prev_end == bv->bv_offset) { nreq->args.count += bv->bv_len; pos += bv->bv_len; prev_end += bv->bv_len; continue; } if (nreq->pages.npages < MAX_NBIO_PAGES && bv->bv_offset == 0 && prev_end == PAGE_SIZE) { nreq->args.count += bv->bv_len; nreq->pages.pagevec[nreq->pages.npages] = bv->bv_page; nreq->pages.npages++; pos += bv->bv_len; prev_end = bv->bv_offset + bv->bv_len; continue; } } if (nreq) { int err; atomic_inc(&preq->io_count); err = wbio_submit(io, nreq, &nfsio_write_ops); if (err) { PLOOP_REQ_SET_ERROR(preq, err); nfsio_complete_io_request(preq); goto out; } } nreq = wbio_init(pos, bv->bv_page, bv->bv_offset, bv->bv_len, preq, inode); if (nreq == NULL) { 
PLOOP_REQ_SET_ERROR(preq, -ENOMEM); goto out; } prev_end = bv->bv_offset + bv->bv_len; pos += bv->bv_len; } } if (nreq) { int err; atomic_inc(&preq->io_count); err = wbio_submit(io, nreq, &nfsio_write_ops); if (err) { PLOOP_REQ_SET_ERROR(preq, err); nfsio_complete_io_request(preq); } } out: nfsio_complete_io_request(preq); } static void nfsio_submit(struct ploop_io *io, struct ploop_request * preq, unsigned long rw, struct bio_list *sbl, iblock_t iblk, unsigned int size) { if (iblk == PLOOP_ZERO_INDEX) iblk = 0; if (rw & (1<<BIO_RW)) nfsio_submit_write(io, preq, sbl, iblk, size); else nfsio_submit_read(io, preq, sbl, iblk, size); } struct bio_list_walk { struct bio * cur; int idx; int bv_off; }; static void nfsio_submit_write_pad(struct ploop_io *io, struct ploop_request * preq, struct bio_list *sbl, iblock_t iblk, unsigned int size) { struct inode *inode = io->files.inode; size_t wsize = NFS_SERVER(inode)->wsize; struct nfs_write_data *nreq = NULL; struct bio_list_walk bw; unsigned prev_end; loff_t pos, end_pos, start, end; /* pos..end_pos is the range which we are going to write */ pos = (loff_t)iblk << (preq->plo->cluster_log + 9); end_pos = pos + (1 << (preq->plo->cluster_log + 9)); /* start..end is data that we have. The rest must be zero padded. */ start = pos + ((sbl->head->bi_sector & ((1<<preq->plo->cluster_log) - 1)) << 9); end = start + (size << 9); nfsio_prepare_io_request(preq); ploop_prepare_tracker(preq, start >> 9); prev_end = PAGE_SIZE; #if 1 /* GCC, shut up! 
*/ bw.cur = sbl->head; bw.idx = 0; bw.bv_off = 0; BUG_ON(bw.cur->bi_io_vec[0].bv_len & 511); #endif while (pos < end_pos) { struct page * page; unsigned int poff, plen; if (pos < start) { page = ZERO_PAGE(0); poff = 0; plen = start - pos; if (plen > PAGE_SIZE) plen = PAGE_SIZE; } else if (pos >= end) { page = ZERO_PAGE(0); poff = 0; plen = end_pos - pos; if (plen > PAGE_SIZE) plen = PAGE_SIZE; } else { /* pos >= start && pos < end */ struct bio_vec * bv; if (pos == start) { bw.cur = sbl->head; bw.idx = 0; bw.bv_off = 0; BUG_ON(bw.cur->bi_io_vec[0].bv_len & 511); } bv = bw.cur->bi_io_vec + bw.idx; if (bw.bv_off >= bv->bv_len) { bw.idx++; bv++; bw.bv_off = 0; if (bw.idx >= bw.cur->bi_vcnt) { bw.cur = bw.cur->bi_next; bw.idx = 0; bw.bv_off = 0; bv = bw.cur->bi_io_vec; } BUG_ON(bv->bv_len & 511); } page = bv->bv_page; poff = bv->bv_offset + bw.bv_off; plen = bv->bv_len - bw.bv_off; } if (nreq && nreq->args.count + plen <= wsize) { if (nreq->pages.pagevec[nreq->pages.npages-1] == page && prev_end == poff) { nreq->args.count += plen; pos += plen; bw.bv_off += plen; prev_end += plen; continue; } if (nreq->pages.npages < MAX_NBIO_PAGES && poff == 0 && prev_end == PAGE_SIZE) { nreq->args.count += plen; nreq->pages.pagevec[nreq->pages.npages] = page; nreq->pages.npages++; pos += plen; bw.bv_off += plen; prev_end = poff + plen; continue; } } if (nreq) { int err; atomic_inc(&preq->io_count); err = wbio_submit(io, nreq, &nfsio_write_ops); if (err) { PLOOP_REQ_SET_ERROR(preq, err); nfsio_complete_io_request(preq); goto out; } } nreq = wbio_init(pos, page, poff, plen, preq, inode); if (nreq == NULL) { PLOOP_REQ_SET_ERROR(preq, -ENOMEM); goto out; } prev_end = poff + plen; pos += plen; bw.bv_off += plen; } if (nreq) { int err; atomic_inc(&preq->io_count); err = wbio_submit(io, nreq, &nfsio_write_ops); if (err) { PLOOP_REQ_SET_ERROR(preq, err); nfsio_complete_io_request(preq); } } out: nfsio_complete_io_request(preq); } static void nfsio_submit_alloc(struct ploop_io *io, struct 
ploop_request * preq, struct bio_list * sbl, unsigned int size) { iblock_t iblk = io->alloc_head++; if (!(io->files.file->f_mode & FMODE_WRITE)) { PLOOP_FAIL_REQUEST(preq, -EBADF); return; } preq->iblock = iblk; preq->eng_state = PLOOP_E_DATA_WBI; nfsio_submit_write_pad(io, preq, sbl, iblk, size); } static void nfsio_destroy(struct ploop_io * io) { if (io->fsync_thread) { kthread_stop(io->fsync_thread); io->fsync_thread = NULL; } if (io->files.file) { struct file * file = io->files.file; mutex_lock(&io->plo->sysfs_mutex); io->files.file = NULL; if (io->files.mapping) (void)invalidate_inode_pages2(io->files.mapping); mutex_unlock(&io->plo->sysfs_mutex); fput(file); } } static int nfsio_sync(struct ploop_io * io) { return 0; } static int nfsio_stop(struct ploop_io * io) { return 0; } static int nfsio_init(struct ploop_io * io) { INIT_LIST_HEAD(&io->fsync_queue); init_waitqueue_head(&io->fsync_waitq); return 0; }
/* * Set attributes, and at the same time refresh them. * * Truncation is slightly complicated, because the 'truncate' request * may fail, in which case we don't want to touch the mapping. * vmtruncate() doesn't allow for this case, so do the rlimit checking * and the actual truncation by hand. */ int fuse_do_setattr(struct inode *inode, struct iattr *attr, struct file *file) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_req *req; struct fuse_setattr_in inarg; struct fuse_attr_out outarg; bool is_truncate = false; int is_wb = fc->writeback_cache; loff_t oldsize; int err; bool trust_local_mtime = is_wb && S_ISREG(inode->i_mode); if (fc->flags & FUSE_DEFAULT_PERMISSIONS) { err = inode_change_ok(inode, attr); if (err) return err; } if ((attr->ia_valid & ATTR_OPEN) && fc->atomic_o_trunc) return 0; if (attr->ia_valid & ATTR_SIZE) { err = inode_newsize_ok(inode, attr->ia_size); if (err) return err; is_truncate = true; } req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); if (is_truncate) { fuse_set_nowrite(inode); set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); } memset(&inarg, 0, sizeof(inarg)); memset(&outarg, 0, sizeof(outarg)); iattr_to_fattr(attr, &inarg, trust_local_mtime); if (file) { struct fuse_file *ff = file->private_data; inarg.valid |= FATTR_FH; inarg.fh = ff->fh; } if (attr->ia_valid & ATTR_SIZE) { /* For mandatory locking in truncate */ inarg.valid |= FATTR_LOCKOWNER; inarg.lock_owner = fuse_lock_owner_id(fc, current->files); } fuse_setattr_fill(fc, req, inode, &inarg, &outarg); fuse_request_send(fc, req); err = req->out.h.error; fuse_put_request(fc, req); if (err) { if (err == -EINTR) fuse_invalidate_attr(inode); goto error; } if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) { make_bad_inode(inode); err = -EIO; goto error; } spin_lock(&fc->lock); /* the kernel maintains i_mtime locally */ if (trust_local_mtime && (attr->ia_valid & ATTR_MTIME)) { inode->i_mtime = attr->ia_mtime; 
clear_bit(FUSE_I_MTIME_DIRTY, &fi->state); } fuse_change_attributes_common(inode, &outarg.attr, attr_timeout(&outarg)); oldsize = inode->i_size; if (!is_wb || is_truncate || !S_ISREG(inode->i_mode)) i_size_write(inode, outarg.attr.size); if (is_truncate) { /* NOTE: this may release/reacquire fc->lock */ __fuse_release_nowrite(inode); } spin_unlock(&fc->lock); /* * Only call invalidate_inode_pages2() after removing * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock. */ if ((is_truncate || !is_wb) && S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) { truncate_pagecache(inode, oldsize, outarg.attr.size); invalidate_inode_pages2(inode->i_mapping); } clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); return 0; error: if (is_truncate) fuse_release_nowrite(inode); clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); return err; }
/* * Get a layout for the pNFS client. */ int xfs_fs_map_blocks( struct inode *inode, loff_t offset, u64 length, struct iomap *iomap, bool write, u32 *device_generation) { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; struct xfs_bmbt_irec imap; xfs_fileoff_t offset_fsb, end_fsb; loff_t limit; int bmapi_flags = XFS_BMAPI_ENTIRE; int nimaps = 1; uint lock_flags; int error = 0; if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; /* * We can't export inodes residing on the realtime device. The realtime * device doesn't have a UUID to identify it, so the client has no way * to find it. */ if (XFS_IS_REALTIME_INODE(ip)) return -ENXIO; /* * Lock out any other I/O before we flush and invalidate the pagecache, * and then hand out a layout to the remote system. This is very * similar to direct I/O, except that the synchronization is much more * complicated. See the comment near xfs_break_layouts for a detailed * explanation. */ xfs_ilock(ip, XFS_IOLOCK_EXCL); error = -EINVAL; limit = mp->m_super->s_maxbytes; if (!write) limit = max(limit, round_up(i_size_read(inode), inode->i_sb->s_blocksize)); if (offset > limit) goto out_unlock; if (offset > limit - length) length = limit - offset; error = filemap_write_and_wait(inode->i_mapping); if (error) goto out_unlock; error = invalidate_inode_pages2(inode->i_mapping); if (WARN_ON_ONCE(error)) return error; end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + length); offset_fsb = XFS_B_TO_FSBT(mp, offset); lock_flags = xfs_ilock_data_map_shared(ip); error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, &nimaps, bmapi_flags); xfs_iunlock(ip, lock_flags); if (error) goto out_unlock; if (write) { enum xfs_prealloc_flags flags = 0; ASSERT(imap.br_startblock != DELAYSTARTBLOCK); if (!nimaps || imap.br_startblock == HOLESTARTBLOCK) { /* * xfs_iomap_write_direct() expects to take ownership of * the shared ilock. 
*/ xfs_ilock(ip, XFS_ILOCK_SHARED); error = xfs_iomap_write_direct(ip, offset, length, &imap, nimaps); if (error) goto out_unlock; /* * Ensure the next transaction is committed * synchronously so that the blocks allocated and * handed out to the client are guaranteed to be * present even after a server crash. */ flags |= XFS_PREALLOC_SET | XFS_PREALLOC_SYNC; } error = xfs_update_prealloc_flags(ip, flags); if (error) goto out_unlock; } xfs_iunlock(ip, XFS_IOLOCK_EXCL); xfs_bmbt_to_iomap(ip, iomap, &imap); *device_generation = mp->m_generation; return error; out_unlock: xfs_iunlock(ip, XFS_IOLOCK_EXCL); return error; }
/* * Set attributes, and at the same time refresh them. * * Truncation is slightly complicated, because the 'truncate' request * may fail, in which case we don't want to touch the mapping. * vmtruncate() doesn't allow for this case, so do the rlimit checking * and the actual truncation by hand. */ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr, struct file *file) { struct inode *inode = entry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_req *req; struct fuse_setattr_in inarg; struct fuse_attr_out outarg; bool is_truncate = false; loff_t oldsize; int err; if (!fuse_allow_task(fc, current)) return -EACCES; if (fc->flags & FUSE_DEFAULT_PERMISSIONS) { err = inode_change_ok(inode, attr); if (err) return err; } if ((attr->ia_valid & ATTR_OPEN) && fc->atomic_o_trunc) return 0; if (attr->ia_valid & ATTR_SIZE) { unsigned long limit; if (IS_SWAPFILE(inode)) return -ETXTBSY; limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; if (limit != RLIM_INFINITY && attr->ia_size > (loff_t) limit) { send_sig(SIGXFSZ, current, 0); return -EFBIG; } is_truncate = true; } req = fuse_get_req(fc); if (IS_ERR(req)) return PTR_ERR(req); if (is_truncate) fuse_set_nowrite(inode); memset(&inarg, 0, sizeof(inarg)); memset(&outarg, 0, sizeof(outarg)); iattr_to_fattr(attr, &inarg); if (file) { struct fuse_file *ff = file->private_data; inarg.valid |= FATTR_FH; inarg.fh = ff->fh; } if (attr->ia_valid & ATTR_SIZE) { /* For mandatory locking in truncate */ inarg.valid |= FATTR_LOCKOWNER; inarg.lock_owner = fuse_lock_owner_id(fc, current->files); } req->in.h.opcode = FUSE_SETATTR; req->in.h.nodeid = get_node_id(inode); req->in.numargs = 1; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; req->out.numargs = 1; if (fc->minor < 9) req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE; else req->out.args[0].size = sizeof(outarg); req->out.args[0].value = &outarg; request_send(fc, req); err = req->out.h.error; fuse_put_request(fc, req); if (err) { 
if (err == -EINTR) fuse_invalidate_attr(inode); goto error; } if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) { make_bad_inode(inode); err = -EIO; goto error; } spin_lock(&fc->lock); fuse_change_attributes_common(inode, &outarg.attr, attr_timeout(&outarg)); oldsize = inode->i_size; i_size_write(inode, outarg.attr.size); if (is_truncate) { /* NOTE: this may release/reacquire fc->lock */ __fuse_release_nowrite(inode); } spin_unlock(&fc->lock); /* * Only call invalidate_inode_pages2() after removing * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock. */ if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) { if (outarg.attr.size < oldsize) fuse_truncate(inode->i_mapping, outarg.attr.size); invalidate_inode_pages2(inode->i_mapping); } return 0; error: if (is_truncate) fuse_release_nowrite(inode); return err; }
static int fuse_do_setattr(struct dentry *entry, struct iattr *attr, struct file *file) { struct inode *inode = entry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_req *req; struct fuse_setattr_in inarg; struct fuse_attr_out outarg; bool is_truncate = false; loff_t oldsize; int err; if (!fuse_allow_task(fc, current)) return -EACCES; if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) attr->ia_valid |= ATTR_FORCE; err = inode_change_ok(inode, attr); if (err) return err; if (attr->ia_valid & ATTR_OPEN) { if (fc->atomic_o_trunc) return 0; file = NULL; } if (attr->ia_valid & ATTR_SIZE) is_truncate = true; req = fuse_get_req(fc); if (IS_ERR(req)) return PTR_ERR(req); if (is_truncate) { fuse_set_nowrite(inode); set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); } memset(&inarg, 0, sizeof(inarg)); memset(&outarg, 0, sizeof(outarg)); iattr_to_fattr(attr, &inarg); if (file) { struct fuse_file *ff = file->private_data; inarg.valid |= FATTR_FH; inarg.fh = ff->fh; } if (attr->ia_valid & ATTR_SIZE) { inarg.valid |= FATTR_LOCKOWNER; inarg.lock_owner = fuse_lock_owner_id(fc, current->files); } req->in.h.opcode = FUSE_SETATTR; req->in.h.nodeid = get_node_id(inode); req->in.numargs = 1; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; req->out.numargs = 1; if (fc->minor < 9) req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE; else req->out.args[0].size = sizeof(outarg); req->out.args[0].value = &outarg; fuse_request_send(fc, req); err = req->out.h.error; fuse_put_request(fc, req); if (err) { if (err == -EINTR) fuse_invalidate_attr(inode); goto error; } if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) { make_bad_inode(inode); err = -EIO; goto error; } spin_lock(&fc->lock); fuse_change_attributes_common(inode, &outarg.attr, attr_timeout(&outarg)); oldsize = inode->i_size; i_size_write(inode, outarg.attr.size); if (is_truncate) { __fuse_release_nowrite(inode); } spin_unlock(&fc->lock); if (S_ISREG(inode->i_mode) 
&& oldsize != outarg.attr.size) { truncate_pagecache(inode, oldsize, outarg.attr.size); invalidate_inode_pages2(inode->i_mapping); } clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); return 0; error: if (is_truncate) fuse_release_nowrite(inode); clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); return err; }
/* * Set attributes, and at the same time refresh them. * * Truncation is slightly complicated, because the 'truncate' request * may fail, in which case we don't want to touch the mapping. * vmtruncate() doesn't allow for this case, so do the rlimit checking * and the actual truncation by hand. */ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, struct file *file) { struct inode *inode = d_inode(dentry); struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); FUSE_ARGS(args); struct fuse_setattr_in inarg; struct fuse_attr_out outarg; bool is_truncate = false; bool is_wb = fc->writeback_cache; loff_t oldsize; int err; bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode); if (!fc->default_permissions) attr->ia_valid |= ATTR_FORCE; err = setattr_prepare(dentry, attr); if (err) return err; if (attr->ia_valid & ATTR_OPEN) { if (fc->atomic_o_trunc) return 0; file = NULL; } if (attr->ia_valid & ATTR_SIZE) is_truncate = true; if (is_truncate) { fuse_set_nowrite(inode); set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); if (trust_local_cmtime && attr->ia_size != inode->i_size) attr->ia_valid |= ATTR_MTIME | ATTR_CTIME; } memset(&inarg, 0, sizeof(inarg)); memset(&outarg, 0, sizeof(outarg)); iattr_to_fattr(attr, &inarg, trust_local_cmtime); if (file) { struct fuse_file *ff = file->private_data; inarg.valid |= FATTR_FH; inarg.fh = ff->fh; } if (attr->ia_valid & ATTR_SIZE) { /* For mandatory locking in truncate */ inarg.valid |= FATTR_LOCKOWNER; inarg.lock_owner = fuse_lock_owner_id(fc, current->files); } fuse_setattr_fill(fc, &args, inode, &inarg, &outarg); err = fuse_simple_request(fc, &args); if (err) { if (err == -EINTR) fuse_invalidate_attr(inode); goto error; } if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) { make_bad_inode(inode); err = -EIO; goto error; } spin_lock(&fc->lock); /* the kernel maintains i_mtime locally */ if (trust_local_cmtime) { if (attr->ia_valid & ATTR_MTIME) inode->i_mtime = attr->ia_mtime; if 
(attr->ia_valid & ATTR_CTIME) inode->i_ctime = attr->ia_ctime; /* FIXME: clear I_DIRTY_SYNC? */ } fuse_change_attributes_common(inode, &outarg.attr, attr_timeout(&outarg)); oldsize = inode->i_size; /* see the comment in fuse_change_attributes() */ if (!is_wb || is_truncate || !S_ISREG(inode->i_mode)) i_size_write(inode, outarg.attr.size); if (is_truncate) { /* NOTE: this may release/reacquire fc->lock */ __fuse_release_nowrite(inode); } spin_unlock(&fc->lock); /* * Only call invalidate_inode_pages2() after removing * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock. */ if ((is_truncate || !is_wb) && S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) { truncate_pagecache(inode, outarg.attr.size); invalidate_inode_pages2(inode->i_mapping); } clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); return 0; error: if (is_truncate) fuse_release_nowrite(inode); clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); return err; }
/*
 * xfs_file_dio_aio_write - handle direct IO writes
 *
 * Lock the inode appropriately to prepare for and issue a direct IO write.
 * By separating it from the buffered write path we remove all the tricky to
 * follow locking changes and looping.
 *
 * If there are cached pages or we're extending the file, we need IOLOCK_EXCL
 * until we're sure the bytes at the new EOF have been zeroed and/or the cached
 * pages are flushed out.
 *
 * In most cases the direct IO writes will be done holding IOLOCK_SHARED
 * allowing them to be done in parallel with reads and other direct IO writes.
 * However, if the IO is not aligned to filesystem blocks, the direct IO layer
 * needs to do sub-block zeroing and that requires serialisation against other
 * direct IOs to the same block. In this case we need to serialise the
 * submission of the unaligned IOs so that we don't get racing block zeroing in
 * the dio layer. To avoid the problem with aio, we also need to wait for
 * outstanding IOs to complete so that unwritten extent conversion is completed
 * before we try to map the overlapping block. This is currently implemented by
 * hitting it with a big hammer (i.e. inode_dio_wait()).
 *
 * Returns with locks held indicated by @iolock and errors indicated by
 * negative return values.
 *
 * NOTE(review): in this excerpt the body stops right after the post-write
 * invalidate_inode_pages2_range() call; the "out" label targeted by the
 * gotos and the function's closing brace are not visible here.
 */
STATIC ssize_t
xfs_file_dio_aio_write(
	struct kiocb		*iocb,
	struct iov_iter		*from)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	ssize_t			ret = 0;
	int			unaligned_io = 0;
	int			iolock;
	size_t			count = iov_iter_count(from);
	loff_t			end;
	struct iov_iter		data;
	/* Realtime inodes use the realtime device target, others the data one. */
	struct xfs_buftarg	*target = XFS_IS_REALTIME_INODE(ip) ?
					mp->m_rtdev_targp : mp->m_ddev_targp;

	/* DIO must be aligned to device logical sector size */
	if (!IS_DAX(inode) &&
	    ((iocb->ki_pos | count) & target->bt_logical_sectormask))
		return -EINVAL;

	/* "unaligned" here means not aligned to a filesystem block */
	if ((iocb->ki_pos & mp->m_blockmask) ||
	    ((iocb->ki_pos + count) & mp->m_blockmask))
		unaligned_io = 1;

	/*
	 * We don't need to take an exclusive lock unless the page cache needs
	 * to be invalidated or unaligned IO is being executed. We don't need to
	 * consider the EOF extension case here because
	 * xfs_file_aio_write_checks() will relock the inode as necessary for
	 * EOF zeroing cases and fill out the new inode size as appropriate.
	 */
	if (unaligned_io || mapping->nrpages)
		iolock = XFS_IOLOCK_EXCL;
	else
		iolock = XFS_IOLOCK_SHARED;
	xfs_rw_ilock(ip, iolock);

	/*
	 * Recheck if there are cached pages that need invalidate after we got
	 * the iolock to protect against other threads adding new pages while
	 * we were waiting for the iolock.
	 */
	if (mapping->nrpages && iolock == XFS_IOLOCK_SHARED) {
		xfs_rw_iunlock(ip, iolock);
		iolock = XFS_IOLOCK_EXCL;
		xfs_rw_ilock(ip, iolock);
	}

	/* iolock is passed by address, so the checks may change the lock mode. */
	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
	if (ret)
		goto out;
	/* Re-read the length after the checks and compute the last byte offset. */
	count = iov_iter_count(from);
	end = iocb->ki_pos + count - 1;

	/*
	 * See xfs_file_read_iter() for why we do a full-file flush here.
	 */
	if (mapping->nrpages) {
		ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
		if (ret)
			goto out;
		/*
		 * Invalidate whole pages. This can return an error if we fail
		 * to invalidate a page, but this should never happen on XFS.
		 * Warn if it does fail.
		 */
		ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping);
		WARN_ON_ONCE(ret);
		/* The invalidation result is only warned about, never returned. */
		ret = 0;
	}

	/*
	 * If we are doing unaligned IO, wait for all other IO to drain,
	 * otherwise demote the lock if we had to flush cached pages
	 */
	if (unaligned_io)
		inode_dio_wait(inode);
	else if (iolock == XFS_IOLOCK_EXCL) {
		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
		iolock = XFS_IOLOCK_SHARED;
	}

	trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);

	/*
	 * Issue the IO through a copy of the iterator; presumably so that
	 * @from itself is not advanced by direct_IO() -- TODO confirm against
	 * the part of the function that is not visible here.
	 */
	data = *from;
	ret = mapping->a_ops->direct_IO(iocb, &data);

	/* see generic_file_direct_write() for why this is necessary */
	if (mapping->nrpages) {
		invalidate_inode_pages2_range(mapping,
					      iocb->ki_pos >> PAGE_SHIFT,
					      end >> PAGE_SHIFT);
	}
/* * Set attributes, and at the same time refresh them. * * Truncation is slightly complicated, because the 'truncate' request * may fail, in which case we don't want to touch the mapping. * vmtruncate() doesn't allow for this case, so do the rlimit checking * and the actual truncation by hand. */ int fuse_do_setattr(struct inode *inode, struct iattr *attr, struct file *file) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_req *req; struct fuse_setattr_in inarg; struct fuse_attr_out outarg; bool is_truncate = false; loff_t oldsize; int err; if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) attr->ia_valid |= ATTR_FORCE; err = inode_change_ok(inode, attr); if (err) return err; if (attr->ia_valid & ATTR_OPEN) { if (fc->atomic_o_trunc) return 0; file = NULL; } if (attr->ia_valid & ATTR_SIZE) is_truncate = true; req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); if (is_truncate) fuse_set_nowrite(inode); memset(&inarg, 0, sizeof(inarg)); memset(&outarg, 0, sizeof(outarg)); iattr_to_fattr(attr, &inarg); if (file) { struct fuse_file *ff = file->private_data; inarg.valid |= FATTR_FH; inarg.fh = ff->fh; } if (attr->ia_valid & ATTR_SIZE) { /* For mandatory locking in truncate */ inarg.valid |= FATTR_LOCKOWNER; inarg.lock_owner = fuse_lock_owner_id(fc, current->files); } req->in.h.opcode = FUSE_SETATTR; req->in.h.nodeid = get_node_id(inode); req->in.numargs = 1; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; req->out.numargs = 1; if (fc->minor < 9) req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE; else req->out.args[0].size = sizeof(outarg); req->out.args[0].value = &outarg; fuse_request_send(fc, req); err = req->out.h.error; fuse_put_request(fc, req); if (err) { if (err == -EINTR) fuse_invalidate_attr(inode); goto error; } if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) { make_bad_inode(inode); err = -EIO; goto error; } spin_lock(&fc->lock); fuse_change_attributes_common(inode, &outarg.attr, attr_timeout(&outarg)); oldsize = 
inode->i_size; i_size_write(inode, outarg.attr.size); if (is_truncate) { /* NOTE: this may release/reacquire fc->lock */ __fuse_release_nowrite(inode); } spin_unlock(&fc->lock); /* * Only call invalidate_inode_pages2() after removing * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock. */ if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) { truncate_pagecache(inode, oldsize, outarg.attr.size); invalidate_inode_pages2(inode->i_mapping); } return 0; error: if (is_truncate) fuse_release_nowrite(inode); return err; }
/*
 * xfs_file_dio_aio_read - handle direct IO reads
 *
 * Issues the read via __blockdev_direct_IO() under IOLOCK_SHARED.  If the
 * mapping has cached pages, the lock is temporarily taken exclusively so the
 * whole file can be flushed and invalidated first.
 *
 * Returns the result of __blockdev_direct_IO() (on a non-negative result
 * both iocb->ki_pos and @to are advanced by it), 0 for an empty read or a
 * misaligned read starting exactly at the inode size, -EINVAL for any other
 * misaligned read, or the error from filemap_write_and_wait().
 */
STATIC ssize_t
xfs_file_dio_aio_read(
	struct kiocb		*iocb,
	struct iov_iter		*to)
{
	struct address_space	*mapping = iocb->ki_filp->f_mapping;
	struct inode		*inode = mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	loff_t			eof = i_size_read(inode);
	size_t			len = iov_iter_count(to);
	struct iov_iter		iter_copy;
	struct xfs_buftarg	*btp;
	ssize_t			ret = 0;

	trace_xfs_file_direct_read(ip, len, iocb->ki_pos);

	if (!len)
		return 0; /* skip atime */

	btp = XFS_IS_REALTIME_INODE(ip) ? ip->i_mount->m_rtdev_targp
					: ip->i_mount->m_ddev_targp;

	/* DIO must be aligned to device logical sector size */
	if ((iocb->ki_pos | len) & btp->bt_logical_sectormask)
		return (iocb->ki_pos == eof) ? 0 : -EINVAL;

	file_accessed(iocb->ki_filp);

	/*
	 * Locking is a bit tricky here. Taking the IO lock exclusively for
	 * direct IO would serialise all new concurrent read IO to this file
	 * behind IO already in progress, because in-progress IO holds the
	 * lock shared. The exclusive lock is only needed to blow away the
	 * page cache, so it is only taken when there are cached pages. The
	 * normal direct IO case of no page cache pages then proceeds
	 * concurrently without serialisation.
	 */
	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
	if (mapping->nrpages) {
		xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
		xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);

		/*
		 * The generic dio code only flushes the range of the
		 * particular I/O. Because the lock is exclusive here, this
		 * whole sequence is considerably more expensive, with a
		 * noticeable performance impact for any file with cached
		 * pages, even outside the range of the particular I/O.
		 *
		 * Hence, amortize the cost of the lock against a full file
		 * flush and reduce the chances of repeated iolock cycles
		 * going forward.
		 */
		if (mapping->nrpages) {
			ret = filemap_write_and_wait(mapping);
			if (ret) {
				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
				return ret;
			}

			/*
			 * Invalidate whole pages. This can return an error
			 * if a page fails to invalidate, but that should
			 * never happen on XFS. Warn if it does fail.
			 */
			ret = invalidate_inode_pages2(mapping);
			WARN_ON_ONCE(ret);
			ret = 0;
		}
		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
	}

	/* The IO runs on a copy of the iterator; @to is advanced below. */
	iter_copy = *to;
	ret = __blockdev_direct_IO(iocb, inode, btp->bt_bdev, &iter_copy,
				   xfs_get_blocks_direct, NULL, NULL, 0);
	if (ret >= 0) {
		iocb->ki_pos += ret;
		iov_iter_advance(to, ret);
	}
	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);

	return ret;
}