/** * generic_file_splice_write_nolock - generic_file_splice_write without mutexes * @pipe: pipe info * @out: file to write to * @len: number of bytes to splice * @flags: splice modifier flags * * Will either move or copy pages (determined by @flags options) from * the given pipe inode to the given file. The caller is responsible * for acquiring i_mutex on both inodes. * */ ssize_t generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags) { struct address_space *mapping = out->f_mapping; struct inode *inode = mapping->host; ssize_t ret; int err; err = remove_suid(out->f_dentry); if (unlikely(err)) return err; ret = __splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); if (ret > 0) { *ppos += ret; /* * If file or inode is SYNC and we actually wrote some data, * sync it. */ if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { err = generic_osync_inode(inode, mapping, OSYNC_METADATA|OSYNC_DATA); if (err) ret = err; } } return ret; }
/** * generic_file_splice_write_nolock - generic_file_splice_write without mutexes * @pipe: pipe info * @out: file to write to * @len: number of bytes to splice * @flags: splice modifier flags * * Will either move or copy pages (determined by @flags options) from * the given pipe inode to the given file. The caller is responsible * for acquiring i_mutex on both inodes. * */ ssize_t generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags) { struct address_space *mapping = out->f_mapping; struct inode *inode = mapping->host; ssize_t ret; int err; err = remove_suid(out->f_path.dentry); if (unlikely(err)) return err; ret = __splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); if (ret > 0) { unsigned long nr_pages; *ppos += ret; nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; /* * If file or inode is SYNC and we actually wrote some data, * sync it. */ if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { err = generic_osync_inode(inode, mapping, OSYNC_METADATA|OSYNC_DATA); if (err) ret = err; } balance_dirty_pages_ratelimited_nr(mapping, nr_pages); }
ssize_t /* bytes written, or (-) error */ xfs_write( bhv_desc_t *bdp, struct kiocb *iocb, const struct iovec *iovp, unsigned int nsegs, loff_t *offset, int ioflags, cred_t *credp) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; unsigned long segs = nsegs; xfs_inode_t *xip; xfs_mount_t *mp; ssize_t ret = 0, error = 0; xfs_fsize_t isize, new_size; xfs_iocore_t *io; vnode_t *vp; unsigned long seg; int iolock; int eventsent = 0; vrwlock_t locktype; size_t ocount = 0, count; loff_t pos; int need_isem = 1, need_flush = 0; XFS_STATS_INC(xs_write_calls); vp = BHV_TO_VNODE(bdp); xip = XFS_BHVTOI(bdp); for (seg = 0; seg < segs; seg++) { const struct iovec *iv = &iovp[seg]; /* * If any segment has a negative length, or the cumulative * length ever wraps negative then return -EINVAL. */ ocount += iv->iov_len; if (unlikely((ssize_t)(ocount|iv->iov_len) < 0)) return -EINVAL; if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len)) continue; if (seg == 0) return -EFAULT; segs = seg; ocount -= iv->iov_len; /* This segment is no good */ break; } count = ocount; pos = *offset; if (count == 0) return 0; io = &xip->i_iocore; mp = io->io_mount; if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; if (ioflags & IO_ISDIRECT) { xfs_buftarg_t *target = (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? mp->m_rtdev_targp : mp->m_ddev_targp; if ((pos & target->pbr_smask) || (count & target->pbr_smask)) return XFS_ERROR(-EINVAL); if (!VN_CACHED(vp) && pos < i_size_read(inode)) need_isem = 0; if (VN_CACHED(vp)) need_flush = 1; } relock: if (need_isem) { iolock = XFS_IOLOCK_EXCL; locktype = VRWLOCK_WRITE; down(&inode->i_sem); } else { iolock = XFS_IOLOCK_SHARED; locktype = VRWLOCK_WRITE_DIRECT; } xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); isize = i_size_read(inode); if (file->f_flags & O_APPEND) *offset = isize; start: error = -generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); if (error) { xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); goto out_unlock_isem; } new_size = pos + count; if (new_size > isize) io->io_new_size = new_size; if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS) && !eventsent)) { loff_t savedsize = pos; int dmflags = FILP_DELAY_FLAG(file); if (need_isem) dmflags |= DM_FLAGS_ISEM; xfs_iunlock(xip, XFS_ILOCK_EXCL); error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp, pos, count, dmflags, &locktype); if (error) { xfs_iunlock(xip, iolock); goto out_unlock_isem; } xfs_ilock(xip, XFS_ILOCK_EXCL); eventsent = 1; /* * The iolock was dropped and reaquired in XFS_SEND_DATA * so we have to recheck the size when appending. * We will only "goto start;" once, since having sent the * event prevents another call to XFS_SEND_DATA, which is * what allows the size to change in the first place. */ if ((file->f_flags & O_APPEND) && savedsize != isize) { pos = isize = xip->i_d.di_size; goto start; } } /* * On Linux, generic_file_write updates the times even if * no data is copied in so long as the write had a size. * * We must update xfs' times since revalidate will overcopy xfs. */ if (!(ioflags & IO_INVIS)) { xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); inode_update_time(inode, 1); } /* * If the offset is beyond the size of the file, we have a couple * of things to do. First, if there is already space allocated * we need to either create holes or zero the disk or ... * * If there is a page where the previous size lands, we need * to zero it out up to the new size. */ if (pos > isize) { error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, pos, isize, pos + count); if (error) { xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); goto out_unlock_isem; } } xfs_iunlock(xip, XFS_ILOCK_EXCL); /* * If we're writing the file then make sure to clear the * setuid and setgid bits if the process is not being run * by root. This keeps people from modifying setuid and * setgid binaries. */ if (((xip->i_d.di_mode & S_ISUID) || ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))) && !capable(CAP_FSETID)) { error = xfs_write_clear_setuid(xip); if (likely(!error)) error = -remove_suid(file->f_dentry); if (unlikely(error)) { xfs_iunlock(xip, iolock); goto out_unlock_isem; } } retry: /* We can write back this queue in page reclaim */ current->backing_dev_info = mapping->backing_dev_info; if ((ioflags & IO_ISDIRECT)) { if (need_flush) { xfs_inval_cached_trace(io, pos, -1, ctooff(offtoct(pos)), -1); VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(pos)), -1, FI_REMAPF_LOCKED); } if (need_isem) { /* demote the lock now the cached pages are gone */ XFS_ILOCK_DEMOTE(mp, io, XFS_IOLOCK_EXCL); up(&inode->i_sem); iolock = XFS_IOLOCK_SHARED; locktype = VRWLOCK_WRITE_DIRECT; need_isem = 0; } xfs_rw_enter_trace(XFS_DIOWR_ENTER, io, (void *)iovp, segs, *offset, ioflags); ret = generic_file_direct_write(iocb, iovp, &segs, pos, offset, count, ocount); /* * direct-io write to a hole: fall through to buffered I/O * for completing the rest of the request. */ if (ret >= 0 && ret != count) { XFS_STATS_ADD(xs_write_bytes, ret); pos += ret; count -= ret; need_isem = 1; ioflags &= ~IO_ISDIRECT; xfs_iunlock(xip, iolock); goto relock; } } else { xfs_rw_enter_trace(XFS_WRITE_ENTER, io, (void *)iovp, segs, *offset, ioflags); ret = generic_file_buffered_write(iocb, iovp, segs, pos, offset, count, ret); } current->backing_dev_info = NULL; if (ret == -EIOCBQUEUED) ret = wait_on_sync_kiocb(iocb); if ((ret == -ENOSPC) && DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) { xfs_rwunlock(bdp, locktype); error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp, DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */ if (error) goto out_unlock_isem; xfs_rwlock(bdp, locktype); pos = xip->i_d.di_size; goto retry; } if (*offset > xip->i_d.di_size) { xfs_ilock(xip, XFS_ILOCK_EXCL); if (*offset > xip->i_d.di_size) { xip->i_d.di_size = *offset; i_size_write(inode, *offset); xip->i_update_core = 1; xip->i_update_size = 1; } xfs_iunlock(xip, XFS_ILOCK_EXCL); } error = -ret; if (ret <= 0) goto out_unlock_internal; XFS_STATS_ADD(xs_write_bytes, ret); /* Handle various SYNC-type writes */ if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { /* * If we're treating this as O_DSYNC and we have not updated the * size, force the log. */ if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) && !(xip->i_update_size)) { xfs_inode_log_item_t *iip = xip->i_itemp; /* * If an allocation transaction occurred * without extending the size, then we have to force * the log up the proper point to ensure that the * allocation is permanent. We can't count on * the fact that buffered writes lock out direct I/O * writes - the direct I/O write could have extended * the size nontransactionally, then finished before * we started. xfs_write_file will think that the file * didn't grow but the update isn't safe unless the * size change is logged. * * Force the log if we've committed a transaction * against the inode or if someone else has and * the commit record hasn't gone to disk (e.g. * the inode is pinned). This guarantees that * all changes affecting the inode are permanent * when we return. */ if (iip && iip->ili_last_lsn) { xfs_log_force(mp, iip->ili_last_lsn, XFS_LOG_FORCE | XFS_LOG_SYNC); } else if (xfs_ipincount(xip) > 0) { xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); } } else { xfs_trans_t *tp; /* * O_SYNC or O_DSYNC _with_ a size update are handled * the same way. * * If the write was synchronous then we need to make * sure that the inode modification time is permanent. * We'll have updated the timestamp above, so here * we use a synchronous transaction to log the inode. * It's not fast, but it's necessary. * * If this a dsync write and the size got changed * non-transactionally, then we need to ensure that * the size change gets logged in a synchronous * transaction. */ tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC); if ((error = xfs_trans_reserve(tp, 0, XFS_SWRITE_LOG_RES(mp), 0, 0, 0))) { /* Transaction reserve failed */ xfs_trans_cancel(tp, 0); } else { /* Transaction reserve successful */ xfs_ilock(xip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, xip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, xip); xfs_trans_log_inode(tp, xip, XFS_ILOG_CORE); xfs_trans_set_sync(tp); error = xfs_trans_commit(tp, 0, NULL); xfs_iunlock(xip, XFS_ILOCK_EXCL); if (error) goto out_unlock_internal; } } xfs_rwunlock(bdp, locktype); if (need_isem) up(&inode->i_sem); error = sync_page_range(inode, mapping, pos, ret); if (!error) error = ret; return error; } out_unlock_internal: xfs_rwunlock(bdp, locktype); out_unlock_isem: if (need_isem) up(&inode->i_sem); return -error; }
/* FIXME: Ugliest function of all in LFS, need I say more? */ static ssize_t lfs_file_write( struct file *file, const char __user *buf, size_t count, loff_t *ppos) { loff_t pos; struct page *page; ssize_t res, written, bytes; struct inode *inode = file->f_dentry->d_inode; struct super_block *sb = inode->i_sb; struct segment *segp = LFS_SBI(sb)->s_curr; //dprintk("lfs_file_write called for %lu at pos %Lu\n", inode->i_ino, *ppos); if(file->f_flags & O_DIRECT) { dprintk("The file is requesting direct IO\n"); return -EINVAL; } if (unlikely(count < 0 )) return -EINVAL; if (unlikely(!access_ok(VERIFY_READ, buf, count))) return -EFAULT; //down(&inode->i_sem); /* lock the file */ mutex_lock(&inode->i_mutex); //BrechREiZ: We need this for Kernel 2.6.17 lfs_lock(sb); pos = *ppos; res = generic_write_checks(file, &pos, &count, 0); if (res) goto out; if(count == 0) goto out; res = remove_suid(file->f_dentry); if(res) goto out; //inode_update_time(inode, 1); /* update mtime and ctime */ file_update_time(inode); //BrechREiZ: We need this for Kernel 2.6.17 written = 0; do { long offset; size_t copied; int i, siblock, eiblock, boffset; sector_t block; offset = (segp->offset % BUF_IN_PAGE) * LFS_BSIZE; offset += pos & (LFS_BSIZE - 1); /* within block */ bytes = PAGE_CACHE_SIZE - offset; /* number of bytes written in this iteration */ invalidate_old_page(inode, pos); if (bytes > count) bytes = count; //dprintk("1:segp->start=%Lu,segp->offset=%d,segp->end=%Lu,offset=%lu,bytes=%d\n", segp->start, segp->offset, segp->end,offset,bytes); siblock = pos >> LFS_BSIZE_BITS; eiblock = (pos + bytes - 1) >> LFS_BSIZE_BITS; //dprintk("writing %d bytes at offset %ld (pos = %Lu)\n", bytes, offset, pos); //dprintk("siblock = %d, eiblock = %d\n", siblock, eiblock); /* * Bring in the user page that we will copy from _first_. * Otherwise there's a nasty deadlock on copying from the * same page as we're writing to, without it being marked * up-to-date. */ fault_in_pages_readable(buf, bytes); page = get_seg_page(segp); if (!page) { res = -ENOMEM; break; } /* fill the page with current inode blocks if any */ boffset = offset / LFS_BSIZE;; for(i = siblock; i <= eiblock; ++i, ++boffset) { struct buffer_head *bh; //dprintk("Asking for block %d\n", i); bh = lfs_read_block(inode, i); if(!bh) /* new block */ break; //dprintk("boffset = %d\n", boffset); memcpy(page_address(page) + LFS_BSIZE * boffset, bh->b_data, LFS_BSIZE); brelse(bh); } copied = __copy_from_user(page_address(page) + offset, buf, bytes); flush_dcache_page(page); block = segp->start + segp->offset; for(i = siblock;i <= eiblock; ++i, ++block) segsum_update_finfo(segp, inode->i_ino, i, block); block = segp->start + segp->offset; segp->offset += (bytes - 1)/LFS_BSIZE + 1; //dprintk("2:segp->start=%Lu,segp->offset=%d,segp->end=%Lu,offset=%lu,bytes=%d\n", //segp->start, segp->offset, segp->end,offset,bytes); BUG_ON(segp->start + segp->offset > segp->end); if(segp->start + segp->offset == segp->end) { dprintk("allocating new segment\n"); /* This also is going to write the previous segment */ segment_allocate_new(inode->i_sb, segp, segp->start + segp->offset); segp = LFS_SBI(sb)->s_curr; } /* update the inode */ for(i = siblock;i <= eiblock; ++i, ++block) update_inode(inode, i, block); //dprintk("start=%Lu,offset=%d,end=%Lu\n", segp->start, segp->offset, segp->end); segusetbl_add_livebytes(sb, segp->segnum, bytes); written += bytes; buf += bytes; pos += bytes; count -= bytes; } while(count); *ppos = pos; if(pos > inode->i_size) i_size_write(inode, pos); if(written) mark_inode_dirty(inode); lfs_unlock(sb); //up(&inode->i_sem); mutex_unlock(&inode->i_mutex); //BrechREiZ: and unlocking... return written ? written : res; out: lfs_unlock(sb); //up(&inode->i_sem); mutex_unlock(&inode->i_mutex); //BrechREiZ: and unlocking... return res; }
ssize_t /* bytes written, or (-) error */ xfs_write( bhv_desc_t *bdp, struct kiocb *iocb, const struct iovec *iovp, unsigned int nsegs, loff_t *offset, int ioflags, cred_t *credp) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; unsigned long segs = nsegs; xfs_inode_t *xip; xfs_mount_t *mp; ssize_t ret = 0, error = 0; xfs_fsize_t isize, new_size; xfs_iocore_t *io; bhv_vnode_t *vp; unsigned long seg; int iolock; int eventsent = 0; bhv_vrwlock_t locktype; size_t ocount = 0, count; loff_t pos; int need_i_mutex = 1, need_flush = 0; XFS_STATS_INC(xs_write_calls); vp = BHV_TO_VNODE(bdp); xip = XFS_BHVTOI(bdp); for (seg = 0; seg < segs; seg++) { const struct iovec *iv = &iovp[seg]; /* * If any segment has a negative length, or the cumulative * length ever wraps negative then return -EINVAL. */ ocount += iv->iov_len; if (unlikely((ssize_t)(ocount|iv->iov_len) < 0)) return -EINVAL; if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len)) continue; if (seg == 0) return -EFAULT; segs = seg; ocount -= iv->iov_len; /* This segment is no good */ break; } count = ocount; pos = *offset; if (count == 0) return 0; io = &xip->i_iocore; mp = io->io_mount; vfs_wait_for_freeze(vp->v_vfsp, SB_FREEZE_WRITE); if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; if (ioflags & IO_ISDIRECT) { xfs_buftarg_t *target = (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? mp->m_rtdev_targp : mp->m_ddev_targp; if ((pos & target->bt_smask) || (count & target->bt_smask)) return XFS_ERROR(-EINVAL); if (!VN_CACHED(vp) && pos < i_size_read(inode)) need_i_mutex = 0; if (VN_CACHED(vp)) need_flush = 1; } relock: if (need_i_mutex) { iolock = XFS_IOLOCK_EXCL; locktype = VRWLOCK_WRITE; mutex_lock(&inode->i_mutex); } else { iolock = XFS_IOLOCK_SHARED; locktype = VRWLOCK_WRITE_DIRECT; } xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); isize = i_size_read(inode); if (file->f_flags & O_APPEND) *offset = isize; start: error = -generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); if (error) { xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); goto out_unlock_mutex; } new_size = pos + count; if (new_size > isize) io->io_new_size = new_size; if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS) && !eventsent)) { loff_t savedsize = pos; int dmflags = FILP_DELAY_FLAG(file); if (need_i_mutex) dmflags |= DM_FLAGS_IMUX; xfs_iunlock(xip, XFS_ILOCK_EXCL); error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp, pos, count, dmflags, &locktype); if (error) { xfs_iunlock(xip, iolock); goto out_unlock_mutex; } xfs_ilock(xip, XFS_ILOCK_EXCL); eventsent = 1; /* * The iolock was dropped and reacquired in XFS_SEND_DATA * so we have to recheck the size when appending. * We will only "goto start;" once, since having sent the * event prevents another call to XFS_SEND_DATA, which is * what allows the size to change in the first place. */ if ((file->f_flags & O_APPEND) && savedsize != isize) { pos = isize = xip->i_d.di_size; goto start; } } if (likely(!(ioflags & IO_INVIS))) { file_update_time(file); xfs_ichgtime_fast(xip, inode, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); } /* * If the offset is beyond the size of the file, we have a couple * of things to do. First, if there is already space allocated * we need to either create holes or zero the disk or ... * * If there is a page where the previous size lands, we need * to zero it out up to the new size. */ if (pos > isize) { error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, pos, isize); if (error) { xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); goto out_unlock_mutex; } } xfs_iunlock(xip, XFS_ILOCK_EXCL); /* * If we're writing the file then make sure to clear the * setuid and setgid bits if the process is not being run * by root. This keeps people from modifying setuid and * setgid binaries. */ if (((xip->i_d.di_mode & S_ISUID) || ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))) && !capable(CAP_FSETID)) { error = xfs_write_clear_setuid(xip); if (likely(!error)) error = -remove_suid(file->f_path.dentry); if (unlikely(error)) { xfs_iunlock(xip, iolock); goto out_unlock_mutex; } } retry: /* We can write back this queue in page reclaim */ current->backing_dev_info = mapping->backing_dev_info; if ((ioflags & IO_ISDIRECT)) { if (need_flush) { xfs_inval_cached_trace(io, pos, -1, ctooff(offtoct(pos)), -1); bhv_vop_flushinval_pages(vp, ctooff(offtoct(pos)), -1, FI_REMAPF_LOCKED); } if (need_i_mutex) { /* demote the lock now the cached pages are gone */ XFS_ILOCK_DEMOTE(mp, io, XFS_IOLOCK_EXCL); mutex_unlock(&inode->i_mutex); iolock = XFS_IOLOCK_SHARED; locktype = VRWLOCK_WRITE_DIRECT; need_i_mutex = 0; } xfs_rw_enter_trace(XFS_DIOWR_ENTER, io, (void *)iovp, segs, *offset, ioflags); ret = generic_file_direct_write(iocb, iovp, &segs, pos, offset, count, ocount); /* * direct-io write to a hole: fall through to buffered I/O * for completing the rest of the request. */ if (ret >= 0 && ret != count) { XFS_STATS_ADD(xs_write_bytes, ret); pos += ret; count -= ret; need_i_mutex = 1; ioflags &= ~IO_ISDIRECT; xfs_iunlock(xip, iolock); goto relock; } } else { xfs_rw_enter_trace(XFS_WRITE_ENTER, io, (void *)iovp, segs, *offset, ioflags); ret = generic_file_buffered_write(iocb, iovp, segs, pos, offset, count, ret); } current->backing_dev_info = NULL; if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO)) ret = wait_on_sync_kiocb(iocb); if ((ret == -ENOSPC) && DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) { xfs_rwunlock(bdp, locktype); if (need_i_mutex) mutex_unlock(&inode->i_mutex); error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp, DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */ if (error) goto out_nounlocks; if (need_i_mutex) mutex_lock(&inode->i_mutex); xfs_rwlock(bdp, locktype); pos = xip->i_d.di_size; ret = 0; goto retry; } isize = i_size_read(inode); if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize)) *offset = isize; if (*offset > xip->i_d.di_size) { xfs_ilock(xip, XFS_ILOCK_EXCL); if (*offset > xip->i_d.di_size) { xip->i_d.di_size = *offset; i_size_write(inode, *offset); xip->i_update_core = 1; xip->i_update_size = 1; } xfs_iunlock(xip, XFS_ILOCK_EXCL); } error = -ret; if (ret <= 0) goto out_unlock_internal; XFS_STATS_ADD(xs_write_bytes, ret); /* Handle various SYNC-type writes */ if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { error = xfs_write_sync_logforce(mp, xip); if (error) goto out_unlock_internal; xfs_rwunlock(bdp, locktype); if (need_i_mutex) mutex_unlock(&inode->i_mutex); error = sync_page_range(inode, mapping, pos, ret); if (!error) error = ret; return error; } out_unlock_internal: xfs_rwunlock(bdp, locktype); out_unlock_mutex: if (need_i_mutex) mutex_unlock(&inode->i_mutex); out_nounlocks: return -error; }
ssize_t /* bytes written, or (-) error */ xfs_write( bhv_desc_t *bdp, uio_t *uio, int ioflag, cred_t *credp) { xfs_inode_t *xip; xfs_mount_t *mp; ssize_t ret = 0; int error = 0; xfs_fsize_t isize, new_size; xfs_fsize_t n, limit; xfs_fsize_t size; xfs_iocore_t *io; xfs_vnode_t *vp; int iolock; //int eventsent = 0; vrwlock_t locktype; xfs_off_t offset_c; xfs_off_t *offset; xfs_off_t pos; XFS_STATS_INC(xs_write_calls); vp = BHV_TO_VNODE(bdp); xip = XFS_BHVTOI(bdp); io = &xip->i_iocore; mp = io->io_mount; if (XFS_FORCED_SHUTDOWN(xip->i_mount)) { return EIO; } size = uio->uio_resid; pos = offset_c = uio->uio_offset; offset = &offset_c; if (unlikely(ioflag & IO_ISDIRECT)) { if (((__psint_t)buf & BBMASK) || (*offset & mp->m_blockmask) || (size & mp->m_blockmask)) { return EINVAL; } iolock = XFS_IOLOCK_SHARED; locktype = VRWLOCK_WRITE_DIRECT; } else { if (io->io_flags & XFS_IOCORE_RT) return EINVAL; iolock = XFS_IOLOCK_EXCL; locktype = VRWLOCK_WRITE; } iolock = XFS_IOLOCK_EXCL; locktype = VRWLOCK_WRITE; xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); isize = xip->i_d.di_size; limit = XFS_MAXIOFFSET(mp); if (ioflag & O_APPEND) *offset = isize; //start: n = limit - *offset; if (n <= 0) { xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); return EFBIG; } if (n < size) size = n; new_size = *offset + size; if (new_size > isize) { io->io_new_size = new_size; } #ifdef RMC /* probably be a long time before if ever that we do dmapi */ if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS) && !eventsent)) { loff_t savedsize = *offset; int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags); xfs_iunlock(xip, XFS_ILOCK_EXCL); error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp, *offset, size, dmflags, &locktype); if (error) { if (iolock) xfs_iunlock(xip, iolock); return -error; } xfs_ilock(xip, XFS_ILOCK_EXCL); eventsent = 1; /* * The iolock was dropped and reaquired in XFS_SEND_DATA * so we have to recheck the size when appending. * We will only "goto start;" once, since having sent the * event prevents another call to XFS_SEND_DATA, which is * what allows the size to change in the first place. */ if ((file->f_flags & O_APPEND) && savedsize != xip->i_d.di_size) { *offset = isize = xip->i_d.di_size; goto start; } } #endif /* * If the offset is beyond the size of the file, we have a couple * of things to do. First, if there is already space allocated * we need to either create holes or zero the disk or ... * * If there is a page where the previous size lands, we need * to zero it out up to the new size. */ if (!(ioflag & IO_ISDIRECT) && (*offset > isize && isize)) { error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, *offset, isize, *offset + size); if (error) { xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); return(-error); } } xfs_iunlock(xip, XFS_ILOCK_EXCL); #if 0 /* * If we're writing the file then make sure to clear the * setuid and setgid bits if the process is not being run * by root. This keeps people from modifying setuid and * setgid binaries. */ if (((xip->i_d.di_mode & S_ISUID) || ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))) && !capable(CAP_FSETID)) { error = xfs_write_clear_setuid(xip); if (likely(!error)) error = -remove_suid(file->f_dentry); if (unlikely(error)) { xfs_iunlock(xip, iolock); goto out_unlock_mutex; } } #endif //retry: if (unlikely(ioflag & IO_ISDIRECT)) { #ifdef RMC xfs_off_t pos = *offset; struct address_space *mapping = file->f_dentry->d_inode->i_mapping; struct inode *inode = mapping->host; ret = precheck_file_write(file, inode, &size, &pos); if (ret || size == 0) goto error; xfs_inval_cached_pages(vp, io, pos, 1, 1); inode->i_ctime = inode->i_mtime = CURRENT_TIME; /* mark_inode_dirty_sync(inode); - we do this later */ xfs_rw_enter_trace(XFS_DIOWR_ENTER, io, buf, size, pos, ioflags); ret = generic_file_direct_IO(WRITE, file, (char *)buf, size, pos); xfs_inval_cached_pages(vp, io, pos, 1, 1); if (ret > 0) *offset += ret; #endif } else { xfs_rw_enter_trace(XFS_WRITE_ENTER, io, buf, size, *offset, ioflags); ret = xfs_write_file(xip,uio,ioflag); } xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); //error: if (ret <= 0) { if (iolock) xfs_rwunlock(bdp, locktype); return ret; } XFS_STATS_ADD(xs_write_bytes, ret); if (*offset > xip->i_d.di_size) { xfs_ilock(xip, XFS_ILOCK_EXCL); if (*offset > xip->i_d.di_size) { printf("xfs_write look at doing more here %s:%d\n",__FILE__,__LINE__); #ifdef RMC struct inode *inode = LINVFS_GET_IP(vp); i_size_write(inode, *offset); mark_inode_dirty_sync(inode); #endif xip->i_d.di_size = *offset; xip->i_update_core = 1; xip->i_update_size = 1; } xfs_iunlock(xip, XFS_ILOCK_EXCL); } /* Handle various SYNC-type writes */ #if 0 // if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { #endif if (ioflag & IO_SYNC) { /* * If we're treating this as O_DSYNC and we have not updated the * size, force the log. */ if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) && !(xip->i_update_size)) { xfs_inode_log_item_t *iip = xip->i_itemp; /* * If an allocation transaction occurred * without extending the size, then we have to force * the log up the proper point to ensure that the * allocation is permanent. We can't count on * the fact that buffered writes lock out direct I/O * writes - the direct I/O write could have extended * the size nontransactionally, then finished before * we started. xfs_write_file will think that the file * didn't grow but the update isn't safe unless the * size change is logged. * * Force the log if we've committed a transaction * against the inode or if someone else has and * the commit record hasn't gone to disk (e.g. * the inode is pinned). This guarantees that * all changes affecting the inode are permanent * when we return. */ if (iip && iip->ili_last_lsn) { xfs_log_force(mp, iip->ili_last_lsn, XFS_LOG_FORCE | XFS_LOG_SYNC); } else if (xfs_ipincount(xip) > 0) { xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); } } else { xfs_trans_t *tp; /* * O_SYNC or O_DSYNC _with_ a size update are handled * the same way. * * If the write was synchronous then we need to make * sure that the inode modification time is permanent. * We'll have updated the timestamp above, so here * we use a synchronous transaction to log the inode. * It's not fast, but it's necessary. * * If this a dsync write and the size got changed * non-transactionally, then we need to ensure that * the size change gets logged in a synchronous * transaction. */ tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC); if ((error = xfs_trans_reserve(tp, 0, XFS_SWRITE_LOG_RES(mp), 0, 0, 0))) { /* Transaction reserve failed */ xfs_trans_cancel(tp, 0); } else { /* Transaction reserve successful */ xfs_ilock(xip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, xip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, xip); xfs_trans_log_inode(tp, xip, XFS_ILOG_CORE); xfs_trans_set_sync(tp); error = xfs_trans_commit(tp, 0, NULL); xfs_iunlock(xip, XFS_ILOCK_EXCL); } if (error) goto out_unlock_internal; } xfs_rwunlock(bdp, locktype); return ret; } /* (ioflags & O_SYNC) */ out_unlock_internal: xfs_rwunlock(bdp, locktype); #if 0 out_unlock_mutex: if (need_i_mutex) mutex_unlock(&inode->i_mutex); #endif //out_nounlocks: return -error; }
ssize_t /* bytes written, or (-) error */ xfs_write( struct xfs_inode *xip, struct kiocb *iocb, const struct iovec *iovp, unsigned int nsegs, loff_t *offset, int ioflags) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; bhv_vnode_t *vp = XFS_ITOV(xip); unsigned long segs = nsegs; xfs_mount_t *mp; ssize_t ret = 0, error = 0; xfs_fsize_t isize, new_size; int iolock; int eventsent = 0; bhv_vrwlock_t locktype; size_t ocount = 0, count; loff_t pos; int need_i_mutex; XFS_STATS_INC(xs_write_calls); error = generic_segment_checks(iovp, &segs, &ocount, VERIFY_READ); if (error) return error; count = ocount; pos = *offset; if (count == 0) return 0; mp = xip->i_mount; xfs_wait_for_freeze(mp, SB_FREEZE_WRITE); if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; relock: if (ioflags & IO_ISDIRECT) { iolock = XFS_IOLOCK_SHARED; locktype = VRWLOCK_WRITE_DIRECT; need_i_mutex = 0; } else { iolock = XFS_IOLOCK_EXCL; locktype = VRWLOCK_WRITE; need_i_mutex = 1; mutex_lock(&inode->i_mutex); } xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); start: error = -generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); if (error) { xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); goto out_unlock_mutex; } if ((DM_EVENT_ENABLED(xip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS) && !eventsent)) { int dmflags = FILP_DELAY_FLAG(file); if (need_i_mutex) dmflags |= DM_FLAGS_IMUX; xfs_iunlock(xip, XFS_ILOCK_EXCL); error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp, pos, count, dmflags, &locktype); if (error) { goto out_unlock_internal; } xfs_ilock(xip, XFS_ILOCK_EXCL); eventsent = 1; /* * The iolock was dropped and reacquired in XFS_SEND_DATA * so we have to recheck the size when appending. * We will only "goto start;" once, since having sent the * event prevents another call to XFS_SEND_DATA, which is * what allows the size to change in the first place. */ if ((file->f_flags & O_APPEND) && pos != xip->i_size) goto start; } if (ioflags & IO_ISDIRECT) { xfs_buftarg_t *target = XFS_IS_REALTIME_INODE(xip) ? mp->m_rtdev_targp : mp->m_ddev_targp; if ((pos & target->bt_smask) || (count & target->bt_smask)) { xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); return XFS_ERROR(-EINVAL); } if (!need_i_mutex && (VN_CACHED(vp) || pos > xip->i_size)) { xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); iolock = XFS_IOLOCK_EXCL; locktype = VRWLOCK_WRITE; need_i_mutex = 1; mutex_lock(&inode->i_mutex); xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); goto start; } } new_size = pos + count; if (new_size > xip->i_size) xip->i_new_size = new_size; if (likely(!(ioflags & IO_INVIS))) { file_update_time(file); xfs_ichgtime_fast(xip, inode, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); } /* * If the offset is beyond the size of the file, we have a couple * of things to do. First, if there is already space allocated * we need to either create holes or zero the disk or ... * * If there is a page where the previous size lands, we need * to zero it out up to the new size. */ if (pos > xip->i_size) { error = xfs_zero_eof(xip, pos, xip->i_size); if (error) { xfs_iunlock(xip, XFS_ILOCK_EXCL); goto out_unlock_internal; } } xfs_iunlock(xip, XFS_ILOCK_EXCL); /* * If we're writing the file then make sure to clear the * setuid and setgid bits if the process is not being run * by root. This keeps people from modifying setuid and * setgid binaries. */ if (((xip->i_d.di_mode & S_ISUID) || ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))) && !capable(CAP_FSETID)) { error = xfs_write_clear_setuid(xip); if (likely(!error)) error = -remove_suid(file->f_path.dentry); if (unlikely(error)) { goto out_unlock_internal; } } retry: /* We can write back this queue in page reclaim */ current->backing_dev_info = mapping->backing_dev_info; if ((ioflags & IO_ISDIRECT)) { if (VN_CACHED(vp)) { WARN_ON(need_i_mutex == 0); xfs_inval_cached_trace(xip, pos, -1, (pos & PAGE_CACHE_MASK), -1); error = xfs_flushinval_pages(xip, (pos & PAGE_CACHE_MASK), -1, FI_REMAPF_LOCKED); if (error) goto out_unlock_internal; } if (need_i_mutex) { /* demote the lock now the cached pages are gone */ xfs_ilock_demote(xip, XFS_IOLOCK_EXCL); mutex_unlock(&inode->i_mutex); iolock = XFS_IOLOCK_SHARED; locktype = VRWLOCK_WRITE_DIRECT; need_i_mutex = 0; } xfs_rw_enter_trace(XFS_DIOWR_ENTER, xip, (void *)iovp, segs, *offset, ioflags); ret = generic_file_direct_write(iocb, iovp, &segs, pos, offset, count, ocount); /* * direct-io write to a hole: fall through to buffered I/O * for completing the rest of the request. */ if (ret >= 0 && ret != count) { XFS_STATS_ADD(xs_write_bytes, ret); pos += ret; count -= ret; ioflags &= ~IO_ISDIRECT; xfs_iunlock(xip, iolock); goto relock; } } else { xfs_rw_enter_trace(XFS_WRITE_ENTER, xip, (void *)iovp, segs, *offset, ioflags); ret = generic_file_buffered_write(iocb, iovp, segs, pos, offset, count, ret); } current->backing_dev_info = NULL; if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO)) ret = wait_on_sync_kiocb(iocb); if (ret == -ENOSPC && DM_EVENT_ENABLED(xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) { xfs_rwunlock(xip, locktype); if (need_i_mutex) mutex_unlock(&inode->i_mutex); error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp, DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */ if (need_i_mutex) mutex_lock(&inode->i_mutex); xfs_rwlock(xip, locktype); if (error) goto out_unlock_internal; pos = xip->i_size; ret = 0; goto retry; } isize = i_size_read(inode); if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize)) *offset = isize; if (*offset > xip->i_size) { xfs_ilock(xip, XFS_ILOCK_EXCL); if (*offset > xip->i_size) xip->i_size = *offset; xfs_iunlock(xip, XFS_ILOCK_EXCL); } error = -ret; if (ret <= 0) goto out_unlock_internal; XFS_STATS_ADD(xs_write_bytes, ret); /* Handle various SYNC-type writes */ if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { int error2; xfs_rwunlock(xip, locktype); if (need_i_mutex) mutex_unlock(&inode->i_mutex); error2 = sync_page_range(inode, mapping, pos, ret); if (!error) error = error2; if (need_i_mutex) mutex_lock(&inode->i_mutex); xfs_rwlock(xip, locktype); error2 = xfs_write_sync_logforce(mp, xip); if (!error) error = error2; } out_unlock_internal: if (xip->i_new_size) { xfs_ilock(xip, XFS_ILOCK_EXCL); xip->i_new_size = 0; /* * If this was a direct or synchronous I/O that failed (such * as ENOSPC) then part of the I/O may have been written to * disk before the error occured. In this case the on-disk * file size may have been adjusted beyond the in-memory file * size and now needs to be truncated back. */ if (xip->i_d.di_size > xip->i_size) xip->i_d.di_size = xip->i_size; xfs_iunlock(xip, XFS_ILOCK_EXCL); } xfs_rwunlock(xip, locktype); out_unlock_mutex: if (need_i_mutex) mutex_unlock(&inode->i_mutex); return -error; }
long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = filp->f_dentry->d_inode; struct ext4_inode_info *ei = EXT4_I(inode); unsigned int flags; ext4_debug("cmd = %u, arg = %lu\n", cmd, arg); switch (cmd) { case EXT4_IOC_GETFLAGS: ext4_get_inode_flags(ei); flags = ei->i_flags & EXT4_FL_USER_VISIBLE; return put_user(flags, (int __user *) arg); case EXT4_IOC_SETFLAGS: { handle_t *handle = NULL; int err, migrate = 0; struct ext4_iloc iloc; unsigned int oldflags; unsigned int jflag; if (IS_RDONLY(inode)) return -EROFS; if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) return -EACCES; if (get_user(flags, (int __user *) arg)) return -EFAULT; flags = ext4_mask_flags(inode->i_mode, flags); mutex_lock(&inode->i_mutex); /* Is it quota file? Do not allow user to mess with it */ if (IS_NOQUOTA(inode)) { mutex_unlock(&inode->i_mutex); return -EPERM; } oldflags = ei->i_flags; /* The JOURNAL_DATA flag is modifiable only by root */ jflag = flags & EXT4_JOURNAL_DATA_FL; /* * The IMMUTABLE and APPEND_ONLY flags can only be changed by * the relevant capability. * * This test looks nicer. Thanks to Pauline Middelink */ if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) { if (!capable(CAP_LINUX_IMMUTABLE)) { mutex_unlock(&inode->i_mutex); return -EPERM; } } /* * The JOURNAL_DATA flag can only be changed by * the relevant capability. */ if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) { if (!capable(CAP_SYS_RESOURCE)) { mutex_unlock(&inode->i_mutex); return -EPERM; } } if (oldflags & EXT4_EXTENTS_FL) { /* We don't support clearning extent flags */ if (!(flags & EXT4_EXTENTS_FL)) { err = -EOPNOTSUPP; goto flags_out; } } else if (flags & EXT4_EXTENTS_FL) { /* migrate the file */ migrate = 1; flags &= ~EXT4_EXTENTS_FL; } if (flags & EXT4_EOFBLOCKS_FL) { /* we don't support adding EOFBLOCKS flag */ if (!(oldflags & EXT4_EOFBLOCKS_FL)) { err = -EOPNOTSUPP; goto flags_out; } } else if (oldflags & EXT4_EOFBLOCKS_FL) ext4_truncate(inode); handle = ext4_journal_start(inode, 1); if (IS_ERR(handle)) { err = PTR_ERR(handle); goto flags_out; } if (IS_SYNC(inode)) ext4_handle_sync(handle); err = ext4_reserve_inode_write(handle, inode, &iloc); if (err) goto flags_err; flags = flags & EXT4_FL_USER_MODIFIABLE; flags |= oldflags & ~EXT4_FL_USER_MODIFIABLE; ei->i_flags = flags; ext4_set_inode_flags(inode); inode->i_ctime = ext4_current_time(inode); err = ext4_mark_iloc_dirty(handle, inode, &iloc); flags_err: ext4_journal_stop(handle); if (err) goto flags_out; if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) err = ext4_change_inode_journal_flag(inode, jflag); if (err) goto flags_out; if (migrate) err = ext4_ext_migrate(inode); flags_out: mutex_unlock(&inode->i_mutex); return err; } case EXT4_IOC_GETVERSION: case EXT4_IOC_GETVERSION_OLD: return put_user(inode->i_generation, (int __user *) arg); case EXT4_IOC_SETVERSION: case EXT4_IOC_SETVERSION_OLD: { handle_t *handle; struct ext4_iloc iloc; __u32 generation; int err; if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) return -EPERM; if (IS_RDONLY(inode)) return -EROFS; if (get_user(generation, (int __user *) arg)) return -EFAULT; handle = ext4_journal_start(inode, 1); if (IS_ERR(handle)) return PTR_ERR(handle); err = ext4_reserve_inode_write(handle, inode, &iloc); if (err == 0) { inode->i_ctime = ext4_current_time(inode); inode->i_generation = generation; err = ext4_mark_iloc_dirty(handle, inode, &iloc); } ext4_journal_stop(handle); return err; } #ifdef CONFIG_JBD2_DEBUG case EXT4_IOC_WAIT_FOR_READONLY: /* * This is racy - by the time we're woken up and running, * the superblock could be released. And the module could * have been unloaded. So sue me. * * Returns 1 if it slept, else zero. */ { struct super_block *sb = inode->i_sb; DECLARE_WAITQUEUE(wait, current); int ret = 0; set_current_state(TASK_INTERRUPTIBLE); add_wait_queue(&EXT4_SB(sb)->ro_wait_queue, &wait); if (timer_pending(&EXT4_SB(sb)->turn_ro_timer)) { schedule(); ret = 1; } remove_wait_queue(&EXT4_SB(sb)->ro_wait_queue, &wait); return ret; } #endif case EXT4_IOC_GROUP_EXTEND: { ext4_fsblk_t n_blocks_count; struct super_block *sb = inode->i_sb; int err, err2=0; if (!capable(CAP_SYS_RESOURCE)) return -EPERM; if (get_user(n_blocks_count, (__u32 __user *)arg)) return -EFAULT; if (IS_RDONLY(inode)) return -EROFS; err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); if (EXT4_SB(sb)->s_journal) { jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal); jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); } if (err == 0) err = err2; return err; } case EXT4_IOC_MOVE_EXT: { struct move_extent me; struct file *donor_filp; int err; if (!(filp->f_mode & FMODE_READ) || !(filp->f_mode & FMODE_WRITE)) return -EBADF; if (copy_from_user(&me, (struct move_extent __user *)arg, sizeof(me))) return -EFAULT; me.moved_len = 0; donor_filp = fget(me.donor_fd); if (!donor_filp) return -EBADF; if (!(donor_filp->f_mode & FMODE_WRITE)) { err = -EBADF; goto mext_out; } if (IS_RDONLY(inode)) return -EROFS; err = ext4_move_extents(filp, donor_filp, me.orig_start, me.donor_start, me.len, &me.moved_len); if (me.moved_len > 0) remove_suid(donor_filp->f_path.dentry); if (copy_to_user((struct move_extent __user *)arg, &me, sizeof(me))) err = -EFAULT; mext_out: fput(donor_filp); return err; } case EXT4_IOC_GROUP_ADD: { struct ext4_new_group_data input; struct super_block *sb = inode->i_sb; int err, err2=0; if (!capable(CAP_SYS_RESOURCE)) return -EPERM; if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg, sizeof(input))) return -EFAULT; if (IS_RDONLY(inode)) return -EROFS; err = ext4_group_add(sb, &input); if (EXT4_SB(sb)->s_journal) { jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal); jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); } if (err == 0) err = err2; return err; } case EXT4_IOC_MIGRATE: { int err; if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) return -EACCES; if (IS_RDONLY(inode)) return -EROFS; /* * inode_mutex prevent write and truncate on the file. * Read still goes through. We take i_data_sem in * ext4_ext_swap_inode_data before we switch the * inode format to prevent read. */ mutex_lock(&(inode->i_mutex)); err = ext4_ext_migrate(inode); mutex_unlock(&(inode->i_mutex)); return err; } case EXT4_IOC_ALLOC_DA_BLKS: { int err; if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) return -EACCES; if (IS_RDONLY(inode)) return -EROFS; err = ext4_alloc_da_blocks(inode); return err; } default: return -ENOTTY; } }