/*
 * Common pre-write limit and setup checks.
 *
 * Called with the iolock held either shared or exclusive according to
 * @iolock, and returns with it held.  Might upgrade the iolock to exclusive
 * if called for a direct write beyond i_size.
 */
STATIC ssize_t
xfs_file_aio_write_checks(
	struct file		*file,
	loff_t			*pos,
	size_t			*count,
	int			*iolock)
{
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	int			error = 0;

restart:
	error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode));
	if (error)
		return error;

	/*
	 * If the offset is beyond the size of the file, we need to zero any
	 * blocks that fall between the existing EOF and the start of this
	 * write.  If zeroing is needed and we are currently holding the
	 * iolock shared, we need to upgrade it to exclusive, which means
	 * redoing all the checks above.
	 */
	if (*pos > i_size_read(inode)) {
		if (*iolock == XFS_IOLOCK_SHARED) {
			xfs_rw_iunlock(ip, *iolock);
			*iolock = XFS_IOLOCK_EXCL;
			xfs_rw_ilock(ip, *iolock);
			goto restart;
		}
		error = -xfs_zero_eof(ip, *pos, i_size_read(inode));
		if (error)
			return error;
	}

	/*
	 * Updating the timestamps will grab the ilock again from
	 * xfs_fs_dirty_inode, so we have to call it after dropping the
	 * lock above.  Eventually we should look into a way to avoid
	 * the pointless lock roundtrip.
	 */
	if (likely(!(file->f_mode & FMODE_NOCMTIME))) {
		error = file_update_time(file);
		if (error)
			return error;
	}

	/*
	 * If we're writing the file then make sure to clear the setuid and
	 * setgid bits if the process is not being run by root.  This keeps
	 * people from modifying setuid and setgid binaries.
	 */
	return file_remove_suid(file);
}
/*
 * Common pre-write limit and setup checks.
 *
 * Called with the iolock held either shared or exclusive according to
 * @iolock, and returns with it held.  Might upgrade the iolock to exclusive
 * if called for a direct write beyond i_size.
 */
STATIC ssize_t
xfs_file_aio_write_checks(
	struct file		*file,
	loff_t			*pos,
	size_t			*count,
	int			*iolock)
{
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	int			error = 0;

	xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
restart:
	error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode));
	if (error) {
		xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
		return error;
	}

	if (likely(!(file->f_mode & FMODE_NOCMTIME)))
		file_update_time(file);

	/*
	 * If the offset is beyond the size of the file, we need to zero any
	 * blocks that fall between the existing EOF and the start of this
	 * write.  If zeroing is needed and we are currently holding the
	 * iolock shared, we need to update it to exclusive which involves
	 * dropping all locks and relocking to maintain correct locking order.
	 * If we do this, restart the function to ensure all checks and values
	 * are still valid.
	 */
	if (*pos > i_size_read(inode)) {
		if (*iolock == XFS_IOLOCK_SHARED) {
			xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock);
			*iolock = XFS_IOLOCK_EXCL;
			xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
			goto restart;
		}
		error = -xfs_zero_eof(ip, *pos, i_size_read(inode));
	}
	xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
	if (error)
		return error;

	/*
	 * If we're writing the file then make sure to clear the setuid and
	 * setgid bits if the process is not being run by root.  This keeps
	 * people from modifying setuid and setgid binaries.
	 */
	return file_remove_suid(file);
}
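/*
 * Editor's note: an illustrative, hedged sketch of how a caller might use
 * the checks helper above.  It follows the buffered-write pattern visible in
 * the larger xfs_file_aio_write paths later in this section; the wrapper name
 * xfs_file_buffered_write_sketch and its exact argument list are assumptions
 * made for illustration, not code taken from the snippets here.
 */
STATIC ssize_t
xfs_file_buffered_write_sketch(
	struct kiocb		*iocb,
	const struct iovec	*iovp,
	unsigned long		nr_segs,
	loff_t			pos,
	size_t			count)
{
	struct file		*file = iocb->ki_filp;
	struct xfs_inode	*ip = XFS_I(file->f_mapping->host);
	int			iolock = XFS_IOLOCK_EXCL;
	ssize_t			ret;

	/* The caller takes the iolock; the checks helper may upgrade it. */
	xfs_rw_ilock(ip, iolock);

	ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock);
	if (ret)
		goto out;

	/* Same buffered-write call used by the full write paths below. */
	ret = generic_file_buffered_write(iocb, iovp, nr_segs, pos,
					  &iocb->ki_pos, count, 0);
out:
	xfs_rw_iunlock(ip, iolock);
	return ret;
}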
ssize_t hmfs_xip_file_write(struct file *filp, const char __user *buf,
			    size_t len, loff_t *ppos)
{
	struct address_space *mapping = filp->f_mapping;
	struct inode *inode = filp->f_inode;
	struct hmfs_sb_info *sbi = HMFS_SB(inode->i_sb);
	size_t count = 0;
	ssize_t ret;
	loff_t pos;
	int ilock;

	mutex_lock(&inode->i_mutex);

	if (!access_ok(VERIFY_READ, buf, len)) {
		ret = -EFAULT;
		goto out_up;
	}

	pos = *ppos;
	count = len;

	current->backing_dev_info = mapping->backing_dev_info;

	ret = generic_write_checks(filp, &pos, &count, S_ISBLK(inode->i_mode));
	if (ret)
		goto out_backing;
	if (count == 0)
		goto out_backing;

	ret = file_remove_suid(filp);
	if (ret)
		goto out_backing;

	ret = file_update_time(filp);
	if (ret)
		goto out_backing;

	inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;

	ilock = mutex_lock_op(sbi);
	ret = __hmfs_xip_file_write(filp, buf, count, pos, ppos);
	mutex_unlock_op(sbi, ilock);

	mark_inode_dirty(inode);

out_backing:
	current->backing_dev_info = NULL;
out_up:
	mutex_unlock(&inode->i_mutex);
	return ret;
}
/*
 * Common pre-write limit and setup checks.
 *
 * Returns with iolock held according to @iolock.
 */
STATIC ssize_t
xfs_file_aio_write_checks(
	struct file		*file,
	loff_t			*pos,
	size_t			*count,
	int			*iolock)
{
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	xfs_fsize_t		new_size;
	int			error = 0;

	xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
	error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode));
	if (error) {
		xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock);
		*iolock = 0;
		return error;
	}

	new_size = *pos + *count;
	if (new_size > ip->i_size)
		ip->i_new_size = new_size;

	if (likely(!(file->f_mode & FMODE_NOCMTIME)))
		file_update_time(file);

	/*
	 * If the offset is beyond the size of the file, we need to zero any
	 * blocks that fall between the existing EOF and the start of this
	 * write.
	 */
	if (*pos > ip->i_size)
		error = -xfs_zero_eof(ip, *pos, ip->i_size);

	xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
	if (error)
		return error;

	/*
	 * If we're writing the file then make sure to clear the setuid and
	 * setgid bits if the process is not being run by root.  This keeps
	 * people from modifying setuid and setgid binaries.
	 */
	return file_remove_suid(file);
}
STATIC ssize_t
xfs_file_aio_write_checks(
	struct file		*file,
	loff_t			*pos,
	size_t			*count,
	int			*iolock)
{
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	int			error = 0;

	xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
restart:
	error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode));
	if (error) {
		xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
		return error;
	}

	/*
	 * Writing beyond EOF requires zeroing the range between the old EOF
	 * and the start of this write, which in turn requires the iolock to
	 * be held exclusively.  Upgrade a shared iolock and restart so the
	 * checks above are redone under the new lock.
	 */
	if (*pos > i_size_read(inode)) {
		if (*iolock == XFS_IOLOCK_SHARED) {
			xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock);
			*iolock = XFS_IOLOCK_EXCL;
			xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
			goto restart;
		}
		error = -xfs_zero_eof(ip, *pos, i_size_read(inode));
	}
	xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
	if (error)
		return error;

	/* Update timestamps and clear the setuid/setgid bits as needed. */
	if (likely(!(file->f_mode & FMODE_NOCMTIME)))
		file_update_time(file);

	return file_remove_suid(file);
}
static ssize_t svfs_file_splice_write(struct pipe_inode_info *pipe,
				      struct file *out, loff_t *ppos,
				      size_t len, unsigned int flags)
{
	struct address_space *mapping;
	struct inode *inode;
	struct file *llfs_filp;
	struct svfs_inode *si;
	struct splice_desc sd = {0,};
	ssize_t ret;

	svfs_entry(mdc, "pos %lu, len %ld, flags 0x%x\n",
		   (unsigned long)*ppos, (long)len, flags);

	si = SVFS_I(out->f_dentry->d_inode);
	if (si->state & SVFS_STATE_DA) {
		/* create it now */
		ASSERT(!(si->state & SVFS_STATE_CONN));
		ret = llfs_create(out->f_dentry);
		if (ret)
			goto out;
	}
	if (!(si->state & SVFS_STATE_CONN)) {
		/* open it ? */
		ret = llfs_lookup(out->f_dentry->d_inode);
		if (ret)
			goto out;
	}
	llfs_filp = si->llfs_md.llfs_filp;
	ASSERT(llfs_filp);

	mapping = llfs_filp->f_mapping;
	inode = mapping->host;
	sd.total_len = len;
	sd.flags = flags;
	sd.pos = *ppos;
	sd.u.file = llfs_filp;

	pipe_lock(pipe);

	splice_from_pipe_begin(&sd);
	do {
		ret = splice_from_pipe_next(pipe, &sd);
		if (ret <= 0)
			break;

		mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
		ret = file_remove_suid(out);
		if (!ret)
			ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file);
		mutex_unlock(&inode->i_mutex);
	} while (ret > 0);
	splice_from_pipe_end(pipe, &sd);

	pipe_unlock(pipe);

	if (sd.num_spliced)
		ret = sd.num_spliced;

	if (ret > 0) {
		unsigned long nr_pages;

		*ppos += ret;
		nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;

		/*
		 * If file or inode is SYNC and we actually wrote some data,
		 * sync it.
		 */
		if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
			int err;

			mutex_lock(&inode->i_mutex);
			err = generic_osync_inode(inode, mapping,
						  OSYNC_METADATA|OSYNC_DATA);
			mutex_unlock(&inode->i_mutex);

			if (err)
				ret = err;
		}
		balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
	}
STATIC ssize_t
xfs_file_aio_write(
	struct kiocb		*iocb,
	const struct iovec	*iovp,
	unsigned long		nr_segs,
	loff_t			pos)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	ssize_t			ret = 0, error = 0;
	int			ioflags = 0;
	xfs_fsize_t		isize, new_size;
	int			iolock;
	int			eventsent = 0;
	size_t			ocount = 0, count;
	int			need_i_mutex;

	XFS_STATS_INC(xs_write_calls);

	BUG_ON(iocb->ki_pos != pos);

	if (unlikely(file->f_flags & O_DIRECT))
		ioflags |= IO_ISDIRECT;
	if (file->f_mode & FMODE_NOCMTIME)
		ioflags |= IO_INVIS;

	error = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ);
	if (error)
		return error;

	count = ocount;
	if (count == 0)
		return 0;

	xfs_wait_for_freeze(mp, SB_FREEZE_WRITE);

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

relock:
	if (ioflags & IO_ISDIRECT) {
		iolock = XFS_IOLOCK_SHARED;
		need_i_mutex = 0;
	} else {
		iolock = XFS_IOLOCK_EXCL;
		need_i_mutex = 1;
		mutex_lock(&inode->i_mutex);
	}

	xfs_ilock(ip, XFS_ILOCK_EXCL|iolock);

start:
	error = -generic_write_checks(file, &pos, &count,
					S_ISBLK(inode->i_mode));
	if (error) {
		xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock);
		goto out_unlock_mutex;
	}

	if ((DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) &&
	    !(ioflags & IO_INVIS) && !eventsent)) {
		int		dmflags = FILP_DELAY_FLAG(file);

		if (need_i_mutex)
			dmflags |= DM_FLAGS_IMUX;

		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		error = XFS_SEND_DATA(ip->i_mount, DM_EVENT_WRITE, ip,
				      pos, count, dmflags, &iolock);
		if (error) {
			goto out_unlock_internal;
		}
		xfs_ilock(ip, XFS_ILOCK_EXCL);
		eventsent = 1;

		/*
		 * The iolock was dropped and reacquired in XFS_SEND_DATA
		 * so we have to recheck the size when appending.
		 * We will only "goto start;" once, since having sent the
		 * event prevents another call to XFS_SEND_DATA, which is
		 * what allows the size to change in the first place.
		 */
		if ((file->f_flags & O_APPEND) && pos != ip->i_size)
			goto start;
	}

	if (ioflags & IO_ISDIRECT) {
		xfs_buftarg_t	*target =
			XFS_IS_REALTIME_INODE(ip) ?
				mp->m_rtdev_targp : mp->m_ddev_targp;

		if ((pos & target->bt_smask) || (count & target->bt_smask)) {
			xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock);
			return XFS_ERROR(-EINVAL);
		}

		if (!need_i_mutex && (mapping->nrpages || pos > ip->i_size)) {
			xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock);
			iolock = XFS_IOLOCK_EXCL;
			need_i_mutex = 1;
			mutex_lock(&inode->i_mutex);
			xfs_ilock(ip, XFS_ILOCK_EXCL|iolock);
			goto start;
		}
	}

	new_size = pos + count;
	if (new_size > ip->i_size)
		ip->i_new_size = new_size;

	if (likely(!(ioflags & IO_INVIS)))
		file_update_time(file);

	/*
	 * If the offset is beyond the size of the file, we have a couple
	 * of things to do. First, if there is already space allocated
	 * we need to either create holes or zero the disk or ...
	 *
	 * If there is a page where the previous size lands, we need
	 * to zero it out up to the new size.
	 */
	if (pos > ip->i_size) {
		error = xfs_zero_eof(ip, pos, ip->i_size);
		if (error) {
			xfs_iunlock(ip, XFS_ILOCK_EXCL);
			goto out_unlock_internal;
		}
	}
	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	/*
	 * If we're writing the file then make sure to clear the
	 * setuid and setgid bits if the process is not being run
	 * by root.  This keeps people from modifying setuid and
	 * setgid binaries.
	 */
	error = -file_remove_suid(file);
	if (unlikely(error))
		goto out_unlock_internal;

	/* We can write back this queue in page reclaim */
	current->backing_dev_info = mapping->backing_dev_info;

	if ((ioflags & IO_ISDIRECT)) {
		if (mapping->nrpages) {
			WARN_ON(need_i_mutex == 0);
			error = xfs_flushinval_pages(ip,
					(pos & PAGE_CACHE_MASK),
					-1, FI_REMAPF_LOCKED);
			if (error)
				goto out_unlock_internal;
		}

		if (need_i_mutex) {
			/* demote the lock now the cached pages are gone */
			xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
			mutex_unlock(&inode->i_mutex);

			iolock = XFS_IOLOCK_SHARED;
			need_i_mutex = 0;
		}

		trace_xfs_file_direct_write(ip, count, iocb->ki_pos, ioflags);
		ret = generic_file_direct_write(iocb, iovp,
				&nr_segs, pos, &iocb->ki_pos, count, ocount);

		/*
		 * direct-io write to a hole: fall through to buffered I/O
		 * for completing the rest of the request.
		 */
		if (ret >= 0 && ret != count) {
			XFS_STATS_ADD(xs_write_bytes, ret);

			pos += ret;
			count -= ret;

			ioflags &= ~IO_ISDIRECT;
			xfs_iunlock(ip, iolock);
			goto relock;
		}
	} else {
		int enospc = 0;
		ssize_t ret2 = 0;

write_retry:
		trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, ioflags);
		ret2 = generic_file_buffered_write(iocb, iovp, nr_segs,
				pos, &iocb->ki_pos, count, ret);
		/*
		 * if we just got an ENOSPC, flush the inode now we
		 * aren't holding any page locks and retry *once*
		 */
		if (ret2 == -ENOSPC && !enospc) {
			error = xfs_flush_pages(ip, 0, -1, 0, FI_NONE);
			if (error)
				goto out_unlock_internal;
			enospc = 1;
			goto write_retry;
		}
		ret = ret2;
	}

	current->backing_dev_info = NULL;

	isize = i_size_read(inode);
	if (unlikely(ret < 0 && ret != -EFAULT && iocb->ki_pos > isize))
		iocb->ki_pos = isize;

	if (iocb->ki_pos > ip->i_size) {
		xfs_ilock(ip, XFS_ILOCK_EXCL);
		if (iocb->ki_pos > ip->i_size)
			ip->i_size = iocb->ki_pos;
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
	}

	if (ret == -ENOSPC &&
	    DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) {
		xfs_iunlock(ip, iolock);
		if (need_i_mutex)
			mutex_unlock(&inode->i_mutex);
		error = XFS_SEND_NAMESP(ip->i_mount, DM_EVENT_NOSPACE, ip,
				DM_RIGHT_NULL, ip, DM_RIGHT_NULL, NULL, NULL,
				0, 0, 0); /* Delay flag intentionally unused */
		if (need_i_mutex)
			mutex_lock(&inode->i_mutex);
		xfs_ilock(ip, iolock);
		if (error)
			goto out_unlock_internal;
		goto start;
	}

	error = -ret;
	if (ret <= 0)
		goto out_unlock_internal;

	XFS_STATS_ADD(xs_write_bytes, ret);

	/* Handle various SYNC-type writes */
	if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
		loff_t end = pos + ret - 1;
		int error2;

		xfs_iunlock(ip, iolock);
		if (need_i_mutex)
			mutex_unlock(&inode->i_mutex);

		error2 = filemap_write_and_wait_range(mapping, pos, end);
		if (!error)
			error = error2;
		if (need_i_mutex)
			mutex_lock(&inode->i_mutex);
		xfs_ilock(ip, iolock);

		error2 = -xfs_file_fsync(file, file->f_path.dentry,
					(file->f_flags & __O_SYNC) ? 0 : 1);
		if (!error)
			error = error2;
	}

out_unlock_internal:
	if (ip->i_new_size) {
		xfs_ilock(ip, XFS_ILOCK_EXCL);
		ip->i_new_size = 0;
		/*
		 * If this was a direct or synchronous I/O that failed (such
		 * as ENOSPC) then part of the I/O may have been written to
		 * disk before the error occurred.  In this case the on-disk
		 * file size may have been adjusted beyond the in-memory file
		 * size and now needs to be truncated back.
		 */
		if (ip->i_d.di_size > ip->i_size)
			ip->i_d.di_size = ip->i_size;
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
	}
	xfs_iunlock(ip, iolock);
out_unlock_mutex:
	if (need_i_mutex)
		mutex_unlock(&inode->i_mutex);
	return -error;
}
long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	struct inode *inode = filp->f_dentry->d_inode;
	struct super_block *sb = inode->i_sb;
	struct ext4_inode_info *ei = EXT4_I(inode);
	unsigned int flags;

	ext4_debug("cmd = %u, arg = %lu\n", cmd, arg);

	switch (cmd) {
	case EXT4_IOC_GETFLAGS:
		ext4_get_inode_flags(ei);
		flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
		return put_user(flags, (int __user *) arg);
	case EXT4_IOC_SETFLAGS: {
		handle_t *handle = NULL;
		int err, migrate = 0;
		struct ext4_iloc iloc;
		unsigned int oldflags;
		unsigned int jflag;

		if (!is_owner_or_cap(inode))
			return -EACCES;

		if (get_user(flags, (int __user *) arg))
			return -EFAULT;

		err = mnt_want_write(filp->f_path.mnt);
		if (err)
			return err;

		flags = ext4_mask_flags(inode->i_mode, flags);

		err = -EPERM;
		mutex_lock(&inode->i_mutex);
		/* Is it quota file? Do not allow user to mess with it */
		if (IS_NOQUOTA(inode))
			goto flags_out;

		oldflags = ei->i_flags;

		/* The JOURNAL_DATA flag is modifiable only by root */
		jflag = flags & EXT4_JOURNAL_DATA_FL;

		/*
		 * The IMMUTABLE and APPEND_ONLY flags can only be changed by
		 * the relevant capability.
		 *
		 * This test looks nicer. Thanks to Pauline Middelink
		 */
		if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
			if (!capable(CAP_LINUX_IMMUTABLE))
				goto flags_out;
		}

		/*
		 * The JOURNAL_DATA flag can only be changed by
		 * the relevant capability.
		 */
		if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
			if (!capable(CAP_SYS_RESOURCE))
				goto flags_out;
		}
		if (oldflags & EXT4_EXTENTS_FL) {
			/* We don't support clearing extent flags */
			if (!(flags & EXT4_EXTENTS_FL)) {
				err = -EOPNOTSUPP;
				goto flags_out;
			}
		} else if (flags & EXT4_EXTENTS_FL) {
			/* migrate the file */
			migrate = 1;
			flags &= ~EXT4_EXTENTS_FL;
		}

		if (flags & EXT4_EOFBLOCKS_FL) {
			/* we don't support adding EOFBLOCKS flag */
			if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
				err = -EOPNOTSUPP;
				goto flags_out;
			}
		} else if (oldflags & EXT4_EOFBLOCKS_FL)
			ext4_truncate(inode);

		handle = ext4_journal_start(inode, 1);
		if (IS_ERR(handle)) {
			err = PTR_ERR(handle);
			goto flags_out;
		}
		if (IS_SYNC(inode))
			ext4_handle_sync(handle);
		err = ext4_reserve_inode_write(handle, inode, &iloc);
		if (err)
			goto flags_err;

		flags = flags & EXT4_FL_USER_MODIFIABLE;
		flags |= oldflags & ~EXT4_FL_USER_MODIFIABLE;
		ei->i_flags = flags;

		ext4_set_inode_flags(inode);
		inode->i_ctime = ext4_current_time(inode);

		err = ext4_mark_iloc_dirty(handle, inode, &iloc);
flags_err:
		ext4_journal_stop(handle);
		if (err)
			goto flags_out;

		if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
			err = ext4_change_inode_journal_flag(inode, jflag);
		if (err)
			goto flags_out;
		if (migrate)
			err = ext4_ext_migrate(inode);
flags_out:
		mutex_unlock(&inode->i_mutex);
		mnt_drop_write(filp->f_path.mnt);
		return err;
	}
	case EXT4_IOC_GETVERSION:
	case EXT4_IOC_GETVERSION_OLD:
		return put_user(inode->i_generation, (int __user *) arg);
	case EXT4_IOC_SETVERSION:
	case EXT4_IOC_SETVERSION_OLD: {
		handle_t *handle;
		struct ext4_iloc iloc;
		__u32 generation;
		int err;

		if (!is_owner_or_cap(inode))
			return -EPERM;

		err = mnt_want_write(filp->f_path.mnt);
		if (err)
			return err;
		if (get_user(generation, (int __user *) arg)) {
			err = -EFAULT;
			goto setversion_out;
		}

		handle = ext4_journal_start(inode, 1);
		if (IS_ERR(handle)) {
			err = PTR_ERR(handle);
			goto setversion_out;
		}
		err = ext4_reserve_inode_write(handle, inode, &iloc);
		if (err == 0) {
			inode->i_ctime = ext4_current_time(inode);
			inode->i_generation = generation;
			err = ext4_mark_iloc_dirty(handle, inode, &iloc);
		}
		ext4_journal_stop(handle);
setversion_out:
		mnt_drop_write(filp->f_path.mnt);
		return err;
	}
#ifdef CONFIG_JBD2_DEBUG
	case EXT4_IOC_WAIT_FOR_READONLY:
		/*
		 * This is racy - by the time we're woken up and running,
		 * the superblock could be released.  And the module could
		 * have been unloaded.  So sue me.
		 *
		 * Returns 1 if it slept, else zero.
		 */
		{
			DECLARE_WAITQUEUE(wait, current);
			int ret = 0;

			set_current_state(TASK_INTERRUPTIBLE);
			add_wait_queue(&EXT4_SB(sb)->ro_wait_queue, &wait);
			if (timer_pending(&EXT4_SB(sb)->turn_ro_timer)) {
				schedule();
				ret = 1;
			}
			remove_wait_queue(&EXT4_SB(sb)->ro_wait_queue, &wait);
			return ret;
		}
#endif
	case EXT4_IOC_GROUP_EXTEND: {
		ext4_fsblk_t n_blocks_count;
		int err, err2 = 0;

		if (!capable(CAP_SYS_RESOURCE))
			return -EPERM;

		if (get_user(n_blocks_count, (__u32 __user *)arg))
			return -EFAULT;

		if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
			       EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
			ext4_msg(sb, KERN_ERR,
				 "Online resizing not supported with bigalloc");
			return -EOPNOTSUPP;
		}

		err = mnt_want_write(filp->f_path.mnt);
		if (err)
			return err;

		err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
		if (EXT4_SB(sb)->s_journal) {
			jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
			err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
			jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
		}
		if (err == 0)
			err = err2;
		mnt_drop_write(filp->f_path.mnt);

		return err;
	}
	case EXT4_IOC_MOVE_EXT: {
		struct move_extent me;
		struct file *donor_filp;
		int err;

		if (!(filp->f_mode & FMODE_READ) ||
		    !(filp->f_mode & FMODE_WRITE))
			return -EBADF;

		if (copy_from_user(&me,
			(struct move_extent __user *)arg, sizeof(me)))
			return -EFAULT;

		donor_filp = fget(me.donor_fd);
		if (!donor_filp)
			return -EBADF;

		if (!(donor_filp->f_mode & FMODE_WRITE)) {
			err = -EBADF;
			goto mext_out;
		}

		if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
			       EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
			ext4_msg(sb, KERN_ERR,
				 "Online defrag not supported with bigalloc");
			err = -EOPNOTSUPP;
			goto mext_out;
		}

		err = mnt_want_write(filp->f_path.mnt);
		if (err)
			goto mext_out;

		me.moved_len = 0;
		err = ext4_move_extents(filp, donor_filp, me.orig_start,
					me.donor_start, me.len, &me.moved_len);
		mnt_drop_write(filp->f_path.mnt);

		if (me.moved_len > 0)
			file_remove_suid(donor_filp);

		if (copy_to_user((struct move_extent *)arg, &me, sizeof(me)))
			err = -EFAULT;
mext_out:
		fput(donor_filp);
		return err;
	}
	case EXT4_IOC_GROUP_ADD: {
		struct ext4_new_group_data input;
		int err, err2 = 0;

		if (!capable(CAP_SYS_RESOURCE))
			return -EPERM;

		if (copy_from_user(&input,
			(struct ext4_new_group_input __user *)arg,
			sizeof(input)))
			return -EFAULT;

		if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
			       EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
			ext4_msg(sb, KERN_ERR,
				 "Online resizing not supported with bigalloc");
			return -EOPNOTSUPP;
		}

		err = mnt_want_write(filp->f_path.mnt);
		if (err)
			return err;

		err = ext4_group_add(sb, &input);
		if (EXT4_SB(sb)->s_journal) {
			jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
			err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
			jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
		}
		if (err == 0)
			err = err2;
		mnt_drop_write(filp->f_path.mnt);

		return err;
	}
	case EXT4_IOC_MIGRATE: {
		int err;

		if (!is_owner_or_cap(inode))
			return -EACCES;

		err = mnt_want_write(filp->f_path.mnt);
		if (err)
			return err;
		/*
		 * inode_mutex prevents write and truncate on the file.
		 * Read still goes through.  We take i_data_sem in
		 * ext4_ext_swap_inode_data before we switch the
		 * inode format to prevent read.
		 */
		mutex_lock(&(inode->i_mutex));
		err = ext4_ext_migrate(inode);
		mutex_unlock(&(inode->i_mutex));
		mnt_drop_write(filp->f_path.mnt);

		return err;
	}
	case EXT4_IOC_ALLOC_DA_BLKS: {
		int err;

		if (!is_owner_or_cap(inode))
			return -EACCES;

		err = mnt_want_write(filp->f_path.mnt);
		if (err)
			return err;
		err = ext4_alloc_da_blocks(inode);
		mnt_drop_write(filp->f_path.mnt);
		return err;
	}
	case FITRIM: {
		struct request_queue *q = bdev_get_queue(sb->s_bdev);
		struct fstrim_range range;
		int ret = 0;

		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;

		if (!blk_queue_discard(q))
			return -EOPNOTSUPP;

		if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
			       EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
			ext4_msg(sb, KERN_ERR,
				 "FITRIM not supported with bigalloc");
			return -EOPNOTSUPP;
		}

		if (copy_from_user(&range, (struct fstrim_range *)arg,
		    sizeof(range)))
			return -EFAULT;

		range.minlen = max((unsigned int)range.minlen,
				   q->limits.discard_granularity);
		ret = ext4_trim_fs(sb, &range);
		if (ret < 0)
			return ret;

		if (copy_to_user((struct fstrim_range *)arg, &range,
		    sizeof(range)))
			return -EFAULT;

		return 0;
	}
	default:
		return -ENOTTY;
	}
}
/*
 * Almost a copy of generic_file_splice_write() (added change_begin/end,
 * tux3_iattrdirty()).
 */
static ssize_t tux3_file_splice_write(struct pipe_inode_info *pipe,
				      struct file *out, loff_t *ppos,
				      size_t len, unsigned int flags)
{
	if (DEBUG_MODE_K == 1) {
		printk(KERN_INFO "%25s %25s %4d #in\n",
		       __FILE__, __func__, __LINE__);
	}
	struct address_space *mapping = out->f_mapping;
	struct inode *inode = mapping->host;
	struct sb *sb = tux_sb(inode->i_sb);
	struct splice_desc sd = {
		.total_len = len,
		.flags = flags,
		.pos = *ppos,
		.u.file = out,
	};
	ssize_t ret;

	sb_start_write(inode->i_sb);

	pipe_lock(pipe);

	splice_from_pipe_begin(&sd);
	do {
		ret = splice_from_pipe_next(pipe, &sd);
		if (ret <= 0)
			break;

		mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);

		/* For each ->write_end() calls change_end(). */
		change_begin(sb);
		/* For timestamp. FIXME: convert this to ->update_time
		 * handler? */
		tux3_iattrdirty(inode);

		ret = file_remove_suid(out);
		if (!ret) {
			ret = file_update_time(out);
			if (!ret)
				ret = splice_from_pipe_feed(pipe, &sd,
							    pipe_to_file);
		}
		change_end_if_needed(sb);

		mutex_unlock(&inode->i_mutex);
	} while (ret > 0);
	splice_from_pipe_end(pipe, &sd);

	pipe_unlock(pipe);

	if (sd.num_spliced)
		ret = sd.num_spliced;

	if (ret > 0) {
		int err;

		err = generic_write_sync(out, *ppos, ret);
		if (err)
			ret = err;
		else
			*ppos += ret;

		balance_dirty_pages_ratelimited(mapping);
	}
	sb_end_write(inode->i_sb);

	return ret;
}
ssize_t				/* bytes written, or (-) error */
xfs_write(
	struct xfs_inode	*xip,
	struct kiocb		*iocb,
	const struct iovec	*iovp,
	unsigned int		nsegs,
	loff_t			*offset,
	int			ioflags)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	unsigned long		segs = nsegs;
	xfs_mount_t		*mp;
	ssize_t			ret = 0, error = 0;
	xfs_fsize_t		isize, new_size;
	int			iolock;
	int			eventsent = 0;
	size_t			ocount = 0, count;
	loff_t			pos;
	int			need_i_mutex;

	XFS_STATS_INC(xs_write_calls);

	error = generic_segment_checks(iovp, &segs, &ocount, VERIFY_READ);
	if (error)
		return error;

	count = ocount;
	pos = *offset;

	if (count == 0)
		return 0;

	mp = xip->i_mount;

	xfs_wait_for_freeze(mp, SB_FREEZE_WRITE);

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

relock:
	if (ioflags & IO_ISDIRECT) {
		iolock = XFS_IOLOCK_SHARED;
		need_i_mutex = 0;
	} else {
		iolock = XFS_IOLOCK_EXCL;
		need_i_mutex = 1;
		mutex_lock(&inode->i_mutex);
	}

	xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);

start:
	error = -generic_write_checks(file, &pos, &count,
					S_ISBLK(inode->i_mode));
	if (error) {
		xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
		goto out_unlock_mutex;
	}

	if ((DM_EVENT_ENABLED(xip, DM_EVENT_WRITE) &&
	    !(ioflags & IO_INVIS) && !eventsent)) {
		int		dmflags = FILP_DELAY_FLAG(file);

		if (need_i_mutex)
			dmflags |= DM_FLAGS_IMUX;

		xfs_iunlock(xip, XFS_ILOCK_EXCL);
		error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, xip,
				      pos, count, dmflags, &iolock);
		if (error) {
			goto out_unlock_internal;
		}
		xfs_ilock(xip, XFS_ILOCK_EXCL);
		eventsent = 1;

		/*
		 * The iolock was dropped and reacquired in XFS_SEND_DATA
		 * so we have to recheck the size when appending.
		 * We will only "goto start;" once, since having sent the
		 * event prevents another call to XFS_SEND_DATA, which is
		 * what allows the size to change in the first place.
		 */
		if ((file->f_flags & O_APPEND) && pos != xip->i_size)
			goto start;
	}

	if (ioflags & IO_ISDIRECT) {
		xfs_buftarg_t	*target =
			XFS_IS_REALTIME_INODE(xip) ?
				mp->m_rtdev_targp : mp->m_ddev_targp;

		if ((pos & target->bt_smask) || (count & target->bt_smask)) {
			xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
			return XFS_ERROR(-EINVAL);
		}

		if (!need_i_mutex && (mapping->nrpages || pos > xip->i_size)) {
			xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
			iolock = XFS_IOLOCK_EXCL;
			need_i_mutex = 1;
			mutex_lock(&inode->i_mutex);
			xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);
			goto start;
		}
	}

	new_size = pos + count;
	if (new_size > xip->i_size)
		xip->i_new_size = new_size;

	if (likely(!(ioflags & IO_INVIS)))
		xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);

	/*
	 * If the offset is beyond the size of the file, we have a couple
	 * of things to do. First, if there is already space allocated
	 * we need to either create holes or zero the disk or ...
	 *
	 * If there is a page where the previous size lands, we need
	 * to zero it out up to the new size.
	 */
	if (pos > xip->i_size) {
		error = xfs_zero_eof(xip, pos, xip->i_size);
		if (error) {
			xfs_iunlock(xip, XFS_ILOCK_EXCL);
			goto out_unlock_internal;
		}
	}
	xfs_iunlock(xip, XFS_ILOCK_EXCL);

	/*
	 * If we're writing the file then make sure to clear the
	 * setuid and setgid bits if the process is not being run
	 * by root.  This keeps people from modifying setuid and
	 * setgid binaries.
	 */
	if (((xip->i_d.di_mode & S_ISUID) ||
	    ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) ==
		(S_ISGID | S_IXGRP))) &&
	     !capable(CAP_FSETID)) {
		error = xfs_write_clear_setuid(xip);
		if (likely(!error))
			error = -file_remove_suid(file);
		if (unlikely(error)) {
			goto out_unlock_internal;
		}
	}

	/* We can write back this queue in page reclaim */
	current->backing_dev_info = mapping->backing_dev_info;

	if ((ioflags & IO_ISDIRECT)) {
		if (mapping->nrpages) {
			WARN_ON(need_i_mutex == 0);
			xfs_inval_cached_trace(xip, pos, -1,
					(pos & PAGE_CACHE_MASK), -1);
			error = xfs_flushinval_pages(xip,
					(pos & PAGE_CACHE_MASK),
					-1, FI_REMAPF_LOCKED);
			if (error)
				goto out_unlock_internal;
		}

		if (need_i_mutex) {
			/* demote the lock now the cached pages are gone */
			xfs_ilock_demote(xip, XFS_IOLOCK_EXCL);
			mutex_unlock(&inode->i_mutex);

			iolock = XFS_IOLOCK_SHARED;
			need_i_mutex = 0;
		}

		xfs_rw_enter_trace(XFS_DIOWR_ENTER, xip, (void *)iovp, segs,
				*offset, ioflags);
		ret = generic_file_direct_write(iocb, iovp,
				&segs, pos, offset, count, ocount);

		/*
		 * direct-io write to a hole: fall through to buffered I/O
		 * for completing the rest of the request.
		 */
		if (ret >= 0 && ret != count) {
			XFS_STATS_ADD(xs_write_bytes, ret);

			pos += ret;
			count -= ret;

			ioflags &= ~IO_ISDIRECT;
			xfs_iunlock(xip, iolock);
			goto relock;
		}
	} else {
		xfs_rw_enter_trace(XFS_WRITE_ENTER, xip, (void *)iovp, segs,
				*offset, ioflags);
		ret = generic_file_buffered_write(iocb, iovp, segs,
				pos, offset, count, ret);
	}

	current->backing_dev_info = NULL;

	if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO))
		ret = wait_on_sync_kiocb(iocb);

	isize = i_size_read(inode);
	if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize))
		*offset = isize;

	if (*offset > xip->i_size) {
		xfs_ilock(xip, XFS_ILOCK_EXCL);
		if (*offset > xip->i_size)
			xip->i_size = *offset;
		xfs_iunlock(xip, XFS_ILOCK_EXCL);
	}

	if (ret == -ENOSPC &&
	    DM_EVENT_ENABLED(xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) {
		xfs_iunlock(xip, iolock);
		if (need_i_mutex)
			mutex_unlock(&inode->i_mutex);
		error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, xip,
				DM_RIGHT_NULL, xip, DM_RIGHT_NULL, NULL, NULL,
				0, 0, 0); /* Delay flag intentionally unused */
		if (need_i_mutex)
			mutex_lock(&inode->i_mutex);
		xfs_ilock(xip, iolock);
		if (error)
			goto out_unlock_internal;
		goto start;
	}

	error = -ret;
	if (ret <= 0)
		goto out_unlock_internal;

	XFS_STATS_ADD(xs_write_bytes, ret);

	/* Handle various SYNC-type writes */
	if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) {
		int error2;

		xfs_iunlock(xip, iolock);
		if (need_i_mutex)
			mutex_unlock(&inode->i_mutex);

		error2 = sync_page_range(inode, mapping, pos, ret);
		if (!error)
			error = error2;
		if (need_i_mutex)
			mutex_lock(&inode->i_mutex);
		xfs_ilock(xip, iolock);

		error2 = xfs_write_sync_logforce(mp, xip);
		if (!error)
			error = error2;
	}

out_unlock_internal:
	if (xip->i_new_size) {
		xfs_ilock(xip, XFS_ILOCK_EXCL);
		xip->i_new_size = 0;
		/*
		 * If this was a direct or synchronous I/O that failed (such
		 * as ENOSPC) then part of the I/O may have been written to
		 * disk before the error occurred.  In this case the on-disk
		 * file size may have been adjusted beyond the in-memory file
		 * size and now needs to be truncated back.
		 */
		if (xip->i_d.di_size > xip->i_size)
			xip->i_d.di_size = xip->i_size;
		xfs_iunlock(xip, XFS_ILOCK_EXCL);
	}
	xfs_iunlock(xip, iolock);
out_unlock_mutex:
	if (need_i_mutex)
		mutex_unlock(&inode->i_mutex);
	return -error;
}
/*
 * Common pre-write limit and setup checks.
 *
 * Returns with iolock held according to @iolock.
 */
STATIC ssize_t
xfs_file_aio_write_checks(
	struct file		*file,
	loff_t			*pos,
	size_t			*count,
	xfs_fsize_t		*new_sizep,
	int			*iolock)
{
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	xfs_fsize_t		new_size;
	int			error = 0;

	xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
	*new_sizep = 0;
restart:
	error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode));
	if (error) {
		xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock);
		*iolock = 0;
		return error;
	}

	if (likely(!(file->f_mode & FMODE_NOCMTIME)))
		file_update_time(file);

	/*
	 * If the offset is beyond the size of the file, we need to zero any
	 * blocks that fall between the existing EOF and the start of this
	 * write.  There is no need to issue zeroing if another in-flight IO
	 * ends at or before this one.  If zeroing is needed and we are
	 * currently holding the iolock shared, we need to update it to
	 * exclusive which involves dropping all locks and relocking to
	 * maintain correct locking order.  If we do this, restart the
	 * function to ensure all checks and values are still valid.
	 */
	if ((ip->i_new_size && *pos > ip->i_new_size) ||
	    (!ip->i_new_size && *pos > ip->i_size)) {
		if (*iolock == XFS_IOLOCK_SHARED) {
			xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock);
			*iolock = XFS_IOLOCK_EXCL;
			xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
			goto restart;
		}
		error = -xfs_zero_eof(ip, *pos, ip->i_size);
	}

	/*
	 * If this IO extends beyond EOF, we may need to update ip->i_new_size.
	 * We have already zeroed space beyond EOF (if necessary).  Only update
	 * ip->i_new_size if this IO ends beyond any other in-flight writes.
	 */
	new_size = *pos + *count;
	if (new_size > ip->i_size) {
		if (new_size > ip->i_new_size)
			ip->i_new_size = new_size;
		*new_sizep = new_size;
	}

	xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
	if (error)
		return error;

	/*
	 * If we're writing the file then make sure to clear the setuid and
	 * setgid bits if the process is not being run by root.  This keeps
	 * people from modifying setuid and setgid binaries.
	 */
	return file_remove_suid(file);
}
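/*
 * Editor's note: with the i_new_size variant above, the caller that received
 * *new_sizep is expected to clear ip->i_new_size once the I/O completes and
 * to trim any speculative on-disk size, mirroring the out_unlock_internal
 * cleanup in the full xfs_file_aio_write/xfs_write paths shown earlier.  The
 * following is a hedged sketch of that cleanup; the helper name is
 * hypothetical and not taken from the snippets in this section.
 */
STATIC void
xfs_aio_write_newsize_update_sketch(
	struct xfs_inode	*ip,
	xfs_fsize_t		new_size)
{
	if (new_size == ip->i_new_size) {
		xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
		if (new_size == ip->i_new_size)
			ip->i_new_size = 0;
		/*
		 * If the I/O failed part way (e.g. ENOSPC), the on-disk size
		 * may have grown past the in-memory size and must be pulled
		 * back, as the larger write paths above do.
		 */
		if (ip->i_d.di_size > ip->i_size)
			ip->i_d.di_size = ip->i_size;
		xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
	}
}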
ssize_t GSFS_file_write(struct file *filp, const char __user *buf, size_t len,
			loff_t *off)
{
	struct inode *inode = filp->f_mapping->host;
	struct address_space *mapping = filp->f_mapping;
	sector_t sec_start, sec_end, sec_len;
	unsigned long bufstart, bytes_in_first_buf_page, bytes_in_last_buf_page,
		      pagestartbyte, pageendbyte, pagelen;
	size_t rlen;
	struct page *res[2], *page, **pages;
	unsigned int i, j, pages_count, start_read, end_read;

	gwf(printk("<0>" "File write with inode :%lu len:%lu offset:%llu , filepos:%llu, buf:%lx inode_size:%llu, pid:%u\n",
		   inode->i_ino, len, *off, filp->f_pos, (unsigned long)buf,
		   inode->i_size, current->pid));

	if (unlikely(!access_ok(VERIFY_READ, buf, len)))
		return -1;

	mutex_lock(&inode->i_mutex);

	if ((*off + len) > inode->i_size) {
		if ((*off + len) > inode->i_sb->s_maxbytes)
			return -1;
		inode->i_size = (*off + len);
		GSFS_truncate(inode);
	}

	if (filp->f_flags & O_APPEND)
		*off = inode->i_size;

	current->backing_dev_info = mapping->backing_dev_info;

	file_remove_suid(filp);
	file_update_time(filp);
	//inode_inc_iversion(inode);

	sec_start = (*off) >> Block_Size_Bits;
	sec_end = (*off + len - 1) >> Block_Size_Bits;
	sec_len = sec_end - sec_start + 1;

	pages = kzalloc(sizeof(struct page *) * sec_len, GFP_KERNEL);

	bytes_in_first_buf_page = Block_Size -
				  ((*off) & ((unsigned long)Block_Size - 1));
	bytes_in_last_buf_page = ((*off + len) & ((unsigned long)Block_Size - 1));
	if (bytes_in_last_buf_page == 0)
		bytes_in_last_buf_page = Block_Size;

	start_read = (bytes_in_first_buf_page != Block_Size) ? 1 : 0;
	end_read = (bytes_in_last_buf_page != Block_Size &&
		    inode->i_size > (*off + len)) ? 1 : 0;

	gwf(printk("<0>" "GSFS write bytes_in_first_buf_page:%lu, bytes_in_last_buf_page:%lu\n",
		   bytes_in_first_buf_page, bytes_in_last_buf_page));
	gwf(printk("<0>" "GSFS write start_read:%u, end_read:%u, sec_start:%lu, sec_end:%lu\n",
		   start_read, end_read, sec_start, sec_end));

	if (sec_start == sec_end) {
		if (start_read || end_read) {
			res[0] = GSFS_get_data_page_of_inode_with_read(inode, sec_start);
			gwf(printk("<0>" "sec_start==sec_end, start_read || end_read , res[0]=%lx",
				   (unsigned long)res[0]));
		} else {
			res[0] = GSFS_get_locked_data_page_of_inode_without_read(inode, sec_start);
			if (likely(res[0]))
				unlock_page(res[0]);
			gwf(printk("<0>" "sec_start==sec_end, !(start_read || end_read) , res[0]=%lx",
				   (unsigned long)res[0]));
		}
		res[1] = 0;
		if (unlikely(!res[0])) {
			gwf(printk("<0>" "GSFS write len:-1\n"));
			mutex_unlock(&inode->i_mutex);
			printk("<1>" "GSFS write len:-1\n");
			kfree(pages);
			return len;
		}
	} else {
		if (start_read) {
			res[0] = GSFS_get_data_page_of_inode_with_read(inode, sec_start);
			gwf(printk("<0>" "sec_start!=sec_end, start_read, res[0]=%lx",
				   (unsigned long)res[0]));
		} else {
			res[0] = GSFS_get_locked_data_page_of_inode_without_read(inode, sec_start);
			if (likely(res[0]))
				unlock_page(res[0]);
			gwf(printk("<0>" "sec_start!=sec_end, !start_read, res[0]=%lx",
				   (unsigned long)res[0]));
		}
	}

	pages_count = 0;
	if (sec_len > 1)
		for (i = sec_start + 1; i <= sec_end - 1; i++)
			pages[pages_count++] =
				GSFS_get_locked_data_page_of_inode_without_read(inode, i);

	if (sec_start != sec_end) {
		if (end_read) {
			res[1] = GSFS_get_data_page_of_inode_with_read(inode, sec_end);
			gwf(printk("<0>" "sec_start!=sec_end, end_read, res[1]=%lx",
				   (unsigned long)res[1]));
		} else {
			res[1] = GSFS_get_locked_data_page_of_inode_without_read(inode, sec_end);
			if (likely(res[1]))
				unlock_page(res[1]);
			gwf(printk("<0>" "sec_start!=sec_end, !end_read, res[1]=%lx",
				   (unsigned long)res[1]));
		}
		if (unlikely(!res[0] || !res[1])) {
			gwf(printk("<0>" "GSFS write len:-1\n"));
			printk("<1>" "GSFS write len:-1\n");
			mutex_unlock(&inode->i_mutex);
			kfree(pages);
			return len;
		}
	}

	rlen = 0;
	bufstart = (unsigned long)buf + bytes_in_first_buf_page;
	pagelen = Block_Size;

	//100% expected complete pages that should be copied
	pages_count = 0;
	if (sec_len > 1)
		for (i = sec_start + 1; i <= sec_end - 1; i++) {
			gwf(printk("<0>" "write page complete pages, i:%u, bufstart:%lx, rlen=%lu\n",
				   i, bufstart, rlen));
			page = pages[pages_count++];
			if (unlikely(!page))
				goto buf_cont;
			j = __copy_from_user_inatomic(page_address(page),
						      (void *)bufstart, pagelen);
			rlen += (Block_Size - j);
			mark_page_accessed(page);
			set_page_dirty(page);
			put_page(page);
			unlock_page(page);
buf_cont:
			bufstart += pagelen;
		}

	//first and last page that are not surely complete
	for (i = 0; i < 2 && res[i]; i++) {
		page = res[i];

		wait_on_page_locked(page);
		lock_page(page);

		if (page->index == sec_start) {
			bufstart = (unsigned long)buf;
			pagestartbyte = Block_Size - bytes_in_first_buf_page;
			if (sec_start == sec_end)
				pageendbyte = pagestartbyte + len - 1;
			else
				pageendbyte = Block_Size - 1;
		} else {
			bufstart = (unsigned long)buf + bytes_in_first_buf_page +
				   ((sec_len - 2) << Block_Size_Bits);
			pageendbyte = bytes_in_last_buf_page - 1;
			pagestartbyte = 0;
		}

		gwf(printk("<0>" "gsfs_write for first and last page, i=%u, page:%lx, bufstart:%lx, pagestartbyte:%lu, pageendbyte:%lu\n",
			   i, (unsigned long)page, bufstart, pagestartbyte,
			   pageendbyte));

		pagelen = pageendbyte - pagestartbyte + 1;
		j = __copy_from_user_inatomic(page_address(page) + pagestartbyte,
					      (void *)bufstart, pagelen);
		rlen += (pagelen - j);

		mark_page_accessed(page);
		set_page_dirty(page);
		put_page(page);
		unlock_page(page);
	}

	mutex_unlock(&inode->i_mutex);

	(*off) += rlen;
	gwf(printk("<0>" "GSFS write rlen:%lu\n", rlen));
	kfree(pages);

	if (filp->f_flags & O_SYNC) {
		write_inode_now(inode, 1);
	}

	return rlen;
}