/*
 * This tests whether the IO in question is block-aligned or not.
 * Ext4 utilizes unwritten extents when hole-filling during direct IO, and they
 * are converted to written only after the IO is complete.  Until they are
 * mapped, these blocks appear as holes, so dio_zero_block() will assume that
 * it needs to zero out portions of the start and/or end block.  If 2 AIO
 * threads are at work on the same unwritten block, they must be synchronized
 * or one thread will zero the other's data, causing corruption.
 */
static int
ext4_unaligned_aio(struct inode *inode, struct iov_iter *iter, loff_t pos)
{
	struct super_block *sb = inode->i_sb;
	int blockmask = sb->s_blocksize - 1;
	size_t count = iov_iter_count(iter);
	loff_t final_size = pos + count;

	if (pos >= i_size_read(inode))
		return 0;

	if ((pos & blockmask) || (final_size & blockmask))
		return 1;

	return 0;
}
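
A minimal sketch of how a write path might consume this helper (hypothetical and condensed; it mirrors the shape of ext4's direct-IO write path of this era, but my_dio_write() and the choice of inode_dio_wait() as the serialisation point are illustrative stand-ins, not the actual ext4 code):

static ssize_t my_dio_write(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	bool unaligned_aio = !is_sync_kiocb(iocb) &&
			     ext4_unaligned_aio(inode, from, iocb->ki_pos);

	/*
	 * Unaligned AIO may zero partial blocks via dio_zero_block(); drain
	 * any DIO already in flight on those blocks before submitting, and
	 * keep the submission serialised (e.g. under i_mutex).
	 */
	if (unaligned_aio)
		inode_dio_wait(inode);

	/* ...generic_write_checks(), then issue the direct IO... */
	return -ENOSYS;		/* placeholder, not a real implementation */
}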
Example #2
static ssize_t
v9fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct p9_fid *fid = iocb->ki_filp->private_data;
	int ret, err = 0;

	p9_debug(P9_DEBUG_VFS, "count %zu offset %lld\n",
		 iov_iter_count(to), iocb->ki_pos);

	ret = p9_client_read(fid, iocb->ki_pos, to, &err);
	if (!ret)
		return err;

	iocb->ki_pos += ret;
	return ret;
}
Example #3
/**
 * process_vm_rw_single_vec - read/write pages from task specified
 * @addr: start memory address of target process
 * @len: size of area to copy to/from
 * @iter: where to copy to/from locally
 * @process_pages: struct pages area that can store at least
 *  nr_pages_to_copy struct page pointers
 * @mm: mm for task
 * @task: task to read/write from
 * @vm_write: 0 means copy from, 1 means copy to
 * Returns 0 on success or an error code on failure
 */
static int process_vm_rw_single_vec(unsigned long addr,
				    unsigned long len,
				    struct iov_iter *iter,
				    struct page **process_pages,
				    struct mm_struct *mm,
				    struct task_struct *task,
				    int vm_write)
{
	unsigned long pa = addr & PAGE_MASK;
	unsigned long start_offset = addr - pa;
	unsigned long nr_pages;
	ssize_t rc = 0;
	unsigned long max_pages_per_loop = PVM_MAX_KMALLOC_PAGES
		/ sizeof(struct page *);

	/* Work out address and page range required */
	if (len == 0)
		return 0;
	nr_pages = (addr + len - 1) / PAGE_SIZE - addr / PAGE_SIZE + 1;

	while (!rc && nr_pages && iov_iter_count(iter)) {
		int pages = min(nr_pages, max_pages_per_loop);
		size_t bytes;

		/* Get the pages we're interested in */
		pages = get_user_pages_unlocked(task, mm, pa, pages,
						vm_write, 0, process_pages);
		if (pages <= 0)
			return -EFAULT;

		bytes = pages * PAGE_SIZE - start_offset;
		if (bytes > len)
			bytes = len;

		rc = process_vm_rw_pages(process_pages,
					 start_offset, bytes, iter,
					 vm_write);
		len -= bytes;
		start_offset = 0;
		nr_pages -= pages;
		pa += pages * PAGE_SIZE;
		while (pages)
			put_page(process_pages[--pages]);
	}

	return rc;
}
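
The page-count arithmetic above is easy to get wrong by one; the following standalone user-space sanity check (not kernel code; it assumes a 4 KiB page size) exercises the same formula:

#include <assert.h>

#define PAGE_SIZE 4096UL

/* Number of pages touched by the byte range [addr, addr + len), len > 0. */
static unsigned long pages_spanned(unsigned long addr, unsigned long len)
{
	return (addr + len - 1) / PAGE_SIZE - addr / PAGE_SIZE + 1;
}

int main(void)
{
	assert(pages_spanned(0x0000, 1) == 1);      /* one byte, one page */
	assert(pages_spanned(0x0fff, 2) == 2);      /* straddles a boundary */
	assert(pages_spanned(0x1000, 0x2000) == 2); /* two aligned pages */
	return 0;
}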
Example #4
File: file.c Project: infidel/linux
static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
    ssize_t retval;
    struct file *file = iocb->ki_filp;
    struct inode *inode = file_inode(file);
    struct udf_inode_info *iinfo = UDF_I(inode);
    int err;

    mutex_lock(&inode->i_mutex);

    retval = generic_write_checks(iocb, from);
    if (retval <= 0)
        goto out;

    down_write(&iinfo->i_data_sem);
    if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
        loff_t end = iocb->ki_pos + iov_iter_count(from);

        if (inode->i_sb->s_blocksize <
                (udf_file_entry_alloc_offset(inode) + end)) {
            err = udf_expand_file_adinicb(inode);
            if (err) {
                mutex_unlock(&inode->i_mutex);
                udf_debug("udf_expand_adinicb: err=%d\n", err);
                return err;
            }
        } else {
            iinfo->i_lenAlloc = max(end, inode->i_size);
            up_write(&iinfo->i_data_sem);
        }
    } else
        up_write(&iinfo->i_data_sem);

    retval = __generic_file_write_iter(iocb, from);
out:
    mutex_unlock(&inode->i_mutex);

    if (retval > 0) {
        mark_inode_dirty(inode);
        err = generic_write_sync(file, iocb->ki_pos - retval, retval);
        if (err < 0)
            retval = err;
    }

    return retval;
}
Example #5
STATIC ssize_t
xfs_file_write_iter(
	struct kiocb		*iocb,
	struct iov_iter		*from)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	ssize_t			ret;
	size_t			ocount = iov_iter_count(from);

	XFS_STATS_INC(ip->i_mount, xs_write_calls);

	if (ocount == 0)
		return 0;

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return -EIO;

	if (IS_DAX(inode))
		ret = xfs_file_dax_write(iocb, from);
	else if (iocb->ki_flags & IOCB_DIRECT) {
		/*
		 * Allow a directio write to fall back to a buffered
		 * write *only* in the case that we're doing a reflink
		 * CoW.  In all other directio scenarios we do not
		 * allow an operation to fall back to buffered mode.
		 */
		ret = xfs_file_dio_aio_write(iocb, from);
		if (ret == -EREMCHG)
			goto buffered;
	} else {
buffered:
		ret = xfs_file_buffered_aio_write(iocb, from);
	}

	if (ret > 0) {
		XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret);

		/* Handle various SYNC-type writes */
		ret = generic_write_sync(iocb, ret);
	}
	return ret;
}
Example #6
static noinline ssize_t
xfs_file_dax_write(
	struct kiocb		*iocb,
	struct iov_iter		*from)
{
	struct inode		*inode = iocb->ki_filp->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	int			iolock = XFS_IOLOCK_EXCL;
	ssize_t			ret, error = 0;
	size_t			count;
	loff_t			pos;

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!xfs_ilock_nowait(ip, iolock))
			return -EAGAIN;
	} else {
		xfs_ilock(ip, iolock);
	}

	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
	if (ret)
		goto out;

	pos = iocb->ki_pos;
	count = iov_iter_count(from);

	trace_xfs_file_dax_write(ip, count, pos);
	ret = dax_iomap_rw(iocb, from, &xfs_iomap_ops);
	if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
		i_size_write(inode, iocb->ki_pos);
		error = xfs_setfilesize(ip, pos, ret);
	}
out:
	xfs_iunlock(ip, iolock);
	if (error)
		return error;

	if (ret > 0) {
		XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret);

		/* Handle various SYNC-type writes */
		ret = generic_write_sync(iocb, ret);
	}
	return ret;
}
Example #7
File: dax.c Project: 020gzh/linux
/**
 * dax_do_io - Perform I/O to a DAX file
 * @iocb: The control block for this I/O
 * @inode: The file which the I/O is directed at
 * @iter: The addresses to do I/O from or to
 * @pos: The file offset where the I/O starts
 * @get_block: The filesystem method used to translate file offsets to blocks
 * @end_io: A filesystem callback for I/O completion
 * @flags: See below
 *
 * This function uses the same locking scheme as do_blockdev_direct_IO:
 * If @flags has DIO_LOCKING set, we assume that the i_mutex is held by the
 * caller for writes.  For reads, we take and release the i_mutex ourselves.
 * If DIO_LOCKING is not set, the filesystem takes care of its own locking.
 * As with do_blockdev_direct_IO(), we increment i_dio_count while the I/O
 * is in progress.
 */
ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
		  struct iov_iter *iter, loff_t pos, get_block_t get_block,
		  dio_iodone_t end_io, int flags)
{
	struct buffer_head bh;
	ssize_t retval = -EINVAL;
	loff_t end = pos + iov_iter_count(iter);

	memset(&bh, 0, sizeof(bh));
	bh.b_bdev = inode->i_sb->s_bdev;

	if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ) {
		struct address_space *mapping = inode->i_mapping;
		inode_lock(inode);
		retval = filemap_write_and_wait_range(mapping, pos, end - 1);
		if (retval) {
			inode_unlock(inode);
			goto out;
		}
	}

	/* Protects against truncate */
	if (!(flags & DIO_SKIP_DIO_COUNT))
		inode_dio_begin(inode);

	retval = dax_io(inode, iter, pos, end, get_block, &bh);

	if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ)
		inode_unlock(inode);

	if (end_io) {
		int err;

		err = end_io(iocb, pos, retval, bh.b_private);
		if (err)
			retval = err;
	}

	if (!(flags & DIO_SKIP_DIO_COUNT))
		inode_dio_end(inode);
 out:
	return retval;
}
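
For context, the filesystem glue that called this interface in the same kernel generation looked roughly like the sketch below (hypothetical; loosely modeled on ext2's DAX wiring of that era, with myfs_direct_IO and myfs_get_block as illustrative names rather than real symbols):

static ssize_t myfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
			      loff_t offset)
{
	struct inode *inode = file_inode(iocb->ki_filp);

	/*
	 * DAX bypasses the page cache entirely; otherwise fall back to the
	 * ordinary blockdev direct-IO path.
	 */
	if (IS_DAX(inode))
		return dax_do_io(iocb, inode, iter, offset, myfs_get_block,
				 NULL, DIO_LOCKING);
	return blockdev_direct_IO(iocb, inode, iter, offset, myfs_get_block);
}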
Example #8
File: inode.c Project: ifilex/sensa
static ssize_t hypfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	char *data = file->private_data;
	size_t available = strlen(data);
	loff_t pos = iocb->ki_pos;
	size_t count;

	if (pos < 0)
		return -EINVAL;
	if (pos >= available || !iov_iter_count(to))
		return 0;
	count = copy_to_iter(data + pos, available - pos, to);
	if (!count)
		return -EFAULT;
	iocb->ki_pos = pos + count;
	file_accessed(file);
	return count;
}
Example #9
static ssize_t
zpl_iter_write(struct kiocb *kiocb, struct iov_iter *from)
{
	size_t count;
	ssize_t ret;
	uio_seg_t seg = UIO_USERSPACE;

#ifndef HAVE_GENERIC_WRITE_CHECKS_KIOCB
	struct file *file = kiocb->ki_filp;
	struct address_space *mapping = file->f_mapping;
	struct inode *ip = mapping->host;
	int isblk = S_ISBLK(ip->i_mode);

	count = iov_iter_count(from);
	ret = generic_write_checks(file, &kiocb->ki_pos, &count, isblk);
	if (ret)
		return (ret);
#else
	/*
	 * XXX - ideally this check should be in the same lock region with
	 * write operations, so that there's no TOCTTOU race when doing
	 * append and someone else grow the file.
	 */
	ret = generic_write_checks(kiocb, from);
	if (ret <= 0)
		return (ret);
	count = ret;
#endif

	if (from->type & ITER_KVEC)
		seg = UIO_SYSSPACE;
	if (from->type & ITER_BVEC)
		seg = UIO_BVEC;

	ret = zpl_iter_write_common(kiocb, from->iov, from->nr_segs,
	    count, seg, from->iov_offset);
	if (ret > 0)
		iov_iter_advance(from, ret);

	return (ret);
}
Example #10
ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos)
{
	struct kiocb kiocb;
	ssize_t ret;

	if (!file->f_op->write_iter)
		return -EINVAL;

	init_sync_kiocb(&kiocb, file);
	kiocb.ki_pos = *ppos;
	kiocb.ki_nbytes = iov_iter_count(iter);

	iter->type |= WRITE;
	ret = file->f_op->write_iter(&kiocb, iter);
	if (ret == -EIOCBQUEUED)
		ret = wait_on_sync_kiocb(&kiocb);

	if (ret > 0)
		*ppos = kiocb.ki_pos;
	return ret;
}
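
A sketch of a kernel-internal caller feeding a kernel buffer through this path (hypothetical; it assumes the iov_iter_kvec() signature of roughly the same kernel generation, which has changed in later kernels):

/* Write a kernel buffer through ->write_iter via vfs_iter_write(). */
static ssize_t write_kernel_buf(struct file *file, void *buf, size_t len,
				loff_t *pos)
{
	struct kvec kv = { .iov_base = buf, .iov_len = len };
	struct iov_iter iter;

	iov_iter_kvec(&iter, ITER_KVEC | WRITE, &kv, 1, len);
	return vfs_iter_write(file, &iter, pos);
}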
Example #11
ssize_t
nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	ssize_t result;

	if (iocb->ki_flags & IOCB_DIRECT)
		return nfs_file_direct_read(iocb, to, iocb->ki_pos);

	dprintk("NFS: read(%pD2, %zu@%lu)\n",
		iocb->ki_filp,
		iov_iter_count(to), (unsigned long) iocb->ki_pos);

	result = nfs_revalidate_mapping_protected(inode, iocb->ki_filp->f_mapping);
	if (!result) {
		result = generic_file_read_iter(iocb, to);
		if (result > 0)
			nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result);
	}
	return result;
}
Example #12
STATIC ssize_t
xfs_file_buffered_aio_read(
	struct kiocb		*iocb,
	struct iov_iter		*to)
{
	struct xfs_inode	*ip = XFS_I(file_inode(iocb->ki_filp));
	ssize_t			ret;

	trace_xfs_file_buffered_read(ip, iov_iter_count(to), iocb->ki_pos);

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
			return -EAGAIN;
	} else {
		xfs_ilock(ip, XFS_IOLOCK_SHARED);
	}
	ret = generic_file_read_iter(iocb, to);
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);

	return ret;
}
Example #13
static noinline ssize_t
xfs_file_dax_read(
	struct kiocb		*iocb,
	struct iov_iter		*to)
{
	struct xfs_inode	*ip = XFS_I(iocb->ki_filp->f_mapping->host);
	size_t			count = iov_iter_count(to);
	ssize_t			ret = 0;

	trace_xfs_file_dax_read(ip, count, iocb->ki_pos);

	if (!count)
		return 0; /* skip atime */

	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
	ret = iomap_dax_rw(iocb, to, &xfs_iomap_ops);
	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);

	file_accessed(iocb->ki_filp);
	return ret;
}
Example #14
/*
 * Read data from a specified offset in a file (referenced by inode).
 * Data may be placed either in a user or kernel buffer.
 */
ssize_t orangefs_inode_read(struct inode *inode,
			    struct iov_iter *iter,
			    loff_t *offset,
			    loff_t readahead_size)
{
	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
	size_t count = iov_iter_count(iter);
	size_t bufmap_size;
	ssize_t ret = -EINVAL;

	g_orangefs_stats.reads++;

	bufmap_size = orangefs_bufmap_size_query();
	if (count > bufmap_size) {
		gossip_debug(GOSSIP_FILE_DEBUG,
			     "%s: count is too large (%zd/%zd)!\n",
			     __func__, count, bufmap_size);
		return -EINVAL;
	}

	gossip_debug(GOSSIP_FILE_DEBUG,
		     "%s(%pU) %zd@%llu\n",
		     __func__,
		     &orangefs_inode->refn.khandle,
		     count,
		     llu(*offset));

	ret = wait_for_direct_io(ORANGEFS_IO_READ, inode, offset, iter,
			count, readahead_size);
	if (ret > 0)
		*offset += ret;

	gossip_debug(GOSSIP_FILE_DEBUG,
		     "%s(%pU): Value(%zd) returned.\n",
		     __func__,
		     &orangefs_inode->refn.khandle,
		     ret);

	return ret;
}
Example #15
/**
 * process_vm_rw - check iovecs before calling core routine
 * @pid: PID of process to read/write from/to
 * @lvec: iovec array specifying where to copy to/from locally
 * @liovcnt: size of lvec array
 * @rvec: iovec array specifying where to copy to/from in the other process
 * @riovcnt: size of rvec array
 * @flags: currently unused
 * @vm_write: 0 if reading from other process, 1 if writing to other process
 * Returns the number of bytes read/written or an error code. May
 *  return fewer bytes than expected if an error occurs during the copying
 *  process.
 */
static ssize_t process_vm_rw(pid_t pid,
			     const struct iovec __user *lvec,
			     unsigned long liovcnt,
			     const struct iovec __user *rvec,
			     unsigned long riovcnt,
			     unsigned long flags, int vm_write)
{
	struct iovec iovstack_l[UIO_FASTIOV];
	struct iovec iovstack_r[UIO_FASTIOV];
	struct iovec *iov_l = iovstack_l;
	struct iovec *iov_r = iovstack_r;
	struct iov_iter iter;
	ssize_t rc;
	int dir = vm_write ? WRITE : READ;

	if (flags != 0)
		return -EINVAL;

	/* Check iovecs */
	rc = import_iovec(dir, lvec, liovcnt, UIO_FASTIOV, &iov_l, &iter);
	if (rc < 0)
		return rc;
	if (!iov_iter_count(&iter))
		goto free_iovecs;

	rc = rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt, UIO_FASTIOV,
				   iovstack_r, &iov_r);
	if (rc <= 0)
		goto free_iovecs;

	rc = process_vm_rw_core(pid, &iter, iov_r, riovcnt, flags, vm_write);

free_iovecs:
	if (iov_r != iovstack_r)
		kfree(iov_r);
	kfree(iov_l);

	return rc;
}
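
For reference, the user-space counterpart that ends up in this entry point looks roughly like the following (a hedged example with error handling trimmed; peek_remote() is an illustrative name):

#define _GNU_SOURCE
#include <sys/types.h>
#include <sys/uio.h>

/* Read len bytes at remote_addr in process pid into buf. */
static ssize_t peek_remote(pid_t pid, void *remote_addr, char *buf, size_t len)
{
	struct iovec local  = { .iov_base = buf,         .iov_len = len };
	struct iovec remote = { .iov_base = remote_addr, .iov_len = len };

	/* flags must be 0, matching the check at the top of process_vm_rw() */
	return process_vm_readv(pid, &local, 1, &remote, 1, 0);
}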
Example #16
STATIC ssize_t
xfs_file_dio_aio_read(
	struct kiocb		*iocb,
	struct iov_iter		*to)
{
	struct xfs_inode	*ip = XFS_I(file_inode(iocb->ki_filp));
	size_t			count = iov_iter_count(to);
	ssize_t			ret;

	trace_xfs_file_direct_read(ip, count, iocb->ki_pos);

	if (!count)
		return 0; /* skip atime */

	file_accessed(iocb->ki_filp);

	xfs_ilock(ip, XFS_IOLOCK_SHARED);
	ret = iomap_dio_rw(iocb, to, &xfs_iomap_ops, NULL);
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);

	return ret;
}
Example #17
int rds_message_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to)
{
	struct rds_message *rm;
	struct scatterlist *sg;
	unsigned long to_copy;
	unsigned long vec_off;
	int copied;
	int ret;
	u32 len;

	rm = container_of(inc, struct rds_message, m_inc);
	len = be32_to_cpu(rm->m_inc.i_hdr.h_len);

	sg = rm->data.op_sg;
	vec_off = 0;
	copied = 0;

	while (iov_iter_count(to) && copied < len) {
		to_copy = min_t(unsigned long, iov_iter_count(to),
				sg->length - vec_off);
		to_copy = min_t(unsigned long, to_copy, len - copied);

		rds_stats_add(s_copy_to_user, to_copy);
		ret = copy_page_to_iter(sg_page(sg), sg->offset + vec_off,
					to_copy, to);
		if (ret != to_copy)
			return -EFAULT;

		vec_off += to_copy;
		copied += to_copy;

		if (vec_off == sg->length) {
			vec_off = 0;
			sg++;
		}
	}

	return copied;
}
Example #18
/**
 * v9fs_file_write_iter - write to a file
 * @iocb: kiocb describing the write (supplies the file and position)
 * @from: iov_iter containing the data to write
 *
 */
static ssize_t
v9fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	ssize_t retval = 0;
	loff_t origin = iocb->ki_pos;
	size_t count = iov_iter_count(from);
	int err = 0;

	retval = generic_write_checks(file, &origin, &count, 0);
	if (retval)
		return retval;

	iov_iter_truncate(from, count);

	if (!count)
		return 0;

	retval = p9_client_write(file->private_data, origin, from, &err);
	if (retval > 0) {
		struct inode *inode = file_inode(file);
		loff_t i_size;
		unsigned long pg_start, pg_end;
		pg_start = origin >> PAGE_CACHE_SHIFT;
		pg_end = (origin + retval - 1) >> PAGE_CACHE_SHIFT;
		if (inode->i_mapping && inode->i_mapping->nrpages)
			invalidate_inode_pages2_range(inode->i_mapping,
						      pg_start, pg_end);
		origin += retval;
		i_size = i_size_read(inode);
		iocb->ki_pos = origin;
		if (origin > i_size) {
			inode_add_bytes(inode, origin - i_size);
			i_size_write(inode, origin);
		}
		return retval;
	}
	return err;
}
Example #19
File: file.c Project: Lyude/linux
static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	struct fd real;
	const struct cred *old_cred;
	ssize_t ret;

	if (!iov_iter_count(iter))
		return 0;

	inode_lock(inode);
	/* Update mode */
	ovl_copyattr(ovl_inode_real(inode), inode);
	ret = file_remove_privs(file);
	if (ret)
		goto out_unlock;

	ret = ovl_real_fdget(file, &real);
	if (ret)
		goto out_unlock;

	old_cred = ovl_override_creds(file_inode(file)->i_sb);
	file_start_write(real.file);
	ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
			     ovl_iocb_to_rwf(iocb));
	file_end_write(real.file);
	revert_creds(old_cred);

	/* Update size */
	ovl_copyattr(ovl_inode_real(inode), inode);

	fdput(real);

out_unlock:
	inode_unlock(inode);

	return ret;
}
Example #20
File: inode.c Project: ifilex/sensa
static ssize_t hypfs_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	int rc;
	struct super_block *sb = file_inode(iocb->ki_filp)->i_sb;
	struct hypfs_sb_info *fs_info = sb->s_fs_info;
	size_t count = iov_iter_count(from);

	/*
	 * Currently we only allow one update per second for two reasons:
	 * 1. diag 204 is VERY expensive
	 * 2. If several processes do updates in parallel and then read the
	 *    hypfs data, the likelihood of collisions is reduced if we enforce
	 *    a minimum update interval. A collision occurs if, during the data
	 *    gathering of one process, another process triggers an update.
	 *    If the first process wants to ensure consistent data, it has to
	 *    restart data collection in this case.
	 */
	mutex_lock(&fs_info->lock);
	if (fs_info->last_update == get_seconds()) {
		rc = -EBUSY;
		goto out;
	}
	hypfs_delete_tree(sb->s_root);
	if (MACHINE_IS_VM)
		rc = hypfs_vm_create_files(sb->s_root);
	else
		rc = hypfs_diag_create_files(sb->s_root);
	if (rc) {
		pr_err("Updating the hypfs tree failed\n");
		hypfs_delete_tree(sb->s_root);
		goto out;
	}
	hypfs_update_update(sb);
	rc = count;
	iov_iter_advance(from, count);
out:
	mutex_unlock(&fs_info->lock);
	return rc;
}
Example #21
STATIC ssize_t
xfs_file_write_iter(
	struct kiocb		*iocb,
	struct iov_iter		*from)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	ssize_t			ret;
	size_t			ocount = iov_iter_count(from);

	XFS_STATS_INC(xs_write_calls);

	if (ocount == 0)
		return 0;

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return -EIO;

	if (unlikely(file->f_flags & O_DIRECT))
		ret = xfs_file_dio_aio_write(iocb, from);
	else
		ret = xfs_file_buffered_aio_write(iocb, from);

	if (ret > 0) {
		ssize_t err;

		XFS_STATS_ADD(xs_write_bytes, ret);

		/* Handle various SYNC-type writes */
		err = generic_write_sync(file, iocb->ki_pos - ret, ret);
		if (err < 0)
			ret = err;
	}
	return ret;
}
Example #22
static ssize_t jfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
	struct file *file = iocb->ki_filp;
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = file->f_mapping->host;
	size_t count = iov_iter_count(iter);
	ssize_t ret;

	ret = blockdev_direct_IO(iocb, inode, iter, jfs_get_block);

	/*
	 * In case of error, an extending write may have instantiated a few
	 * blocks outside i_size. Trim these off again.
	 */
	if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) {
		loff_t isize = i_size_read(inode);
		loff_t end = iocb->ki_pos + count;

		if (end > isize)
			jfs_write_failed(mapping, end);
	}

	return ret;
}
Example #23
static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
                              loff_t offset)
{
    struct file *file = iocb->ki_filp;
    struct address_space *mapping = file->f_mapping;
    struct inode *inode = mapping->host;
    size_t count = iov_iter_count(iter);
    int err;

    /* we don't need to use inline_data strictly */
    if (f2fs_has_inline_data(inode)) {
        err = f2fs_convert_inline_inode(inode);
        if (err)
            return err;
    }

    if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
        return 0;

    err = check_direct_IO(inode, iter, offset);
    if (err)
        return err;

    trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));

    if (iov_iter_rw(iter) == WRITE)
        __allocate_data_blocks(inode, offset, count);

    err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block_dio);
    if (err < 0 && iov_iter_rw(iter) == WRITE)
        f2fs_write_failed(mapping, offset + count);

    trace_f2fs_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), err);

    return err;
}
Example #24
/**
 * dax_do_io - Perform I/O to a DAX file
 * @iocb: The control block for this I/O
 * @inode: The file which the I/O is directed at
 * @iter: The addresses to do I/O from or to
 * @get_block: The filesystem method used to translate file offsets to blocks
 * @end_io: A filesystem callback for I/O completion
 * @flags: See below
 *
 * This function uses the same locking scheme as do_blockdev_direct_IO:
 * If @flags has DIO_LOCKING set, we assume that the i_mutex is held by the
 * caller for writes.  For reads, we take and release the i_mutex ourselves.
 * If DIO_LOCKING is not set, the filesystem takes care of its own locking.
 * As with do_blockdev_direct_IO(), we increment i_dio_count while the I/O
 * is in progress.
 */
ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
		  struct iov_iter *iter, get_block_t get_block,
		  dio_iodone_t end_io, int flags)
{
	struct buffer_head bh;
	ssize_t retval = -EINVAL;
	loff_t pos = iocb->ki_pos;
	loff_t end = pos + iov_iter_count(iter);

	memset(&bh, 0, sizeof(bh));
	bh.b_bdev = inode->i_sb->s_bdev;

	if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ)
		inode_lock(inode);

	/* Protects against truncate */
	if (!(flags & DIO_SKIP_DIO_COUNT))
		inode_dio_begin(inode);

	retval = dax_io(inode, iter, pos, end, get_block, &bh);

	if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ)
		inode_unlock(inode);

	if (end_io) {
		int err;

		err = end_io(iocb, pos, retval, bh.b_private);
		if (err)
			retval = err;
	}

	if (!(flags & DIO_SKIP_DIO_COUNT))
		inode_dio_end(inode);
	return retval;
}
Example #25
/* Do it by hand, with file-ops */
static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
		loff_t *ppos, io_fn_t fn)
{
	ssize_t ret = 0;

	while (iov_iter_count(iter)) {
		struct iovec iovec = iov_iter_iovec(iter);
		ssize_t nr;

		nr = fn(filp, iovec.iov_base, iovec.iov_len, ppos);

		if (nr < 0) {
			if (!ret)
				ret = nr;
			break;
		}
		ret += nr;
		if (nr != iovec.iov_len)
			break;
		iov_iter_advance(iter, nr);
	}

	return ret;
}
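
This helper is reached as a fallback when a file only implements the legacy per-buffer operations; a hedged sketch of such a call site (my_iter_read() mirrors the shape of the VFS readv fallback, it is not copied from it):

static ssize_t my_iter_read(struct file *file, struct iov_iter *iter,
			    loff_t *ppos)
{
	if (file->f_op->read_iter) {
		struct kiocb kiocb;
		ssize_t ret;

		init_sync_kiocb(&kiocb, file);
		kiocb.ki_pos = *ppos;
		ret = file->f_op->read_iter(&kiocb, iter);
		if (ret > 0)
			*ppos = kiocb.ki_pos;
		return ret;
	}
	if (file->f_op->read)
		return do_loop_readv_writev(file, iter, ppos, file->f_op->read);
	return -EINVAL;
}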
Example #26
STATIC ssize_t
xfs_file_read_iter(
	struct kiocb		*iocb,
	struct iov_iter		*to)
{
	struct file		*file = iocb->ki_filp;
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	size_t			size = iov_iter_count(to);
	ssize_t			ret = 0;
	int			ioflags = 0;
	xfs_fsize_t		n;
	loff_t			pos = iocb->ki_pos;

	XFS_STATS_INC(xs_read_calls);

	if (unlikely(file->f_flags & O_DIRECT))
		ioflags |= XFS_IO_ISDIRECT;
	if (file->f_mode & FMODE_NOCMTIME)
		ioflags |= XFS_IO_INVIS;

	if (unlikely(ioflags & XFS_IO_ISDIRECT)) {
		xfs_buftarg_t	*target =
			XFS_IS_REALTIME_INODE(ip) ?
				mp->m_rtdev_targp : mp->m_ddev_targp;
		/* DIO must be aligned to device logical sector size */
		if ((pos | size) & target->bt_logical_sectormask) {
			if (pos == i_size_read(inode))
				return 0;
			return -EINVAL;
		}
	}

	n = mp->m_super->s_maxbytes - pos;
	if (n <= 0 || size == 0)
		return 0;

	if (n < size)
		size = n;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	/*
	 * Locking is a bit tricky here. If we take an exclusive lock
	 * for direct IO, we effectively serialise all new concurrent
	 * read IO to this file and block it behind IO that is currently in
	 * progress because IO in progress holds the IO lock shared. We only
	 * need to hold the lock exclusive to blow away the page cache, so
	 * only take lock exclusively if the page cache needs invalidation.
	 * This allows the normal direct IO case of no page cache pages to
 * proceed concurrently without serialisation.
	 */
	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
	if ((ioflags & XFS_IO_ISDIRECT) && inode->i_mapping->nrpages) {
		xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
		xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);

		if (inode->i_mapping->nrpages) {
			ret = filemap_write_and_wait_range(
							VFS_I(ip)->i_mapping,
							pos, pos + size - 1);
			if (ret) {
				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
				return ret;
			}

			/*
			 * Invalidate whole pages. This can return an error if
			 * we fail to invalidate a page, but this should never
			 * happen on XFS. Warn if it does fail.
			 */
			ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
					pos >> PAGE_CACHE_SHIFT,
					(pos + size - 1) >> PAGE_CACHE_SHIFT);
			WARN_ON_ONCE(ret);
			ret = 0;
		}
		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
	}

	trace_xfs_file_read(ip, size, pos, ioflags);

	ret = generic_file_read_iter(iocb, to);
	if (ret > 0)
		XFS_STATS_ADD(xs_read_bytes, ret);

	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
	return ret;
}
Example #27
/*
 * Common pre-write limit and setup checks.
 *
 * Called with the iolock held either shared or exclusive according to
 * @iolock, and returns with it held.  Might upgrade the iolock to exclusive
 * if called for a direct write beyond i_size.
 */
STATIC ssize_t
xfs_file_aio_write_checks(
	struct kiocb		*iocb,
	struct iov_iter		*from,
	int			*iolock)
{
	struct file		*file = iocb->ki_filp;
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	ssize_t			error = 0;
	size_t			count = iov_iter_count(from);
	bool			drained_dio = false;

restart:
	error = generic_write_checks(iocb, from);
	if (error <= 0)
		return error;

	error = xfs_break_layouts(inode, iolock);
	if (error)
		return error;

	/*
	 * For changing security info in file_remove_privs() we need i_rwsem
	 * exclusively.
	 */
	if (*iolock == XFS_IOLOCK_SHARED && !IS_NOSEC(inode)) {
		xfs_iunlock(ip, *iolock);
		*iolock = XFS_IOLOCK_EXCL;
		xfs_ilock(ip, *iolock);
		goto restart;
	}
	/*
	 * If the offset is beyond the size of the file, we need to zero any
	 * blocks that fall between the existing EOF and the start of this
	 * write.  If zeroing is needed and we are currently holding the
	 * iolock shared, we need to update it to exclusive which implies
	 * having to redo all checks before.
	 *
	 * We need to serialise against EOF updates that occur in IO
	 * completions here. We want to make sure that nobody is changing the
	 * size while we do this check until we have placed an IO barrier (i.e.
	 * hold the XFS_IOLOCK_EXCL) that prevents new IO from being dispatched.
	 * The spinlock effectively forms a memory barrier once we have the
	 * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value
	 * and hence be able to correctly determine if we need to run zeroing.
	 */
	spin_lock(&ip->i_flags_lock);
	if (iocb->ki_pos > i_size_read(inode)) {
		spin_unlock(&ip->i_flags_lock);
		if (!drained_dio) {
			if (*iolock == XFS_IOLOCK_SHARED) {
				xfs_iunlock(ip, *iolock);
				*iolock = XFS_IOLOCK_EXCL;
				xfs_ilock(ip, *iolock);
				iov_iter_reexpand(from, count);
			}
			/*
			 * We now have an IO submission barrier in place, but
			 * AIO can do EOF updates during IO completion and hence
			 * we now need to wait for all of them to drain. Non-AIO
			 * DIO will have drained before we are given the
			 * XFS_IOLOCK_EXCL, and so for most cases this wait is a
			 * no-op.
			 */
			inode_dio_wait(inode);
			drained_dio = true;
			goto restart;
		}
		error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), NULL);
		if (error)
			return error;
	} else
		spin_unlock(&ip->i_flags_lock);

	/*
	 * Updating the timestamps will grab the ilock again from
	 * xfs_fs_dirty_inode, so we have to call it after dropping the
	 * lock above.  Eventually we should look into a way to avoid
	 * the pointless lock roundtrip.
	 */
	if (likely(!(file->f_mode & FMODE_NOCMTIME))) {
		error = file_update_time(file);
		if (error)
			return error;
	}

	/*
	 * If we're writing the file then make sure to clear the setuid and
	 * setgid bits if the process is not being run by root.  This keeps
	 * people from modifying setuid and setgid binaries.
	 */
	if (!IS_NOSEC(inode))
		return file_remove_privs(file);
	return 0;
}
Example #28
/*
 * xfs_file_dio_aio_write - handle direct IO writes
 *
 * Lock the inode appropriately to prepare for and issue a direct IO write.
 * By separating it from the buffered write path we remove all the
 * tricky-to-follow locking changes and looping.
 *
 * If there are cached pages or we're extending the file, we need IOLOCK_EXCL
 * until we're sure the bytes at the new EOF have been zeroed and/or the cached
 * pages are flushed out.
 *
 * In most cases the direct IO writes will be done holding IOLOCK_SHARED
 * allowing them to be done in parallel with reads and other direct IO writes.
 * However, if the IO is not aligned to filesystem blocks, the direct IO layer
 * needs to do sub-block zeroing and that requires serialisation against other
 * direct IOs to the same block. In this case we need to serialise the
 * submission of the unaligned IOs so that we don't get racing block zeroing in
 * the dio layer.  To avoid the problem with aio, we also need to wait for
 * outstanding IOs to complete so that unwritten extent conversion is completed
 * before we try to map the overlapping block. This is currently implemented by
 * hitting it with a big hammer (i.e. inode_dio_wait()).
 *
 * Returns with locks held indicated by @iolock and errors indicated by
 * negative return values.
 */
STATIC ssize_t
xfs_file_dio_aio_write(
	struct kiocb		*iocb,
	struct iov_iter		*from)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	ssize_t			ret = 0;
	int			unaligned_io = 0;
	int			iolock;
	size_t			count = iov_iter_count(from);
	struct xfs_buftarg      *target = XFS_IS_REALTIME_INODE(ip) ?
					mp->m_rtdev_targp : mp->m_ddev_targp;

	/* DIO must be aligned to device logical sector size */
	if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
		return -EINVAL;

	/*
	 * Don't take the exclusive iolock here unless the I/O is unaligned to
	 * the file system block size.  We don't need to consider the EOF
	 * extension case here because xfs_file_aio_write_checks() will relock
	 * the inode as necessary for EOF zeroing cases and fill out the new
	 * inode size as appropriate.
	 */
	if ((iocb->ki_pos & mp->m_blockmask) ||
	    ((iocb->ki_pos + count) & mp->m_blockmask)) {
		unaligned_io = 1;

		/*
		 * We can't properly handle unaligned direct I/O to reflink
		 * files yet, as we can't unshare a partial block.
		 */
		if (xfs_is_reflink_inode(ip)) {
			trace_xfs_reflink_bounce_dio_write(ip, iocb->ki_pos, count);
			return -EREMCHG;
		}
		iolock = XFS_IOLOCK_EXCL;
	} else {
		iolock = XFS_IOLOCK_SHARED;
	}

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!xfs_ilock_nowait(ip, iolock))
			return -EAGAIN;
	} else {
		xfs_ilock(ip, iolock);
	}

	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
	if (ret)
		goto out;
	count = iov_iter_count(from);

	/*
	 * If we are doing unaligned IO, wait for all other IO to drain,
	 * otherwise demote the lock if we had to take the exclusive lock
	 * for other reasons in xfs_file_aio_write_checks.
	 */
	if (unaligned_io) {
		/* If we are going to wait for other DIO to finish, bail */
		if (iocb->ki_flags & IOCB_NOWAIT) {
			if (atomic_read(&inode->i_dio_count))
				return -EAGAIN;
		} else {
			inode_dio_wait(inode);
		}
	} else if (iolock == XFS_IOLOCK_EXCL) {
		xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
		iolock = XFS_IOLOCK_SHARED;
	}

	trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
	ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io);
out:
	xfs_iunlock(ip, iolock);

	/*
	 * No fallback to buffered IO on errors for XFS, direct IO will either
	 * complete fully or fail.
	 */
	ASSERT(ret < 0 || ret == count);
	return ret;
}
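
The alignment tests used above (and in the read path earlier) reduce to simple mask arithmetic; a standalone user-space check of the (pos | count) & mask idiom, assuming 4096-byte blocks:

#include <assert.h>
#include <stdbool.h>

#define BLOCK_SIZE 4096ULL
#define BLOCK_MASK (BLOCK_SIZE - 1)

/* True when both the starting offset and the length are block-aligned. */
static bool dio_is_aligned(unsigned long long pos, unsigned long long count)
{
	return ((pos | count) & BLOCK_MASK) == 0;
}

int main(void)
{
	assert(dio_is_aligned(0, 4096));
	assert(!dio_is_aligned(512, 4096));   /* misaligned start */
	assert(!dio_is_aligned(4096, 1024));  /* misaligned length */
	return 0;
}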
Example #29
/**
 * process_vm_rw_core - core of reading/writing pages from task specified
 * @pid: PID of process to read/write from/to
 * @iter: where to copy to/from locally
 * @rvec: iovec array specifying where to copy to/from in the other process
 * @riovcnt: size of rvec array
 * @flags: currently unused
 * @vm_write: 0 if reading from other process, 1 if writing to other process
 * Returns the number of bytes read/written or an error code. May
 *  return fewer bytes than expected if an error occurs during the copying
 *  process.
 */
static ssize_t process_vm_rw_core(pid_t pid, struct iov_iter *iter,
				  const struct iovec *rvec,
				  unsigned long riovcnt,
				  unsigned long flags, int vm_write)
{
	struct task_struct *task;
	struct page *pp_stack[PVM_MAX_PP_ARRAY_COUNT];
	struct page **process_pages = pp_stack;
	struct mm_struct *mm;
	unsigned long i;
	ssize_t rc = 0;
	unsigned long nr_pages = 0;
	unsigned long nr_pages_iov;
	ssize_t iov_len;
	size_t total_len = iov_iter_count(iter);

	return -ENOSYS; // PaX: until properly audited

	/*
	 * Work out how many pages of struct pages we're going to need
	 * when eventually calling get_user_pages
	 */
	for (i = 0; i < riovcnt; i++) {
		iov_len = rvec[i].iov_len;
		if (iov_len <= 0)
			continue;
		nr_pages_iov = ((unsigned long)rvec[i].iov_base + iov_len) / PAGE_SIZE -
				(unsigned long)rvec[i].iov_base / PAGE_SIZE + 1;
		nr_pages = max(nr_pages, nr_pages_iov);
	}

	if (nr_pages == 0)
		return 0;

	if (nr_pages > PVM_MAX_PP_ARRAY_COUNT) {
		/* For reliability don't try to kmalloc more than
		   2 pages worth */
		process_pages = kmalloc(min_t(size_t, PVM_MAX_KMALLOC_PAGES,
					      sizeof(struct page *) * nr_pages),
					GFP_KERNEL);

		if (!process_pages)
			return -ENOMEM;
	}

	/* Get process information */
	rcu_read_lock();
	task = find_task_by_vpid(pid);
	if (task)
		get_task_struct(task);
	rcu_read_unlock();
	if (!task) {
		rc = -ESRCH;
		goto free_proc_pages;
	}

	if (gr_handle_ptrace(task, vm_write ? PTRACE_POKETEXT : PTRACE_ATTACH)) {
		rc = -EPERM;
		goto put_task_struct;
	}

	mm = mm_access(task, PTRACE_MODE_ATTACH);
	if (!mm || IS_ERR(mm)) {
		rc = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH;
		/*
		 * Explicitly map EACCES to EPERM as EPERM is a more
		 * appropriate error code for process_vm_readv/writev
		 */
		if (rc == -EACCES)
			rc = -EPERM;
		goto put_task_struct;
	}

	for (i = 0; i < riovcnt && iov_iter_count(iter) && !rc; i++)
		rc = process_vm_rw_single_vec(
			(unsigned long)rvec[i].iov_base, rvec[i].iov_len,
			iter, process_pages, mm, task, vm_write);

	/* copied = space before - space after */
	total_len -= iov_iter_count(iter);

	/* If we have managed to copy any data at all then
	   we return the number of bytes copied. Otherwise
	   we return the error code */
	if (total_len)
		rc = total_len;

	mmput(mm);

put_task_struct:
	put_task_struct(task);

free_proc_pages:
	if (process_pages != pp_stack)
		kfree(process_pages);
	return rc;
}
Example #30
STATIC ssize_t
xfs_file_buffered_aio_write(
	struct kiocb		*iocb,
	struct iov_iter		*from)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	ssize_t			ret;
	int			enospc = 0;
	int			iolock;

	if (iocb->ki_flags & IOCB_NOWAIT)
		return -EOPNOTSUPP;

write_retry:
	iolock = XFS_IOLOCK_EXCL;
	xfs_ilock(ip, iolock);

	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
	if (ret)
		goto out;

	/* We can write back this queue in page reclaim */
	current->backing_dev_info = inode_to_bdi(inode);

	trace_xfs_file_buffered_write(ip, iov_iter_count(from), iocb->ki_pos);
	ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops);
	if (likely(ret >= 0))
		iocb->ki_pos += ret;

	/*
	 * If we hit a space limit, try to free up some lingering preallocated
	 * space before returning an error. In the case of ENOSPC, first try to
	 * write back all dirty inodes to free up some of the excess reserved
	 * metadata space. This reduces the chances that the eofblocks scan
	 * waits on dirty mappings. Since xfs_flush_inodes() is serialized, this
	 * also behaves as a filter to prevent too many eofblocks scans from
	 * running at the same time.
	 */
	if (ret == -EDQUOT && !enospc) {
		xfs_iunlock(ip, iolock);
		enospc = xfs_inode_free_quota_eofblocks(ip);
		if (enospc)
			goto write_retry;
		enospc = xfs_inode_free_quota_cowblocks(ip);
		if (enospc)
			goto write_retry;
		iolock = 0;
	} else if (ret == -ENOSPC && !enospc) {
		struct xfs_eofblocks eofb = {0};

		enospc = 1;
		xfs_flush_inodes(ip->i_mount);

		xfs_iunlock(ip, iolock);
		eofb.eof_flags = XFS_EOF_FLAGS_SYNC;
		xfs_icache_free_eofblocks(ip->i_mount, &eofb);
		xfs_icache_free_cowblocks(ip->i_mount, &eofb);
		goto write_retry;
	}

	current->backing_dev_info = NULL;
out:
	if (iolock)
		xfs_iunlock(ip, iolock);
	return ret;
}