Example #1
File: rw26.c Project: rread/lustre
static ssize_t
ll_direct_IO(
# ifndef HAVE_IOV_ITER_RW
    int rw,
# endif
    struct kiocb *iocb, struct iov_iter *iter,
    loff_t file_offset)
{
    struct ll_cl_context *lcc;
    const struct lu_env *env;
    struct cl_io *io;
    struct file *file = iocb->ki_filp;
    struct inode *inode = file->f_mapping->host;
    ssize_t count = iov_iter_count(iter);
    ssize_t tot_bytes = 0, result = 0;
    size_t size = MAX_DIO_SIZE;

    /* FIXME: I/O smaller than PAGE_SIZE may be broken on ia64 */
    if ((file_offset & ~PAGE_MASK) || (count & ~PAGE_MASK))
        return -EINVAL;

    CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), size=%zd (max %lu), "
           "offset=%lld=%llx, pages %zd (max %lu)\n",
           PFID(ll_inode2fid(inode)), inode, count, MAX_DIO_SIZE,
           file_offset, file_offset, count >> PAGE_SHIFT,
           MAX_DIO_SIZE >> PAGE_SHIFT);

    /* Check that all user buffers are aligned as well */
    if (iov_iter_alignment(iter) & ~PAGE_MASK)
        return -EINVAL;

    lcc = ll_cl_find(file);
    if (lcc == NULL)
        RETURN(-EIO);

    env = lcc->lcc_env;
    LASSERT(!IS_ERR(env));
    io = lcc->lcc_io;
    LASSERT(io != NULL);

    /* 0. Need locking between buffered and direct access, and to guard
     *    against races with size changes from concurrent truncates and
     *    writes.
     * 1. Need the inode mutex to operate on transient pages.
     */
    if (iov_iter_rw(iter) == READ)
        inode_lock(inode);

    while (iov_iter_count(iter)) {
        struct page **pages;
        size_t offs;

        count = min_t(size_t, iov_iter_count(iter), size);
        if (iov_iter_rw(iter) == READ) {
            if (file_offset >= i_size_read(inode))
                break;

            if (file_offset + count > i_size_read(inode))
                count = i_size_read(inode) - file_offset;
        }

        result = iov_iter_get_pages_alloc(iter, &pages, count, &offs);
        if (likely(result > 0)) {
            /* pages needed to cover 'result' bytes starting at
             * intra-page offset 'offs' */
            int n = DIV_ROUND_UP(result + offs, PAGE_SIZE);

            result = ll_direct_IO_seg(env, io, iov_iter_rw(iter),
                                      inode, result, file_offset,
                                      pages, n);
            ll_free_user_pages(pages, n,
                               iov_iter_rw(iter) == READ);

        }
        if (unlikely(result <= 0)) {
            /* If we can't allocate a large enough buffer
             * for the request, shrink it to a smaller
             * PAGE_SIZE multiple and try again (see the
             * sketch after this example).  We should always
             * be able to kmalloc a page's worth of page
             * pointers, which covers 4MB on i386. */
            if (result == -ENOMEM &&
                    size > (PAGE_SIZE / sizeof(*pages)) *
                    PAGE_SIZE) {
                /* halve, then round up to a PAGE_SIZE multiple */
                size = ((((size / 2) - 1) |
                         ~PAGE_MASK) + 1) & PAGE_MASK;
                CDEBUG(D_VFSTRACE, "DIO size now %zu\n",
                       size);
                continue;
            }

            GOTO(out, result);
        }

        iov_iter_advance(iter, result);
        tot_bytes += result;
        file_offset += result;
    }
out:
    if (iov_iter_rw(iter) == READ)
        inode_unlock(inode);

    if (tot_bytes > 0) {
        struct vvp_io *vio = vvp_env_io(env);

        /* no commit async for direct IO */
        vio->u.write.vui_written += tot_bytes;
    }

    /* bytes transferred if any, otherwise the last error */
    return tot_bytes ? : result;
}
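
A note on the -ENOMEM retry path above: the expression assigned to "size"
halves the current transfer size and rounds the result up to a PAGE_SIZE
multiple, so each retry issues a smaller but still page-aligned DIO.
Below is a minimal standalone sketch of that arithmetic; SKETCH_PAGE_SIZE,
SKETCH_PAGE_MASK, and shrink_dio_size are names invented for this sketch
and do not appear in the Lustre sources.

#include <stdio.h>
#include <stddef.h>

#define SKETCH_PAGE_SIZE 4096UL
#define SKETCH_PAGE_MASK (~(SKETCH_PAGE_SIZE - 1))

static size_t shrink_dio_size(size_t size)
{
    /* ((x - 1) | ~PAGE_MASK) + 1 rounds x up to the next PAGE_SIZE
     * multiple; the final mask keeps the result page-aligned */
    return ((((size / 2) - 1) | ~SKETCH_PAGE_MASK) + 1) & SKETCH_PAGE_MASK;
}

int main(void)
{
    size_t size = 1UL << 20;            /* pretend the max DIO size is 1MB */

    while (size > SKETCH_PAGE_SIZE) {   /* simulate repeated -ENOMEM */
        size = shrink_dio_size(size);
        printf("DIO size now %zu\n", size);
    }
    return 0;
}

Halving from 1MB, the sketch prints 524288, 262144, and so on down to
4096, each value a PAGE_SIZE multiple, mirroring the "DIO size now"
CDEBUG trace in ll_direct_IO.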
Example #2
/*
 * Completely synchronous read and write methods.  Direct from __user
 * buffer to osd, or directly to user pages (if O_DIRECT).
 *
 * If the read spans an object boundary, just do multiple reads.
 */
static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
			      int *checkeof)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	struct page **pages;
	u64 off = iocb->ki_pos;
	int num_pages;
	ssize_t ret;
	size_t len = iov_iter_count(to);

	dout("sync_read on file %p %llu~%u %s\n", file, off,
	     (unsigned)len,
	     (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");

	if (!len)
		return 0;
	/*
	 * Flush any page cache pages in this range.  This will make
	 * concurrent normal and sync I/O slow, but it will at least
	 * behave sensibly when the two are issued in sequence.
	 */
	ret = filemap_write_and_wait_range(inode->i_mapping, off,
						off + len);
	if (ret < 0)
		return ret;

	if (unlikely(to->type & ITER_PIPE)) {
		size_t page_off;
		ret = iov_iter_get_pages_alloc(to, &pages, len,
					       &page_off);
		/* don't mask a real error from iov_iter_get_pages_alloc() */
		if (ret <= 0)
			return ret < 0 ? ret : -ENOMEM;
		num_pages = DIV_ROUND_UP(ret + page_off, PAGE_SIZE);

		ret = striped_read(inode, off, ret, pages, num_pages,
				   page_off, checkeof);
		if (ret > 0) {
			iov_iter_advance(to, ret);
			off += ret;
		} else {
			/* for a pipe iterator, advancing by 0 releases
			 * the pipe buffers grabbed above */
			iov_iter_advance(to, 0);
		}
		ceph_put_page_vector(pages, num_pages, false);
	} else {
		num_pages = calc_pages_for(off, len);
		pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
		if (IS_ERR(pages))
			return PTR_ERR(pages);

		ret = striped_read(inode, off, len, pages, num_pages,
				   (off & ~PAGE_MASK), checkeof);
		if (ret > 0) {
			int l, k = 0;
			size_t left = ret;

			while (left) {
				size_t page_off = off & ~PAGE_MASK;
				size_t copy = min_t(size_t, left,
						    PAGE_SIZE - page_off);
				l = copy_page_to_iter(pages[k++], page_off,
						      copy, to);
				off += l;
				left -= l;
				if (l < copy)
					break;
			}
		}
		ceph_release_page_vector(pages, num_pages);
	}

	if (off > iocb->ki_pos) {
		ret = off - iocb->ki_pos;
		iocb->ki_pos = off;
	}

	dout("sync_read result %zd\n", ret);
	return ret;
}
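
The non-pipe branch above drains the page vector with copy_page_to_iter()
one page at a time, starting at the intra-page offset of the read
position; after the first page the offset becomes page-aligned, so later
copies start at offset 0. Below is a minimal userspace sketch of that
walk, assuming 4KB pages; PAGE_SZ and copy_pages_to_buf are invented for
illustration (not part of the Ceph sources), and the kernel's short-copy
check (l < copy) is omitted.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SZ 4096UL

/* copy 'len' bytes that begin at absolute offset 'off' out of 'pages' */
static size_t copy_pages_to_buf(char **pages, unsigned long off,
                                size_t len, char *dst)
{
    size_t left = len;
    int k = 0;

    while (left) {
        size_t page_off = off & (PAGE_SZ - 1);   /* offset inside the page */
        size_t copy = left < PAGE_SZ - page_off
                        ? left : PAGE_SZ - page_off;

        memcpy(dst, pages[k++] + page_off, copy);
        dst += copy;
        off += copy;    /* page-aligned after the first iteration */
        left -= copy;
    }
    return len - left;
}

int main(void)
{
    char *pages[2] = { calloc(1, PAGE_SZ), calloc(1, PAGE_SZ) };
    char out[16];

    /* lay 10 bytes across the page boundary, 5 on each side */
    memcpy(pages[0] + PAGE_SZ - 5, "hello", 5);
    memcpy(pages[1], "world", 5);

    copy_pages_to_buf(pages, PAGE_SZ - 5, 10, out);
    printf("%.10s\n", out);     /* prints "helloworld" */

    free(pages[0]);
    free(pages[1]);
    return 0;
}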