/**
 * blk_rq_map_user_iov - map user data to a request, for REQ_TYPE_BLOCK_PC usage
 * @q:		request queue where request should be inserted
 * @rq:		request to map data to
 * @map_data:   pointer to the rq_map_data holding pages (if necessary)
 * @iter:	iovec iterator
 * @gfp_mask:	memory allocation flags
 *
 * Description:
 *    Data will be mapped directly for zero copy I/O, if possible. Otherwise
 *    a kernel bounce buffer is used.
 *
 *    A matching blk_rq_unmap_user() must be issued at the end of I/O, while
 *    still in process context.
 *
 *    Note: The mapped bio may need to be bounced through blk_queue_bounce()
 *    before being submitted to the device, as pages mapped may be out of
 *    reach. It's the caller's responsibility to make sure this happens. The
 *    original bio must be passed back in to blk_rq_unmap_user() for proper
 *    unmapping.
 */
int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
			struct rq_map_data *map_data,
			const struct iov_iter *iter, gfp_t gfp_mask)
{
	bool copy = false;
	unsigned long align = q->dma_pad_mask | queue_dma_alignment(q);
	struct bio *bio = NULL;
	struct iov_iter i;
	int ret;

	if (map_data)
		copy = true;
	else if (iov_iter_alignment(iter) & align)
		copy = true;
	else if (queue_virt_boundary(q))
		copy = queue_virt_boundary(q) & iov_iter_gap_alignment(iter);

	i = *iter;
	do {
		ret = __blk_rq_map_user_iov(rq, map_data, &i, gfp_mask, copy);
		if (ret)
			goto unmap_rq;
		if (!bio)
			bio = rq->bio;
	} while (iov_iter_count(&i));

	if (!bio_flagged(bio, BIO_USER_MAPPED))
		rq->cmd_flags |= REQ_COPY_USER;
	return 0;

unmap_rq:
	__blk_rq_unmap_user(bio);
	rq->bio = NULL;
	return -EINVAL;
}
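
The kernel-doc above requires a matching blk_rq_unmap_user() once the I/O is done, still in process context, with the original bio passed back. The sketch below outlines that map -> execute -> unmap calling pattern; the queue, disk and user iovec are illustrative assumptions, and the command bytes, timeout and blk_queue_bounce() handling a real passthrough path needs are omitted.

#include <linux/blkdev.h>
#include <linux/uio.h>
#include <linux/slab.h>

/*
 * Sketch (not a drop-in implementation) of the map -> execute -> unmap
 * pattern implied by the kernel-doc above. Error handling is abbreviated;
 * a real passthrough request would also need its command bytes and
 * timeout filled in before execution.
 */
static int example_passthrough(struct request_queue *q, struct gendisk *disk,
			       const struct iovec __user *uvec,
			       unsigned nr_segs, int rw)
{
	struct iovec fast_iov[UIO_FASTIOV], *iov = fast_iov;
	struct iov_iter iter;
	struct request *rq;
	struct bio *bio;
	int ret;

	/* Build an iov_iter over the user-space iovec array. */
	ret = import_iovec(rw, uvec, nr_segs, UIO_FASTIOV, &iov, &iter);
	if (ret < 0)
		return ret;

	rq = blk_get_request(q, rw, GFP_KERNEL);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
		goto free_iov;
	}
	blk_rq_set_block_pc(rq);	/* REQ_TYPE_BLOCK_PC, as in the doc above */

	/* Map the user pages (or a bounce buffer) into the request. */
	ret = blk_rq_map_user_iov(q, rq, NULL, &iter, GFP_KERNEL);
	if (ret)
		goto put_rq;

	/* Keep the original bio: it must be handed back for unmapping. */
	bio = rq->bio;

	blk_execute_rq(q, disk, rq, 0);

	/* Matching unmap, still in process context, as required above. */
	ret = blk_rq_unmap_user(bio);

put_rq:
	blk_put_request(rq);
free_iov:
	kfree(iov);	/* NULL if the on-stack fast_iov array was used */
	return ret;
}
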
Example #2
static ssize_t
ll_direct_IO(
# ifndef HAVE_IOV_ITER_RW
    int rw,
# endif
    struct kiocb *iocb, struct iov_iter *iter,
    loff_t file_offset)
{
    struct ll_cl_context *lcc;
    const struct lu_env *env;
    struct cl_io *io;
    struct file *file = iocb->ki_filp;
    struct inode *inode = file->f_mapping->host;
    ssize_t count = iov_iter_count(iter);
    ssize_t tot_bytes = 0, result = 0;
    size_t size = MAX_DIO_SIZE;

    /* FIXME: io smaller than PAGE_SIZE is broken on ia64 ??? */
    if ((file_offset & ~PAGE_MASK) || (count & ~PAGE_MASK))
        return -EINVAL;

    CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), size=%zd (max %lu), "
           "offset=%lld=%llx, pages %zd (max %lu)\n",
           PFID(ll_inode2fid(inode)), inode, count, MAX_DIO_SIZE,
           file_offset, file_offset, count >> PAGE_SHIFT,
           MAX_DIO_SIZE >> PAGE_SHIFT);

    /* Check that all user buffers are aligned as well */
    if (iov_iter_alignment(iter) & ~PAGE_MASK)
        return -EINVAL;

    lcc = ll_cl_find(file);
    if (lcc == NULL)
        RETURN(-EIO);

    env = lcc->lcc_env;
    LASSERT(!IS_ERR(env));
    io = lcc->lcc_io;
    LASSERT(io != NULL);

    /* 0. Need locking between buffered and direct access, and to avoid
     *    races with size changes caused by concurrent truncates and writes.
     * 1. Need the inode mutex to operate on transient pages.
     */
    if (iov_iter_rw(iter) == READ)
        inode_lock(inode);

    while (iov_iter_count(iter)) {
        struct page **pages;
        size_t offs;

        count = min_t(size_t, iov_iter_count(iter), size);
        if (iov_iter_rw(iter) == READ) {
            if (file_offset >= i_size_read(inode))
                break;

            if (file_offset + count > i_size_read(inode))
                count = i_size_read(inode) - file_offset;
        }

        result = iov_iter_get_pages_alloc(iter, &pages, count, &offs);
        if (likely(result > 0)) {
            int n = DIV_ROUND_UP(result + offs, PAGE_SIZE);

            result = ll_direct_IO_seg(env, io, iov_iter_rw(iter),
                                      inode, result, file_offset,
                                      pages, n);
            ll_free_user_pages(pages, n,
                               iov_iter_rw(iter) == READ);

        }
        if (unlikely(result <= 0)) {
            /* If we can't allocate a large enough buffer
             * for the request, shrink it to a smaller
             * PAGE_SIZE multiple and try again.
             * We should always be able to kmalloc a page's
             * worth of page pointers, i.e. 4MB on i386. */
            if (result == -ENOMEM &&
                    size > (PAGE_SIZE / sizeof(*pages)) *
                    PAGE_SIZE) {
                size = ((((size / 2) - 1) |
                         ~PAGE_MASK) + 1) & PAGE_MASK;
                CDEBUG(D_VFSTRACE, "DIO size now %zu\n",
                       size);
                continue;
            }

            GOTO(out, result);
        }

        iov_iter_advance(iter, result);
        tot_bytes += result;
        file_offset += result;
    }
out:
    if (iov_iter_rw(iter) == READ)
        inode_unlock(inode);

    if (tot_bytes > 0) {
        struct vvp_io *vio = vvp_env_io(env);

        /* no commit async for direct IO */
        vio->u.write.vui_written += tot_bytes;
    }

    return tot_bytes ? : result;
}
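
ll_free_user_pages() is called in the loop above but not included in this excerpt. Since iov_iter_get_pages_alloc() pins the user pages and allocates the page-pointer array, the helper has to undo both. The sketch below is an assumption about what such a helper does (dirty on read, release each page, free the array), not the verbatim Lustre source.

#include <linux/mm.h>

/*
 * Sketch (assumed, not the Lustre original): release pages pinned by
 * iov_iter_get_pages_alloc(). For reads the device wrote into user
 * memory, so each page is marked dirty before release; the pointer
 * array was allocated by the iov_iter code and is freed with kvfree().
 */
static void example_free_user_pages(struct page **pages, int npages,
                                    bool do_dirty)
{
    int i;

    for (i = 0; i < npages; i++) {
        if (do_dirty)
            set_page_dirty_lock(pages[i]);
        put_page(pages[i]);
    }
    kvfree(pages);
}
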
Example #3
static int f2fs_write_begin(struct file *file, struct address_space *mapping,
                            loff_t pos, unsigned len, unsigned flags,
                            struct page **pagep, void **fsdata)
{
    struct inode *inode = mapping->host;
    struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
    struct page *page = NULL;
    struct page *ipage;
    pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
    struct dnode_of_data dn;
    int err = 0;

    trace_f2fs_write_begin(inode, pos, len, flags);

    f2fs_balance_fs(sbi);

    /*
     * Check this here to avoid a deadlock between the inode page and
     * page #0. The locking rule for inline_data conversion must be:
     * lock_page(page #0) -> lock_page(inode_page)
     */
    if (index != 0) {
        err = f2fs_convert_inline_inode(inode);
        if (err)
            goto fail;
    }
repeat:
    page = grab_cache_page_write_begin(mapping, index, flags);
    if (!page) {
        err = -ENOMEM;
        goto fail;
    }

    *pagep = page;

    f2fs_lock_op(sbi);

    /* check inline_data */
    ipage = get_node_page(sbi, inode->i_ino);
    if (IS_ERR(ipage)) {
        err = PTR_ERR(ipage);
        goto unlock_fail;
    }

    set_new_dnode(&dn, inode, ipage, ipage, 0);

    if (f2fs_has_inline_data(inode)) {
        if (pos + len <= MAX_INLINE_DATA) {
            read_inline_data(page, ipage);
            set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
            sync_inode_page(&dn);
            goto put_next;
        }
        err = f2fs_convert_inline_page(&dn, page);
        if (err)
            goto put_fail;
    }

    err = f2fs_get_block(&dn, index);
    if (err)
        goto put_fail;
put_next:
    f2fs_put_dnode(&dn);
    f2fs_unlock_op(sbi);

    f2fs_wait_on_page_writeback(page, DATA);

    if (len == PAGE_CACHE_SIZE)
        goto out_update;
    if (PageUptodate(page))
        goto out_clear;

    if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
        unsigned start = pos & (PAGE_CACHE_SIZE - 1);
        unsigned end = start + len;

        /* Reading beyond i_size is simple: memset to zero */
        zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE);
        goto out_update;
    }

    if (dn.data_blkaddr == NEW_ADDR) {
        zero_user_segment(page, 0, PAGE_CACHE_SIZE);
    } else {
        struct f2fs_io_info fio = {
            .sbi = sbi,
            .type = DATA,
            .rw = READ_SYNC,
            .blk_addr = dn.data_blkaddr,
            .page = page,
            .encrypted_page = NULL,
        };
        err = f2fs_submit_page_bio(&fio);
        if (err)
            goto fail;

        lock_page(page);
        if (unlikely(!PageUptodate(page))) {
            err = -EIO;
            goto fail;
        }
        if (unlikely(page->mapping != mapping)) {
            f2fs_put_page(page, 1);
            goto repeat;
        }

        /* avoid symlink page */
        if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
            err = f2fs_decrypt_one(inode, page);
            if (err)
                goto fail;
        }
    }
out_update:
    SetPageUptodate(page);
out_clear:
    clear_cold_data(page);
    return 0;

put_fail:
    f2fs_put_dnode(&dn);
unlock_fail:
    f2fs_unlock_op(sbi);
fail:
    f2fs_put_page(page, 1);
    f2fs_write_failed(mapping, pos + len);
    return err;
}

static int f2fs_write_end(struct file *file,
                          struct address_space *mapping,
                          loff_t pos, unsigned len, unsigned copied,
                          struct page *page, void *fsdata)
{
    struct inode *inode = page->mapping->host;

    trace_f2fs_write_end(inode, pos, len, copied);

    set_page_dirty(page);

    if (pos + copied > i_size_read(inode)) {
        i_size_write(inode, pos + copied);
        mark_inode_dirty(inode);
        update_inode_page(inode);
    }

    f2fs_put_page(page, 1);
    return copied;
}
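
f2fs_write_begin() and f2fs_write_end() are not called directly by f2fs; they implement the address_space_operations contract driven by the generic buffered-write path. The sketch below illustrates that contract in the spirit of generic_perform_write(); it is a simplified assumption (fault-in, short-copy retry and dirty throttling are omitted), not the real VFS loop.

#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/uio.h>

/*
 * Sketch of how ->write_begin/->write_end are driven for a buffered
 * write: ask the filesystem for a prepared, locked page, copy user
 * data into it, then hand it back. Simplified; the real loop also
 * faults in user pages, retries short copies and throttles dirtying.
 */
static ssize_t example_perform_write(struct file *file, struct iov_iter *i,
                                     loff_t pos)
{
    struct address_space *mapping = file->f_mapping;
    const struct address_space_operations *a_ops = mapping->a_ops;
    ssize_t written = 0;

    while (iov_iter_count(i)) {
        unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
        unsigned bytes = min_t(size_t, PAGE_CACHE_SIZE - offset,
                               iov_iter_count(i));
        struct page *page;
        size_t copied;
        void *fsdata;
        int err;

        err = a_ops->write_begin(file, mapping, pos, bytes, 0,
                                 &page, &fsdata);
        if (err < 0)
            return written ? written : err;

        copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
        flush_dcache_page(page);

        err = a_ops->write_end(file, mapping, pos, bytes, copied,
                               page, fsdata);
        if (err < 0)
            return written ? written : err;
        if (err == 0)
            break;              /* short copy; real code would retry */

        iov_iter_advance(i, err);
        pos += err;
        written += err;
    }
    return written;
}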

static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
                           loff_t offset)
{
    unsigned blocksize_mask = inode->i_sb->s_blocksize - 1;

    if (offset & blocksize_mask)
        return -EINVAL;

    if (iov_iter_alignment(iter) & blocksize_mask)
        return -EINVAL;

    return 0;
}
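
check_direct_IO() only validates alignment; a ->direct_IO method of this kernel generation would call it before handing the iterator to the generic block-layer DIO code. The sketch below shows that shape. example_get_block is a hypothetical placeholder for the filesystem's real get_block_t callback, and the error handling and statistics of the real f2fs_direct_IO() are omitted.

#include <linux/fs.h>

/* Hypothetical placeholder for the filesystem's block-mapping callback. */
static int example_get_block(struct inode *inode, sector_t iblock,
                             struct buffer_head *bh_result, int create);

/*
 * Sketch of a ->direct_IO implementation for this kernel generation:
 * reject misaligned requests up front, then let the generic blockdev
 * DIO code walk the iov_iter page by page.
 */
static ssize_t example_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
                                 loff_t offset)
{
    struct inode *inode = iocb->ki_filp->f_mapping->host;
    int err;

    err = check_direct_IO(inode, iter, offset);
    if (err)
        return err;

    return blockdev_direct_IO(iocb, inode, iter, offset, example_get_block);
}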