Пример #1
0
static void check_iovecs(struct iovec *vec, int cnt,
			 struct iovec *avec, int acnt, uint32_t off_in_head)
{
	char *s1, *s2;
	uint32_t size, asize;

	size = iov_length(vec, cnt);
	asize = iov_length(avec, acnt) - off_in_head;
	if (size != asize) {
		gf_log("crypt", GF_LOG_DEBUG, "size %d is not eq asize %d",
		       size, asize);
		return;
	}
	s1 = GF_CALLOC(1, size, gf_crypt_mt_data);
	if (!s1) {
		gf_log("crypt", GF_LOG_DEBUG, "Can not allocate stream ");
		return;
	}
	s2 = GF_CALLOC(1, asize, gf_crypt_mt_data);
	if (!s2) {
		GF_FREE(s1);
		gf_log("crypt", GF_LOG_DEBUG, "Can not allocate stream ");
		return;
	}
	compound_stream(vec, cnt, s1, 0);
	compound_stream(avec, acnt, s2, off_in_head);
	if (memcmp(s1, s2, size))
		gf_log("crypt", GF_LOG_DEBUG, "chunks of different data");
	GF_FREE(s1);
	GF_FREE(s2);
}
Пример #2
0
int32_t ida_buffer_combine(ida_local_t * local, ida_buffer_t * dst, ida_buffer_t * src)
{
    if (iov_length(dst->vectors, dst->count) == iov_length(src->vectors, src->count))
    {
        return 0;
    }

    return EIO;
}
Пример #3
0
/*
 * Transfer an interrupt request to userspace
 *
 * Unlike other requests this is assembled on demand, without a need
 * to allocate a separate fuse_req structure.
 *
 * Called with fc->lock held, releases it
 */
static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_req *req,
			       const struct iovec *iov, unsigned long nr_segs)
{
	struct fuse_copy_state cs;
	struct fuse_in_header ih;
	struct fuse_interrupt_in arg;
	unsigned reqsize = sizeof(ih) + sizeof(arg);
	int err;

	list_del_init(&req->intr_entry);
	req->intr_unique = fuse_get_unique(fc);
	memset(&ih, 0, sizeof(ih));
	memset(&arg, 0, sizeof(arg));
	ih.len = reqsize;
	ih.opcode = FUSE_INTERRUPT;
	ih.unique = req->intr_unique;
	arg.unique = req->in.h.unique;

	spin_unlock(&fc->lock);
	if (iov_length(iov, nr_segs) < reqsize)
		return -EINVAL;

	fuse_copy_init(&cs, fc, 1, NULL, iov, nr_segs);
	err = fuse_copy_one(&cs, &ih, sizeof(ih));
	if (!err)
		err = fuse_copy_one(&cs, &arg, sizeof(arg));
	fuse_copy_finish(&cs);

	return err ? err : reqsize;
}
Пример #4
0
static ssize_t
nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
		loff_t offset, unsigned long nr_segs)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	ssize_t size;

	if (rw == WRITE)
		return 0;

	
	size = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
				  nilfs_get_block);

	if (unlikely((rw & WRITE) && size < 0)) {
		loff_t isize = i_size_read(inode);
		loff_t end = offset + iov_length(iov, nr_segs);

		if (end > isize)
			vmtruncate(inode, isize);
	}

	return size;
}
Пример #5
0
static int send_reply(fuse_req_t req, int error, const void *arg,
                      size_t argsize)
{
    struct fuse_out_header out;
    struct iovec iov[2];
    size_t count;
    int res;

    if (error <= -1000 || error > 0) {
        fprintf(stderr, "fuse: bad error value: %i\n",  error);
        error = -ERANGE;
    }

    out.unique = req->unique;
    out.error = error;
    count = 1;
    iov[0].iov_base = &out;
    iov[0].iov_len = sizeof(struct fuse_out_header);
    if (argsize && !error) {
        count++;
        iov[1].iov_base = (void *) arg;
        iov[1].iov_len = argsize;
    }
    out.len = iov_length(iov, count);

    if (req->f->debug) {
        printf("   unique: %llu, error: %i (%s), outsize: %i\n",
               out.unique, out.error, strerror(-out.error), out.len);
        fflush(stdout);
    }
    res = fuse_chan_send(req->ch, iov, count);
    free_req(req);

    return res;
}
Пример #6
0
ssize_t rawfs_block_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
		unsigned long nr_segs, loff_t pos)
{
    struct file *filp = iocb->ki_filp;
//    struct super_block *sb = filp->f_path.dentry->d_sb;
//    struct rawfs_sb_info *rawfs_sb = RAWFS_SB(sb);
//    struct address_space *mapping=filp->f_mapping;
    struct inode *inode = filp->f_mapping->host;
    struct rawfs_inode_info *inode_info = RAWFS_I(inode);

    ssize_t retval;
//	unsigned long seg = 0;
	loff_t *ppos = &iocb->ki_pos;
//	loff_t size;

    retval=iov_length(iov, nr_segs);

    RAWFS_PRINT(RAWFS_DBG_FILE, "rawfs_block_file_aio_write %s, pos %lld, "
        "len %d\n", inode_info->i_name, pos, retval);

	if (retval > 0)
		*ppos = pos + retval;

    file_accessed(filp);

    /* This function is not supported, data will not write to the device */

    return retval;
}
Пример #7
0
static int send_reply_iov(fuse_req_t req, int error, struct iovec *iov,
                          int count)
{
    struct fuse_out_header out;
    int res;

    if (error <= -1000 || error > 0) {
        fprintf(stderr, "fuse: bad error value: %i\n",  error);
        error = -ERANGE;
    }

    out.unique = req->unique;
    out.error = error;
    iov[0].iov_base = &out;
    iov[0].iov_len = sizeof(struct fuse_out_header);
    out.len = iov_length(iov, count);

    /* Foxconn removed start pling 06/19/2009 */
#if 0
    if (req->f->debug)
        fprintf(stderr, "   unique: %llu, error: %i (%s), outsize: %i\n",
                (unsigned long long) out.unique, out.error,
                strerror(-out.error), out.len);
#endif
    /* Foxconn removed end pling 06/19/2009 */

    res = fuse_chan_send(req->ch, iov, count);
    free_req(req);

    return res;
}
Пример #8
0
static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
				   unsigned long nr_segs, loff_t pos)
{
	struct file *file = iocb->ki_filp;
	size_t writesize = iov_length(iov, nr_segs);
	struct dentry *dentry = file->f_dentry;
	struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
	struct gfs2_sbd *sdp;
	int ret;

	sdp = GFS2_SB(file->f_mapping->host);
	ret = gfs2_rs_alloc(ip);
	if (ret)
		return ret;

	atomic_set(&ip->i_res->rs_sizehint, writesize >> sdp->sd_sb.sb_bsize_shift);
	if (file->f_flags & O_APPEND) {
		struct gfs2_holder gh;

		ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
		if (ret)
			return ret;
		gfs2_glock_dq_uninit(&gh);
	}

	return generic_file_aio_write(iocb, iov, nr_segs, pos);
}
Пример #9
0
Файл: inode.c Проект: 7799/linux
static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb,
	const struct iovec *iov, loff_t offset, unsigned long nr_segs)
{
	struct file *file = iocb->ki_filp;
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = file->f_mapping->host;
	ssize_t ret;

	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
				 jfs_get_block);

	/*
	 * In case of error extending write may have instantiated a few
	 * blocks outside i_size. Trim these off again.
	 */
	if (unlikely((rw & WRITE) && ret < 0)) {
		loff_t isize = i_size_read(inode);
		loff_t end = offset + iov_length(iov, nr_segs);

		if (end > isize)
			jfs_write_failed(mapping, end);
	}

	return ret;
}
Пример #10
0
static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
				   unsigned long nr_segs, loff_t pos)
{
	struct file *file = iocb->ki_filp;
	size_t writesize = iov_length(iov, nr_segs);
	struct dentry *dentry = file->f_dentry;
	struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
	int ret;

	ret = gfs2_rs_alloc(ip);
	if (ret)
		return ret;

	gfs2_size_hint(file, pos, writesize);

	if (file->f_flags & O_APPEND) {
		struct gfs2_holder gh;

		ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
		if (ret)
			return ret;
		gfs2_glock_dq_uninit(&gh);
	}

	return generic_file_aio_write(iocb, iov, nr_segs, pos);
}
Пример #11
0
static ssize_t
nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
		loff_t offset, unsigned long nr_segs)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	ssize_t size;

	if (rw == WRITE)
		return 0;

	/* Needs synchronization with the cleaner */
	size = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
				  nilfs_get_block);

	/*
	 * In case of error extending write may have instantiated a few
	 * blocks outside i_size. Trim these off again.
	 */
	if (unlikely((rw & WRITE) && size < 0)) {
		loff_t isize = i_size_read(inode);
		loff_t end = offset + iov_length(iov, nr_segs);

		if (end > isize)
			vmtruncate(inode, isize);
	}

	return size;
}
Пример #12
0
int fuse_send_reply_iov_nofree(fuse_req_t req, int error, struct iovec *iov,
			       int count)
{
	struct fuse_out_header out;

	if (error <= -1000 || error > 0) {
		fprintf(stderr, "fuse: bad error value: %i\n",	error);
		error = -ERANGE;
	}

	out.unique = req->unique;
	out.error = error;
	iov[0].iov_base = &out;
	iov[0].iov_len = sizeof(struct fuse_out_header);
	out.len = iov_length(iov, count);

	if (req->f->debug) {
		if (out.error) {
			fprintf(stderr,
				"   unique: %llu, error: %i (%s), outsize: %i\n",
				(unsigned long long) out.unique, out.error,
				strerror(-out.error), out.len);
		} else {
			fprintf(stderr,
				"   unique: %llu, success, outsize: %i\n",
				(unsigned long long) out.unique, out.len);
		}
	}

	return fuse_chan_send(req->ch, iov, count);
}
Пример #13
0
static ssize_t
ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
		unsigned long nr_segs, loff_t pos)
{
	struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;

	/*
	 * If we have encountered a bitmap-format file, the size limit
	 * is smaller than s_maxbytes, which is for extent-mapped files.
	 */

	if (!(ext4_test_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE))) {
		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
		size_t length = iov_length(iov, nr_segs);

		if (pos > sbi->s_bitmap_maxbytes)
			return -EFBIG;

		if (pos + length > sbi->s_bitmap_maxbytes) {
			nr_segs = iov_shorten((struct iovec *)iov, nr_segs,
					      sbi->s_bitmap_maxbytes - pos);
		}
	}

	return generic_file_aio_write(iocb, iov, nr_segs, pos);
}
Пример #14
0
/**
 * rawfs_block_file_aio_read - read routine for block files
 * @iocb:	kernel I/O control block
 * @iov:	io vector request
 * @nr_segs:	number of segments in the iovec
 * @pos:	current file position
 */
ssize_t
rawfs_block_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
		unsigned long nr_segs, loff_t pos)
{
    struct file *filp = iocb->ki_filp;
    struct super_block *sb = filp->f_path.dentry->d_sb;
    struct rawfs_sb_info *rawfs_sb = RAWFS_SB(sb);
//    struct address_space *mapping=filp->f_mapping;
    struct inode *inode = filp->f_mapping->host;
    struct rawfs_inode_info *inode_info = RAWFS_I(inode);

	ssize_t retval;
//	unsigned long seg = 0;
	size_t count;
	loff_t *ppos = &iocb->ki_pos;
    // Always use direct I/O
	loff_t size;
    int block_no;

	retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);

	if (retval)
		return retval;

    retval=iov_length(iov, nr_segs);

    mutex_lock(&rawfs_sb->rawfs_lock);

    RAWFS_PRINT(RAWFS_DBG_FILE, "rawfs_block_file_aio_read %s, pos %lld, "
        "len %d\n", inode_info->i_name, pos, retval);

	size = i_size_read(inode);

    // Get inode ID
    block_no = filp->f_path.dentry->d_inode->i_ino - RAWFS_BLOCK0_INO;

    if ((retval + pos) >= size)
        retval = size - pos;

	if (pos < size) {
        rawfs_sb->dev.read_page_user(filp->f_path.dentry->d_inode->i_sb,
            block_no, pos, iov, nr_segs, retval);

		if (retval > 0)
			*ppos = pos + retval;

		if (retval < 0 || *ppos >= size) {
			file_accessed(filp);
            goto out;
		}
	}

out:

    mutex_unlock(&rawfs_sb->rawfs_lock);

    return retval;
}
Пример #15
0
static ssize_t
ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
		unsigned long nr_segs, loff_t pos)
{
	struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
	int unaligned_aio = 0;
	int ret;

	trace_ext4_file_write(iocb->ki_filp->f_path.dentry, iocb->ki_left);
	/*
	 * If we have encountered a bitmap-format file, the size limit
	 * is smaller than s_maxbytes, which is for extent-mapped files.
	 */

	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
		size_t length = iov_length(iov, nr_segs);

		if ((pos > sbi->s_bitmap_maxbytes ||
		    (pos == sbi->s_bitmap_maxbytes && length > 0)))
			return -EFBIG;

		if (pos + length > sbi->s_bitmap_maxbytes) {
			nr_segs = iov_shorten((struct iovec *)iov, nr_segs,
					      sbi->s_bitmap_maxbytes - pos);
		}
	} else if (unlikely((iocb->ki_filp->f_flags & O_DIRECT) &&
		   !is_sync_kiocb(iocb))) {
		unaligned_aio = ext4_unaligned_aio(inode, iov, nr_segs, pos);
	}

	/* Unaligned direct AIO must be serialized; see comment above */
	if (unaligned_aio) {
		static unsigned long unaligned_warn_time;

		/* Warn about this once per day */
		if (printk_timed_ratelimit(&unaligned_warn_time, 60*60*24*HZ))
			ext4_msg(inode->i_sb, KERN_WARNING,
				 "Unaligned AIO/DIO on inode %ld by %s; "
				 "performance will be poor.",
				 inode->i_ino, current->comm);
		mutex_lock(ext4_aio_mutex(inode));
		ext4_aiodio_wait(inode);
	}

	ret = generic_file_aio_write(iocb, iov, nr_segs, pos);

	if (unaligned_aio)
		mutex_unlock(ext4_aio_mutex(inode));

	trace_file_write_done(iocb->ki_filp);
	return ret;
}
Пример #16
0
ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
		       unsigned long nr_segs, loff_t pos)
{
	struct dentry * dentry = iocb->ki_filp->f_path.dentry;
	struct inode * inode = dentry->d_inode;
	unsigned long written = 0;
	ssize_t result;
	size_t count = iov_length(iov, nr_segs);

	if (iocb->ki_filp->f_flags & O_DIRECT)
		return nfs_file_direct_write(iocb, iov, nr_segs, pos, true);

	dprintk("NFS: write(%s/%s, %lu@%Ld)\n",
		dentry->d_parent->d_name.name, dentry->d_name.name,
		(unsigned long) count, (long long) pos);

	result = -EBUSY;
	if (IS_SWAPFILE(inode))
		goto out_swapfile;
	/*
	 * O_APPEND implies that we must revalidate the file length.
	 */
	if (iocb->ki_filp->f_flags & O_APPEND) {
		result = nfs_revalidate_file_size(inode, iocb->ki_filp);
		if (result)
			goto out;
	}

	result = count;
	if (!count)
		goto out;

	result = generic_file_aio_write(iocb, iov, nr_segs, pos);
	if (result > 0)
		written = result;

	/* Return error values for O_DSYNC and IS_SYNC() */
	if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) {
		int err = vfs_fsync(iocb->ki_filp, 0);
		if (err < 0)
			result = err;
	}
	if (result > 0)
		nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
out:
	return result;

out_swapfile:
	printk(KERN_INFO "NFS: attempt to write to active swap file!\n");
	goto out;
}
Пример #17
0
static ssize_t
ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
                unsigned long nr_segs, loff_t pos)
{
    struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
    int unaligned_aio = 0;
    int ret;

    trace_ext4_file_write(iocb->ki_filp->f_path.dentry, iocb->ki_left);

    if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        size_t length = iov_length(iov, nr_segs);

        if ((pos > sbi->s_bitmap_maxbytes ||
                (pos == sbi->s_bitmap_maxbytes && length > 0)))
            return -EFBIG;

        if (pos + length > sbi->s_bitmap_maxbytes) {
            nr_segs = iov_shorten((struct iovec *)iov, nr_segs,
                                  sbi->s_bitmap_maxbytes - pos);
        }
    } else if (unlikely((iocb->ki_filp->f_flags & O_DIRECT) &&
                        !is_sync_kiocb(iocb))) {
        unaligned_aio = ext4_unaligned_aio(inode, iov, nr_segs, pos);
    }


    if (unaligned_aio) {
        static unsigned long unaligned_warn_time;


        if (printk_timed_ratelimit(&unaligned_warn_time, 60*60*24*HZ))
            ext4_msg(inode->i_sb, KERN_WARNING,
                     "Unaligned AIO/DIO on inode %ld by %s; "
                     "performance will be poor.",
                     inode->i_ino, current->comm);
        mutex_lock(ext4_aio_mutex(inode));
        ext4_aiodio_wait(inode);
    }

    ret = generic_file_aio_write(iocb, iov, nr_segs, pos);

    if (unaligned_aio)
        mutex_unlock(ext4_aio_mutex(inode));

    return ret;
}
Пример #18
0
static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
				unsigned long nr_segs, loff_t pos)
{
	struct dentry * dentry = iocb->ki_filp->f_path.dentry;
	struct inode * inode = dentry->d_inode;
	ssize_t result;
	size_t count = iov_length(iov, nr_segs);

#ifdef CONFIG_NFS_DIRECTIO
	if (iocb->ki_filp->f_flags & O_DIRECT)
		return nfs_file_direct_write(iocb, iov, nr_segs, pos);
#endif

	dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%Ld)\n",
		dentry->d_parent->d_name.name, dentry->d_name.name,
		inode->i_ino, (unsigned long) count, (long long) pos);

	result = -EBUSY;
	if (IS_SWAPFILE(inode))
		goto out_swapfile;
	/*
	 * O_APPEND implies that we must revalidate the file length.
	 */
	if (iocb->ki_filp->f_flags & O_APPEND) {
		result = nfs_revalidate_file_size(inode, iocb->ki_filp);
		if (result)
			goto out;
	}

	result = count;
	if (!count)
		goto out;

	nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count);
	result = generic_file_aio_write(iocb, iov, nr_segs, pos);
	/* Return error values for O_SYNC and IS_SYNC() */
	if (result >= 0 && (IS_SYNC(inode) || (iocb->ki_filp->f_flags & O_SYNC))) {
		int err = nfs_fsync(iocb->ki_filp, dentry, 1);
		if (err < 0)
			result = err;
	}
out:
	return result;

out_swapfile:
	printk(KERN_INFO "NFS: attempt to write to active swap file!\n");
	goto out;
}
static int send_notify_iov(struct fuse_ll *f, struct fuse_chan *ch,
			   int notify_code, struct iovec *iov, int count)
{
	struct fuse_out_header out;

	out.unique = 0;
	out.error = notify_code;
	iov[0].iov_base = &out;
	iov[0].iov_len = sizeof(struct fuse_out_header);
	out.len = iov_length(iov, count);

	if (f->debug)
		fprintf(stderr, "NOTIFY: code=%d count=%d length=%u\n",
			notify_code, count, out.len);

	return fuse_chan_send(ch, iov, count);
}
Пример #20
0
int32_t ida_buffer_assign(ida_local_t * local, ida_buffer_t * dst, struct iobref * buffers, struct iovec * vectors, uint32_t count)
{
    char * ptr;
    int32_t i, error;

    if (unlikely(count > 1))
    {
        error = ida_buffer_new(local, dst, iov_length(vectors, count));
        if (unlikely(error != 0))
        {
            return error;
        }

        ptr = dst->vectors->iov_base;
        for (i = 0; i < count; i++)
        {
            memcpy(ptr, vectors[i].iov_base, vectors[i].iov_len);
            ptr += vectors[i].iov_len;
        }
    }
    else
    {
        SYS_PTR(
            &dst->buffers, iobref_ref, (buffers),
            ENOMEM,
            E(),
            RETERR()
        );
        SYS_PTR(
            &dst->vectors, iov_dup, (vectors, count),
            ENOMEM,
            E(),
            GOTO(failed)
        );
    }

    dst->count = 1;

    return 0;

failed:
    iobref_unref(dst->buffers);
    dst->buffers = NULL;

    return -ENOMEM;
}
Пример #21
0
/*
 * This tests whether the IO in question is block-aligned or not.
 * Ext4 utilizes unwritten extents when hole-filling during direct IO, and they
 * are converted to written only after the IO is complete.  Until they are
 * mapped, these blocks appear as holes, so dio_zero_block() will assume that
 * it needs to zero out portions of the start and/or end block.  If 2 AIO
 * threads are at work on the same unwritten block, they must be synchronized
 * or one thread will zero the other's data, causing corruption.
 */
static int
ext4_unaligned_aio(struct inode *inode, const struct iovec *iov,
		   unsigned long nr_segs, loff_t pos)
{
	struct super_block *sb = inode->i_sb;
	int blockmask = sb->s_blocksize - 1;
	size_t count = iov_length(iov, nr_segs);
	loff_t final_size = pos + count;

	if (pos >= inode->i_size)
		return 0;

	if ((pos & blockmask) || (final_size & blockmask))
		return 1;

	return 0;
}
Пример #22
0
static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
				const struct iovec *iov, loff_t offset,
				unsigned long nr_segs)
{
	struct file *file = iocb->ki_filp;
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	size_t count = iov_length(iov, nr_segs);
	int err;

	/* we don't need to use inline_data strictly */
	if (f2fs_has_inline_data(inode)) {
		err = f2fs_convert_inline_inode(inode);
		if (err)
			return err;
	}

	if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
		return 0;

	err = check_direct_IO(inode, rw, iov, offset, nr_segs);
	if (err)
		return err;

	trace_f2fs_direct_IO_enter(inode, offset, count, rw);

	if (rw & WRITE) {
		__allocate_data_blocks(inode, offset, count);
		if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
			err = -EIO;
			goto out;
		}
	}

	err = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
							get_data_block_dio);
out:
	if (err < 0 && (rw & WRITE))
		f2fs_write_failed(mapping, offset + count);

	trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);

	return err;
}
Пример #23
0
static ssize_t hypfs_aio_write(struct kiocb *iocb, const struct iovec *iov,
			      unsigned long nr_segs, loff_t offset)
{
	int rc;
	struct super_block *sb;
	struct hypfs_sb_info *fs_info;
	size_t count = iov_length(iov, nr_segs);

	sb = iocb->ki_filp->f_path.dentry->d_inode->i_sb;
	fs_info = sb->s_fs_info;
	/*
	 * Currently we only allow one update per second for two reasons:
	 * 1. diag 204 is VERY expensive
	 * 2. If several processes do updates in parallel and then read the
	 *    hypfs data, the likelihood of collisions is reduced, if we restrict
	 *    the minimum update interval. A collision occurs, if during the
	 *    data gathering of one process another process triggers an update
	 *    If the first process wants to ensure consistent data, it has
	 *    to restart data collection in this case.
	 */
	mutex_lock(&fs_info->lock);
	if (fs_info->last_update == get_seconds()) {
		rc = -EBUSY;
		goto out;
	}
	hypfs_delete_tree(sb->s_root);
	if (MACHINE_IS_VM)
		rc = hypfs_vm_create_files(sb, sb->s_root);
	else
		rc = hypfs_diag_create_files(sb, sb->s_root);
	if (rc) {
		pr_err("Updating the hypfs tree failed\n");
		hypfs_delete_tree(sb->s_root);
		goto out;
	}
	hypfs_update_update(sb);
	rc = count;
out:
	mutex_unlock(&fs_info->lock);
	return rc;
}
Пример #24
0
static ssize_t
nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
		unsigned long nr_segs, loff_t pos)
{
	struct dentry * dentry = iocb->ki_filp->f_path.dentry;
	struct inode * inode = dentry->d_inode;
	ssize_t result;
	size_t count = iov_length(iov, nr_segs);

	if (iocb->ki_filp->f_flags & O_DIRECT)
		return nfs_file_direct_read(iocb, iov, nr_segs, pos);

	dprintk("NFS: read(%s/%s, %lu@%lu)\n",
		dentry->d_parent->d_name.name, dentry->d_name.name,
		(unsigned long) count, (unsigned long) pos);

	result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
	nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count);
	if (!result)
		result = generic_file_aio_read(iocb, iov, nr_segs, pos);
	return result;
}
static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
			      const struct iovec *iov, loff_t offset,
			      unsigned long nr_segs)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	struct address_space *mapping = inode->i_mapping;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_holder gh;
	int rv;

	/*
	 * Deferred lock, even if its a write, since we do no allocation
	 * on this path. All we need change is atime, and this lock mode
	 * ensures that other nodes have flushed their buffered read caches
	 * (i.e. their page cache entries for this inode). We do not,
	 * unfortunately have the option of only flushing a range like
	 * the VFS does.
	 */
	gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh);
	rv = gfs2_glock_nq(&gh);
	if (rv)
		return rv;
	rv = gfs2_ok_for_dio(ip, rw, offset);
	if (rv != 1)
		goto out; /* dio not valid, fall back to buffered i/o */

	/*
	 * Now since we are holding a deferred (CW) lock at this point, you
	 * might be wondering why this is ever needed. There is a case however
	 * where we've granted a deferred local lock against a cached exclusive
	 * glock. That is ok provided all granted local locks are deferred, but
	 * it also means that it is possible to encounter pages which are
	 * cached and possibly also mapped. So here we check for that and sort
	 * them out ahead of the dio. The glock state machine will take care of
	 * everything else.
	 *
	 * If in fact the cached glock state (gl->gl_state) is deferred (CW) in
	 * the first place, mapping->nr_pages will always be zero.
	 */
	if (mapping->nrpages) {
		loff_t lstart = offset & (PAGE_CACHE_SIZE - 1);
		loff_t len = iov_length(iov, nr_segs);
		loff_t end = PAGE_ALIGN(offset + len) - 1;

		rv = 0;
		if (len == 0)
			goto out;
		if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
			unmap_shared_mapping_range(ip->i_inode.i_mapping, offset, len);
		rv = filemap_write_and_wait_range(mapping, lstart, end);
		if (rv)
			goto out;
		if (rw == WRITE)
			truncate_inode_pages_range(mapping, lstart, end);
	}

	rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
				  offset, nr_segs, gfs2_get_block_direct,
				  NULL, NULL, 0);
out:
	gfs2_glock_dq(&gh);
	gfs2_holder_uninit(&gh);
	return rv;
}
Пример #26
0
static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *page = vmf->page;
	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_alloc_parms ap = { .aflags = 0, };
	unsigned long last_index;
	u64 pos = page->index << PAGE_CACHE_SHIFT;
	unsigned int data_blocks, ind_blocks, rblocks;
	int alloc_required = 0;
	struct gfs2_holder gh;
	loff_t size;
	int ret;

	sb_start_pagefault(inode->i_sb);

	/* Update file times before taking page lock */
	file_update_time(vma->vm_file);

	ret = get_write_access(inode);
	if (ret)
		goto out;

	ret = gfs2_rs_alloc(ip);
	if (ret)
		goto out_write_access;

	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
	ret = gfs2_glock_nq(&gh);
	if (ret)
		goto out_uninit;

	set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
	set_bit(GIF_SW_PAGED, &ip->i_flags);

	gfs2_size_hint(inode, pos, PAGE_CACHE_SIZE);

	ret = gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE, &alloc_required);
	if (ret)
		goto out_unlock;

	if (!alloc_required) {
		lock_page(page);
		if (!PageUptodate(page) || page->mapping != inode->i_mapping) {
			ret = -EAGAIN;
			unlock_page(page);
		}
		goto out_unlock;
	}

	ret = gfs2_rindex_update(sdp);
	if (ret)
		goto out_unlock;

	ret = gfs2_quota_lock_check(ip);
	if (ret)
		goto out_unlock;
	gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
	ap.target = data_blocks + ind_blocks;
	ret = gfs2_inplace_reserve(ip, &ap);
	if (ret)
		goto out_quota_unlock;

	rblocks = RES_DINODE + ind_blocks;
	if (gfs2_is_jdata(ip))
		rblocks += data_blocks ? data_blocks : 1;
	if (ind_blocks || data_blocks) {
		rblocks += RES_STATFS + RES_QUOTA;
		rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks);
	}
	ret = gfs2_trans_begin(sdp, rblocks, 0);
	if (ret)
		goto out_trans_fail;

	lock_page(page);
	ret = -EINVAL;
	size = i_size_read(inode);
	last_index = (size - 1) >> PAGE_CACHE_SHIFT;
	/* Check page index against inode size */
	if (size == 0 || (page->index > last_index))
		goto out_trans_end;

	ret = -EAGAIN;
	/* If truncated, we must retry the operation, we may have raced
	 * with the glock demotion code.
	 */
	if (!PageUptodate(page) || page->mapping != inode->i_mapping)
		goto out_trans_end;

	/* Unstuff, if required, and allocate backing blocks for page */
	ret = 0;
	if (gfs2_is_stuffed(ip))
		ret = gfs2_unstuff_dinode(ip, page);
	if (ret == 0)
		ret = gfs2_allocate_page_backing(page);

out_trans_end:
	if (ret)
		unlock_page(page);
	gfs2_trans_end(sdp);
out_trans_fail:
	gfs2_inplace_release(ip);
out_quota_unlock:
	gfs2_quota_unlock(ip);
out_unlock:
	gfs2_glock_dq(&gh);
out_uninit:
	gfs2_holder_uninit(&gh);
	if (ret == 0) {
		set_page_dirty(page);
		wait_for_stable_page(page);
	}
out_write_access:
	put_write_access(inode);
out:
	sb_end_pagefault(inode->i_sb);
	return block_page_mkwrite_return(ret);
}

static const struct vm_operations_struct gfs2_vm_ops = {
	.fault = filemap_fault,
	.page_mkwrite = gfs2_page_mkwrite,
};

/**
 * gfs2_mmap -
 * @file: The file to map
 * @vma: The VMA which described the mapping
 *
 * There is no need to get a lock here unless we should be updating
 * atime. We ignore any locking errors since the only consequence is
 * a missed atime update (which will just be deferred until later).
 *
 * Returns: 0
 */

static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);

	if (!(file->f_flags & O_NOATIME) &&
	    !IS_NOATIME(&ip->i_inode)) {
		struct gfs2_holder i_gh;
		int error;

		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
					   &i_gh);
		if (error)
			return error;
		/* grab lock to update inode */
		gfs2_glock_dq_uninit(&i_gh);
		file_accessed(file);
	}
	vma->vm_ops = &gfs2_vm_ops;
	vma->vm_flags |= VM_CAN_NONLINEAR;

	return 0;
}

/**
 * gfs2_open - open a file
 * @inode: the inode to open
 * @file: the struct file for this opening
 *
 * Returns: errno
 */

static int gfs2_open(struct inode *inode, struct file *file)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_holder i_gh;
	struct gfs2_file *fp;
	int error;

	fp = kzalloc(sizeof(struct gfs2_file), GFP_KERNEL);
	if (!fp)
		return -ENOMEM;

	mutex_init(&fp->f_fl_mutex);

	gfs2_assert_warn(GFS2_SB(inode), !file->private_data);
	file->private_data = fp;

	if (S_ISREG(ip->i_inode.i_mode)) {
		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
					   &i_gh);
		if (error)
			goto fail;

		if (!(file->f_flags & O_LARGEFILE) &&
		    i_size_read(inode) > MAX_NON_LFS) {
			error = -EOVERFLOW;
			goto fail_gunlock;
		}

		gfs2_glock_dq_uninit(&i_gh);
	}

	return 0;

fail_gunlock:
	gfs2_glock_dq_uninit(&i_gh);
fail:
	file->private_data = NULL;
	kfree(fp);
	return error;
}

/**
 * gfs2_release - called to close a struct file
 * @inode: the inode the struct file belongs to
 * @file: the struct file being closed
 *
 * Returns: errno
 */

static int gfs2_release(struct inode *inode, struct file *file)
{
	struct gfs2_inode *ip = GFS2_I(inode);

	kfree(file->private_data);
	file->private_data = NULL;

	if (!(file->f_mode & FMODE_WRITE))
		return 0;

	gfs2_rs_delete(ip);
	return 0;
}

/**
 * gfs2_fsync - sync the dirty data for a file (across the cluster)
 * @file: the file that points to the dentry
 * @start: the start position in the file to sync
 * @end: the end position in the file to sync
 * @datasync: set if we can ignore timestamp changes
 *
 * The VFS will flush data for us. We only need to worry
 * about metadata here.
 *
 * Returns: errno
 */

static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync)
{
	struct inode *inode = dentry->d_inode;
	int sync_state = inode->i_state & I_DIRTY;
	struct gfs2_inode *ip = GFS2_I(inode);
	int ret;

	if (!gfs2_is_jdata(ip))
		sync_state &= ~I_DIRTY_PAGES;
	if (datasync)
		sync_state &= ~I_DIRTY_SYNC;

	if (sync_state) {
		ret = sync_inode_metadata(inode, 1);
		if (ret)
			return ret;
		if (gfs2_is_jdata(ip))
			filemap_write_and_wait(inode->i_mapping);
		gfs2_ail_flush(ip->i_gl, 1);
	}

	return 0;
}

/**
 * gfs2_file_aio_write - Perform a write to a file
 * @iocb: The io context
 * @iov: The data to write
 * @nr_segs: Number of @iov segments
 * @pos: The file position
 *
 * We have to do a lock/unlock here to refresh the inode size for
 * O_APPEND writes, otherwise we can land up writing at the wrong
 * offset. There is still a race, but provided the app is using its
 * own file locking, this will make O_APPEND work as expected.
 *
 */

static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
				   unsigned long nr_segs, loff_t pos)
{
	struct file *file = iocb->ki_filp;
	size_t writesize = iov_length(iov, nr_segs);
	struct dentry *dentry = file->f_dentry;
	struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
	struct gfs2_sbd *sdp;
	int ret;

	sdp = GFS2_SB(file->f_mapping->host);
	ret = gfs2_rs_alloc(ip);
	if (ret)
		return ret;

	gfs2_size_hint(file->f_dentry->d_inode, pos, writesize);
	if (file->f_flags & O_APPEND) {
		struct gfs2_holder gh;

		ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
		if (ret)
			return ret;
		gfs2_glock_dq_uninit(&gh);
	}

	return generic_file_aio_write(iocb, iov, nr_segs, pos);
}

static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe,
				      struct file *out, loff_t *ppos,
				      size_t len, unsigned int flags)
{
	int error;
	struct inode *inode = out->f_mapping->host;
	struct gfs2_inode *ip = GFS2_I(inode);

	error = gfs2_rs_alloc(ip);
	if (error)
		return (ssize_t)error;

	gfs2_size_hint(inode, *ppos, len);

	return generic_file_splice_write(pipe, out, ppos, len, flags);
}
Пример #27
0
static ssize_t
pipe_write(struct kiocb *iocb, const struct iovec *_iov,
	    unsigned long nr_segs, loff_t ppos)
{
	struct file *filp = iocb->ki_filp;
	struct inode *inode = filp->f_path.dentry->d_inode;
	struct pipe_inode_info *pipe;
	ssize_t ret;
	int do_wakeup;
	struct iovec *iov = (struct iovec *)_iov;
	size_t total_len;
	ssize_t chars;

	total_len = iov_length(iov, nr_segs);
	/* Null write succeeds. */
	if (unlikely(total_len == 0))
		return 0;

	do_wakeup = 0;
	ret = 0;
	mutex_lock(&inode->i_mutex);
	pipe = inode->i_pipe;

	if (!pipe->readers) {
		send_sig(SIGPIPE, current, 0);
		ret = -EPIPE;
		goto out;
	}

	/* We try to merge small writes */
	chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
	if (pipe->nrbufs && chars != 0) {
		int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) &
							(pipe->buffers - 1);
		struct pipe_buffer *buf = pipe->bufs + lastbuf;
		const struct pipe_buf_operations *ops = buf->ops;
		int offset = buf->offset + buf->len;

		if (ops->can_merge && offset + chars <= PAGE_SIZE) {
			int error, atomic = 1;
			void *addr;

			error = ops->confirm(pipe, buf);
			if (error)
				goto out;

			iov_fault_in_pages_read(iov, chars);
redo1:
			addr = ops->map(pipe, buf, atomic);
			error = pipe_iov_copy_from_user(offset + addr, iov,
							chars, atomic);
			ops->unmap(pipe, buf, addr);
			ret = error;
			do_wakeup = 1;
			if (error) {
				if (atomic) {
					atomic = 0;
					goto redo1;
				}
				goto out;
			}
			buf->len += chars;
			total_len -= chars;
			ret = chars;
			if (!total_len)
				goto out;
		}
	}

	for (;;) {
		int bufs;

		if (!pipe->readers) {
			send_sig(SIGPIPE, current, 0);
			if (!ret)
				ret = -EPIPE;
			break;
		}
		bufs = pipe->nrbufs;
		if (bufs < pipe->buffers) {
			int newbuf = (pipe->curbuf + bufs) & (pipe->buffers-1);
			struct pipe_buffer *buf = pipe->bufs + newbuf;
			struct page *page = pipe->tmp_page;
			char *src;
			int error, atomic = 1;

			if (!page) {
				page = alloc_page(GFP_HIGHUSER);
				if (unlikely(!page)) {
					ret = ret ? : -ENOMEM;
					break;
				}
				pipe->tmp_page = page;
			}
Пример #28
0
static ssize_t
pipe_read(struct kiocb *iocb, const struct iovec *_iov,
	   unsigned long nr_segs, loff_t pos)
{
	struct file *filp = iocb->ki_filp;
	struct inode *inode = filp->f_path.dentry->d_inode;
	struct pipe_inode_info *pipe;
	int do_wakeup;
	ssize_t ret;
	struct iovec *iov = (struct iovec *)_iov;
	size_t total_len;

	total_len = iov_length(iov, nr_segs);
	/* Null read succeeds. */
	if (unlikely(total_len == 0))
		return 0;

	do_wakeup = 0;
	ret = 0;
	mutex_lock(&inode->i_mutex);
	pipe = inode->i_pipe;
	for (;;) {
		int bufs = pipe->nrbufs;
		if (bufs) {
			int curbuf = pipe->curbuf;
			struct pipe_buffer *buf = pipe->bufs + curbuf;
			const struct pipe_buf_operations *ops = buf->ops;
			void *addr;
			size_t chars = buf->len;
			int error, atomic;

			if (chars > total_len)
				chars = total_len;

			error = ops->confirm(pipe, buf);
			if (error) {
				if (!ret)
					error = ret;
				break;
			}

			atomic = !iov_fault_in_pages_write(iov, chars);
redo:
			addr = ops->map(pipe, buf, atomic);
			error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic);
			ops->unmap(pipe, buf, addr);
			if (unlikely(error)) {
				/*
				 * Just retry with the slow path if we failed.
				 */
				if (atomic) {
					atomic = 0;
					goto redo;
				}
				if (!ret)
					ret = error;
				break;
			}
			ret += chars;
			buf->offset += chars;
			buf->len -= chars;
			if (!buf->len) {
				buf->ops = NULL;
				ops->release(pipe, buf);
				curbuf = (curbuf + 1) & (pipe->buffers - 1);
				pipe->curbuf = curbuf;
				pipe->nrbufs = --bufs;
				do_wakeup = 1;
			}
			total_len -= chars;
			if (!total_len)
				break;	/* common path: read succeeded */
		}
		if (bufs)	/* More to do? */
			continue;
		if (!pipe->writers)
			break;
		if (!pipe->waiting_writers) {
			/* syscall merging: Usually we must not sleep
			 * if O_NONBLOCK is set, or if we got some data.
			 * But if a writer sleeps in kernel space, then
			 * we can wait for that data without violating POSIX.
			 */
			if (ret)
				break;
			if (filp->f_flags & O_NONBLOCK) {
				ret = -EAGAIN;
				break;
			}
		}
		if (signal_pending(current)) {
			if (!ret)
				ret = -ERESTARTSYS;
			break;
		}
		if (do_wakeup) {
			wake_up_interruptible_sync(&pipe->wait);
 			kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
		}
		pipe_wait(pipe);
	}
	mutex_unlock(&inode->i_mutex);

	/* Signal writers asynchronously that there is more room. */
	if (do_wakeup) {
		wake_up_interruptible_sync(&pipe->wait);
		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
	}
	if (ret > 0)
		file_accessed(filp);
	return ret;
}
Пример #29
0
/* Expects to be always run from workqueue - which acts as
 * read-size critical section for our kind of RCU. */
static void handle_tx(struct vhost_net *net)
{
	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
	struct vhost_virtqueue *vq = &nvq->vq;
	unsigned out, in, s;
	int head;
	struct msghdr msg = {
		.msg_name = NULL,
		.msg_namelen = 0,
		.msg_control = NULL,
		.msg_controllen = 0,
		.msg_flags = MSG_DONTWAIT,
	};
	size_t len, total_len = 0;
	int err;
	size_t hdr_size;
	struct socket *sock;
	struct vhost_net_ubuf_ref *uninitialized_var(ubufs);
	bool zcopy, zcopy_used;

	mutex_lock(&vq->mutex);
	sock = vq->private_data;
	if (!sock)
		goto out;

	vhost_disable_notify(&net->dev, vq);

	hdr_size = nvq->vhost_hlen;
	zcopy = nvq->ubufs;

	for (;;) {
		/* Release DMAs done buffers first */
		if (zcopy)
			vhost_zerocopy_signal_used(net, vq);

		/* If more outstanding DMAs, queue the work.
		 * Handle upend_idx wrap around
		 */
		if (unlikely((nvq->upend_idx + vq->num - VHOST_MAX_PEND)
			      % UIO_MAXIOV == nvq->done_idx))
			break;

		head = vhost_get_vq_desc(vq, vq->iov,
					 ARRAY_SIZE(vq->iov),
					 &out, &in,
					 NULL, NULL);
		/* On error, stop handling until the next kick. */
		if (unlikely(head < 0))
			break;
		/* Nothing new?  Wait for eventfd to tell us they refilled. */
		if (head == vq->num) {
			if (unlikely(vhost_enable_notify(&net->dev, vq))) {
				vhost_disable_notify(&net->dev, vq);
				continue;
			}
			break;
		}
		if (in) {
			vq_err(vq, "Unexpected descriptor format for TX: "
			       "out %d, int %d\n", out, in);
			break;
		}
		/* Skip header. TODO: support TSO. */
		s = move_iovec_hdr(vq->iov, nvq->hdr, hdr_size, out);
		len = iov_length(vq->iov, out);
		iov_iter_init(&msg.msg_iter, WRITE, vq->iov, out, len);
		/* Sanity check */
		if (!len) {
			vq_err(vq, "Unexpected header len for TX: "
			       "%zd expected %zd\n",
			       iov_length(nvq->hdr, s), hdr_size);
			break;
		}

		zcopy_used = zcopy && len >= VHOST_GOODCOPY_LEN
				   && (nvq->upend_idx + 1) % UIO_MAXIOV !=
				      nvq->done_idx
				   && vhost_net_tx_select_zcopy(net);

		/* use msg_control to pass vhost zerocopy ubuf info to skb */
		if (zcopy_used) {
			struct ubuf_info *ubuf;
			ubuf = nvq->ubuf_info + nvq->upend_idx;

			vq->heads[nvq->upend_idx].id = cpu_to_vhost32(vq, head);
			vq->heads[nvq->upend_idx].len = VHOST_DMA_IN_PROGRESS;
			ubuf->callback = vhost_zerocopy_callback;
			ubuf->ctx = nvq->ubufs;
			ubuf->desc = nvq->upend_idx;
			msg.msg_control = ubuf;
			msg.msg_controllen = sizeof(ubuf);
			ubufs = nvq->ubufs;
			atomic_inc(&ubufs->refcount);
			nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV;
		} else {
			msg.msg_control = NULL;
			ubufs = NULL;
		}
		/* TODO: Check specific error and bomb out unless ENOBUFS? */
		err = sock->ops->sendmsg(NULL, sock, &msg, len);
		if (unlikely(err < 0)) {
			if (zcopy_used) {
				vhost_net_ubuf_put(ubufs);
				nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
					% UIO_MAXIOV;
			}
			vhost_discard_vq_desc(vq, 1);
			break;
		}
		if (err != len)
			pr_debug("Truncated TX packet: "
				 " len %d != %zd\n", err, len);
		if (!zcopy_used)
			vhost_add_used_and_signal(&net->dev, vq, head, 0);
		else
			vhost_zerocopy_signal_used(net, vq);
		total_len += len;
		vhost_net_tx_packet(net);
		if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
			vhost_poll_queue(&vq->poll);
			break;
		}
	}
out:
	mutex_unlock(&vq->mutex);
}

static int peek_head_len(struct sock *sk)
{
	struct sk_buff *head;
	int len = 0;
	unsigned long flags;

	spin_lock_irqsave(&sk->sk_receive_queue.lock, flags);
	head = skb_peek(&sk->sk_receive_queue);
	if (likely(head)) {
		len = head->len;
		if (vlan_tx_tag_present(head))
			len += VLAN_HLEN;
	}

	spin_unlock_irqrestore(&sk->sk_receive_queue.lock, flags);
	return len;
}

/* This is a multi-buffer version of vhost_get_desc, that works if
 *	vq has read descriptors only.
 * @vq		- the relevant virtqueue
 * @datalen	- data length we'll be reading
 * @iovcount	- returned count of io vectors we fill
 * @log		- vhost log
 * @log_num	- log offset
 * @quota       - headcount quota, 1 for big buffer
 *	returns number of buffer heads allocated, negative on error
 */
static int get_rx_bufs(struct vhost_virtqueue *vq,
		       struct vring_used_elem *heads,
		       int datalen,
		       unsigned *iovcount,
		       struct vhost_log *log,
		       unsigned *log_num,
		       unsigned int quota)
{
	unsigned int out, in;
	int seg = 0;
	int headcount = 0;
	unsigned d;
	int r, nlogs = 0;
	/* len is always initialized before use since we are always called with
	 * datalen > 0.
	 */
	u32 uninitialized_var(len);

	while (datalen > 0 && headcount < quota) {
		if (unlikely(seg >= UIO_MAXIOV)) {
			r = -ENOBUFS;
			goto err;
		}
		r = vhost_get_vq_desc(vq, vq->iov + seg,
				      ARRAY_SIZE(vq->iov) - seg, &out,
				      &in, log, log_num);
		if (unlikely(r < 0))
			goto err;

		d = r;
		if (d == vq->num) {
			r = 0;
			goto err;
		}
		if (unlikely(out || in <= 0)) {
			vq_err(vq, "unexpected descriptor format for RX: "
				"out %d, in %d\n", out, in);
			r = -EINVAL;
			goto err;
		}
		if (unlikely(log)) {
			nlogs += *log_num;
			log += *log_num;
		}
		heads[headcount].id = cpu_to_vhost32(vq, d);
		len = iov_length(vq->iov + seg, in);
		heads[headcount].len = cpu_to_vhost32(vq, len);
		datalen -= len;
		++headcount;
		seg += in;
	}
	heads[headcount - 1].len = cpu_to_vhost32(vq, len - datalen);
	*iovcount = seg;
	if (unlikely(log))
		*log_num = nlogs;

	/* Detect overrun */
	if (unlikely(datalen > 0)) {
		r = UIO_MAXIOV + 1;
		goto err;
	}
	return headcount;
err:
	vhost_discard_vq_desc(vq, headcount);
	return r;
}
Пример #30
0
static ssize_t
ll_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
             loff_t file_offset, unsigned long nr_segs)
{
    struct ll_cl_context *lcc;
    const struct lu_env *env;
    struct cl_io *io;
    struct file *file = iocb->ki_filp;
    struct inode *inode = file->f_mapping->host;
    ssize_t count = iov_length(iov, nr_segs);
    ssize_t tot_bytes = 0, result = 0;
    unsigned long seg = 0;
    size_t size = MAX_DIO_SIZE;
    ENTRY;

    /* FIXME: io smaller than PAGE_SIZE is broken on ia64 ??? */
    if ((file_offset & ~PAGE_MASK) || (count & ~PAGE_MASK))
        RETURN(-EINVAL);

    CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), size=%zd (max %lu), "
           "offset=%lld=%llx, pages %zd (max %lu)\n",
           PFID(ll_inode2fid(inode)), inode, count, MAX_DIO_SIZE,
           file_offset, file_offset, count >> PAGE_SHIFT,
           MAX_DIO_SIZE >> PAGE_SHIFT);

    /* Check that all user buffers are aligned as well */
    for (seg = 0; seg < nr_segs; seg++) {
        if (((unsigned long)iov[seg].iov_base & ~PAGE_MASK) ||
                (iov[seg].iov_len & ~PAGE_MASK))
            RETURN(-EINVAL);
    }

    lcc = ll_cl_find(file);
    if (lcc == NULL)
        RETURN(-EIO);

    env = lcc->lcc_env;
    LASSERT(!IS_ERR(env));
    io = lcc->lcc_io;
    LASSERT(io != NULL);

    for (seg = 0; seg < nr_segs; seg++) {
        size_t iov_left = iov[seg].iov_len;
        unsigned long user_addr = (unsigned long)iov[seg].iov_base;

        if (rw == READ) {
            if (file_offset >= i_size_read(inode))
                break;
            if (file_offset + iov_left > i_size_read(inode))
                iov_left = i_size_read(inode) - file_offset;
        }

        while (iov_left > 0) {
            struct page **pages;
            int page_count, max_pages = 0;
            size_t bytes;

            bytes = min(size, iov_left);
            page_count = ll_get_user_pages(rw, user_addr, bytes,
                                           &pages, &max_pages);
            if (likely(page_count > 0)) {
                if (unlikely(page_count <  max_pages))
                    bytes = page_count << PAGE_SHIFT;
                result = ll_direct_IO_seg(env, io, rw, inode,
                                          bytes, file_offset,
                                          pages, page_count);
                ll_free_user_pages(pages, max_pages, rw==READ);
            } else if (page_count == 0) {
                GOTO(out, result = -EFAULT);
            } else {
                result = page_count;
            }
            if (unlikely(result <= 0)) {
                /* If we can't allocate a large enough buffer
                 * for the request, shrink it to a smaller
                 * PAGE_SIZE multiple and try again.
                 * We should always be able to kmalloc for a
                 * page worth of page pointers = 4MB on i386. */
                if (result == -ENOMEM &&
                        size > (PAGE_SIZE / sizeof(*pages)) *
                        PAGE_SIZE) {
                    size = ((((size / 2) - 1) |
                             ~PAGE_MASK) + 1) &
                           PAGE_MASK;
                    CDEBUG(D_VFSTRACE, "DIO size now %zu\n",
                           size);
                    continue;
                }

                GOTO(out, result);
            }

            tot_bytes += result;
            file_offset += result;
            iov_left -= result;
            user_addr += result;
        }
    }
out:
    if (tot_bytes > 0) {
        struct vvp_io *vio = vvp_env_io(env);

        /* no commit async for direct IO */
        vio->u.write.vui_written += tot_bytes;
    }

    RETURN(tot_bytes ? tot_bytes : result);
}