Example 1
static int vvp_io_commit_write(const struct lu_env *env,
                               const struct cl_io_slice *ios,
                               const struct cl_page_slice *slice,
                               unsigned from, unsigned to)
{
        struct cl_object  *obj    = slice->cpl_obj;
        struct cl_io      *io     = ios->cis_io;
        struct ccc_page   *cp     = cl2ccc_page(slice);
        struct cl_page    *pg     = slice->cpl_page;
        struct inode      *inode  = ccc_object_inode(obj);
        struct ll_sb_info *sbi    = ll_i2sbi(inode);
        struct ll_inode_info *lli = ll_i2info(inode);
        struct page       *vmpage = cp->cpg_page;

        int    result;
        int    tallyop;
        loff_t size;

        ENTRY;

        LINVRNT(cl_page_is_vmlocked(env, pg));
        LASSERT(vmpage->mapping->host == inode);

        LU_OBJECT_HEADER(D_INODE, env, &obj->co_lu, "committing page write\n");
        CL_PAGE_HEADER(D_PAGE, env, pg, "committing: [%d, %d]\n", from, to);

        /*
         * Queue a write for some time in the future the first time we
         * dirty the page.
         *
         * This is different from what other file systems do: they usually
         * just mark the page (and some of its buffers) dirty and rely on
         * balance_dirty_pages() to start write-back. Lustre wants write-back
         * to be started earlier for the following reasons:
         *
         *     (1) with a large number of clients, the amount of data cached
         *     on each client has to be strictly limited;
         *
         *     (2) large compute jobs generally alternate compute-only and
         *     IO-only phases, and the IO phase should complete as quickly
         *     as possible;
         *
         *     (3) IO is batched up to the RPC size and stays asynchronous
         *     until the client's maximum dirty cache is hit
         *     (/proc/fs/lustre/osc/OSC.../max_dirty_mb).
         */
        if (!PageDirty(vmpage)) {
                tallyop = LPROC_LL_DIRTY_MISSES;
                result = cl_page_cache_add(env, io, pg, CRT_WRITE);
                if (result == 0) {
                        /* page was added into cache successfully. */
                        set_page_dirty(vmpage);
                        vvp_write_pending(cl2ccc(obj), cp);
                } else if (result == -EDQUOT) {
                        pgoff_t last_index = i_size_read(inode) >> PAGE_CACHE_SHIFT;
                        bool need_clip = true;

                        /*
                         * Client ran out of disk space grant. Possible
                         * strategies are:
                         *
                         *     (a) do a sync write, renewing grant;
                         *
                         *     (b) stop writing on this stripe, switch to the
                         *     next one.
                         *
                         * (b) is a part of "parallel io" design that is the
                         * ultimate goal. (a) is what "old" client did, and
                         * what the new code continues to do for the time
                         * being.
                         */
                        if (last_index > pg->cp_index) {
                                to = PAGE_CACHE_SIZE;
                                need_clip = false;
                        } else if (last_index == pg->cp_index) {
                                int size_to = i_size_read(inode) & ~CFS_PAGE_MASK;
                                if (to < size_to)
                                        to = size_to;
                        }
                        if (need_clip)
                                cl_page_clip(env, pg, 0, to);
                        result = vvp_page_sync_io(env, io, pg, cp, CRT_WRITE);
                        if (result)
                                CERROR("Write page %lu of inode %p failed %d\n",
                                       pg->cp_index, inode, result);
                }
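
The listing above breaks off inside vvp_io_commit_write: the branch for an already-dirty page, the stats tally, and the i_size update never appear. Below is a minimal sketch of how such a tail is typically structured in the Lustre client, assuming the helpers ll_stats_ops_tally(), cl_offset(), ll_inode_size_lock()/ll_inode_size_unlock() and cl_isize_write_nolock(); it is an illustrative reconstruction based on the variables declared above, not the verbatim continuation of the function.

        } else {
                /* Page was already dirty: just count the cache hit.
                 * (Assumed continuation, not the original code.) */
                tallyop = LPROC_LL_DIRTY_HITS;
                result = 0;
        }

        ll_stats_ops_tally(sbi, tallyop, 1);

        /* If this write extends the file, publish the new size while
         * holding the inode size lock. */
        size = cl_offset(obj, pg->cp_index) + to;
        ll_inode_size_lock(inode);
        if (result == 0 && size > i_size_read(inode))
                cl_isize_write_nolock(inode, size);
        ll_inode_size_unlock(inode);

        RETURN(result);
}
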
Example 2
static int lov_init_sub(const struct lu_env *env, struct lov_object *lov,
			struct cl_object *stripe, struct lov_layout_raid0 *r0,
			int idx)
{
	struct cl_object_header *hdr;
	struct cl_object_header *subhdr;
	struct cl_object_header *parent;
	struct lov_oinfo	*oinfo;
	int result;

	if (OBD_FAIL_CHECK(OBD_FAIL_LOV_INIT)) {
		/* For sanity test_206.
		 * Do not leave the object in cache, to avoid accessing
		 * freed memory: the osc_object still refers to the lov_oinfo
		 * in lsm_stripe_data, which will be freed because of this
		 * injected failure. */
		cl_object_kill(env, stripe);
		cl_object_put(env, stripe);
		return -EIO;
	}

	hdr    = cl_object_header(lov2cl(lov));
	subhdr = cl_object_header(stripe);

	oinfo = lov->lo_lsm->lsm_oinfo[idx];
	CDEBUG(D_INODE, DFID"@%p[%d] -> "DFID"@%p: ostid: "DOSTID
	       " idx: %d gen: %d\n",
	       PFID(&subhdr->coh_lu.loh_fid), subhdr, idx,
	       PFID(&hdr->coh_lu.loh_fid), hdr, POSTID(&oinfo->loi_oi),
	       oinfo->loi_ost_idx, oinfo->loi_ost_gen);

	/* reuse ->coh_attr_guard to protect coh_parent change */
	spin_lock(&subhdr->coh_attr_guard);
	parent = subhdr->coh_parent;
	if (parent == NULL) {
		subhdr->coh_parent = hdr;
		spin_unlock(&subhdr->coh_attr_guard);
		subhdr->coh_nesting = hdr->coh_nesting + 1;
		lu_object_ref_add(&stripe->co_lu, "lov-parent", lov);
		r0->lo_sub[idx] = cl2lovsub(stripe);
		r0->lo_sub[idx]->lso_super = lov;
		r0->lo_sub[idx]->lso_index = idx;
		result = 0;
	} else {
		struct lu_object  *old_obj;
		struct lov_object *old_lov;
		unsigned int mask = D_INODE;

		spin_unlock(&subhdr->coh_attr_guard);
		old_obj = lu_object_locate(&parent->coh_lu, &lov_device_type);
		LASSERT(old_obj != NULL);
		old_lov = cl2lov(lu2cl(old_obj));
		if (old_lov->lo_layout_invalid) {
			/* the object's layout has already changed but has
			 * not been refreshed yet */
			lu_object_unhash(env, &stripe->co_lu);
			result = -EAGAIN;
		} else {
			mask = D_ERROR;
			result = -EIO;
		}

		LU_OBJECT_DEBUG(mask, env, &stripe->co_lu,
				"stripe %d is already owned.\n", idx);
		LU_OBJECT_DEBUG(mask, env, old_obj, "owned.\n");
		LU_OBJECT_HEADER(mask, env, lov2lu(lov), "try to own.\n");
		cl_object_put(env, stripe);
	}
	return result;
}
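
lov_init_sub() above returns 0, -EAGAIN or -EIO depending on who already owns the stripe. For context, here is a minimal sketch, under stated assumptions, of how a RAID0 layout initializer could drive it across all stripes; the function name lov_init_raid0_sketch and the helper lov_sub_object_find() are illustrative placeholders, not symbols taken from the listing.

/*
 * Hypothetical caller sketch: walk every stripe of a RAID0 layout,
 * instantiate its sub-object and hand it to lov_init_sub().
 * lov_sub_object_find() stands in for whatever lookup routine produces
 * the per-stripe cl_object.
 */
static int lov_init_raid0_sketch(const struct lu_env *env,
                                 struct lov_object *lov,
                                 struct lov_layout_raid0 *r0)
{
        int idx;
        int result = 0;

        for (idx = 0; idx < r0->lo_nr && result == 0; idx++) {
                struct cl_object *stripe;

                stripe = lov_sub_object_find(env, lov, idx);
                if (IS_ERR(stripe)) {
                        result = PTR_ERR(stripe);
                        break;
                }
                /* -EAGAIN means the cached layout is stale; the caller is
                 * expected to refresh the layout and retry the whole init. */
                result = lov_init_sub(env, lov, stripe, r0, idx);
        }
        return result;
}

The -EAGAIN path mirrors the lo_layout_invalid branch in the listing: the stale stripe is unhashed and initialization is retried once the layout has been refreshed.
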
Example 3
static int vvp_io_read_start(const struct lu_env *env,
                             const struct cl_io_slice *ios)
{
        struct vvp_io     *vio   = cl2vvp_io(env, ios);
        struct ccc_io     *cio   = cl2ccc_io(env, ios);
        struct cl_io      *io    = ios->cis_io;
        struct cl_object  *obj   = io->ci_obj;
        struct inode      *inode = ccc_object_inode(obj);
        struct ll_ra_read *bead  = &vio->cui_bead;
        struct file       *file  = cio->cui_fd->fd_file;

        int     result;
        loff_t  pos = io->u.ci_rd.rd.crw_pos;
        long    cnt = io->u.ci_rd.rd.crw_count;
        long    tot = cio->cui_tot_count;
        int     exceed = 0;

        CLOBINVRNT(env, obj, ccc_object_invariant(obj));

        CDEBUG(D_VFSTRACE, "read: -> [%lli, %lli)\n", pos, pos + cnt);

        if (!can_populate_pages(env, io, inode))
                return 0;

        result = ccc_prep_size(env, obj, io, pos, tot, &exceed);
        if (result != 0)
                return result;
        else if (exceed != 0)
                goto out;

        LU_OBJECT_HEADER(D_INODE, env, &obj->co_lu,
                        "Read ino %lu, %lu bytes, offset %lld, size %llu\n",
                        inode->i_ino, cnt, pos, i_size_read(inode));

        /* turn off the kernel's read-ahead */
        cio->cui_fd->fd_file->f_ra.ra_pages = 0;

        /* initialize read-ahead window once per syscall */
        if (!vio->cui_ra_window_set) {
                vio->cui_ra_window_set = 1;
                bead->lrr_start = cl_index(obj, pos);
                bead->lrr_count = cl_index(obj, tot + PAGE_CACHE_SIZE - 1);
                ll_ra_read_in(file, bead);
        }

        /* BUG: 5972 */
        file_accessed(file);
        switch (vio->cui_io_subtype) {
        case IO_NORMAL:
                result = lustre_generic_file_read(file, cio, &pos);
                break;
        case IO_SPLICE:
                result = generic_file_splice_read(file, &pos,
                                vio->u.splice.cui_pipe, cnt,
                                vio->u.splice.cui_flags);
                /* LU-1109: do the splice read stripe by stripe, otherwise
                 * nfsd may get stuck if this read occupies all of the
                 * internal pipe buffers. */
                io->ci_continue = 0;
                break;
        default:
                CERROR("Wrong IO type %u\n", vio->cui_io_subtype);
                LBUG();
        }

out:
        if (result >= 0) {
                if (result < cnt)
                        io->ci_continue = 0;
                io->ci_nob += result;
                ll_rw_stats_tally(ll_i2sbi(inode), current->pid, cio->cui_fd,
                                  pos, result, READ);
                result = 0;
        }

        return result;
}
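
The read-ahead window set up under cui_ra_window_set converts the byte-based (pos, tot) pair into page indices. Below is a small, self-contained example of that arithmetic, assuming 4 KiB pages and that cl_index() reduces to a right shift by the page shift; the offsets and sizes are made up for illustration.

#include <stdio.h>

int main(void)
{
        const unsigned int page_shift = 12;             /* assume 4 KiB pages */
        long long pos = 6144;                           /* read offset, bytes */
        long long tot = 1 << 20;                        /* total read, bytes  */

        /* bead->lrr_start = cl_index(obj, pos): byte offset -> page index */
        unsigned long lrr_start = pos >> page_shift;

        /* bead->lrr_count = cl_index(obj, tot + PAGE_CACHE_SIZE - 1):
         * round the byte count up to whole pages. */
        unsigned long lrr_count =
                (tot + (1LL << page_shift) - 1) >> page_shift;

        printf("read-ahead window: start page %lu, %lu pages\n",
               lrr_start, lrr_count);
        return 0;
}

With these inputs the program prints a window starting at page 1 and covering 256 pages, i.e. a 1 MiB read beginning 6144 bytes into the file.
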