Example #1
/**
 * Implements the cl_io_operations::cio_prepare_write() method for the osc
 * layer.
 *
 * \retval -EIO transfer initiated against this osc will most likely fail
 * \retval 0    transfer initiated against this osc will most likely succeed
 *
 * The point of this check is to return an error to the caller immediately
 * when the import has been deactivated. Note that the import can still be
 * deactivated later, while pages dirtied by this IO sit in the cache; this
 * is acceptable, because the application would still get an error from
 * fsync, but many applications skip fsync for performance reasons, so we
 * want to report -EIO at write time as well.
 */
static int osc_io_prepare_write(const struct lu_env *env,
				const struct cl_io_slice *ios,
				const struct cl_page_slice *slice,
				unsigned from, unsigned to)
{
	struct osc_device *dev = lu2osc_dev(slice->cpl_obj->co_lu.lo_dev);
	struct obd_import *imp = class_exp2cliimp(dev->od_exp);
	struct osc_io     *oio = cl2osc_io(env, ios);
	int result = 0;

	/*
	 * This implements OBD_BRW_CHECK logic from old client.
	 */

	if (imp == NULL || imp->imp_invalid)
		result = -EIO;
	if (result == 0 && oio->oi_lockless)
		/* This page contains `invalid' data, but that is harmless:
		 * nobody can access the invalid data. In osc_io_commit_write()
		 * we will write exactly the [from, to) bytes of this page to
		 * the OST. -jay */
		cl_page_export(env, slice->cpl_page, 1);

	return result;
}
Example #2
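/**
 * Waits for the setattr/punch RPC issued by osc_io_setattr_start() to
 * complete, propagates its result to io->ci_result, accounts lockless
 * truncates, and releases any truncation state held in oio->oi_trunc.
 */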
static void osc_io_setattr_end(const struct lu_env *env,
			       const struct cl_io_slice *slice)
{
	struct cl_io     *io  = slice->cis_io;
	struct osc_io    *oio = cl2osc_io(env, slice);
	struct cl_object *obj = slice->cis_obj;
	struct osc_async_cbargs *cbargs = &oio->oi_cbarg;
	int result = 0;

	if (cbargs->opc_rpc_sent) {
		wait_for_completion(&cbargs->opc_sync);
		result = io->ci_result = cbargs->opc_rc;
	}
	if (result == 0) {
		if (oio->oi_lockless) {
			/* lockless truncate */
			struct osc_device *osd = lu2osc_dev(obj->co_lu.lo_dev);

			LASSERT(cl_io_is_trunc(io));
			/* XXX: Need a lock. */
			osd->od_stats.os_lockless_truncates++;
		}
	}

	if (cl_io_is_trunc(io)) {
		__u64 size = io->u.ci_setattr.sa_attr.lvb_size;

		osc_trunc_check(env, io, oio, size);
		if (oio->oi_trunc != NULL) {
			osc_cache_truncate_end(env, oio, cl2osc(obj));
			oio->oi_trunc = NULL;
		}
	}
}
Example #3
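/**
 * Commits a buffered write to a page: osc_page_touch() records how far
 * valid data extends, resource-capable local clients are exempted from
 * quota, and lockless IO clips the page to the exact [from, to) range
 * that will be sent to the OST.
 */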
static int osc_io_commit_write(const struct lu_env *env,
			       const struct cl_io_slice *ios,
			       const struct cl_page_slice *slice,
			       unsigned from, unsigned to)
{
	struct osc_io         *oio = cl2osc_io(env, ios);
	struct osc_page       *opg = cl2osc_page(slice);
	struct osc_object     *obj = cl2osc(opg->ops_cl.cpl_obj);
	struct osc_async_page *oap = &opg->ops_oap;

	LASSERT(to > 0);
	/*
	 * XXX instead of calling osc_page_touch() here and in
	 * osc_io_fault_start() it might be more logical to introduce a
	 * cl_page_touch() method that the generic cl_io_commit_write() and
	 * page fault code would call.
	 */
	osc_page_touch(env, opg, to);
	if (!client_is_remote(osc_export(obj)) &&
	    capable(CFS_CAP_SYS_RESOURCE))
		oap->oap_brw_flags |= OBD_BRW_NOQUOTA;

	if (oio->oi_lockless)
		/* see osc_io_prepare_write() for lockless io handling. */
		cl_page_clip(env, slice->cpl_page, from, to);

	return 0;
}
Example #4
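/**
 * Releases the IO's active extent, if any, once the IO completes.
 */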
static void osc_io_end(const struct lu_env *env,
		       const struct cl_io_slice *slice)
{
	struct osc_io *oio = cl2osc_io(env, slice);

	if (oio->oi_active) {
		osc_extent_release(env, oio->oi_active);
		oio->oi_active = NULL;
	}
}
Example #5
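/**
 * Packs a single ladvise hint into a ladvise_hdr buffer and sends it to
 * the OST via osc_ladvise_base(). For synchronous requests,
 * osc_async_upcall() is registered so that osc_io_ladvise_end() can wait
 * for the reply on opc_sync.
 */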
static int osc_io_ladvise_start(const struct lu_env *env,
				const struct cl_io_slice *slice)
{
	int			 result = 0;
	struct cl_io		*io = slice->cis_io;
	struct osc_io		*oio = cl2osc_io(env, slice);
	struct cl_object	*obj = slice->cis_obj;
	struct lov_oinfo	*loi = cl2osc(obj)->oo_oinfo;
	struct cl_ladvise_io	*lio = &io->u.ci_ladvise;
	struct obdo		*oa = &oio->oi_oa;
	struct osc_async_cbargs	*cbargs = &oio->oi_cbarg;
	struct lu_ladvise	*ladvise;
	struct ladvise_hdr	*ladvise_hdr;
	int			 buf_size;
	int			 num_advise = 1;
	ENTRY;

	/* TODO: add multiple ladvise support in CLIO */
	buf_size = offsetof(typeof(*ladvise_hdr), lah_advise[num_advise]);
	if (osc_env_info(env)->oti_ladvise_buf.lb_len < buf_size)
		lu_buf_realloc(&osc_env_info(env)->oti_ladvise_buf, buf_size);

	ladvise_hdr = osc_env_info(env)->oti_ladvise_buf.lb_buf;
	if (ladvise_hdr == NULL)
		RETURN(-ENOMEM);

	memset(ladvise_hdr, 0, buf_size);
	ladvise_hdr->lah_magic = LADVISE_MAGIC;
	ladvise_hdr->lah_count = num_advise;
	ladvise_hdr->lah_flags = lio->li_flags;

	memset(oa, 0, sizeof(*oa));
	oa->o_oi = loi->loi_oi;
	oa->o_valid = OBD_MD_FLID;
	obdo_set_parent_fid(oa, lio->li_fid);

	ladvise = ladvise_hdr->lah_advise;
	ladvise->lla_start = lio->li_start;
	ladvise->lla_end = lio->li_end;
	ladvise->lla_advice = lio->li_advice;

	if (lio->li_flags & LF_ASYNC) {
		result = osc_ladvise_base(osc_export(cl2osc(obj)), oa,
					  ladvise_hdr, NULL, NULL, NULL);
	} else {
		init_completion(&cbargs->opc_sync);
		result = osc_ladvise_base(osc_export(cl2osc(obj)), oa,
					  ladvise_hdr, osc_async_upcall,
					  cbargs, PTLRPCD_SET);
		cbargs->opc_rpc_sent = result == 0;
	}
	RETURN(result);
}
Example #6
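/**
 * Sends an OST_GETATTR RPC to obtain the object's data version,
 * optionally asking the server to flush dirty data first. The reply is
 * processed asynchronously by osc_data_version_interpret().
 */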
static int osc_io_data_version_start(const struct lu_env *env,
				     const struct cl_io_slice *slice)
{
	struct cl_data_version_io *dv	= &slice->cis_io->u.ci_data_version;
	struct osc_io		*oio	= cl2osc_io(env, slice);
	struct obdo		*oa	= &oio->oi_oa;
	struct osc_async_cbargs	*cbargs	= &oio->oi_cbarg;
	struct osc_object	*obj	= cl2osc(slice->cis_obj);
	struct lov_oinfo	*loi	= obj->oo_oinfo;
	struct obd_export	*exp	= osc_export(obj);
	struct ptlrpc_request	*req;
	struct ost_body		*body;
	struct osc_data_version_args *dva;
	int rc;

	ENTRY;
	memset(oa, 0, sizeof(*oa));
	oa->o_oi = loi->loi_oi;
	oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;

	if (dv->dv_flags & (LL_DV_RD_FLUSH | LL_DV_WR_FLUSH)) {
		oa->o_valid |= OBD_MD_FLFLAGS;
		oa->o_flags |= OBD_FL_SRVLOCK;
		if (dv->dv_flags & LL_DV_WR_FLUSH)
			oa->o_flags |= OBD_FL_FLUSH;
	}

	init_completion(&cbargs->opc_sync);

	req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_GETATTR);
	if (req == NULL)
		RETURN(-ENOMEM);

	rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_GETATTR);
	if (rc < 0) {
		ptlrpc_request_free(req);
		RETURN(rc);
	}

	body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
	lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, oa);

	ptlrpc_request_set_replen(req);
	req->rq_interpret_reply = osc_data_version_interpret;
	CLASSERT(sizeof(*dva) <= sizeof(req->rq_async_args));
	dva = ptlrpc_req_async_args(req);
	dva->dva_oio = oio;

	ptlrpcd_add_req(req);

	RETURN(0);
}
Example #7
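/**
 * For synchronous ladvise requests, waits for the RPC started by
 * osc_io_ladvise_start() and stores its result in ci_result.
 */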
static void osc_io_ladvise_end(const struct lu_env *env,
			       const struct cl_io_slice *slice)
{
	struct cl_io		*io = slice->cis_io;
	struct osc_io		*oio = cl2osc_io(env, slice);
	struct osc_async_cbargs	*cbargs = &oio->oi_cbarg;
	int			 result = 0;
	struct cl_ladvise_io	*lio = &io->u.ci_ladvise;

	if ((!(lio->li_flags & LF_ASYNC)) && cbargs->opc_rpc_sent) {
		wait_for_completion(&cbargs->opc_sync);
		result = cbargs->opc_rc;
	}
	slice->cis_io->ci_result = result;
}
Example #8
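/**
 * Updates the object's atime at the start of a read, unless the IO is
 * lockless or was issued with ci_noatime set.
 */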
static int osc_io_read_start(const struct lu_env *env,
                             const struct cl_io_slice *slice)
{
	struct osc_io	 *oio  = cl2osc_io(env, slice);
	struct cl_object *obj  = slice->cis_obj;
	struct cl_attr	 *attr = &osc_env_info(env)->oti_attr;
	int rc = 0;
	ENTRY;

	if (oio->oi_lockless == 0 && !slice->cis_io->ci_noatime) {
		cl_object_attr_lock(obj);
		attr->cat_atime = LTIME_S(CFS_CURRENT_TIME);
		rc = cl_object_attr_set(env, obj, attr, CAT_ATIME);
		cl_object_attr_unlock(obj);
	}

	RETURN(rc);
}
Example #9
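/**
 * Completes an fsync: for CL_FSYNC_LOCAL, waits until the given range
 * has been written out of the local cache; for CL_FSYNC_ALL, waits on
 * opc_sync for the server-side sync RPC to complete as well.
 */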
static void osc_io_fsync_end(const struct lu_env *env,
			     const struct cl_io_slice *slice)
{
	struct cl_fsync_io *fio = &slice->cis_io->u.ci_fsync;
	struct cl_object   *obj = slice->cis_obj;
	pgoff_t start = cl_index(obj, fio->fi_start);
	pgoff_t end   = cl_index(obj, fio->fi_end);
	int result = 0;

	if (fio->fi_mode == CL_FSYNC_LOCAL) {
		result = osc_cache_wait_range(env, cl2osc(obj), start, end);
	} else if (fio->fi_mode == CL_FSYNC_ALL) {
		struct osc_io	   *oio    = cl2osc_io(env, slice);
		struct osc_async_cbargs *cbargs = &oio->oi_cbarg;

		wait_for_completion(&cbargs->opc_sync);
		if (result == 0)
			result = cbargs->opc_rc;
	}
	slice->cis_io->ci_result = result;
}
Example #10
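/**
 * Waits for the reply to the RPC sent by osc_io_data_version_start()
 * and copies the returned data version to the caller, or reports
 * -EOPNOTSUPP if the server did not return one.
 */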
static void osc_io_data_version_end(const struct lu_env *env,
				    const struct cl_io_slice *slice)
{
	struct cl_data_version_io *dv = &slice->cis_io->u.ci_data_version;
	struct osc_io		*oio    = cl2osc_io(env, slice);
	struct osc_async_cbargs *cbargs = &oio->oi_cbarg;

	ENTRY;
	wait_for_completion(&cbargs->opc_sync);

	if (cbargs->opc_rc != 0) {
		slice->cis_io->ci_result = cbargs->opc_rc;
	} else if (!(oio->oi_oa.o_valid & OBD_MD_FLDATAVERSION)) {
		slice->cis_io->ci_result = -EOPNOTSUPP;
	} else {
		dv->dv_data_version = oio->oi_oa.o_data_version;
		slice->cis_io->ci_result = 0;
	}

	EXIT;
}
Example #11
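/**
 * Updates the object's mtime and ctime at the start of a write for
 * non-lockless IO.
 */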
static int osc_io_write_start(const struct lu_env *env,
			      const struct cl_io_slice *slice)
{
	struct osc_io    *oio   = cl2osc_io(env, slice);
	struct cl_object *obj   = slice->cis_obj;
	struct cl_attr   *attr  = &osc_env_info(env)->oti_attr;
	int              result = 0;
	ENTRY;

	if (oio->oi_lockless == 0) {
		OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_DELAY_SETTIME, 1);
		cl_object_attr_lock(obj);
		result = cl_object_attr_get(env, obj, attr);
		if (result == 0) {
			attr->cat_mtime = attr->cat_ctime =
				LTIME_S(CFS_CURRENT_TIME);
			result = cl_object_attr_set(env, obj, attr,
						    CAT_MTIME | CAT_CTIME);
		}
		cl_object_attr_unlock(obj);
	}
	RETURN(result);
}
Example #12
/**
 * An implementation of the cl_io_operations::cio_io_submit() method for the
 * osc layer. Iterates over the pages in the in-queue, prepares each for IO
 * by calling cl_page_prep(), submits it through osc_page_submit(), and
 * flushes the accumulated pages to the OST in batches of at most
 * cl_max_pages_per_rpc via osc_queue_sync_pages().
 */
static int osc_io_submit(const struct lu_env *env,
			 const struct cl_io_slice *ios,
			 enum cl_req_type crt, struct cl_2queue *queue)
{
	struct cl_page    *page;
	struct cl_page    *tmp;
	struct client_obd *cli  = NULL;
	struct osc_object *osc  = NULL; /* to keep gcc happy */
	struct osc_page   *opg;
	struct cl_io      *io;
	LIST_HEAD(list);

	struct cl_page_list *qin      = &queue->c2_qin;
	struct cl_page_list *qout     = &queue->c2_qout;
	int queued = 0;
	int result = 0;
	int cmd;
	int brw_flags;
	int max_pages;

	LASSERT(qin->pl_nr > 0);

	CDEBUG(D_CACHE, "%d %d\n", qin->pl_nr, crt);

	osc = cl2osc(ios->cis_obj);
	cli = osc_cli(osc);
	max_pages = cli->cl_max_pages_per_rpc;

	cmd = crt == CRT_WRITE ? OBD_BRW_WRITE : OBD_BRW_READ;
	brw_flags = osc_io_srvlock(cl2osc_io(env, ios)) ? OBD_BRW_SRVLOCK : 0;

	/*
	 * NOTE: @page here is a top-level page. This is done to avoid
	 *       creating a sub-page list.
	 */
	cl_page_list_for_each_safe(page, tmp, qin) {
		struct osc_async_page *oap;

		/* Top level IO. */
		io = page->cp_owner;
		LASSERT(io != NULL);

		opg = osc_cl_page_osc(page);
		oap = &opg->ops_oap;
		LASSERT(osc == oap->oap_obj);

		if (!list_empty(&oap->oap_pending_item) ||
		    !list_empty(&oap->oap_rpc_item)) {
			CDEBUG(D_CACHE, "Busy oap %p page %p for submit.\n",
			       oap, opg);
			result = -EBUSY;
			break;
		}

		result = cl_page_prep(env, io, page, crt);
		if (result != 0) {
			LASSERT(result < 0);
			if (result != -EALREADY)
				break;
			/*
			 * Handle the -EALREADY error: for the read case the
			 * page is already in the UPTODATE state; for the
			 * write case the page is not dirty.
			 */
			result = 0;
			continue;
		}

		cl_page_list_move(qout, qin, page);
		oap->oap_async_flags = ASYNC_URGENT | ASYNC_READY |
				       ASYNC_COUNT_STABLE;

		osc_page_submit(env, opg, crt, brw_flags);
		list_add_tail(&oap->oap_pending_item, &list);
		if (++queued == max_pages) {
			queued = 0;
			result = osc_queue_sync_pages(env, osc, &list, cmd,
						      brw_flags);
			if (result < 0)
				break;
		}
	}

	if (queued > 0)
		result = osc_queue_sync_pages(env, osc, &list, cmd, brw_flags);

	CDEBUG(D_INFO, "%d/%d %d\n", qin->pl_nr, qout->pl_nr, result);
	return qout->pl_nr > 0 ? 0 : result;
}
Example #13
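/**
 * Starts a setattr: if this is a truncate, discards cached dirty pages
 * affected by the truncate first, then updates the cached object
 * attributes under the attribute lock, and finally issues either a punch
 * RPC (size change) or a setattr RPC to the OST, to be waited on by
 * osc_io_setattr_end().
 */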
static int osc_io_setattr_start(const struct lu_env *env,
				const struct cl_io_slice *slice)
{
	struct cl_io		*io	= slice->cis_io;
	struct osc_io		*oio	= cl2osc_io(env, slice);
	struct cl_object	*obj	= slice->cis_obj;
	struct lov_oinfo	*loi	= cl2osc(obj)->oo_oinfo;
	struct cl_attr		*attr	= &osc_env_info(env)->oti_attr;
	struct obdo		*oa	= &oio->oi_oa;
	struct osc_async_cbargs	*cbargs	= &oio->oi_cbarg;
	__u64			 size	= io->u.ci_setattr.sa_attr.lvb_size;
	unsigned int		 ia_valid = io->u.ci_setattr.sa_valid;
	int			 result	= 0;
	struct obd_info		 oinfo	= { { { 0 } } };

	/* truncate cache dirty pages first */
	if (cl_io_is_trunc(io))
		result = osc_cache_truncate_start(env, oio, cl2osc(obj), size);

	if (result == 0 && oio->oi_lockless == 0) {
		cl_object_attr_lock(obj);
		result = cl_object_attr_get(env, obj, attr);
		if (result == 0) {
			struct ost_lvb *lvb = &io->u.ci_setattr.sa_attr;
			unsigned int cl_valid = 0;

			if (ia_valid & ATTR_SIZE) {
				attr->cat_size = attr->cat_kms = size;
				cl_valid = (CAT_SIZE | CAT_KMS);
			}
			if (ia_valid & ATTR_MTIME_SET) {
				attr->cat_mtime = lvb->lvb_mtime;
				cl_valid |= CAT_MTIME;
			}
			if (ia_valid & ATTR_ATIME_SET) {
				attr->cat_atime = lvb->lvb_atime;
				cl_valid |= CAT_ATIME;
			}
			if (ia_valid & ATTR_CTIME_SET) {
				attr->cat_ctime = lvb->lvb_ctime;
				cl_valid |= CAT_CTIME;
			}
			result = cl_object_attr_set(env, obj, attr, cl_valid);
		}
		cl_object_attr_unlock(obj);
	}
	memset(oa, 0, sizeof(*oa));
	if (result == 0) {
		oa->o_oi = loi->loi_oi;
		oa->o_mtime = attr->cat_mtime;
		oa->o_atime = attr->cat_atime;
		oa->o_ctime = attr->cat_ctime;
		oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP | OBD_MD_FLATIME |
			OBD_MD_FLCTIME | OBD_MD_FLMTIME;
		if (ia_valid & ATTR_SIZE) {
			oa->o_size = size;
			oa->o_blocks = OBD_OBJECT_EOF;
			oa->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;

			if (oio->oi_lockless) {
				oa->o_flags = OBD_FL_SRVLOCK;
				oa->o_valid |= OBD_MD_FLFLAGS;
			}
		} else {
			LASSERT(oio->oi_lockless == 0);
		}

		oinfo.oi_oa = oa;
		oinfo.oi_capa = io->u.ci_setattr.sa_capa;
		init_completion(&cbargs->opc_sync);

		if (ia_valid & ATTR_SIZE)
			result = osc_punch_base(osc_export(cl2osc(obj)),
						&oinfo, osc_async_upcall,
						cbargs, PTLRPCD_SET);
		else
			result = osc_setattr_async_base(osc_export(cl2osc(obj)),
							&oinfo, NULL,
							osc_async_upcall,
							cbargs, PTLRPCD_SET);
		cbargs->opc_rpc_sent = result == 0;
	}
	return result;
}
Example #14
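/**
 * Commits a list of pages into the dirty cache: clips partial first and
 * last pages for lockless IO, adds each page to the cache via
 * osc_page_cache_add(), records the extent of valid data, and hands each
 * page back to the caller through @cb.
 */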
static int osc_io_commit_async(const struct lu_env *env,
				const struct cl_io_slice *ios,
				struct cl_page_list *qin, int from, int to,
				cl_commit_cbt cb)
{
	struct cl_io    *io = ios->cis_io;
	struct osc_io   *oio = cl2osc_io(env, ios);
	struct osc_object *osc = cl2osc(ios->cis_obj);
	struct cl_page  *page;
	struct cl_page  *last_page;
	struct osc_page *opg;
	int result = 0;
	ENTRY;

	LASSERT(qin->pl_nr > 0);

	/* Handle partial page cases */
	last_page = cl_page_list_last(qin);
	if (oio->oi_lockless) {
		page = cl_page_list_first(qin);
		if (page == last_page) {
			cl_page_clip(env, page, from, to);
		} else {
			if (from != 0)
				cl_page_clip(env, page, from, PAGE_SIZE);
			if (to != PAGE_SIZE)
				cl_page_clip(env, last_page, 0, to);
		}
	}

	while (qin->pl_nr > 0) {
		struct osc_async_page *oap;

		page = cl_page_list_first(qin);
		opg = osc_cl_page_osc(page, osc);
		oap = &opg->ops_oap;

		if (!list_empty(&oap->oap_rpc_item)) {
			CDEBUG(D_CACHE, "Busy oap %p page %p for submit.\n",
			       oap, opg);
			result = -EBUSY;
			break;
		}

		/* The page may be already in dirty cache. */
		if (list_empty(&oap->oap_pending_item)) {
			result = osc_page_cache_add(env, &opg->ops_cl, io);
			if (result != 0)
				break;
		}

		osc_page_touch_at(env, osc2cl(osc), osc_index(opg),
				  page == last_page ? to : PAGE_SIZE);

		cl_page_list_del(env, qin, page);

		(*cb)(env, io, page);
		/* Can't access page any more. Page can be in transfer and
		 * complete at any time. */
	}

	/* For a sync write, the kernel will wait for this page to be flushed
	 * before osc_io_end() is called, so release the extent earlier.
	 * For mkwrite(), it is known that there are no further pages. */
	if (cl_io_is_sync_write(io) && oio->oi_active != NULL) {
		osc_extent_release(env, oio->oi_active);
		oio->oi_active = NULL;
	}

	CDEBUG(D_INFO, "%d %d\n", qin->pl_nr, result);
	RETURN(result);
}
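
These handlers are not invoked directly; the generic client IO code dispatches to them through a cl_io_operations table registered by the osc layer. Below is a hedged sketch of how the functions in the examples above might be wired together. It is an illustration only: the field names follow common Lustre releases, the table is deliberately partial (real trees also register cio_fini, cio_iter_init and other methods not shown here), and the exact layout varies between versions.

/* Sketch only: field names and CIT_* slots are assumptions based on
 * common Lustre releases, not taken from this exact source tree. */
static const struct cl_io_operations osc_io_ops = {
	.op = {
		[CIT_READ] = {
			.cio_start = osc_io_read_start,
		},
		[CIT_WRITE] = {
			.cio_start = osc_io_write_start,
			.cio_end   = osc_io_end,
		},
		[CIT_SETATTR] = {
			.cio_start = osc_io_setattr_start,
			.cio_end   = osc_io_setattr_end,
		},
		[CIT_DATA_VERSION] = {
			.cio_start = osc_io_data_version_start,
			.cio_end   = osc_io_data_version_end,
		},
		[CIT_FSYNC] = {
			.cio_end   = osc_io_fsync_end,
		},
		[CIT_LADVISE] = {
			.cio_start = osc_io_ladvise_start,
			.cio_end   = osc_io_ladvise_end,
		},
	},
	.cio_submit	  = osc_io_submit,
	.cio_commit_async = osc_io_commit_async,
};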