/** * Implements cl_io_operations::cio_prepare_write() method for osc layer. * * \retval -EIO transfer initiated against this osc will most likely fail * \retval 0 transfer initiated against this osc will most likely succeed. * * The reason for this check is to immediately return an error to the caller * in the case of a deactivated import. Note, that import can be deactivated * later, while pages, dirtied by this IO, are still in the cache, but this is * irrelevant, because that would still return an error to the application (if * it does fsync), but many applications don't do fsync because of performance * issues, and we wanted to return an -EIO at write time to notify the * application. */ static int osc_io_prepare_write(const struct lu_env *env, const struct cl_io_slice *ios, const struct cl_page_slice *slice, unsigned from, unsigned to) { struct osc_device *dev = lu2osc_dev(slice->cpl_obj->co_lu.lo_dev); struct obd_import *imp = class_exp2cliimp(dev->od_exp); struct osc_io *oio = cl2osc_io(env, ios); int result = 0; /* * This implements OBD_BRW_CHECK logic from old client. */ if (imp == NULL || imp->imp_invalid) result = -EIO; if (result == 0 && oio->oi_lockless) /* this page contains `invalid' data, but who cares? * nobody can access the invalid data. * in osc_io_commit_write(), we're going to write exact * [from, to) bytes of this page to OST. -jay */ cl_page_export(env, slice->cpl_page, 1); return result; }
static void osc_io_setattr_end(const struct lu_env *env, const struct cl_io_slice *slice) { struct cl_io *io = slice->cis_io; struct osc_io *oio = cl2osc_io(env, slice); struct cl_object *obj = slice->cis_obj; struct osc_async_cbargs *cbargs = &oio->oi_cbarg; int result = 0; if (cbargs->opc_rpc_sent) { wait_for_completion(&cbargs->opc_sync); result = io->ci_result = cbargs->opc_rc; } if (result == 0) { if (oio->oi_lockless) { /* lockless truncate */ struct osc_device *osd = lu2osc_dev(obj->co_lu.lo_dev); LASSERT(cl_io_is_trunc(io)); /* XXX: Need a lock. */ osd->od_stats.os_lockless_truncates++; } } if (cl_io_is_trunc(io)) { __u64 size = io->u.ci_setattr.sa_attr.lvb_size; osc_trunc_check(env, io, oio, size); if (oio->oi_trunc != NULL) { osc_cache_truncate_end(env, oio, cl2osc(obj)); oio->oi_trunc = NULL; } } }
static int osc_io_commit_write(const struct lu_env *env, const struct cl_io_slice *ios, const struct cl_page_slice *slice, unsigned from, unsigned to) { struct osc_io *oio = cl2osc_io(env, ios); struct osc_page *opg = cl2osc_page(slice); struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj); struct osc_async_page *oap = &opg->ops_oap; LASSERT(to > 0); /* * XXX instead of calling osc_page_touch() here and in * osc_io_fault_start() it might be more logical to introduce * cl_page_touch() method, that generic cl_io_commit_write() and page * fault code calls. */ osc_page_touch(env, cl2osc_page(slice), to); if (!client_is_remote(osc_export(obj)) && capable(CFS_CAP_SYS_RESOURCE)) oap->oap_brw_flags |= OBD_BRW_NOQUOTA; if (oio->oi_lockless) /* see osc_io_prepare_write() for lockless io handling. */ cl_page_clip(env, slice->cpl_page, from, to); return 0; }
/**
 * Generic end-of-IO hook for the osc layer: releases the extent still
 * referenced by oi_active, if any, and clears the reference.
 */
static void osc_io_end(const struct lu_env *env,
		       const struct cl_io_slice *slice)
{
	struct osc_io *oio = cl2osc_io(env, slice);

	if (!oio->oi_active)
		return;

	osc_extent_release(env, oio->oi_active);
	oio->oi_active = NULL;
}
static int osc_io_ladvise_start(const struct lu_env *env, const struct cl_io_slice *slice) { int result = 0; struct cl_io *io = slice->cis_io; struct osc_io *oio = cl2osc_io(env, slice); struct cl_object *obj = slice->cis_obj; struct lov_oinfo *loi = cl2osc(obj)->oo_oinfo; struct cl_ladvise_io *lio = &io->u.ci_ladvise; struct obdo *oa = &oio->oi_oa; struct osc_async_cbargs *cbargs = &oio->oi_cbarg; struct lu_ladvise *ladvise; struct ladvise_hdr *ladvise_hdr; int buf_size; int num_advise = 1; ENTRY; /* TODO: add multiple ladvise support in CLIO */ buf_size = offsetof(typeof(*ladvise_hdr), lah_advise[num_advise]); if (osc_env_info(env)->oti_ladvise_buf.lb_len < buf_size) lu_buf_realloc(&osc_env_info(env)->oti_ladvise_buf, buf_size); ladvise_hdr = osc_env_info(env)->oti_ladvise_buf.lb_buf; if (ladvise_hdr == NULL) RETURN(-ENOMEM); memset(ladvise_hdr, 0, buf_size); ladvise_hdr->lah_magic = LADVISE_MAGIC; ladvise_hdr->lah_count = num_advise; ladvise_hdr->lah_flags = lio->li_flags; memset(oa, 0, sizeof(*oa)); oa->o_oi = loi->loi_oi; oa->o_valid = OBD_MD_FLID; obdo_set_parent_fid(oa, lio->li_fid); ladvise = ladvise_hdr->lah_advise; ladvise->lla_start = lio->li_start; ladvise->lla_end = lio->li_end; ladvise->lla_advice = lio->li_advice; if (lio->li_flags & LF_ASYNC) { result = osc_ladvise_base(osc_export(cl2osc(obj)), oa, ladvise_hdr, NULL, NULL, NULL); } else { init_completion(&cbargs->opc_sync); result = osc_ladvise_base(osc_export(cl2osc(obj)), oa, ladvise_hdr, osc_async_upcall, cbargs, PTLRPCD_SET); cbargs->opc_rpc_sent = result == 0; } RETURN(result); }
static int osc_io_data_version_start(const struct lu_env *env, const struct cl_io_slice *slice) { struct cl_data_version_io *dv = &slice->cis_io->u.ci_data_version; struct osc_io *oio = cl2osc_io(env, slice); struct obdo *oa = &oio->oi_oa; struct osc_async_cbargs *cbargs = &oio->oi_cbarg; struct osc_object *obj = cl2osc(slice->cis_obj); struct lov_oinfo *loi = obj->oo_oinfo; struct obd_export *exp = osc_export(obj); struct ptlrpc_request *req; struct ost_body *body; struct osc_data_version_args *dva; int rc; ENTRY; memset(oa, 0, sizeof(*oa)); oa->o_oi = loi->loi_oi; oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP; if (dv->dv_flags & (LL_DV_RD_FLUSH | LL_DV_WR_FLUSH)) { oa->o_valid |= OBD_MD_FLFLAGS; oa->o_flags |= OBD_FL_SRVLOCK; if (dv->dv_flags & LL_DV_WR_FLUSH) oa->o_flags |= OBD_FL_FLUSH; } init_completion(&cbargs->opc_sync); req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_GETATTR); if (req == NULL) RETURN(-ENOMEM); rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_GETATTR); if (rc < 0) { ptlrpc_request_free(req); RETURN(rc); } body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY); lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, oa); ptlrpc_request_set_replen(req); req->rq_interpret_reply = osc_data_version_interpret; CLASSERT(sizeof(*dva) <= sizeof(req->rq_async_args)); dva = ptlrpc_req_async_args(req); dva->dva_oio = oio; ptlrpcd_add_req(req); RETURN(0); }
/**
 * Finishes a ladvise IO for the osc layer.
 *
 * For a synchronous request (LF_ASYNC not set) that was actually sent, waits
 * for completion and takes the RPC result; in every other case the result
 * is 0. The result is stored in the IO's ci_result.
 */
static void osc_io_ladvise_end(const struct lu_env *env,
			       const struct cl_io_slice *slice)
{
	struct cl_io *io = slice->cis_io;
	struct osc_io *oio = cl2osc_io(env, slice);
	struct osc_async_cbargs *cbargs = &oio->oi_cbarg;
	struct cl_ladvise_io *lio = &io->u.ci_ladvise;
	int rc = 0;

	if (!(lio->li_flags & LF_ASYNC)) {
		if (cbargs->opc_rpc_sent) {
			wait_for_completion(&cbargs->opc_sync);
			rc = cbargs->opc_rc;
		}
	}

	slice->cis_io->ci_result = rc;
}
static int osc_io_read_start(const struct lu_env *env, const struct cl_io_slice *slice) { struct osc_io *oio = cl2osc_io(env, slice); struct cl_object *obj = slice->cis_obj; struct cl_attr *attr = &osc_env_info(env)->oti_attr; int rc = 0; ENTRY; if (oio->oi_lockless == 0 && !slice->cis_io->ci_noatime) { cl_object_attr_lock(obj); attr->cat_atime = LTIME_S(CFS_CURRENT_TIME); rc = cl_object_attr_set(env, obj, attr, CAT_ATIME); cl_object_attr_unlock(obj); } RETURN(rc); }
static void osc_io_fsync_end(const struct lu_env *env, const struct cl_io_slice *slice) { struct cl_fsync_io *fio = &slice->cis_io->u.ci_fsync; struct cl_object *obj = slice->cis_obj; pgoff_t start = cl_index(obj, fio->fi_start); pgoff_t end = cl_index(obj, fio->fi_end); int result = 0; if (fio->fi_mode == CL_FSYNC_LOCAL) { result = osc_cache_wait_range(env, cl2osc(obj), start, end); } else if (fio->fi_mode == CL_FSYNC_ALL) { struct osc_io *oio = cl2osc_io(env, slice); struct osc_async_cbargs *cbargs = &oio->oi_cbarg; wait_for_completion(&cbargs->opc_sync); if (result == 0) result = cbargs->opc_rc; } slice->cis_io->ci_result = result; }
static void osc_io_data_version_end(const struct lu_env *env, const struct cl_io_slice *slice) { struct cl_data_version_io *dv = &slice->cis_io->u.ci_data_version; struct osc_io *oio = cl2osc_io(env, slice); struct osc_async_cbargs *cbargs = &oio->oi_cbarg; ENTRY; wait_for_completion(&cbargs->opc_sync); if (cbargs->opc_rc != 0) { slice->cis_io->ci_result = cbargs->opc_rc; } else if (!(oio->oi_oa.o_valid & OBD_MD_FLDATAVERSION)) { slice->cis_io->ci_result = -EOPNOTSUPP; } else { dv->dv_data_version = oio->oi_oa.o_data_version; slice->cis_io->ci_result = 0; } EXIT; }
static int osc_io_write_start(const struct lu_env *env, const struct cl_io_slice *slice) { struct osc_io *oio = cl2osc_io(env, slice); struct cl_object *obj = slice->cis_obj; struct cl_attr *attr = &osc_env_info(env)->oti_attr; int result = 0; ENTRY; if (oio->oi_lockless == 0) { OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_DELAY_SETTIME, 1); cl_object_attr_lock(obj); result = cl_object_attr_get(env, obj, attr); if (result == 0) { attr->cat_mtime = attr->cat_ctime = LTIME_S(CFS_CURRENT_TIME); result = cl_object_attr_set(env, obj, attr, CAT_MTIME | CAT_CTIME); } cl_object_attr_unlock(obj); } RETURN(result); }
/** * An implementation of cl_io_operations::cio_io_submit() method for osc * layer. Iterates over pages in the in-queue, prepares each for io by calling * cl_page_prep() and then either submits them through osc_io_submit_page() * or, if page is already submitted, changes osc flags through * osc_set_async_flags(). */ static int osc_io_submit(const struct lu_env *env, const struct cl_io_slice *ios, enum cl_req_type crt, struct cl_2queue *queue) { struct cl_page *page; struct cl_page *tmp; struct client_obd *cli = NULL; struct osc_object *osc = NULL; /* to keep gcc happy */ struct osc_page *opg; struct cl_io *io; LIST_HEAD(list); struct cl_page_list *qin = &queue->c2_qin; struct cl_page_list *qout = &queue->c2_qout; int queued = 0; int result = 0; int cmd; int brw_flags; int max_pages; LASSERT(qin->pl_nr > 0); CDEBUG(D_CACHE, "%d %d\n", qin->pl_nr, crt); osc = cl2osc(ios->cis_obj); cli = osc_cli(osc); max_pages = cli->cl_max_pages_per_rpc; cmd = crt == CRT_WRITE ? OBD_BRW_WRITE : OBD_BRW_READ; brw_flags = osc_io_srvlock(cl2osc_io(env, ios)) ? OBD_BRW_SRVLOCK : 0; /* * NOTE: here @page is a top-level page. This is done to avoid * creation of sub-page-list. */ cl_page_list_for_each_safe(page, tmp, qin) { struct osc_async_page *oap; /* Top level IO. */ io = page->cp_owner; LASSERT(io != NULL); opg = osc_cl_page_osc(page); oap = &opg->ops_oap; LASSERT(osc == oap->oap_obj); if (!list_empty(&oap->oap_pending_item) || !list_empty(&oap->oap_rpc_item)) { CDEBUG(D_CACHE, "Busy oap %p page %p for submit.\n", oap, opg); result = -EBUSY; break; } result = cl_page_prep(env, io, page, crt); if (result != 0) { LASSERT(result < 0); if (result != -EALREADY) break; /* * Handle -EALREADY error: for read case, the page is * already in UPTODATE state; for write, the page * is not dirty. 
*/ result = 0; continue; } cl_page_list_move(qout, qin, page); oap->oap_async_flags = ASYNC_URGENT|ASYNC_READY; oap->oap_async_flags |= ASYNC_COUNT_STABLE; osc_page_submit(env, opg, crt, brw_flags); list_add_tail(&oap->oap_pending_item, &list); if (++queued == max_pages) { queued = 0; result = osc_queue_sync_pages(env, osc, &list, cmd, brw_flags); if (result < 0) break; } } if (queued > 0) result = osc_queue_sync_pages(env, osc, &list, cmd, brw_flags); CDEBUG(D_INFO, "%d/%d %d\n", qin->pl_nr, qout->pl_nr, result); return qout->pl_nr > 0 ? 0 : result; }
static int osc_io_setattr_start(const struct lu_env *env, const struct cl_io_slice *slice) { struct cl_io *io = slice->cis_io; struct osc_io *oio = cl2osc_io(env, slice); struct cl_object *obj = slice->cis_obj; struct lov_oinfo *loi = cl2osc(obj)->oo_oinfo; struct cl_attr *attr = &osc_env_info(env)->oti_attr; struct obdo *oa = &oio->oi_oa; struct osc_async_cbargs *cbargs = &oio->oi_cbarg; __u64 size = io->u.ci_setattr.sa_attr.lvb_size; unsigned int ia_valid = io->u.ci_setattr.sa_valid; int result = 0; struct obd_info oinfo = { { { 0 } } }; /* truncate cache dirty pages first */ if (cl_io_is_trunc(io)) result = osc_cache_truncate_start(env, oio, cl2osc(obj), size); if (result == 0 && oio->oi_lockless == 0) { cl_object_attr_lock(obj); result = cl_object_attr_get(env, obj, attr); if (result == 0) { struct ost_lvb *lvb = &io->u.ci_setattr.sa_attr; unsigned int cl_valid = 0; if (ia_valid & ATTR_SIZE) { attr->cat_size = attr->cat_kms = size; cl_valid = (CAT_SIZE | CAT_KMS); } if (ia_valid & ATTR_MTIME_SET) { attr->cat_mtime = lvb->lvb_mtime; cl_valid |= CAT_MTIME; } if (ia_valid & ATTR_ATIME_SET) { attr->cat_atime = lvb->lvb_atime; cl_valid |= CAT_ATIME; } if (ia_valid & ATTR_CTIME_SET) { attr->cat_ctime = lvb->lvb_ctime; cl_valid |= CAT_CTIME; } result = cl_object_attr_set(env, obj, attr, cl_valid); } cl_object_attr_unlock(obj); } memset(oa, 0, sizeof(*oa)); if (result == 0) { oa->o_oi = loi->loi_oi; oa->o_mtime = attr->cat_mtime; oa->o_atime = attr->cat_atime; oa->o_ctime = attr->cat_ctime; oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP | OBD_MD_FLATIME | OBD_MD_FLCTIME | OBD_MD_FLMTIME; if (ia_valid & ATTR_SIZE) { oa->o_size = size; oa->o_blocks = OBD_OBJECT_EOF; oa->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS; if (oio->oi_lockless) { oa->o_flags = OBD_FL_SRVLOCK; oa->o_valid |= OBD_MD_FLFLAGS; } } else { LASSERT(oio->oi_lockless == 0); } oinfo.oi_oa = oa; oinfo.oi_capa = io->u.ci_setattr.sa_capa; init_completion(&cbargs->opc_sync); if (ia_valid & ATTR_SIZE) result = 
osc_punch_base(osc_export(cl2osc(obj)), &oinfo, osc_async_upcall, cbargs, PTLRPCD_SET); else result = osc_setattr_async_base(osc_export(cl2osc(obj)), &oinfo, NULL, osc_async_upcall, cbargs, PTLRPCD_SET); cbargs->opc_rpc_sent = result == 0; } return result; }
/**
 * Asynchronous commit of a list of pages for the osc layer.
 *
 * For lockless IO the first and last pages of \a qin are clipped to the
 * written byte range first. Then pages are taken from the head of \a qin one
 * by one: a page not yet on a pending list is added to the cache, the page
 * is touched up to its written end, removed from \a qin and reported through
 * \a cb. Processing stops on a page that is on an rpc list (-EBUSY) or when
 * adding to the cache fails. For sync writes the active extent is released
 * before returning.
 *
 * \param from byte offset where data starts in the first page
 * \param to   byte offset where data ends in the last page
 * \param cb   callback invoked for every committed page
 *
 * \retval 0     all pages were committed
 * \retval other first error encountered; remaining pages stay in \a qin
 */
static int osc_io_commit_async(const struct lu_env *env,
			       const struct cl_io_slice *ios,
			       struct cl_page_list *qin, int from, int to,
			       cl_commit_cbt cb)
{
	struct cl_io *io = ios->cis_io;
	struct osc_io *oio = cl2osc_io(env, ios);
	struct osc_object *osc = cl2osc(ios->cis_obj);
	struct cl_page *pg;
	struct cl_page *tail;
	struct osc_page *opg;
	int rc = 0;
	ENTRY;

	LASSERT(qin->pl_nr > 0);

	/* Handle partial page cases */
	tail = cl_page_list_last(qin);
	if (oio->oi_lockless) {
		pg = cl_page_list_first(qin);
		if (pg != tail) {
			/* Distinct first and last pages: clip each end
			 * only if it is actually partial. */
			if (from != 0)
				cl_page_clip(env, pg, from, PAGE_SIZE);
			if (to != PAGE_SIZE)
				cl_page_clip(env, tail, 0, to);
		} else {
			/* Single page: both bounds apply to it. */
			cl_page_clip(env, pg, from, to);
		}
	}

	while (qin->pl_nr > 0) {
		struct osc_async_page *oap;

		pg = cl_page_list_first(qin);
		opg = osc_cl_page_osc(pg, osc);
		oap = &opg->ops_oap;

		if (!list_empty(&oap->oap_rpc_item)) {
			CDEBUG(D_CACHE, "Busy oap %p page %p for submit.\n",
			       oap, opg);
			rc = -EBUSY;
			break;
		}

		/* The page may be already in dirty cache. */
		if (list_empty(&oap->oap_pending_item)) {
			rc = osc_page_cache_add(env, &opg->ops_cl, io);
			if (rc != 0)
				break;
		}

		/* Only the last page may end before PAGE_SIZE. */
		osc_page_touch_at(env, osc2cl(osc), osc_index(opg),
				  pg == tail ? to : PAGE_SIZE);

		cl_page_list_del(env, qin, pg);

		(*cb)(env, io, pg);
		/* Can't access page any more. Page can be in transfer and
		 * complete at any time. */
	}

	/* for sync write, kernel will wait for this page to be flushed before
	 * osc_io_end() is called, so release it earlier.
	 * for mkwrite(), it's known there is no further pages. */
	if (cl_io_is_sync_write(io) && oio->oi_active != NULL) {
		osc_extent_release(env, oio->oi_active);
		oio->oi_active = NULL;
	}

	CDEBUG(D_INFO, "%d %d\n", qin->pl_nr, rc);
	RETURN(rc);
}