static int osc_io_commit_write(const struct lu_env *env,
                               const struct cl_io_slice *ios,
                               const struct cl_page_slice *slice,
                               unsigned from, unsigned to)
{
        struct osc_io         *oio = cl2osc_io(env, ios);
        struct osc_page       *opg = cl2osc_page(slice);
        struct osc_object     *obj = cl2osc(opg->ops_cl.cpl_obj);
        struct osc_async_page *oap = &opg->ops_oap;

        LASSERT(to > 0);
        /*
         * XXX instead of calling osc_page_touch() here and in
         * osc_io_fault_start() it might be more logical to introduce
         * cl_page_touch() method, that generic cl_io_commit_write() and page
         * fault code calls.
         */
        osc_page_touch(env, cl2osc_page(slice), to);

        if (!client_is_remote(osc_export(obj)) &&
            capable(CFS_CAP_SYS_RESOURCE))
                oap->oap_brw_flags |= OBD_BRW_NOQUOTA;

        if (oio->oi_lockless)
                /* see osc_io_prepare_write() for lockless io handling. */
                cl_page_clip(env, slice->cpl_page, from, to);

        return 0;
}
ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io,
                           int rw, struct inode *inode,
                           struct ll_dio_pages *pv)
{
        struct cl_page    *clp;
        struct cl_2queue  *queue;
        struct cl_object  *obj = io->ci_obj;
        int i;
        ssize_t rc = 0;
        loff_t file_offset  = pv->ldp_start_offset;
        size_t size         = pv->ldp_size;
        int page_count      = pv->ldp_nr;
        struct page **pages = pv->ldp_pages;
        size_t page_size    = cl_page_size(obj);
        bool do_io;
        int  io_pages       = 0;
        ENTRY;

        queue = &io->ci_queue;
        cl_2queue_init(queue);
        for (i = 0; i < page_count; i++) {
                if (pv->ldp_offsets)
                        file_offset = pv->ldp_offsets[i];

                LASSERT(!(file_offset & (page_size - 1)));
                clp = cl_page_find(env, obj, cl_index(obj, file_offset),
                                   pv->ldp_pages[i], CPT_TRANSIENT);
                if (IS_ERR(clp)) {
                        rc = PTR_ERR(clp);
                        break;
                }

                rc = cl_page_own(env, io, clp);
                if (rc) {
                        LASSERT(clp->cp_state == CPS_FREEING);
                        cl_page_put(env, clp);
                        break;
                }

                do_io = true;

                /* check the page type: if the page is a host page, then do
                 * write directly */
                if (clp->cp_type == CPT_CACHEABLE) {
                        struct page *vmpage = cl_page_vmpage(clp);
                        struct page *src_page;
                        struct page *dst_page;
                        void *src;
                        void *dst;

                        src_page = (rw == WRITE) ? pages[i] : vmpage;
                        dst_page = (rw == WRITE) ? vmpage : pages[i];

                        src = ll_kmap_atomic(src_page, KM_USER0);
                        dst = ll_kmap_atomic(dst_page, KM_USER1);
                        memcpy(dst, src, min(page_size, size));
                        ll_kunmap_atomic(dst, KM_USER1);
                        ll_kunmap_atomic(src, KM_USER0);

                        /* make sure page will be added to the transfer by
                         * cl_io_submit()->...->vvp_page_prep_write(). */
                        if (rw == WRITE)
                                set_page_dirty(vmpage);

                        if (rw == READ) {
                                /* do not issue the page for read, since it
                                 * may reread a ra page which has NOT uptodate
                                 * bit set. */
                                cl_page_disown(env, io, clp);
                                do_io = false;
                        }
                }

                if (likely(do_io)) {
                        cl_2queue_add(queue, clp);

                        /*
                         * Set page clip to tell transfer formation engine
                         * that page has to be sent even if it is beyond KMS.
                         */
                        cl_page_clip(env, clp, 0, min(size, page_size));

                        ++io_pages;
                }

                /* drop the reference count for cl_page_find */
                cl_page_put(env, clp);

                size -= page_size;
                file_offset += page_size;
        }

        if (rc == 0 && io_pages) {
                rc = cl_io_submit_sync(env, io,
                                       rw == READ ? CRT_READ : CRT_WRITE,
                                       queue, 0);
        }
        if (rc == 0)
                rc = pv->ldp_size;

        cl_2queue_discard(env, io, queue);
        cl_2queue_disown(env, io, queue);
        cl_2queue_fini(env, queue);
        RETURN(rc);
}
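/*
 * A minimal standalone sketch (hypothetical names, not Lustre code) of the
 * two per-page ideas in ll_direct_rw_pages() above: the copy source and
 * destination are chosen from the transfer direction, and no more than
 * min(page_size, size) bytes are copied/clipped for the last, partial page.
 */
#include <stdio.h>
#include <string.h>

#define SKETCH_PAGE_SIZE 8           /* tiny "page" so the output stays short */
#define SKETCH_WRITE 1
#define SKETCH_READ  0

/* copy one page worth of data, honoring the bytes remaining in the request */
static size_t sketch_copy_page(int rw, char *user_page, char *cache_page,
                               size_t remaining)
{
        size_t bytes = remaining < SKETCH_PAGE_SIZE ? remaining : SKETCH_PAGE_SIZE;
        char *src = (rw == SKETCH_WRITE) ? user_page : cache_page;
        char *dst = (rw == SKETCH_WRITE) ? cache_page : user_page;

        memcpy(dst, src, bytes);
        return bytes;                 /* [0, bytes) is the range that would be clipped */
}

int main(void)
{
        char user[2][SKETCH_PAGE_SIZE]  = { "ABCDEFG", "HIJ" };
        char cache[2][SKETCH_PAGE_SIZE] = { "", "" };
        size_t size = 11;             /* request spans one full page plus a partial one */
        int i;

        for (i = 0; i < 2; i++) {
                size_t done = sketch_copy_page(SKETCH_WRITE, user[i], cache[i], size);

                printf("page %d: copied %zu bytes -> \"%s\"\n", i, done, cache[i]);
                size -= done;
        }
        return 0;
}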
static int llu_queue_pio(const struct lu_env *env, struct cl_io *io,
                         struct llu_io_group *group,
                         char *buf, size_t count, loff_t pos)
{
        struct cl_object  *obj = io->ci_obj;
        struct inode      *inode = ccc_object_inode(obj);
        struct intnl_stat *st = llu_i2stat(inode);
        struct obd_export *exp = llu_i2obdexp(inode);
        struct page       *page;
        int  rc = 0, ret_bytes = 0;
        struct cl_page    *clp;
        struct cl_2queue  *queue;
        ENTRY;

        if (!exp)
                RETURN(-EINVAL);

        queue = &io->ci_queue;
        cl_2queue_init(queue);

        /* prepare the pages array */
        do {
                unsigned long index, offset, bytes;

                offset = (pos & ~CFS_PAGE_MASK);
                index = pos >> PAGE_CACHE_SHIFT;
                bytes = PAGE_CACHE_SIZE - offset;
                if (bytes > count)
                        bytes = count;

                /* prevent read beyond file range */
                if (/* local_lock && */
                    io->ci_type == CIT_READ && pos + bytes >= st->st_size) {
                        if (pos >= st->st_size)
                                break;
                        bytes = st->st_size - pos;
                }

                /* prepare page for this index */
                page = llu_get_user_page(index, buf - offset, offset, bytes);
                if (!page) {
                        rc = -ENOMEM;
                        break;
                }

                clp = cl_page_find(env, obj,
                                   cl_index(obj, pos),
                                   page, CPT_TRANSIENT);
                if (IS_ERR(clp)) {
                        rc = PTR_ERR(clp);
                        break;
                }

                rc = cl_page_own(env, io, clp);
                if (rc) {
                        LASSERT(clp->cp_state == CPS_FREEING);
                        cl_page_put(env, clp);
                        break;
                }

                cl_2queue_add(queue, clp);

                /* drop the reference count for cl_page_find, so that the page
                 * will be freed in cl_2queue_fini. */
                cl_page_put(env, clp);

                cl_page_clip(env, clp, offset, offset + bytes);

                count -= bytes;
                pos += bytes;
                buf += bytes;

                group->lig_rwcount += bytes;
                ret_bytes += bytes;
                page++;
        } while (count);

        if (rc == 0) {
                enum cl_req_type iot;

                iot = io->ci_type == CIT_READ ? CRT_READ : CRT_WRITE;
                rc = cl_io_submit_sync(env, io, iot, queue, 0);
        }

        group->lig_rc = rc;

        cl_2queue_discard(env, io, queue);
        cl_2queue_disown(env, io, queue);
        cl_2queue_fini(env, queue);
        RETURN(ret_bytes);
}
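/*
 * Standalone sketch (hypothetical, not liblustre code) of the arithmetic
 * llu_queue_pio() uses to walk a user buffer: each iteration derives the page
 * index, the offset within that page, and the byte count for that page, then
 * advances pos and count.  The page size is shrunk to 16 bytes here purely to
 * keep the printed walk short.
 */
#include <stdio.h>
#include <stddef.h>

#define SKETCH_PAGE_SHIFT 4
#define SKETCH_PAGE_SIZE  (1UL << SKETCH_PAGE_SHIFT)
#define SKETCH_PAGE_MASK  (~(SKETCH_PAGE_SIZE - 1))

int main(void)
{
        unsigned long pos = 21;       /* starts in the middle of page 1 */
        size_t count = 40;            /* spans parts of three pages */

        while (count > 0) {
                unsigned long offset = pos & ~SKETCH_PAGE_MASK;
                unsigned long index  = pos >> SKETCH_PAGE_SHIFT;
                unsigned long bytes  = SKETCH_PAGE_SIZE - offset;

                if (bytes > count)
                        bytes = count;

                /* the real code clips the cl_page to [offset, offset + bytes) */
                printf("index=%lu offset=%lu bytes=%lu\n", index, offset, bytes);

                count -= bytes;
                pos   += bytes;
        }
        return 0;
}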
static int vvp_io_commit_write(const struct lu_env *env,
                               const struct cl_io_slice *ios,
                               const struct cl_page_slice *slice,
                               unsigned from, unsigned to)
{
        struct cl_object     *obj   = slice->cpl_obj;
        struct cl_io         *io    = ios->cis_io;
        struct ccc_page      *cp    = cl2ccc_page(slice);
        struct cl_page       *pg    = slice->cpl_page;
        struct inode         *inode = ccc_object_inode(obj);
        struct ll_sb_info    *sbi   = ll_i2sbi(inode);
        struct ll_inode_info *lli   = ll_i2info(inode);
        struct page          *vmpage = cp->cpg_page;

        int    result;
        int    tallyop;
        loff_t size;

        ENTRY;

        LINVRNT(cl_page_is_vmlocked(env, pg));
        LASSERT(vmpage->mapping->host == inode);

        LU_OBJECT_HEADER(D_INODE, env, &obj->co_lu, "committing page write\n");
        CL_PAGE_HEADER(D_PAGE, env, pg, "committing: [%d, %d]\n", from, to);

        /*
         * queue a write for some time in the future the first time we
         * dirty the page.
         *
         * This is different from what other file systems do: they usually
         * just mark page (and some of its buffers) dirty and rely on
         * balance_dirty_pages() to start a write-back. Lustre wants write-back
         * to be started earlier for the following reasons:
         *
         *     (1) with a large number of clients we need to limit the amount
         *     of cached data on the clients a lot;
         *
         *     (2) large compute jobs generally want compute-only then io-only
         *     and the IO should complete as quickly as possible;
         *
         *     (3) IO is batched up to the RPC size and is async until the
         *     client max cache is hit
         *     (/proc/fs/lustre/osc/OSC.../max_dirty_mb)
         *
         */
        if (!PageDirty(vmpage)) {
                tallyop = LPROC_LL_DIRTY_MISSES;

                result = cl_page_cache_add(env, io, pg, CRT_WRITE);
                if (result == 0) {
                        /* page was added into cache successfully. */
                        set_page_dirty(vmpage);
                        vvp_write_pending(cl2ccc(obj), cp);
                } else if (result == -EDQUOT) {
                        pgoff_t last_index = i_size_read(inode) >> PAGE_CACHE_SHIFT;
                        bool need_clip = true;

                        /*
                         * Client ran out of disk space grant. Possible
                         * strategies are:
                         *
                         *     (a) do a sync write, renewing grant;
                         *
                         *     (b) stop writing on this stripe, switch to the
                         *     next one.
                         *
                         * (b) is a part of "parallel io" design that is the
                         * ultimate goal. (a) is what "old" client did, and
                         * what the new code continues to do for the time
                         * being.
                         */
                        if (last_index > pg->cp_index) {
                                to = PAGE_CACHE_SIZE;
                                need_clip = false;
                        } else if (last_index == pg->cp_index) {
                                int size_to = i_size_read(inode) & ~CFS_PAGE_MASK;
                                if (to < size_to)
                                        to = size_to;
                        }
                        if (need_clip)
                                cl_page_clip(env, pg, 0, to);
                        result = vvp_page_sync_io(env, io, pg, cp, CRT_WRITE);
                        if (result)
                                CERROR("Write page %lu of inode %p failed %d\n",
                                       pg->cp_index, inode, result);
                }
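/*
 * Standalone sketch (hypothetical names, not Lustre code) of the range
 * decision in the -EDQUOT fallback above: if the page lies entirely below
 * i_size, sync-write the whole page; if it is the page containing EOF, extend
 * `to` up to the EOF offset within that page; otherwise keep the caller's
 * [0, to) range unchanged.
 */
#include <stdio.h>

#define SKETCH_PAGE_SHIFT 12
#define SKETCH_PAGE_SIZE  (1UL << SKETCH_PAGE_SHIFT)
#define SKETCH_PAGE_MASK  (~(SKETCH_PAGE_SIZE - 1))

/* returns the end offset of the range that would be sync-written */
static unsigned long sketch_commit_end(unsigned long i_size,
                                       unsigned long page_index,
                                       unsigned long to)
{
        unsigned long last_index = i_size >> SKETCH_PAGE_SHIFT;

        if (last_index > page_index)
                return SKETCH_PAGE_SIZE;          /* page fully below EOF: whole page */
        if (last_index == page_index) {
                unsigned long size_to = i_size & ~SKETCH_PAGE_MASK;

                if (to < size_to)
                        to = size_to;             /* extend up to EOF within the page */
        }
        return to;                                /* page at/after EOF: keep caller's range */
}

int main(void)
{
        /* i_size = 10000 -> EOF sits at offset 1808 within page index 2 */
        printf("%lu\n", sketch_commit_end(10000, 1, 100));   /* 4096 */
        printf("%lu\n", sketch_commit_end(10000, 2, 100));   /* 1808 */
        printf("%lu\n", sketch_commit_end(10000, 3, 100));   /* 100  */
        return 0;
}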
static int osc_io_commit_async(const struct lu_env *env,
                               const struct cl_io_slice *ios,
                               struct cl_page_list *qin, int from, int to,
                               cl_commit_cbt cb)
{
        struct cl_io      *io  = ios->cis_io;
        struct osc_io     *oio = cl2osc_io(env, ios);
        struct osc_object *osc = cl2osc(ios->cis_obj);
        struct cl_page    *page;
        struct cl_page    *last_page;
        struct osc_page   *opg;
        int result = 0;
        ENTRY;

        LASSERT(qin->pl_nr > 0);

        /* Handle partial page cases */
        last_page = cl_page_list_last(qin);
        if (oio->oi_lockless) {
                page = cl_page_list_first(qin);
                if (page == last_page) {
                        cl_page_clip(env, page, from, to);
                } else {
                        if (from != 0)
                                cl_page_clip(env, page, from, PAGE_SIZE);
                        if (to != PAGE_SIZE)
                                cl_page_clip(env, last_page, 0, to);
                }
        }

        while (qin->pl_nr > 0) {
                struct osc_async_page *oap;

                page = cl_page_list_first(qin);
                opg = osc_cl_page_osc(page, osc);
                oap = &opg->ops_oap;

                if (!list_empty(&oap->oap_rpc_item)) {
                        CDEBUG(D_CACHE, "Busy oap %p page %p for submit.\n",
                               oap, opg);
                        result = -EBUSY;
                        break;
                }

                /* The page may be already in dirty cache. */
                if (list_empty(&oap->oap_pending_item)) {
                        result = osc_page_cache_add(env, &opg->ops_cl, io);
                        if (result != 0)
                                break;
                }

                osc_page_touch_at(env, osc2cl(osc), osc_index(opg),
                                  page == last_page ? to : PAGE_SIZE);

                cl_page_list_del(env, qin, page);

                (*cb)(env, io, page);
                /* Can't access page any more. Page can be in transfer and
                 * complete at any time. */
        }

        /* for sync write, kernel will wait for this page to be flushed before
         * osc_io_end() is called, so release it earlier.
         * for mkwrite(), it's known there is no further pages. */
        if (cl_io_is_sync_write(io) && oio->oi_active != NULL) {
                osc_extent_release(env, oio->oi_active);
                oio->oi_active = NULL;
        }

        CDEBUG(D_INFO, "%d %d\n", qin->pl_nr, result);
        RETURN(result);
}
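/*
 * Standalone sketch (hypothetical, not Lustre code) of the partial-page
 * handling at the top of osc_io_commit_async(): only the first page of the
 * queue may have a non-zero `from`, only the last page may have a short `to`,
 * and every page in between is committed whole.  A queue of three pages is
 * modelled here as a plain array of ranges.
 */
#include <stdio.h>

#define SKETCH_PAGE_SIZE 4096

struct sketch_range {
        int from;
        int to;
};

static void sketch_clip_queue(struct sketch_range *pages, int nr,
                              int from, int to)
{
        int i;

        /* every page starts out as a full page */
        for (i = 0; i < nr; i++) {
                pages[i].from = 0;
                pages[i].to = SKETCH_PAGE_SIZE;
        }

        if (nr == 1) {                        /* single page: clip both ends */
                pages[0].from = from;
                pages[0].to = to;
                return;
        }
        if (from != 0)                        /* first page: clip the head */
                pages[0].from = from;
        if (to != SKETCH_PAGE_SIZE)           /* last page: clip the tail */
                pages[nr - 1].to = to;
}

int main(void)
{
        struct sketch_range q[3];
        int i;

        sketch_clip_queue(q, 3, 1000, 300);
        for (i = 0; i < 3; i++)
                printf("page %d: [%d, %d)\n", i, q[i].from, q[i].to);
        return 0;
}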