int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock,
                     struct ccc_grouplock *cg)
{
        struct lu_env *env;
        struct cl_io *io;
        struct cl_lock *lock;
        struct cl_lock_descr *descr;
        __u32 enqflags;
        int refcheck;
        int rc;

        env = cl_env_get(&refcheck);
        if (IS_ERR(env))
                return PTR_ERR(env);

        io = ccc_env_thread_io(env);
        io->ci_obj = obj;
        io->ci_ignore_layout = 1;

        rc = cl_io_init(env, io, CIT_MISC, io->ci_obj);
        if (rc) {
                cl_io_fini(env, io);
                cl_env_put(env, &refcheck);
                /* Does not make sense to take GL for released layout */
                if (rc > 0)
                        rc = -ENOTSUPP;
                return rc;
        }

        descr = &ccc_env_info(env)->cti_descr;
        descr->cld_obj = obj;
        descr->cld_start = 0;
        descr->cld_end = CL_PAGE_EOF;
        descr->cld_gid = gid;
        descr->cld_mode = CLM_GROUP;

        enqflags = CEF_MUST | (nonblock ? CEF_NONBLOCK : 0);
        descr->cld_enq_flags = enqflags;

        lock = cl_lock_request(env, io, descr, GROUPLOCK_SCOPE, current);
        if (IS_ERR(lock)) {
                cl_io_fini(env, io);
                cl_env_put(env, &refcheck);
                return PTR_ERR(lock);
        }

        cg->cg_env = cl_env_get(&refcheck);
        cg->cg_io = io;
        cg->cg_lock = lock;
        cg->cg_gid = gid;
        LASSERT(cg->cg_env == env);

        cl_env_unplant(env, &refcheck);
        return 0;
}
int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock,
                     struct ll_grouplock *lg)
{
        struct lu_env *env;
        struct cl_io *io;
        struct cl_lock *lock;
        struct cl_lock_descr *descr;
        __u32 enqflags;
        __u16 refcheck;
        int rc;

        env = cl_env_get(&refcheck);
        if (IS_ERR(env))
                return PTR_ERR(env);

        io = vvp_env_thread_io(env);
        io->ci_obj = obj;

        rc = cl_io_init(env, io, CIT_MISC, io->ci_obj);
        if (rc != 0) {
                cl_io_fini(env, io);
                cl_env_put(env, &refcheck);
                /* Does not make sense to take GL for released layout */
                if (rc > 0)
                        rc = -ENOTSUPP;
                return rc;
        }

        lock = vvp_env_lock(env);
        descr = &lock->cll_descr;
        descr->cld_obj = obj;
        descr->cld_start = 0;
        descr->cld_end = CL_PAGE_EOF;
        descr->cld_gid = gid;
        descr->cld_mode = CLM_GROUP;

        enqflags = CEF_MUST | (nonblock ? CEF_NONBLOCK : 0);
        descr->cld_enq_flags = enqflags;

        rc = cl_lock_request(env, io, lock);
        if (rc < 0) {
                cl_io_fini(env, io);
                cl_env_put(env, &refcheck);
                return rc;
        }

        lg->lg_env = env;
        lg->lg_io = io;
        lg->lg_lock = lock;
        lg->lg_gid = gid;

        return 0;
}
/**
 * Discard pages protected by the given lock. This function traverses the
 * radix tree to find all covering pages and discards them. If a page is
 * still covered by another lock, it should remain in cache.
 *
 * If an error happens on any step, the process continues anyway (the
 * reasoning behind this being that lock cancellation cannot be delayed
 * indefinitely).
 */
static int mdc_lock_discard_pages(const struct lu_env *env,
                                  struct osc_object *osc,
                                  pgoff_t start, pgoff_t end,
                                  bool discard)
{
        struct osc_thread_info *info = osc_env_info(env);
        struct cl_io *io = &info->oti_io;
        osc_page_gang_cbt cb;
        int res;
        int result;

        ENTRY;

        io->ci_obj = cl_object_top(osc2cl(osc));
        io->ci_ignore_layout = 1;
        result = cl_io_init(env, io, CIT_MISC, io->ci_obj);
        if (result != 0)
                GOTO(out, result);

        cb = discard ? osc_discard_cb : mdc_check_and_discard_cb;
        info->oti_fn_index = info->oti_next_index = start;
        do {
                res = osc_page_gang_lookup(env, io, osc, info->oti_next_index,
                                           end, cb, (void *)osc);
                if (info->oti_next_index > end)
                        break;

                if (res == CLP_GANG_RESCHED)
                        cond_resched();
        } while (res != CLP_GANG_OKAY);
out:
        cl_io_fini(env, io);
        RETURN(result);
}
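For illustration, a caller that works on byte extents could drive the helper above roughly as sketched below. The wrapper name and the byte-to-page-index conversion are assumptions made for this sketch; only mdc_lock_discard_pages() comes from the code above.

/* Illustrative sketch only: discard the cached pages covering a byte extent
 * once its lock is cancelled. mdc_discard_byte_extent() is a hypothetical
 * helper; only mdc_lock_discard_pages() is taken from the code above. */
static int mdc_discard_byte_extent(const struct lu_env *env,
                                   struct osc_object *osc,
                                   loff_t start, loff_t end, bool discard)
{
        pgoff_t first = start >> PAGE_SHIFT;    /* first covered page index */
        pgoff_t last  = end >> PAGE_SHIFT;      /* last covered page index  */

        /* errors inside the gang lookup are not fatal, see the comment on
         * mdc_lock_discard_pages() above */
        return mdc_lock_discard_pages(env, osc, first, last, discard);
}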
/**
 * API-independent part of page fault initialization.
 * \param env - corresponding lu_env for processing
 * \param vma - virtual memory area addressed by the page fault
 * \param index - page index corresponding to the fault.
 * \param ra_flags - vma readahead flags.
 *
 * \return error codes from cl_io_init.
 */
static struct cl_io *
ll_fault_io_init(struct lu_env *env, struct vm_area_struct *vma,
                 pgoff_t index, unsigned long *ra_flags)
{
        struct file *file = vma->vm_file;
        struct inode *inode = file_inode(file);
        struct cl_io *io;
        struct cl_fault_io *fio;
        int rc;

        ENTRY;

        if (ll_file_nolock(file))
                RETURN(ERR_PTR(-EOPNOTSUPP));

restart:
        io = vvp_env_thread_io(env);
        io->ci_obj = ll_i2info(inode)->lli_clob;
        LASSERT(io->ci_obj != NULL);

        fio = &io->u.ci_fault;
        fio->ft_index = index;
        fio->ft_executable = vma->vm_flags & VM_EXEC;

        /*
         * disable VM_SEQ_READ and use VM_RAND_READ to make sure that
         * the kernel will not read other pages not covered by ldlm in
         * filemap_nopage. we do our readahead in ll_readpage.
         */
        if (ra_flags != NULL)
                *ra_flags = vma->vm_flags & (VM_RAND_READ | VM_SEQ_READ);
        vma->vm_flags &= ~VM_SEQ_READ;
        vma->vm_flags |= VM_RAND_READ;

        CDEBUG(D_MMAP, "vm_flags: %lx (%lu %d)\n", vma->vm_flags,
               fio->ft_index, fio->ft_executable);

        rc = cl_io_init(env, io, CIT_FAULT, io->ci_obj);
        if (rc == 0) {
                struct vvp_io *vio = vvp_env_io(env);
                struct ll_file_data *fd = LUSTRE_FPRIVATE(file);

                LASSERT(vio->vui_cl.cis_io == io);

                /* mmap lock must be MANDATORY; it has to cache pages. */
                io->ci_lockreq = CILR_MANDATORY;
                vio->vui_fd = fd;
        } else {
                LASSERT(rc < 0);
                cl_io_fini(env, io);
                if (io->ci_need_restart)
                        goto restart;
                io = ERR_PTR(rc);
        }

        RETURN(io);
}
/**
 * Lustre implementation of a vm_operations_struct::fault() method, called by
 * the VM to serve a page fault (both in kernel and user space).
 *
 * \param vma - virtual area struct related to the page fault
 * \param vmf - structure which describes the type and address of the fault
 *
 * \return allocated and filled _locked_ page for the address
 * \retval VM_FAULT_ERROR on general error
 * \retval NOPAGE_OOM if there is no memory to allocate a new page
 */
static int ll_fault0(struct vm_area_struct *vma, struct vm_fault *vmf)
{
        struct lu_env *env;
        struct cl_io *io;
        struct vvp_io *vio = NULL;
        struct page *vmpage;
        unsigned long ra_flags;
        struct cl_env_nest nest;
        int result;
        int fault_ret = 0;

        io = ll_fault_io_init(vma, &env, &nest, vmf->pgoff, &ra_flags);
        if (IS_ERR(io))
                return to_fault_error(PTR_ERR(io));

        result = io->ci_result;
        if (result == 0) {
                vio = vvp_env_io(env);
                vio->u.fault.ft_vma = vma;
                vio->u.fault.ft_vmpage = NULL;
                vio->u.fault.ft_vmf = vmf;
                vio->u.fault.ft_flags = 0;
                vio->u.fault.ft_flags_valid = false;

                /* May call ll_readpage() */
                ll_cl_add(vma->vm_file, env, io);

                result = cl_io_loop(env, io);

                ll_cl_remove(vma->vm_file, env);

                /* ft_flags are only valid if we reached
                 * the call to filemap_fault */
                if (vio->u.fault.ft_flags_valid)
                        fault_ret = vio->u.fault.ft_flags;

                vmpage = vio->u.fault.ft_vmpage;
                if (result != 0 && vmpage) {
                        put_page(vmpage);
                        vmf->page = NULL;
                }
        }
        cl_io_fini(env, io);
        cl_env_nested_put(&nest, env);

        vma->vm_flags |= ra_flags;
        if (result != 0 && !(fault_ret & VM_FAULT_RETRY))
                fault_ret |= to_fault_error(result);

        CDEBUG(D_MMAP, "%s fault %d/%d\n", current->comm, fault_ret, result);
        return fault_ret;
}
void cl_put_grouplock(struct ll_grouplock *lg)
{
        struct lu_env *env = lg->lg_env;
        struct cl_io *io = lg->lg_io;
        struct cl_lock *lock = lg->lg_lock;

        LASSERT(lg->lg_env != NULL);
        LASSERT(lg->lg_gid != 0);

        cl_lock_release(env, lock);
        cl_io_fini(env, io);
        cl_env_put(env, NULL);
}
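Taken together, cl_get_grouplock() and cl_put_grouplock() bracket a group-lock critical section covering the whole file ([0, CL_PAGE_EOF]). The sketch below shows how a caller might pair them; the wrapper name with_grouplock() and its callback arguments are illustrative assumptions, only the grouplock helpers and struct ll_grouplock come from the code above.

/* Illustrative sketch: take a group lock on an object, run some work under
 * it, then drop it. with_grouplock() and do_work() are hypothetical names
 * introduced for this example. */
static int with_grouplock(struct cl_object *obj, unsigned long gid,
                          int nonblock, int (*do_work)(void *), void *arg)
{
        struct ll_grouplock lg;
        int rc;

        rc = cl_get_grouplock(obj, gid, nonblock, &lg);
        if (rc != 0)
                return rc;       /* negative errno on failure */

        rc = do_work(arg);       /* the whole file extent is now covered */

        cl_put_grouplock(&lg);   /* releases the lock, the io and the env */
        return rc;
}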
int cl_get_grouplock(struct cl_object *obj, unsigned long gid, int nonblock,
                     struct ccc_grouplock *cg)
{
        struct lu_env *env;
        struct cl_io *io;
        struct cl_lock *lock;
        struct cl_lock_descr *descr;
        __u32 enqflags;
        int refcheck;
        int rc;

        env = cl_env_get(&refcheck);
        if (IS_ERR(env))
                return PTR_ERR(env);

        io = &ccc_env_info(env)->cti_io;
        io->ci_obj = obj;

        rc = cl_io_init(env, io, CIT_MISC, io->ci_obj);
        if (rc) {
                LASSERT(rc < 0);
                cl_env_put(env, &refcheck);
                return rc;
        }

        descr = &ccc_env_info(env)->cti_descr;
        descr->cld_obj = obj;
        descr->cld_start = 0;
        descr->cld_end = CL_PAGE_EOF;
        descr->cld_gid = gid;
        descr->cld_mode = CLM_GROUP;

        enqflags = CEF_MUST | (nonblock ? CEF_NONBLOCK : 0);
        descr->cld_enq_flags = enqflags;

        lock = cl_lock_request(env, io, descr, GROUPLOCK_SCOPE, cfs_current());
        if (IS_ERR(lock)) {
                cl_io_fini(env, io);
                cl_env_put(env, &refcheck);
                return PTR_ERR(lock);
        }

        cg->cg_env = cl_env_get(&refcheck);
        cg->cg_lock = lock;
        cg->cg_gid = gid;
        LASSERT(cg->cg_env == env);

        cl_env_unplant(env, &refcheck);
        return 0;
}
int cl_glimpse_size0(struct inode *inode, int agl)
{
        /*
         * We don't need ast_flags argument to cl_glimpse_size(), because
         * osc_lock_enqueue() takes care of the possible deadlock that said
         * argument was introduced to avoid.
         */
        /*
         * XXX but note that ll_file_seek() passes LDLM_FL_BLOCK_NOWAIT to
         * cl_glimpse_size(), which doesn't make sense: glimpse locks are not
         * blocking anyway.
         */
        struct lu_env *env = NULL;
        struct cl_io *io = NULL;
        __u16 refcheck;
        int retried = 0;
        int result;

        ENTRY;

        result = cl_io_get(inode, &env, &io, &refcheck);
        if (result <= 0)
                RETURN(result);

        do {
                io->ci_ndelay_tried = retried++;
                io->ci_ndelay = io->ci_verify_layout = 1;
                result = cl_io_init(env, io, CIT_GLIMPSE, io->ci_obj);
                if (result > 0) {
                        /*
                         * nothing to do for this io. This currently happens
                         * when stripe sub-objects are not yet created.
                         */
                        result = io->ci_result;
                } else if (result == 0) {
                        result = cl_glimpse_lock(env, io, inode, io->ci_obj,
                                                 agl);
                        if (!agl && result == -EWOULDBLOCK)
                                io->ci_need_restart = 1;
                }

                OBD_FAIL_TIMEOUT(OBD_FAIL_GLIMPSE_DELAY, 2);
                cl_io_fini(env, io);
        } while (unlikely(io->ci_need_restart));

        cl_env_put(env, &refcheck);
        RETURN(result);
}
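Callers normally reach cl_glimpse_size0() through thin inline wrappers that fix the agl argument. The sketch below shows that pattern; the wrapper names cl_glimpse_size() and cl_agl() are assumed here rather than taken from the code above.

/* A minimal sketch of the assumed wrappers on top of cl_glimpse_size0():
 * a regular glimpse and an asynchronous glimpse lock (AGL) request differ
 * only in the agl flag. */
static inline int cl_glimpse_size(struct inode *inode)
{
        return cl_glimpse_size0(inode, 0);      /* synchronous glimpse */
}

static inline int cl_agl(struct inode *inode)
{
        return cl_glimpse_size0(inode, 1);      /* asynchronous glimpse lock */
}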
void cl_put_grouplock(struct ccc_grouplock *cg)
{
        struct lu_env *env = cg->cg_env;
        struct cl_lock *lock = cg->cg_lock;
        int refcheck;

        LASSERT(cg->cg_env);
        LASSERT(cg->cg_gid);

        cl_env_implant(env, &refcheck);
        cl_env_put(env, &refcheck);

        cl_unuse(env, lock);
        cl_lock_release(env, lock, GROUPLOCK_SCOPE, cfs_current());
        cl_io_fini(env, &ccc_env_info(env)->cti_io);
        cl_env_put(env, NULL);
}
int cl_local_size(struct inode *inode)
{
        struct lu_env *env = NULL;
        struct cl_io *io = NULL;
        struct ccc_thread_info *cti;
        struct cl_object *clob;
        struct cl_lock_descr *descr;
        struct cl_lock *lock;
        int result;
        int refcheck;

        ENTRY;

        if (!cl_i2info(inode)->lli_has_smd)
                RETURN(0);

        result = cl_io_get(inode, &env, &io, &refcheck);
        if (result <= 0)
                RETURN(result);

        clob = io->ci_obj;
        result = cl_io_init(env, io, CIT_MISC, clob);
        if (result > 0)
                result = io->ci_result;
        else if (result == 0) {
                cti = ccc_env_info(env);
                descr = &cti->cti_descr;

                *descr = whole_file;
                descr->cld_obj = clob;
                lock = cl_lock_peek(env, io, descr, "localsize", current);
                if (lock != NULL) {
                        cl_merge_lvb(env, inode);
                        cl_unuse(env, lock);
                        cl_lock_release(env, lock, "localsize", current);
                        result = 0;
                } else
                        result = -ENODATA;
        }
        cl_io_fini(env, io);
        cl_env_put(env, &refcheck);
        RETURN(result);
}
/**
 * API-independent part of page fault initialization.
 * \param vma - virtual memory area addressed by the page fault
 * \param env - corresponding lu_env for processing
 * \param nest - nesting level
 * \param index - page index corresponding to the fault.
 * \param ra_flags - vma readahead flags.
 *
 * \return allocated and initialized env for the fault operation.
 * \retval EINVAL if the env cannot be allocated
 * \return other error codes from cl_io_init.
 */
static struct cl_io *
ll_fault_io_init(struct vm_area_struct *vma, struct lu_env **env_ret,
                 struct cl_env_nest *nest, pgoff_t index,
                 unsigned long *ra_flags)
{
        struct file *file = vma->vm_file;
        struct inode *inode = file_inode(file);
        struct cl_io *io;
        struct cl_fault_io *fio;
        struct lu_env *env;
        int rc;

        *env_ret = NULL;
        if (ll_file_nolock(file))
                return ERR_PTR(-EOPNOTSUPP);

        /*
         * page fault can be called when lustre IO is
         * already active for the current thread, e.g., when doing read/write
         * against user level buffer mapped from Lustre buffer. To avoid
         * stomping on existing context, optionally force an allocation of a
         * new one.
         */
        env = cl_env_nested_get(nest);
        if (IS_ERR(env))
                return ERR_PTR(-EINVAL);

        *env_ret = env;

        io = ccc_env_thread_io(env);
        io->ci_obj = ll_i2info(inode)->lli_clob;
        LASSERT(io->ci_obj);

        fio = &io->u.ci_fault;
        fio->ft_index = index;
        fio->ft_executable = vma->vm_flags & VM_EXEC;

        /*
         * disable VM_SEQ_READ and use VM_RAND_READ to make sure that
         * the kernel will not read other pages not covered by ldlm in
         * filemap_nopage. we do our readahead in ll_readpage.
         */
        if (ra_flags)
                *ra_flags = vma->vm_flags & (VM_RAND_READ | VM_SEQ_READ);
        vma->vm_flags &= ~VM_SEQ_READ;
        vma->vm_flags |= VM_RAND_READ;

        CDEBUG(D_MMAP, "vm_flags: %lx (%lu %d)\n", vma->vm_flags,
               fio->ft_index, fio->ft_executable);

        rc = cl_io_init(env, io, CIT_FAULT, io->ci_obj);
        if (rc == 0) {
                struct ccc_io *cio = ccc_env_io(env);
                struct ll_file_data *fd = LUSTRE_FPRIVATE(file);

                LASSERT(cio->cui_cl.cis_io == io);

                /* mmap lock must be MANDATORY; it has to cache pages. */
                io->ci_lockreq = CILR_MANDATORY;
                cio->cui_fd = fd;
        } else {
                LASSERT(rc < 0);
                cl_io_fini(env, io);
                cl_env_nested_put(nest, env);
                io = ERR_PTR(rc);
        }

        return io;
}
/* Sharing code of page_mkwrite method for rhel5 and rhel6 */
static int ll_page_mkwrite0(struct vm_area_struct *vma, struct page *vmpage,
                            bool *retry)
{
        struct lu_env *env;
        struct cl_io *io;
        struct vvp_io *vio;
        struct cl_env_nest nest;
        int result;
        sigset_t set;
        struct inode *inode;
        struct ll_inode_info *lli;

        io = ll_fault_io_init(vma, &env, &nest, vmpage->index, NULL);
        if (IS_ERR(io)) {
                result = PTR_ERR(io);
                goto out;
        }

        result = io->ci_result;
        if (result < 0)
                goto out_io;

        io->u.ci_fault.ft_mkwrite = 1;
        io->u.ci_fault.ft_writable = 1;

        vio = vvp_env_io(env);
        vio->u.fault.ft_vma = vma;
        vio->u.fault.ft_vmpage = vmpage;

        set = cfs_block_sigsinv(sigmask(SIGKILL) | sigmask(SIGTERM));

        /* we grab lli_trunc_sem to exclude truncate case.
         * Otherwise, we could add dirty pages into osc cache
         * while truncate is on-going. */
        inode = ccc_object_inode(io->ci_obj);
        lli = ll_i2info(inode);
        down_read(&lli->lli_trunc_sem);

        result = cl_io_loop(env, io);

        up_read(&lli->lli_trunc_sem);

        cfs_restore_sigs(set);

        if (result == 0) {
                struct inode *inode = file_inode(vma->vm_file);
                struct ll_inode_info *lli = ll_i2info(inode);

                lock_page(vmpage);
                if (!vmpage->mapping) {
                        unlock_page(vmpage);

                        /* page was truncated and lock was cancelled, return
                         * ENODATA so that VM_FAULT_NOPAGE will be returned
                         * to handle_mm_fault(). */
                        if (result == 0)
                                result = -ENODATA;
                } else if (!PageDirty(vmpage)) {
                        /* race, the page has been cleaned by ptlrpcd after
                         * it was unlocked, it has to be added into dirty
                         * cache again otherwise this soon-to-dirty page won't
                         * consume any grants, even worse if this page is
                         * being transferred because it will break RPC
                         * checksum. */
                        unlock_page(vmpage);

                        CDEBUG(D_MMAP, "Race on page_mkwrite %p/%lu, page has been written out, retry.\n",
                               vmpage, vmpage->index);

                        *retry = true;
                        result = -EAGAIN;
                }

                if (result == 0) {
                        spin_lock(&lli->lli_lock);
                        lli->lli_flags |= LLIF_DATA_MODIFIED;
                        spin_unlock(&lli->lli_lock);
                }
        }

out_io:
        cl_io_fini(env, io);
        cl_env_nested_put(&nest, env);
out:
        CDEBUG(D_MMAP, "%s mkwrite with %d\n", current->comm, result);
        LASSERT(ergo(result == 0, PageLocked(vmpage)));

        return result;
}
/**
 * Lustre implementation of a vm_operations_struct::fault() method, called by
 * the VM to serve a page fault (both in kernel and user space).
 *
 * \param vma - virtual area struct related to the page fault
 * \param vmf - structure which describes the type and address of the fault
 *
 * \return allocated and filled _locked_ page for the address
 * \retval VM_FAULT_ERROR on general error
 * \retval NOPAGE_OOM if there is no memory to allocate a new page
 */
static int ll_fault0(struct vm_area_struct *vma, struct vm_fault *vmf)
{
        struct lu_env *env;
        struct cl_io *io;
        struct vvp_io *vio = NULL;
        struct page *vmpage;
        unsigned long ra_flags;
        int result = 0;
        int fault_ret = 0;
        __u16 refcheck;

        ENTRY;

        env = cl_env_get(&refcheck);
        if (IS_ERR(env))
                RETURN(PTR_ERR(env));

        if (ll_sbi_has_fast_read(ll_i2sbi(file_inode(vma->vm_file)))) {
                /* do fast fault */
                ll_cl_add(vma->vm_file, env, NULL, LCC_MMAP);
                fault_ret = filemap_fault(vma, vmf);
                ll_cl_remove(vma->vm_file, env);

                /* - If there is no error, then the page was found in cache
                 *   and uptodate;
                 * - If VM_FAULT_RETRY is set, the page existed but failed to
                 *   lock. It will return to kernel and retry;
                 * - Otherwise, it should try normal fault under DLM lock. */
                if ((fault_ret & VM_FAULT_RETRY) ||
                    !(fault_ret & VM_FAULT_ERROR))
                        GOTO(out, result = 0);

                fault_ret = 0;
        }

        io = ll_fault_io_init(env, vma, vmf->pgoff, &ra_flags);
        if (IS_ERR(io))
                GOTO(out, result = PTR_ERR(io));

        result = io->ci_result;
        if (result == 0) {
                vio = vvp_env_io(env);
                vio->u.fault.ft_vma = vma;
                vio->u.fault.ft_vmpage = NULL;
                vio->u.fault.ft_vmf = vmf;
                vio->u.fault.ft_flags = 0;
                vio->u.fault.ft_flags_valid = 0;

                /* May call ll_readpage() */
                ll_cl_add(vma->vm_file, env, io, LCC_MMAP);

                result = cl_io_loop(env, io);

                ll_cl_remove(vma->vm_file, env);

                /* ft_flags are only valid if we reached
                 * the call to filemap_fault */
                if (vio->u.fault.ft_flags_valid)
                        fault_ret = vio->u.fault.ft_flags;

                vmpage = vio->u.fault.ft_vmpage;
                if (result != 0 && vmpage != NULL) {
                        put_page(vmpage);
                        vmf->page = NULL;
                }
        }
        cl_io_fini(env, io);

        vma->vm_flags |= ra_flags;

out:
        cl_env_put(env, &refcheck);
        if (result != 0 && !(fault_ret & VM_FAULT_RETRY))
                fault_ret |= to_fault_error(result);

        CDEBUG(D_MMAP, "%s fault %d/%d\n", current->comm, fault_ret, result);
        RETURN(fault_ret);
}
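For context, ll_fault0() is assumed to be driven from the actual vm_operations_struct::fault() entry point roughly as in the sketch below: signals other than SIGKILL/SIGTERM are blocked for the duration of the fault, and the fault is re-driven if the returned page was truncated before it could be locked. This is a simplified sketch, not the verbatim upstream wrapper.

/* Minimal sketch of the assumed fault() entry point built on ll_fault0(). */
static int ll_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
        sigset_t set;
        int result;

        /* only SIGKILL and SIGTERM are allowed while faulting */
        set = cfs_block_sigsinv(sigmask(SIGKILL) | sigmask(SIGTERM));

restart:
        result = ll_fault0(vma, vmf);
        if (!(result & (VM_FAULT_RETRY | VM_FAULT_ERROR | VM_FAULT_LOCKED))) {
                struct page *vmpage = vmf->page;

                /* check whether the page was truncated under us */
                lock_page(vmpage);
                if (unlikely(vmpage->mapping == NULL)) {
                        unlock_page(vmpage);
                        put_page(vmpage);
                        vmf->page = NULL;
                        goto restart;
                }
                result |= VM_FAULT_LOCKED;
        }
        cfs_restore_sigs(set);
        return result;
}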
/* Sharing code of page_mkwrite method for rhel5 and rhel6 */
static int ll_page_mkwrite0(struct vm_area_struct *vma, struct page *vmpage,
                            bool *retry)
{
        struct lu_env *env;
        struct cl_io *io;
        struct vvp_io *vio;
        int result;
        __u16 refcheck;
        sigset_t set;
        struct inode *inode;
        struct ll_inode_info *lli;

        ENTRY;

        LASSERT(vmpage != NULL);
        env = cl_env_get(&refcheck);
        if (IS_ERR(env))
                RETURN(PTR_ERR(env));

        io = ll_fault_io_init(env, vma, vmpage->index, NULL);
        if (IS_ERR(io))
                GOTO(out, result = PTR_ERR(io));

        result = io->ci_result;
        if (result < 0)
                GOTO(out_io, result);

        io->u.ci_fault.ft_mkwrite = 1;
        io->u.ci_fault.ft_writable = 1;

        vio = vvp_env_io(env);
        vio->u.fault.ft_vma = vma;
        vio->u.fault.ft_vmpage = vmpage;

        set = cfs_block_sigsinv(sigmask(SIGKILL) | sigmask(SIGTERM));

        inode = vvp_object_inode(io->ci_obj);
        lli = ll_i2info(inode);

        result = cl_io_loop(env, io);

        cfs_restore_sigs(set);

        if (result == 0) {
                lock_page(vmpage);
                if (vmpage->mapping == NULL) {
                        unlock_page(vmpage);

                        /* page was truncated and lock was cancelled, return
                         * ENODATA so that VM_FAULT_NOPAGE will be returned
                         * to handle_mm_fault(). */
                        if (result == 0)
                                result = -ENODATA;
                } else if (!PageDirty(vmpage)) {
                        /* race, the page has been cleaned by ptlrpcd after
                         * it was unlocked, it has to be added into dirty
                         * cache again otherwise this soon-to-dirty page won't
                         * consume any grants, even worse if this page is
                         * being transferred because it will break RPC
                         * checksum. */
                        unlock_page(vmpage);

                        CDEBUG(D_MMAP, "Race on page_mkwrite %p/%lu, page has been written out, retry.\n",
                               vmpage, vmpage->index);

                        *retry = true;
                        result = -EAGAIN;
                }

                if (result == 0)
                        ll_file_set_flag(lli, LLIF_DATA_MODIFIED);
        }
        EXIT;

out_io:
        cl_io_fini(env, io);
out:
        cl_env_put(env, &refcheck);
        CDEBUG(D_MMAP, "%s mkwrite with %d\n", current->comm, result);
        LASSERT(ergo(result == 0, PageLocked(vmpage)));

        return result;
}
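The retry flag set above is consumed by the page_mkwrite entry point, which is assumed to loop on the cleaned-page race and translate the errno into a VM_FAULT_* code, roughly as sketched below. This is a simplified sketch, not the verbatim upstream wrapper.

/* Minimal sketch of the assumed page_mkwrite entry point built on
 * ll_page_mkwrite0(): retry while the helper reports the race, then map
 * the result onto VM_FAULT_* codes. */
static int ll_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
        bool retry;
        int result;

        do {
                retry = false;
                result = ll_page_mkwrite0(vma, vmf->page, &retry);
        } while (retry);

        switch (result) {
        case 0:
                result = VM_FAULT_LOCKED;       /* page is locked and dirty */
                break;
        case -ENODATA:
        case -EFAULT:
                result = VM_FAULT_NOPAGE;       /* page was truncated */
                break;
        case -ENOMEM:
                result = VM_FAULT_OOM;
                break;
        case -EAGAIN:
                result = VM_FAULT_RETRY;
                break;
        default:
                result = VM_FAULT_SIGBUS;
                break;
        }
        return result;
}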