/**
 * API-independent part of page fault initialization.
 *
 * \param vma      virtual memory area the fault was raised against
 * \param env_ret  returned lu_env to use for processing
 * \param nest     nesting level
 * \param index    page index corresponding to the fault
 * \param ra_flags vma readahead flags
 *
 * \return allocated and initialized cl_io for the fault operation
 * \retval EINVAL if the env can't be allocated
 * \return other error codes from cl_io_init
 */
static struct cl_io *ll_fault_io_init(struct vm_area_struct *vma,
				      struct lu_env **env_ret,
				      struct cl_env_nest *nest,
				      pgoff_t index,
				      unsigned long *ra_flags)
{
	struct file *file = vma->vm_file;
	struct inode *inode = file_inode(file);
	struct cl_io *io;
	struct cl_fault_io *fio;
	struct lu_env *env;
	int rc;

	*env_ret = NULL;
	if (ll_file_nolock(file))
		return ERR_PTR(-EOPNOTSUPP);

	/*
	 * A page fault can be raised while Lustre IO is already active for
	 * the current thread, e.g. when doing read/write against a user
	 * level buffer mapped from a Lustre file.  To avoid stomping on the
	 * existing context, optionally force an allocation of a new one.
	 */
	env = cl_env_nested_get(nest);
	if (IS_ERR(env))
		return ERR_PTR(-EINVAL);

	*env_ret = env;

restart:
	io = vvp_env_thread_io(env);
	io->ci_obj = ll_i2info(inode)->lli_clob;
	LASSERT(io->ci_obj);

	fio = &io->u.ci_fault;
	fio->ft_index = index;
	fio->ft_executable = vma->vm_flags & VM_EXEC;

	/*
	 * Disable VM_SEQ_READ and use VM_RAND_READ to make sure that the
	 * kernel will not read other pages not covered by ldlm in
	 * filemap_nopage.  We do our readahead in ll_readpage.
	 */
	if (ra_flags)
		*ra_flags = vma->vm_flags & (VM_RAND_READ | VM_SEQ_READ);
	vma->vm_flags &= ~VM_SEQ_READ;
	vma->vm_flags |= VM_RAND_READ;

	CDEBUG(D_MMAP, "vm_flags: %lx (%lu %d)\n", vma->vm_flags,
	       fio->ft_index, fio->ft_executable);

	rc = cl_io_init(env, io, CIT_FAULT, io->ci_obj);
	if (rc == 0) {
		struct vvp_io *vio = vvp_env_io(env);
		struct ll_file_data *fd = LUSTRE_FPRIVATE(file);

		LASSERT(vio->vui_cl.cis_io == io);

		/* mmap lock must be MANDATORY as it has to cache pages. */
		io->ci_lockreq = CILR_MANDATORY;
		vio->vui_fd = fd;
	} else {
		LASSERT(rc < 0);
		cl_io_fini(env, io);
		if (io->ci_need_restart)
			goto restart;

		cl_env_nested_put(nest, env);
		io = ERR_PTR(rc);
	}

	return io;
}
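/*
 * Usage sketch (not part of the original excerpt): a read-fault handler is
 * expected to drive the cl_io returned by ll_fault_io_init() through
 * cl_io_loop() and then tear it down, mirroring ll_page_mkwrite0() below.
 * Field names such as vio->u.fault.ft_vmf and the ra_flags restore are
 * assumptions based on the surrounding code, not a verified copy of any
 * particular Lustre tree.
 */
static int ll_fault_sketch(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct lu_env *env;
	struct cl_env_nest nest;
	struct cl_io *io;
	struct vvp_io *vio;
	unsigned long ra_flags;
	int result;

	io = ll_fault_io_init(vma, &env, &nest, vmf->pgoff, &ra_flags);
	if (IS_ERR(io))
		return PTR_ERR(io);

	result = io->ci_result;
	if (result == 0) {
		vio = vvp_env_io(env);
		vio->u.fault.ft_vma = vma;
		vio->u.fault.ft_vmf = vmf;	/* assumed field name */

		result = cl_io_loop(env, io);
	}

	cl_io_fini(env, io);
	cl_env_nested_put(&nest, env);

	/* restore the readahead hint saved by ll_fault_io_init() */
	vma->vm_flags |= ra_flags & VM_SEQ_READ;

	return result;
}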
/**
 * Lock upcall function that is executed either when a reply to an ENQUEUE
 * RPC is received from the server, or after osc_enqueue_base() matched a
 * local DLM lock.
 */
static int osc_lock_upcall(void *cookie, int errcode)
{
	struct osc_lock		*olck  = cookie;
	struct cl_lock_slice	*slice = &olck->ols_cl;
	struct cl_lock		*lock  = slice->cls_lock;
	struct lu_env		*env;
	struct cl_env_nest	 nest;

	ENTRY;
	env = cl_env_nested_get(&nest);
	if (!IS_ERR(env)) {
		int rc;

		cl_lock_mutex_get(env, lock);

		LASSERT(lock->cll_state >= CLS_QUEUING);
		if (olck->ols_state == OLS_ENQUEUED) {
			olck->ols_state = OLS_UPCALL_RECEIVED;
			rc = ldlm_error2errno(errcode);
		} else if (olck->ols_state == OLS_CANCELLED) {
			rc = -EIO;
		} else {
			CERROR("Impossible state: %d\n", olck->ols_state);
			LBUG();
		}

		if (rc) {
			struct ldlm_lock *dlmlock;

			dlmlock = ldlm_handle2lock(&olck->ols_handle);
			if (dlmlock != NULL) {
				lock_res_and_lock(dlmlock);
				spin_lock(&osc_ast_guard);
				LASSERT(olck->ols_lock == NULL);
				dlmlock->l_ast_data = NULL;
				olck->ols_handle.cookie = 0ULL;
				spin_unlock(&osc_ast_guard);
				ldlm_lock_fail_match_locked(dlmlock);
				unlock_res_and_lock(dlmlock);
				LDLM_LOCK_PUT(dlmlock);
			}
		} else {
			if (olck->ols_glimpse)
				olck->ols_glimpse = 0;
			osc_lock_upcall0(env, olck);
		}

		/* Error handling, some errors are tolerable. */
		if (olck->ols_locklessable && rc == -EUSERS) {
			/* This is a tolerable error, turn this lock into
			 * a lockless lock. */
			osc_object_set_contended(cl2osc(slice->cls_obj));
			LASSERT(slice->cls_ops == &osc_lock_ops);

			/* Change this lock to an ldlmlock-less lock. */
			osc_lock_to_lockless(env, olck, 1);
			olck->ols_state = OLS_GRANTED;
			rc = 0;
		} else if (olck->ols_glimpse && rc == -ENAVAIL) {
			osc_lock_lvb_update(env, olck, rc);
			cl_lock_delete(env, lock);
			/* Hide the error. */
			rc = 0;
		}

		if (rc == 0) {
			/* For the AGL case, the RPC sponsor may exit the
			 * cl_lock processing without wait() being called
			 * before the related OSC lock upcall(). So update
			 * the lock status according to the enqueue result
			 * inside the AGL upcall(). */
			if (olck->ols_agl) {
				lock->cll_flags |= CLF_FROM_UPCALL;
				cl_wait_try(env, lock);
				lock->cll_flags &= ~CLF_FROM_UPCALL;
				if (!olck->ols_glimpse)
					olck->ols_agl = 0;
			}
			cl_lock_signal(env, lock);
			/* del user for lock upcall cookie */
			cl_unuse_try(env, lock);
		} else {
			/* del user for lock upcall cookie */
			cl_lock_user_del(env, lock);
			cl_lock_error(env, lock, rc);
		}

		/* release cookie reference, acquired by osc_lock_enqueue() */
		cl_lock_hold_release(env, lock, "upcall", lock);
		cl_lock_mutex_put(env, lock);

		lu_ref_del(&lock->cll_reference, "upcall", lock);
		/* This may be the last reference, so it must be called after
		 * cl_lock_mutex_put(). */
		cl_lock_put(env, lock);

		cl_env_nested_put(&nest, env);
	} else {
		/* should never happen, similar to osc_ldlm_blocking_ast(). */
		LBUG();
	}
	RETURN(errcode);
}
/* Shared code of the page_mkwrite method for rhel5 and rhel6 */
static int ll_page_mkwrite0(struct vm_area_struct *vma, struct page *vmpage,
			    bool *retry)
{
	struct lu_env *env;
	struct cl_io *io;
	struct vvp_io *vio;
	struct cl_env_nest nest;
	int result;
	sigset_t set;
	struct inode *inode;
	struct ll_inode_info *lli;

	io = ll_fault_io_init(vma, &env, &nest, vmpage->index, NULL);
	if (IS_ERR(io)) {
		result = PTR_ERR(io);
		goto out;
	}

	result = io->ci_result;
	if (result < 0)
		goto out_io;

	io->u.ci_fault.ft_mkwrite = 1;
	io->u.ci_fault.ft_writable = 1;

	vio = vvp_env_io(env);
	vio->u.fault.ft_vma = vma;
	vio->u.fault.ft_vmpage = vmpage;

	set = cfs_block_sigsinv(sigmask(SIGKILL) | sigmask(SIGTERM));

	inode = vvp_object_inode(io->ci_obj);
	lli = ll_i2info(inode);

	result = cl_io_loop(env, io);

	cfs_restore_sigs(set);

	if (result == 0) {
		lock_page(vmpage);
		if (!vmpage->mapping) {
			unlock_page(vmpage);

			/* page was truncated and lock was cancelled, return
			 * ENODATA so that VM_FAULT_NOPAGE will be returned
			 * to handle_mm_fault(). */
			if (result == 0)
				result = -ENODATA;
		} else if (!PageDirty(vmpage)) {
			/* race, the page has been cleaned by ptlrpcd after
			 * it was unlocked, it has to be added into dirty
			 * cache again otherwise this soon-to-be-dirty page
			 * won't consume any grants, even worse if this page
			 * is being transferred because it will break RPC
			 * checksum. */
			unlock_page(vmpage);

			CDEBUG(D_MMAP,
			       "Race on page_mkwrite %p/%lu, page has been written out, retry.\n",
			       vmpage, vmpage->index);

			*retry = true;
			result = -EAGAIN;
		}

		if (result == 0) {
			spin_lock(&lli->lli_lock);
			lli->lli_flags |= LLIF_DATA_MODIFIED;
			spin_unlock(&lli->lli_lock);
		}
	}

out_io:
	cl_io_fini(env, io);
	cl_env_nested_put(&nest, env);
out:
	CDEBUG(D_MMAP, "%s mkwrite with %d\n", current->comm, result);
	LASSERT(ergo(result == 0, PageLocked(vmpage)));

	return result;
}
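/*
 * Usage sketch (not part of the original excerpt): a page_mkwrite entry
 * point would call ll_page_mkwrite0() in a retry loop and map its errno
 * results onto VM_FAULT_* codes.  The mapping below is an assumption that
 * follows the comments in ll_page_mkwrite0() (-ENODATA -> NOPAGE, -EAGAIN
 * -> retry), not a verified copy of any particular Lustre tree.
 */
static int ll_page_mkwrite_sketch(struct vm_area_struct *vma,
				  struct vm_fault *vmf)
{
	bool retry;
	int result;

	do {
		retry = false;
		result = ll_page_mkwrite0(vma, vmf->page, &retry);
	} while (retry);

	switch (result) {
	case 0:
		/* the page stays locked on success, see the LASSERT above */
		return VM_FAULT_LOCKED;
	case -ENODATA:
	case -EFAULT:
		return VM_FAULT_NOPAGE;
	case -ENOMEM:
		return VM_FAULT_OOM;
	default:
		return VM_FAULT_SIGBUS;
	}
}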