/**
 * For swapping layout. The file's layout may have changed.
 * To avoid populating pages to a wrong stripe, we have to verify the
 * correctness of layout. It works because swapping layout processes
 * have to acquire group lock.
 */
static bool can_populate_pages(const struct lu_env *env, struct cl_io *io,
			       struct inode *inode)
{
	struct ll_inode_info	*lli = ll_i2info(inode);
	struct vvp_io		*vio = vvp_env_io(env);
	bool rc = true;

	switch (io->ci_type) {
	case CIT_READ:
	case CIT_WRITE:
		/* no lock is needed here to check lli_layout_gen: we hold the
		 * extent lock, and the GROUP lock must be held to swap a
		 * layout */
		if (ll_layout_version_get(lli) != vio->vui_layout_gen) {
			io->ci_need_restart = 1;
			/* this returns a short read/write to the application */
			io->ci_continue = 0;
			rc = false;
		}
		/* fallthrough */
	case CIT_FAULT:
		/* fault is okay because we've already had a page. */
	default:
		break;
	}

	return rc;
}
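/*
 * Usage sketch (illustrative, not part of this file): a cl_io start handler
 * is expected to consult can_populate_pages() before queuing pages and
 * return early, so that cl_io_loop() can observe ci_need_restart and
 * re-initialize the io against the new layout. The handler name below is
 * hypothetical; the vvp_io start callbacks in upstream vvp_io.c follow this
 * shape.
 */
static int example_rw_start(const struct lu_env *env, struct cl_io *io,
			    struct inode *inode)
{
	if (!can_populate_pages(env, io, inode))
		return 0; /* short read/write; the io will be restarted */

	/* ... safe to populate pages against the verified layout ... */
	return 0;
}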
/**
 * True if \a io is a normal io, false for sendfile()/splice_{read|write}.
 */
int cl_is_normalio(const struct lu_env *env, const struct cl_io *io)
{
	struct vvp_io *vio = vvp_env_io(env);

	LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);

	return vio->cui_io_subtype == IO_NORMAL;
}
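/*
 * Illustrative caller (hypothetical helper name): splice()/sendfile() io
 * carries no user iovec, so iovec bookkeeping only applies to normal io.
 */
static void example_update_iov(const struct lu_env *env, struct cl_io *io,
			       struct vvp_io *vio)
{
	if (!cl_is_normalio(env, io))
		return;

	/* ... trim the cached user iovec to the current io count ... */
}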
/**
 * API independent part for page fault initialization.
 * \param env - corresponding lu_env for processing
 * \param vma - virtual memory area addressed by the page fault
 * \param index - page index corresponding to the fault
 * \param ra_flags - vma readahead flags
 *
 * \return error codes from cl_io_init.
 */
static struct cl_io *
ll_fault_io_init(struct lu_env *env, struct vm_area_struct *vma,
		 pgoff_t index, unsigned long *ra_flags)
{
	struct file	       *file = vma->vm_file;
	struct inode	       *inode = file_inode(file);
	struct cl_io	       *io;
	struct cl_fault_io     *fio;
	int			rc;
	ENTRY;

	if (ll_file_nolock(file))
		RETURN(ERR_PTR(-EOPNOTSUPP));

restart:
	io = vvp_env_thread_io(env);
	io->ci_obj = ll_i2info(inode)->lli_clob;
	LASSERT(io->ci_obj != NULL);

	fio = &io->u.ci_fault;
	fio->ft_index = index;
	fio->ft_executable = vma->vm_flags & VM_EXEC;

	/*
	 * disable VM_SEQ_READ and use VM_RAND_READ to make sure that
	 * the kernel will not read other pages not covered by ldlm in
	 * filemap_nopage. we do our readahead in ll_readpage.
	 */
	if (ra_flags != NULL)
		*ra_flags = vma->vm_flags & (VM_RAND_READ | VM_SEQ_READ);
	vma->vm_flags &= ~VM_SEQ_READ;
	vma->vm_flags |= VM_RAND_READ;

	CDEBUG(D_MMAP, "vm_flags: %lx (%lu %d)\n", vma->vm_flags,
	       fio->ft_index, fio->ft_executable);

	rc = cl_io_init(env, io, CIT_FAULT, io->ci_obj);
	if (rc == 0) {
		struct vvp_io *vio = vvp_env_io(env);
		struct ll_file_data *fd = LUSTRE_FPRIVATE(file);

		LASSERT(vio->vui_cl.cis_io == io);

		/* mmap lock must be MANDATORY as it has to cache pages. */
		io->ci_lockreq = CILR_MANDATORY;
		vio->vui_fd = fd;
	} else {
		LASSERT(rc < 0);
		cl_io_fini(env, io);
		if (io->ci_need_restart)
			goto restart;
		io = ERR_PTR(rc);
	}

	RETURN(io);
}
static struct vvp_io *cl2vvp_io(const struct lu_env *env,
				const struct cl_io_slice *slice)
{
	struct vvp_io *vio;

	vio = container_of(slice, struct vvp_io, vui_cl);
	LASSERT(vio == vvp_env_io(env));

	return vio;
}
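/*
 * Illustrative sketch: cl_io_operations callbacks receive the generic
 * cl_io_slice and recover the enclosing vvp_io with cl2vvp_io(). The
 * callback name below is hypothetical.
 */
static void example_io_fini(const struct lu_env *env,
			    const struct cl_io_slice *ios)
{
	struct vvp_io *vio = cl2vvp_io(env, ios);

	/* ... release per-io state kept in vio ... */
	(void)vio;
}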
/**
 * Lustre implementation of a vm_operations_struct::fault() method, called by
 * VM to serve page fault (both in kernel and user space).
 *
 * \param vma - virtual memory area related to the page fault
 * \param vmf - structure describing the type and address of the fault
 *
 * \return allocated and filled _locked_ page for the address
 * \retval VM_FAULT_ERROR on general error
 * \retval NOPAGE_OOM if there is no memory to allocate a new page
 */
static int ll_fault0(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct lu_env		*env;
	struct cl_io		*io;
	struct vvp_io		*vio = NULL;
	struct page		*vmpage;
	unsigned long		 ra_flags;
	struct cl_env_nest	 nest;
	int			 result;
	int			 fault_ret = 0;

	io = ll_fault_io_init(vma, &env, &nest, vmf->pgoff, &ra_flags);
	if (IS_ERR(io))
		return to_fault_error(PTR_ERR(io));

	result = io->ci_result;
	if (result == 0) {
		vio = vvp_env_io(env);
		vio->u.fault.ft_vma = vma;
		vio->u.fault.ft_vmpage = NULL;
		vio->u.fault.ft_vmf = vmf;
		vio->u.fault.ft_flags = 0;
		vio->u.fault.ft_flags_valid = false;

		/* May call ll_readpage() */
		ll_cl_add(vma->vm_file, env, io);

		result = cl_io_loop(env, io);

		ll_cl_remove(vma->vm_file, env);

		/* ft_flags are only valid if we reached
		 * the call to filemap_fault */
		if (vio->u.fault.ft_flags_valid)
			fault_ret = vio->u.fault.ft_flags;

		vmpage = vio->u.fault.ft_vmpage;
		if (result != 0 && vmpage != NULL) {
			put_page(vmpage);
			vmf->page = NULL;
		}
	}
	cl_io_fini(env, io);
	cl_env_nested_put(&nest, env);

	vma->vm_flags |= ra_flags;
	if (result != 0 && !(fault_ret & VM_FAULT_RETRY))
		fault_ret |= to_fault_error(result);

	CDEBUG(D_MMAP, "%s fault %d/%d\n", current->comm, fault_ret, result);
	return fault_ret;
}
static int ll_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *vmpage, void *fsdata)
{
	struct ll_cl_context *lcc = fsdata;
	const struct lu_env *env;
	struct cl_io *io;
	struct vvp_io *vio;
	struct cl_page *page;
	unsigned from = pos & (PAGE_SIZE - 1);
	bool unplug = false;
	int result = 0;
	ENTRY;

	put_page(vmpage);

	LASSERT(lcc != NULL);
	env  = lcc->lcc_env;
	page = lcc->lcc_page;
	io   = lcc->lcc_io;
	vio  = vvp_env_io(env);

	LASSERT(cl_page_is_owned(page, io));
	if (copied > 0) {
		struct cl_page_list *plist = &vio->u.write.vui_queue;

		lcc->lcc_page = NULL; /* page will be queued */

		/* Add it into write queue */
		cl_page_list_add(plist, page);
		if (plist->pl_nr == 1) /* first page */
			vio->u.write.vui_from = from;
		else
			LASSERT(from == 0);
		vio->u.write.vui_to = from + copied;

		/* To address the deadlock in balance_dirty_pages() where
		 * this dirty page may be written back in the same thread. */
		if (PageDirty(vmpage))
			unplug = true;

		/* We may have one full RPC, commit it soon */
		if (plist->pl_nr >= PTLRPC_MAX_BRW_PAGES)
			unplug = true;

		CL_PAGE_DEBUG(D_VFSTRACE, env, page,
			      "queued page: %d.\n", plist->pl_nr);
	} else {
		cl_page_disown(env, io, page);

		lcc->lcc_page = NULL;
		lu_ref_del(&page->cp_reference, "cl_io", io);
		cl_page_put(env, page);

		/* page list is not contiguous now, commit it now */
		unplug = true;
	}
	if (unplug || file->f_flags & O_SYNC || IS_SYNC(file_inode(file)))
		result = vvp_io_write_commit(env, io);

	if (result < 0)
		io->ci_result = result;
	RETURN(result >= 0 ? copied : result);
}
static int ll_write_begin(struct file *file, struct address_space *mapping,
			  loff_t pos, unsigned len, unsigned flags,
			  struct page **pagep, void **fsdata)
{
	struct ll_cl_context *lcc;
	const struct lu_env  *env = NULL;
	struct cl_io   *io;
	struct cl_page *page = NULL;

	struct cl_object *clob = ll_i2info(mapping->host)->lli_clob;
	pgoff_t index = pos >> PAGE_SHIFT;
	struct page *vmpage = NULL;
	unsigned from = pos & (PAGE_SIZE - 1);
	unsigned to = from + len;
	int result = 0;
	ENTRY;

	CDEBUG(D_VFSTRACE, "Writing %lu of %d to %d bytes\n", index, from, len);

	lcc = ll_cl_find(file);
	if (lcc == NULL) {
		io = NULL;
		GOTO(out, result = -EIO);
	}

	env = lcc->lcc_env;
	io  = lcc->lcc_io;

	/* To avoid deadlock, try to lock page first. */
	vmpage = grab_cache_page_nowait(mapping, index);

	if (unlikely(vmpage == NULL ||
		     PageDirty(vmpage) || PageWriteback(vmpage))) {
		struct vvp_io *vio = vvp_env_io(env);
		struct cl_page_list *plist = &vio->u.write.vui_queue;

		/* if the page is already in dirty cache, we have to commit
		 * the pages right now; otherwise, it may cause deadlock
		 * because it holds page lock of a dirty page and request for
		 * more grants. It's okay for the dirty page to be the first
		 * one in commit page list, though. */
		if (vmpage != NULL && plist->pl_nr > 0) {
			unlock_page(vmpage);
			put_page(vmpage);
			vmpage = NULL;
		}

		/* commit pages and then wait for page lock */
		result = vvp_io_write_commit(env, io);
		if (result < 0)
			GOTO(out, result);

		if (vmpage == NULL) {
			vmpage = grab_cache_page_write_begin(mapping, index,
							     flags);
			if (vmpage == NULL)
				GOTO(out, result = -ENOMEM);
		}
	}

	page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE);
	if (IS_ERR(page))
		GOTO(out, result = PTR_ERR(page));

	lcc->lcc_page = page;
	lu_ref_add(&page->cp_reference, "cl_io", io);

	cl_page_assume(env, io, page);
	if (!PageUptodate(vmpage)) {
		/*
		 * We're completely overwriting an existing page,
		 * so _don't_ set it up to date until commit_write
		 */
		if (from == 0 && to == PAGE_SIZE) {
			CL_PAGE_HEADER(D_PAGE, env, page, "full page write\n");
			POISON_PAGE(vmpage, 0x11);
		} else {
			/* TODO: can be optimized at OSC layer to check if it
			 * is a lockless IO. In that case, it's not necessary
			 * to read the data. */
			result = ll_prepare_partial_page(env, io, page);
			if (result == 0)
				SetPageUptodate(vmpage);
		}
	}
	if (result < 0)
		cl_page_unassume(env, io, page);
	EXIT;
out:
	if (result < 0) {
		if (vmpage != NULL) {
			unlock_page(vmpage);
			put_page(vmpage);
		}
		if (!IS_ERR_OR_NULL(page)) {
			lu_ref_del(&page->cp_reference, "cl_io", io);
			cl_page_put(env, page);
		}
		if (io)
			io->ci_result = result;
	} else {
		*pagep = vmpage;
		*fsdata = lcc;
	}
	RETURN(result);
}
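/*
 * For context, a trimmed sketch of how the hooks in this file are
 * registered. Upstream llite wires them into an address_space_operations
 * table in this general shape; the exact member set varies by kernel
 * version, so treat this as an assumption rather than the definitive table.
 */
const struct address_space_operations ll_aops_example = {
	.readpage	= ll_readpage,
	.direct_IO	= ll_direct_IO,
	.write_begin	= ll_write_begin,
	.write_end	= ll_write_end,
};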
static ssize_t
ll_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
	     loff_t file_offset, unsigned long nr_segs)
{
	struct ll_cl_context *lcc;
	const struct lu_env *env;
	struct cl_io *io;
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	ssize_t count = iov_length(iov, nr_segs);
	ssize_t tot_bytes = 0, result = 0;
	unsigned long seg = 0;
	size_t size = MAX_DIO_SIZE;
	ENTRY;

	/* FIXME: io smaller than PAGE_SIZE is broken on ia64 ??? */
	if ((file_offset & ~PAGE_MASK) || (count & ~PAGE_MASK))
		RETURN(-EINVAL);

	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), size=%zd (max %lu), "
	       "offset=%lld=%llx, pages %zd (max %lu)\n",
	       PFID(ll_inode2fid(inode)), inode, count, MAX_DIO_SIZE,
	       file_offset, file_offset, count >> PAGE_SHIFT,
	       MAX_DIO_SIZE >> PAGE_SHIFT);

	/* Check that all user buffers are aligned as well */
	for (seg = 0; seg < nr_segs; seg++) {
		if (((unsigned long)iov[seg].iov_base & ~PAGE_MASK) ||
		    (iov[seg].iov_len & ~PAGE_MASK))
			RETURN(-EINVAL);
	}

	lcc = ll_cl_find(file);
	if (lcc == NULL)
		RETURN(-EIO);

	env = lcc->lcc_env;
	LASSERT(!IS_ERR(env));
	io = lcc->lcc_io;
	LASSERT(io != NULL);

	for (seg = 0; seg < nr_segs; seg++) {
		size_t iov_left = iov[seg].iov_len;
		unsigned long user_addr = (unsigned long)iov[seg].iov_base;

		if (rw == READ) {
			if (file_offset >= i_size_read(inode))
				break;
			if (file_offset + iov_left > i_size_read(inode))
				iov_left = i_size_read(inode) - file_offset;
		}

		while (iov_left > 0) {
			struct page **pages;
			int page_count, max_pages = 0;
			size_t bytes;

			bytes = min(size, iov_left);
			page_count = ll_get_user_pages(rw, user_addr, bytes,
						       &pages, &max_pages);
			if (likely(page_count > 0)) {
				if (unlikely(page_count < max_pages))
					bytes = page_count << PAGE_SHIFT;
				result = ll_direct_IO_seg(env, io, rw, inode,
							  bytes, file_offset,
							  pages, page_count);
				ll_free_user_pages(pages, max_pages,
						   rw == READ);
			} else if (page_count == 0) {
				GOTO(out, result = -EFAULT);
			} else {
				result = page_count;
			}
			if (unlikely(result <= 0)) {
				/* If we can't allocate a large enough buffer
				 * for the request, shrink it to a smaller
				 * PAGE_SIZE multiple and try again.
				 * We should always be able to kmalloc for a
				 * page worth of page pointers = 4MB on i386. */
				if (result == -ENOMEM &&
				    size > (PAGE_SIZE / sizeof(*pages)) *
					   PAGE_SIZE) {
					size = ((((size / 2) - 1) |
						 ~PAGE_MASK) + 1) & PAGE_MASK;
					CDEBUG(D_VFSTRACE,
					       "DIO size now %zu\n", size);
					continue;
				}

				GOTO(out, result);
			}

			tot_bytes += result;
			file_offset += result;
			iov_left -= result;
			user_addr += result;
		}
	}
out:
	if (tot_bytes > 0) {
		struct vvp_io *vio = vvp_env_io(env);

		/* no commit async for direct IO */
		vio->u.write.vui_written += tot_bytes;
	}

	RETURN(tot_bytes ? tot_bytes : result);
}
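/*
 * For reference: the ll_direct_IO implementations above and below clamp each
 * submission to MAX_DIO_SIZE. Upstream rw26.c defines it roughly as below,
 * sizing a segment so that one RPC's worth of struct brw_page descriptors
 * still fits a kmalloc'd buffer, rounded down to the RPC size; treat the
 * exact expression as an assumption.
 */
#define MAX_DIO_SIZE_EXAMPLE ((KMALLOC_MAX_SIZE / sizeof(struct brw_page) * \
			       PAGE_SIZE) & ~(DT_MAX_BRW_SIZE - 1))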
static ssize_t
ll_direct_IO(
# ifndef HAVE_IOV_ITER_RW
	     int rw,
# endif
	     struct kiocb *iocb, struct iov_iter *iter, loff_t file_offset)
{
	struct ll_cl_context *lcc;
	const struct lu_env *env;
	struct cl_io *io;
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	ssize_t count = iov_iter_count(iter);
	ssize_t tot_bytes = 0, result = 0;
	size_t size = MAX_DIO_SIZE;

	/* FIXME: io smaller than PAGE_SIZE is broken on ia64 ??? */
	if ((file_offset & ~PAGE_MASK) || (count & ~PAGE_MASK))
		return -EINVAL;

	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), size=%zd (max %lu), "
	       "offset=%lld=%llx, pages %zd (max %lu)\n",
	       PFID(ll_inode2fid(inode)), inode, count, MAX_DIO_SIZE,
	       file_offset, file_offset, count >> PAGE_SHIFT,
	       MAX_DIO_SIZE >> PAGE_SHIFT);

	/* Check that all user buffers are aligned as well */
	if (iov_iter_alignment(iter) & ~PAGE_MASK)
		return -EINVAL;

	lcc = ll_cl_find(file);
	if (lcc == NULL)
		RETURN(-EIO);

	env = lcc->lcc_env;
	LASSERT(!IS_ERR(env));
	io = lcc->lcc_io;
	LASSERT(io != NULL);

	/* 0. Need locking between buffered and direct access, and against
	 *    size changes by concurrent truncates and writes.
	 * 1. Need inode mutex to operate transient pages.
	 */
	if (iov_iter_rw(iter) == READ)
		inode_lock(inode);

	while (iov_iter_count(iter)) {
		struct page **pages;
		size_t offs;

		count = min_t(size_t, iov_iter_count(iter), size);
		if (iov_iter_rw(iter) == READ) {
			if (file_offset >= i_size_read(inode))
				break;
			if (file_offset + count > i_size_read(inode))
				count = i_size_read(inode) - file_offset;
		}

		result = iov_iter_get_pages_alloc(iter, &pages, count, &offs);
		if (likely(result > 0)) {
			int n = DIV_ROUND_UP(result + offs, PAGE_SIZE);

			result = ll_direct_IO_seg(env, io, iov_iter_rw(iter),
						  inode, result, file_offset,
						  pages, n);
			ll_free_user_pages(pages, n,
					   iov_iter_rw(iter) == READ);
		}
		if (unlikely(result <= 0)) {
			/* If we can't allocate a large enough buffer
			 * for the request, shrink it to a smaller
			 * PAGE_SIZE multiple and try again.
			 * We should always be able to kmalloc for a
			 * page worth of page pointers = 4MB on i386. */
			if (result == -ENOMEM &&
			    size > (PAGE_SIZE / sizeof(*pages)) * PAGE_SIZE) {
				size = ((((size / 2) - 1) | ~PAGE_MASK) + 1) &
				       PAGE_MASK;
				CDEBUG(D_VFSTRACE, "DIO size now %zu\n", size);
				continue;
			}

			GOTO(out, result);
		}

		iov_iter_advance(iter, result);
		tot_bytes += result;
		file_offset += result;
	}
out:
	if (iov_iter_rw(iter) == READ)
		inode_unlock(inode);

	if (tot_bytes > 0) {
		struct vvp_io *vio = vvp_env_io(env);

		/* no commit async for direct IO */
		vio->u.write.vui_written += tot_bytes;
	}

	return tot_bytes ? : result;
}
/* Shared code of the page_mkwrite method for rhel5 and rhel6 */
static int ll_page_mkwrite0(struct vm_area_struct *vma, struct page *vmpage,
			    bool *retry)
{
	struct lu_env		*env;
	struct cl_io		*io;
	struct vvp_io		*vio;
	struct cl_env_nest	 nest;
	int			 result;
	sigset_t		 set;
	struct inode		*inode;
	struct ll_inode_info	*lli;

	io = ll_fault_io_init(vma, &env, &nest, vmpage->index, NULL);
	if (IS_ERR(io)) {
		result = PTR_ERR(io);
		goto out;
	}

	result = io->ci_result;
	if (result < 0)
		goto out_io;

	io->u.ci_fault.ft_mkwrite = 1;
	io->u.ci_fault.ft_writable = 1;

	vio = vvp_env_io(env);
	vio->u.fault.ft_vma = vma;
	vio->u.fault.ft_vmpage = vmpage;

	set = cfs_block_sigsinv(sigmask(SIGKILL) | sigmask(SIGTERM));

	/* we grab lli_trunc_sem to exclude truncate case.
	 * Otherwise, we could add dirty pages into osc cache
	 * while truncate is on-going. */
	inode = ccc_object_inode(io->ci_obj);
	lli = ll_i2info(inode);
	down_read(&lli->lli_trunc_sem);

	result = cl_io_loop(env, io);

	up_read(&lli->lli_trunc_sem);

	cfs_restore_sigs(set);

	if (result == 0) {
		struct inode *inode = file_inode(vma->vm_file);
		struct ll_inode_info *lli = ll_i2info(inode);

		lock_page(vmpage);
		if (!vmpage->mapping) {
			unlock_page(vmpage);

			/* page was truncated and lock was cancelled, return
			 * ENODATA so that VM_FAULT_NOPAGE will be returned
			 * to handle_mm_fault(). */
			if (result == 0)
				result = -ENODATA;
		} else if (!PageDirty(vmpage)) {
			/* race, the page has been cleaned by ptlrpcd after
			 * it was unlocked, it has to be added into dirty
			 * cache again otherwise this soon-to-dirty page won't
			 * consume any grants, even worse if this page is being
			 * transferred because it will break RPC checksum. */
			unlock_page(vmpage);

			CDEBUG(D_MMAP, "Race on page_mkwrite %p/%lu, page has "
			       "been written out, retry.\n",
			       vmpage, vmpage->index);

			*retry = true;
			result = -EAGAIN;
		}

		if (result == 0) {
			spin_lock(&lli->lli_lock);
			lli->lli_flags |= LLIF_DATA_MODIFIED;
			spin_unlock(&lli->lli_lock);
		}
	}

out_io:
	cl_io_fini(env, io);
	cl_env_nested_put(&nest, env);
out:
	CDEBUG(D_MMAP, "%s mkwrite with %d\n", current->comm, result);
	LASSERT(ergo(result == 0, PageLocked(vmpage)));

	return result;
}
/**
 * Lustre implementation of a vm_operations_struct::fault() method, called by
 * VM to serve page fault (both in kernel and user space).
 *
 * \param vma - virtual memory area related to the page fault
 * \param vmf - structure describing the type and address of the fault
 *
 * \return allocated and filled _locked_ page for the address
 * \retval VM_FAULT_ERROR on general error
 * \retval NOPAGE_OOM if there is no memory to allocate a new page
 */
static int ll_fault0(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct lu_env		*env;
	struct cl_io		*io;
	struct vvp_io		*vio = NULL;
	struct page		*vmpage;
	unsigned long		 ra_flags;
	int			 result = 0;
	int			 fault_ret = 0;
	__u16			 refcheck;
	ENTRY;

	env = cl_env_get(&refcheck);
	if (IS_ERR(env))
		RETURN(PTR_ERR(env));

	if (ll_sbi_has_fast_read(ll_i2sbi(file_inode(vma->vm_file)))) {
		/* do fast fault */
		ll_cl_add(vma->vm_file, env, NULL, LCC_MMAP);
		fault_ret = filemap_fault(vma, vmf);
		ll_cl_remove(vma->vm_file, env);

		/* - If there is no error, then the page was found in cache
		 *   and uptodate;
		 * - If VM_FAULT_RETRY is set, the page existed but failed to
		 *   lock. We will return to the kernel and retry;
		 * - Otherwise, it should try normal fault under DLM lock. */
		if ((fault_ret & VM_FAULT_RETRY) ||
		    !(fault_ret & VM_FAULT_ERROR))
			GOTO(out, result = 0);

		fault_ret = 0;
	}

	io = ll_fault_io_init(env, vma, vmf->pgoff, &ra_flags);
	if (IS_ERR(io))
		GOTO(out, result = PTR_ERR(io));

	result = io->ci_result;
	if (result == 0) {
		vio = vvp_env_io(env);
		vio->u.fault.ft_vma = vma;
		vio->u.fault.ft_vmpage = NULL;
		vio->u.fault.ft_vmf = vmf;
		vio->u.fault.ft_flags = 0;
		vio->u.fault.ft_flags_valid = 0;

		/* May call ll_readpage() */
		ll_cl_add(vma->vm_file, env, io, LCC_MMAP);

		result = cl_io_loop(env, io);

		ll_cl_remove(vma->vm_file, env);

		/* ft_flags are only valid if we reached
		 * the call to filemap_fault */
		if (vio->u.fault.ft_flags_valid)
			fault_ret = vio->u.fault.ft_flags;

		vmpage = vio->u.fault.ft_vmpage;
		if (result != 0 && vmpage != NULL) {
			put_page(vmpage);
			vmf->page = NULL;
		}
	}
	cl_io_fini(env, io);

	vma->vm_flags |= ra_flags;

out:
	cl_env_put(env, &refcheck);
	if (result != 0 && !(fault_ret & VM_FAULT_RETRY))
		fault_ret |= to_fault_error(result);

	CDEBUG(D_MMAP, "%s fault %d/%d\n", current->comm, fault_ret, result);
	RETURN(fault_ret);
}
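/*
 * Sketch of the ->fault() wrapper around ll_fault0() (assumed, following
 * upstream llite_mmap.c): block all signals except SIGKILL and SIGTERM for
 * the duration of the fault, and retry if the page was truncated between
 * ll_fault0() returning it and this caller locking it. The function name is
 * illustrative.
 */
static int ll_fault_example(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	sigset_t set;
	int result;

	/* Only SIGKILL and SIGTERM are allowed for fault/nopage/mkwrite so
	 * the io can be killed by the admin but does not segfault on other
	 * signals. */
	set = cfs_block_sigsinv(sigmask(SIGKILL) | sigmask(SIGTERM));

restart:
	result = ll_fault0(vma, vmf);
	if (!(result & (VM_FAULT_RETRY | VM_FAULT_ERROR | VM_FAULT_LOCKED))) {
		struct page *vmpage = vmf->page;

		/* check whether the page has been truncated underneath us */
		lock_page(vmpage);
		if (unlikely(vmpage->mapping == NULL)) {
			unlock_page(vmpage);
			put_page(vmpage);
			vmf->page = NULL;
			goto restart;
		}

		result |= VM_FAULT_LOCKED;
	}
	cfs_restore_sigs(set);
	return result;
}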
/* Shared code of the page_mkwrite method for rhel5 and rhel6 */
static int ll_page_mkwrite0(struct vm_area_struct *vma, struct page *vmpage,
			    bool *retry)
{
	struct lu_env		*env;
	struct cl_io		*io;
	struct vvp_io		*vio;
	int			 result;
	__u16			 refcheck;
	sigset_t		 set;
	struct inode		*inode;
	struct ll_inode_info	*lli;
	ENTRY;

	LASSERT(vmpage != NULL);
	env = cl_env_get(&refcheck);
	if (IS_ERR(env))
		RETURN(PTR_ERR(env));

	io = ll_fault_io_init(env, vma, vmpage->index, NULL);
	if (IS_ERR(io))
		GOTO(out, result = PTR_ERR(io));

	result = io->ci_result;
	if (result < 0)
		GOTO(out_io, result);

	io->u.ci_fault.ft_mkwrite = 1;
	io->u.ci_fault.ft_writable = 1;

	vio = vvp_env_io(env);
	vio->u.fault.ft_vma = vma;
	vio->u.fault.ft_vmpage = vmpage;

	set = cfs_block_sigsinv(sigmask(SIGKILL) | sigmask(SIGTERM));

	inode = vvp_object_inode(io->ci_obj);
	lli = ll_i2info(inode);

	result = cl_io_loop(env, io);

	cfs_restore_sigs(set);

	if (result == 0) {
		lock_page(vmpage);
		if (vmpage->mapping == NULL) {
			unlock_page(vmpage);

			/* page was truncated and lock was cancelled, return
			 * ENODATA so that VM_FAULT_NOPAGE will be returned
			 * to handle_mm_fault(). */
			if (result == 0)
				result = -ENODATA;
		} else if (!PageDirty(vmpage)) {
			/* race, the page has been cleaned by ptlrpcd after
			 * it was unlocked, it has to be added into dirty
			 * cache again otherwise this soon-to-dirty page won't
			 * consume any grants, even worse if this page is being
			 * transferred because it will break RPC checksum. */
			unlock_page(vmpage);

			CDEBUG(D_MMAP, "Race on page_mkwrite %p/%lu, page has "
			       "been written out, retry.\n",
			       vmpage, vmpage->index);

			*retry = true;
			result = -EAGAIN;
		}

		if (result == 0)
			ll_file_set_flag(lli, LLIF_DATA_MODIFIED);
	}
	EXIT;

out_io:
	cl_io_fini(env, io);
out:
	cl_env_put(env, &refcheck);
	CDEBUG(D_MMAP, "%s mkwrite with %d\n", current->comm, result);
	LASSERT(ergo(result == 0, PageLocked(vmpage)));

	return result;
}
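/*
 * Sketch of the ->page_mkwrite() wrapper (assumed, following upstream
 * llite_mmap.c): loop while ll_page_mkwrite0() asks for a retry, then
 * translate the errno into the VM_FAULT_* code the VM expects. The function
 * name is illustrative.
 */
static int ll_page_mkwrite_example(struct vm_area_struct *vma,
				   struct vm_fault *vmf)
{
	bool retry;
	int result;

	file_update_time(vma->vm_file);
	do {
		retry = false;
		result = ll_page_mkwrite0(vma, vmf->page, &retry);
	} while (retry);

	switch (result) {
	case 0:
		LASSERT(PageLocked(vmf->page));
		result = VM_FAULT_LOCKED;
		break;
	case -ENODATA:
	case -EFAULT:
		result = VM_FAULT_NOPAGE;
		break;
	case -ENOMEM:
		result = VM_FAULT_OOM;
		break;
	case -EAGAIN:
		result = VM_FAULT_RETRY;
		break;
	default:
		result = VM_FAULT_SIGBUS;
		break;
	}

	return result;
}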
/**
 * API independent part for page fault initialization.
 * \param vma - virtual memory area addressed by the page fault
 * \param env_ret - corresponding lu_env for processing
 * \param nest - nesting level
 * \param index - page index corresponding to the fault
 * \param ra_flags - vma readahead flags
 *
 * \return allocated and initialized env for the fault operation
 * \retval -EINVAL if the env cannot be allocated
 * \return other error codes from cl_io_init.
 */
static struct cl_io *
ll_fault_io_init(struct vm_area_struct *vma, struct lu_env **env_ret,
		 struct cl_env_nest *nest, pgoff_t index,
		 unsigned long *ra_flags)
{
	struct file	       *file = vma->vm_file;
	struct inode	       *inode = file_inode(file);
	struct cl_io	       *io;
	struct cl_fault_io     *fio;
	struct lu_env	       *env;
	int			rc;

	*env_ret = NULL;
	if (ll_file_nolock(file))
		return ERR_PTR(-EOPNOTSUPP);

	/*
	 * page fault can be called when lustre IO is
	 * already active for the current thread, e.g., when doing read/write
	 * against user level buffer mapped from Lustre buffer. To avoid
	 * stomping on existing context, optionally force an allocation of a
	 * new one.
	 */
	env = cl_env_nested_get(nest);
	if (IS_ERR(env))
		return ERR_PTR(-EINVAL);

	*env_ret = env;

restart:
	io = vvp_env_thread_io(env);
	io->ci_obj = ll_i2info(inode)->lli_clob;
	LASSERT(io->ci_obj != NULL);

	fio = &io->u.ci_fault;
	fio->ft_index = index;
	fio->ft_executable = vma->vm_flags & VM_EXEC;

	/*
	 * disable VM_SEQ_READ and use VM_RAND_READ to make sure that
	 * the kernel will not read other pages not covered by ldlm in
	 * filemap_nopage. we do our readahead in ll_readpage.
	 */
	if (ra_flags != NULL)
		*ra_flags = vma->vm_flags & (VM_RAND_READ | VM_SEQ_READ);
	vma->vm_flags &= ~VM_SEQ_READ;
	vma->vm_flags |= VM_RAND_READ;

	CDEBUG(D_MMAP, "vm_flags: %lx (%lu %d)\n", vma->vm_flags,
	       fio->ft_index, fio->ft_executable);

	rc = cl_io_init(env, io, CIT_FAULT, io->ci_obj);
	if (rc == 0) {
		struct vvp_io *vio = vvp_env_io(env);
		struct ll_file_data *fd = LUSTRE_FPRIVATE(file);

		LASSERT(vio->vui_cl.cis_io == io);

		/* mmap lock must be MANDATORY as it has to cache pages. */
		io->ci_lockreq = CILR_MANDATORY;
		vio->vui_fd = fd;
	} else {
		LASSERT(rc < 0);
		cl_io_fini(env, io);
		if (io->ci_need_restart)
			goto restart;
		cl_env_nested_put(nest, env);
		io = ERR_PTR(rc);
	}

	return io;
}
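/*
 * For context (assumed, as in upstream llite): the fault and mkwrite entry
 * points above end up in the file's vma operations table, in this general
 * shape. ll_vm_open and ll_vm_close are referenced here as assumptions from
 * the same file; the table name is illustrative.
 */
static const struct vm_operations_struct ll_file_vm_ops_example = {
	.fault		= ll_fault,
	.page_mkwrite	= ll_page_mkwrite,
	.open		= ll_vm_open,
	.close		= ll_vm_close,
};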