Example #1
/**
 * For layout swapping. The file's layout may have changed.
 * To avoid populating pages into a wrong stripe, we have to verify the
 * correctness of the layout. This works because layout-swapping processes
 * have to acquire the group lock.
 */
static bool can_populate_pages(const struct lu_env *env, struct cl_io *io,
			       struct inode *inode)
{
	struct ll_inode_info	*lli = ll_i2info(inode);
	struct vvp_io		*vio = vvp_env_io(env);
	bool rc = true;

	switch (io->ci_type) {
	case CIT_READ:
	case CIT_WRITE:
		/* no lock is needed here to check lli_layout_gen, as we hold
		 * the extent lock and the GROUP lock has to be held to swap
		 * the layout
		 */
		if (ll_layout_version_get(lli) != vio->vui_layout_gen) {
			io->ci_need_restart = 1;
			/* this will return a short read/write to the application */
			io->ci_continue = 0;
			rc = false;
		}
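		/* fallthrough */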
	case CIT_FAULT:
		/* a fault is okay because we already have the page. */
	default:
		break;
	}

	return rc;
}
Example #2
/**
 * True if \a io is a normal io, false for sendfile() / splice_{read|write}
 */
int cl_is_normalio(const struct lu_env *env, const struct cl_io *io)
{
        struct vvp_io *vio = vvp_env_io(env);

        LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);

        return vio->cui_io_subtype == IO_NORMAL;
}
Example #3
/**
 * API-independent part of page fault initialization.
 * \param env - corresponding lu_env for processing
 * \param vma - virtual memory area in which the page fault occurred
 * \param index - page index corresponding to the fault.
 * \param ra_flags - vma readahead flags.
 *
 * \return the initialized cl_io, or an ERR_PTR with the error code from cl_io_init.
 */
static struct cl_io *
ll_fault_io_init(struct lu_env *env, struct vm_area_struct *vma,
		 pgoff_t index, unsigned long *ra_flags)
{
	struct file	       *file = vma->vm_file;
	struct inode	       *inode = file_inode(file);
	struct cl_io	       *io;
	struct cl_fault_io     *fio;
	int			rc;
	ENTRY;

	if (ll_file_nolock(file))
		RETURN(ERR_PTR(-EOPNOTSUPP));

restart:
	io = vvp_env_thread_io(env);
	io->ci_obj = ll_i2info(inode)->lli_clob;
	LASSERT(io->ci_obj != NULL);

	fio = &io->u.ci_fault;
	fio->ft_index      = index;
	fio->ft_executable = vma->vm_flags & VM_EXEC;

	/*
	 * disable VM_SEQ_READ and use VM_RAND_READ to make sure that
	 * the kernel will not read other pages not covered by ldlm in
	 * filemap_nopage. we do our readahead in ll_readpage.
	 */
	if (ra_flags != NULL)
		*ra_flags = vma->vm_flags & (VM_RAND_READ|VM_SEQ_READ);
	vma->vm_flags &= ~VM_SEQ_READ;
	vma->vm_flags |= VM_RAND_READ;

	CDEBUG(D_MMAP, "vm_flags: %lx (%lu %d)\n", vma->vm_flags,
	       fio->ft_index, fio->ft_executable);

	rc = cl_io_init(env, io, CIT_FAULT, io->ci_obj);
	if (rc == 0) {
		struct vvp_io *vio = vvp_env_io(env);
		struct ll_file_data *fd = LUSTRE_FPRIVATE(file);

		LASSERT(vio->vui_cl.cis_io == io);

		/* mmap lock must be MANDATORY because it has to cache
		 * pages. */
		io->ci_lockreq = CILR_MANDATORY;
		vio->vui_fd = fd;
	} else {
		LASSERT(rc < 0);
		cl_io_fini(env, io);
		if (io->ci_need_restart)
			goto restart;

		io = ERR_PTR(rc);
	}

	RETURN(io);
}
Example #4
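/**
 * Return the vvp_io embedding \a slice; it is asserted to be the vvp_io
 * stored in the current environment.
 */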
static struct vvp_io *cl2vvp_io(const struct lu_env *env,
				const struct cl_io_slice *slice)
{
	struct vvp_io *vio;

	vio = container_of(slice, struct vvp_io, vui_cl);
	LASSERT(vio == vvp_env_io(env));

	return vio;
}
Example #5
/**
 * Lustre implementation of a vm_operations_struct::fault() method, called by
 * the VM to serve a page fault (both in kernel and user space).
 *
 * \param vma - virtual memory area related to the page fault
 * \param vmf - structure describing the type and address of the fault
 *
 * \return allocated and filled _locked_ page for the address
 * \retval VM_FAULT_ERROR on general error
 * \retval NOPAGE_OOM when there is no memory to allocate a new page
 */
static int ll_fault0(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct lu_env	   *env;
	struct cl_io	    *io;
	struct vvp_io	   *vio = NULL;
	struct page	     *vmpage;
	unsigned long	    ra_flags;
	struct cl_env_nest       nest;
	int		      result;
	int		      fault_ret = 0;

	io = ll_fault_io_init(vma, &env, &nest, vmf->pgoff, &ra_flags);
	if (IS_ERR(io))
		return to_fault_error(PTR_ERR(io));

	result = io->ci_result;
	if (result == 0) {
		vio = vvp_env_io(env);
		vio->u.fault.ft_vma       = vma;
		vio->u.fault.ft_vmpage    = NULL;
		vio->u.fault.ft_vmf = vmf;
		vio->u.fault.ft_flags = 0;
		vio->u.fault.ft_flags_valid = false;

		/* May call ll_readpage() */
		ll_cl_add(vma->vm_file, env, io);

		result = cl_io_loop(env, io);

		ll_cl_remove(vma->vm_file, env);

		/* ft_flags are only valid if we reached
		 * the call to filemap_fault
		 */
		if (vio->u.fault.ft_flags_valid)
			fault_ret = vio->u.fault.ft_flags;

		vmpage = vio->u.fault.ft_vmpage;
		if (result != 0 && vmpage) {
			put_page(vmpage);
			vmf->page = NULL;
		}
	}
	cl_io_fini(env, io);
	cl_env_nested_put(&nest, env);

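	/* restore the readahead flags saved by ll_fault_io_init() */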
	vma->vm_flags |= ra_flags;
	if (result != 0 && !(fault_ret & VM_FAULT_RETRY))
		fault_ret |= to_fault_error(result);

	CDEBUG(D_MMAP, "%s fault %d/%d\n",
	       current->comm, fault_ret, result);
	return fault_ret;
}
Example #6
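/**
 * ->write_end() address_space operation: add the copied page to the vvp
 * write queue, or disown and release it if nothing was copied, and commit
 * the queue right away when the page is already dirty, a full RPC worth of
 * pages has accumulated, nothing was copied, or the file requires
 * synchronous I/O.
 */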
static int ll_write_end(struct file *file, struct address_space *mapping,
                        loff_t pos, unsigned len, unsigned copied,
                        struct page *vmpage, void *fsdata)
{
    struct ll_cl_context *lcc = fsdata;
    const struct lu_env *env;
    struct cl_io *io;
    struct vvp_io *vio;
    struct cl_page *page;
    unsigned from = pos & (PAGE_SIZE - 1);
    bool unplug = false;
    int result = 0;
    ENTRY;

    put_page(vmpage);

    LASSERT(lcc != NULL);
    env  = lcc->lcc_env;
    page = lcc->lcc_page;
    io   = lcc->lcc_io;
    vio  = vvp_env_io(env);

    LASSERT(cl_page_is_owned(page, io));
    if (copied > 0) {
        struct cl_page_list *plist = &vio->u.write.vui_queue;

        lcc->lcc_page = NULL; /* page will be queued */

        /* Add it into write queue */
        cl_page_list_add(plist, page);
        if (plist->pl_nr == 1) /* first page */
            vio->u.write.vui_from = from;
        else
            LASSERT(from == 0);
        vio->u.write.vui_to = from + copied;

        /* To address the deadlock in balance_dirty_pages(), where this
         * dirty page may be written back by the same thread. */
        if (PageDirty(vmpage))
            unplug = true;

        /* We may have one full RPC, commit it soon */
        if (plist->pl_nr >= PTLRPC_MAX_BRW_PAGES)
            unplug = true;

        CL_PAGE_DEBUG(D_VFSTRACE, env, page,
                      "queued page: %d.\n", plist->pl_nr);
    } else {
        cl_page_disown(env, io, page);

        lcc->lcc_page = NULL;
        lu_ref_del(&page->cp_reference, "cl_io", io);
        cl_page_put(env, page);

        /* the page list is no longer contiguous, commit it now */
        unplug = true;
    }
    if (unplug ||
            file->f_flags & O_SYNC || IS_SYNC(file_inode(file)))
        result = vvp_io_write_commit(env, io);

    if (result < 0)
        io->ci_result = result;
    RETURN(result >= 0 ? copied : result);
}
Example #7
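/**
 * ->write_begin() address_space operation: look up the ll_cl_context for the
 * file, grab and lock the vmpage (committing already queued pages first if
 * the target page is dirty or under writeback), and set up the matching
 * cl_page for the write.
 */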
static int ll_write_begin(struct file *file, struct address_space *mapping,
                          loff_t pos, unsigned len, unsigned flags,
                          struct page **pagep, void **fsdata)
{
    struct ll_cl_context *lcc;
    const struct lu_env  *env = NULL;
    struct cl_io   *io;
    struct cl_page *page = NULL;

    struct cl_object *clob = ll_i2info(mapping->host)->lli_clob;
    pgoff_t index = pos >> PAGE_SHIFT;
    struct page *vmpage = NULL;
    unsigned from = pos & (PAGE_SIZE - 1);
    unsigned to = from + len;
    int result = 0;
    ENTRY;

    CDEBUG(D_VFSTRACE, "Writing %lu of %d to %d bytes\n", index, from, len);

    lcc = ll_cl_find(file);
    if (lcc == NULL) {
        io = NULL;
        GOTO(out, result = -EIO);
    }

    env = lcc->lcc_env;
    io  = lcc->lcc_io;

    /* To avoid deadlock, try to lock page first. */
    vmpage = grab_cache_page_nowait(mapping, index);

    if (unlikely(vmpage == NULL ||
                 PageDirty(vmpage) || PageWriteback(vmpage))) {
        struct vvp_io *vio = vvp_env_io(env);
        struct cl_page_list *plist = &vio->u.write.vui_queue;

        /* If the page is already in the dirty cache, we have to commit
         * the pages right now; otherwise, it may cause a deadlock
         * because it holds the page lock of a dirty page and requests
         * more grants. It's okay for the dirty page to be the first
         * one in the commit page list, though. */
        if (vmpage != NULL && plist->pl_nr > 0) {
            unlock_page(vmpage);
            put_page(vmpage);
            vmpage = NULL;
        }

        /* commit pages and then wait for page lock */
        result = vvp_io_write_commit(env, io);
        if (result < 0)
            GOTO(out, result);

        if (vmpage == NULL) {
            vmpage = grab_cache_page_write_begin(mapping, index,
                                                 flags);
            if (vmpage == NULL)
                GOTO(out, result = -ENOMEM);
        }
    }

    page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE);
    if (IS_ERR(page))
        GOTO(out, result = PTR_ERR(page));

    lcc->lcc_page = page;
    lu_ref_add(&page->cp_reference, "cl_io", io);

    cl_page_assume(env, io, page);
    if (!PageUptodate(vmpage)) {
        /*
         * We're completely overwriting an existing page,
         * so _don't_ set it up to date until commit_write
         */
        if (from == 0 && to == PAGE_SIZE) {
            CL_PAGE_HEADER(D_PAGE, env, page, "full page write\n");
            POISON_PAGE(vmpage, 0x11);
        } else {
            /* TODO: can be optimized at OSC layer to check if it
             * is a lockless IO. In that case, it's not necessary
             * to read the data. */
            result = ll_prepare_partial_page(env, io, page);
            if (result == 0)
                SetPageUptodate(vmpage);
        }
    }
    if (result < 0)
        cl_page_unassume(env, io, page);
    EXIT;
out:
    if (result < 0) {
        if (vmpage != NULL) {
            unlock_page(vmpage);
            put_page(vmpage);
        }
        if (!IS_ERR_OR_NULL(page)) {
            lu_ref_del(&page->cp_reference, "cl_io", io);
            cl_page_put(env, page);
        }
        if (io)
            io->ci_result = result;
    } else {
        *pagep = vmpage;
        *fsdata = lcc;
    }
    RETURN(result);
}
Example #8
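/**
 * Direct I/O entry point for the iovec-based interface: after checking page
 * alignment of the offset, length and user buffers, pin the user pages of
 * each segment with ll_get_user_pages() and transfer them through
 * ll_direct_IO_seg(), shrinking the transfer size and retrying on -ENOMEM.
 */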
static ssize_t
ll_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
             loff_t file_offset, unsigned long nr_segs)
{
    struct ll_cl_context *lcc;
    const struct lu_env *env;
    struct cl_io *io;
    struct file *file = iocb->ki_filp;
    struct inode *inode = file->f_mapping->host;
    ssize_t count = iov_length(iov, nr_segs);
    ssize_t tot_bytes = 0, result = 0;
    unsigned long seg = 0;
    size_t size = MAX_DIO_SIZE;
    ENTRY;

    /* FIXME: io smaller than PAGE_SIZE is broken on ia64 ??? */
    if ((file_offset & ~PAGE_MASK) || (count & ~PAGE_MASK))
        RETURN(-EINVAL);

    CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), size=%zd (max %lu), "
           "offset=%lld=%llx, pages %zd (max %lu)\n",
           PFID(ll_inode2fid(inode)), inode, count, MAX_DIO_SIZE,
           file_offset, file_offset, count >> PAGE_SHIFT,
           MAX_DIO_SIZE >> PAGE_SHIFT);

    /* Check that all user buffers are aligned as well */
    for (seg = 0; seg < nr_segs; seg++) {
        if (((unsigned long)iov[seg].iov_base & ~PAGE_MASK) ||
                (iov[seg].iov_len & ~PAGE_MASK))
            RETURN(-EINVAL);
    }

    lcc = ll_cl_find(file);
    if (lcc == NULL)
        RETURN(-EIO);

    env = lcc->lcc_env;
    LASSERT(!IS_ERR(env));
    io = lcc->lcc_io;
    LASSERT(io != NULL);

    for (seg = 0; seg < nr_segs; seg++) {
        size_t iov_left = iov[seg].iov_len;
        unsigned long user_addr = (unsigned long)iov[seg].iov_base;

        if (rw == READ) {
            if (file_offset >= i_size_read(inode))
                break;
            if (file_offset + iov_left > i_size_read(inode))
                iov_left = i_size_read(inode) - file_offset;
        }

        while (iov_left > 0) {
            struct page **pages;
            int page_count, max_pages = 0;
            size_t bytes;

            bytes = min(size, iov_left);
            page_count = ll_get_user_pages(rw, user_addr, bytes,
                                           &pages, &max_pages);
            if (likely(page_count > 0)) {
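                /* fewer pages were pinned than requested; only
                 * transfer the bytes that are actually mapped */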
                if (unlikely(page_count < max_pages))
                    bytes = page_count << PAGE_SHIFT;
                result = ll_direct_IO_seg(env, io, rw, inode,
                                          bytes, file_offset,
                                          pages, page_count);
                ll_free_user_pages(pages, max_pages, rw == READ);
            } else if (page_count == 0) {
                GOTO(out, result = -EFAULT);
            } else {
                result = page_count;
            }
            if (unlikely(result <= 0)) {
                /* If we can't allocate a large enough buffer
                 * for the request, shrink it to a smaller
                 * PAGE_SIZE multiple and try again.
                 * We should always be able to kmalloc for a
                 * page worth of page pointers = 4MB on i386. */
                if (result == -ENOMEM &&
                        size > (PAGE_SIZE / sizeof(*pages)) *
                        PAGE_SIZE) {
                    size = ((((size / 2) - 1) |
                             ~PAGE_MASK) + 1) &
                           PAGE_MASK;
                    CDEBUG(D_VFSTRACE, "DIO size now %zu\n",
                           size);
                    continue;
                }

                GOTO(out, result);
            }

            tot_bytes += result;
            file_offset += result;
            iov_left -= result;
            user_addr += result;
        }
    }
out:
    if (tot_bytes > 0) {
        struct vvp_io *vio = vvp_env_io(env);

        /* no commit async for direct IO */
        vio->u.write.vui_written += tot_bytes;
    }

    RETURN(tot_bytes ? tot_bytes : result);
}
Example #9
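/**
 * Direct I/O entry point for the iov_iter-based interface: after checking
 * alignment, pin the user pages with iov_iter_get_pages_alloc() and transfer
 * them through ll_direct_IO_seg(), shrinking the transfer size and retrying
 * on -ENOMEM.
 */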
static ssize_t
ll_direct_IO(
# ifndef HAVE_IOV_ITER_RW
    int rw,
# endif
    struct kiocb *iocb, struct iov_iter *iter,
    loff_t file_offset)
{
    struct ll_cl_context *lcc;
    const struct lu_env *env;
    struct cl_io *io;
    struct file *file = iocb->ki_filp;
    struct inode *inode = file->f_mapping->host;
    ssize_t count = iov_iter_count(iter);
    ssize_t tot_bytes = 0, result = 0;
    size_t size = MAX_DIO_SIZE;

    /* FIXME: io smaller than PAGE_SIZE is broken on ia64 ??? */
    if ((file_offset & ~PAGE_MASK) || (count & ~PAGE_MASK))
        return -EINVAL;

    CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), size=%zd (max %lu), "
           "offset=%lld=%llx, pages %zd (max %lu)\n",
           PFID(ll_inode2fid(inode)), inode, count, MAX_DIO_SIZE,
           file_offset, file_offset, count >> PAGE_SHIFT,
           MAX_DIO_SIZE >> PAGE_SHIFT);

    /* Check that all user buffers are aligned as well */
    if (iov_iter_alignment(iter) & ~PAGE_MASK)
        return -EINVAL;

    lcc = ll_cl_find(file);
    if (lcc == NULL)
        RETURN(-EIO);

    env = lcc->lcc_env;
    LASSERT(!IS_ERR(env));
    io = lcc->lcc_io;
    LASSERT(io != NULL);

    /* 0. Need locking between buffered and direct access, and protection
     *    against races with size changes by concurrent truncates and writes.
     * 1. Need the inode mutex to operate on transient pages.
     */
    if (iov_iter_rw(iter) == READ)
        inode_lock(inode);

    while (iov_iter_count(iter)) {
        struct page **pages;
        size_t offs;

        count = min_t(size_t, iov_iter_count(iter), size);
        if (iov_iter_rw(iter) == READ) {
            if (file_offset >= i_size_read(inode))
                break;

            if (file_offset + count > i_size_read(inode))
                count = i_size_read(inode) - file_offset;
        }

        result = iov_iter_get_pages_alloc(iter, &pages, count, &offs);
        if (likely(result > 0)) {
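            /* number of pages spanned by the pinned buffer, including
             * the offset into the first page */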
            int n = DIV_ROUND_UP(result + offs, PAGE_SIZE);

            result = ll_direct_IO_seg(env, io, iov_iter_rw(iter),
                                      inode, result, file_offset,
                                      pages, n);
            ll_free_user_pages(pages, n,
                               iov_iter_rw(iter) == READ);

        }
        if (unlikely(result <= 0)) {
            /* If we can't allocate a large enough buffer
             * for the request, shrink it to a smaller
             * PAGE_SIZE multiple and try again.
             * We should always be able to kmalloc for a
             * page worth of page pointers = 4MB on i386. */
            if (result == -ENOMEM &&
                    size > (PAGE_SIZE / sizeof(*pages)) *
                    PAGE_SIZE) {
                size = ((((size / 2) - 1) |
                         ~PAGE_MASK) + 1) & PAGE_MASK;
                CDEBUG(D_VFSTRACE, "DIO size now %zu\n",
                       size);
                continue;
            }

            GOTO(out, result);
        }

        iov_iter_advance(iter, result);
        tot_bytes += result;
        file_offset += result;
    }
out:
    if (iov_iter_rw(iter) == READ)
        inode_unlock(inode);

    if (tot_bytes > 0) {
        struct vvp_io *vio = vvp_env_io(env);

        /* no commit async for direct IO */
        vio->u.write.vui_written += tot_bytes;
    }

    return tot_bytes ? : result;
}
Example #10
/* Code shared by the page_mkwrite method for rhel5 and rhel6 */
static int ll_page_mkwrite0(struct vm_area_struct *vma, struct page *vmpage,
			    bool *retry)
{
	struct lu_env	   *env;
	struct cl_io	    *io;
	struct vvp_io	   *vio;
	struct cl_env_nest       nest;
	int		      result;
	sigset_t	     set;
	struct inode	     *inode;
	struct ll_inode_info     *lli;

	io = ll_fault_io_init(vma, &env, &nest, vmpage->index, NULL);
	if (IS_ERR(io)) {
		result = PTR_ERR(io);
		goto out;
	}

	result = io->ci_result;
	if (result < 0)
		goto out_io;

	io->u.ci_fault.ft_mkwrite = 1;
	io->u.ci_fault.ft_writable = 1;

	vio = vvp_env_io(env);
	vio->u.fault.ft_vma    = vma;
	vio->u.fault.ft_vmpage = vmpage;

	set = cfs_block_sigsinv(sigmask(SIGKILL) | sigmask(SIGTERM));

	/* We grab lli_trunc_sem to exclude the truncate case.
	 * Otherwise, we could add dirty pages into the osc cache
	 * while a truncate is in progress.
	 */
	inode = ccc_object_inode(io->ci_obj);
	lli = ll_i2info(inode);
	down_read(&lli->lli_trunc_sem);

	result = cl_io_loop(env, io);

	up_read(&lli->lli_trunc_sem);

	cfs_restore_sigs(set);

	if (result == 0) {
		struct inode *inode = file_inode(vma->vm_file);
		struct ll_inode_info *lli = ll_i2info(inode);

		lock_page(vmpage);
		if (!vmpage->mapping) {
			unlock_page(vmpage);

			/* page was truncated and lock was cancelled, return
			 * ENODATA so that VM_FAULT_NOPAGE will be returned
			 * to handle_mm_fault().
			 */
			if (result == 0)
				result = -ENODATA;
		} else if (!PageDirty(vmpage)) {
			/* Race: the page was cleaned by ptlrpcd after it was
			 * unlocked. It has to be added into the dirty cache
			 * again, otherwise this soon-to-be-dirty page won't
			 * consume any grants; even worse, if this page is
			 * being transferred it will break the RPC checksum.
			 */
			unlock_page(vmpage);

			CDEBUG(D_MMAP, "Race on page_mkwrite %p/%lu, page has been written out, retry.\n",
			       vmpage, vmpage->index);

			*retry = true;
			result = -EAGAIN;
		}

		if (result == 0) {
			spin_lock(&lli->lli_lock);
			lli->lli_flags |= LLIF_DATA_MODIFIED;
			spin_unlock(&lli->lli_lock);
		}
	}

out_io:
	cl_io_fini(env, io);
	cl_env_nested_put(&nest, env);
out:
	CDEBUG(D_MMAP, "%s mkwrite with %d\n", current->comm, result);
	LASSERT(ergo(result == 0, PageLocked(vmpage)));

	return result;
}
Example #11
/**
 * Lustre implementation of a vm_operations_struct::fault() method, called by
 * the VM to serve a page fault (both in kernel and user space).
 *
 * \param vma - virtual memory area related to the page fault
 * \param vmf - structure describing the type and address of the fault
 *
 * \return allocated and filled _locked_ page for the address
 * \retval VM_FAULT_ERROR on general error
 * \retval NOPAGE_OOM when there is no memory to allocate a new page
 */
static int ll_fault0(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct lu_env           *env;
	struct cl_io            *io;
	struct vvp_io           *vio = NULL;
	struct page             *vmpage;
	unsigned long            ra_flags;
	int                      result = 0;
	int                      fault_ret = 0;
	__u16			 refcheck;
	ENTRY;

	env = cl_env_get(&refcheck);
	if (IS_ERR(env))
		RETURN(PTR_ERR(env));

	if (ll_sbi_has_fast_read(ll_i2sbi(file_inode(vma->vm_file)))) {
		/* do fast fault */
		ll_cl_add(vma->vm_file, env, NULL, LCC_MMAP);
		fault_ret = filemap_fault(vma, vmf);
		ll_cl_remove(vma->vm_file, env);

		/* - If there is no error, then the page was found in the cache
		 *   and is uptodate;
		 * - If VM_FAULT_RETRY is set, the page existed but could not be
		 *   locked. Return to the kernel, which will retry;
		 * - Otherwise, try a normal fault under the DLM lock. */
		if ((fault_ret & VM_FAULT_RETRY) ||
		    !(fault_ret & VM_FAULT_ERROR))
			GOTO(out, result = 0);

		fault_ret = 0;
	}

	io = ll_fault_io_init(env, vma, vmf->pgoff, &ra_flags);
	if (IS_ERR(io))
		GOTO(out, result = PTR_ERR(io));

	result = io->ci_result;
	if (result == 0) {
		vio = vvp_env_io(env);
		vio->u.fault.ft_vma       = vma;
		vio->u.fault.ft_vmpage    = NULL;
		vio->u.fault.ft_vmf = vmf;
		vio->u.fault.ft_flags = 0;
		vio->u.fault.ft_flags_valid = 0;

		/* May call ll_readpage() */
		ll_cl_add(vma->vm_file, env, io, LCC_MMAP);

		result = cl_io_loop(env, io);

		ll_cl_remove(vma->vm_file, env);

		/* ft_flags are only valid if we reached
		 * the call to filemap_fault */
		if (vio->u.fault.ft_flags_valid)
			fault_ret = vio->u.fault.ft_flags;

		vmpage = vio->u.fault.ft_vmpage;
		if (result != 0 && vmpage != NULL) {
			put_page(vmpage);
			vmf->page = NULL;
		}
	}
	cl_io_fini(env, io);

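	/* restore the readahead flags saved by ll_fault_io_init() */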
	vma->vm_flags |= ra_flags;

out:
	cl_env_put(env, &refcheck);
	if (result != 0 && !(fault_ret & VM_FAULT_RETRY))
		fault_ret |= to_fault_error(result);

	CDEBUG(D_MMAP, "%s fault %d/%d\n", current->comm, fault_ret, result);
	RETURN(fault_ret);
}
Example #12
/* Code shared by the page_mkwrite method for rhel5 and rhel6 */
static int ll_page_mkwrite0(struct vm_area_struct *vma, struct page *vmpage,
                            bool *retry)
{
	struct lu_env           *env;
	struct cl_io            *io;
	struct vvp_io           *vio;
	int                      result;
	__u16			 refcheck;
	sigset_t		 set;
	struct inode             *inode;
	struct ll_inode_info     *lli;
	ENTRY;

	LASSERT(vmpage != NULL);
	env = cl_env_get(&refcheck);
	if (IS_ERR(env))
		RETURN(PTR_ERR(env));

	io = ll_fault_io_init(env, vma, vmpage->index, NULL);
	if (IS_ERR(io))
		GOTO(out, result = PTR_ERR(io));

	result = io->ci_result;
	if (result < 0)
		GOTO(out_io, result);

	io->u.ci_fault.ft_mkwrite = 1;
	io->u.ci_fault.ft_writable = 1;

	vio = vvp_env_io(env);
	vio->u.fault.ft_vma    = vma;
	vio->u.fault.ft_vmpage = vmpage;

	set = cfs_block_sigsinv(sigmask(SIGKILL) | sigmask(SIGTERM));

	inode = vvp_object_inode(io->ci_obj);
	lli = ll_i2info(inode);

	result = cl_io_loop(env, io);

	cfs_restore_sigs(set);

	if (result == 0) {
		lock_page(vmpage);
		if (vmpage->mapping == NULL) {
			unlock_page(vmpage);

			/* page was truncated and lock was cancelled, return
			 * ENODATA so that VM_FAULT_NOPAGE will be returned
			 * to handle_mm_fault(). */
			if (result == 0)
				result = -ENODATA;
		} else if (!PageDirty(vmpage)) {
			/* Race: the page was cleaned by ptlrpcd after it was
			 * unlocked. It has to be added into the dirty cache
			 * again, otherwise this soon-to-be-dirty page won't
			 * consume any grants; even worse, if this page is
			 * being transferred it will break the RPC checksum.
			 */
			unlock_page(vmpage);

			CDEBUG(D_MMAP, "Race on page_mkwrite %p/%lu, page has "
			       "been written out, retry.\n",
			       vmpage, vmpage->index);

			*retry = true;
			result = -EAGAIN;
		}

		if (result == 0)
			ll_file_set_flag(lli, LLIF_DATA_MODIFIED);
	}
	EXIT;

out_io:
	cl_io_fini(env, io);
out:
	cl_env_put(env, &refcheck);
	CDEBUG(D_MMAP, "%s mkwrite with %d\n", current->comm, result);
	LASSERT(ergo(result == 0, PageLocked(vmpage)));

	return result;
}
Example #13
/**
 * API-independent part of page fault initialization.
 * \param vma - virtual memory area in which the page fault occurred
 * \param env - corresponding lu_env for processing
 * \param nest - nesting level
 * \param index - page index corresponding to the fault.
 * \param ra_flags - vma readahead flags.
 *
 * \return allocated and initialized env for the fault operation.
 * \retval EINVAL if the env cannot be allocated
 * \return other error codes from cl_io_init.
 */
static struct cl_io *
ll_fault_io_init(struct vm_area_struct *vma, struct lu_env **env_ret,
		 struct cl_env_nest *nest, pgoff_t index,
		 unsigned long *ra_flags)
{
	struct file	       *file = vma->vm_file;
	struct inode	       *inode = file_inode(file);
	struct cl_io	       *io;
	struct cl_fault_io     *fio;
	struct lu_env	       *env;
	int			rc;

	*env_ret = NULL;
	if (ll_file_nolock(file))
		return ERR_PTR(-EOPNOTSUPP);

	/*
	 * A page fault can occur while Lustre IO is already active for the
	 * current thread, e.g., when doing read/write against a user-level
	 * buffer mapped from a Lustre buffer. To avoid stomping on the
	 * existing context, a nested environment is allocated.
	 */
	env = cl_env_nested_get(nest);
	if (IS_ERR(env))
		return ERR_PTR(-EINVAL);

	*env_ret = env;

restart:
	io = vvp_env_thread_io(env);
	io->ci_obj = ll_i2info(inode)->lli_clob;
	LASSERT(io->ci_obj);

	fio = &io->u.ci_fault;
	fio->ft_index      = index;
	fio->ft_executable = vma->vm_flags & VM_EXEC;

	/*
	 * disable VM_SEQ_READ and use VM_RAND_READ to make sure that
	 * the kernel will not read other pages not covered by ldlm in
	 * filemap_nopage. we do our readahead in ll_readpage.
	 */
	if (ra_flags)
		*ra_flags = vma->vm_flags & (VM_RAND_READ|VM_SEQ_READ);
	vma->vm_flags &= ~VM_SEQ_READ;
	vma->vm_flags |= VM_RAND_READ;

	CDEBUG(D_MMAP, "vm_flags: %lx (%lu %d)\n", vma->vm_flags,
	       fio->ft_index, fio->ft_executable);

	rc = cl_io_init(env, io, CIT_FAULT, io->ci_obj);
	if (rc == 0) {
		struct vvp_io *vio = vvp_env_io(env);
		struct ll_file_data *fd = LUSTRE_FPRIVATE(file);

		LASSERT(vio->vui_cl.cis_io == io);

		/* mmap lock must be MANDATORY because it has to cache pages. */
		io->ci_lockreq = CILR_MANDATORY;
		vio->vui_fd = fd;
	} else {
		LASSERT(rc < 0);
		cl_io_fini(env, io);
		if (io->ci_need_restart)
			goto restart;

		cl_env_nested_put(nest, env);
		io = ERR_PTR(rc);
	}

	return io;
}