Exemple #1
0
/**
 * API independent part for page fault initialization.
 *
 * \param vma      - virtual memory area addressed by the page fault
 * \param env_ret  - out: lu_env obtained for the fault; NULL on every
 *                   failure return
 * \param nest     - nesting state passed to cl_env_nested_get()/_put()
 * \param index    - page index corresponding to the fault
 * \param ra_flags - out, may be NULL: the VM_RAND_READ/VM_SEQ_READ bits that
 *                   were set in vma->vm_flags before this function rewrote
 *                   them
 *
 * \return initialized cl_io for the fault operation on success.
 * \retval ERR_PTR(-EOPNOTSUPP) if ll_file_nolock() is true for the file
 * \retval ERR_PTR(-EINVAL) if the env can't be obtained
 * \return other error codes from cl_io_init(), wrapped in ERR_PTR().
 */
static struct cl_io *
ll_fault_io_init(struct vm_area_struct *vma, struct lu_env **env_ret,
		 struct cl_env_nest *nest, pgoff_t index,
		 unsigned long *ra_flags)
{
	struct file	       *file = vma->vm_file;
	struct inode	       *inode = file_inode(file);
	struct cl_io	       *io;
	struct cl_fault_io     *fio;
	struct lu_env	       *env;
	int			rc;

	*env_ret = NULL;
	if (ll_file_nolock(file))
		return ERR_PTR(-EOPNOTSUPP);

	/*
	 * page fault can be called when lustre IO is
	 * already active for the current thread, e.g., when doing read/write
	 * against user level buffer mapped from Lustre buffer. To avoid
	 * stomping on existing context, optionally force an allocation of a new
	 * one.
	 */
	env = cl_env_nested_get(nest);
	if (IS_ERR(env))
		return ERR_PTR(-EINVAL);

	*env_ret = env;

	/*
	 * disable VM_SEQ_READ and use VM_RAND_READ to make sure that
	 * the kernel will not read other pages not covered by ldlm in
	 * filemap_nopage. we do our readahead in ll_readpage.
	 *
	 * This is done exactly once, ahead of the restart label: if it ran
	 * again on a restart, *ra_flags would be overwritten with the bits
	 * forced just below and the caller's original hints would be lost.
	 */
	if (ra_flags)
		*ra_flags = vma->vm_flags & (VM_RAND_READ | VM_SEQ_READ);
	vma->vm_flags &= ~VM_SEQ_READ;
	vma->vm_flags |= VM_RAND_READ;

restart:
	io = vvp_env_thread_io(env);
	io->ci_obj = ll_i2info(inode)->lli_clob;
	LASSERT(io->ci_obj);

	fio = &io->u.ci_fault;
	fio->ft_index      = index;
	fio->ft_executable = vma->vm_flags & VM_EXEC;

	CDEBUG(D_MMAP, "vm_flags: %lx (%lu %d)\n", vma->vm_flags,
	       fio->ft_index, fio->ft_executable);

	rc = cl_io_init(env, io, CIT_FAULT, io->ci_obj);
	if (rc == 0) {
		struct vvp_io *vio = vvp_env_io(env);
		struct ll_file_data *fd = LUSTRE_FPRIVATE(file);

		LASSERT(vio->vui_cl.cis_io == io);

		/* mmap lock must be MANDATORY it has to cache pages. */
		io->ci_lockreq = CILR_MANDATORY;
		vio->vui_fd = fd;
	} else {
		LASSERT(rc < 0);
		cl_io_fini(env, io);
		if (io->ci_need_restart)
			goto restart;

		/* The env is handed back here; don't leave a stale pointer. */
		cl_env_nested_put(nest, env);
		*env_ret = NULL;
		io = ERR_PTR(rc);
	}

	return io;
}
Exemple #2
0
/**
 * Lock upcall function that is executed either when a reply to ENQUEUE rpc is
 * received from a server, or after osc_enqueue_base() matched a local DLM
 * lock.
 */
static int osc_lock_upcall(void *cookie, int errcode)
{
	struct osc_lock	 *olck  = cookie;
	struct cl_lock_slice    *slice = &olck->ols_cl;
	struct cl_lock	  *lock  = slice->cls_lock;
	struct lu_env	   *env;
	struct cl_env_nest       nest;

	ENTRY;
	env = cl_env_nested_get(&nest);
	if (!IS_ERR(env)) {
		int rc;

		cl_lock_mutex_get(env, lock);

		LASSERT(lock->cll_state >= CLS_QUEUING);
		if (olck->ols_state == OLS_ENQUEUED) {
			olck->ols_state = OLS_UPCALL_RECEIVED;
			rc = ldlm_error2errno(errcode);
		} else if (olck->ols_state == OLS_CANCELLED) {
			rc = -EIO;
		} else {
			CERROR("Impossible state: %d\n", olck->ols_state);
			LBUG();
		}
		if (rc) {
			struct ldlm_lock *dlmlock;

			dlmlock = ldlm_handle2lock(&olck->ols_handle);
			if (dlmlock != NULL) {
				lock_res_and_lock(dlmlock);
				spin_lock(&osc_ast_guard);
				LASSERT(olck->ols_lock == NULL);
				dlmlock->l_ast_data = NULL;
				olck->ols_handle.cookie = 0ULL;
				spin_unlock(&osc_ast_guard);
				ldlm_lock_fail_match_locked(dlmlock);
				unlock_res_and_lock(dlmlock);
				LDLM_LOCK_PUT(dlmlock);
			}
		} else {
			if (olck->ols_glimpse)
				olck->ols_glimpse = 0;
			osc_lock_upcall0(env, olck);
		}

		/* Error handling, some errors are tolerable. */
		if (olck->ols_locklessable && rc == -EUSERS) {
			/* This is a tolerable error, turn this lock into
			 * lockless lock.
			 */
			osc_object_set_contended(cl2osc(slice->cls_obj));
			LASSERT(slice->cls_ops == &osc_lock_ops);

			/* Change this lock to ldlmlock-less lock. */
			osc_lock_to_lockless(env, olck, 1);
			olck->ols_state = OLS_GRANTED;
			rc = 0;
		} else if (olck->ols_glimpse && rc == -ENAVAIL) {
			osc_lock_lvb_update(env, olck, rc);
			cl_lock_delete(env, lock);
			/* Hide the error. */
			rc = 0;
		}

		if (rc == 0) {
			/* For AGL case, the RPC sponsor may exits the cl_lock
			*  processing without wait() called before related OSC
			*  lock upcall(). So update the lock status according
			*  to the enqueue result inside AGL upcall(). */
			if (olck->ols_agl) {
				lock->cll_flags |= CLF_FROM_UPCALL;
				cl_wait_try(env, lock);
				lock->cll_flags &= ~CLF_FROM_UPCALL;
				if (!olck->ols_glimpse)
					olck->ols_agl = 0;
			}
			cl_lock_signal(env, lock);
			/* del user for lock upcall cookie */
			cl_unuse_try(env, lock);
		} else {
			/* del user for lock upcall cookie */
			cl_lock_user_del(env, lock);
			cl_lock_error(env, lock, rc);
		}

		/* release cookie reference, acquired by osc_lock_enqueue() */
		cl_lock_hold_release(env, lock, "upcall", lock);
		cl_lock_mutex_put(env, lock);

		lu_ref_del(&lock->cll_reference, "upcall", lock);
		/* This maybe the last reference, so must be called after
		 * cl_lock_mutex_put(). */
		cl_lock_put(env, lock);

		cl_env_nested_put(&nest, env);
	} else {
		/* should never happen, similar to osc_ldlm_blocking_ast(). */
		LBUG();
	}
	RETURN(errcode);
}
Exemple #3
0
/**
 * Shared page_mkwrite implementation for the rhel5 and rhel6 methods.
 *
 * Sets up a fault IO for \a vmpage with ft_mkwrite and ft_writable set, runs
 * it through cl_io_loop(), and then re-checks the page under the page lock.
 *
 * \param vma    - virtual memory area containing the page
 * \param vmpage - page that is about to become writable
 * \param retry  - out: set to true when the page was found clean after the
 *                 IO (written only in that case, never cleared here)
 *
 * \retval 0        success; \a vmpage is left locked
 * \retval -ENODATA the page no longer has a mapping (truncated)
 * \retval -EAGAIN  the page is no longer dirty; \a *retry is set
 * \return otherwise the error from ll_fault_io_init(), io->ci_result, or
 *         the result of cl_io_loop().
 */
static int ll_page_mkwrite0(struct vm_area_struct *vma, struct page *vmpage,
			    bool *retry)
{
	struct lu_env	   *env;
	struct cl_io	    *io;
	struct vvp_io	   *vio;
	struct cl_env_nest       nest;
	int		      result;
	sigset_t	     set;

	io = ll_fault_io_init(vma, &env, &nest, vmpage->index, NULL);
	if (IS_ERR(io)) {
		result = PTR_ERR(io);
		goto out;
	}

	result = io->ci_result;
	if (result < 0)
		goto out_io;

	io->u.ci_fault.ft_mkwrite = 1;
	io->u.ci_fault.ft_writable = 1;

	vio = vvp_env_io(env);
	vio->u.fault.ft_vma    = vma;
	vio->u.fault.ft_vmpage = vmpage;

	/*
	 * NOTE(review): presumably blocks every signal except SIGKILL and
	 * SIGTERM for the duration of the IO - confirm against
	 * cfs_block_sigsinv().
	 */
	set = cfs_block_sigsinv(sigmask(SIGKILL) | sigmask(SIGTERM));
	result = cl_io_loop(env, io);
	cfs_restore_sigs(set);

	if (result == 0) {
		lock_page(vmpage);
		if (!vmpage->mapping) {
			unlock_page(vmpage);

			/* page was truncated and lock was cancelled, return
			 * ENODATA so that VM_FAULT_NOPAGE will be returned
			 * to handle_mm_fault().
			 */
			result = -ENODATA;
		} else if (!PageDirty(vmpage)) {
			/* race, the page has been cleaned by ptlrpcd after
			 * it was unlocked, it has to be added into dirty
			 * cache again otherwise this soon-to-dirty page won't
			 * consume any grants, even worse if this page is being
			 * transferred because it will break RPC checksum.
			 */
			unlock_page(vmpage);

			CDEBUG(D_MMAP, "Race on page_mkwrite %p/%lu, page has been written out, retry.\n",
			       vmpage, vmpage->index);

			*retry = true;
			result = -EAGAIN;
		} else {
			/* Page is still mapped and dirty: keep it locked and
			 * flag the inode as having modified data.
			 */
			struct inode *inode = file_inode(vma->vm_file);
			struct ll_inode_info *lli = ll_i2info(inode);

			spin_lock(&lli->lli_lock);
			lli->lli_flags |= LLIF_DATA_MODIFIED;
			spin_unlock(&lli->lli_lock);
		}
	}

out_io:
	cl_io_fini(env, io);
	cl_env_nested_put(&nest, env);
out:
	CDEBUG(D_MMAP, "%s mkwrite with %d\n", current->comm, result);
	LASSERT(ergo(result == 0, PageLocked(vmpage)));

	return result;
}