int
_cv_timedwait(struct cv *c, struct lock *lk, int timo, int wakesig)
{
	int flags = wakesig ? PCATCH : 0;
	int error;

	/*
	 * Can interlock without critical section/spinlock as long
	 * as we don't block before calling *sleep().  PINTERLOCKED
	 * must be passed to the *sleep() to use the manual interlock
	 * (else a new one is created which opens a timing race).
	 */
	tsleep_interlock(c, flags);

	spin_lock(&c->cv_lock);
	c->cv_waiters++;
	spin_unlock(&c->cv_lock);

	if (lk)
		error = lksleep(c, lk, flags | PINTERLOCKED, c->cv_desc, timo);
	else
		error = tsleep(c, flags | PINTERLOCKED, c->cv_desc, timo);

	return (error);
}
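
The comment above states the pattern every example on this page depends on: register the interlock first, drop whatever lock protects the wait condition, then sleep with PINTERLOCKED so a wakeup() issued in the window is not lost. A minimal hedged sketch of that pattern, assuming a hypothetical spinlock-protected flag (none of these names come from the snippet):

static void
wait_for_flag(struct spinlock *lk, volatile int *flagp)
{
	spin_lock(lk);
	while (*flagp == 0) {
		/* register the interlock before dropping the spinlock */
		tsleep_interlock(flagp, 0);
		spin_unlock(lk);
		/* a wakeup(flagp) issued here is remembered, not lost */
		tsleep(flagp, PINTERLOCKED, "wflag", 0);
		spin_lock(lk);
	}
	spin_unlock(lk);
}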
Example #2
/*
 * get - lock and return the f_offset field.
 * set - set and unlock the f_offset field.
 *
 * These routines serve the dual purpose of serializing access to the
 * f_offset field (at least on i386) and guaranteeing operational integrity
 * when multiple read()ers and write()ers are present on the same fp.
 *
 * MPSAFE
 */
static __inline off_t
vn_get_fpf_offset(struct file *fp)
{
	u_int	flags;
	u_int	nflags;

	/*
	 * Shortcut critical path.
	 */
	flags = fp->f_flag & ~FOFFSETLOCK;
	if (atomic_cmpset_int(&fp->f_flag, flags, flags | FOFFSETLOCK))
		return(fp->f_offset);

	/*
	 * The hard way
	 */
	for (;;) {
		flags = fp->f_flag;
		if (flags & FOFFSETLOCK) {
			nflags = flags | FOFFSETWAKE;
			tsleep_interlock(&fp->f_flag, 0);
			if (atomic_cmpset_int(&fp->f_flag, flags, nflags))
				tsleep(&fp->f_flag, PINTERLOCKED, "fpoff", 0);
		} else {
			nflags = flags | FOFFSETLOCK;
			if (atomic_cmpset_int(&fp->f_flag, flags, nflags))
				break;
		}
	}
	return(fp->f_offset);
}
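
Only the 'get' side is shown. A hedged sketch of what the matching 'set' side has to do: store the new offset, clear FOFFSETLOCK, and wake anyone who set FOFFSETWAKE. Illustrative only; the in-tree vn_set_fpf_offset() may differ in detail.

static __inline void
vn_set_fpf_offset_sketch(struct file *fp, off_t offset)
{
	u_int	flags;
	u_int	nflags;

	fp->f_offset = offset;		/* still holding FOFFSETLOCK */
	for (;;) {
		flags = fp->f_flag;
		cpu_ccfence();
		nflags = flags & ~(FOFFSETLOCK | FOFFSETWAKE);
		if (atomic_cmpset_int(&fp->f_flag, flags, nflags)) {
			if (flags & FOFFSETWAKE)
				wakeup(&fp->f_flag);
			break;
		}
	}
}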
Example #3
/*
 * Share-lock a mutex, block until acquired.  Recursion is allowed.
 *
 * Returns 0 on success, or the tsleep() return code on failure.
 * An error can only be returned if PCATCH is specified in the flags.
 *
 * NOTE: Shared locks get a mass-wakeup so if the tsleep fails we
 *	 do not have to chain the wakeup().
 */
static __inline int
__mtx_lock_sh(mtx_t mtx, const char *ident, int flags, int to)
{
	u_int	lock;
	u_int	nlock;
	int	error;

	for (;;) {
		lock = mtx->mtx_lock;
		if ((lock & MTX_EXCLUSIVE) == 0) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				error = 0;
				break;
			}
		} else {
			nlock = lock | MTX_SHWANTED;
			tsleep_interlock(mtx, 0);
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				error = tsleep(mtx, flags | PINTERLOCKED,
					       ident, to);
				if (error)
					break;
				++mtx_contention_count;
				/* retry */
			} else {
				tsleep_remove(curthread);
			}
		}
		++mtx_collision_count;
	}
	return (error);
}
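
The NOTE holds because the exclusive release path mass-wakes everything sleeping on the mutex when MTX_SHWANTED is set, so a shared waiter whose tsleep() fails never has to forward the wakeup. A hedged sketch of that release path (owner bookkeeping and the exclusive-waiter hand-off are omitted; illustrative only):

static __inline void
__mtx_unlock_ex_sketch(mtx_t mtx)
{
	u_int	lock;
	u_int	nlock;

	for (;;) {
		lock = mtx->mtx_lock;
		cpu_ccfence();
		KKASSERT(lock & MTX_EXCLUSIVE);

		if ((lock & MTX_MASK) == 1) {
			/* last count: drop EXCLUSIVE, consume SHWANTED */
			nlock = (lock - 1) & ~(MTX_EXCLUSIVE | MTX_SHWANTED);
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				if (lock & MTX_SHWANTED)
					wakeup(mtx);	/* mass wakeup */
				break;
			}
		} else {
			/* recursive hold: just drop one count */
			nlock = lock - 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
		}
	}
}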
Example #4
/*
 * hammer_rel_interlock() works a bit differently in that it must
 * acquire the lock in tandem with a 1->0 transition.  CHECK is
 * not used.
 *
 * TRUE is returned on 1->0 transitions with the lock held on return
 * and FALSE is returned otherwise with the lock not held.
 *
 * It is important to note that the refs are not stable and may
 * increase while we hold the lock, the TRUE indication only means
 * that we transitioned 1->0, not necessarily that we stayed at 0.
 *
 * Another thread bumping refs while we hold the lock will set CHECK,
 * causing one of the competing hammer_ref_interlock() calls to
 * return TRUE after we release our lock.
 *
 * MPSAFE
 */
int
hammer_rel_interlock(struct hammer_lock *lock, int locked)
{
	u_int lv;
	u_int nlv;

	/*
	 * In locked mode (failure/unload path) we release the
	 * ref-count but leave it locked.
	 */
	if (locked) {
		hammer_rel(lock);
		return(1);
	}

	/*
	 * Integrated reference count drop with LOCKED, plus the hot-path
	 * returns.
	 */
	for (;;) {
		lv = lock->refs;

		if (lv == 1) {
			nlv = 0 | HAMMER_REFS_LOCKED;
			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
				lock->rowner = curthread;
				return(1);
			}
		} else if ((lv & ~HAMMER_REFS_FLAGS) == 1) {
			if ((lv & HAMMER_REFS_LOCKED) == 0) {
				nlv = (lv - 1) | HAMMER_REFS_LOCKED;
				if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
					lock->rowner = curthread;
					return(1);
				}
			} else {
				nlv = lv | HAMMER_REFS_WANTED;
				tsleep_interlock(&lock->refs, 0);
				if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
					tsleep(&lock->refs, PINTERLOCKED,
					       "h0lk", 0);
				}
			}
		} else {
			nlv = (lv - 1);
			KKASSERT((int)nlv >= 0);
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				return(0);
		}
	}
	/* not reached */
}
Example #5
/*
 * smb_sleep() icky compat routine.  Leave the token held through the tsleep
 * to interlock against the sleep.  Remember that the token could be lost
 * since we blocked, so reget or release as appropriate.
 */
int
smb_sleep(void *chan, struct smb_slock *sl, int slpflags, const char *wmesg, int timo)
{
	int error;

	if (sl) {
		tsleep_interlock(chan, slpflags);
		smb_sl_unlock(sl);
		error = tsleep(chan, slpflags | PINTERLOCKED, wmesg, timo);
		if ((slpflags & PDROP) == 0)
			smb_sl_lock(sl);
	} else {
		error = tsleep(chan, slpflags, wmesg, timo);
	}
	return error;
}
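
A hedged usage sketch: the caller holds the smb_slock across the condition check, and smb_sleep() drops and (unless PDROP is passed) reacquires it. The structure and flag below are hypothetical, not from the source.

struct my_smb_obj {			/* hypothetical consumer */
	struct smb_slock sv_lock;
	int		 sv_flags;
};
#define SV_READY	0x0001		/* hypothetical flag */

static int
wait_until_ready_sketch(struct my_smb_obj *sp)
{
	int error = 0;

	smb_sl_lock(&sp->sv_lock);
	while ((sp->sv_flags & SV_READY) == 0) {
		/* another thread sets SV_READY and wakeup()s &sp->sv_flags */
		error = smb_sleep(&sp->sv_flags, &sp->sv_lock,
				  PCATCH, "smbrdy", 0);
		if (error)
			break;
	}
	smb_sl_unlock(&sp->sv_lock);
	return error;
}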
Example #6
/*
 * Lock an inode, with SYNCQ semantics.
 *
 * HAMMER2 offers shared and exclusive locks on inodes.  Pass a mask of
 * flags for options:
 *
 *	- pass HAMMER2_RESOLVE_SHARED if a shared lock is desired.  The
 *	  inode locking function will automatically set the RDONLY flag.
 *	  Shared locks are not subject to SYNCQ semantics, exclusive locks
 *	  are.
 *
 *	- pass HAMMER2_RESOLVE_ALWAYS if you need the inode's meta-data.
 *	  Most front-end inode locks do.
 *
 *	- pass HAMMER2_RESOLVE_NEVER if you do not want to require that
 *	  the inode data be resolved.  This is used by the syncthr because
 *	  it can run on an unresolved/out-of-sync cluster, and also by the
 *	  vnode reclamation code to avoid unnecessary I/O (particularly when
 *	  disposing of hundreds of thousands of cached vnodes).
 *
 * This function, along with lock4, has SYNCQ semantics.  If the inode being
 * locked is on the SYNCQ, that is it has been staged by the syncer, we must
 * block until the operation is complete (even if we can lock the inode).  In
 * order to reduce the stall time, we re-order the inode to the front of the
 * pmp->syncq prior to blocking.  This reordering VERY significantly improves
 * performance.
 *
 * The inode locking function locks the inode itself, resolves any stale
 * chains in the inode's cluster, and allocates a fresh copy of the
 * cluster with 1 ref and all the underlying chains locked.
 *
 * ip->cluster will be stable while the inode is locked.
 *
 * NOTE: We don't combine the inode/chain lock because putting away an
 *       inode would otherwise confuse multiple lock holders of the inode.
 */
void
hammer2_inode_lock(hammer2_inode_t *ip, int how)
{
	hammer2_pfs_t *pmp;

	hammer2_inode_ref(ip);
	pmp = ip->pmp;

	/* 
	 * Inode structure mutex - Shared lock
	 */
	if (how & HAMMER2_RESOLVE_SHARED) {
		hammer2_mtx_sh(&ip->lock);
		return;
	}

	/*
	 * Inode structure mutex - Exclusive lock
	 *
	 * An exclusive lock (if not recursive) must wait for inodes on
	 * SYNCQ to flush first, to ensure that meta-data dependencies such
	 * as the nlink count and related directory entries are not split
	 * across flushes.
	 *
	 * If the vnode is locked by the current thread it must be unlocked
	 * across the tsleep() to avoid a deadlock.
	 */
	hammer2_mtx_ex(&ip->lock);
	if (hammer2_mtx_refs(&ip->lock) > 1)
		return;
	while ((ip->flags & HAMMER2_INODE_SYNCQ) && pmp) {
		hammer2_spin_ex(&pmp->list_spin);
		if (ip->flags & HAMMER2_INODE_SYNCQ) {
			tsleep_interlock(&ip->flags, 0);
			atomic_set_int(&ip->flags, HAMMER2_INODE_SYNCQ_WAKEUP);
			TAILQ_REMOVE(&pmp->syncq, ip, entry);
			TAILQ_INSERT_HEAD(&pmp->syncq, ip, entry);
			hammer2_spin_unex(&pmp->list_spin);
			hammer2_mtx_unlock(&ip->lock);
			tsleep(&ip->flags, PINTERLOCKED, "h2sync", 0);
			hammer2_mtx_ex(&ip->lock);
			continue;
		}
		hammer2_spin_unex(&pmp->list_spin);
		break;
	}
}
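
A hedged sketch of the syncer side that the loop above waits on: once the inode has been flushed it is pulled off pmp->syncq, SYNCQ is cleared, and anyone who set SYNCQ_WAKEUP is woken. Illustrative only; the real syncer does considerably more work around this.

static void
syncq_release_sketch(hammer2_pfs_t *pmp, hammer2_inode_t *ip)
{
	u_int oflags;

	/* assumes SYNCQ is set and ip is still on pmp->syncq */
	hammer2_spin_ex(&pmp->list_spin);
	TAILQ_REMOVE(&pmp->syncq, ip, entry);
	oflags = ip->flags;
	atomic_clear_int(&ip->flags, HAMMER2_INODE_SYNCQ |
				     HAMMER2_INODE_SYNCQ_WAKEUP);
	hammer2_spin_unex(&pmp->list_spin);
	if (oflags & HAMMER2_INODE_SYNCQ_WAKEUP)
		wakeup(&ip->flags);
}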
Example #7
/*
 * (Backend) Feed chain data through the cluster validator and back to
 * the frontend.  Chains are fed from multiple nodes concurrently
 * and pipelined via per-node FIFOs in the XOP.
 *
 * No xop lock is needed because we are only manipulating fields under
 * our direct control.
 *
 * Returns 0 on success and a hammer2 error code if sync is permanently
 * lost.  The caller retains a ref on the chain but by convention
 * the lock is typically inherited by the xop (caller loses lock).
 *
 * Returns non-zero on error.  In this situation the caller retains a
 * ref on the chain but loses the lock (we unlock here).
 *
 * WARNING!  The chain is moving between two different threads, it must
 *	     be locked SHARED to retain its data mapping, not exclusive.
 *	     When multiple operations are in progress at once, chains fed
 *	     back to the frontend for collection can wind up being locked
 *	     in different orders, only a shared lock can prevent a deadlock.
 *
 *	     Exclusive locks may only be used by a XOP backend node thread
 *	     temporarily, with no direct or indirect dependencies (aka
 *	     blocking/waiting) on other nodes.
 */
int
hammer2_xop_feed(hammer2_xop_head_t *xop, hammer2_chain_t *chain,
		 int clindex, int error)
{
	hammer2_xop_fifo_t *fifo;

	/*
	 * Multi-threaded entry into the XOP collector.  We own the
	 * fifo->wi for our clindex.
	 */
	fifo = &xop->collect[clindex];

	while (fifo->ri == fifo->wi - HAMMER2_XOPFIFO) {
		tsleep_interlock(xop, 0);
		if (hammer2_xop_active(xop) == 0) {
			error = EINTR;
			goto done;
		}
		if (fifo->ri == fifo->wi - HAMMER2_XOPFIFO) {
			tsleep(xop, PINTERLOCKED, "h2feed", hz*60);
		}
	}
	if (chain)
		hammer2_chain_ref(chain);
	fifo->errors[fifo->wi & HAMMER2_XOPFIFO_MASK] = error;
	fifo->array[fifo->wi & HAMMER2_XOPFIFO_MASK] = chain;
	cpu_sfence();
	++fifo->wi;
	atomic_add_int(&xop->check_counter, 1);
	wakeup(&xop->check_counter);	/* XXX optimize */
	error = 0;

	/*
	 * Cleanup.  If an error occurred we eat the lock.  If no error
	 * occurred the fifo inherits the lock and gains an additional ref.
	 *
	 * The caller's ref remains in both cases.
	 */
done:
	if (error && chain)
		hammer2_chain_unlock(chain);
	return error;
}
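
The fullness test relies on the XOP FIFO index convention: ri and wi increment without bound and are masked on array access, so the ring is full when wi - ri equals the depth and empty when the indices are equal, even across unsigned wraparound. A small stand-alone sketch of that convention (hypothetical names; the depth must be a power of two):

#define SKETCH_DEPTH	4
#define SKETCH_MASK	(SKETCH_DEPTH - 1)

struct sketch_fifo {
	void	*array[SKETCH_DEPTH];
	u_int	ri;		/* read index, consumer-owned */
	u_int	wi;		/* write index, producer-owned */
};

static int
sketch_fifo_push(struct sketch_fifo *fifo, void *item)
{
	if (fifo->wi - fifo->ri == SKETCH_DEPTH)
		return (EWOULDBLOCK);		/* full */
	fifo->array[fifo->wi & SKETCH_MASK] = item;
	cpu_sfence();				/* data visible before index */
	++fifo->wi;
	return (0);
}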
Example #8
/*
 * Acquire the interlock on lock->refs.
 *
 * Return TRUE if CHECK is currently set.  Note that CHECK will not
 * be set if the reference count is 0, but can get set if this function
 * is preceded by, say, hammer_ref(), or through races with other
 * threads.  The return value allows the caller to use the same logic
 * as hammer_ref_interlock().
 *
 * MPSAFE
 */
int
hammer_get_interlock(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;
		if (lv & HAMMER_REFS_LOCKED) {
			nlv = lv | HAMMER_REFS_WANTED;
			tsleep_interlock(&lock->refs, 0);
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				tsleep(&lock->refs, PINTERLOCKED, "hilk", 0);
		} else {
			nlv = (lv | HAMMER_REFS_LOCKED);
			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
				lock->rowner = curthread;
				return((lv & HAMMER_REFS_CHECK) ? 1 : 0);
			}
		}
	}
}
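
A hedged sketch of the matching release that the WANTED sleepers above depend on: clear LOCKED (and CHECK once the caller has resolved the check condition) and wake anyone who set WANTED. Illustrative only; the in-tree release routine may differ.

static void
put_interlock_sketch(struct hammer_lock *lock, int clear_check)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;
		cpu_ccfence();
		KKASSERT(lv & HAMMER_REFS_LOCKED);
		nlv = lv & ~(HAMMER_REFS_LOCKED | HAMMER_REFS_WANTED);
		if (clear_check)
			nlv &= ~HAMMER_REFS_CHECK;
		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
			if (lv & HAMMER_REFS_WANTED)
				wakeup(&lock->refs);
			break;
		}
	}
}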
Example #9
/*
 * Helper function to wait for a reference count to become zero.
 * We set REFCNTF_WAITING and sleep if the reference count is not zero.
 *
 * In the case where REFCNTF_WAITING is already set the atomic op validates
 * that it is still set after the tsleep_interlock() call.
 *
 * Users of this waiting API must use refcount_release_wakeup() to release
 * refs instead of refcount_release().  refcount_release() will not wake
 * up waiters.
 */
void
_refcount_wait(volatile u_int *countp, const char *wstr)
{
	u_int n;
	int base_ticks = ticks;

	for (;;) {
		n = *countp;
		cpu_ccfence();
		if (n == 0)
			break;
		if ((int)(ticks - base_ticks) >= hz*60 - 1) {
			kprintf("warning: refcount_wait %s: long wait\n",
				wstr);
			base_ticks = ticks;
		}
		KKASSERT(n != REFCNTF_WAITING);	/* impossible state */
		tsleep_interlock(countp, 0);
		if (atomic_cmpset_int(countp, n, n | REFCNTF_WAITING))
			tsleep(countp, PINTERLOCKED, wstr, hz*10);
	}
}
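
A hedged sketch of the release side the comment refers to: drop one reference and, if the count collapses to just the REFCNTF_WAITING marker, clear it and wake the waiters. Illustrative only; the real refcount_release_wakeup() is more heavily optimized.

static int
refcount_release_wakeup_sketch(volatile u_int *countp)
{
	u_int n;

	for (;;) {
		n = *countp;
		cpu_ccfence();
		if (n == (REFCNTF_WAITING | 1)) {
			/* last ref with waiters: clear everything, wake */
			if (atomic_cmpset_int(countp, n, 0)) {
				wakeup(countp);
				return (1);
			}
		} else {
			KKASSERT((n & ~REFCNTF_WAITING) != 0);
			if (atomic_cmpset_int(countp, n, n - 1))
				return ((n & ~REFCNTF_WAITING) == 1);
		}
	}
}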
Example #10
int
sim_lock_sleep(void *ident, int flags, const char *wmesg, int timo,
	       sim_lock *lock)
{
	int retval;

	if (lock != &sim_mplock) {
		/* lock should be held already */
		KKASSERT(lockstatus(lock, curthread) != 0);
		tsleep_interlock(ident, flags);
		lockmgr(lock, LK_RELEASE);
		retval = tsleep(ident, flags | PINTERLOCKED, wmesg, timo);
	} else {
		retval = tsleep(ident, flags, wmesg, timo);
	}

	if (lock != &sim_mplock) {
		lockmgr(lock, LK_EXCLUSIVE);
	}

	return (retval);
}
Example #11
void
hammer_lock_ex_ident(struct hammer_lock *lock, const char *ident)
{
	thread_t td = curthread;
	u_int lv;
	u_int nlv;

	KKASSERT(lock->refs);
	for (;;) {
		lv = lock->lockval;

		if (lv == 0) {
			nlv = 1 | HAMMER_LOCKF_EXCLUSIVE;
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				lock->lowner = td;
				break;
			}
		} else if ((lv & HAMMER_LOCKF_EXCLUSIVE) &&
			   lock->lowner == td) {
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
				break;
		} else {
			if (hammer_debug_locks) {
				kprintf("hammer_lock_ex: held by %p\n",
					lock->lowner);
			}
			nlv = lv | HAMMER_LOCKF_WANTED;
			++hammer_contention_count;
			tsleep_interlock(&lock->lockval, 0);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				tsleep(&lock->lockval, PINTERLOCKED, ident, 0);
				if (hammer_debug_locks)
					kprintf("hammer_lock_ex: try again\n");
			}
		}
	}
}
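
A hedged sketch of the exclusive-release path the WANTED sleepers above are waiting for: drop the last count, clear EXCLUSIVE and WANTED, and wake the channel. Illustrative only; the real hammer_unlock() also handles shared counts.

static void
hammer_unlock_ex_sketch(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->lockval;
		cpu_ccfence();
		KKASSERT(lv & HAMMER_LOCKF_EXCLUSIVE);
		if ((lv & ~(HAMMER_LOCKF_EXCLUSIVE |
			    HAMMER_LOCKF_WANTED)) == 1) {
			/* last count: clear state, wake waiters */
			lock->lowner = NULL;
			if (atomic_cmpset_int(&lock->lockval, lv, 0)) {
				if (lv & HAMMER_LOCKF_WANTED)
					wakeup(&lock->lockval);
				break;
			}
			lock->lowner = curthread;	/* cmpset raced, retry */
		} else {
			/* recursion: just drop one count */
			nlv = lv - 1;
			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
				break;
		}
	}
}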
Example #12
/*
 * Obtain a shared lock
 *
 * We do not give pending exclusive locks priority over shared locks as
 * doing so could lead to a deadlock.
 */
void
hammer_lock_sh(struct hammer_lock *lock)
{
	thread_t td = curthread;
	u_int lv;
	u_int nlv;
	const char *ident = "hmrlck";

	KKASSERT(lock->refs);
	for (;;) {
		lv = lock->lockval;

		if ((lv & HAMMER_LOCKF_EXCLUSIVE) == 0) {
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
				break;
		} else if (lock->lowner == td) {
			/*
			 * Disallowed case; drop into the kernel debugger
			 * for now.  Continuing from the debugger ('cont')
			 * proceeds with an exclusive lock.
			 */
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				if (hammer_debug_critical)
					Debugger("hammer_lock_sh: holding ex");
				break;
			}
		} else {
			nlv = lv | HAMMER_LOCKF_WANTED;
			++hammer_contention_count;
			tsleep_interlock(&lock->lockval, 0);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
				tsleep(&lock->lockval, PINTERLOCKED, ident, 0);
		}
	}
}
Example #13
/*
 * (Frontend) collect a response from a running cluster op.
 *
 * Responses are fed from all appropriate nodes concurrently
 * and collected into a cohesive response >= collect_key.
 *
 * The collector will return the instant quorum or other requirements
 * are met, even if some nodes get behind or become non-responsive.
 *
 * HAMMER2_XOP_COLLECT_NOWAIT	- Used to 'poll' a completed collection,
 *				  usually called synchronously from the
 *				  node XOPs for the strategy code to
 *				  fake the frontend collection and complete
 *				  the BIO as soon as possible.
 *
 * HAMMER2_XOP_SYNCHRONIZER	- Request synchronization with a particular
 *				  cluster index, prevents looping when that
 *				  index is out of sync so caller can act on
 *				  the out of sync element.  ESRCH and EDEADLK
 *				  can be returned if this flag is specified.
 *
 * Returns 0 on success plus a filled out xop->cluster structure.
 * Return ENOENT on normal termination.
 * Otherwise return an error.
 */
int
hammer2_xop_collect(hammer2_xop_head_t *xop, int flags)
{
	hammer2_xop_fifo_t *fifo;
	hammer2_chain_t *chain;
	hammer2_key_t lokey;
	int error;
	int keynull;
	int adv;		/* advance the element */
	int i;
	uint32_t check_counter;

loop:
	/*
	 * First loop tries to advance pieces of the cluster which
	 * are out of sync.
	 */
	lokey = HAMMER2_KEY_MAX;
	keynull = HAMMER2_CHECK_NULL;
	check_counter = xop->check_counter;
	cpu_lfence();

	for (i = 0; i < xop->cluster.nchains; ++i) {
		chain = xop->cluster.array[i].chain;
		if (chain == NULL) {
			adv = 1;
		} else if (chain->bref.key < xop->collect_key) {
			adv = 1;
		} else {
			keynull &= ~HAMMER2_CHECK_NULL;
			if (lokey > chain->bref.key)
				lokey = chain->bref.key;
			adv = 0;
		}
		if (adv == 0)
			continue;

		/*
		 * Advance element if possible, advanced element may be NULL.
		 */
		if (chain) {
			hammer2_chain_unlock(chain);
			hammer2_chain_drop(chain);
		}
		fifo = &xop->collect[i];
		if (fifo->ri != fifo->wi) {
			cpu_lfence();
			chain = fifo->array[fifo->ri & HAMMER2_XOPFIFO_MASK];
			++fifo->ri;
			xop->cluster.array[i].chain = chain;
			if (chain == NULL) {
				/* XXX */
				xop->cluster.array[i].flags |=
							HAMMER2_CITEM_NULL;
			}
			if (fifo->wi - fifo->ri < HAMMER2_XOPFIFO / 2)
				wakeup(xop);	/* XXX optimize */
			--i;		/* loop on same index */
		} else {
			/*
			 * Retain CITEM_NULL flag.  If set just repeat EOF.
			 * If not, the NULL,0 combination indicates an
			 * operation in-progress.
			 */
			xop->cluster.array[i].chain = NULL;
			/* retain any CITEM_NULL setting */
		}
	}

	/*
	 * Determine whether the lowest collected key meets clustering
	 * requirements.  Returns:
	 *
	 * 0	 	 - key valid, cluster can be returned.
	 *
	 * ENOENT	 - normal end of scan, return ENOENT.
	 *
	 * ESRCH	 - sufficient elements collected, quorum agreement
	 *		   that lokey is not a valid element and should be
	 *		   skipped.
	 *
	 * EDEADLK	 - sufficient elements collected, no quorum agreement
	 *		   (and no agreement possible).  In this situation a
	 *		   repair is needed, for now we loop.
	 *
	 * EINPROGRESS	 - insufficient elements collected to resolve, wait
	 *		   for event and loop.
	 */
	if ((flags & HAMMER2_XOP_COLLECT_WAITALL) &&
	    xop->run_mask != HAMMER2_XOPMASK_VOP) {
		error = EINPROGRESS;
	} else {
		error = hammer2_cluster_check(&xop->cluster, lokey, keynull);
	}
	if (error == EINPROGRESS) {
		if (xop->check_counter == check_counter) {
			if (flags & HAMMER2_XOP_COLLECT_NOWAIT)
				goto done;
			tsleep_interlock(&xop->check_counter, 0);
			cpu_lfence();
			if (xop->check_counter == check_counter) {
				tsleep(&xop->check_counter, PINTERLOCKED,
					"h2coll", hz*60);
			}
		}
		goto loop;
	}
	if (error == ESRCH) {
		if (lokey != HAMMER2_KEY_MAX) {
			xop->collect_key = lokey + 1;
			goto loop;
		}
		error = ENOENT;
	}
	if (error == EDEADLK) {
		kprintf("hammer2: no quorum possible lokey %016jx\n",
			lokey);
		if (lokey != HAMMER2_KEY_MAX) {
			xop->collect_key = lokey + 1;
			goto loop;
		}
		error = ENOENT;
	}
	if (lokey == HAMMER2_KEY_MAX)
		xop->collect_key = lokey;
	else
		xop->collect_key = lokey + 1;
done:
	return error;
}
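
The EINPROGRESS branch above is a generation-counter wait: sample check_counter, register the interlock, and only sleep if the counter still holds the sampled value, so a backend's increment-and-wakeup cannot slip through the window. A generic hedged helper showing just that shape (not part of the source; the caller re-samples and loops):

static void
wait_counter_change_sketch(volatile uint32_t *counterp, uint32_t sampled,
			   const char *wmesg, int timo)
{
	if (*counterp != sampled)
		return;
	tsleep_interlock(counterp, 0);
	cpu_lfence();
	if (*counterp == sampled)
		tsleep(counterp, PINTERLOCKED, wmesg, timo);
}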
Example #14
int
debuglockmgr(struct lock *lkp, u_int flags,
	     const char *name, const char *file, int line)
{
	thread_t td;
	thread_t otd;
	int error;
	int extflags;
	int count;
	int pflags;
	int wflags;
	int timo;
#ifdef DEBUG_LOCKS
	int i;
#endif

	error = 0;

	if (mycpu->gd_intr_nesting_level &&
	    (flags & LK_NOWAIT) == 0 &&
	    (flags & LK_TYPE_MASK) != LK_RELEASE &&
	    panic_cpu_gd != mycpu
	) {

#ifndef DEBUG_LOCKS
		panic("lockmgr %s from %p: called from interrupt, ipi, "
		      "or hard code section",
		      lkp->lk_wmesg, ((int **)&lkp)[-1]);
#else
		panic("lockmgr %s from %s:%d: called from interrupt, ipi, "
		      "or hard code section",
		      lkp->lk_wmesg, file, line);
#endif
	}

#ifdef DEBUG_LOCKS
	if (mycpu->gd_spinlocks && ((flags & LK_NOWAIT) == 0)) {
		panic("lockmgr %s from %s:%d: called with %d spinlocks held",
		      lkp->lk_wmesg, file, line, mycpu->gd_spinlocks);
	}
#endif

	extflags = (flags | lkp->lk_flags) & LK_EXTFLG_MASK;
	td = curthread;

again:
	count = lkp->lk_count;
	cpu_ccfence();

	switch (flags & LK_TYPE_MASK) {
	case LK_SHARED:
		/*
		 * Shared lock critical path case
		 */
		if ((count & (LKC_EXREQ|LKC_UPREQ|LKC_EXCL)) == 0) {
			if (atomic_cmpset_int(&lkp->lk_count,
					      count, count + 1)) {
				COUNT(td, 1);
				break;
			}
			goto again;
		}

		/*
		 * If the caller already holds the lock exclusively then
		 * we silently obtain another count on the exclusive lock.
		 *
		 * WARNING!  The old FreeBSD behavior was to downgrade,
		 *	     but this creates a problem when recursions
		 *	     return to the caller and the caller expects
		 *	     its original exclusive lock to remain exclusively
		 *	     locked.
		 */
		if (lkp->lk_lockholder == td) {
			KKASSERT(count & LKC_EXCL);
			if ((extflags & LK_CANRECURSE) == 0) {
				if (extflags & LK_NOWAIT) {
					error = EBUSY;
					break;
				}
				panic("lockmgr: locking against myself");
			}
			atomic_add_int(&lkp->lk_count, 1);
			COUNT(td, 1);
			break;
		}

		/*
		 * Slow path
		 */
		pflags = (extflags & LK_PCATCH) ? PCATCH : 0;
		timo = (extflags & LK_TIMELOCK) ? lkp->lk_timo : 0;
		wflags = (td->td_flags & TDF_DEADLKTREAT) ?
				LKC_EXCL : (LKC_EXCL|LKC_EXREQ|LKC_UPREQ);

		/*
		 * Block while the lock is held exclusively or, conditionally,
		 * if other threads are trying to obtain an exclusive lock or
		 * upgrade to one.
		 */
		if (count & wflags) {
			if (extflags & LK_NOWAIT) {
				error = EBUSY;
				break;
			}
			tsleep_interlock(lkp, pflags);
			if (!atomic_cmpset_int(&lkp->lk_count, count,
					      count | LKC_SHREQ)) {
				goto again;
			}

			mycpu->gd_cnt.v_lock_name[0] = 'S';
			strncpy(mycpu->gd_cnt.v_lock_name + 1,
				lkp->lk_wmesg,
				sizeof(mycpu->gd_cnt.v_lock_name) - 2);
			++mycpu->gd_cnt.v_lock_colls;

			error = tsleep(lkp, pflags | PINTERLOCKED,
				       lkp->lk_wmesg, timo);
			if (error)
				break;
			if (extflags & LK_SLEEPFAIL) {
				error = ENOLCK;
				break;
			}
			goto again;
		}

		/*
		 * Otherwise we can bump the count
		 */
		if (atomic_cmpset_int(&lkp->lk_count, count, count + 1)) {
			COUNT(td, 1);
			break;
		}
		goto again;

	case LK_EXCLUSIVE:
		/*
		 * Exclusive lock critical path.
		 */
		if (count == 0) {
			if (atomic_cmpset_int(&lkp->lk_count, count,
					      LKC_EXCL | (count + 1))) {
				lkp->lk_lockholder = td;
				COUNT(td, 1);
				break;
			}
			goto again;
		}

		/*
		 * Recursive lock if we already hold it exclusively.
		 */
		if (lkp->lk_lockholder == td) {
			KKASSERT(count & LKC_EXCL);
			if ((extflags & LK_CANRECURSE) == 0) {
				if (extflags & LK_NOWAIT) {
					error = EBUSY;
					break;
				}
				panic("lockmgr: locking against myself");
			}
			atomic_add_int(&lkp->lk_count, 1);
			COUNT(td, 1);
			break;
		}

		/*
		 * We will block, handle LK_NOWAIT
		 */
		if (extflags & LK_NOWAIT) {
			error = EBUSY;
			break;
		}

		/*
		 * Wait until we can obtain the exclusive lock.  EXREQ is
		 * automatically cleared when all current holders release
		 * so if we abort the operation we can safely leave it set.
		 * There might be other exclusive requesters.
		 */
		pflags = (extflags & LK_PCATCH) ? PCATCH : 0;
		timo = (extflags & LK_TIMELOCK) ? lkp->lk_timo : 0;

		tsleep_interlock(lkp, pflags);
		if (!atomic_cmpset_int(&lkp->lk_count, count,
				       count | LKC_EXREQ)) {
			goto again;
		}

		mycpu->gd_cnt.v_lock_name[0] = 'X';
		strncpy(mycpu->gd_cnt.v_lock_name + 1,
			lkp->lk_wmesg,
			sizeof(mycpu->gd_cnt.v_lock_name) - 2);
		++mycpu->gd_cnt.v_lock_colls;

		error = tsleep(lkp, pflags | PINTERLOCKED,
			       lkp->lk_wmesg, timo);
		if (error)
			break;
		if (extflags & LK_SLEEPFAIL) {
			error = ENOLCK;
			break;
		}
		goto again;

	case LK_DOWNGRADE:
		/*
		 * Downgrade an exclusive lock into a shared lock.  All
		 * counts on a recursive exclusive lock become shared.
		 *
		 * This function always succeeds.
		 */
		if (lkp->lk_lockholder != td ||
		    (count & (LKC_EXCL|LKC_MASK)) != (LKC_EXCL|1)) {
			panic("lockmgr: not holding exclusive lock");
		}

#ifdef DEBUG_LOCKS
		for (i = 0; i < LOCKMGR_DEBUG_ARRAY_SIZE; i++) {
			if (td->td_lockmgr_stack[i] == lkp &&
			    td->td_lockmgr_stack_id[i] > 0
			) {
				td->td_lockmgr_stack_id[i]--;
				break;
			}
		}
#endif
		/*
		 * NOTE! Must NULL-out lockholder before releasing LKC_EXCL.
		 */
		otd = lkp->lk_lockholder;
		lkp->lk_lockholder = NULL;
		if (atomic_cmpset_int(&lkp->lk_count, count,
				      count & ~(LKC_EXCL|LKC_SHREQ))) {
			if (count & LKC_SHREQ)
				wakeup(lkp);
			break;
		}
		lkp->lk_lockholder = otd;
		goto again;

	case LK_EXCLUPGRADE:
		/*
		 * Upgrade from a single shared lock to an exclusive lock.
		 *
		 * If another process is ahead of us to get an upgrade,
		 * then we want to fail rather than have an intervening
		 * exclusive access.  The shared lock is released on
		 * failure.
		 */
		if (count & LKC_UPREQ) {
			flags = LK_RELEASE;
			error = EBUSY;
			goto again;
		}
		/* fall through into normal upgrade */

	case LK_UPGRADE:
		/*
		 * Upgrade a shared lock to an exclusive one.  This can cause
		 * the lock to be temporarily released and stolen by other
		 * threads.  LK_SLEEPFAIL or LK_NOWAIT may be used to detect
		 * this case, or use LK_EXCLUPGRADE.
		 *
		 * If the lock is already exclusively owned by us, this
		 * operation is a NOP.
		 *
		 * If we return an error (even NOWAIT), the current lock will
		 * be released.
		 *
		 * Start with the critical path.
		 */
		if ((count & (LKC_UPREQ|LKC_EXCL|LKC_MASK)) == 1) {
			if (atomic_cmpset_int(&lkp->lk_count, count,
					      count | LKC_EXCL)) {
				lkp->lk_lockholder = td;
				break;
			}
			goto again;
		}

		/*
		 * If we already hold the lock exclusively this operation
		 * succeeds and is a NOP.
		 */
		if (count & LKC_EXCL) {
			if (lkp->lk_lockholder == td)
				break;
			panic("lockmgr: upgrade unowned lock");
		}
		if ((count & LKC_MASK) == 0)
			panic("lockmgr: upgrade unowned lock");

		/*
		 * We cannot upgrade without blocking at this point.
		 */
		if (extflags & LK_NOWAIT) {
			flags = LK_RELEASE;
			error = EBUSY;
			goto again;
		}

		/*
		 * Release the shared lock and request the upgrade.
		 */
		pflags = (extflags & LK_PCATCH) ? PCATCH : 0;
		timo = (extflags & LK_TIMELOCK) ? lkp->lk_timo : 0;
		tsleep_interlock(lkp, pflags);
		wflags = (count & LKC_UPREQ) ? LKC_EXREQ : LKC_UPREQ;

		/*
		 * If someone else owns UPREQ and this transition would
		 * allow it to be granted, we have to grant it.  Otherwise
		 * we release the shared lock.
		 */
		if ((count & (LKC_UPREQ|LKC_MASK)) == (LKC_UPREQ | 1)) {
			wflags |= LKC_EXCL | LKC_UPGRANT;
			wflags |= count;
			wflags &= ~LKC_UPREQ;
		} else {
			wflags |= (count - 1);
		}

		if (atomic_cmpset_int(&lkp->lk_count, count, wflags)) {
			COUNT(td, -1);

			/*
			 * Must wakeup the thread granted the upgrade.
			 */
			if ((count & (LKC_UPREQ|LKC_MASK)) == (LKC_UPREQ | 1))
				wakeup(lkp);

			mycpu->gd_cnt.v_lock_name[0] = 'U';
			strncpy(mycpu->gd_cnt.v_lock_name + 1,
				lkp->lk_wmesg,
				sizeof(mycpu->gd_cnt.v_lock_name) - 2);
			++mycpu->gd_cnt.v_lock_colls;

			error = tsleep(lkp, pflags | PINTERLOCKED,
				       lkp->lk_wmesg, timo);
			if (error)
				break;
			if (extflags & LK_SLEEPFAIL) {
				error = ENOLCK;
				break;
			}

			/*
			 * Refactor to either LK_EXCLUSIVE or LK_WAITUPGRADE,
			 * depending on whether we were able to acquire the
			 * LKC_UPREQ bit.
			 */
			if (count & LKC_UPREQ)
				flags = LK_EXCLUSIVE;	/* someone else */
			else
				flags = LK_WAITUPGRADE;	/* we own the bit */
		}
		goto again;

	case LK_WAITUPGRADE:
		/*
		 * We own the LKC_UPREQ bit, wait until we are granted the
		 * exclusive lock (LKC_UPGRANT is set).
		 *
		 * IF THE OPERATION FAILS (tsleep error or tsleep+LK_SLEEPFAIL),
		 * we have to undo the upgrade request and clean up any lock
		 * that might have been granted via a race.
		 */
		if (count & LKC_UPGRANT) {
			if (atomic_cmpset_int(&lkp->lk_count, count,
					      count & ~LKC_UPGRANT)) {
				lkp->lk_lockholder = td;
				KKASSERT(count & LKC_EXCL);
				break;
			}
			/* retry */
		} else {
			pflags = (extflags & LK_PCATCH) ? PCATCH : 0;
			timo = (extflags & LK_TIMELOCK) ? lkp->lk_timo : 0;
			tsleep_interlock(lkp, pflags);
			if (atomic_cmpset_int(&lkp->lk_count, count, count)) {

				mycpu->gd_cnt.v_lock_name[0] = 'U';
				strncpy(mycpu->gd_cnt.v_lock_name + 1,
					lkp->lk_wmesg,
					sizeof(mycpu->gd_cnt.v_lock_name) - 2);
				++mycpu->gd_cnt.v_lock_colls;

				error = tsleep(lkp, pflags | PINTERLOCKED,
					       lkp->lk_wmesg, timo);
				if (error) {
					undo_upreq(lkp);
					break;
				}
				if (extflags & LK_SLEEPFAIL) {
					error = ENOLCK;
					undo_upreq(lkp);
					break;
				}
			}
			/* retry */
		}
		goto again;

	case LK_RELEASE:
		/*
		 * Release the currently held lock.  If releasing the current
		 * lock as part of an error return, error will ALREADY be
		 * non-zero.
		 *
		 * When releasing the last lock we automatically transition
		 * LKC_UPREQ to LKC_EXCL|1.
		 *
		 * WARNING! We cannot detect when there are multiple exclusive
		 *	    requests pending.  We clear EXREQ unconditionally
		 *	    on the 1->0 transition so it is possible for
		 *	    shared requests to race the next exclusive
		 *	    request.
		 *
		 * Always succeeds.
		 */
		if ((count & LKC_MASK) == 0)
			panic("lockmgr: LK_RELEASE: no lock held");

		if (count & LKC_EXCL) {
			if (lkp->lk_lockholder != LK_KERNTHREAD &&
			    lkp->lk_lockholder != td) {
				panic("lockmgr: pid %d, not exlusive "
				      "lock holder thr %p/%p unlocking",
				    (td->td_proc ? td->td_proc->p_pid : -1),
				    td, lkp->lk_lockholder);
			}
			if ((count & (LKC_UPREQ|LKC_MASK)) == 1) {
				/*
				 * Last exclusive count is being released
				 */
				otd = lkp->lk_lockholder;
				lkp->lk_lockholder = NULL;
				if (!atomic_cmpset_int(&lkp->lk_count, count,
					      (count - 1) &
					   ~(LKC_EXCL|LKC_EXREQ|LKC_SHREQ))) {
					lkp->lk_lockholder = otd;
					goto again;
				}
				if (count & (LKC_EXREQ|LKC_SHREQ))
					wakeup(lkp);
				/* success */
			} else if ((count & (LKC_UPREQ|LKC_MASK)) ==
				   (LKC_UPREQ | 1)) {
				/*
				 * Last exclusive count is being released but
				 * an upgrade request is present, automatically
				 * grant an exclusive state to the owner of
				 * the upgrade request.
				 */
				otd = lkp->lk_lockholder;
				lkp->lk_lockholder = NULL;
				if (!atomic_cmpset_int(&lkp->lk_count, count,
						(count & ~LKC_UPREQ) |
						LKC_UPGRANT)) {
					lkp->lk_lockholder = otd;
				}
				wakeup(lkp);
				/* success */
			} else {
				otd = lkp->lk_lockholder;
				if (!atomic_cmpset_int(&lkp->lk_count, count,
						       count - 1)) {
					goto again;
				}
				/* success */
			}
			/* success */
			if (otd != LK_KERNTHREAD)
				COUNT(td, -1);
		} else {
			if ((count & (LKC_UPREQ|LKC_MASK)) == 1) {
				/*
				 * Last shared count is being released.
				 */
				if (!atomic_cmpset_int(&lkp->lk_count, count,
					      (count - 1) &
					       ~(LKC_EXREQ|LKC_SHREQ))) {
					goto again;
				}
				if (count & (LKC_EXREQ|LKC_SHREQ))
					wakeup(lkp);
				/* success */
			} else if ((count & (LKC_UPREQ|LKC_MASK)) ==
				   (LKC_UPREQ | 1)) {
				/*
				 * Last shared count is being released but
				 * an upgrade request is present, automatically
				 * grant an exclusive state to the owner of
				 * the upgrade request.
				 */
				if (!atomic_cmpset_int(&lkp->lk_count, count,
					      (count & ~LKC_UPREQ) |
					      LKC_EXCL | LKC_UPGRANT)) {
					goto again;
				}
				wakeup(lkp);
			} else {
				if (!atomic_cmpset_int(&lkp->lk_count, count,
						       count - 1)) {
					goto again;
				}
			}
			/* success */
			COUNT(td, -1);
		}
		break;

	default:
		panic("lockmgr: unknown locktype request %d",
		    flags & LK_TYPE_MASK);
		/* NOTREACHED */
	}
	return (error);
}
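
For reference, a hedged usage sketch of the lockmgr() interface implemented above; lockinit(), LK_EXCLUSIVE and LK_RELEASE are the standard DragonFly entry points, while the softc structure is hypothetical.

struct sketch_softc {			/* hypothetical consumer */
	struct lock	sc_lock;
	int		sc_state;
};

static void
sketch_softc_init(struct sketch_softc *sc)
{
	lockinit(&sc->sc_lock, "sksc", 0, LK_CANRECURSE);
}

static void
sketch_softc_update(struct sketch_softc *sc, int state)
{
	lockmgr(&sc->sc_lock, LK_EXCLUSIVE);	/* may sleep via the paths above */
	sc->sc_state = state;
	lockmgr(&sc->sc_lock, LK_RELEASE);
}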
Example #15
/*
 * do an ioctl operation on a pfsnode (vp).
 * (vp) is not locked on entry or exit.
 */
static int
procfs_ioctl(struct vop_ioctl_args *ap)
{
	struct pfsnode *pfs = VTOPFS(ap->a_vp);
	struct proc *procp;
	struct proc *p;
	int error;
	int signo;
	struct procfs_status *psp;
	unsigned char flags;

	procp = pfs_pfind(pfs->pfs_pid);
	if (procp == NULL)
		return ENOTTY;
	p = curproc;
	if (p == NULL) {
		error = EINVAL;
		goto done;
	}

	/* Can't trace a process that's currently exec'ing. */ 
	if ((procp->p_flags & P_INEXEC) != 0) {
		error = EAGAIN;
		goto done;
	}
	if (!CHECKIO(p, procp) || p_trespass(ap->a_cred, procp->p_ucred)) {
		error = EPERM;
		goto done;
	}

	switch (ap->a_command) {
	case PIOCBIS:
	  spin_lock(&procp->p_spin);
	  procp->p_stops |= *(unsigned int*)ap->a_data;
	  spin_unlock(&procp->p_spin);
	  break;
	case PIOCBIC:
	  spin_lock(&procp->p_spin);
	  procp->p_stops &= ~*(unsigned int*)ap->a_data;
	  spin_unlock(&procp->p_spin);
	  break;
	case PIOCSFL:
	  /*
	   * NFLAGS is "non-suser_xxx flags" -- currently, only
	   * PF_ISUGID ("ignore set u/g id");
	   */
#define NFLAGS	(PF_ISUGID)
	  flags = (unsigned char)*(unsigned int*)ap->a_data;
	  if (flags & NFLAGS && (error = priv_check_cred(ap->a_cred, PRIV_ROOT, 0)))
	    goto done;
	  procp->p_pfsflags = flags;
	  break;
	case PIOCGFL:
	  *(unsigned int*)ap->a_data = (unsigned int)procp->p_pfsflags;
	  break;
	case PIOCSTATUS:
	  /*
	   * NOTE: syscall entry deals with stopevents and may run without
	   *	   the MP lock.
	   */
	  psp = (struct procfs_status *)ap->a_data;
	  psp->flags = procp->p_pfsflags;
	  psp->events = procp->p_stops;
	  spin_lock(&procp->p_spin);
	  if (procp->p_step) {
	    psp->state = 0;
	    psp->why = procp->p_stype;
	    psp->val = procp->p_xstat;
	    spin_unlock(&procp->p_spin);
	  } else {
	    psp->state = 1;
	    spin_unlock(&procp->p_spin);
	    psp->why = 0;	/* Not defined values */
	    psp->val = 0;	/* Not defined values */
	  }
	  break;
	case PIOCWAIT:
	  /*
	   * NOTE: syscall entry deals with stopevents and may run without
	   *	   the MP lock.
	   */
	  psp = (struct procfs_status *)ap->a_data;
	  spin_lock(&procp->p_spin);
	  while (procp->p_step == 0) {
	    tsleep_interlock(&procp->p_stype, PCATCH);
	    spin_unlock(&procp->p_spin);
	    if (procp->p_stops == 0) {
		error = EINVAL;
		goto done;
	    }
	    if (procp->p_flags & P_POSTEXIT) {
		error = EINVAL;
		goto done;
	    }
	    if (procp->p_flags & P_INEXEC) {
		error = EAGAIN;
		goto done;
	    }
	    error = tsleep(&procp->p_stype, PCATCH | PINTERLOCKED,
			   "piocwait", 0);
	    if (error)
	      goto done;
	    spin_lock(&procp->p_spin);
	  }
	  spin_unlock(&procp->p_spin);
	  psp->state = 1;	/* It stopped */
	  psp->flags = procp->p_pfsflags;
	  psp->events = procp->p_stops;
	  psp->why = procp->p_stype;	/* why it stopped */
	  psp->val = procp->p_xstat;	/* any extra info */
	  break;
	case PIOCCONT:	/* Restart a proc */
	  /*
	   * NOTE: syscall entry deals with stopevents and may run without
	   *	   the MP lock.  However, the caller is presumably interlocked
	   *	   by having waited.
	   */
	  if (procp->p_step == 0) {
	    error = EINVAL;	/* Can only start a stopped process */
	    goto done;
	  }
	  if ((signo = *(int*)ap->a_data) != 0) {
	    if (signo >= NSIG || signo <= 0) {
	      error = EINVAL;
	      goto done;
	    }
	    ksignal(procp, signo);
	  }
	  procp->p_step = 0;
	  wakeup(&procp->p_step);
	  break;
	default:
	  error = ENOTTY;
	  goto done;
	}
	error = 0;
done:
	pfs_pdone(procp);
	return error;
}
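
A hedged sketch of the other half of the PIOCWAIT/PIOCCONT handshake: the traced process's stop path sets p_step, wakes the debugger sleeping on &p->p_stype (the PIOCWAIT case above), then sleeps on &p->p_step until PIOCCONT clears it. Illustrative only; the real stopevent() sits in the syscall/signal path and also consults p_stops.

static void
stopevent_sketch(struct proc *p, unsigned int event, unsigned int val)
{
	p->p_xstat = val;
	p->p_stype = event;

	spin_lock(&p->p_spin);
	p->p_step = 1;
	wakeup(&p->p_stype);		/* unblocks PIOCWAIT */
	while (p->p_step) {
		tsleep_interlock(&p->p_step, 0);
		spin_unlock(&p->p_spin);
		tsleep(&p->p_step, PINTERLOCKED, "stopevt", 0);
		spin_lock(&p->p_spin);
	}
	spin_unlock(&p->p_spin);
}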
Example #16
/*
 * Wait for async lock completion or abort.  Returns ENOLCK if an abort
 * occurred.
 */
int
mtx_wait_link(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
	indefinite_info_t info;
	int error;

	indefinite_init(&info, mtx->mtx_ident, 1,
			((link->state & MTX_LINK_LINKED_SH) ? 'm' : 'M'));

	/*
	 * Sleep.  Handle false wakeups, interruptions, etc.
	 * The link may also have been aborted.  The LINKED
	 * bit was set by this cpu so we can test it without
	 * fences.
	 */
	error = 0;
	while (link->state & MTX_LINK_LINKED) {
		tsleep_interlock(link, 0);
		cpu_lfence();
		if (link->state & MTX_LINK_LINKED) {
			error = tsleep(link, flags | PINTERLOCKED,
				       mtx->mtx_ident, to);
			if (error)
				break;
		}
		if ((mtx->mtx_flags & MTXF_NOCOLLSTATS) == 0)
			indefinite_check(&info);
	}

	/*
	 * We need at least a lfence (load fence) to ensure our cpu does not
	 * reorder loads (of data outside the lock structure) prior to the
	 * remote cpu's release, since the above test may have run without
	 * any atomic interactions.
	 *
	 * If we do not do this then state updated by the other cpu before
	 * releasing its lock may not be read cleanly by our cpu when this
	 * function returns.  Even though the other cpu ordered its stores,
	 * our loads can still be out of order.
	 */
	cpu_mfence();

	/*
	 * We are done, make sure the link structure is unlinked.
	 * It may still be on the list due to e.g. EINTR or
	 * EWOULDBLOCK.
	 *
	 * It is possible for the tsleep to race an ABORT and cause
	 * error to be 0.
	 *
	 * The tsleep() can be woken up for numerous reasons and error
	 * might be zero in situations where we intend to return an error.
	 *
	 * (This is the synchronous case so state cannot be CALLEDBACK)
	 */
	switch(link->state) {
	case MTX_LINK_ACQUIRED:
	case MTX_LINK_CALLEDBACK:
		error = 0;
		break;
	case MTX_LINK_ABORTED:
		error = ENOLCK;
		break;
	case MTX_LINK_LINKED_EX:
	case MTX_LINK_LINKED_SH:
		mtx_delete_link(mtx, link);
		/* fall through */
	default:
		if (error == 0)
			error = EWOULDBLOCK;
		break;
	}

	/*
	 * Clear state on status returned.
	 */
	link->state = MTX_LINK_IDLE;

	if ((mtx->mtx_flags & MTXF_NOCOLLSTATS) == 0)
		indefinite_done(&info);

	return error;
}
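
A hedged sketch of the hand-off that ends the wait above: the releasing (or aborting) thread marks the queued link ACQUIRED or ABORTED and wakes it. Illustrative only; the real hand-off also unlinks the entry and transfers the lock count under the mutex's own interlock.

static void
mtx_handoff_sketch(mtx_t *mtx, mtx_link_t *link, int aborted)
{
	if (aborted) {
		link->state = MTX_LINK_ABORTED;
	} else {
		mtx->mtx_owner = link->owner;	/* hand the lock to the waiter */
		link->state = MTX_LINK_ACQUIRED;
	}
	cpu_sfence();			/* publish state before waking */
	wakeup(link);
}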
Example #17
/*
 * The hammer_*_interlock() and hammer_*_interlock_done() functions are
 * more sophisticated versions which handle MP transition races and block
 * when necessary.
 *
 * hammer_ref_interlock() bumps the ref-count and conditionally acquires
 * the interlock for 0->1 transitions or if the CHECK is found to be set.
 *
 * This case will return TRUE, the interlock will be held, and the CHECK
 * bit also set.  Other threads attempting to ref will see the CHECK bit
 * and block until we clean up.
 *
 * FALSE is returned for transitions other than 0->1 when the CHECK bit
 * is not found to be set, or if the function loses the race with another
 * thread.
 *
 * TRUE is only returned to one thread and the others will block.
 * Effectively a TRUE indicator means 'someone transitioned 0->1
 * and you are the first guy to successfully lock it after that, so you
 * need to check'.  Due to races the ref-count may be greater than 1 upon
 * return.
 *
 * MPSAFE
 */
int
hammer_ref_interlock(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	/*
	 * Integrated reference count bump, lock, and check, with hot-path.
	 *
	 * (a) Return 1	(+LOCKED, +CHECK)	0->1 transition
	 * (b) Return 0 (-LOCKED, -CHECK)	N->N+1 transition
	 * (c) Break out (+CHECK)		Check condition and Cannot lock
	 * (d) Return 1 (+LOCKED, +CHECK)	Successfully locked
	 */
	for (;;) {
		lv = lock->refs;
		if (lv == 0) {
			nlv = 1 | HAMMER_REFS_LOCKED | HAMMER_REFS_CHECK;
			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
				lock->rowner = curthread;
				return(1);
			}
		} else {
			nlv = (lv + 1);
			if ((lv & ~HAMMER_REFS_FLAGS) == 0)
				nlv |= HAMMER_REFS_CHECK;
			if ((nlv & HAMMER_REFS_CHECK) == 0) {
				if (atomic_cmpset_int(&lock->refs, lv, nlv))
					return(0);
			} else if (lv & HAMMER_REFS_LOCKED) {
				/* CHECK also set here */
				if (atomic_cmpset_int(&lock->refs, lv, nlv))
					break;
			} else {
				/* CHECK also set here */
				nlv |= HAMMER_REFS_LOCKED;
				if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
					lock->rowner = curthread;
					return(1);
				}
			}
		}
	}

	/*
	 * Deferred check condition because we were unable to acquire the
	 * lock.  We must block until the check condition is cleared due
	 * to a race with another thread, or we are able to acquire the
	 * lock.
	 *
	 * (a) Return 0	(-CHECK)		Another thread handled it
	 * (b) Return 1 (+LOCKED, +CHECK)	We handled it.
	 */
	for (;;) {
		lv = lock->refs;
		if ((lv & HAMMER_REFS_CHECK) == 0)
			return(0);
		if (lv & HAMMER_REFS_LOCKED) {
			tsleep_interlock(&lock->refs, 0);
			nlv = (lv | HAMMER_REFS_WANTED);
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				tsleep(&lock->refs, PINTERLOCKED, "h1lk", 0);
		} else {
			/* CHECK also set here */
			nlv = lv | HAMMER_REFS_LOCKED;
			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
				lock->rowner = curthread;
				return(1);
			}
		}
	}
	/* not reached */
}
Example #18
/*
 * Exclusive-lock a mutex, block until acquired.  Recursion is allowed.
 *
 * Returns 0 on success, or the tsleep() return code on failure.
 * An error can only be returned if PCATCH is specified in the flags.
 */
static __inline int
__mtx_lock_ex(mtx_t mtx, mtx_link_t link, const char *ident, int flags, int to)
{
	u_int	lock;
	u_int	nlock;
	int	error;

	for (;;) {
		lock = mtx->mtx_lock;
		if (lock == 0) {
			nlock = MTX_EXCLUSIVE | 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
				mtx->mtx_owner = curthread;
				error = 0;
				break;
			}
		} else if ((lock & MTX_EXCLUSIVE) &&
			   mtx->mtx_owner == curthread) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				error = 0;
				break;
			}
		} else {
			/*
			 * Clearing MTX_EXLINK in lock causes us to loop until
			 * MTX_EXLINK is available.  However, to avoid
			 * unnecessary cpu cache traffic we poll instead.
			 *
			 * Setting MTX_EXLINK in nlock causes us to loop until
			 * we can acquire MTX_EXLINK.
			 *
			 * Also set MTX_EXWANTED coincident with EXLINK, if
			 * not already set.
			 */
			thread_t td;

			if (lock & MTX_EXLINK) {
				cpu_pause();
				++mtx_collision_count;
				continue;
			}
			td = curthread;
			/*lock &= ~MTX_EXLINK;*/
			nlock = lock | MTX_EXWANTED | MTX_EXLINK;
			++td->td_critcount;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				/*
				 * Check for early abort
				 */
				if (link->state == MTX_LINK_ABORTED) {
					atomic_clear_int(&mtx->mtx_lock,
							 MTX_EXLINK);
					--td->td_critcount;
					error = ENOLCK;
					if (mtx->mtx_link == NULL) {
						atomic_clear_int(&mtx->mtx_lock,
								 MTX_EXWANTED);
					}
					break;
				}

				/*
				 * Success.  Link in our structure then
				 * release EXLINK and sleep.
				 */
				link->owner = td;
				link->state = MTX_LINK_LINKED;
				if (mtx->mtx_link) {
					link->next = mtx->mtx_link;
					link->prev = link->next->prev;
					link->next->prev = link;
					link->prev->next = link;
				} else {
					link->next = link;
					link->prev = link;
					mtx->mtx_link = link;
				}
				tsleep_interlock(link, 0);
				atomic_clear_int(&mtx->mtx_lock, MTX_EXLINK);
				--td->td_critcount;

				error = tsleep(link, flags | PINTERLOCKED,
					       ident, to);
				++mtx_contention_count;

				/*
				 * Normal unlink, we should own the exclusive
				 * lock now.
				 */
				if (link->state == MTX_LINK_LINKED)
					mtx_delete_link(mtx, link);
				if (link->state == MTX_LINK_ACQUIRED) {
					KKASSERT(mtx->mtx_owner == link->owner);
					error = 0;
					break;
				}

				/*
				 * Aborted lock (mtx_abort_ex called).
				 */
				if (link->state == MTX_LINK_ABORTED) {
					error = ENOLCK;
					break;
				}

				/*
				 * tsleep error, else retry.
				 */
				if (error)
					break;
			} else {
				--td->td_critcount;
			}
		}
		++mtx_collision_count;
	}
	return (error);
}
Example #19
/*
 * Stop a running timer and ensure that any running callout completes before
 * returning.  If the timer is running on another cpu this function may block
 * to interlock against the callout.  If the callout is currently executing
 * or blocked in another thread this function may also block to interlock
 * against the callout.
 *
 * The caller must be careful to avoid deadlocks, either by using
 * callout_init_lk() (which uses the lockmgr lock cancelation feature),
 * by using tokens and dealing with breaks in the serialization, or using
 * the lockmgr lock cancelation feature yourself in the callout callback
 * function.
 *
 * callout_stop() returns non-zero if the callout was pending.
 */
static int
_callout_stop(struct callout *c, int issync)
{
	globaldata_t gd = mycpu;
	globaldata_t tgd;
	softclock_pcpu_t sc;
	int flags;
	int nflags;
	int rc;
	int cpuid;

#ifdef INVARIANTS
        if ((c->c_flags & CALLOUT_DID_INIT) == 0) {
		callout_init(c);
		kprintf(
		    "callout_stop(%p) from %p: callout was not initialized\n",
		    c, ((int **)&c)[-1]);
		print_backtrace(-1);
	}
#endif
	crit_enter_gd(gd);

	/*
	 * Fast path operations:
	 *
	 * If ARMED and owned by our cpu, or not ARMED, and other simple
	 * conditions are met, we can just clear ACTIVE and EXECUTED
	 * and we are done.
	 */
	for (;;) {
		flags = c->c_flags;
		cpu_ccfence();

		cpuid = CALLOUT_FLAGS_TO_CPU(flags);

		/*
		 * Can't handle an armed callout in the fast path if it is
		 * not on the current cpu.  We must atomically increment the
		 * IPI count for the IPI we intend to send and break out of
		 * the fast path to enter the slow path.
		 */
		if (flags & CALLOUT_ARMED) {
			if (gd->gd_cpuid != cpuid) {
				nflags = flags + 1;
				if (atomic_cmpset_int(&c->c_flags,
						      flags, nflags)) {
					/* break to slow path */
					break;
				}
				continue;	/* retry */
			}
		} else {
			cpuid = gd->gd_cpuid;
			KKASSERT((flags & CALLOUT_IPI_MASK) == 0);
			KKASSERT((flags & CALLOUT_PENDING) == 0);
		}

		/*
		 * Process pending IPIs and retry (only if not called from
		 * an IPI).
		 */
		if (flags & CALLOUT_IPI_MASK) {
			lwkt_process_ipiq();
			continue;	/* retry */
		}

		/*
		 * Transition to the stopped state, recover the EXECUTED
		 * status.  If pending we cannot clear ARMED until after
		 * we have removed (c) from the callwheel.
		 *
		 * NOTE: The callout might already not be armed but in this
		 *	 case it should also not be pending.
		 */
		nflags = flags & ~(CALLOUT_ACTIVE |
				   CALLOUT_EXECUTED |
				   CALLOUT_WAITING |
				   CALLOUT_PENDING);

		/* NOTE: IPI_MASK already tested */
		if ((flags & CALLOUT_PENDING) == 0)
			nflags &= ~CALLOUT_ARMED;
		if (atomic_cmpset_int(&c->c_flags, flags, nflags)) {
			/*
			 * Can only remove from callwheel if currently
			 * pending.
			 */
			if (flags & CALLOUT_PENDING) {
				sc = &softclock_pcpu_ary[gd->gd_cpuid];
				if (sc->next == c)
					sc->next = TAILQ_NEXT(c, c_links.tqe);
				TAILQ_REMOVE(
					&sc->callwheel[c->c_time & cwheelmask],
					c,
					c_links.tqe);
				c->c_func = NULL;

				/*
				 * NOTE: Can't clear ARMED until we have
				 *	 physically removed (c) from the
				 *	 callwheel.
				 *
				 * NOTE: WAITING bit race exists when doing
				 *	 unconditional bit clears.
				 */
				callout_maybe_clear_armed(c);
				if (c->c_flags & CALLOUT_WAITING)
					flags |= CALLOUT_WAITING;
			}

			/*
			 * ARMED has been cleared at this point and (c)
			 * might now be stale.  Only good for wakeup()s.
			 */
			if (flags & CALLOUT_WAITING)
				wakeup(c);

			goto skip_slow;
		}
		/* retry */
	}

	/*
	 * Slow path (and not called via an IPI).
	 *
	 * When ARMED to a different cpu the stop must be processed on that
	 * cpu.  Issue the IPI and wait for completion.  We have already
	 * incremented the IPI count.
	 */
	tgd = globaldata_find(cpuid);
	lwkt_send_ipiq3(tgd, callout_stop_ipi, c, issync);

	for (;;) {
		int flags;
		int nflags;

		flags = c->c_flags;
		cpu_ccfence();
		if ((flags & CALLOUT_IPI_MASK) == 0)	/* fast path */
			break;
		nflags = flags | CALLOUT_WAITING;
		tsleep_interlock(c, 0);
		if (atomic_cmpset_int(&c->c_flags, flags, nflags)) {
			tsleep(c, PINTERLOCKED, "cstp1", 0);
		}
	}

skip_slow:

	/*
	 * If (issync) we must also wait for any in-progress callbacks to
	 * complete, unless the stop is being executed from the callback
	 * itself.  The EXECUTED flag is set prior to the callback
	 * being made so our existing flags status already has it.
	 *
	 * If auto-lock mode is being used, this is where we cancel any
	 * blocked lock that is potentially preventing the target cpu
	 * from completing the callback.
	 */
	while (issync) {
		intptr_t *runp;
		intptr_t runco;

		sc = &softclock_pcpu_ary[cpuid];
		if (gd->gd_curthread == &sc->thread)	/* stop from cb */
			break;
		runp = &sc->running;
		runco = *runp;
		cpu_ccfence();
		if ((runco & ~(intptr_t)1) != (intptr_t)c)
			break;
		if (c->c_flags & CALLOUT_AUTOLOCK)
			lockmgr(c->c_lk, LK_CANCEL_BEG);
		tsleep_interlock(c, 0);
		if (atomic_cmpset_long(runp, runco, runco | 1))
			tsleep(c, PINTERLOCKED, "cstp3", 0);
		if (c->c_flags & CALLOUT_AUTOLOCK)
			lockmgr(c->c_lk, LK_CANCEL_END);
	}

	crit_exit_gd(gd);
	rc = (flags & CALLOUT_EXECUTED) != 0;

	return rc;
}
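
A hedged sketch of the softclock side that the issync wait above depends on: while the callback runs, sc->running holds the callout pointer; a stopping thread ORs in the low bit, and when the callback returns the softclock thread clears sc->running and wakes the callout if that bit was set. Illustrative only; the real dispatch loop does this with additional flag handling.

static void
softclock_run_one_sketch(softclock_pcpu_t sc, struct callout *c)
{
	void	(*c_func)(void *) = c->c_func;
	void	*c_arg = c->c_arg;
	intptr_t *runp = &sc->running;
	intptr_t runco;

	*runp = (intptr_t)c;		/* published while the callback runs */
	c_func(c_arg);

	/* pick up any waiter bit that was OR'd in while we ran */
	for (;;) {
		runco = *runp;
		cpu_ccfence();
		if (atomic_cmpset_long(runp, runco, 0))
			break;
	}
	if (runco & 1)
		wakeup(c);		/* a _callout_stop(issync) waiter */
}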