Example #1
0
void
lock_set_spl_spin(lock_t *lp, int new_pil, ushort_t *old_pil_addr, int old_pil)
{
	int spin_count = 1;
	int backoff;	/* current backoff */
	int backctr;	/* ctr for backoff */

	if (panicstr)
		return;

	if (ncpus == 1)
		panic("lock_set_spl: %p lock held and only one CPU", lp);

	ASSERT(new_pil > LOCK_LEVEL);

	if (&plat_lock_delay) {
		backoff = 0;
	} else {
		backoff = BACKOFF_BASE;
	}
	do {
		splx(old_pil);
		while (LOCK_HELD(lp)) {
			if (panicstr) {
				*old_pil_addr = (ushort_t)splr(new_pil);
				return;
			}
			spin_count++;
			/*
			 * Add an exponential backoff delay before trying again
			 * to touch the mutex data structure.
			 * The spin_count test and call to nulldev are to prevent
			 * the compiler optimizer from eliminating the delay loop.
			 */
			if (&plat_lock_delay) {
				plat_lock_delay(&backoff);
			} else {
				for (backctr = backoff; backctr; backctr--) {
					if (!spin_count) (void) nulldev();
				}
				backoff = backoff << 1;		/* double it */
				if (backoff > BACKOFF_CAP) {
					backoff = BACKOFF_CAP;
				}

				SMT_PAUSE();
			}
		}
		old_pil = splr(new_pil);
	} while (!lock_spin_try(lp));

	*old_pil_addr = (ushort_t)old_pil;

	if (spin_count) {
		LOCKSTAT_RECORD(LS_LOCK_SET_SPL_SPIN, lp, spin_count);
	}

	LOCKSTAT_RECORD(LS_LOCK_SET_SPL_ACQUIRE, lp, spin_count);
}
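/*
 * The backoff logic above follows a common pattern: double the delay after
 * every failed probe and clamp it at a cap.  A minimal user-space sketch of
 * that pattern using C11 atomics in place of LOCK_HELD/lock_spin_try; the
 * names and the BACKOFF_*_ITERS values are illustrative, not the kernel's.
 */
#include <stdatomic.h>
#include <stdbool.h>

#define BACKOFF_BASE_ITERS	64	/* illustrative, not the kernel's */
#define BACKOFF_CAP_ITERS	4096

static atomic_flag demo_lock = ATOMIC_FLAG_INIT;

static bool
try_acquire(void)
{
	/* atomic_flag_test_and_set returns the previous value; false means
	 * the flag was clear and we now own the lock. */
	return (!atomic_flag_test_and_set_explicit(&demo_lock,
	    memory_order_acquire));
}

void
acquire_with_backoff(void)
{
	int backoff = BACKOFF_BASE_ITERS;

	while (!try_acquire()) {
		/* Busy-wait before probing again; the volatile counter plays
		 * the same role as the spin_count/nulldev trick above. */
		for (volatile int i = backoff; i != 0; i--)
			;
		backoff = backoff << 1;		/* double it */
		if (backoff > BACKOFF_CAP_ITERS)
			backoff = BACKOFF_CAP_ITERS;
	}
}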
/*
 * Routine: 	lck_mtx_lock_spinwait
 *
 * Invoked while trying to acquire a mutex when there is contention but
 * the holder is running on another processor. We spin for up to a maximum
 * time waiting for the lock to be released.
 *
 * Called with the interlock unlocked.
 */
void
lck_mtx_lock_spinwait(
	lck_mtx_t		*lck)
{
	thread_t		holder;
	volatile lck_mtx_t	*mutex;
	uint64_t		deadline;

	if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
		mutex = lck;
	else
		mutex = &lck->lck_mtx_ptr->lck_mtx;

	KERNEL_DEBUG(
		MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN) | DBG_FUNC_NONE,
		(int)lck, (int)mutex->lck_mtx_locked, 0, 0, 0);

	deadline = mach_absolute_time() + MutexSpin;
	/*
	 * Spin while:
	 *   - mutex is locked, and
	 *   - it's locked as a spin lock, or
	 *   - owner is running on another processor, and
	 *   - owner (processor) is not idling, and
	 *   - we haven't spun for long enough.
	 */
	while ((holder = (thread_t) mutex->lck_mtx_locked) != NULL) {
	        if ((holder == (thread_t)MUTEX_LOCKED_AS_SPIN) ||
		    ((holder->machine.specFlags & OnProc) != 0 &&
		     (holder->state & TH_IDLE) == 0 &&
		     mach_absolute_time() < deadline)) {
		        cpu_pause();
			continue;
		}
		break;
	}
#if	CONFIG_DTRACE
	/*
	 * The deadline already encodes when we started spinning, so if
	 * DTrace is active we can compute the spin time backwards from it.
	 *
	 * Note that we record a different probe id depending on whether
	 * this is a direct or indirect mutex.  This allows us to 
	 * penalize only lock groups that have debug/stats enabled
	 * with dtrace processing if desired.
	 */
	if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
		LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, lck,
		    mach_absolute_time() - (deadline - MutexSpin));
	} else {
		LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, lck,
		    mach_absolute_time() - (deadline - MutexSpin));
	}
	/* The lockstat acquire event is recorded by the assembly code beneath us. */
#endif
}
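/*
 * A hedged user-space analogue of the deadline-bounded spin above: spin
 * while the lock appears held and an absolute time budget has not been
 * exhausted, then tell the caller to fall back to blocking.  now_ns() and
 * DEMO_MAX_SPIN_NS stand in for mach_absolute_time() and MutexSpin.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <time.h>

#define DEMO_MAX_SPIN_NS	10000000ull	/* 10 ms budget, illustrative */

static uint64_t
now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ((uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec);
}

/*
 * Returns true if the lock was observed free before the budget expired,
 * false if we timed out and should block instead.
 */
bool
spinwait(_Atomic uintptr_t *owner)
{
	uint64_t deadline = now_ns() + DEMO_MAX_SPIN_NS;

	while (atomic_load_explicit(owner, memory_order_relaxed) != 0) {
		if (now_ns() >= deadline)
			return (false);
		/* a cpu_pause()/SMT_PAUSE() hint would go here */
	}
	return (true);
}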
Example #3
0
/*
 * Routine: 	lck_mtx_lock_acquire
 *
 * Invoked on acquiring the mutex when there is
 * contention.
 *
 * Returns the current number of waiters.
 *
 * Called with the interlock locked.
 */
int
lck_mtx_lock_acquire(
	lck_mtx_t		*lck)
{
	thread_t		thread = current_thread();
	lck_mtx_t		*mutex;

	if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
		mutex = lck;
	else
		mutex = &lck->lck_mtx_ptr->lck_mtx;

	if (thread->pending_promoter[thread->pending_promoter_index] == mutex) {
		thread->pending_promoter[thread->pending_promoter_index] = NULL;
		if (thread->pending_promoter_index > 0)
			thread->pending_promoter_index--;
		mutex->lck_mtx_waiters--;
	}

	if (mutex->lck_mtx_waiters > 0) {
		integer_t		priority = mutex->lck_mtx_pri;
		spl_t			s = splsched();

		thread_lock(thread);
		thread->promotions++;
		thread->sched_flags |= TH_SFLAG_PROMOTED;
		if (thread->sched_pri < priority) {
			KERNEL_DEBUG_CONSTANT(
				MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
						thread->sched_pri, priority, 0, lck, 0);
			/* Do not promote past promotion ceiling */
			assert(priority <= MAXPRI_PROMOTE);
			set_sched_pri(thread, priority);
		}
		thread_unlock(thread);
		splx(s);
	}
	else
		mutex->lck_mtx_pri = 0;

#if CONFIG_DTRACE
	if (lockstat_probemap[LS_LCK_MTX_LOCK_ACQUIRE] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_ACQUIRE]) {
		if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
			LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lck, 0);
		} else {
			LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_ACQUIRE, lck, 0);
		}
	}
#endif	
	return (mutex->lck_mtx_waiters);
}
Example #4
0
/*
 *	Routine: hw_lock_lock
 *
 *	Acquire lock, spinning until it becomes available,
 *	return with preemption disabled.
 */
void
hw_lock_lock(hw_lock_t lock)
{
	thread_t	thread;
	uintptr_t	state;

	thread = current_thread();
	disable_preemption_for_thread(thread);
	state = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK;
#if	__SMP__

#if	LOCK_PRETEST
	if (ordered_load_hw(lock))
		goto contended;
#endif	// LOCK_PRETEST
	if (atomic_compare_exchange(&lock->lock_data, 0, state,
					memory_order_acquire_smp, TRUE)) {
		goto end;
	}
#if	LOCK_PRETEST
contended:
#endif	// LOCK_PRETEST
	hw_lock_lock_contended(lock, state, 0, TRUE);
end:
#else	// __SMP__
	if (lock->lock_data)
		panic("Spinlock held %p", lock);
	lock->lock_data = state;
#endif	// __SMP__
#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, lock, 0);
#endif
	return;
}
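/*
 * The acquire side above is a compare-and-swap of 0 -> state with acquire
 * ordering; hw_lock_unlock (Example #7 below) pairs it with a release
 * store.  A minimal sketch of that pairing in portable C11; the state word
 * here is an opaque value, not XNU's LCK_MTX_THREAD_TO_STATE encoding.
 */
#include <stdatomic.h>
#include <stdint.h>

typedef struct {
	_Atomic uintptr_t lock_data;
} demo_hw_lock_t;

void
demo_hw_lock_lock(demo_hw_lock_t *lock, uintptr_t state)
{
	uintptr_t expected = 0;

	/* Acquire ordering keeps the critical section from being reordered
	 * before the lock acquisition. */
	while (!atomic_compare_exchange_weak_explicit(&lock->lock_data,
	    &expected, state, memory_order_acquire, memory_order_relaxed))
		expected = 0;	/* CAS wrote the observed value; reset it */
}

void
demo_hw_lock_unlock(demo_hw_lock_t *lock)
{
	/* Release ordering publishes the critical section's stores before
	 * the lock is seen free. */
	atomic_store_explicit(&lock->lock_data, 0, memory_order_release);
}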
boolean_t
lck_rw_try_lock_exclusive(
	lck_rw_t	*lck)
{
	boolean_t	istate;

	istate = lck_interlock_lock(lck);

	if (lck->lck_rw_want_write || lck->lck_rw_want_upgrade || lck->lck_rw_shared_count) {
		/*
		 *	Can't get lock.
		 */
		lck_interlock_unlock(lck, istate);
		return(FALSE);
	}

	/*
	 *	Have lock.
	 */

	lck->lck_rw_want_write = TRUE;

	lck_interlock_unlock(lck, istate);

#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, lck, 1);
#endif
	return(TRUE);
}
/*
 *      Routine:        lck_rw_done_gen
 */
lck_rw_type_t
lck_rw_done_gen(
	lck_rw_t	*lck)
{
	boolean_t	wakeup_readers = FALSE;
	boolean_t	wakeup_writers = FALSE;
	lck_rw_type_t	lck_rw_type;
	boolean_t	istate;

	istate = lck_interlock_lock(lck);

	if (lck->lck_rw_shared_count != 0) {
		lck_rw_type = LCK_RW_TYPE_SHARED;
		lck->lck_rw_shared_count--;
	}
	else {	
		lck_rw_type = LCK_RW_TYPE_EXCLUSIVE;
		if (lck->lck_rw_want_upgrade) 
			lck->lck_rw_want_upgrade = FALSE;
		else 
			lck->lck_rw_want_write = FALSE;
	}

	/*
	 *	There is no reason to wake up a waiting thread
	 *	if the read-count is non-zero.  Consider:
	 *		we must be dropping a read lock
	 *		threads are waiting only if one wants a write lock
	 *		if there are still readers, they can't proceed
	 */

	if (lck->lck_rw_shared_count == 0) {
		if (lck->lck_w_waiting) {
			lck->lck_w_waiting = FALSE;
			wakeup_writers = TRUE;
		} 
		if (!(lck->lck_rw_priv_excl && wakeup_writers == TRUE) && 
				lck->lck_r_waiting) {
			lck->lck_r_waiting = FALSE;
			wakeup_readers = TRUE;
		}
	}

	lck_interlock_unlock(lck, istate);

	if (wakeup_readers) 
		thread_wakeup(RW_LOCK_READER_EVENT(lck));
	if (wakeup_writers) 
		thread_wakeup(RW_LOCK_WRITER_EVENT(lck));

#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck, (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE ? 1 : 0));
#endif

	return(lck_rw_type);
}
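/*
 * The wakeup policy at the end of lck_rw_done_gen: wake waiting writers
 * first, and wake waiting readers only if no writer was woken or the lock
 * does not give writers strict priority (lck_rw_priv_excl).  A small
 * sketch of just that decision over a simplified state struct (not the
 * kernel's lck_rw_t).
 */
#include <stdbool.h>

struct demo_rw_state {
	unsigned int	shared_count;
	bool		w_waiting;
	bool		r_waiting;
	bool		priv_excl;	/* writers have strict priority */
};

void
choose_wakeups(struct demo_rw_state *s, bool *wakeup_writers,
    bool *wakeup_readers)
{
	*wakeup_writers = false;
	*wakeup_readers = false;

	if (s->shared_count != 0)
		return;		/* readers still hold the lock */

	if (s->w_waiting) {
		s->w_waiting = false;
		*wakeup_writers = true;
	}
	if (!(s->priv_excl && *wakeup_writers) && s->r_waiting) {
		s->r_waiting = false;
		*wakeup_readers = true;
	}
}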
Example #7
0
/*
 *	Routine: hw_lock_unlock
 *
 *	Unconditionally release lock, release preemption level.
 */
void
hw_lock_unlock(hw_lock_t lock)
{
	__c11_atomic_store((_Atomic uintptr_t *)&lock->lock_data, 0, memory_order_release_smp);
#if __arm__ || __arm64__
	// ARM tests are only for open-source exclusion
	set_event();
#endif	// __arm__ || __arm64__
#if	CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, 0);
#endif /* CONFIG_DTRACE */
	enable_preemption();
}
Example #8
0
/*
 * Simple C support for the cases where spin locks miss on the first try.
 */
void
lock_set_spin(lock_t *lp)
{
	int spin_count = 1;
	int backoff;	/* current backoff */
	int backctr;	/* ctr for backoff */

	if (panicstr)
		return;

	if (ncpus == 1)
		panic("lock_set: %p lock held and only one CPU", lp);

	if (&plat_lock_delay) {
		backoff = 0;
	} else {
		backoff = BACKOFF_BASE;
	}

	while (LOCK_HELD(lp) || !lock_spin_try(lp)) {
		if (panicstr)
			return;
		spin_count++;
		/*
		 * Add an exponential backoff delay before trying again
		 * to touch the mutex data structure.
		 * the spin_count test and call to nulldev are to prevent
		 * the compiler optimizer from eliminating the delay loop.
		 */
		if (&plat_lock_delay) {
			plat_lock_delay(&backoff);
		} else {
			/* delay */
			for (backctr = backoff; backctr; backctr--) {
				if (!spin_count) (void) nulldev();
			}

			backoff = backoff << 1;		/* double it */
			if (backoff > BACKOFF_CAP) {
				backoff = BACKOFF_CAP;
			}
			SMT_PAUSE();
		}
	}

	if (spin_count) {
		LOCKSTAT_RECORD(LS_LOCK_SET_SPIN, lp, spin_count);
	}

	LOCKSTAT_RECORD0(LS_LOCK_SET_ACQUIRE, lp);
}
Example #9
0
void
lck_mtx_unlockspin_wakeup (
	lck_mtx_t			*lck)
{
	assert(lck->lck_mtx_waiters > 0);
	thread_wakeup_one(LCK_MTX_EVENT(lck));

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_NONE, VM_KERNEL_UNSLIDE_OR_PERM(lck), 0, 0, 1, 0);
#if CONFIG_DTRACE
	/*
	 * When there are waiters, we skip the hot-patch spot in the
	 * fastpath, so we record it here.
	 */
	LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lck, 0);
#endif
}
Example #10
0
void
lck_mtx_unlockspin_wakeup (
	lck_mtx_t			*lck)
{
	assert(lck->lck_mtx_waiters > 0);
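	/*
	 * The expression below points at the last word of the lck_mtx_t and
	 * uses that address as the wait event; newer sources wrap the same
	 * computation in an LCK_MTX_EVENT() macro (see Example #9 above).
	 */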
	thread_wakeup_one((event_t)(((unsigned int*)lck)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)));

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_NONE, (int)lck, 0, 0, 1, 0);
#if CONFIG_DTRACE
	/*
	 * When there are waiters, we skip the hot-patch spot in the
	 * fastpath, so we record it here.
	 */
	LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lck, 0);
#endif
}
Example #11
0
/*
 *	Routine: hw_lock_try
 *
 *	returns with preemption disabled on success.
 */
unsigned int
hw_lock_try(hw_lock_t lock)
{
	thread_t	thread = current_thread();
	int		success = 0;
#if	LOCK_TRY_DISABLE_INT
	long		intmask;

	intmask = disable_interrupts();
#else
	disable_preemption_for_thread(thread);
#endif	// LOCK_TRY_DISABLE_INT

#if	__SMP__
#if	LOCK_PRETEST
	if (ordered_load_hw(lock))
		goto failed;
#endif	// LOCK_PRETEST
	success = atomic_compare_exchange(&lock->lock_data, 0, LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK,
					memory_order_acquire_smp, FALSE);
#else
	if (lock->lock_data == 0) {
		lock->lock_data = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK;
		success = 1;
	}
#endif	// __SMP__

#if	LOCK_TRY_DISABLE_INT
	if (success)
		disable_preemption_for_thread(thread);
#if	LOCK_PRETEST
failed:
#endif	// LOCK_PRETEST
	restore_interrupts(intmask);
#else
#if	LOCK_PRETEST
failed:
#endif	// LOCK_PRETEST
	if (!success)
		enable_preemption();
#endif	// LOCK_TRY_DISABLE_INT
#if CONFIG_DTRACE
	if (success)
		LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, lock, 0);
#endif
	return success;
}
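/*
 * hw_lock_try makes exactly one compare-and-swap attempt and undoes the
 * preemption disable on failure, so the caller returns in its original
 * state either way.  A condensed user-space sketch of that try semantics
 * (preemption control omitted; the lock word is a bare C11 atomic).
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

typedef struct {
	_Atomic uintptr_t lock_data;
} demo_try_lock_t;

/*
 * One attempt only: return true and hold the lock, or leave it untouched
 * and return false.  No spinning, no blocking.
 */
bool
demo_hw_lock_try(demo_try_lock_t *lock, uintptr_t state)
{
	uintptr_t expected = 0;

	return (atomic_compare_exchange_strong_explicit(&lock->lock_data,
	    &expected, state, memory_order_acquire, memory_order_relaxed));
}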
/*
 *      Routine:        lck_rw_lock_exclusive_to_shared
 */
void
lck_rw_lock_exclusive_to_shared(
	lck_rw_t	*lck)
{
	boolean_t	wakeup_readers = FALSE;
	boolean_t	wakeup_writers = FALSE;
	boolean_t	istate;

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
			     (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);

	istate = lck_interlock_lock(lck);

	lck->lck_rw_shared_count++;
	if (lck->lck_rw_want_upgrade)
		lck->lck_rw_want_upgrade = FALSE;
	else
	 	lck->lck_rw_want_write = FALSE;

	if (lck->lck_w_waiting) {
		lck->lck_w_waiting = FALSE;
		wakeup_writers = TRUE;
	} 
	if (!(lck->lck_rw_priv_excl && wakeup_writers == TRUE) && 
			lck->lck_r_waiting) {
		lck->lck_r_waiting = FALSE;
		wakeup_readers = TRUE;
	}

	lck_interlock_unlock(lck, istate);

	if (wakeup_readers)
		thread_wakeup(RW_LOCK_READER_EVENT(lck));
	if (wakeup_writers)
		thread_wakeup(RW_LOCK_WRITER_EVENT(lck));

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
			     (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);

#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
#endif
}
Example #13
0
/*
 *	Routine: hw_lock_to
 *
 *	Acquire lock, spinning until it becomes available or timeout.
 *	Timeout is in mach_absolute_time ticks, return with
 *	preemption disabled.
 */
unsigned int
hw_lock_to(hw_lock_t lock, uint64_t timeout)
{
	thread_t	thread;
	uintptr_t	state;
	unsigned int success = 0;

	thread = current_thread();
	disable_preemption_for_thread(thread);
	state = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK;
#if	__SMP__

#if	LOCK_PRETEST
	if (ordered_load_hw(lock))
		goto contended;
#endif	// LOCK_PRETEST
	if (atomic_compare_exchange(&lock->lock_data, 0, state,
					memory_order_acquire_smp, TRUE)) {
		success = 1;
		goto end;
	}
#if	LOCK_PRETEST
contended:
#endif	// LOCK_PRETEST
	success = hw_lock_lock_contended(lock, state, timeout, FALSE);
end:
#else	// __SMP__
	(void)timeout;
	if (ordered_load_hw(lock) == 0) {
		ordered_store_hw(lock, state);
		success = 1;
	}
#endif	// __SMP__
#if CONFIG_DTRACE
	if (success)
		LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, lock, 0);
#endif
	return success;
}
boolean_t
lck_rw_try_lock_shared(
	lck_rw_t	*lck)
{
	boolean_t	istate;

	istate = lck_interlock_lock(lck);
/* No reader priority check here... */
	if (lck->lck_rw_want_write || lck->lck_rw_want_upgrade) {
		lck_interlock_unlock(lck, istate);
		return(FALSE);
	}

	lck->lck_rw_shared_count++;

	lck_interlock_unlock(lck, istate);

#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, lck, 0);
#endif
	return(TRUE);
}
Example #15
0
/*
 * Routine: 	lck_mtx_lock_wait
 *
 * Invoked in order to wait on contention.
 *
 * Called with the interlock locked and
 * returns it unlocked.
 */
void
lck_mtx_lock_wait (
	lck_mtx_t			*lck,
	thread_t			holder)
{
	thread_t		self = current_thread();
	lck_mtx_t		*mutex;
	__kdebug_only uintptr_t	trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
	__kdebug_only uintptr_t	trace_holder = VM_KERNEL_UNSLIDE_OR_PERM(holder);
	integer_t		priority;
	spl_t			s = splsched();
#if	CONFIG_DTRACE
	uint64_t		sleep_start = 0;

	if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
		sleep_start = mach_absolute_time();
	}
#endif

	if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
		mutex = lck;
	else
		mutex = &lck->lck_mtx_ptr->lck_mtx;

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START, trace_lck, trace_holder, 0, 0, 0);

	priority = self->sched_pri;
	if (priority < self->base_pri)
		priority = self->base_pri;
	if (priority < BASEPRI_DEFAULT)
		priority = BASEPRI_DEFAULT;

	/* Do not promote past promotion ceiling */
	priority = MIN(priority, MAXPRI_PROMOTE);

	thread_lock(holder);
	if (mutex->lck_mtx_pri == 0)
		holder->promotions++;
	holder->sched_flags |= TH_SFLAG_PROMOTED;
	if (mutex->lck_mtx_pri < priority && holder->sched_pri < priority) {
		KERNEL_DEBUG_CONSTANT(
			MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
					holder->sched_pri, priority, trace_holder, trace_lck, 0);
		set_sched_pri(holder, priority);
	}
	thread_unlock(holder);
	splx(s);

	if (mutex->lck_mtx_pri < priority)
		mutex->lck_mtx_pri = priority;
	if (self->pending_promoter[self->pending_promoter_index] == NULL) {
		self->pending_promoter[self->pending_promoter_index] = mutex;
		mutex->lck_mtx_waiters++;
	}
	else
	if (self->pending_promoter[self->pending_promoter_index] != mutex) {
		self->pending_promoter[++self->pending_promoter_index] = mutex;
		mutex->lck_mtx_waiters++;
	}

	assert_wait(LCK_MTX_EVENT(mutex), THREAD_UNINT);
	lck_mtx_ilk_unlock(mutex);

	thread_block(THREAD_CONTINUE_NULL);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
#if	CONFIG_DTRACE
	/*
	 * Record the Dtrace lockstat probe for blocking, block time
	 * measured from when we were entered.
	 */
	if (sleep_start) {
		if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
			LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, lck,
			    mach_absolute_time() - sleep_start);
		} else {
			LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, lck,
			    mach_absolute_time() - sleep_start);
		}
	}
#endif
}
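/*
 * The priority pushed onto the holder above is the waiter's effective
 * priority, floored at BASEPRI_DEFAULT and capped at the promotion
 * ceiling.  A one-function sketch of that clamping; the DEMO_* constants
 * are placeholders, not XNU's values.
 */
#define DEMO_BASEPRI_DEFAULT	31	/* placeholder */
#define DEMO_MAXPRI_PROMOTE	95	/* placeholder */

int
promotion_priority(int sched_pri, int base_pri)
{
	int priority = (sched_pri > base_pri) ? sched_pri : base_pri;

	if (priority < DEMO_BASEPRI_DEFAULT)
		priority = DEMO_BASEPRI_DEFAULT;
	if (priority > DEMO_MAXPRI_PROMOTE)	/* promotion ceiling */
		priority = DEMO_MAXPRI_PROMOTE;
	return (priority);
}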
/*
 *	Routine:	lck_rw_lock_shared_gen
 */
void
lck_rw_lock_shared_gen(
	lck_rw_t	*lck)
{
	int		i;
	wait_result_t      res;
#if	MACH_LDEBUG
	int		decrementer;
#endif	/* MACH_LDEBUG */
	boolean_t	istate;
#if	CONFIG_DTRACE
	uint64_t wait_interval = 0;
	int slept = 0;
	int readers_at_sleep;
#endif

	istate = lck_interlock_lock(lck);
#if	CONFIG_DTRACE
	readers_at_sleep = lck->lck_rw_shared_count;
#endif

#if	MACH_LDEBUG
	decrementer = DECREMENTER_TIMEOUT;
#endif	/* MACH_LDEBUG */
	while ((lck->lck_rw_want_write || lck->lck_rw_want_upgrade) &&
	    ((lck->lck_rw_shared_count == 0) || lck->lck_rw_priv_excl)) {

		i = lock_wait_time[lck->lck_rw_can_sleep ? 1 : 0];

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_CODE) | DBG_FUNC_START,
			     (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, i, 0);
#if	CONFIG_DTRACE
		if ((lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] || lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK]) && wait_interval == 0) {
			wait_interval = mach_absolute_time();
		} else {
			wait_interval = -1;
		}
#endif

		if (i != 0) {
			lck_interlock_unlock(lck, istate);
#if	MACH_LDEBUG
			if (!--decrementer)
				Debugger("timeout - wait no writers");
#endif	/* MACH_LDEBUG */
			while (--i != 0 &&
			    (lck->lck_rw_want_write || lck->lck_rw_want_upgrade) &&
			       ((lck->lck_rw_shared_count == 0) || lck->lck_rw_priv_excl))
				lck_rw_lock_pause(istate);
			istate = lck_interlock_lock(lck);
		}

		if (lck->lck_rw_can_sleep &&
		    (lck->lck_rw_want_write || lck->lck_rw_want_upgrade) &&
		    ((lck->lck_rw_shared_count == 0) || lck->lck_rw_priv_excl)) {
			lck->lck_r_waiting = TRUE;
			res = assert_wait(RW_LOCK_READER_EVENT(lck), THREAD_UNINT);
			if (res == THREAD_WAITING) {
				lck_interlock_unlock(lck, istate);
				res = thread_block(THREAD_CONTINUE_NULL);
#if	CONFIG_DTRACE
				slept = 1;
#endif
				istate = lck_interlock_lock(lck);
			}
		}
		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_CODE) | DBG_FUNC_END,
			     (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, res, 0);
	}

	lck->lck_rw_shared_count++;

	lck_interlock_unlock(lck, istate);
#if	CONFIG_DTRACE
	if (wait_interval != 0 && wait_interval != (unsigned) -1) {
		if (slept == 0) {
			LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
		} else {
			LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
			    mach_absolute_time() - wait_interval, 0,
			    (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
		}
	}
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
#endif
}
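/*
 * The condition a reader waits on appears three times above: a writer or
 * upgrader wants the lock, and either no readers currently hold it or the
 * lock gives writers strict priority.  A small predicate over a simplified
 * state struct (not the kernel's lck_rw_t) makes it explicit.
 */
#include <stdbool.h>

struct demo_rw_flags {
	unsigned int	shared_count;
	bool		want_write;
	bool		want_upgrade;
	bool		priv_excl;
};

bool
reader_must_wait(const struct demo_rw_flags *s)
{
	return ((s->want_write || s->want_upgrade) &&
	    (s->shared_count == 0 || s->priv_excl));
}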
Example #17
0
/*
 * Routine: 	lck_mtx_lock_wait
 *
 * Invoked in order to wait on contention.
 *
 * Called with the interlock locked and
 * returns it unlocked.
 */
void
lck_mtx_lock_wait (
	lck_mtx_t			*lck,
	thread_t			holder)
{
	thread_t		self = current_thread();
	lck_mtx_t		*mutex;
	integer_t		priority;
	spl_t			s = splsched();
#if	CONFIG_DTRACE
	uint64_t		sleep_start = 0;

	if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
		sleep_start = mach_absolute_time();
	}
#endif

	if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
		mutex = lck;
	else
		mutex = &lck->lck_mtx_ptr->lck_mtx;

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START, (int)lck, (int)holder, 0, 0, 0);

	priority = self->sched_pri;
	if (priority < self->priority)
		priority = self->priority;
	if (priority < BASEPRI_DEFAULT)
		priority = BASEPRI_DEFAULT;

	thread_lock(holder);
	if (mutex->lck_mtx_pri == 0)
		holder->promotions++;
	holder->sched_mode |= TH_MODE_PROMOTED;
	if (		mutex->lck_mtx_pri < priority	&&
				holder->sched_pri < priority		) {
		KERNEL_DEBUG_CONSTANT(
			MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
					holder->sched_pri, priority, (int)holder, (int)lck, 0);

		set_sched_pri(holder, priority);
	}
	thread_unlock(holder);
	splx(s);

	if (mutex->lck_mtx_pri < priority)
		mutex->lck_mtx_pri = priority;
	if (self->pending_promoter[self->pending_promoter_index] == NULL) {
		self->pending_promoter[self->pending_promoter_index] = mutex;
		mutex->lck_mtx_waiters++;
	}
	else
	if (self->pending_promoter[self->pending_promoter_index] != mutex) {
		self->pending_promoter[++self->pending_promoter_index] = mutex;
		mutex->lck_mtx_waiters++;
	}

	assert_wait((event_t)(((unsigned int*)lck)+((sizeof(lck_mtx_t)-1)/sizeof(unsigned int))), THREAD_UNINT);
	lck_mtx_ilk_unlock(mutex);

	thread_block(THREAD_CONTINUE_NULL);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
#if	CONFIG_DTRACE
	/*
	 * Record the Dtrace lockstat probe for blocking, block time
	 * measured from when we were entered.
	 */
	if (sleep_start) {
		if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
			LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, lck,
			    mach_absolute_time() - sleep_start);
		} else {
			LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, lck,
			    mach_absolute_time() - sleep_start);
		}
	}
#endif
}
boolean_t
lck_rw_lock_shared_to_exclusive(
	lck_rw_t	*lck)
{
	int	    i;
	boolean_t	    do_wakeup = FALSE;
	wait_result_t      res;
#if	MACH_LDEBUG
	int		   decrementer;
#endif	/* MACH_LDEBUG */
	boolean_t	istate;
#if	CONFIG_DTRACE
	uint64_t wait_interval = 0;
	int slept = 0;
	int readers_at_sleep = 0;
#endif

	istate = lck_interlock_lock(lck);

	lck->lck_rw_shared_count--;	

	if (lck->lck_rw_want_upgrade) {
		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_START,
			     (int)lck, lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);

		/*
		 *	Someone else has requested upgrade.
		 *	Since we've released a read lock, wake
		 *	that thread up.
		 */
		if (lck->lck_w_waiting && (lck->lck_rw_shared_count == 0)) {
			lck->lck_w_waiting = FALSE;
			do_wakeup = TRUE;
		}

		lck_interlock_unlock(lck, istate);

		if (do_wakeup) 
			thread_wakeup(RW_LOCK_WRITER_EVENT(lck));

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_END,
			     (int)lck, lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);

		return (FALSE);
	}

	lck->lck_rw_want_upgrade = TRUE;

#if	MACH_LDEBUG
	decrementer = DECREMENTER_TIMEOUT;
#endif	/* MACH_LDEBUG */
	while (lck->lck_rw_shared_count != 0) {
#if	CONFIG_DTRACE
		if (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] && wait_interval == 0) {
			wait_interval = mach_absolute_time();
			readers_at_sleep = lck->lck_rw_shared_count;
		} else {
			wait_interval = -1;
		}
#endif
		i = lock_wait_time[lck->lck_rw_can_sleep ? 1 : 0];

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX1_CODE) | DBG_FUNC_START,
			     (int)lck, lck->lck_rw_shared_count, i, 0, 0);

		if (i != 0) {
			lck_interlock_unlock(lck, istate);
#if	MACH_LDEBUG
			if (!--decrementer)
				Debugger("timeout - lck_rw_shared_count");
#endif	/* MACH_LDEBUG */
			while (--i != 0 && lck->lck_rw_shared_count != 0)
				lck_rw_lock_pause(istate);
			istate = lck_interlock_lock(lck);
		}

		if (lck->lck_rw_can_sleep && lck->lck_rw_shared_count != 0) {
			lck->lck_w_waiting = TRUE;
			res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
			if (res == THREAD_WAITING) {
				lck_interlock_unlock(lck, istate);
				res = thread_block(THREAD_CONTINUE_NULL);
#if	CONFIG_DTRACE
				slept = 1;
#endif
				istate = lck_interlock_lock(lck);
			}
		}
		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX1_CODE) | DBG_FUNC_END,
			     (int)lck, lck->lck_rw_shared_count, 0, 0, 0);
	}

	lck_interlock_unlock(lck, istate);
#if	CONFIG_DTRACE
	/*
	 * We infer whether we took the sleep/spin path above by checking readers_at_sleep.
	 */
	if (wait_interval != 0 && wait_interval != (unsigned) -1 && readers_at_sleep) {
		if (slept == 0) {
			LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lck, mach_absolute_time() - wait_interval, 0);
		} else {
			LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lck,
			    mach_absolute_time() - wait_interval, 1,
			    (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
		}
	}

	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lck, 1);
#endif
	return (TRUE);
}
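/*
 * lck_rw_lock_shared_to_exclusive can fail: only one thread may hold the
 * want_upgrade bit, and the loser has already given up its read hold.  A
 * compressed single-threaded sketch of that state transition, without the
 * interlock or the wait-for-readers loop.
 */
#include <stdbool.h>

struct demo_rw_upgrade {
	unsigned int	shared_count;
	bool		want_upgrade;
};

/*
 * Returns false if another thread already claimed the upgrade; in that
 * case the caller has also dropped its shared hold, as in the routine
 * above.
 */
bool
try_claim_upgrade(struct demo_rw_upgrade *s)
{
	s->shared_count--;		/* give up the read hold either way */

	if (s->want_upgrade)
		return (false);		/* someone else is upgrading */

	s->want_upgrade = true;
	/* the real code now waits for shared_count to drain to zero */
	return (true);
}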
/*
 *      Routine:        lck_rw_lock_exclusive
 */
void
lck_rw_lock_exclusive(
	lck_rw_t	*lck)
{
	int	   i;
	wait_result_t	res;
#if	MACH_LDEBUG
	int				decrementer;
#endif	/* MACH_LDEBUG */
	boolean_t	istate;
#if	CONFIG_DTRACE
	uint64_t wait_interval = 0;
	int slept = 0;
	int readers_at_sleep;
#endif

	istate = lck_interlock_lock(lck);
#if	CONFIG_DTRACE
	readers_at_sleep = lck->lck_rw_shared_count;
#endif

#if	MACH_LDEBUG
	decrementer = DECREMENTER_TIMEOUT;
#endif	/* MACH_LDEBUG */

	/*
	 *	Try to acquire the lck_rw_want_write bit.
	 */
	while (lck->lck_rw_want_write) {

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);
		/*
		 * Either sleeping or spinning is about to happen; start
		 * timing our delay interval now.
		 */
#if	CONFIG_DTRACE
		if ((lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] || lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK]) && wait_interval == 0) {
			wait_interval = mach_absolute_time();
		} else {
			wait_interval = -1;
		}
#endif


		i = lock_wait_time[lck->lck_rw_can_sleep ? 1 : 0];
		if (i != 0) {
			lck_interlock_unlock(lck, istate);
#if	MACH_LDEBUG
			if (!--decrementer)
				Debugger("timeout - lck_rw_want_write");
#endif	/* MACH_LDEBUG */
			while (--i != 0 && lck->lck_rw_want_write)
				lck_rw_lock_pause(istate);
			istate = lck_interlock_lock(lck);
		}

		if (lck->lck_rw_can_sleep && lck->lck_rw_want_write) {
			lck->lck_w_waiting = TRUE;
			res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
			if (res == THREAD_WAITING) {
				lck_interlock_unlock(lck, istate);
				res = thread_block(THREAD_CONTINUE_NULL);
#if	CONFIG_DTRACE
				slept = 1;
#endif
				istate = lck_interlock_lock(lck);
			}
		}
		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE_CODE) | DBG_FUNC_END, (int)lck, res, 0, 0, 0);
	}
	lck->lck_rw_want_write = TRUE;

	/* Wait for readers (and upgrades) to finish */

#if	MACH_LDEBUG
	decrementer = DECREMENTER_TIMEOUT;
#endif	/* MACH_LDEBUG */
	while ((lck->lck_rw_shared_count != 0) || lck->lck_rw_want_upgrade) {

		i = lock_wait_time[lck->lck_rw_can_sleep ? 1 : 0];

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE1_CODE) | DBG_FUNC_START,
			     (int)lck, lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, i, 0);

#if	CONFIG_DTRACE
		/*
		 * Either sleeping or spinning is about to happen; start
		 * timing our delay interval now.  If we set it
		 * to -1 we don't have accurate data so we cannot later
		 * decide to record a dtrace spin or sleep event.
		 */
		if ((lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] || lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK]) && wait_interval == 0) {
			wait_interval = mach_absolute_time();
		} else {
			wait_interval = (unsigned) -1;
		}
#endif

		if (i != 0) {
			lck_interlock_unlock(lck, istate);
#if	MACH_LDEBUG
			if (!--decrementer)
				Debugger("timeout - wait for readers");
#endif	/* MACH_LDEBUG */
			while (--i != 0 && (lck->lck_rw_shared_count != 0 ||
					    lck->lck_rw_want_upgrade))
				lck_rw_lock_pause(istate);
			istate = lck_interlock_lock(lck);
		}

		if (lck->lck_rw_can_sleep && (lck->lck_rw_shared_count != 0 || lck->lck_rw_want_upgrade)) {
			lck->lck_w_waiting = TRUE;
			res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
			if (res == THREAD_WAITING) {
				lck_interlock_unlock(lck, istate);
				res = thread_block(THREAD_CONTINUE_NULL);
#if	CONFIG_DTRACE
				slept = 1;
#endif
				istate = lck_interlock_lock(lck);
			}
		}
		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE1_CODE) | DBG_FUNC_END,
			     (int)lck, lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, res, 0);
	}

	lck_interlock_unlock(lck, istate);
#if	CONFIG_DTRACE
	/*
	 * Decide what latencies we suffered that are Dtrace events.
	 * If we have set wait_interval, then we either spun or slept.
	 * At least we get out from under the interlock before we record
	 * which is the best we can do here to minimize the impact
	 * of the tracing.
	 * If we have set wait_interval to -1, then dtrace was not enabled when we
	 * started sleeping/spinning so we don't record this event.
	 */
	if (wait_interval != 0 && wait_interval != (unsigned) -1) {
		if (slept == 0) {
			LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_EXCL_SPIN, lck,
			    mach_absolute_time() - wait_interval, 1);
		} else {
			/*
			 * For the blocking case, we also record if when we blocked
			 * it was held for read or write, and how many readers.
			 * Notice that above we recorded this before we dropped
			 * the interlock so the count is accurate.
			 */
			LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_EXCL_BLOCK, lck,
			    mach_absolute_time() - wait_interval, 1,
			    (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
		}
	}
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lck, 1);
#endif
}
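/*
 * The DTrace bookkeeping in this file follows one pattern: remember when
 * the wait began (or -1 if the probes were off at that moment), note
 * whether we ever blocked, and emit either a spin or a block record on the
 * way out.  A user-space caricature of that pattern; now_ticks() and the
 * printf calls stand in for mach_absolute_time() and LOCKSTAT_RECORD*.
 */
#include <stdint.h>
#include <stdio.h>
#include <time.h>

static uint64_t
now_ticks(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ((uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec);
}

void
record_exclusive_wait(uint64_t wait_start, int slept, int readers_at_sleep)
{
	uint64_t waited;

	/* 0: never waited; -1: probes were disabled when the wait began */
	if (wait_start == 0 || wait_start == (uint64_t)-1)
		return;

	waited = now_ticks() - wait_start;
	if (slept == 0)
		printf("excl spin:  %llu ticks\n", (unsigned long long)waited);
	else
		printf("excl block: %llu ticks, readers at sleep: %d\n",
		    (unsigned long long)waited, readers_at_sleep);
}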
Example #20
0
/*
 * mutex_vector_enter() is called from the assembly mutex_enter() routine
 * if the lock is held or is not of type MUTEX_ADAPTIVE.
 */
void
mutex_vector_enter(mutex_impl_t *lp)
{
	kthread_id_t	owner;
	hrtime_t	sleep_time = 0;	/* how long we slept */
	uint_t		spin_count = 0;	/* how many times we spun */
	cpu_t 		*cpup, *last_cpu;
	extern cpu_t	*cpu_list;
	turnstile_t	*ts;
	volatile mutex_impl_t *vlp = (volatile mutex_impl_t *)lp;
	int		backoff;	/* current backoff */
	int		backctr;	/* ctr for backoff */
	int		sleep_count = 0;

	ASSERT_STACK_ALIGNED();

	if (MUTEX_TYPE_SPIN(lp)) {
		lock_set_spl(&lp->m_spin.m_spinlock, lp->m_spin.m_minspl,
		    &lp->m_spin.m_oldspl);
		return;
	}

	if (!MUTEX_TYPE_ADAPTIVE(lp)) {
		mutex_panic("mutex_enter: bad mutex", lp);
		return;
	}

	/*
	 * Adaptive mutexes must not be acquired from above LOCK_LEVEL.
	 * We can migrate after loading CPU but before checking CPU_ON_INTR,
	 * so we must verify by disabling preemption and loading CPU again.
	 */
	cpup = CPU;
	if (CPU_ON_INTR(cpup) && !panicstr) {
		kpreempt_disable();
		if (CPU_ON_INTR(CPU))
			mutex_panic("mutex_enter: adaptive at high PIL", lp);
		kpreempt_enable();
	}

	CPU_STATS_ADDQ(cpup, sys, mutex_adenters, 1);

	if (&plat_lock_delay) {
		backoff = 0;
	} else {
		backoff = BACKOFF_BASE;
	}

	for (;;) {
spin:
		spin_count++;
		/*
		 * Add an exponential backoff delay before trying again
		 * to touch the mutex data structure.
		 * the spin_count test and call to nulldev are to prevent
		 * the compiler optimizer from eliminating the delay loop.
		 */
		if (&plat_lock_delay) {
			plat_lock_delay(&backoff);
		} else {
			for (backctr = backoff; backctr; backctr--) {
				if (!spin_count) (void) nulldev();
			}	/* delay */
			backoff = backoff << 1;			/* double it */
			if (backoff > BACKOFF_CAP) {
				backoff = BACKOFF_CAP;
			}

			SMT_PAUSE();
		}

		if (panicstr)
			return;

		if ((owner = MUTEX_OWNER(vlp)) == NULL) {
			if (mutex_adaptive_tryenter(lp))
				break;
			continue;
		}

		if (owner == curthread)
			mutex_panic("recursive mutex_enter", lp);

		/*
		 * If lock is held but owner is not yet set, spin.
		 * (Only relevant for platforms that don't have cas.)
		 */
		if (owner == MUTEX_NO_OWNER)
			continue;

		/*
		 * When searching the other CPUs, start with the one where
		 * we last saw the owner thread.  If owner is running, spin.
		 *
		 * We must disable preemption at this point to guarantee
		 * that the list doesn't change while we traverse it
		 * without the cpu_lock mutex.  While preemption is
		 * disabled, we must revalidate our cached cpu pointer.
		 */
		kpreempt_disable();
		if (cpup->cpu_next == NULL)
			cpup = cpu_list;
		last_cpu = cpup;	/* mark end of search */
		do {
			if (cpup->cpu_thread == owner) {
				kpreempt_enable();
				goto spin;
			}
		} while ((cpup = cpup->cpu_next) != last_cpu);
		kpreempt_enable();

		/*
		 * The owner appears not to be running, so block.
		 * See the Big Theory Statement for memory ordering issues.
		 */
		ts = turnstile_lookup(lp);
		MUTEX_SET_WAITERS(lp);
		membar_enter();

		/*
		 * Recheck whether owner is running after waiters bit hits
		 * global visibility (above).  If owner is running, spin.
		 *
		 * Since we are at ipl DISP_LEVEL, kernel preemption is
		 * disabled, however we still need to revalidate our cached
		 * cpu pointer to make sure the cpu hasn't been deleted.
		 */
		if (cpup->cpu_next == NULL)
			last_cpu = cpup = cpu_list;
		do {
			if (cpup->cpu_thread == owner) {
				turnstile_exit(lp);
				goto spin;
			}
		} while ((cpup = cpup->cpu_next) != last_cpu);
		membar_consumer();

		/*
		 * If owner and waiters bit are unchanged, block.
		 */
		if (MUTEX_OWNER(vlp) == owner && MUTEX_HAS_WAITERS(vlp)) {
			sleep_time -= gethrtime();
			(void) turnstile_block(ts, TS_WRITER_Q, lp,
			    &mutex_sobj_ops, NULL, NULL);
			sleep_time += gethrtime();
			sleep_count++;
		} else {
			turnstile_exit(lp);
		}
	}

	ASSERT(MUTEX_OWNER(lp) == curthread);

	if (sleep_time != 0) {
		/*
		 * Note, sleep time is the sum of all the sleeping we
		 * did.
		 */
		LOCKSTAT_RECORD(LS_MUTEX_ENTER_BLOCK, lp, sleep_time);
	}

	/*
	 * We do not count a sleep as a spin.
	 */
	if (spin_count > sleep_count)
		LOCKSTAT_RECORD(LS_MUTEX_ENTER_SPIN, lp,
		    spin_count - sleep_count);

	LOCKSTAT_RECORD0(LS_MUTEX_ENTER_ACQUIRE, lp);
}
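/*
 * The adaptive policy above reduces to: back off, then spin while the
 * owner is running on some CPU, and block on a turnstile once it is not.
 * A heavily simplified sketch of that decision loop; mutex_owner,
 * try_enter, owner_is_running, block_on_lock and backoff_delay are
 * hypothetical helpers standing in for the primitives used above.
 */
#include <stdbool.h>
#include <stddef.h>

extern void	*mutex_owner(void *lp);		/* NULL when the lock is free */
extern bool	try_enter(void *lp);
extern bool	owner_is_running(void *owner);	/* the CPU scan above */
extern void	block_on_lock(void *lp);	/* turnstile_block analogue */
extern void	backoff_delay(int *backoff);	/* plat_lock_delay analogue */

void
adaptive_enter(void *lp)
{
	void	*owner;
	int	backoff = 0;

	for (;;) {
		backoff_delay(&backoff);

		if ((owner = mutex_owner(lp)) == NULL) {
			if (try_enter(lp))
				return;		/* acquired */
			continue;
		}
		if (owner_is_running(owner))
			continue;		/* owner on CPU: keep spinning */

		block_on_lock(lp);		/* owner off CPU: sleep */
	}
}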