void
lock_set_spl_spin(lock_t *lp, int new_pil, ushort_t *old_pil_addr, int old_pil)
{
    int spin_count = 1;
    int backoff;    /* current backoff */
    int backctr;    /* ctr for backoff */

    if (panicstr)
        return;

    if (ncpus == 1)
        panic("lock_set_spl: %p lock held and only one CPU", lp);

    ASSERT(new_pil > LOCK_LEVEL);

    if (&plat_lock_delay) {
        backoff = 0;
    } else {
        backoff = BACKOFF_BASE;
    }
    do {
        splx(old_pil);
        while (LOCK_HELD(lp)) {
            if (panicstr) {
                *old_pil_addr = (ushort_t)splr(new_pil);
                return;
            }
            spin_count++;
            /*
             * Add an exponential backoff delay before trying again
             * to touch the mutex data structure.
             * spin_count test and call to nulldev are to prevent
             * compiler optimizer from eliminating the delay loop.
             */
            if (&plat_lock_delay) {
                plat_lock_delay(&backoff);
            } else {
                for (backctr = backoff; backctr; backctr--) {
                    if (!spin_count)
                        (void) nulldev();
                }
                backoff = backoff << 1;     /* double it */
                if (backoff > BACKOFF_CAP) {
                    backoff = BACKOFF_CAP;
                }
                SMT_PAUSE();
            }
        }
        old_pil = splr(new_pil);
    } while (!lock_spin_try(lp));

    *old_pil_addr = (ushort_t)old_pil;

    if (spin_count) {
        LOCKSTAT_RECORD(LS_LOCK_SET_SPL_SPIN, lp, spin_count);
    }

    LOCKSTAT_RECORD(LS_LOCK_SET_SPL_ACQUIRE, lp, spin_count);
}
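/*
 * Illustrative aside (not part of the kernel sources above): a minimal
 * userspace sketch of the same capped exponential-backoff idea, built on
 * C11 atomics instead of LOCK_HELD()/lock_spin_try(). The names and the
 * BACKOFF_*_SKETCH values are hypothetical; the point is only the shape
 * of the loop: probe the lock, delay, double the delay up to a cap.
 */
#include <stdatomic.h>

#define BACKOFF_BASE_SKETCH 50      /* hypothetical initial delay, in pause iterations */
#define BACKOFF_CAP_SKETCH  1600    /* hypothetical cap on the delay */

/* Stand-in for SMT_PAUSE(): a PAUSE hint on x86, a compiler barrier elsewhere. */
static inline void cpu_relax(void)
{
#if defined(__x86_64__) || defined(__i386__)
    __builtin_ia32_pause();
#else
    __asm__ volatile("" ::: "memory");
#endif
}

/* Spin on a simple flag lock with capped exponential backoff between probes. */
static void spin_lock_backoff(atomic_flag *lock)
{
    int backoff = BACKOFF_BASE_SKETCH;

    while (atomic_flag_test_and_set_explicit(lock, memory_order_acquire)) {
        /* Delay before touching the lock word again, then double the delay. */
        for (int i = 0; i < backoff; i++)
            cpu_relax();
        backoff <<= 1;                  /* double it */
        if (backoff > BACKOFF_CAP_SKETCH)
            backoff = BACKOFF_CAP_SKETCH;
    }
}

static void spin_unlock_backoff(atomic_flag *lock)
{
    atomic_flag_clear_explicit(lock, memory_order_release);
}
/* Usage: static atomic_flag f = ATOMIC_FLAG_INIT; spin_lock_backoff(&f); ... spin_unlock_backoff(&f); */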
/*
 * Routine:    lck_mtx_lock_spinwait
 *
 * Invoked trying to acquire a mutex when there is contention but
 * the holder is running on another processor. We spin for up to a maximum
 * time waiting for the lock to be released.
 *
 * Called with the interlock unlocked.
 */
void
lck_mtx_lock_spinwait(
    lck_mtx_t       *lck)
{
    thread_t        holder;
    volatile lck_mtx_t  *mutex;
    uint64_t        deadline;

    if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
        mutex = lck;
    else
        mutex = &lck->lck_mtx_ptr->lck_mtx;

    KERNEL_DEBUG(
        MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN) | DBG_FUNC_NONE,
        (int)lck, (int)mutex->lck_mtx_locked, 0, 0, 0);

    deadline = mach_absolute_time() + MutexSpin;
    /*
     * Spin while:
     *   - mutex is locked, and
     *   - it's locked as a spin lock, or
     *   - owner is running on another processor, and
     *   - owner (processor) is not idling, and
     *   - we haven't spun for long enough.
     */
    while ((holder = (thread_t) mutex->lck_mtx_locked) != NULL) {
        if ((holder == (thread_t)MUTEX_LOCKED_AS_SPIN) ||
            ((holder->machine.specFlags & OnProc) != 0 &&
             (holder->state & TH_IDLE) == 0 &&
             mach_absolute_time() < deadline)) {
            cpu_pause();
            continue;
        }
        break;
    }
#if CONFIG_DTRACE
    /*
     * We've already kept a count via deadline of how long we spun.
     * If dtrace is active, then we compute backwards to decide how
     * long we spun.
     *
     * Note that we record a different probe id depending on whether
     * this is a direct or indirect mutex. This allows us to
     * penalize only lock groups that have debug/stats enabled
     * with dtrace processing if desired.
     */
    if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
        LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, lck,
            mach_absolute_time() - (deadline - MutexSpin));
    } else {
        LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, lck,
            mach_absolute_time() - (deadline - MutexSpin));
    }
    /* The lockstat acquire event is recorded by the assembly code beneath us. */
#endif
}
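/*
 * Illustrative aside: lck_mtx_lock_spinwait spins only up to a deadline
 * (mach_absolute_time() + MutexSpin) before giving up. Below is a hedged
 * userspace sketch of the same "bounded spin, then block" pattern, using a
 * pthread mutex and CLOCK_MONOTONIC; MUTEX_SPIN_NS and the helper names
 * are made up for the example.
 */
#include <pthread.h>
#include <stdint.h>
#include <time.h>

/* Monotonic time in nanoseconds. */
static uint64_t now_ns(void)
{
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec;
}

#define MUTEX_SPIN_NS 20000ull   /* 20 microseconds, hypothetical spin budget */

/* Try the lock until the spin budget is exhausted, then block. */
static void lock_spinwait_then_block(pthread_mutex_t *m)
{
    uint64_t deadline = now_ns() + MUTEX_SPIN_NS;

    while (pthread_mutex_trylock(m) != 0) {
        if (now_ns() >= deadline) {
            /* Spin budget exhausted: block until the holder releases. */
            pthread_mutex_lock(m);
            return;
        }
        /* A brief pause between probes would go here. */
    }
    /* trylock succeeded: lock is held on return. */
}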
/*
 * Routine:    lck_mtx_lock_acquire
 *
 * Invoked on acquiring the mutex when there is
 * contention.
 *
 * Returns the current number of waiters.
 *
 * Called with the interlock locked.
 */
int
lck_mtx_lock_acquire(
    lck_mtx_t   *lck)
{
    thread_t    thread = current_thread();
    lck_mtx_t   *mutex;

    if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
        mutex = lck;
    else
        mutex = &lck->lck_mtx_ptr->lck_mtx;

    if (thread->pending_promoter[thread->pending_promoter_index] == mutex) {
        thread->pending_promoter[thread->pending_promoter_index] = NULL;
        if (thread->pending_promoter_index > 0)
            thread->pending_promoter_index--;
        mutex->lck_mtx_waiters--;
    }

    if (mutex->lck_mtx_waiters > 0) {
        integer_t   priority = mutex->lck_mtx_pri;
        spl_t       s = splsched();

        thread_lock(thread);
        thread->promotions++;
        thread->sched_flags |= TH_SFLAG_PROMOTED;
        if (thread->sched_pri < priority) {
            KERNEL_DEBUG_CONSTANT(
                MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROMOTE) | DBG_FUNC_NONE,
                thread->sched_pri, priority, 0, lck, 0);
            /* Do not promote past promotion ceiling */
            assert(priority <= MAXPRI_PROMOTE);
            set_sched_pri(thread, priority);
        }
        thread_unlock(thread);
        splx(s);
    } else
        mutex->lck_mtx_pri = 0;

#if CONFIG_DTRACE
    if (lockstat_probemap[LS_LCK_MTX_LOCK_ACQUIRE] ||
        lockstat_probemap[LS_LCK_MTX_EXT_LOCK_ACQUIRE]) {
        if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
            LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lck, 0);
        } else {
            LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_ACQUIRE, lck, 0);
        }
    }
#endif
    return (mutex->lck_mtx_waiters);
}
/*
 *  Routine: hw_lock_lock
 *
 *  Acquire lock, spinning until it becomes available,
 *  return with preemption disabled.
 */
void
hw_lock_lock(hw_lock_t lock)
{
    thread_t    thread;
    uintptr_t   state;

    thread = current_thread();
    disable_preemption_for_thread(thread);
    state = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK;
#if __SMP__
#if LOCK_PRETEST
    if (ordered_load_hw(lock))
        goto contended;
#endif  // LOCK_PRETEST
    if (atomic_compare_exchange(&lock->lock_data, 0, state,
        memory_order_acquire_smp, TRUE)) {
        goto end;
    }
#if LOCK_PRETEST
contended:
#endif  // LOCK_PRETEST
    hw_lock_lock_contended(lock, state, 0, TRUE);
end:
#else   // __SMP__
    if (lock->lock_data)
        panic("Spinlock held %p", lock);
    lock->lock_data = state;
#endif  // __SMP__
#if CONFIG_DTRACE
    LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, lock, 0);
#endif
    return;
}
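/*
 * Illustrative aside: the essence of hw_lock_lock's fast path is a
 * compare-exchange of the lock word from 0 to an owner tag with acquire
 * ordering, and hw_lock_unlock's release (shown further below) is a plain
 * store of 0 with release ordering. A self-contained C11 sketch of that
 * pair follows; the type and function names are hypothetical, and there is
 * no preemption-disable or contended slow path here.
 */
#include <stdatomic.h>
#include <stdint.h>

/* A one-word lock: 0 means free, non-zero encodes the owner. */
typedef struct { _Atomic uintptr_t lock_data; } hwlock_sketch_t;

/* Acquire: CAS the word from 0 to an owner tag with acquire ordering. */
static void hwlock_sketch_lock(hwlock_sketch_t *l, uintptr_t owner_state)
{
    uintptr_t expected = 0;
    while (!atomic_compare_exchange_weak_explicit(&l->lock_data,
            &expected, owner_state,
            memory_order_acquire, memory_order_relaxed)) {
        expected = 0;   /* CAS overwrites expected on failure; reset it */
    }
}

/* Release: plain store of 0 with release ordering. */
static void hwlock_sketch_unlock(hwlock_sketch_t *l)
{
    atomic_store_explicit(&l->lock_data, 0, memory_order_release);
}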
boolean_t
lck_rw_try_lock_exclusive(
    lck_rw_t    *lck)
{
    boolean_t   istate;

    istate = lck_interlock_lock(lck);

    if (lck->lck_rw_want_write || lck->lck_rw_want_upgrade || lck->lck_rw_shared_count) {
        /*
         *  Can't get lock.
         */
        lck_interlock_unlock(lck, istate);
        return (FALSE);
    }

    /*
     *  Have lock.
     */

    lck->lck_rw_want_write = TRUE;

    lck_interlock_unlock(lck, istate);

#if CONFIG_DTRACE
    LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, lck, 1);
#endif
    return (TRUE);
}
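/*
 * Illustrative aside: a typical caller-side use of a try-exclusive
 * primitive such as lck_rw_try_lock_exclusive, shown here with a POSIX
 * rwlock. The helper name and the shared counter are hypothetical.
 */
#include <pthread.h>
#include <stdbool.h>

/* Attempt the write lock opportunistically; let the caller retry later on failure. */
static bool update_if_uncontended(pthread_rwlock_t *rw, int *shared, int value)
{
    if (pthread_rwlock_trywrlock(rw) != 0)
        return false;           /* readers or another writer hold the lock */

    *shared = value;            /* exclusive access held here */
    pthread_rwlock_unlock(rw);
    return true;
}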
/*
 *  Routine:    lck_rw_done_gen
 */
lck_rw_type_t
lck_rw_done_gen(
    lck_rw_t    *lck)
{
    boolean_t   wakeup_readers = FALSE;
    boolean_t   wakeup_writers = FALSE;
    lck_rw_type_t   lck_rw_type;
    boolean_t   istate;

    istate = lck_interlock_lock(lck);

    if (lck->lck_rw_shared_count != 0) {
        lck_rw_type = LCK_RW_TYPE_SHARED;
        lck->lck_rw_shared_count--;
    } else {
        lck_rw_type = LCK_RW_TYPE_EXCLUSIVE;
        if (lck->lck_rw_want_upgrade)
            lck->lck_rw_want_upgrade = FALSE;
        else
            lck->lck_rw_want_write = FALSE;
    }

    /*
     *  There is no reason to wakeup a waiting thread
     *  if the read-count is non-zero.  Consider:
     *      we must be dropping a read lock
     *      threads are waiting only if one wants a write lock
     *      if there are still readers, they can't proceed
     */
    if (lck->lck_rw_shared_count == 0) {
        if (lck->lck_w_waiting) {
            lck->lck_w_waiting = FALSE;
            wakeup_writers = TRUE;
        }
        if (!(lck->lck_rw_priv_excl && wakeup_writers == TRUE) &&
            lck->lck_r_waiting) {
            lck->lck_r_waiting = FALSE;
            wakeup_readers = TRUE;
        }
    }

    lck_interlock_unlock(lck, istate);

    if (wakeup_readers)
        thread_wakeup(RW_LOCK_READER_EVENT(lck));
    if (wakeup_writers)
        thread_wakeup(RW_LOCK_WRITER_EVENT(lck));

#if CONFIG_DTRACE
    LOCKSTAT_RECORD(LS_LCK_RW_DONE_RELEASE, lck,
        (lck_rw_type == LCK_RW_TYPE_EXCLUSIVE ? 1 : 0));
#endif
    return (lck_rw_type);
}
/*
 *  Routine: hw_lock_unlock
 *
 *  Unconditionally release lock, release preemption level.
 */
void
hw_lock_unlock(hw_lock_t lock)
{
    __c11_atomic_store((_Atomic uintptr_t *)&lock->lock_data, 0,
        memory_order_release_smp);
#if __arm__ || __arm64__
    // ARM tests are only for open-source exclusion
    set_event();
#endif  // __arm__ || __arm64__
#if CONFIG_DTRACE
    LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lock, 0);
#endif /* CONFIG_DTRACE */
    enable_preemption();
}
/*
 * Simple C support for the cases where spin locks miss on the first try.
 */
void
lock_set_spin(lock_t *lp)
{
    int spin_count = 1;
    int backoff;    /* current backoff */
    int backctr;    /* ctr for backoff */

    if (panicstr)
        return;

    if (ncpus == 1)
        panic("lock_set: %p lock held and only one CPU", lp);

    if (&plat_lock_delay) {
        backoff = 0;
    } else {
        backoff = BACKOFF_BASE;
    }

    while (LOCK_HELD(lp) || !lock_spin_try(lp)) {
        if (panicstr)
            return;
        spin_count++;
        /*
         * Add an exponential backoff delay before trying again
         * to touch the mutex data structure.
         * the spin_count test and call to nulldev are to prevent
         * the compiler optimizer from eliminating the delay loop.
         */
        if (&plat_lock_delay) {
            plat_lock_delay(&backoff);
        } else {
            /* delay */
            for (backctr = backoff; backctr; backctr--) {
                if (!spin_count)
                    (void) nulldev();
            }
            backoff = backoff << 1;     /* double it */
            if (backoff > BACKOFF_CAP) {
                backoff = BACKOFF_CAP;
            }
            SMT_PAUSE();
        }
    }

    if (spin_count) {
        LOCKSTAT_RECORD(LS_LOCK_SET_SPIN, lp, spin_count);
    }

    LOCKSTAT_RECORD0(LS_LOCK_SET_ACQUIRE, lp);
}
void
lck_mtx_unlockspin_wakeup (
    lck_mtx_t   *lck)
{
    assert(lck->lck_mtx_waiters > 0);
    thread_wakeup_one(LCK_MTX_EVENT(lck));

    KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_NONE,
        VM_KERNEL_UNSLIDE_OR_PERM(lck), 0, 0, 1, 0);
#if CONFIG_DTRACE
    /*
     * When there are waiters, we skip the hot-patch spot in the
     * fastpath, so we record it here.
     */
    LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lck, 0);
#endif
}
void
lck_mtx_unlockspin_wakeup (
    lck_mtx_t   *lck)
{
    assert(lck->lck_mtx_waiters > 0);
    thread_wakeup_one((event_t)(((unsigned int*)lck)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)));

    KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_NONE,
        (int)lck, 0, 0, 1, 0);
#if CONFIG_DTRACE
    /*
     * When there are waiters, we skip the hot-patch spot in the
     * fastpath, so we record it here.
     */
    LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lck, 0);
#endif
}
/*
 *  Routine: hw_lock_try
 *
 *  returns with preemption disabled on success.
 */
unsigned int
hw_lock_try(hw_lock_t lock)
{
    thread_t    thread = current_thread();
    int     success = 0;
#if LOCK_TRY_DISABLE_INT
    long        intmask;

    intmask = disable_interrupts();
#else
    disable_preemption_for_thread(thread);
#endif  // LOCK_TRY_DISABLE_INT

#if __SMP__
#if LOCK_PRETEST
    if (ordered_load_hw(lock))
        goto failed;
#endif  // LOCK_PRETEST
    success = atomic_compare_exchange(&lock->lock_data, 0,
        LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK,
        memory_order_acquire_smp, FALSE);
#else
    if (lock->lock_data == 0) {
        lock->lock_data = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK;
        success = 1;
    }
#endif  // __SMP__

#if LOCK_TRY_DISABLE_INT
    if (success)
        disable_preemption_for_thread(thread);
#if LOCK_PRETEST
failed:
#endif  // LOCK_PRETEST
    restore_interrupts(intmask);
#else
#if LOCK_PRETEST
failed:
#endif  // LOCK_PRETEST
    if (!success)
        enable_preemption();
#endif  // LOCK_TRY_DISABLE_INT
#if CONFIG_DTRACE
    if (success)
        LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, lock, 0);
#endif
    return success;
}
/*
 *  Routine:    lck_rw_lock_exclusive_to_shared
 */
void
lck_rw_lock_exclusive_to_shared(
    lck_rw_t    *lck)
{
    boolean_t   wakeup_readers = FALSE;
    boolean_t   wakeup_writers = FALSE;
    boolean_t   istate;

    KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
        (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);

    istate = lck_interlock_lock(lck);

    lck->lck_rw_shared_count++;
    if (lck->lck_rw_want_upgrade)
        lck->lck_rw_want_upgrade = FALSE;
    else
        lck->lck_rw_want_write = FALSE;

    if (lck->lck_w_waiting) {
        lck->lck_w_waiting = FALSE;
        wakeup_writers = TRUE;
    }
    if (!(lck->lck_rw_priv_excl && wakeup_writers == TRUE) &&
        lck->lck_r_waiting) {
        lck->lck_r_waiting = FALSE;
        wakeup_readers = TRUE;
    }

    lck_interlock_unlock(lck, istate);

    if (wakeup_readers)
        thread_wakeup(RW_LOCK_READER_EVENT(lck));
    if (wakeup_writers)
        thread_wakeup(RW_LOCK_WRITER_EVENT(lck));

    KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
        (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);

#if CONFIG_DTRACE
    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
#endif
}
/*
 *  Routine: hw_lock_to
 *
 *  Acquire lock, spinning until it becomes available or timeout.
 *  Timeout is in mach_absolute_time ticks, return with
 *  preemption disabled.
 */
unsigned int
hw_lock_to(hw_lock_t lock, uint64_t timeout)
{
    thread_t    thread;
    uintptr_t   state;
    unsigned int    success = 0;

    thread = current_thread();
    disable_preemption_for_thread(thread);
    state = LCK_MTX_THREAD_TO_STATE(thread) | PLATFORM_LCK_ILOCK;
#if __SMP__
#if LOCK_PRETEST
    if (ordered_load_hw(lock))
        goto contended;
#endif  // LOCK_PRETEST
    if (atomic_compare_exchange(&lock->lock_data, 0, state,
        memory_order_acquire_smp, TRUE)) {
        success = 1;
        goto end;
    }
#if LOCK_PRETEST
contended:
#endif  // LOCK_PRETEST
    success = hw_lock_lock_contended(lock, state, timeout, FALSE);
end:
#else   // __SMP__
    (void)timeout;
    if (ordered_load_hw(lock) == 0) {
        ordered_store_hw(lock, state);
        success = 1;
    }
#endif  // __SMP__
#if CONFIG_DTRACE
    if (success)
        LOCKSTAT_RECORD(LS_LCK_SPIN_LOCK_ACQUIRE, lock, 0);
#endif
    return success;
}
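/*
 * Illustrative aside: unlike the blocking fallback sketched earlier,
 * hw_lock_to returns success or failure when the timeout expires. A hedged
 * userspace sketch with the timeout expressed in nanoseconds instead of
 * mach_absolute_time ticks; the helper names are made up for the example.
 */
#include <pthread.h>
#include <stdint.h>
#include <time.h>

/* Monotonic time in nanoseconds. */
static uint64_t spin_now_ns(void)
{
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec;
}

/* Spin on trylock until it succeeds or the deadline passes; 1 = acquired, 0 = timed out. */
static unsigned int timed_spin_lock(pthread_mutex_t *m, uint64_t timeout_ns)
{
    uint64_t deadline = spin_now_ns() + timeout_ns;

    while (pthread_mutex_trylock(m) != 0) {
        if (spin_now_ns() >= deadline)
            return 0;   /* gave up: lock not held by the caller */
    }
    return 1;           /* lock held by the caller */
}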
boolean_t
lck_rw_try_lock_shared(
    lck_rw_t    *lck)
{
    boolean_t   istate;

    istate = lck_interlock_lock(lck);

    /* No reader priority check here... */
    if (lck->lck_rw_want_write || lck->lck_rw_want_upgrade) {
        lck_interlock_unlock(lck, istate);
        return (FALSE);
    }

    lck->lck_rw_shared_count++;

    lck_interlock_unlock(lck, istate);

#if CONFIG_DTRACE
    LOCKSTAT_RECORD(LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, lck, 0);
#endif
    return (TRUE);
}
/*
 * Routine:    lck_mtx_lock_wait
 *
 * Invoked in order to wait on contention.
 *
 * Called with the interlock locked and
 * returns it unlocked.
 */
void
lck_mtx_lock_wait (
    lck_mtx_t       *lck,
    thread_t        holder)
{
    thread_t        self = current_thread();
    lck_mtx_t       *mutex;
    __kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
    __kdebug_only uintptr_t trace_holder = VM_KERNEL_UNSLIDE_OR_PERM(holder);
    integer_t       priority;
    spl_t           s = splsched();
#if CONFIG_DTRACE
    uint64_t        sleep_start = 0;

    if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
        sleep_start = mach_absolute_time();
    }
#endif

    if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
        mutex = lck;
    else
        mutex = &lck->lck_mtx_ptr->lck_mtx;

    KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
        trace_lck, trace_holder, 0, 0, 0);

    priority = self->sched_pri;
    if (priority < self->base_pri)
        priority = self->base_pri;
    if (priority < BASEPRI_DEFAULT)
        priority = BASEPRI_DEFAULT;

    /* Do not promote past promotion ceiling */
    priority = MIN(priority, MAXPRI_PROMOTE);

    thread_lock(holder);
    if (mutex->lck_mtx_pri == 0)
        holder->promotions++;
    holder->sched_flags |= TH_SFLAG_PROMOTED;

    if (mutex->lck_mtx_pri < priority && holder->sched_pri < priority) {
        KERNEL_DEBUG_CONSTANT(
            MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROMOTE) | DBG_FUNC_NONE,
            holder->sched_pri, priority, trace_holder, trace_lck, 0);
        set_sched_pri(holder, priority);
    }
    thread_unlock(holder);
    splx(s);

    if (mutex->lck_mtx_pri < priority)
        mutex->lck_mtx_pri = priority;
    if (self->pending_promoter[self->pending_promoter_index] == NULL) {
        self->pending_promoter[self->pending_promoter_index] = mutex;
        mutex->lck_mtx_waiters++;
    } else if (self->pending_promoter[self->pending_promoter_index] != mutex) {
        self->pending_promoter[++self->pending_promoter_index] = mutex;
        mutex->lck_mtx_waiters++;
    }

    assert_wait(LCK_MTX_EVENT(mutex), THREAD_UNINT);
    lck_mtx_ilk_unlock(mutex);

    thread_block(THREAD_CONTINUE_NULL);

    KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
#if CONFIG_DTRACE
    /*
     * Record the Dtrace lockstat probe for blocking, block time
     * measured from when we were entered.
     */
    if (sleep_start) {
        if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
            LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, lck,
                mach_absolute_time() - sleep_start);
        } else {
            LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, lck,
                mach_absolute_time() - sleep_start);
        }
    }
#endif
}
/*
 *  Routine:    lck_rw_lock_shared_gen
 */
void
lck_rw_lock_shared_gen(
    lck_rw_t    *lck)
{
    int     i;
    wait_result_t   res;
#if MACH_LDEBUG
    int     decrementer;
#endif  /* MACH_LDEBUG */
    boolean_t   istate;
#if CONFIG_DTRACE
    uint64_t    wait_interval = 0;
    int     slept = 0;
    int     readers_at_sleep;
#endif

    istate = lck_interlock_lock(lck);
#if CONFIG_DTRACE
    readers_at_sleep = lck->lck_rw_shared_count;
#endif

#if MACH_LDEBUG
    decrementer = DECREMENTER_TIMEOUT;
#endif  /* MACH_LDEBUG */
    while ((lck->lck_rw_want_write || lck->lck_rw_want_upgrade) &&
        ((lck->lck_rw_shared_count == 0) || lck->lck_rw_priv_excl)) {

        i = lock_wait_time[lck->lck_rw_can_sleep ? 1 : 0];

        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_CODE) | DBG_FUNC_START,
            (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, i, 0);
#if CONFIG_DTRACE
        if ((lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] || lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK]) && wait_interval == 0) {
            wait_interval = mach_absolute_time();
        } else {
            wait_interval = -1;
        }
#endif

        if (i != 0) {
            lck_interlock_unlock(lck, istate);
#if MACH_LDEBUG
            if (!--decrementer)
                Debugger("timeout - wait no writers");
#endif  /* MACH_LDEBUG */
            while (--i != 0 &&
                (lck->lck_rw_want_write || lck->lck_rw_want_upgrade) &&
                ((lck->lck_rw_shared_count == 0) || lck->lck_rw_priv_excl))
                lck_rw_lock_pause(istate);
            istate = lck_interlock_lock(lck);
        }

        if (lck->lck_rw_can_sleep &&
            (lck->lck_rw_want_write || lck->lck_rw_want_upgrade) &&
            ((lck->lck_rw_shared_count == 0) || lck->lck_rw_priv_excl)) {
            lck->lck_r_waiting = TRUE;
            res = assert_wait(RW_LOCK_READER_EVENT(lck), THREAD_UNINT);
            if (res == THREAD_WAITING) {
                lck_interlock_unlock(lck, istate);
                res = thread_block(THREAD_CONTINUE_NULL);
#if CONFIG_DTRACE
                slept = 1;
#endif
                istate = lck_interlock_lock(lck);
            }
        }
        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_CODE) | DBG_FUNC_END,
            (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, res, 0);
    }

    lck->lck_rw_shared_count++;

    lck_interlock_unlock(lck, istate);
#if CONFIG_DTRACE
    if (wait_interval != 0 && wait_interval != (unsigned) -1) {
        if (slept == 0) {
            LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
        } else {
            LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
                mach_absolute_time() - wait_interval, 0,
                (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
        }
    }
    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
#endif
}
/*
 * Routine:    lck_mtx_lock_wait
 *
 * Invoked in order to wait on contention.
 *
 * Called with the interlock locked and
 * returns it unlocked.
 */
void
lck_mtx_lock_wait (
    lck_mtx_t       *lck,
    thread_t        holder)
{
    thread_t        self = current_thread();
    lck_mtx_t       *mutex;
    integer_t       priority;
    spl_t           s = splsched();
#if CONFIG_DTRACE
    uint64_t        sleep_start = 0;

    if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
        sleep_start = mach_absolute_time();
    }
#endif

    if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
        mutex = lck;
    else
        mutex = &lck->lck_mtx_ptr->lck_mtx;

    KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START,
        (int)lck, (int)holder, 0, 0, 0);

    priority = self->sched_pri;
    if (priority < self->priority)
        priority = self->priority;
    if (priority < BASEPRI_DEFAULT)
        priority = BASEPRI_DEFAULT;

    thread_lock(holder);
    if (mutex->lck_mtx_pri == 0)
        holder->promotions++;
    holder->sched_mode |= TH_MODE_PROMOTED;
    if (mutex->lck_mtx_pri < priority &&
        holder->sched_pri < priority) {
        KERNEL_DEBUG_CONSTANT(
            MACHDBG_CODE(DBG_MACH_SCHED, MACH_PROMOTE) | DBG_FUNC_NONE,
            holder->sched_pri, priority, (int)holder, (int)lck, 0);
        set_sched_pri(holder, priority);
    }
    thread_unlock(holder);
    splx(s);

    if (mutex->lck_mtx_pri < priority)
        mutex->lck_mtx_pri = priority;
    if (self->pending_promoter[self->pending_promoter_index] == NULL) {
        self->pending_promoter[self->pending_promoter_index] = mutex;
        mutex->lck_mtx_waiters++;
    } else if (self->pending_promoter[self->pending_promoter_index] != mutex) {
        self->pending_promoter[++self->pending_promoter_index] = mutex;
        mutex->lck_mtx_waiters++;
    }

    assert_wait((event_t)(((unsigned int*)lck)+((sizeof(lck_mtx_t)-1)/sizeof(unsigned int))), THREAD_UNINT);
    lck_mtx_ilk_unlock(mutex);

    thread_block(THREAD_CONTINUE_NULL);

    KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
#if CONFIG_DTRACE
    /*
     * Record the Dtrace lockstat probe for blocking, block time
     * measured from when we were entered.
     */
    if (sleep_start) {
        if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
            LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, lck,
                mach_absolute_time() - sleep_start);
        } else {
            LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, lck,
                mach_absolute_time() - sleep_start);
        }
    }
#endif
}
boolean_t
lck_rw_lock_shared_to_exclusive(
    lck_rw_t    *lck)
{
    int     i;
    boolean_t   do_wakeup = FALSE;
    wait_result_t   res;
#if MACH_LDEBUG
    int     decrementer;
#endif  /* MACH_LDEBUG */
    boolean_t   istate;
#if CONFIG_DTRACE
    uint64_t    wait_interval = 0;
    int     slept = 0;
    int     readers_at_sleep = 0;
#endif

    istate = lck_interlock_lock(lck);

    lck->lck_rw_shared_count--;

    if (lck->lck_rw_want_upgrade) {
        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_START,
            (int)lck, lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);

        /*
         * Someone else has requested upgrade.
         * Since we've released a read lock, wake
         * him up.
         */
        if (lck->lck_w_waiting && (lck->lck_rw_shared_count == 0)) {
            lck->lck_w_waiting = FALSE;
            do_wakeup = TRUE;
        }

        lck_interlock_unlock(lck, istate);

        if (do_wakeup)
            thread_wakeup(RW_LOCK_WRITER_EVENT(lck));

        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_END,
            (int)lck, lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);

        return (FALSE);
    }

    lck->lck_rw_want_upgrade = TRUE;

#if MACH_LDEBUG
    decrementer = DECREMENTER_TIMEOUT;
#endif  /* MACH_LDEBUG */
    while (lck->lck_rw_shared_count != 0) {
#if CONFIG_DTRACE
        if (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] && wait_interval == 0) {
            wait_interval = mach_absolute_time();
            readers_at_sleep = lck->lck_rw_shared_count;
        } else {
            wait_interval = -1;
        }
#endif
        i = lock_wait_time[lck->lck_rw_can_sleep ? 1 : 0];

        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX1_CODE) | DBG_FUNC_START,
            (int)lck, lck->lck_rw_shared_count, i, 0, 0);

        if (i != 0) {
            lck_interlock_unlock(lck, istate);
#if MACH_LDEBUG
            if (!--decrementer)
                Debugger("timeout - lck_rw_shared_count");
#endif  /* MACH_LDEBUG */
            while (--i != 0 && lck->lck_rw_shared_count != 0)
                lck_rw_lock_pause(istate);
            istate = lck_interlock_lock(lck);
        }

        if (lck->lck_rw_can_sleep && lck->lck_rw_shared_count != 0) {
            lck->lck_w_waiting = TRUE;
            res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
            if (res == THREAD_WAITING) {
                lck_interlock_unlock(lck, istate);
                res = thread_block(THREAD_CONTINUE_NULL);
#if CONFIG_DTRACE
                slept = 1;
#endif
                istate = lck_interlock_lock(lck);
            }
        }
        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX1_CODE) | DBG_FUNC_END,
            (int)lck, lck->lck_rw_shared_count, 0, 0, 0);
    }

    lck_interlock_unlock(lck, istate);

#if CONFIG_DTRACE
    /*
     * We infer whether we took the sleep/spin path above by checking readers_at_sleep.
     */
    if (wait_interval != 0 && wait_interval != (unsigned) -1 && readers_at_sleep) {
        if (slept == 0) {
            LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lck, mach_absolute_time() - wait_interval, 0);
        } else {
            LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lck,
                mach_absolute_time() - wait_interval, 1,
                (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
        }
    }
    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lck, 1);
#endif
    return (TRUE);
}
/*
 *  Routine:    lck_rw_lock_exclusive
 */
void
lck_rw_lock_exclusive(
    lck_rw_t    *lck)
{
    int     i;
    wait_result_t   res;
#if MACH_LDEBUG
    int     decrementer;
#endif  /* MACH_LDEBUG */
    boolean_t   istate;
#if CONFIG_DTRACE
    uint64_t    wait_interval = 0;
    int     slept = 0;
    int     readers_at_sleep;
#endif

    istate = lck_interlock_lock(lck);
#if CONFIG_DTRACE
    readers_at_sleep = lck->lck_rw_shared_count;
#endif

#if MACH_LDEBUG
    decrementer = DECREMENTER_TIMEOUT;
#endif  /* MACH_LDEBUG */

    /*
     *  Try to acquire the lck_rw_want_write bit.
     */
    while (lck->lck_rw_want_write) {

        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE_CODE) | DBG_FUNC_START,
            (int)lck, 0, 0, 0, 0);
        /*
         * Either sleeping or spinning is happening, start
         * a timing of our delay interval now.
         */
#if CONFIG_DTRACE
        if ((lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] || lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK]) && wait_interval == 0) {
            wait_interval = mach_absolute_time();
        } else {
            wait_interval = -1;
        }
#endif

        i = lock_wait_time[lck->lck_rw_can_sleep ? 1 : 0];

        if (i != 0) {
            lck_interlock_unlock(lck, istate);
#if MACH_LDEBUG
            if (!--decrementer)
                Debugger("timeout - lck_rw_want_write");
#endif  /* MACH_LDEBUG */
            while (--i != 0 && lck->lck_rw_want_write)
                lck_rw_lock_pause(istate);
            istate = lck_interlock_lock(lck);
        }

        if (lck->lck_rw_can_sleep && lck->lck_rw_want_write) {
            lck->lck_w_waiting = TRUE;
            res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
            if (res == THREAD_WAITING) {
                lck_interlock_unlock(lck, istate);
                res = thread_block(THREAD_CONTINUE_NULL);
#if CONFIG_DTRACE
                slept = 1;
#endif
                istate = lck_interlock_lock(lck);
            }
        }
        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE_CODE) | DBG_FUNC_END,
            (int)lck, res, 0, 0, 0);
    }
    lck->lck_rw_want_write = TRUE;

    /* Wait for readers (and upgrades) to finish */

#if MACH_LDEBUG
    decrementer = DECREMENTER_TIMEOUT;
#endif  /* MACH_LDEBUG */
    while ((lck->lck_rw_shared_count != 0) || lck->lck_rw_want_upgrade) {

        i = lock_wait_time[lck->lck_rw_can_sleep ? 1 : 0];

        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE1_CODE) | DBG_FUNC_START,
            (int)lck, lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, i, 0);

#if CONFIG_DTRACE
        /*
         * Either sleeping or spinning is happening, start
         * a timing of our delay interval now. If we set it
         * to -1 we don't have accurate data so we cannot later
         * decide to record a dtrace spin or sleep event.
         */
        if ((lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] || lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK]) && wait_interval == 0) {
            wait_interval = mach_absolute_time();
        } else {
            wait_interval = (unsigned) -1;
        }
#endif

        if (i != 0) {
            lck_interlock_unlock(lck, istate);
#if MACH_LDEBUG
            if (!--decrementer)
                Debugger("timeout - wait for readers");
#endif  /* MACH_LDEBUG */
            while (--i != 0 && (lck->lck_rw_shared_count != 0 ||
                lck->lck_rw_want_upgrade))
                lck_rw_lock_pause(istate);
            istate = lck_interlock_lock(lck);
        }

        if (lck->lck_rw_can_sleep && (lck->lck_rw_shared_count != 0 || lck->lck_rw_want_upgrade)) {
            lck->lck_w_waiting = TRUE;
            res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
            if (res == THREAD_WAITING) {
                lck_interlock_unlock(lck, istate);
                res = thread_block(THREAD_CONTINUE_NULL);
#if CONFIG_DTRACE
                slept = 1;
#endif
                istate = lck_interlock_lock(lck);
            }
        }
        KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE1_CODE) | DBG_FUNC_END,
            (int)lck, lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, res, 0);
    }

    lck_interlock_unlock(lck, istate);
#if CONFIG_DTRACE
    /*
     * Decide what latencies we suffered that are Dtrace events.
     * If we have set wait_interval, then we either spun or slept.
     * At least we get out from under the interlock before we record
     * which is the best we can do here to minimize the impact
     * of the tracing.
     * If we have set wait_interval to -1, then dtrace was not enabled when we
     * started sleeping/spinning so we don't record this event.
     */
    if (wait_interval != 0 && wait_interval != (unsigned) -1) {
        if (slept == 0) {
            LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_EXCL_SPIN, lck,
                mach_absolute_time() - wait_interval, 1);
        } else {
            /*
             * For the blocking case, we also record if when we blocked
             * it was held for read or write, and how many readers.
             * Notice that above we recorded this before we dropped
             * the interlock so the count is accurate.
             */
            LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_EXCL_BLOCK, lck,
                mach_absolute_time() - wait_interval, 1,
                (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
        }
    }
    LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lck, 1);
#endif
}
/*
 * mutex_vector_enter() is called from the assembly mutex_enter() routine
 * if the lock is held or is not of type MUTEX_ADAPTIVE.
 */
void
mutex_vector_enter(mutex_impl_t *lp)
{
    kthread_id_t    owner;
    hrtime_t        sleep_time = 0;     /* how long we slept */
    uint_t          spin_count = 0;     /* how many times we spun */
    cpu_t           *cpup, *last_cpu;
    extern cpu_t    *cpu_list;
    turnstile_t     *ts;
    volatile mutex_impl_t *vlp = (volatile mutex_impl_t *)lp;
    int             backoff;            /* current backoff */
    int             backctr;            /* ctr for backoff */
    int             sleep_count = 0;

    ASSERT_STACK_ALIGNED();

    if (MUTEX_TYPE_SPIN(lp)) {
        lock_set_spl(&lp->m_spin.m_spinlock, lp->m_spin.m_minspl,
            &lp->m_spin.m_oldspl);
        return;
    }

    if (!MUTEX_TYPE_ADAPTIVE(lp)) {
        mutex_panic("mutex_enter: bad mutex", lp);
        return;
    }

    /*
     * Adaptive mutexes must not be acquired from above LOCK_LEVEL.
     * We can migrate after loading CPU but before checking CPU_ON_INTR,
     * so we must verify by disabling preemption and loading CPU again.
     */
    cpup = CPU;
    if (CPU_ON_INTR(cpup) && !panicstr) {
        kpreempt_disable();
        if (CPU_ON_INTR(CPU))
            mutex_panic("mutex_enter: adaptive at high PIL", lp);
        kpreempt_enable();
    }

    CPU_STATS_ADDQ(cpup, sys, mutex_adenters, 1);

    if (&plat_lock_delay) {
        backoff = 0;
    } else {
        backoff = BACKOFF_BASE;
    }

    for (;;) {
spin:
        spin_count++;
        /*
         * Add an exponential backoff delay before trying again
         * to touch the mutex data structure.
         * the spin_count test and call to nulldev are to prevent
         * the compiler optimizer from eliminating the delay loop.
         */
        if (&plat_lock_delay) {
            plat_lock_delay(&backoff);
        } else {
            for (backctr = backoff; backctr; backctr--) {
                if (!spin_count)
                    (void) nulldev();
            };  /* delay */
            backoff = backoff << 1;     /* double it */
            if (backoff > BACKOFF_CAP) {
                backoff = BACKOFF_CAP;
            }

            SMT_PAUSE();
        }

        if (panicstr)
            return;

        if ((owner = MUTEX_OWNER(vlp)) == NULL) {
            if (mutex_adaptive_tryenter(lp))
                break;
            continue;
        }

        if (owner == curthread)
            mutex_panic("recursive mutex_enter", lp);

        /*
         * If lock is held but owner is not yet set, spin.
         * (Only relevant for platforms that don't have cas.)
         */
        if (owner == MUTEX_NO_OWNER)
            continue;

        /*
         * When searching the other CPUs, start with the one where
         * we last saw the owner thread.  If owner is running, spin.
         *
         * We must disable preemption at this point to guarantee
         * that the list doesn't change while we traverse it
         * without the cpu_lock mutex.  While preemption is
         * disabled, we must revalidate our cached cpu pointer.
         */
        kpreempt_disable();
        if (cpup->cpu_next == NULL)
            cpup = cpu_list;
        last_cpu = cpup;        /* mark end of search */
        do {
            if (cpup->cpu_thread == owner) {
                kpreempt_enable();
                goto spin;
            }
        } while ((cpup = cpup->cpu_next) != last_cpu);
        kpreempt_enable();

        /*
         * The owner appears not to be running, so block.
         * See the Big Theory Statement for memory ordering issues.
         */
        ts = turnstile_lookup(lp);
        MUTEX_SET_WAITERS(lp);
        membar_enter();

        /*
         * Recheck whether owner is running after waiters bit hits
         * global visibility (above).  If owner is running, spin.
         *
         * Since we are at ipl DISP_LEVEL, kernel preemption is
         * disabled, however we still need to revalidate our cached
         * cpu pointer to make sure the cpu hasn't been deleted.
         */
        if (cpup->cpu_next == NULL)
            last_cpu = cpup = cpu_list;
        do {
            if (cpup->cpu_thread == owner) {
                turnstile_exit(lp);
                goto spin;
            }
        } while ((cpup = cpup->cpu_next) != last_cpu);
        membar_consumer();

        /*
         * If owner and waiters bit are unchanged, block.
         */
        if (MUTEX_OWNER(vlp) == owner && MUTEX_HAS_WAITERS(vlp)) {
            sleep_time -= gethrtime();
            (void) turnstile_block(ts, TS_WRITER_Q, lp,
                &mutex_sobj_ops, NULL, NULL);
            sleep_time += gethrtime();
            sleep_count++;
        } else {
            turnstile_exit(lp);
        }
    }

    ASSERT(MUTEX_OWNER(lp) == curthread);

    if (sleep_time != 0) {
        /*
         * Note, sleep time is the sum of all the sleeping we
         * did.
         */
        LOCKSTAT_RECORD(LS_MUTEX_ENTER_BLOCK, lp, sleep_time);
    }

    /*
     * We do not count a sleep as a spin.
     */
    if (spin_count > sleep_count)
        LOCKSTAT_RECORD(LS_MUTEX_ENTER_SPIN, lp,
            spin_count - sleep_count);

    LOCKSTAT_RECORD0(LS_MUTEX_ENTER_ACQUIRE, lp);
}
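/*
 * Illustrative aside: mutex_vector_enter records LS_MUTEX_ENTER_SPIN as
 * spin_count - sleep_count, i.e. a sleep is not counted as a spin. A
 * hedged userspace sketch of that accounting around a simple
 * spin-then-block acquire; the struct, the spin budget, and the helper
 * names are all hypothetical.
 */
#include <pthread.h>
#include <stdint.h>

struct adaptive_stats {
    uint64_t spin_events;
    uint64_t sleep_events;
};

#define ADAPTIVE_SPIN_TRIES 100    /* hypothetical spin budget */

static void adaptive_enter(pthread_mutex_t *m, struct adaptive_stats *st)
{
    uint64_t spins = 0, sleeps = 0;

    for (;;) {
        if (pthread_mutex_trylock(m) == 0)
            break;                      /* acquired */
        if (++spins < ADAPTIVE_SPIN_TRIES)
            continue;                   /* keep spinning on the trylock */
        /* Spin budget exhausted: block (counts as a sleep, not a spin). */
        pthread_mutex_lock(m);
        sleeps++;
        break;
    }

    if (spins > sleeps)
        st->spin_events += spins - sleeps;   /* net spins, as in the text */
    if (sleeps)
        st->sleep_events += sleeps;
}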