static __inline
int
_lwkt_trytokref_spin(lwkt_tokref_t ref, thread_t td, long mode)
{
	int spin;

	if (_lwkt_trytokref(ref, td, mode)) {
#ifdef DEBUG_LOCKS_LATENCY
		long j;
		for (j = tokens_add_latency; j > 0; --j)
			cpu_ccfence();
#endif
		return TRUE;
	}
	for (spin = lwkt_token_spin; spin > 0; --spin) {
		if (lwkt_token_delay)
			tsc_delay(lwkt_token_delay);
		else
			cpu_pause();
		if (_lwkt_trytokref(ref, td, mode)) {
#ifdef DEBUG_LOCKS_LATENCY
			long j;
			for (j = tokens_add_latency; j > 0; --j)
				cpu_ccfence();
#endif
			return TRUE;
		}
	}
	return FALSE;
}
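/*
 * Illustrative sketch (not part of the kernel source): the same bounded
 * spin-then-fail pattern expressed with C11 atomics in user space.  The
 * names try_acquire/acquire_spin and the SPIN_LIMIT value are assumptions
 * made for this example only; the real code spins lwkt_token_spin times.
 */
#include <stdatomic.h>
#include <stdbool.h>

#define SPIN_LIMIT	1024		/* analogous to lwkt_token_spin */

static atomic_flag example_lock = ATOMIC_FLAG_INIT;

static bool
try_acquire(void)
{
	/* returns true if we obtained the flag */
	return !atomic_flag_test_and_set_explicit(&example_lock,
						  memory_order_acquire);
}

static bool
acquire_spin(void)
{
	int spin;

	if (try_acquire())
		return true;
	for (spin = SPIN_LIMIT; spin > 0; --spin) {
		/* brief pause between attempts, like cpu_pause() */
		if (try_acquire())
			return true;
	}
	return false;			/* caller must block or retry later */
}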
/*
 * Attempt to acquire a spinlock, if we fail we must undo the
 * gd->gd_spinlocks_wr/gd->gd_curthread->td_critcount predisposition.
 *
 * Returns 0 on success, EAGAIN on failure.
 */
int
_mtx_spinlock_try(mtx_t mtx)
{
	globaldata_t gd = mycpu;
	u_int	lock;
	u_int	nlock;
	int	res = 0;

	for (;;) {
		lock = mtx->mtx_lock;
		if (lock == 0) {
			nlock = MTX_EXCLUSIVE | 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
				mtx->mtx_owner = gd->gd_curthread;
				break;
			}
		} else if ((lock & MTX_EXCLUSIVE) &&
			   mtx->mtx_owner == gd->gd_curthread) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
		} else {
			--gd->gd_spinlocks_wr;
			cpu_ccfence();
			--gd->gd_curthread->td_critcount;
			res = EAGAIN;
			break;
		}
		cpu_pause();
		++mtx_collision_count;
	}
	return res;
}
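/*
 * Illustrative sketch (not the kernel mtx code): the same "exclusive bit
 * plus recursion count in one word" try-lock, using C11 atomics.  The word
 * layout, example_try_lock() and the thread-identity trick are assumptions
 * for this example; the unlock path is omitted for brevity.
 */
#include <errno.h>
#include <stdatomic.h>
#include <stdint.h>

#define EXAMPLE_EXCLUSIVE	0x80000000u
#define EXAMPLE_MASK		0x7fffffffu

static _Atomic uint32_t	 example_word;	/* exclusive bit | hold count */
static _Atomic uintptr_t example_owner;	/* owning thread, informational */

static _Thread_local int example_tls_slot;	/* its address ids the thread */
#define EXAMPLE_SELF()	((uintptr_t)&example_tls_slot)

static int
example_try_lock(void)
{
	uint32_t lock, nlock;

	for (;;) {
		lock = atomic_load(&example_word);
		if (lock == 0) {
			/* uncontended: claim exclusive with count 1 */
			nlock = EXAMPLE_EXCLUSIVE | 1;
			if (atomic_compare_exchange_weak(&example_word,
							 &lock, nlock)) {
				atomic_store(&example_owner, EXAMPLE_SELF());
				return 0;
			}
		} else if ((lock & EXAMPLE_EXCLUSIVE) &&
			   atomic_load(&example_owner) == EXAMPLE_SELF()) {
			/* recursive acquisition: bump the count */
			nlock = lock + 1;
			if (atomic_compare_exchange_weak(&example_word,
							 &lock, nlock))
				return 0;
		} else {
			/* owned by someone else, give up immediately */
			return EAGAIN;
		}
	}
}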
/*
 * If the lock is held exclusively it must be owned by the caller.  If the
 * lock is already a shared lock this operation is a NOP.  A panic will
 * occur if the lock is not held either shared or exclusive.
 *
 * The exclusive count is converted to a shared count.
 */
void
_mtx_downgrade(mtx_t *mtx)
{
	u_int	lock;
	u_int	nlock;

	for (;;) {
		lock = mtx->mtx_lock;
		cpu_ccfence();

		/*
		 * NOP if already shared.
		 */
		if ((lock & MTX_EXCLUSIVE) == 0) {
			KKASSERT((lock & MTX_MASK) > 0);
			break;
		}

		/*
		 * Transfer count to shared.  Any additional pending shared
		 * waiters must be woken up.
		 */
		if (lock & MTX_SHWANTED) {
			if (mtx_chain_link_sh(mtx, lock))
				break;
			/* retry */
		} else {
			nlock = lock & ~MTX_EXCLUSIVE;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
			/* retry */
		}
		cpu_pause();
	}
}
/*
 * Print out information about state of a lock.  Used by VOP_PRINT
 * routines to display status about contained locks.
 */
void
lockmgr_printinfo(struct lock *lkp)
{
	struct thread *td = lkp->lk_lockholder;
	struct proc *p;
	int count;

	count = lkp->lk_count;
	cpu_ccfence();

	if (td && td != LK_KERNTHREAD && td != LK_NOTHREAD)
		p = td->td_proc;
	else
		p = NULL;

	if (count & LKC_EXCL) {
		kprintf(" lock type %s: EXCLUS (count %08x) by td %p pid %d",
			lkp->lk_wmesg, count, td, p ? p->p_pid : -99);
	} else if (count & LKC_MASK) {
		kprintf(" lock type %s: SHARED (count %08x)",
			lkp->lk_wmesg, count);
	} else {
		kprintf(" lock type %s: NOTHELD", lkp->lk_wmesg);
	}
	if (count & (LKC_EXREQ|LKC_SHREQ))
		kprintf(" with waiters\n");
	else
		kprintf("\n");
}
/*
 * Upgrade a shared lock to an exclusive lock.  The upgrade will fail if
 * the shared lock has a count other than 1.  Optimize the most likely case
 * but note that a single cmpset can fail due to WANTED races.
 *
 * If the lock is held exclusively it must be owned by the caller and
 * this function will simply return without doing anything.  A panic will
 * occur if the lock is held exclusively by someone other than the caller.
 *
 * Returns 0 on success, EDEADLK on failure.
 */
int
_mtx_upgrade_try(mtx_t *mtx)
{
	u_int	lock;
	u_int	nlock;
	int	error = 0;

	for (;;) {
		lock = mtx->mtx_lock;
		cpu_ccfence();

		if ((lock & ~MTX_EXWANTED) == 1) {
			nlock = lock | MTX_EXCLUSIVE;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				mtx->mtx_owner = curthread;
				break;
			}
		} else if (lock & MTX_EXCLUSIVE) {
			KKASSERT(mtx->mtx_owner == curthread);
			break;
		} else {
			error = EDEADLK;
			break;
		}
		cpu_pause();
	}
	return (error);
}
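/*
 * Illustrative sketch (not the kernel code): upgrading a single shared
 * holder to exclusive only succeeds when the share count is exactly 1.
 * The word layout (EX_BIT | count) and the names below are assumptions
 * made for this example.
 */
#include <errno.h>
#include <stdatomic.h>
#include <stdint.h>

#define EX_BIT	0x80000000u

static _Atomic uint32_t rw_word;	/* EX_BIT | share count */

static int
upgrade_try(void)
{
	uint32_t lock = atomic_load(&rw_word);

	for (;;) {
		if (lock == 1) {
			/* sole shared holder: set the exclusive bit */
			if (atomic_compare_exchange_weak(&rw_word, &lock,
							 lock | EX_BIT))
				return 0;
			/* lock was reloaded by the failed CAS, retry */
		} else if (lock & EX_BIT) {
			/* already exclusive (assumed to be ours) */
			return 0;
		} else {
			/* other shared holders exist, upgrade would deadlock */
			return EDEADLK;
		}
	}
}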
/*
 * Add a (pmap, va) pair to the invalidation list and protect access
 * as appropriate.
 *
 * CPUMASK_LOCK is used to interlock thread switchins, otherwise another
 * cpu can switch in a pmap that we are unaware of and interfere with our
 * pte operation.
 */
void
pmap_inval_interlock(pmap_inval_info_t info, pmap_t pmap, vm_offset_t va)
{
	cpumask_t oactive;
#ifdef SMP
	cpumask_t nactive;

	DEBUG_PUSH_INFO("pmap_inval_interlock");
	for (;;) {
		oactive = pmap->pm_active;
		cpu_ccfence();
		nactive = oactive | CPUMASK_LOCK;
		if ((oactive & CPUMASK_LOCK) == 0 &&
		    atomic_cmpset_cpumask(&pmap->pm_active,
					  oactive, nactive)) {
			break;
		}
		lwkt_process_ipiq();
		cpu_pause();
	}
	DEBUG_POP_INFO();
#else
	oactive = pmap->pm_active & ~CPUMASK_LOCK;
#endif
	KKASSERT((info->pir_flags & PIRF_CPUSYNC) == 0);
	info->pir_va = va;
	info->pir_flags = PIRF_CPUSYNC;
	lwkt_cpusync_init(&info->pir_cpusync, oactive,
			  pmap_inval_callback, info);
	lwkt_cpusync_interlock(&info->pir_cpusync);
}
/*
 * Drop an inode reference, freeing the inode when the last reference goes
 * away.
 */
void
hammer2_inode_drop(hammer2_inode_t *ip)
{
	hammer2_pfs_t *pmp;
	u_int refs;

	while (ip) {
		if (hammer2_debug & 0x80000) {
			kprintf("INODE-1 %p (%d->%d)\n",
				ip, ip->refs, ip->refs - 1);
			print_backtrace(8);
		}
		refs = ip->refs;
		cpu_ccfence();
		if (refs == 1) {
			/*
			 * Transition to zero, must interlock with
			 * the inode inumber lookup tree (if applicable).
			 * It should not be possible for anyone to race
			 * the transition to 0.
			 */
			pmp = ip->pmp;
			KKASSERT(pmp);
			hammer2_spin_ex(&pmp->inum_spin);

			if (atomic_cmpset_int(&ip->refs, 1, 0)) {
				KKASSERT(hammer2_mtx_refs(&ip->lock) == 0);
				if (ip->flags & HAMMER2_INODE_ONRBTREE) {
					atomic_clear_int(&ip->flags,
						HAMMER2_INODE_ONRBTREE);
					RB_REMOVE(hammer2_inode_tree,
						  &pmp->inum_tree, ip);
					--pmp->inum_count;
				}
				hammer2_spin_unex(&pmp->inum_spin);

				ip->pmp = NULL;

				/*
				 * Cleaning out ip->cluster isn't entirely
				 * trivial.
				 */
				hammer2_inode_repoint(ip, NULL, NULL);

				kfree(ip, pmp->minode);
				atomic_add_long(&pmp->inmem_inodes, -1);
				ip = NULL;	/* will terminate loop */
			} else {
				hammer2_spin_unex(&ip->pmp->inum_spin);
			}
		} else {
			/*
			 * Non zero transition
			 */
			if (atomic_cmpset_int(&ip->refs, refs, refs - 1))
				break;
		}
	}
}
/*
 * Similar to gettoken but we acquire a shared token instead of an exclusive
 * token.
 */
void
lwkt_gettoken_shared(lwkt_token_t tok)
{
	thread_t td = curthread;
	lwkt_tokref_t ref;

	ref = td->td_toks_stop;
	KKASSERT(ref < &td->td_toks_end);
	++td->td_toks_stop;
	cpu_ccfence();
	_lwkt_tokref_init(ref, tok, td, TOK_EXCLREQ);

#ifdef DEBUG_LOCKS
	/*
	 * Taking a pool token in shared mode is a bad idea; other
	 * addresses deeper in the call stack may hash to the same pool
	 * token and you may end up with an exclusive-shared livelock.
	 * Warn in this condition.
	 */
	if ((tok >= &pool_tokens[0].token) &&
	    (tok < &pool_tokens[LWKT_NUM_POOL_TOKENS].token))
		kprintf("Warning! Taking pool token %p in shared mode\n", tok);
#endif

	if (_lwkt_trytokref_spin(ref, td, TOK_EXCLREQ))
		return;

	/*
	 * Give up running if we can't acquire the token right now.
	 *
	 * Since the tokref is already active the scheduler now
	 * takes care of acquisition, so we need only call
	 * lwkt_switch().
	 *
	 * Since we failed this was not a recursive token so upon
	 * return tr_tok->t_ref should be assigned to this specific
	 * ref.
	 */
	td->td_wmesg = tok->t_desc;
	++tok->t_collisions;
	logtoken(fail, ref);
	td->td_toks_have = td->td_toks_stop - 1;

	if (tokens_debug_output > 0) {
		--tokens_debug_output;
		spin_lock(&tok_debug_spin);
		kprintf("Shar Token thread %p %s %s\n",
			td, tok->t_desc, td->td_comm);
		print_backtrace(6);
		kprintf("\n");
		spin_unlock(&tok_debug_spin);
	}

	lwkt_switch();
	logtoken(succ, ref);
}
/*
 * Get a serializing token.  This routine can block.
 */
void
lwkt_gettoken(lwkt_token_t tok)
{
	thread_t td = curthread;
	lwkt_tokref_t ref;

	ref = td->td_toks_stop;
	KKASSERT(ref < &td->td_toks_end);
	++td->td_toks_stop;
	cpu_ccfence();
	_lwkt_tokref_init(ref, tok, td, TOK_EXCLUSIVE|TOK_EXCLREQ);

#ifdef DEBUG_LOCKS
	/*
	 * Taking an exclusive token after holding it shared will
	 * livelock.  Scan for that case and assert.
	 */
	lwkt_tokref_t tk;
	int found = 0;

	for (tk = &td->td_toks_base; tk < ref; tk++) {
		if (tk->tr_tok != tok)
			continue;
		found++;
		if (tk->tr_count & TOK_EXCLUSIVE)
			goto good;
	}
	/* We found only shared instances of this token if found > 0 here */
	KASSERT((found == 0), ("Token %p s/x livelock", tok));
good:
#endif

	if (_lwkt_trytokref_spin(ref, td, TOK_EXCLUSIVE|TOK_EXCLREQ))
		return;

	/*
	 * Give up running if we can't acquire the token right now.
	 *
	 * Since the tokref is already active the scheduler now
	 * takes care of acquisition, so we need only call
	 * lwkt_switch().
	 *
	 * Since we failed this was not a recursive token so upon
	 * return tr_tok->t_ref should be assigned to this specific
	 * ref.
	 */
	td->td_wmesg = tok->t_desc;
	++tok->t_collisions;
	logtoken(fail, ref);
	td->td_toks_have = td->td_toks_stop - 1;
	lwkt_switch();
	logtoken(succ, ref);
	KKASSERT(tok->t_ref == ref);
}
static
void
timersig(int nada, siginfo_t *info, void *ctxp)
{
	globaldata_t gd = mycpu;
	thread_t td = gd->gd_curthread;
	int save;

	save = errno;
	if (td->td_critcount == 0) {
		crit_enter_raw(td);
		++gd->gd_intr_nesting_level;
		cpu_ccfence();
		vktimer_intr(NULL);
		cpu_ccfence();
		--gd->gd_intr_nesting_level;
		crit_exit_raw(td);
	} else {
		need_timer();
	}
	errno = save;
}
/*
 * Return non-zero if the caller owns the lock shared or exclusive.
 * We can only guess re: shared locks.
 */
int
lockowned(struct lock *lkp)
{
	thread_t td = curthread;
	int count;

	count = lkp->lk_count;
	cpu_ccfence();

	if (count & LKC_EXCL)
		return(lkp->lk_lockholder == td);
	else
		return((count & LKC_MASK) != 0);
}
/*
 * Simple version without a timeout which can also return EINTR
 */
int
_thr_umtx_wait_intr(volatile umtx_t *mtx, int exp)
{
	int ret = 0;
	int errval;

	cpu_ccfence();
	for (;;) {
		if (*mtx != exp)
			return (0);
		errval = _umtx_sleep_err(mtx, exp, 10000000);
		if (errval == 0)
			break;
		if (errval == EBUSY)
			break;
		if (errval == EINTR) {
			ret = errval;
			break;
		}
		cpu_ccfence();
	}
	return (ret);
}
/*
 * Attempt to acquire a token, return TRUE on success, FALSE on failure.
 *
 * We set up the tokref in case we actually get the token (if we switch later
 * it becomes mandatory so we set TOK_EXCLREQ), but we call trytokref without
 * TOK_EXCLREQ in case we fail.
 */
int
lwkt_trytoken(lwkt_token_t tok)
{
	thread_t td = curthread;
	lwkt_tokref_t ref;

	ref = td->td_toks_stop;
	KKASSERT(ref < &td->td_toks_end);
	++td->td_toks_stop;
	cpu_ccfence();
	_lwkt_tokref_init(ref, tok, td, TOK_EXCLUSIVE|TOK_EXCLREQ);

	if (_lwkt_trytokref(ref, td, TOK_EXCLUSIVE))
		return TRUE;

	/*
	 * Failed, unpend the request
	 */
	cpu_ccfence();
	--td->td_toks_stop;
	++tok->t_collisions;
	return FALSE;
}
/*
 * This function is used to acquire a contested lock.
 *
 * A *mtx value of 1 indicates locked normally.
 * A *mtx value of 2 indicates locked and contested.
 */
int
__thr_umtx_lock(volatile umtx_t *mtx, int id, int timo)
{
	int v;
	int errval;
	int ret = 0;
	int retry = 4;

	v = *mtx;
	cpu_ccfence();
	id &= 0x3FFFFFFF;

	for (;;) {
		cpu_pause();
		if (v == 0) {
			if (atomic_fcmpset_int(mtx, &v, id))
				break;
			continue;
		}
		if (--retry) {
			sched_yield();
			v = *mtx;
			continue;
		}

		/*
		 * Set the waiting bit.  If the fcmpset fails v is loaded
		 * with the current content of the mutex, and if the waiting
		 * bit is already set, we can also sleep.
		 */
		if (atomic_fcmpset_int(mtx, &v, v|0x40000000) ||
		    (v & 0x40000000)) {
			if (timo == 0) {
				_umtx_sleep_err(mtx, v|0x40000000, timo);
			} else if ((errval = _umtx_sleep_err(mtx, v|0x40000000,
							     timo)) > 0) {
				if (errval == EAGAIN) {
					if (atomic_cmpset_acq_int(mtx, 0, id))
						ret = 0;
					else
						ret = ETIMEDOUT;
					break;
				}
			}
		}
		retry = 4;
	}
	return (ret);
}
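/*
 * Illustrative sketch (not the libthread_xu implementation): acquiring a
 * contested lock by publishing a "waiting" bit before sleeping.  The word
 * layout (owner id | WAITING bit), the name lock_contested() and the use
 * of sched_yield() in place of the real _umtx_sleep_err() kernel sleep are
 * assumptions made for this example.
 */
#include <sched.h>
#include <stdatomic.h>
#include <stdint.h>

#define WAITING		0x40000000u
#define ID_MASK		0x3fffffffu

static int
lock_contested(_Atomic uint32_t *word, uint32_t id)
{
	uint32_t v = atomic_load(word);

	id &= ID_MASK;
	for (;;) {
		if (v == 0) {
			/* free: try to install our id as the owner */
			if (atomic_compare_exchange_weak(word, &v, id))
				return 0;
			continue;	/* v was reloaded, retry */
		}
		/*
		 * Held by someone else.  Publish the WAITING bit so the
		 * unlocker knows to wake us, then wait.  If the CAS fails
		 * but the bit is already set we may wait anyway.
		 */
		if (atomic_compare_exchange_weak(word, &v, v | WAITING) ||
		    (v & WAITING)) {
			sched_yield();	/* stand-in for the real sleep */
			v = atomic_load(word);
		}
	}
}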
/*
 * Release a ref on an active or inactive vnode.
 *
 * Caller has no other requirements.
 *
 * If VREF_FINALIZE is set this will deactivate the vnode on the 1->0
 * transition, otherwise we leave the vnode in the active list and
 * do a lockless transition to 0, which is very important for the
 * critical path.
 *
 * (vrele() is not called when a vnode is being destroyed w/kfree)
 */
void
vrele(struct vnode *vp)
{
	for (;;) {
		int count = vp->v_refcnt;
		cpu_ccfence();
		KKASSERT((count & VREF_MASK) > 0);
		KKASSERT(vp->v_state == VS_ACTIVE ||
			 vp->v_state == VS_INACTIVE);

		/*
		 * 2+ case
		 */
		if ((count & VREF_MASK) > 1) {
			if (atomic_cmpset_int(&vp->v_refcnt,
					      count, count - 1))
				break;
			continue;
		}

		/*
		 * 1->0 transition case must handle possible finalization.
		 * When finalizing we transition 1->0x40000000.  Note that
		 * cachedvnodes is only adjusted on transitions to ->0.
		 *
		 * WARNING! VREF_TERMINATE can be cleared at any point
		 *	    when the refcnt is non-zero (by vget()) and
		 *	    the vnode has not been reclaimed.  Thus
		 *	    transitions out of VREF_TERMINATE do not have
		 *	    to mess with cachedvnodes.
		 */
		if (count & VREF_FINALIZE) {
			vx_lock(vp);
			if (atomic_cmpset_int(&vp->v_refcnt,
					      count, VREF_TERMINATE)) {
				vnode_terminate(vp);
				break;
			}
			vx_unlock(vp);
		} else {
			if (atomic_cmpset_int(&vp->v_refcnt, count, 0)) {
				atomic_add_int(&mycpu->gd_cachedvnodes, 1);
				break;
			}
		}
		/* retry */
	}
}
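/*
 * Illustrative sketch (not the vnode code): a release path that treats the
 * 1->0 transition specially when a FINALIZE flag is set in the same word as
 * the count.  The names obj_release()/obj_finalize() and the bit layout are
 * assumptions made for this example.
 */
#include <stdatomic.h>
#include <stdint.h>

#define REF_MASK	0x0fffffffu
#define REF_FINALIZE	0x40000000u

struct obj {
	_Atomic uint32_t refs;		/* REF_FINALIZE | count */
};

static void
obj_finalize(struct obj *o)
{
	(void)o;			/* real tear-down would go here */
}

static void
obj_release(struct obj *o)
{
	uint32_t count;

	for (;;) {
		count = atomic_load(&o->refs);
		if ((count & REF_MASK) > 1) {
			/* plain decrement, nothing special */
			if (atomic_compare_exchange_weak(&o->refs, &count,
							 count - 1))
				return;
		} else if (count & REF_FINALIZE) {
			/* last ref and finalization was requested */
			if (atomic_compare_exchange_weak(&o->refs,
							 &count, 0)) {
				obj_finalize(o);
				return;
			}
		} else {
			/* lockless 1->0 transition, object stays cached */
			if (atomic_compare_exchange_weak(&o->refs, &count, 0))
				return;
		}
		/* a flag or count changed underneath us, retry */
	}
}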
/*
 * Determine the status of a lock.
 */
int
lockstatus(struct lock *lkp, struct thread *td)
{
	int lock_type = 0;
	int count;

	count = lkp->lk_count;
	cpu_ccfence();

	if (count & LKC_EXCL) {
		if (td == NULL || lkp->lk_lockholder == td)
			lock_type = LK_EXCLUSIVE;
		else
			lock_type = LK_EXCLOTHER;
	} else if (count & LKC_MASK) {
		lock_type = LK_SHARED;
	}
	return (lock_type);
}
/*
 * Clear ARMED after finishing adjustments to the callout, potentially
 * allowing other cpus to take over.  We can only do this if the IPI mask
 * is 0.
 */
static __inline
int
callout_maybe_clear_armed(struct callout *c)
{
	int flags;
	int nflags;

	for (;;) {
		flags = c->c_flags;
		cpu_ccfence();
		if (flags & (CALLOUT_PENDING | CALLOUT_IPI_MASK))
			break;
		nflags = flags & ~CALLOUT_ARMED;
		if (atomic_cmpset_int(&c->c_flags, flags, nflags))
			break;
		cpu_pause();
		/* retry */
	}
	return flags;
}
/*
 * Clear PENDING and, if possible, also clear ARMED and WAITING.  Returns
 * the flags prior to the clear, atomically (used to check for WAITING).
 *
 * Clearing the cpu association (ARMED) can significantly improve the
 * performance of the next callout_reset*() call.
 */
static __inline
int
callout_unpend_disarm(struct callout *c)
{
	int flags;
	int nflags;

	for (;;) {
		flags = c->c_flags;
		cpu_ccfence();
		nflags = flags & ~(CALLOUT_PENDING | CALLOUT_WAITING);
		if ((flags & CALLOUT_IPI_MASK) == 0)
			nflags &= ~CALLOUT_ARMED;
		if (atomic_cmpset_int(&c->c_flags, flags, nflags)) {
			break;
		}
		cpu_pause();
		/* retry */
	}
	return flags;
}
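/*
 * Illustrative sketch (not the callout code): atomically clearing a set of
 * flag bits while returning the value observed just before the clear, so
 * the caller can test bits such as a WAITING flag.  The flag names and
 * functions are assumptions for this example.  When the mask does not
 * depend on the observed flags, atomic_fetch_and() does this in one step;
 * the CAS loop mirrors the conditional ARMED clear above.
 */
#include <stdatomic.h>
#include <stdint.h>

#define F_PENDING	0x0001u
#define F_WAITING	0x0002u
#define F_ARMED		0x0004u

static uint32_t
clear_flags_return_prev(_Atomic uint32_t *flagsp, uint32_t clear)
{
	uint32_t flags = atomic_load(flagsp);

	/* CAS loop so no concurrent flag update is lost */
	while (!atomic_compare_exchange_weak(flagsp, &flags, flags & ~clear))
		;				/* flags reloaded, retry */
	return flags;				/* value prior to the clear */
}

/* usage: did anyone set F_WAITING before we cleared PENDING? */
static int
unpend(_Atomic uint32_t *flagsp)
{
	return (clear_flags_return_prev(flagsp, F_PENDING) & F_WAITING) != 0;
}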
/*
 * Release a token that we hold.
 */
static __inline
void
_lwkt_reltokref(lwkt_tokref_t ref, thread_t td)
{
	lwkt_token_t tok;
	long count;

	tok = ref->tr_tok;
	for (;;) {
		count = tok->t_count;
		cpu_ccfence();
		if (tok->t_ref == ref) {
			/*
			 * We are an exclusive holder.  We must clear
			 * tok->t_ref before we clear the TOK_EXCLUSIVE bit.
			 * If we are unable to clear the bit we must restore
			 * tok->t_ref.
			 */
			KKASSERT(count & TOK_EXCLUSIVE);
			tok->t_ref = NULL;
			if (atomic_cmpset_long(&tok->t_count, count,
					       count & ~TOK_EXCLUSIVE)) {
				return;
			}
			tok->t_ref = ref;
			/* retry */
		} else {
			/*
			 * We are a shared holder
			 */
			KKASSERT(count & TOK_COUNTMASK);
			if (atomic_cmpset_long(&tok->t_count, count,
					       count - TOK_INCR)) {
				return;
			}
			/* retry */
		}
		/* retry */
	}
}
/*
 * Helper function to wait for a reference count to become zero.
 * We set REFCNTF_WAITING and sleep if the reference count is not zero.
 *
 * In the case where REFCNTF_WAITING is already set the atomic op validates
 * that it is still set after the tsleep_interlock() call.
 *
 * Users of this waiting API must use refcount_release_wakeup() to release
 * refs instead of refcount_release().  refcount_release() will not wake
 * up waiters.
 */
void
_refcount_wait(volatile u_int *countp, const char *wstr)
{
	u_int n;
	int base_ticks = ticks;

	for (;;) {
		n = *countp;
		cpu_ccfence();
		if (n == 0)
			break;
		if ((int)(ticks - base_ticks) >= hz*60 - 1) {
			kprintf("warning: refcount_wait %s: long wait\n",
				wstr);
			base_ticks = ticks;
		}
		KKASSERT(n != REFCNTF_WAITING);	/* impossible state */
		tsleep_interlock(countp, 0);
		if (atomic_cmpset_int(countp, n, n | REFCNTF_WAITING))
			tsleep(countp, PINTERLOCKED, wstr, hz*10);
	}
}
/*
 * This helper function implements the release-with-wakeup API.  It is
 * executed for the non-trivial case or if the atomic op races.
 *
 * On the i->0 transition, if REFCNTF_WAITING is set it will be cleared
 * and a wakeup() will be issued.
 *
 * On any other transition we simply subtract (i) and leave the
 * REFCNTF_WAITING flag intact.
 *
 * This function returns TRUE(1) on the last release, whether a wakeup
 * occurred or not, and FALSE(0) otherwise.
 *
 * NOTE! (i) cannot be 0
 */
int
_refcount_release_wakeup_n(volatile u_int *countp, u_int i)
{
	u_int n;

	for (;;) {
		n = *countp;
		cpu_ccfence();
		if (n == (REFCNTF_WAITING | i)) {
			if (atomic_cmpset_int(countp, n, 0)) {
				wakeup(countp);
				n = i;
				break;
			}
		} else {
			KKASSERT(n != REFCNTF_WAITING);	/* illegal state */
			if (atomic_cmpset_int(countp, n, n - i))
				break;
		}
	}
	return (n == i);
}
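/*
 * Illustrative sketch (not the kernel refcount code): the same
 * wait-for-zero protocol, with a WAITING flag stored in the count word.
 * A pthread mutex/condvar pair stands in for the kernel's
 * tsleep()/wakeup(); all names here are assumptions made for the example.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>

#define RC_WAITING	0x80000000u

static _Atomic uint32_t	rc;
static pthread_mutex_t	rc_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t	rc_cv = PTHREAD_COND_INITIALIZER;

static void
rc_wait_zero(void)
{
	uint32_t n;

	pthread_mutex_lock(&rc_mtx);
	for (;;) {
		n = atomic_load(&rc);
		if (n == 0)
			break;
		/* advertise that a waiter exists before sleeping */
		if (atomic_compare_exchange_weak(&rc, &n, n | RC_WAITING))
			pthread_cond_wait(&rc_cv, &rc_mtx);
	}
	pthread_mutex_unlock(&rc_mtx);
}

static int
rc_release(void)
{
	uint32_t n;

	for (;;) {
		n = atomic_load(&rc);
		if (n == (RC_WAITING | 1)) {
			/* last ref with a waiter: clear everything, wake */
			if (atomic_compare_exchange_weak(&rc, &n, 0)) {
				pthread_mutex_lock(&rc_mtx);
				pthread_cond_broadcast(&rc_cv);
				pthread_mutex_unlock(&rc_mtx);
				return 1;
			}
		} else {
			/* normal decrement, keep the WAITING flag intact */
			if (atomic_compare_exchange_weak(&rc, &n, n - 1))
				return ((n & ~RC_WAITING) == 1);
		}
	}
}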
static void
pmap_inval_init(pmap_t pmap)
{
	cpulock_t olock;
	cpulock_t nlock;

	crit_enter_id("inval");

	if (pmap != &kernel_pmap) {
		for (;;) {
			olock = pmap->pm_active_lock;
			cpu_ccfence();
			nlock = olock | CPULOCK_EXCL;
			if (olock != nlock &&
			    atomic_cmpset_int(&pmap->pm_active_lock,
					      olock, nlock)) {
				break;
			}
			lwkt_process_ipiq();
			cpu_pause();
		}
		atomic_add_acq_long(&pmap->pm_invgen, 1);
	}
}
/*
 * Unlock a lock.  The caller must hold the lock either shared or exclusive.
 *
 * On the last release we handle any pending chains.
 */
void
_mtx_unlock(mtx_t *mtx)
{
	thread_t td __debugvar = curthread;
	u_int	lock;
	u_int	nlock;

	for (;;) {
		lock = mtx->mtx_lock;
		cpu_ccfence();

		switch(lock) {
		case MTX_EXCLUSIVE | 1:
			/*
			 * Last release, exclusive lock.
			 * No exclusive or shared requests pending.
			 */
			KKASSERT(mtx->mtx_owner == td ||
				 mtx->mtx_owner == NULL);
			mtx->mtx_owner = NULL;
			nlock = 0;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				goto done;
			break;
		case MTX_EXCLUSIVE | MTX_EXWANTED | 1:
		case MTX_EXCLUSIVE | MTX_EXWANTED | MTX_SHWANTED | 1:
			/*
			 * Last release, exclusive lock.
			 * Exclusive requests pending.
			 * Exclusive requests have priority over shared reqs.
			 */
			KKASSERT(mtx->mtx_owner == td ||
				 mtx->mtx_owner == NULL);
			mtx->mtx_owner = NULL;
			if (mtx_chain_link_ex(mtx, lock))
				goto done;
			break;
		case MTX_EXCLUSIVE | MTX_SHWANTED | 1:
			/*
			 * Last release, exclusive lock.
			 *
			 * Shared requests are pending.  Transfer our
			 * count (1) to the first shared request, wakeup
			 * all shared reqs.
			 */
			KKASSERT(mtx->mtx_owner == td ||
				 mtx->mtx_owner == NULL);
			mtx->mtx_owner = NULL;
			if (mtx_chain_link_sh(mtx, lock))
				goto done;
			break;
		case 1:
			/*
			 * Last release, shared lock.
			 * No exclusive or shared requests pending.
			 */
			nlock = 0;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				goto done;
			break;
		case MTX_EXWANTED | 1:
		case MTX_EXWANTED | MTX_SHWANTED | 1:
			/*
			 * Last release, shared lock.
			 *
			 * Exclusive requests are pending.  Upgrade this
			 * final shared lock to exclusive and transfer our
			 * count (1) to the next exclusive request.
			 *
			 * Exclusive requests have priority over shared reqs.
			 */
			if (mtx_chain_link_ex(mtx, lock))
				goto done;
			break;
		case MTX_SHWANTED | 1:
			/*
			 * Last release, shared lock.
			 * Shared requests pending.
			 */
			if (mtx_chain_link_sh(mtx, lock))
				goto done;
			break;
		default:
			/*
			 * We have to loop if this is the last release but
			 * someone is fiddling with LINKSPIN.
			 */
			if ((lock & MTX_MASK) == 1) {
				KKASSERT(lock & MTX_LINKSPIN);
				break;
			}

			/*
			 * Not the last release (shared or exclusive)
			 */
			nlock = lock - 1;
			KKASSERT((nlock & MTX_MASK) != MTX_MASK);
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				goto done;
			break;
		}
		/* loop try again */
		cpu_pause();
	}
done:
	;
}
/*
 * Regular umtx wait that cannot return EINTR
 */
int
_thr_umtx_wait(volatile umtx_t *mtx, int exp, const struct timespec *timeout,
	       int clockid)
{
	struct timespec ts, ts2, ts3;
	int timo, errval, ret = 0;

	cpu_ccfence();
	if (*mtx != exp)
		return (0);

	if (timeout == NULL) {
		/*
		 * NOTE: If no timeout, EINTR cannot be returned.  Ignore
		 *	 EINTR.
		 */
		while ((errval = _umtx_sleep_err(mtx, exp, 10000000)) > 0) {
			if (errval == EBUSY)
				break;
#if 0
			if (errval == ETIMEDOUT || errval == EWOULDBLOCK) {
				if (*mtx != exp) {
					fprintf(stderr,
					    "thr_umtx_wait: FAULT VALUE CHANGE "
					    "%d -> %d oncond %p\n",
					    exp, *mtx, mtx);
				}
			}
#endif
			if (*mtx != exp)
				return(0);
		}
		return (ret);
	}

	/*
	 * Timed waits can return EINTR
	 */
	if ((timeout->tv_sec < 0) ||
	    (timeout->tv_sec == 0 && timeout->tv_nsec <= 0))
		return (ETIMEDOUT);

	clock_gettime(clockid, &ts);
	TIMESPEC_ADD(&ts, &ts, timeout);
	ts2 = *timeout;

	for (;;) {
		if (ts2.tv_nsec) {
			timo = (int)(ts2.tv_nsec / 1000);
			if (timo == 0)
				timo = 1;
		} else {
			timo = 1000000;
		}

		if ((errval = _umtx_sleep_err(mtx, exp, timo)) > 0) {
			if (errval == EBUSY) {
				ret = 0;
				break;
			}
			if (errval == EINTR) {
				ret = EINTR;
				break;
			}
		}

		clock_gettime(clockid, &ts3);
		TIMESPEC_SUB(&ts2, &ts, &ts3);
		if (ts2.tv_sec < 0 ||
		    (ts2.tv_sec == 0 && ts2.tv_nsec <= 0)) {
			ret = ETIMEDOUT;
			break;
		}
	}
	return (ret);
}
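/*
 * Illustrative sketch (not the libthread_xu code): converting a relative
 * timeout into an absolute deadline and recomputing the remaining time
 * each time a bounded sleep wakes up.  timespec_add/timespec_sub are local
 * helpers written for this example; TIMESPEC_ADD/TIMESPEC_SUB in the real
 * code play the same role, and nanosleep() stands in for the event wait.
 */
#include <errno.h>
#include <time.h>

static void
timespec_add(struct timespec *r, const struct timespec *a,
	     const struct timespec *b)
{
	r->tv_sec = a->tv_sec + b->tv_sec;
	r->tv_nsec = a->tv_nsec + b->tv_nsec;
	if (r->tv_nsec >= 1000000000L) {
		r->tv_nsec -= 1000000000L;
		r->tv_sec++;
	}
}

static void
timespec_sub(struct timespec *r, const struct timespec *a,
	     const struct timespec *b)
{
	r->tv_sec = a->tv_sec - b->tv_sec;
	r->tv_nsec = a->tv_nsec - b->tv_nsec;
	if (r->tv_nsec < 0) {
		r->tv_nsec += 1000000000L;
		r->tv_sec--;
	}
}

/*
 * Sleep in bounded slices until the relative timeout expires; returns
 * ETIMEDOUT once the deadline has passed.
 */
static int
wait_with_deadline(const struct timespec *timeout, int clockid)
{
	struct timespec deadline, now, remain, slice;

	clock_gettime(clockid, &now);
	timespec_add(&deadline, &now, timeout);
	for (;;) {
		clock_gettime(clockid, &now);
		timespec_sub(&remain, &deadline, &now);
		if (remain.tv_sec < 0 ||
		    (remain.tv_sec == 0 && remain.tv_nsec <= 0))
			return (ETIMEDOUT);
		/* sleep at most one millisecond per iteration */
		slice.tv_sec = 0;
		slice.tv_nsec = 1000000L;
		if (remain.tv_sec == 0 && remain.tv_nsec < slice.tv_nsec)
			slice.tv_nsec = remain.tv_nsec;
		nanosleep(&slice, NULL);
	}
}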
/*
 * Stop a running timer and ensure that any running callout completes before
 * returning.  If the timer is running on another cpu this function may block
 * to interlock against the callout.  If the callout is currently executing
 * or blocked in another thread this function may also block to interlock
 * against the callout.
 *
 * The caller must be careful to avoid deadlocks, either by using
 * callout_init_lk() (which uses the lockmgr lock cancelation feature),
 * by using tokens and dealing with breaks in the serialization, or using
 * the lockmgr lock cancelation feature yourself in the callout callback
 * function.
 *
 * callout_stop() returns non-zero if the callout was pending.
 */
static int
_callout_stop(struct callout *c, int issync)
{
	globaldata_t gd = mycpu;
	globaldata_t tgd;
	softclock_pcpu_t sc;
	int flags;
	int nflags;
	int rc;
	int cpuid;

#ifdef INVARIANTS
	if ((c->c_flags & CALLOUT_DID_INIT) == 0) {
		callout_init(c);
		kprintf(
		    "callout_stop(%p) from %p: callout was not initialized\n",
		    c, ((int **)&c)[-1]);
		print_backtrace(-1);
	}
#endif
	crit_enter_gd(gd);

	/*
	 * Fast path operations:
	 *
	 * If ARMED and owned by our cpu, or not ARMED, and other simple
	 * conditions are met, we can just clear ACTIVE and EXECUTED
	 * and we are done.
	 */
	for (;;) {
		flags = c->c_flags;
		cpu_ccfence();

		cpuid = CALLOUT_FLAGS_TO_CPU(flags);

		/*
		 * Can't handle an armed callout in the fast path if it is
		 * not on the current cpu.  We must atomically increment the
		 * IPI count for the IPI we intend to send and break out of
		 * the fast path to enter the slow path.
		 */
		if (flags & CALLOUT_ARMED) {
			if (gd->gd_cpuid != cpuid) {
				nflags = flags + 1;
				if (atomic_cmpset_int(&c->c_flags,
						      flags, nflags)) {
					/* break to slow path */
					break;
				}
				continue;	/* retry */
			}
		} else {
			cpuid = gd->gd_cpuid;
			KKASSERT((flags & CALLOUT_IPI_MASK) == 0);
			KKASSERT((flags & CALLOUT_PENDING) == 0);
		}

		/*
		 * Process pending IPIs and retry (only if not called from
		 * an IPI).
		 */
		if (flags & CALLOUT_IPI_MASK) {
			lwkt_process_ipiq();
			continue;	/* retry */
		}

		/*
		 * Transition to the stopped state, recover the EXECUTED
		 * status.  If pending we cannot clear ARMED until after
		 * we have removed (c) from the callwheel.
		 *
		 * NOTE: The callout might already not be armed but in this
		 *	 case it should also not be pending.
		 */
		nflags = flags & ~(CALLOUT_ACTIVE |
				   CALLOUT_EXECUTED |
				   CALLOUT_WAITING |
				   CALLOUT_PENDING);

		/* NOTE: IPI_MASK already tested */
		if ((flags & CALLOUT_PENDING) == 0)
			nflags &= ~CALLOUT_ARMED;
		if (atomic_cmpset_int(&c->c_flags, flags, nflags)) {
			/*
			 * Can only remove from callwheel if currently
			 * pending.
			 */
			if (flags & CALLOUT_PENDING) {
				sc = &softclock_pcpu_ary[gd->gd_cpuid];
				if (sc->next == c)
					sc->next = TAILQ_NEXT(c, c_links.tqe);
				TAILQ_REMOVE(
					&sc->callwheel[c->c_time & cwheelmask],
					c,
					c_links.tqe);
				c->c_func = NULL;

				/*
				 * NOTE: Can't clear ARMED until we have
				 *	 physically removed (c) from the
				 *	 callwheel.
				 *
				 * NOTE: WAITING bit race exists when doing
				 *	 unconditional bit clears.
				 */
				callout_maybe_clear_armed(c);
				if (c->c_flags & CALLOUT_WAITING)
					flags |= CALLOUT_WAITING;
			}

			/*
			 * ARMED has been cleared at this point and (c)
			 * might now be stale.  Only good for wakeup()s.
			 */
			if (flags & CALLOUT_WAITING)
				wakeup(c);

			goto skip_slow;
		}
		/* retry */
	}

	/*
	 * Slow path (and not called via an IPI).
	 *
	 * When ARMED to a different cpu the stop must be processed on that
	 * cpu.  Issue the IPI and wait for completion.  We have already
	 * incremented the IPI count.
	 */
	tgd = globaldata_find(cpuid);
	lwkt_send_ipiq3(tgd, callout_stop_ipi, c, issync);

	for (;;) {
		int flags;
		int nflags;

		flags = c->c_flags;
		cpu_ccfence();
		if ((flags & CALLOUT_IPI_MASK) == 0)	/* fast path */
			break;
		nflags = flags | CALLOUT_WAITING;
		tsleep_interlock(c, 0);
		if (atomic_cmpset_int(&c->c_flags, flags, nflags)) {
			tsleep(c, PINTERLOCKED, "cstp1", 0);
		}
	}

skip_slow:
	/*
	 * If (issync) we must also wait for any in-progress callbacks to
	 * complete, unless the stop is being executed from the callback
	 * itself.  The EXECUTED flag is set prior to the callback
	 * being made so our existing flags status already has it.
	 *
	 * If auto-lock mode is being used, this is where we cancel any
	 * blocked lock that is potentially preventing the target cpu
	 * from completing the callback.
	 */
	while (issync) {
		intptr_t *runp;
		intptr_t runco;

		sc = &softclock_pcpu_ary[cpuid];
		if (gd->gd_curthread == &sc->thread)	/* stop from cb */
			break;
		runp = &sc->running;
		runco = *runp;
		cpu_ccfence();
		if ((runco & ~(intptr_t)1) != (intptr_t)c)
			break;
		if (c->c_flags & CALLOUT_AUTOLOCK)
			lockmgr(c->c_lk, LK_CANCEL_BEG);
		tsleep_interlock(c, 0);
		if (atomic_cmpset_long(runp, runco, runco | 1))
			tsleep(c, PINTERLOCKED, "cstp3", 0);
		if (c->c_flags & CALLOUT_AUTOLOCK)
			lockmgr(c->c_lk, LK_CANCEL_END);
	}

	crit_exit_gd(gd);
	rc = (flags & CALLOUT_EXECUTED) != 0;

	return rc;
}
/*
 * Drop an inode reference, freeing the inode when the last reference goes
 * away.
 */
void
hammer2_inode_drop(hammer2_inode_t *ip)
{
	hammer2_pfsmount_t *pmp;
	hammer2_inode_t *pip;
	u_int refs;

	while (ip) {
		refs = ip->refs;
		cpu_ccfence();
		if (refs == 1) {
			/*
			 * Transition to zero, must interlock with
			 * the inode inumber lookup tree (if applicable).
			 *
			 * NOTE: The super-root inode has no pmp.
			 */
			pmp = ip->pmp;
			if (pmp)
				spin_lock(&pmp->inum_spin);

			if (atomic_cmpset_int(&ip->refs, 1, 0)) {
				KKASSERT(ip->topo_cst.count == 0);
				if (ip->flags & HAMMER2_INODE_ONRBTREE) {
					atomic_clear_int(&ip->flags,
						HAMMER2_INODE_ONRBTREE);
					RB_REMOVE(hammer2_inode_tree,
						  &pmp->inum_tree, ip);
				}
				if (pmp)
					spin_unlock(&pmp->inum_spin);

				pip = ip->pip;
				ip->pip = NULL;
				ip->pmp = NULL;

				/*
				 * Cleaning out ip->chain isn't entirely
				 * trivial.
				 */
				hammer2_inode_repoint(ip, NULL, NULL);

				/*
				 * We have to drop pip (if non-NULL) to
				 * dispose of our implied reference from
				 * ip->pip.  We can simply loop on it.
				 */
				if (pmp) {
					KKASSERT((ip->flags &
						  HAMMER2_INODE_SROOT) == 0);
					kfree(ip, pmp->minode);
					atomic_add_long(&pmp->inmem_inodes,
							-1);
				} else {
					KKASSERT(ip->flags &
						 HAMMER2_INODE_SROOT);
					kfree(ip, M_HAMMER2);
				}
				ip = pip;   /* continue with pip (can be NULL) */
			} else {
				if (pmp)
					spin_unlock(&ip->pmp->inum_spin);
			}
		} else {
			/*
			 * Non zero transition
			 */
			if (atomic_cmpset_int(&ip->refs, refs, refs - 1))
				break;
		}
	}
}
static
void
callout_stop_ipi(void *arg, int issync, struct intrframe *frame)
{
	globaldata_t gd = mycpu;
	struct callout *c = arg;
	softclock_pcpu_t sc;

	/*
	 * Only the fast path can run in an IPI.  Chain the stop request
	 * if we are racing cpu changes.
	 */
	for (;;) {
		globaldata_t tgd;
		int flags;
		int nflags;
		int cpuid;

		flags = c->c_flags;
		cpu_ccfence();

		/*
		 * Can't handle an armed callout in the fast path if it is
		 * not on the current cpu.  We must atomically increment the
		 * IPI count and break out of the fast path.
		 *
		 * If called from an IPI we chain the IPI instead.
		 */
		if (flags & CALLOUT_ARMED) {
			cpuid = CALLOUT_FLAGS_TO_CPU(flags);
			if (gd->gd_cpuid != cpuid) {
				tgd = globaldata_find(cpuid);
				lwkt_send_ipiq3(tgd, callout_stop_ipi,
						c, issync);
				break;
			}
		}

		/*
		 * NOTE: As an IPI ourselves we cannot wait for other IPIs
		 *	 to complete, and we are being executed in-order.
		 */

		/*
		 * Transition to the stopped state, recover the EXECUTED
		 * status, decrement the IPI count.  If pending we cannot
		 * clear ARMED until after we have removed (c) from the
		 * callwheel, and only if there are no more IPIs pending.
		 */
		nflags = flags & ~(CALLOUT_ACTIVE | CALLOUT_PENDING);
		nflags = nflags - 1;			/* dec ipi count */
		if ((flags & (CALLOUT_IPI_MASK | CALLOUT_PENDING)) == 1)
			nflags &= ~CALLOUT_ARMED;
		if ((flags & CALLOUT_IPI_MASK) == 1)
			nflags &= ~(CALLOUT_WAITING | CALLOUT_EXECUTED);

		if (atomic_cmpset_int(&c->c_flags, flags, nflags)) {
			/*
			 * Can only remove from callwheel if currently
			 * pending.
			 */
			if (flags & CALLOUT_PENDING) {
				sc = &softclock_pcpu_ary[gd->gd_cpuid];
				if (sc->next == c)
					sc->next = TAILQ_NEXT(c, c_links.tqe);
				TAILQ_REMOVE(
					&sc->callwheel[c->c_time & cwheelmask],
					c,
					c_links.tqe);
				c->c_func = NULL;

				/*
				 * NOTE: Can't clear ARMED until we have
				 *	 physically removed (c) from the
				 *	 callwheel.
				 *
				 * NOTE: WAITING bit race exists when doing
				 *	 unconditional bit clears.
				 */
				callout_maybe_clear_armed(c);
				if (c->c_flags & CALLOUT_WAITING)
					flags |= CALLOUT_WAITING;
			}

			/*
			 * ARMED has been cleared at this point and (c)
			 * might now be stale.  Only good for wakeup()s.
			 */
			if (flags & CALLOUT_WAITING)
				wakeup(c);
			break;
		}
		/* retry */
	}
}
/*
 * Stop a running timer.  WARNING! If called on a cpu other than the one
 * the callout was started on this function will liveloop on its IPI to
 * the target cpu to process the request.  It is possible for the callout
 * to execute in that case.
 *
 * WARNING! This function may be called from any cpu but the caller must
 *	    serialize callout_stop() and callout_reset() calls on the passed
 *	    structure regardless of cpu.
 *
 * WARNING! This routine may be called from an IPI
 *
 * WARNING! This function can return while its c_func is still running
 *	    in the callout thread, a secondary check may be needed.
 *	    Use callout_stop_sync() to wait for any callout function to
 *	    complete before returning, being sure that no deadlock is
 *	    possible if you do.
 */
int
callout_stop(struct callout *c)
{
	globaldata_t gd = mycpu;
	globaldata_t tgd;
	softclock_pcpu_t sc;

#ifdef INVARIANTS
	if ((c->c_flags & CALLOUT_DID_INIT) == 0) {
		callout_init(c);
		kprintf(
		    "callout_stop(%p) from %p: callout was not initialized\n",
		    c, ((int **)&c)[-1]);
		print_backtrace(-1);
	}
#endif
	crit_enter_gd(gd);

	/*
	 * Don't attempt to delete a callout that's not on the queue.  The
	 * callout may not have a cpu assigned to it.  Callers do not have
	 * to be on the issuing cpu but must still serialize access to the
	 * callout structure.
	 *
	 * We are not cpu-localized here and cannot safely modify the
	 * flags field in the callout structure.  Note that most of the
	 * time CALLOUT_ACTIVE will be 0 if CALLOUT_PENDING is also 0.
	 *
	 * If we race another cpu's dispatch of this callout it is possible
	 * for CALLOUT_ACTIVE to be set with CALLOUT_PENDING unset.  This
	 * will cause us to fall through and synchronize with the other
	 * cpu.
	 */
	if ((c->c_flags & CALLOUT_PENDING) == 0) {
		if ((c->c_flags & CALLOUT_ACTIVE) == 0) {
			crit_exit_gd(gd);
			return (0);
		}
		if (c->c_gd == NULL || c->c_gd == gd) {
			c->c_flags &= ~CALLOUT_ACTIVE;
			crit_exit_gd(gd);
			return (0);
		}
	}
	if ((tgd = c->c_gd) != gd) {
		/*
		 * If the callout is owned by a different CPU we have to
		 * execute the function synchronously on the target cpu.
		 */
		int seq;

		cpu_ccfence();	/* don't let tgd alias c_gd */
		seq = lwkt_send_ipiq(tgd, (void *)callout_stop, c);
		lwkt_wait_ipiq(tgd, seq);
	} else {
		/*
		 * If the callout is owned by the same CPU we can
		 * process it directly, but if we are racing our helper
		 * thread (sc->next), we have to adjust sc->next.  The
		 * race is interlocked by a critical section.
		 */
		sc = &softclock_pcpu_ary[gd->gd_cpuid];

		c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);
		if (sc->next == c)
			sc->next = TAILQ_NEXT(c, c_links.tqe);

		TAILQ_REMOVE(&sc->callwheel[c->c_time & callwheelmask],
			     c, c_links.tqe);
		c->c_func = NULL;
	}
	crit_exit_gd(gd);
	return (1);
}
/*
 * Exclusive-lock a mutex, block until acquired unless link is async.
 * Recursion is allowed.
 *
 * Returns 0 on success, the tsleep() return code on failure, EINPROGRESS
 * if async.  If immediately successful an async exclusive lock will return 0
 * and not issue the async callback or link the link structure.  The caller
 * must handle this case (typically this is an optimal code path).
 *
 * A tsleep() error can only be returned if PCATCH is specified in the flags.
 */
static __inline int
__mtx_lock_ex(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
	thread_t td;
	u_int	lock;
	u_int	nlock;
	int	error;
	int	isasync;

	for (;;) {
		lock = mtx->mtx_lock;
		cpu_ccfence();

		if (lock == 0) {
			nlock = MTX_EXCLUSIVE | 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
				mtx->mtx_owner = curthread;
				cpu_sfence();
				link->state = MTX_LINK_ACQUIRED;
				error = 0;
				break;
			}
			continue;
		}
		if ((lock & MTX_EXCLUSIVE) && mtx->mtx_owner == curthread) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				cpu_sfence();
				link->state = MTX_LINK_ACQUIRED;
				error = 0;
				break;
			}
			continue;
		}

		/*
		 * We need MTX_LINKSPIN to manipulate exlink or
		 * shlink.
		 *
		 * We must set MTX_EXWANTED with MTX_LINKSPIN to indicate
		 * pending exclusive requests.  It cannot be set as a separate
		 * operation prior to acquiring MTX_LINKSPIN.
		 *
		 * To avoid unnecessary cpu cache traffic we poll
		 * for collisions.  It is also possible that EXWANTED
		 * state failing the above test was spurious, so all the
		 * tests must be repeated if we cannot obtain LINKSPIN
		 * with the prior state tests intact (i.e. don't reload
		 * the (lock) variable here, for heaven's sake!).
		 */
		if (lock & MTX_LINKSPIN) {
			cpu_pause();
			continue;
		}
		td = curthread;
		nlock = lock | MTX_EXWANTED | MTX_LINKSPIN;
		crit_enter_raw(td);
		if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock) == 0) {
			crit_exit_raw(td);
			continue;
		}

		/*
		 * Check for early abort.
		 */
		if (link->state == MTX_LINK_ABORTED) {
			if (mtx->mtx_exlink == NULL) {
				atomic_clear_int(&mtx->mtx_lock,
						 MTX_LINKSPIN |
						 MTX_EXWANTED);
			} else {
				atomic_clear_int(&mtx->mtx_lock,
						 MTX_LINKSPIN);
			}
			crit_exit_raw(td);
			link->state = MTX_LINK_IDLE;
			error = ENOLCK;
			break;
		}

		/*
		 * Add our link to the exlink list and release LINKSPIN.
		 */
		link->owner = td;
		link->state = MTX_LINK_LINKED_EX;
		if (mtx->mtx_exlink) {
			link->next = mtx->mtx_exlink;
			link->prev = link->next->prev;
			link->next->prev = link;
			link->prev->next = link;
		} else {
			link->next = link;
			link->prev = link;
			mtx->mtx_exlink = link;
		}
		isasync = (link->callback != NULL);
		atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN);
		crit_exit_raw(td);

		/*
		 * If asynchronous lock request return without
		 * blocking, leave link structure linked.
		 */
		if (isasync) {
			error = EINPROGRESS;
			break;
		}

		/*
		 * Wait for lock
		 */
		error = mtx_wait_link(mtx, link, flags, to);
		break;
	}
	return (error);
}
/*
 * Attempt to acquire a shared or exclusive token.  Returns TRUE on success,
 * FALSE on failure.
 *
 * If TOK_EXCLUSIVE is set in mode we are attempting to get an exclusive
 * token, otherwise we are attempting to get a shared token.
 *
 * If TOK_EXCLREQ is set in mode this is a blocking operation, otherwise
 * it is a non-blocking operation (for both exclusive and shared
 * acquisitions).
 */
static __inline
int
_lwkt_trytokref(lwkt_tokref_t ref, thread_t td, long mode)
{
	lwkt_token_t tok;
	lwkt_tokref_t oref;
	long count;

	tok = ref->tr_tok;
	KASSERT(((mode & TOK_EXCLREQ) == 0 ||	/* non blocking */
		td->td_gd->gd_intr_nesting_level == 0 ||
		panic_cpu_gd == mycpu),
		("Attempt to acquire token %p not already "
		 "held in hard code section", tok));

	if (mode & TOK_EXCLUSIVE) {
		/*
		 * Attempt to get an exclusive token
		 */
		for (;;) {
			count = tok->t_count;
			oref = tok->t_ref;	/* can be NULL */
			cpu_ccfence();
			if ((count & ~TOK_EXCLREQ) == 0) {
				/*
				 * It is possible to get the exclusive bit.
				 * We must clear TOK_EXCLREQ on successful
				 * acquisition.
				 */
				if (atomic_cmpset_long(&tok->t_count, count,
						       (count & ~TOK_EXCLREQ) |
						       TOK_EXCLUSIVE)) {
					KKASSERT(tok->t_ref == NULL);
					tok->t_ref = ref;
					return TRUE;
				}
				/* retry */
			} else if ((count & TOK_EXCLUSIVE) &&
				   oref >= &td->td_toks_base &&
				   oref < td->td_toks_stop) {
				/*
				 * Our thread already holds the exclusive
				 * bit, we treat this tokref as a shared
				 * token (sorta) to make the token release
				 * code easier.
				 *
				 * NOTE: oref cannot race above if it
				 *	 happens to be ours, so we're good.
				 *	 But we must still have a stable
				 *	 variable for both parts of the
				 *	 comparison.
				 *
				 * NOTE: Since we already have an exclusive
				 *	 lock and don't need to check EXCLREQ
				 *	 we can just use an atomic_add here
				 */
				atomic_add_long(&tok->t_count, TOK_INCR);
				ref->tr_count &= ~TOK_EXCLUSIVE;
				return TRUE;
			} else if ((mode & TOK_EXCLREQ) &&
				   (count & TOK_EXCLREQ) == 0) {
				/*
				 * Unable to get the exclusive bit but being
				 * asked to set the exclusive-request bit.
				 * Since we are going to retry anyway just
				 * set the bit unconditionally.
				 */
				atomic_set_long(&tok->t_count, TOK_EXCLREQ);
				return FALSE;
			} else {
				/*
				 * Unable to get the exclusive bit and not
				 * being asked to set the exclusive-request
				 * (aka lwkt_trytoken()), or EXCLREQ was
				 * already set.
				 */
				cpu_pause();
				return FALSE;
			}
			/* retry */
		}
	} else {
		/*
		 * Attempt to get a shared token.  Note that TOK_EXCLREQ
		 * for shared tokens simply means the caller intends to
		 * block.  We never actually set the bit in tok->t_count.
		 */
		for (;;) {
			count = tok->t_count;
			oref = tok->t_ref;	/* can be NULL */
			cpu_ccfence();
			if ((count & (TOK_EXCLUSIVE/*|TOK_EXCLREQ*/)) == 0) {
				/*
				 * It may be possible to get the token shared.
				 */
				if ((atomic_fetchadd_long(&tok->t_count,
							  TOK_INCR) &
				     TOK_EXCLUSIVE) == 0) {
					return TRUE;
				}
				atomic_fetchadd_long(&tok->t_count, -TOK_INCR);
				/* retry */
			} else if ((count & TOK_EXCLUSIVE) &&
				   oref >= &td->td_toks_base &&
				   oref < td->td_toks_stop) {
				/*
				 * We own the exclusive bit on the token so
				 * we can in fact also get it shared.
				 */
				atomic_add_long(&tok->t_count, TOK_INCR);
				return TRUE;
			} else {
				/*
				 * We failed to get the token shared
				 */
				return FALSE;
			}
			/* retry */
		}
	}
}
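/*
 * Illustrative sketch (not the LWKT token code): shared and exclusive
 * ownership packed into one word, with an optimistic fetch-add for the
 * shared path and a backout if the exclusive bit turns out to be set.
 * The bit layout and names are assumptions made for this example.
 */
#include <stdatomic.h>
#include <stdbool.h>

#define T_EXCLUSIVE	0x00000001ul	/* exclusive holder present */
#define T_INCR		0x00000100ul	/* one shared reference */

static _Atomic unsigned long tcount;

static bool
try_shared(void)
{
	/* optimistic add; back it out if someone holds it exclusively */
	if ((atomic_fetch_add(&tcount, T_INCR) & T_EXCLUSIVE) == 0)
		return true;
	atomic_fetch_sub(&tcount, T_INCR);
	return false;
}

static bool
try_exclusive(void)
{
	unsigned long count = atomic_load(&tcount);

	/* only when no shared or exclusive holders exist */
	if (count == 0 &&
	    atomic_compare_exchange_strong(&tcount, &count, T_EXCLUSIVE))
		return true;
	return false;
}

static void
release_shared(void)
{
	atomic_fetch_sub(&tcount, T_INCR);
}

static void
release_exclusive(void)
{
	atomic_fetch_and(&tcount, ~T_EXCLUSIVE);
}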