int _cv_timedwait(struct cv *c, struct lock *lk, int timo, int wakesig) { int flags = wakesig ? PCATCH : 0; int error; /* * Can interlock without critical section/spinlock as long * as we don't block before calling *sleep(). PINTERLOCKED * must be passed to the *sleep() to use the manual interlock * (else a new one is created which opens a timing race). */ tsleep_interlock(c, flags); spin_lock(&c->cv_lock); c->cv_waiters++; spin_unlock(&c->cv_lock); if (lk) error = lksleep(c, lk, flags | PINTERLOCKED, c->cv_desc, timo); else error = tsleep(c, flags | PINTERLOCKED, c->cv_desc, timo); return (error); }
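The waiter above registers on the wait channel with tsleep_interlock() before bumping cv_waiters, so the signal side only has to observe the waiter count and issue the wakeup. Below is a minimal sketch of that signal side (illustrative only; example_cv_signal is an invented name approximating what a condvar signal routine does, not a verbatim copy of the kernel's _cv_signal()).

/*
 * Illustrative signal side.  Because the waiter called tsleep_interlock(c, ...)
 * before dropping cv_lock, a wakeup_one() issued here cannot be lost even if
 * it runs before the waiter reaches its *sleep() call.
 */
static void
example_cv_signal(struct cv *c)
{
	spin_lock(&c->cv_lock);
	if (c->cv_waiters == 0) {
		spin_unlock(&c->cv_lock);
		return;
	}
	--c->cv_waiters;
	spin_unlock(&c->cv_lock);
	wakeup_one(c);
}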
/* * get - lock and return the f_offset field. * set - set and unlock the f_offset field. * * These routines serve the dual purpose of serializing access to the * f_offset field (at least on i386) and guaranteeing operational integrity * when multiple read()ers and write()ers are present on the same fp. * * MPSAFE */ static __inline off_t vn_get_fpf_offset(struct file *fp) { u_int flags; u_int nflags; /* * Shortcut critical path. */ flags = fp->f_flag & ~FOFFSETLOCK; if (atomic_cmpset_int(&fp->f_flag, flags, flags | FOFFSETLOCK)) return(fp->f_offset); /* * The hard way */ for (;;) { flags = fp->f_flag; if (flags & FOFFSETLOCK) { nflags = flags | FOFFSETWAKE; tsleep_interlock(&fp->f_flag, 0); if (atomic_cmpset_int(&fp->f_flag, flags, nflags)) tsleep(&fp->f_flag, PINTERLOCKED, "fpoff", 0); } else { nflags = flags | FOFFSETLOCK; if (atomic_cmpset_int(&fp->f_flag, flags, nflags)) break; } } return(fp->f_offset); }
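For reference, a sketch of the matching release side follows. It is illustrative (example_set_fpf_offset is a made-up name, not the kernel's vn_set_fpf_offset()): the offset is stored while FOFFSETLOCK is still held, the lock and wake-request bits are cleared in one atomic op, and a wakeup on &fp->f_flag is issued only if some other thread recorded FOFFSETWAKE while waiting.

static __inline void
example_set_fpf_offset(struct file *fp, off_t offset)
{
	u_int	flags;
	u_int	nflags;

	fp->f_offset = offset;		/* still serialized by FOFFSETLOCK */
	for (;;) {
		flags = fp->f_flag;
		nflags = flags & ~(FOFFSETLOCK | FOFFSETWAKE);
		if (atomic_cmpset_int(&fp->f_flag, flags, nflags)) {
			/* only wake if a waiter registered itself */
			if (flags & FOFFSETWAKE)
				wakeup(&fp->f_flag);
			break;
		}
	}
}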
/* * Share-lock a mutex, block until acquired. Recursion is allowed. * * Returns 0 on success, or the tsleep() return code on failure. * An error can only be returned if PCATCH is specified in the flags. * * NOTE: Shared locks get a mass-wakeup so if the tsleep fails we * do not have to chain the wakeup(). */ static __inline int __mtx_lock_sh(mtx_t mtx, const char *ident, int flags, int to) { u_int lock; u_int nlock; int error; for (;;) { lock = mtx->mtx_lock; if ((lock & MTX_EXCLUSIVE) == 0) { KKASSERT((lock & MTX_MASK) != MTX_MASK); nlock = lock + 1; if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) { error = 0; break; } } else { nlock = lock | MTX_SHWANTED; tsleep_interlock(mtx, 0); if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) { error = tsleep(mtx, flags, ident, to); if (error) break; ++mtx_contention_count; /* retry */ } else { tsleep_remove(curthread); } } ++mtx_collision_count; } return (error); }
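The MTX_SHWANTED bit set above is consumed by the release path, which performs the mass wakeup the NOTE refers to. The sketch below is a simplified illustration under assumed semantics (it ignores the exclusive-waiter EXWANTED/EXLINK handoff and owner bookkeeping) and is not the kernel's real unlock routine.

static __inline void
example_mtx_unlock_ex(mtx_t mtx)
{
	u_int	lock;
	u_int	nlock;

	for (;;) {
		lock = mtx->mtx_lock;
		if ((lock & MTX_MASK) == 1) {
			/*
			 * Final count: drop EXCLUSIVE and the shared-waiter
			 * bit together, then mass-wakeup the shared waiters
			 * sleeping on the mtx so their cmpset loops retry.
			 * (EXWANTED/EXLINK handling omitted.)
			 */
			nlock = (lock - 1) &
				~(MTX_EXCLUSIVE | MTX_SHWANTED);
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				if (lock & MTX_SHWANTED)
					wakeup(mtx);
				break;
			}
		} else {
			nlock = lock - 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
		}
	}
}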
/* * hammer_rel_interlock() works a bit differently in that it must * acquire the lock in tandem with a 1->0 transition. CHECK is * not used. * * TRUE is returned on 1->0 transitions with the lock held on return * and FALSE is returned otherwise with the lock not held. * * It is important to note that the refs are not stable and may * increase while we hold the lock, the TRUE indication only means * that we transitioned 1->0, not necessarily that we stayed at 0. * * Another thread bumping refs while we hold the lock will set CHECK, * causing one of the competing hammer_ref_interlock() calls to * return TRUE after we release our lock. * * MPSAFE */ int hammer_rel_interlock(struct hammer_lock *lock, int locked) { u_int lv; u_int nlv; /* * In locked mode (failure/unload path) we release the * ref-count but leave it locked. */ if (locked) { hammer_rel(lock); return(1); } /* * Integrated reference count drop with LOCKED, plus the hot-path * returns. */ for (;;) { lv = lock->refs; if (lv == 1) { nlv = 0 | HAMMER_REFS_LOCKED; if (atomic_cmpset_int(&lock->refs, lv, nlv)) { lock->rowner = curthread; return(1); } } else if ((lv & ~HAMMER_REFS_FLAGS) == 1) { if ((lv & HAMMER_REFS_LOCKED) == 0) { nlv = (lv - 1) | HAMMER_REFS_LOCKED; if (atomic_cmpset_int(&lock->refs, lv, nlv)) { lock->rowner = curthread; return(1); } } else { nlv = lv | HAMMER_REFS_WANTED; tsleep_interlock(&lock->refs, 0); if (atomic_cmpset_int(&lock->refs, lv, nlv)) { tsleep(&lock->refs, PINTERLOCKED, "h0lk", 0); } } } else { nlv = (lv - 1); KKASSERT((int)nlv >= 0); if (atomic_cmpset_int(&lock->refs, lv, nlv)) return(0); } } /* not reached */ }
/* * smb_sleep() icky compat routine. Leave the token held through the tsleep * to interlock against the sleep. Remember that the token could be lost * since we blocked, so reget or release as appropriate. */ int smb_sleep(void *chan, struct smb_slock *sl, int slpflags, const char *wmesg, int timo) { int error; if (sl) { tsleep_interlock(chan, slpflags); smb_sl_unlock(sl); error = tsleep(chan, slpflags | PINTERLOCKED, wmesg, timo); if ((slpflags & PDROP) == 0) smb_sl_lock(sl); } else { error = tsleep(chan, slpflags, wmesg, timo); } return error; }
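A hypothetical caller illustrates the intended pattern (all names below are invented for the example): test the condition under the smb_slock, sleep via smb_sleep() which atomically swaps the lock for the tsleep interlock, and re-test after the lock is re-acquired because the state may have changed while blocked.

struct example_obj {
	struct smb_slock obj_lock;
	int		 obj_flags;
};
#define EXAMPLE_BUSY	0x0001

static int
example_wait_not_busy(struct example_obj *obj)
{
	int error = 0;

	smb_sl_lock(&obj->obj_lock);
	while (obj->obj_flags & EXAMPLE_BUSY) {
		/* producer clears EXAMPLE_BUSY and wakeup()s &obj->obj_flags */
		error = smb_sleep(&obj->obj_flags, &obj->obj_lock,
				  PCATCH, "exbusy", 0);
		if (error)
			break;
	}
	smb_sl_unlock(&obj->obj_lock);
	return (error);
}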
/* * Lock an inode, with SYNCQ semantics. * * HAMMER2 offers shared and exclusive locks on inodes. Pass a mask of * flags for options: * * - pass HAMMER2_RESOLVE_SHARED if a shared lock is desired. The * inode locking function will automatically set the RDONLY flag. * shared locks are not subject to SYNCQ semantics, exclusive locks * are. * * - pass HAMMER2_RESOLVE_ALWAYS if you need the inode's meta-data. * Most front-end inode locks do. * * - pass HAMMER2_RESOLVE_NEVER if you do not want to require that * the inode data be resolved. This is used by the syncthr because * it can run on an unresolved/out-of-sync cluster, and also by the * vnode reclamation code to avoid unnecessary I/O (particularly when * disposing of hundreds of thousands of cached vnodes). * * This function, along with lock4, has SYNCQ semantics. If the inode being * locked is on the SYNCQ, that is it has been staged by the syncer, we must * block until the operation is complete (even if we can lock the inode). In * order to reduce the stall time, we re-order the inode to the front of the * pmp->syncq prior to blocking. This reordering VERY significantly improves * performance. * * The inode locking function locks the inode itself, resolves any stale * chains in the inode's cluster, and allocates a fresh copy of the * cluster with 1 ref and all the underlying chains locked. * * ip->cluster will be stable while the inode is locked. * * NOTE: We don't combine the inode/chain lock because putting away an * inode would otherwise confuse multiple lock holders of the inode. */ void hammer2_inode_lock(hammer2_inode_t *ip, int how) { hammer2_pfs_t *pmp; hammer2_inode_ref(ip); pmp = ip->pmp; /* * Inode structure mutex - Shared lock */ if (how & HAMMER2_RESOLVE_SHARED) { hammer2_mtx_sh(&ip->lock); return; } /* * Inode structure mutex - Exclusive lock * * An exclusive lock (if not recursive) must wait for inodes on * SYNCQ to flush first, to ensure that meta-data dependencies such * as the nlink count and related directory entries are not split * across flushes. * * If the vnode is locked by the current thread it must be unlocked * across the tsleep() to avoid a deadlock. */ hammer2_mtx_ex(&ip->lock); if (hammer2_mtx_refs(&ip->lock) > 1) return; while ((ip->flags & HAMMER2_INODE_SYNCQ) && pmp) { hammer2_spin_ex(&pmp->list_spin); if (ip->flags & HAMMER2_INODE_SYNCQ) { tsleep_interlock(&ip->flags, 0); atomic_set_int(&ip->flags, HAMMER2_INODE_SYNCQ_WAKEUP); TAILQ_REMOVE(&pmp->syncq, ip, entry); TAILQ_INSERT_HEAD(&pmp->syncq, ip, entry); hammer2_spin_unex(&pmp->list_spin); hammer2_mtx_unlock(&ip->lock); tsleep(&ip->flags, PINTERLOCKED, "h2sync", 0); hammer2_mtx_ex(&ip->lock); continue; } hammer2_spin_unex(&pmp->list_spin); break; } }
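The loop above depends on the syncer eventually clearing SYNCQ and waking anyone who registered SYNCQ_WAKEUP. A simplified sketch of that completion step is shown below (illustrative; the real flusher also removes the inode from pmp->syncq under list_spin, which is omitted here).

static void
example_syncq_complete(hammer2_inode_t *ip)
{
	uint32_t oflags;
	uint32_t nflags;

	for (;;) {
		oflags = ip->flags;
		cpu_ccfence();
		nflags = oflags & ~(HAMMER2_INODE_SYNCQ |
				    HAMMER2_INODE_SYNCQ_WAKEUP);
		if (atomic_cmpset_int(&ip->flags, oflags, nflags)) {
			/*
			 * Only pay for the wakeup if a front-end thread
			 * actually interlocked on &ip->flags above.
			 */
			if (oflags & HAMMER2_INODE_SYNCQ_WAKEUP)
				wakeup(&ip->flags);
			break;
		}
	}
}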
/* * (Backend) Feed chain data through the cluster validator and back to * the frontend. Chains are fed from multiple nodes concurrently * and pipelined via per-node FIFOs in the XOP. * * No xop lock is needed because we are only manipulating fields under * our direct control. * * Returns 0 on success and a hammer error code if sync is permanently * lost. The caller retains a ref on the chain but by convention * the lock is typically inherited by the xop (caller loses lock). * * Returns non-zero on error. In this situation the caller retains a * ref on the chain but loses the lock (we unlock here). * * WARNING! The chain is moving between two different threads, it must * be locked SHARED to retain its data mapping, not exclusive. * When multiple operations are in progress at once, chains fed * back to the frontend for collection can wind up being locked * in different orders, only a shared lock can prevent a deadlock. * * Exclusive locks may only be used by a XOP backend node thread * temporarily, with no direct or indirect dependencies (aka * blocking/waiting) on other nodes. */ int hammer2_xop_feed(hammer2_xop_head_t *xop, hammer2_chain_t *chain, int clindex, int error) { hammer2_xop_fifo_t *fifo; /* * Multi-threaded entry into the XOP collector. We own the * fifo->wi for our clindex. */ fifo = &xop->collect[clindex]; while (fifo->ri == fifo->wi - HAMMER2_XOPFIFO) { tsleep_interlock(xop, 0); if (hammer2_xop_active(xop) == 0) { error = EINTR; goto done; } if (fifo->ri == fifo->wi - HAMMER2_XOPFIFO) { tsleep(xop, PINTERLOCKED, "h2feed", hz*60); } } if (chain) hammer2_chain_ref(chain); fifo->errors[fifo->wi & HAMMER2_XOPFIFO_MASK] = error; fifo->array[fifo->wi & HAMMER2_XOPFIFO_MASK] = chain; cpu_sfence(); ++fifo->wi; atomic_add_int(&xop->check_counter, 1); wakeup(&xop->check_counter); /* XXX optimize */ error = 0; /* * Cleanup. If an error occurred we eat the lock. If no error * occurred the fifo inherits the lock and gains an additional ref. * * The caller's ref remains in both cases. */ done: if (error && chain) hammer2_chain_unlock(chain); return error; }
/* * Acquire the interlock on lock->refs. * * Return TRUE if CHECK is currently set. Note that CHECK will not * be set if the reference count is 0, but can get set if this function * is preceded by, say, hammer_ref(), or through races with other * threads. The return value allows the caller to use the same logic * as hammer_ref_interlock(). * * MPSAFE */ int hammer_get_interlock(struct hammer_lock *lock) { u_int lv; u_int nlv; for (;;) { lv = lock->refs; if (lv & HAMMER_REFS_LOCKED) { nlv = lv | HAMMER_REFS_WANTED; tsleep_interlock(&lock->refs, 0); if (atomic_cmpset_int(&lock->refs, lv, nlv)) tsleep(&lock->refs, PINTERLOCKED, "hilk", 0); } else { nlv = (lv | HAMMER_REFS_LOCKED); if (atomic_cmpset_int(&lock->refs, lv, nlv)) { lock->rowner = curthread; return((lv & HAMMER_REFS_CHECK) ? 1 : 0); } } } }
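All of the HAMMER_REFS_WANTED sleepers in this family (hammer_get_interlock(), hammer_ref_interlock(), hammer_rel_interlock()) are woken by whichever thread clears HAMMER_REFS_LOCKED. A minimal sketch of that release step follows; it is illustrative only (the real interlock-done path also manages the CHECK bit, which is omitted here).

static void
example_put_interlock(struct hammer_lock *lock)
{
	u_int	lv;
	u_int	nlv;

	for (;;) {
		lv = lock->refs;
		KKASSERT(lv & HAMMER_REFS_LOCKED);
		nlv = lv & ~(HAMMER_REFS_LOCKED | HAMMER_REFS_WANTED);
		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
			/* wake the interlocked sleepers so they re-test */
			if (lv & HAMMER_REFS_WANTED)
				wakeup(&lock->refs);
			break;
		}
	}
}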
/* * Helper function to wait for a reference count to become zero. * We set REFCNTF_WAITING and sleep if the reference count is not zero. * * In the case where REFCNTF_WAITING is already set the atomic op validates * that it is still set after the tsleep_interlock() call. * * Users of this waiting API must use refcount_release_wakeup() to release * refs instead of refcount_release(). refcount_release() will not wake * up waiters. */ void _refcount_wait(volatile u_int *countp, const char *wstr) { u_int n; int base_ticks = ticks; for (;;) { n = *countp; cpu_ccfence(); if (n == 0) break; if ((int)(ticks - base_ticks) >= hz*60 - 1) { kprintf("warning: refcount_wait %s: long wait\n", wstr); base_ticks = ticks; } KKASSERT(n != REFCNTF_WAITING); /* impossible state */ tsleep_interlock(countp, 0); if (atomic_cmpset_int(countp, n, n | REFCNTF_WAITING)) tsleep(countp, PINTERLOCKED, wstr, hz*10); } }
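The comment above requires releases to go through refcount_release_wakeup(). A sketch of what that release side must do is shown below (illustrative, not the verbatim kernel routine): drop the ref and, when the count reaches zero with REFCNTF_WAITING set, clear the flag and wake the threads sleeping on countp.

static int
example_refcount_release_wakeup(volatile u_int *countp)
{
	u_int n;

	for (;;) {
		n = *countp;
		cpu_ccfence();
		if (n == (1 | REFCNTF_WAITING)) {
			/* last ref and at least one thread is waiting */
			if (atomic_cmpset_int(countp, n, 0)) {
				wakeup(countp);
				return (1);
			}
		} else {
			/* drop one ref, leave any WAITING flag intact */
			if (atomic_cmpset_int(countp, n, n - 1))
				return ((n & ~REFCNTF_WAITING) == 1);
		}
	}
}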
int sim_lock_sleep(void *ident, int flags, const char *wmesg, int timo, sim_lock *lock) { int retval; if (lock != &sim_mplock) { /* lock should be held already */ KKASSERT(lockstatus(lock, curthread) != 0); tsleep_interlock(ident, flags); lockmgr(lock, LK_RELEASE); retval = tsleep(ident, flags | PINTERLOCKED, wmesg, timo); } else { retval = tsleep(ident, flags, wmesg, timo); } if (lock != &sim_mplock) { lockmgr(lock, LK_EXCLUSIVE); } return (retval); }
void hammer_lock_ex_ident(struct hammer_lock *lock, const char *ident) { thread_t td = curthread; u_int lv; u_int nlv; KKASSERT(lock->refs); for (;;) { lv = lock->lockval; if (lv == 0) { nlv = 1 | HAMMER_LOCKF_EXCLUSIVE; if (atomic_cmpset_int(&lock->lockval, lv, nlv)) { lock->lowner = td; break; } } else if ((lv & HAMMER_LOCKF_EXCLUSIVE) && lock->lowner == td) { nlv = (lv + 1); if (atomic_cmpset_int(&lock->lockval, lv, nlv)) break; } else { if (hammer_debug_locks) { kprintf("hammer_lock_ex: held by %p\n", lock->lowner); } nlv = lv | HAMMER_LOCKF_WANTED; ++hammer_contention_count; tsleep_interlock(&lock->lockval, 0); if (atomic_cmpset_int(&lock->lockval, lv, nlv)) { tsleep(&lock->lockval, PINTERLOCKED, ident, 0); if (hammer_debug_locks) kprintf("hammer_lock_ex: try again\n"); } } } }
/* * Obtain a shared lock. * * We do not give pending exclusive locks priority over shared locks as * doing so could lead to a deadlock. */ void hammer_lock_sh(struct hammer_lock *lock) { thread_t td = curthread; u_int lv; u_int nlv; const char *ident = "hmrlck"; KKASSERT(lock->refs); for (;;) { lv = lock->lockval; if ((lv & HAMMER_LOCKF_EXCLUSIVE) == 0) { nlv = (lv + 1); if (atomic_cmpset_int(&lock->lockval, lv, nlv)) break; } else if (lock->lowner == td) { /* * Disallowed case: the caller already holds the lock * exclusively. Drop into the kernel debugger for now; * continuing from the debugger proceeds with another * exclusive count. */ nlv = (lv + 1); if (atomic_cmpset_int(&lock->lockval, lv, nlv)) { if (hammer_debug_critical) Debugger("hammer_lock_sh: holding ex"); break; } } else { nlv = lv | HAMMER_LOCKF_WANTED; ++hammer_contention_count; tsleep_interlock(&lock->lockval, 0); if (atomic_cmpset_int(&lock->lockval, lv, nlv)) tsleep(&lock->lockval, PINTERLOCKED, ident, 0); } } }
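Both hammer_lock_ex_ident() and hammer_lock_sh() park on &lock->lockval with HAMMER_LOCKF_WANTED set, so the unlock path has to clear that bit and issue a wakeup when the lock becomes obtainable. The sketch below illustrates the idea under those assumptions; it is not the verbatim hammer_unlock() (owner bookkeeping and assertions are omitted).

static void
example_hammer_unlock(struct hammer_lock *lock)
{
	u_int	lv;
	u_int	nlv;

	for (;;) {
		lv = lock->lockval;
		if ((lv & ~HAMMER_LOCKF_WANTED) == 1 ||
		    (lv & ~HAMMER_LOCKF_WANTED) ==
		    (1 | HAMMER_LOCKF_EXCLUSIVE)) {
			/*
			 * Last shared or exclusive count: clear everything
			 * and wake the sleepers so their cmpset loops retry.
			 */
			nlv = 0;
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				if (lv & HAMMER_LOCKF_WANTED)
					wakeup(&lock->lockval);
				break;
			}
		} else {
			nlv = lv - 1;
			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
				break;
		}
	}
}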
/* * (Frontend) collect a response from a running cluster op. * * Responses are fed from all appropriate nodes concurrently * and collected into a cohesive response >= collect_key. * * The collector will return the instant quorum or other requirements * are met, even if some nodes get behind or become non-responsive. * * HAMMER2_XOP_COLLECT_NOWAIT - Used to 'poll' a completed collection, * usually called synchronously from the * node XOPs for the strategy code to * fake the frontend collection and complete * the BIO as soon as possible. * * HAMMER2_XOP_SYNCHRONIZER - Request synchronization with a particular * cluster index, prevents looping when that * index is out of sync so caller can act on * the out of sync element. ESRCH and EDEADLK * can be returned if this flag is specified. * * Returns 0 on success plus a filled out xop->cluster structure. * Return ENOENT on normal termination. * Otherwise return an error. */ int hammer2_xop_collect(hammer2_xop_head_t *xop, int flags) { hammer2_xop_fifo_t *fifo; hammer2_chain_t *chain; hammer2_key_t lokey; int error; int keynull; int adv; /* advance the element */ int i; uint32_t check_counter; loop: /* * First loop tries to advance pieces of the cluster which * are out of sync. */ lokey = HAMMER2_KEY_MAX; keynull = HAMMER2_CHECK_NULL; check_counter = xop->check_counter; cpu_lfence(); for (i = 0; i < xop->cluster.nchains; ++i) { chain = xop->cluster.array[i].chain; if (chain == NULL) { adv = 1; } else if (chain->bref.key < xop->collect_key) { adv = 1; } else { keynull &= ~HAMMER2_CHECK_NULL; if (lokey > chain->bref.key) lokey = chain->bref.key; adv = 0; } if (adv == 0) continue; /* * Advance element if possible, advanced element may be NULL. */ if (chain) { hammer2_chain_unlock(chain); hammer2_chain_drop(chain); } fifo = &xop->collect[i]; if (fifo->ri != fifo->wi) { cpu_lfence(); chain = fifo->array[fifo->ri & HAMMER2_XOPFIFO_MASK]; ++fifo->ri; xop->cluster.array[i].chain = chain; if (chain == NULL) { /* XXX */ xop->cluster.array[i].flags |= HAMMER2_CITEM_NULL; } if (fifo->wi - fifo->ri < HAMMER2_XOPFIFO / 2) wakeup(xop); /* XXX optimize */ --i; /* loop on same index */ } else { /* * Retain CITEM_NULL flag. If set just repeat EOF. * If not, the NULL,0 combination indicates an * operation in-progress. */ xop->cluster.array[i].chain = NULL; /* retain any CITEM_NULL setting */ } } /* * Determine whether the lowest collected key meets clustering * requirements. Returns: * * 0 - key valid, cluster can be returned. * * ENOENT - normal end of scan, return ENOENT. * * ESRCH - sufficient elements collected, quorum agreement * that lokey is not a valid element and should be * skipped. * * EDEADLK - sufficient elements collected, no quorum agreement * (and no agreement possible). In this situation a * repair is needed, for now we loop. * * EINPROGRESS - insufficient elements collected to resolve, wait * for event and loop.
*/ if ((flags & HAMMER2_XOP_COLLECT_WAITALL) && xop->run_mask != HAMMER2_XOPMASK_VOP) { error = EINPROGRESS; } else { error = hammer2_cluster_check(&xop->cluster, lokey, keynull); } if (error == EINPROGRESS) { if (xop->check_counter == check_counter) { if (flags & HAMMER2_XOP_COLLECT_NOWAIT) goto done; tsleep_interlock(&xop->check_counter, 0); cpu_lfence(); if (xop->check_counter == check_counter) { tsleep(&xop->check_counter, PINTERLOCKED, "h2coll", hz*60); } } goto loop; } if (error == ESRCH) { if (lokey != HAMMER2_KEY_MAX) { xop->collect_key = lokey + 1; goto loop; } error = ENOENT; } if (error == EDEADLK) { kprintf("hammer2: no quorum possible lokey %016jx\n", lokey); if (lokey != HAMMER2_KEY_MAX) { xop->collect_key = lokey + 1; goto loop; } error = ENOENT; } if (lokey == HAMMER2_KEY_MAX) xop->collect_key = lokey; else xop->collect_key = lokey + 1; done: return error; }
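The check_counter handshake used by the feed/collect pair reduces to a small reusable pattern: sample the event counter, interlock on it, re-check, then sleep with PINTERLOCKED. The hypothetical helper below distills it (names are invented; it is not part of hammer2). A producer performs atomic_add_int() on the counter followed by wakeup(); whichever side of the interlock that lands on, the waiter either skips the sleep or is woken out of it.

static void
example_wait_for_event(volatile u_int *counterp, u_int last_seen)
{
	for (;;) {
		if (*counterp != last_seen)
			break;			/* event already occurred */
		tsleep_interlock(counterp, 0);
		cpu_lfence();
		if (*counterp != last_seen)
			break;			/* raced the producer */
		tsleep(counterp, PINTERLOCKED, "exwait", hz);
	}
}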
debuglockmgr(struct lock *lkp, u_int flags, const char *name, const char *file, int line) #endif { thread_t td; thread_t otd; int error; int extflags; int count; int pflags; int wflags; int timo; #ifdef DEBUG_LOCKS int i; #endif error = 0; if (mycpu->gd_intr_nesting_level && (flags & LK_NOWAIT) == 0 && (flags & LK_TYPE_MASK) != LK_RELEASE && panic_cpu_gd != mycpu ) { #ifndef DEBUG_LOCKS panic("lockmgr %s from %p: called from interrupt, ipi, " "or hard code section", lkp->lk_wmesg, ((int **)&lkp)[-1]); #else panic("lockmgr %s from %s:%d: called from interrupt, ipi, " "or hard code section", lkp->lk_wmesg, file, line); #endif } #ifdef DEBUG_LOCKS if (mycpu->gd_spinlocks && ((flags & LK_NOWAIT) == 0)) { panic("lockmgr %s from %s:%d: called with %d spinlocks held", lkp->lk_wmesg, file, line, mycpu->gd_spinlocks); } #endif extflags = (flags | lkp->lk_flags) & LK_EXTFLG_MASK; td = curthread; again: count = lkp->lk_count; cpu_ccfence(); switch (flags & LK_TYPE_MASK) { case LK_SHARED: /* * Shared lock critical path case */ if ((count & (LKC_EXREQ|LKC_UPREQ|LKC_EXCL)) == 0) { if (atomic_cmpset_int(&lkp->lk_count, count, count + 1)) { COUNT(td, 1); break; } goto again; } /* * If the caller already holds the lock exclusively then * we silently obtain another count on the exclusive lock. * * WARNING! The old FreeBSD behavior was to downgrade, * but this creates a problem when recursions * return to the caller and the caller expects * its original exclusive lock to remain exclusively * locked. */ if (lkp->lk_lockholder == td) { KKASSERT(count & LKC_EXCL); if ((extflags & LK_CANRECURSE) == 0) { if (extflags & LK_NOWAIT) { error = EBUSY; break; } panic("lockmgr: locking against myself"); } atomic_add_int(&lkp->lk_count, 1); COUNT(td, 1); break; } /* * Slow path */ pflags = (extflags & LK_PCATCH) ? PCATCH : 0; timo = (extflags & LK_TIMELOCK) ? lkp->lk_timo : 0; wflags = (td->td_flags & TDF_DEADLKTREAT) ? LKC_EXCL : (LKC_EXCL|LKC_EXREQ|LKC_UPREQ); /* * Block while the lock is held exclusively or, conditionally, * if other threads are tring to obtain an exclusive lock or * upgrade to one. */ if (count & wflags) { if (extflags & LK_NOWAIT) { error = EBUSY; break; } tsleep_interlock(lkp, pflags); if (!atomic_cmpset_int(&lkp->lk_count, count, count | LKC_SHREQ)) { goto again; } mycpu->gd_cnt.v_lock_name[0] = 'S'; strncpy(mycpu->gd_cnt.v_lock_name + 1, lkp->lk_wmesg, sizeof(mycpu->gd_cnt.v_lock_name) - 2); ++mycpu->gd_cnt.v_lock_colls; error = tsleep(lkp, pflags | PINTERLOCKED, lkp->lk_wmesg, timo); if (error) break; if (extflags & LK_SLEEPFAIL) { error = ENOLCK; break; } goto again; } /* * Otherwise we can bump the count */ if (atomic_cmpset_int(&lkp->lk_count, count, count + 1)) { COUNT(td, 1); break; } goto again; case LK_EXCLUSIVE: /* * Exclusive lock critical path. */ if (count == 0) { if (atomic_cmpset_int(&lkp->lk_count, count, LKC_EXCL | (count + 1))) { lkp->lk_lockholder = td; COUNT(td, 1); break; } goto again; } /* * Recursive lock if we already hold it exclusively. */ if (lkp->lk_lockholder == td) { KKASSERT(count & LKC_EXCL); if ((extflags & LK_CANRECURSE) == 0) { if (extflags & LK_NOWAIT) { error = EBUSY; break; } panic("lockmgr: locking against myself"); } atomic_add_int(&lkp->lk_count, 1); COUNT(td, 1); break; } /* * We will block, handle LK_NOWAIT */ if (extflags & LK_NOWAIT) { error = EBUSY; break; } /* * Wait until we can obtain the exclusive lock. EXREQ is * automatically cleared when all current holders release * so if we abort the operation we can safely leave it set. 
* There might be other exclusive requesters. */ pflags = (extflags & LK_PCATCH) ? PCATCH : 0; timo = (extflags & LK_TIMELOCK) ? lkp->lk_timo : 0; tsleep_interlock(lkp, pflags); if (!atomic_cmpset_int(&lkp->lk_count, count, count | LKC_EXREQ)) { goto again; } mycpu->gd_cnt.v_lock_name[0] = 'X'; strncpy(mycpu->gd_cnt.v_lock_name + 1, lkp->lk_wmesg, sizeof(mycpu->gd_cnt.v_lock_name) - 2); ++mycpu->gd_cnt.v_lock_colls; error = tsleep(lkp, pflags | PINTERLOCKED, lkp->lk_wmesg, timo); if (error) break; if (extflags & LK_SLEEPFAIL) { error = ENOLCK; break; } goto again; case LK_DOWNGRADE: /* * Downgrade an exclusive lock into a shared lock. All * counts on a recursive exclusive lock become shared. * * This function always succeeds. */ if (lkp->lk_lockholder != td || (count & (LKC_EXCL|LKC_MASK)) != (LKC_EXCL|1)) { panic("lockmgr: not holding exclusive lock"); } #ifdef DEBUG_LOCKS for (i = 0; i < LOCKMGR_DEBUG_ARRAY_SIZE; i++) { if (td->td_lockmgr_stack[i] == lkp && td->td_lockmgr_stack_id[i] > 0 ) { td->td_lockmgr_stack_id[i]--; break; } } #endif /* * NOTE! Must NULL-out lockholder before releasing LKC_EXCL. */ otd = lkp->lk_lockholder; lkp->lk_lockholder = NULL; if (atomic_cmpset_int(&lkp->lk_count, count, count & ~(LKC_EXCL|LKC_SHREQ))) { if (count & LKC_SHREQ) wakeup(lkp); break; } lkp->lk_lockholder = otd; goto again; case LK_EXCLUPGRADE: /* * Upgrade from a single shared lock to an exclusive lock. * * If another process is ahead of us to get an upgrade, * then we want to fail rather than have an intervening * exclusive access. The shared lock is released on * failure. */ if (count & LKC_UPREQ) { flags = LK_RELEASE; error = EBUSY; goto again; } /* fall through into normal upgrade */ case LK_UPGRADE: /* * Upgrade a shared lock to an exclusive one. This can cause * the lock to be temporarily released and stolen by other * threads. LK_SLEEPFAIL or LK_NOWAIT may be used to detect * this case, or use LK_EXCLUPGRADE. * * If the lock is already exclusively owned by us, this * operation is a NOP. * * If we return an error (even NOWAIT), the current lock will * be released. * * Start with the critical path. */ if ((count & (LKC_UPREQ|LKC_EXCL|LKC_MASK)) == 1) { if (atomic_cmpset_int(&lkp->lk_count, count, count | LKC_EXCL)) { lkp->lk_lockholder = td; break; } goto again; } /* * If we already hold the lock exclusively this operation * succeeds and is a NOP. */ if (count & LKC_EXCL) { if (lkp->lk_lockholder == td) break; panic("lockmgr: upgrade unowned lock"); } if ((count & LKC_MASK) == 0) panic("lockmgr: upgrade unowned lock"); /* * We cannot upgrade without blocking at this point. */ if (extflags & LK_NOWAIT) { flags = LK_RELEASE; error = EBUSY; goto again; } /* * Release the shared lock and request the upgrade. */ pflags = (extflags & LK_PCATCH) ? PCATCH : 0; timo = (extflags & LK_TIMELOCK) ? lkp->lk_timo : 0; tsleep_interlock(lkp, pflags); wflags = (count & LKC_UPREQ) ? LKC_EXREQ : LKC_UPREQ; /* * If someone else owns UPREQ and this transition would * allow it to be granted, we have to grant it. Otherwise * we release the shared lock. */ if ((count & (LKC_UPREQ|LKC_MASK)) == (LKC_UPREQ | 1)) { wflags |= LKC_EXCL | LKC_UPGRANT; wflags |= count; wflags &= ~LKC_UPREQ; } else { wflags |= (count - 1); } if (atomic_cmpset_int(&lkp->lk_count, count, wflags)) { COUNT(td, -1); /* * Must wakeup the thread granted the upgrade. 
*/ if ((count & (LKC_UPREQ|LKC_MASK)) == (LKC_UPREQ | 1)) wakeup(lkp); mycpu->gd_cnt.v_lock_name[0] = 'U'; strncpy(mycpu->gd_cnt.v_lock_name + 1, lkp->lk_wmesg, sizeof(mycpu->gd_cnt.v_lock_name) - 2); ++mycpu->gd_cnt.v_lock_colls; error = tsleep(lkp, pflags | PINTERLOCKED, lkp->lk_wmesg, timo); if (error) break; if (extflags & LK_SLEEPFAIL) { error = ENOLCK; break; } /* * Refactor to either LK_EXCLUSIVE or LK_WAITUPGRADE, * depending on whether we were able to acquire the * LKC_UPREQ bit. */ if (count & LKC_UPREQ) flags = LK_EXCLUSIVE; /* someone else */ else flags = LK_WAITUPGRADE; /* we own the bit */ } goto again; case LK_WAITUPGRADE: /* * We own the LKC_UPREQ bit, wait until we are granted the * exclusive lock (LKC_UPGRANT is set). * * IF THE OPERATION FAILS (tsleep error tsleep+LK_SLEEPFAIL), * we have to undo the upgrade request and clean up any lock * that might have been granted via a race. */ if (count & LKC_UPGRANT) { if (atomic_cmpset_int(&lkp->lk_count, count, count & ~LKC_UPGRANT)) { lkp->lk_lockholder = td; KKASSERT(count & LKC_EXCL); break; } /* retry */ } else { pflags = (extflags & LK_PCATCH) ? PCATCH : 0; timo = (extflags & LK_TIMELOCK) ? lkp->lk_timo : 0; tsleep_interlock(lkp, pflags); if (atomic_cmpset_int(&lkp->lk_count, count, count)) { mycpu->gd_cnt.v_lock_name[0] = 'U'; strncpy(mycpu->gd_cnt.v_lock_name + 1, lkp->lk_wmesg, sizeof(mycpu->gd_cnt.v_lock_name) - 2); ++mycpu->gd_cnt.v_lock_colls; error = tsleep(lkp, pflags | PINTERLOCKED, lkp->lk_wmesg, timo); if (error) { undo_upreq(lkp); break; } if (extflags & LK_SLEEPFAIL) { error = ENOLCK; undo_upreq(lkp); break; } } /* retry */ } goto again; case LK_RELEASE: /* * Release the currently held lock. If releasing the current * lock as part of an error return, error will ALREADY be * non-zero. * * When releasing the last lock we automatically transition * LKC_UPREQ to LKC_EXCL|1. * * WARNING! We cannot detect when there are multiple exclusive * requests pending. We clear EXREQ unconditionally * on the 1->0 transition so it is possible for * shared requests to race the next exclusive * request. * * Always succeeds. */ if ((count & LKC_MASK) == 0) panic("lockmgr: LK_RELEASE: no lock held"); if (count & LKC_EXCL) { if (lkp->lk_lockholder != LK_KERNTHREAD && lkp->lk_lockholder != td) { panic("lockmgr: pid %d, not exlusive " "lock holder thr %p/%p unlocking", (td->td_proc ? td->td_proc->p_pid : -1), td, lkp->lk_lockholder); } if ((count & (LKC_UPREQ|LKC_MASK)) == 1) { /* * Last exclusive count is being released */ otd = lkp->lk_lockholder; lkp->lk_lockholder = NULL; if (!atomic_cmpset_int(&lkp->lk_count, count, (count - 1) & ~(LKC_EXCL|LKC_EXREQ|LKC_SHREQ))) { lkp->lk_lockholder = otd; goto again; } if (count & (LKC_EXREQ|LKC_SHREQ)) wakeup(lkp); /* success */ } else if ((count & (LKC_UPREQ|LKC_MASK)) == (LKC_UPREQ | 1)) { /* * Last exclusive count is being released but * an upgrade request is present, automatically * grant an exclusive state to the owner of * the upgrade request. */ otd = lkp->lk_lockholder; lkp->lk_lockholder = NULL; if (!atomic_cmpset_int(&lkp->lk_count, count, (count & ~LKC_UPREQ) | LKC_UPGRANT)) { lkp->lk_lockholder = otd; } wakeup(lkp); /* success */ } else { otd = lkp->lk_lockholder; if (!atomic_cmpset_int(&lkp->lk_count, count, count - 1)) { goto again; } /* success */ } /* success */ if (otd != LK_KERNTHREAD) COUNT(td, -1); } else { if ((count & (LKC_UPREQ|LKC_MASK)) == 1) { /* * Last shared count is being released. 
*/ if (!atomic_cmpset_int(&lkp->lk_count, count, (count - 1) & ~(LKC_EXREQ|LKC_SHREQ))) { goto again; } if (count & (LKC_EXREQ|LKC_SHREQ)) wakeup(lkp); /* success */ } else if ((count & (LKC_UPREQ|LKC_MASK)) == (LKC_UPREQ | 1)) { /* * Last shared count is being released but * an upgrade request is present, automatically * grant an exclusive state to the owner of * the upgrade request. */ if (!atomic_cmpset_int(&lkp->lk_count, count, (count & ~LKC_UPREQ) | LKC_EXCL | LKC_UPGRANT)) { goto again; } wakeup(lkp); } else { if (!atomic_cmpset_int(&lkp->lk_count, count, count - 1)) { goto again; } } /* success */ COUNT(td, -1); } break; default: panic("lockmgr: unknown locktype request %d", flags & LK_TYPE_MASK); /* NOTREACHED */ } return (error); }
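A hypothetical caller shows how the request types above compose (illustrative only; example_locked_update is an invented name): acquire shared with PCATCH/timeout semantics, upgrade to exclusive for the modification, and release. Note that, per the LK_UPGRADE comment, an error return leaves the lock released, so the caller only issues LK_RELEASE on success.

static int
example_locked_update(struct lock *lkp)
{
	int error;

	error = lockmgr(lkp, LK_SHARED | LK_PCATCH | LK_TIMELOCK);
	if (error)
		return (error);

	/* ... read-only checks under the shared lock ... */

	error = lockmgr(lkp, LK_UPGRADE);
	if (error == 0) {
		/* ... modify the protected state exclusively ... */
		lockmgr(lkp, LK_RELEASE);
	}
	return (error);
}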
/* * do an ioctl operation on a pfsnode (vp). * (vp) is not locked on entry or exit. */ static int procfs_ioctl(struct vop_ioctl_args *ap) { struct pfsnode *pfs = VTOPFS(ap->a_vp); struct proc *procp; struct proc *p; int error; int signo; struct procfs_status *psp; unsigned char flags; procp = pfs_pfind(pfs->pfs_pid); if (procp == NULL) return ENOTTY; p = curproc; if (p == NULL) { error = EINVAL; goto done; } /* Can't trace a process that's currently exec'ing. */ if ((procp->p_flags & P_INEXEC) != 0) { error = EAGAIN; goto done; } if (!CHECKIO(p, procp) || p_trespass(ap->a_cred, procp->p_ucred)) { error = EPERM; goto done; } switch (ap->a_command) { case PIOCBIS: spin_lock(&procp->p_spin); procp->p_stops |= *(unsigned int*)ap->a_data; spin_unlock(&procp->p_spin); break; case PIOCBIC: spin_lock(&procp->p_spin); procp->p_stops &= ~*(unsigned int*)ap->a_data; spin_unlock(&procp->p_spin); break; case PIOCSFL: /* * NFLAGS is "non-suser_xxx flags" -- currently, only * PF_ISUGID ("ignore set u/g id"); */ #define NFLAGS (PF_ISUGID) flags = (unsigned char)*(unsigned int*)ap->a_data; if (flags & NFLAGS && (error = priv_check_cred(ap->a_cred, PRIV_ROOT, 0))) goto done; procp->p_pfsflags = flags; break; case PIOCGFL: *(unsigned int*)ap->a_data = (unsigned int)procp->p_pfsflags; break; case PIOCSTATUS: /* * NOTE: syscall entry deals with stopevents and may run without * the MP lock. */ psp = (struct procfs_status *)ap->a_data; psp->flags = procp->p_pfsflags; psp->events = procp->p_stops; spin_lock(&procp->p_spin); if (procp->p_step) { psp->state = 0; psp->why = procp->p_stype; psp->val = procp->p_xstat; spin_unlock(&procp->p_spin); } else { psp->state = 1; spin_unlock(&procp->p_spin); psp->why = 0; /* Not defined values */ psp->val = 0; /* Not defined values */ } break; case PIOCWAIT: /* * NOTE: syscall entry deals with stopevents and may run without * the MP lock. */ psp = (struct procfs_status *)ap->a_data; spin_lock(&procp->p_spin); while (procp->p_step == 0) { tsleep_interlock(&procp->p_stype, PCATCH); spin_unlock(&procp->p_spin); if (procp->p_stops == 0) { error = EINVAL; goto done; } if (procp->p_flags & P_POSTEXIT) { error = EINVAL; goto done; } if (procp->p_flags & P_INEXEC) { error = EAGAIN; goto done; } error = tsleep(&procp->p_stype, PCATCH | PINTERLOCKED, "piocwait", 0); if (error) goto done; spin_lock(&procp->p_spin); } spin_unlock(&procp->p_spin); psp->state = 1; /* It stopped */ psp->flags = procp->p_pfsflags; psp->events = procp->p_stops; psp->why = procp->p_stype; /* why it stopped */ psp->val = procp->p_xstat; /* any extra info */ break; case PIOCCONT: /* Restart a proc */ /* * NOTE: syscall entry deals with stopevents and may run without * the MP lock. However, the caller is presumably interlocked * by having waited. */ if (procp->p_step == 0) { error = EINVAL; /* Can only start a stopped process */ goto done; } if ((signo = *(int*)ap->a_data) != 0) { if (signo >= NSIG || signo <= 0) { error = EINVAL; goto done; } ksignal(procp, signo); } procp->p_step = 0; wakeup(&procp->p_step); break; default: error = ENOTTY; goto done; } error = 0; done: pfs_pdone(procp); return error; }
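PIOCWAIT and PIOCCONT only make sense together with the process-side stop/continue handshake. The sketch below approximates that side (illustrative; the real stopevent() also interlocks with p_spin and re-checks p_stops, which is omitted here): the stopping process publishes why it stopped, wakes the PIOCWAIT sleeper on &p->p_stype, and then itself sleeps interlocked on &p->p_step until PIOCCONT clears p_step and issues the matching wakeup.

static void
example_stopevent(struct proc *p, unsigned int event, unsigned int val)
{
	p->p_xstat = val;
	p->p_stype = event;
	p->p_step = 1;

	for (;;) {
		/*
		 * Interlock before waking the debugger so the wakeup()
		 * issued by PIOCCONT cannot slip in between our test of
		 * p_step and the tsleep().
		 */
		tsleep_interlock(&p->p_step, 0);
		wakeup(&p->p_stype);
		if (p->p_step == 0)
			break;
		tsleep(&p->p_step, PINTERLOCKED, "stopevt", hz);
	}
}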
/* * Wait for async lock completion or abort. Returns ENOLCK if an abort * occurred. */ int mtx_wait_link(mtx_t *mtx, mtx_link_t *link, int flags, int to) { indefinite_info_t info; int error; indefinite_init(&info, mtx->mtx_ident, 1, ((link->state & MTX_LINK_LINKED_SH) ? 'm' : 'M')); /* * Sleep. Handle false wakeups, interruptions, etc. * The link may also have been aborted. The LINKED * bit was set by this cpu so we can test it without * fences. */ error = 0; while (link->state & MTX_LINK_LINKED) { tsleep_interlock(link, 0); cpu_lfence(); if (link->state & MTX_LINK_LINKED) { error = tsleep(link, flags | PINTERLOCKED, mtx->mtx_ident, to); if (error) break; } if ((mtx->mtx_flags & MTXF_NOCOLLSTATS) == 0) indefinite_check(&info); } /* * We need at least a lfence (load fence) to ensure our cpu does not * reorder loads (of data outside the lock structure) prior to the * remote cpu's release, since the above test may have run without * any atomic interactions. * * If we do not do this then state updated by the other cpu before * releasing its lock may not be read cleanly by our cpu when this * function returns. Even though the other cpu ordered its stores, * our loads can still be out of order. */ cpu_mfence(); /* * We are done, make sure the link structure is unlinked. * It may still be on the list due to e.g. EINTR or * EWOULDBLOCK. * * It is possible for the tsleep to race an ABORT and cause * error to be 0. * * The tsleep() can be woken up for numerous reasons and error * might be zero in situations where we intend to return an error. * * (This is the synchronous case so state cannot be CALLEDBACK) */ switch(link->state) { case MTX_LINK_ACQUIRED: case MTX_LINK_CALLEDBACK: error = 0; break; case MTX_LINK_ABORTED: error = ENOLCK; break; case MTX_LINK_LINKED_EX: case MTX_LINK_LINKED_SH: mtx_delete_link(mtx, link); /* fall through */ default: if (error == 0) error = EWOULDBLOCK; break; } /* * Clear state on status returned. */ link->state = MTX_LINK_IDLE; if ((mtx->mtx_flags & MTXF_NOCOLLSTATS) == 0) indefinite_done(&info); return error; }
/* * The hammer_*_interlock() and hammer_*_interlock_done() functions are * more sophisticated versions which handle MP transition races and block * when necessary. * * hammer_ref_interlock() bumps the ref-count and conditionally acquires * the interlock for 0->1 transitions or if the CHECK is found to be set. * * This case will return TRUE, the interlock will be held, and the CHECK * bit also set. Other threads attempting to ref will see the CHECK bit * and block until we clean up. * * FALSE is returned for transitions other than 0->1 when the CHECK bit * is not found to be set, or if the function loses the race with another * thread. * * TRUE is only returned to one thread and the others will block. * Effectively a TRUE indicator means 'someone transitioned 0->1 * and you are the first guy to successfully lock it after that, so you * need to check'. Due to races the ref-count may be greater than 1 upon * return. * * MPSAFE */ int hammer_ref_interlock(struct hammer_lock *lock) { u_int lv; u_int nlv; /* * Integrated reference count bump, lock, and check, with hot-path. * * (a) Return 1 (+LOCKED, +CHECK) 0->1 transition * (b) Return 0 (-LOCKED, -CHECK) N->N+1 transition * (c) Break out (+CHECK) Check condition and Cannot lock * (d) Return 1 (+LOCKED, +CHECK) Successfully locked */ for (;;) { lv = lock->refs; if (lv == 0) { nlv = 1 | HAMMER_REFS_LOCKED | HAMMER_REFS_CHECK; if (atomic_cmpset_int(&lock->refs, lv, nlv)) { lock->rowner = curthread; return(1); } } else { nlv = (lv + 1); if ((lv & ~HAMMER_REFS_FLAGS) == 0) nlv |= HAMMER_REFS_CHECK; if ((nlv & HAMMER_REFS_CHECK) == 0) { if (atomic_cmpset_int(&lock->refs, lv, nlv)) return(0); } else if (lv & HAMMER_REFS_LOCKED) { /* CHECK also set here */ if (atomic_cmpset_int(&lock->refs, lv, nlv)) break; } else { /* CHECK also set here */ nlv |= HAMMER_REFS_LOCKED; if (atomic_cmpset_int(&lock->refs, lv, nlv)) { lock->rowner = curthread; return(1); } } } } /* * Deferred check condition because we were unable to acquire the * lock. We must block until the check condition is cleared due * to a race with another thread, or we are able to acquire the * lock. * * (a) Return 0 (-CHECK) Another thread handled it * (b) Return 1 (+LOCKED, +CHECK) We handled it. */ for (;;) { lv = lock->refs; if ((lv & HAMMER_REFS_CHECK) == 0) return(0); if (lv & HAMMER_REFS_LOCKED) { tsleep_interlock(&lock->refs, 0); nlv = (lv | HAMMER_REFS_WANTED); if (atomic_cmpset_int(&lock->refs, lv, nlv)) tsleep(&lock->refs, PINTERLOCKED, "h1lk", 0); } else { /* CHECK also set here */ nlv = lv | HAMMER_REFS_LOCKED; if (atomic_cmpset_int(&lock->refs, lv, nlv)) { lock->rowner = curthread; return(1); } } } /* not reached */ }
/* * Exclusive-lock a mutex, block until acquired. Recursion is allowed. * * Returns 0 on success, or the tsleep() return code on failure. * An error can only be returned if PCATCH is specified in the flags. */ static __inline int __mtx_lock_ex(mtx_t mtx, mtx_link_t link, const char *ident, int flags, int to) { u_int lock; u_int nlock; int error; for (;;) { lock = mtx->mtx_lock; if (lock == 0) { nlock = MTX_EXCLUSIVE | 1; if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) { mtx->mtx_owner = curthread; error = 0; break; } } else if ((lock & MTX_EXCLUSIVE) && mtx->mtx_owner == curthread) { KKASSERT((lock & MTX_MASK) != MTX_MASK); nlock = lock + 1; if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) { error = 0; break; } } else { /* * Clearing MTX_EXLINK in lock causes us to loop until * MTX_EXLINK is available. However, to avoid * unnecessary cpu cache traffic we poll instead. * * Setting MTX_EXLINK in nlock causes us to loop until * we can acquire MTX_EXLINK. * * Also set MTX_EXWANTED coincident with EXLINK, if * not already set. */ thread_t td; if (lock & MTX_EXLINK) { cpu_pause(); ++mtx_collision_count; continue; } td = curthread; /*lock &= ~MTX_EXLINK;*/ nlock = lock | MTX_EXWANTED | MTX_EXLINK; ++td->td_critcount; if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) { /* * Check for early abort */ if (link->state == MTX_LINK_ABORTED) { atomic_clear_int(&mtx->mtx_lock, MTX_EXLINK); --td->td_critcount; error = ENOLCK; if (mtx->mtx_link == NULL) { atomic_clear_int(&mtx->mtx_lock, MTX_EXWANTED); } break; } /* * Success. Link in our structure then * release EXLINK and sleep. */ link->owner = td; link->state = MTX_LINK_LINKED; if (mtx->mtx_link) { link->next = mtx->mtx_link; link->prev = link->next->prev; link->next->prev = link; link->prev->next = link; } else { link->next = link; link->prev = link; mtx->mtx_link = link; } tsleep_interlock(link, 0); atomic_clear_int(&mtx->mtx_lock, MTX_EXLINK); --td->td_critcount; error = tsleep(link, flags, ident, to); ++mtx_contention_count; /* * Normal unlink, we should own the exclusive * lock now. */ if (link->state == MTX_LINK_LINKED) mtx_delete_link(mtx, link); if (link->state == MTX_LINK_ACQUIRED) { KKASSERT(mtx->mtx_owner == link->owner); error = 0; break; } /* * Aborted lock (mtx_abort_ex called). */ if (link->state == MTX_LINK_ABORTED) { error = ENOLCK; break; } /* * tsleep error, else retry. */ if (error) break; } else { --td->td_critcount; } } ++mtx_collision_count; } return (error); }
/* * Stop a running timer and ensure that any running callout completes before * returning. If the timer is running on another cpu this function may block * to interlock against the callout. If the callout is currently executing * or blocked in another thread this function may also block to interlock * against the callout. * * The caller must be careful to avoid deadlocks, either by using * callout_init_lk() (which uses the lockmgr lock cancelation feature), * by using tokens and dealing with breaks in the serialization, or using * the lockmgr lock cancelation feature yourself in the callout callback * function. * * callout_stop() returns non-zero if the callout was pending. */ static int _callout_stop(struct callout *c, int issync) { globaldata_t gd = mycpu; globaldata_t tgd; softclock_pcpu_t sc; int flags; int nflags; int rc; int cpuid; #ifdef INVARIANTS if ((c->c_flags & CALLOUT_DID_INIT) == 0) { callout_init(c); kprintf( "callout_stop(%p) from %p: callout was not initialized\n", c, ((int **)&c)[-1]); print_backtrace(-1); } #endif crit_enter_gd(gd); /* * Fast path operations: * * If ARMED and owned by our cpu, or not ARMED, and other simple * conditions are met, we can just clear ACTIVE and EXECUTED * and we are done. */ for (;;) { flags = c->c_flags; cpu_ccfence(); cpuid = CALLOUT_FLAGS_TO_CPU(flags); /* * Can't handle an armed callout in the fast path if it is * not on the current cpu. We must atomically increment the * IPI count for the IPI we intend to send and break out of * the fast path to enter the slow path. */ if (flags & CALLOUT_ARMED) { if (gd->gd_cpuid != cpuid) { nflags = flags + 1; if (atomic_cmpset_int(&c->c_flags, flags, nflags)) { /* break to slow path */ break; } continue; /* retry */ } } else { cpuid = gd->gd_cpuid; KKASSERT((flags & CALLOUT_IPI_MASK) == 0); KKASSERT((flags & CALLOUT_PENDING) == 0); } /* * Process pending IPIs and retry (only if not called from * an IPI). */ if (flags & CALLOUT_IPI_MASK) { lwkt_process_ipiq(); continue; /* retry */ } /* * Transition to the stopped state, recover the EXECUTED * status. If pending we cannot clear ARMED until after * we have removed (c) from the callwheel. * * NOTE: The callout might already not be armed but in this * case it should also not be pending. */ nflags = flags & ~(CALLOUT_ACTIVE | CALLOUT_EXECUTED | CALLOUT_WAITING | CALLOUT_PENDING); /* NOTE: IPI_MASK already tested */ if ((flags & CALLOUT_PENDING) == 0) nflags &= ~CALLOUT_ARMED; if (atomic_cmpset_int(&c->c_flags, flags, nflags)) { /* * Can only remove from callwheel if currently * pending. */ if (flags & CALLOUT_PENDING) { sc = &softclock_pcpu_ary[gd->gd_cpuid]; if (sc->next == c) sc->next = TAILQ_NEXT(c, c_links.tqe); TAILQ_REMOVE( &sc->callwheel[c->c_time & cwheelmask], c, c_links.tqe); c->c_func = NULL; /* * NOTE: Can't clear ARMED until we have * physically removed (c) from the * callwheel. * * NOTE: WAITING bit race exists when doing * unconditional bit clears. */ callout_maybe_clear_armed(c); if (c->c_flags & CALLOUT_WAITING) flags |= CALLOUT_WAITING; } /* * ARMED has been cleared at this point and (c) * might now be stale. Only good for wakeup()s. */ if (flags & CALLOUT_WAITING) wakeup(c); goto skip_slow; } /* retry */ } /* * Slow path (and not called via an IPI). * * When ARMED to a different cpu the stop must be processed on that * cpu. Issue the IPI and wait for completion. We have already * incremented the IPI count. 
*/ tgd = globaldata_find(cpuid); lwkt_send_ipiq3(tgd, callout_stop_ipi, c, issync); for (;;) { int flags; int nflags; flags = c->c_flags; cpu_ccfence(); if ((flags & CALLOUT_IPI_MASK) == 0) /* fast path */ break; nflags = flags | CALLOUT_WAITING; tsleep_interlock(c, 0); if (atomic_cmpset_int(&c->c_flags, flags, nflags)) { tsleep(c, PINTERLOCKED, "cstp1", 0); } } skip_slow: /* * If (issync) we must also wait for any in-progress callbacks to * complete, unless the stop is being executed from the callback * itself. The EXECUTED flag is set prior to the callback * being made so our existing flags status already has it. * * If auto-lock mode is being used, this is where we cancel any * blocked lock that is potentially preventing the target cpu * from completing the callback. */ while (issync) { intptr_t *runp; intptr_t runco; sc = &softclock_pcpu_ary[cpuid]; if (gd->gd_curthread == &sc->thread) /* stop from cb */ break; runp = &sc->running; runco = *runp; cpu_ccfence(); if ((runco & ~(intptr_t)1) != (intptr_t)c) break; if (c->c_flags & CALLOUT_AUTOLOCK) lockmgr(c->c_lk, LK_CANCEL_BEG); tsleep_interlock(c, 0); if (atomic_cmpset_long(runp, runco, runco | 1)) tsleep(c, PINTERLOCKED, "cstp3", 0); if (c->c_flags & CALLOUT_AUTOLOCK) lockmgr(c->c_lk, LK_CANCEL_END); } crit_exit_gd(gd); rc = (flags & CALLOUT_EXECUTED) != 0; return rc; }
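A hypothetical consumer ties the pieces together (struct and function names below are invented; callout_init_lk(), callout_reset() and callout_stop_sync() are the standard APIs): the callout acquires the object's lockmgr lock around each callback, and teardown uses the synchronous stop, which corresponds to the issync path above, so no callback can still be running, or still blocked on the lock, once the object is torn down.

struct example_timer_obj {
	struct callout	obj_timer;
	struct lock	obj_lk;
	int		obj_ticks;
};

static void
example_tick(void *arg)
{
	struct example_timer_obj *obj = arg;	/* obj_lk held by the callout */

	++obj->obj_ticks;
	callout_reset(&obj->obj_timer, hz, example_tick, obj);
}

static void
example_start(struct example_timer_obj *obj)
{
	lockinit(&obj->obj_lk, "exobj", 0, 0);
	callout_init_lk(&obj->obj_timer, &obj->obj_lk);
	callout_reset(&obj->obj_timer, hz, example_tick, obj);
}

static void
example_stop(struct example_timer_obj *obj)
{
	/*
	 * Synchronous stop: interlocks against a pending timer, a timer
	 * armed on another cpu, and a callback currently blocked on
	 * obj_lk (via the LK_CANCEL_BEG/END dance in the issync loop).
	 */
	callout_stop_sync(&obj->obj_timer);
	lockuninit(&obj->obj_lk);
}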