/*
 * Get a chain of descriptors from the used ring, if one is available.
 */
struct vq_entry *
virtio_pull_chain(struct virtqueue *vq, uint32_t *len)
{
    struct vq_entry *head;
    int slot;
    int usedidx;

    mutex_enter(&vq->vq_used_lock);

    /* No used entries? Bye. */
    if (vq->vq_used_idx == vq->vq_used->idx) {
        mutex_exit(&vq->vq_used_lock);
        return (NULL);
    }

    usedidx = vq->vq_used_idx;
    vq->vq_used_idx++;
    mutex_exit(&vq->vq_used_lock);

    usedidx %= vq->vq_num;

    /* Make sure we do the next step _after_ checking the idx. */
    membar_consumer();

    slot = vq->vq_used->ring[usedidx].id;
    *len = vq->vq_used->ring[usedidx].len;

    head = &vq->vq_entries[slot];

    return (head);
}
bpfjit_func_t
bpf_jit_generate(bpf_ctx_t *bc, void *code, size_t size)
{
    membar_consumer();
    if (bpfjit_module_ops.bj_generate_code != NULL) {
        return bpfjit_module_ops.bj_generate_code(bc, code, size);
    }
    return NULL;
}
static inline uint32_t
fake_readl(const void __iomem *ptr)
{
    uint32_t v;

    v = *(const uint32_t __iomem *)ptr;
    membar_consumer();
    return v;
}
int
xb_read(void *data, unsigned len)
{
    volatile struct xenstore_domain_interface *intf =
        xs_domain_interface(xb_addr);
    XENSTORE_RING_IDX cons, prod;
    extern int do_polled_io;

    while (len != 0) {
        unsigned int avail;
        const char *src;

        mutex_enter(&xb_wait_lock);
        while (intf->rsp_cons == intf->rsp_prod) {
            if (interrupts_unleashed && !do_polled_io) {
                if (cv_wait_sig(&xb_wait_cv,
                    &xb_wait_lock) == 0) {
                    mutex_exit(&xb_wait_lock);
                    return (EINTR);
                }
            } else { /* polled mode needed for early probes */
                (void) HYPERVISOR_yield();
            }
        }
        mutex_exit(&xb_wait_lock);

        /* Read indexes, then verify. */
        cons = intf->rsp_cons;
        prod = intf->rsp_prod;
        membar_enter();
        if (!check_indexes(cons, prod))
            return (EIO);

        src = get_input_chunk(cons, prod, (char *)intf->rsp, &avail);
        if (avail == 0)
            continue;
        if (avail > len)
            avail = len;

        /* We must read header before we read data. */
        membar_consumer();

        (void) memcpy(data, src, avail);
        data = (void *)((uintptr_t)data + avail);
        len -= avail;

        /* Other side must not see free space until we've copied out */
        membar_enter();
        intf->rsp_cons += avail;

        /* Implies mb(): they will see new header. */
        ec_notify_via_evtchn(xen_info->store_evtchn);
    }

    return (0);
}
/*
 * Fetch a /dev/u?random context's CPRNG, or create and save one if
 * necessary.
 */
static struct cprng_strong *
rnd_ctx_cprng(struct rnd_ctx *ctx)
{
    struct cprng_strong *cprng, *tmp = NULL;

    /* Fast path: if someone has already allocated a CPRNG, use it. */
    cprng = ctx->rc_cprng;
    if (__predict_true(cprng != NULL)) {
        /* Make sure the CPU hasn't prefetched cprng's guts. */
        membar_consumer();
        goto out;
    }

    /* Slow path: create a CPRNG.  Allocate before taking locks. */
    char name[64];
    struct lwp *const l = curlwp;

    (void)snprintf(name, sizeof(name), "%d %"PRIu64" %u",
        (int)l->l_proc->p_pid, l->l_ncsw, l->l_cpticks);
    const int flags = (ctx->rc_hard ? (CPRNG_USE_CV | CPRNG_HARD) :
        (CPRNG_INIT_ANY | CPRNG_REKEY_ANY));
    tmp = cprng_strong_create(name, IPL_NONE, flags);

    /* Publish cprng's guts before the pointer to them. */
    membar_producer();

    /* Attempt to publish tmp, unless someone beat us. */
    cprng = atomic_cas_ptr(&ctx->rc_cprng, NULL, tmp);
    if (__predict_false(cprng != NULL)) {
        /* Make sure the CPU hasn't prefetched cprng's guts. */
        membar_consumer();
        goto out;
    }

    /* Published.  Commit tmp. */
    cprng = tmp;
    tmp = NULL;

out:
    if (tmp != NULL)
        cprng_strong_destroy(tmp);
    KASSERT(cprng != NULL);

    return cprng;
}
static int
getstat(caddr_t data)
{
    au_kcontext_t *kctx = GET_KCTX_PZ;

    membar_consumer();

    if (copyout((caddr_t)&(kctx->auk_statistics), data, sizeof (au_stat_t)))
        return (EFAULT);
    return (0);
}
boolean_t
valid_ephemeral_gid(zone_t *zone, gid_t id)
{
    ephemeral_zsd_t *eph_zsd;

    if (id <= IDMAP_WK__MAX_GID)
        return (B_TRUE);

    eph_zsd = get_ephemeral_zsd(zone);
    ASSERT(eph_zsd != NULL);
    membar_consumer();

    return (id > eph_zsd->min_gid && id <= eph_zsd->last_gid);
}
/*
 * We can't afford locking the privileges here because of the locations
 * we call this from; so we make sure that the privileges table
 * is visible to us; it is made visible before the value of nprivs is
 * updated.
 */
const char *
priv_getbynum(int priv)
{
    int maxpriv = nprivs;

    membar_consumer();

    if (priv >= 0 && priv < maxpriv)
        return (priv_names[priv]);

    return (NULL);
}
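/*
 * Added sketch, not from the original source: the comment above implies a
 * publish order on the update side -- install the fully built table, issue
 * membar_producer(), and only then raise nprivs -- so a reader that observes
 * the new count also observes the new entries.  priv_names_publish() and its
 * parameters are hypothetical names used purely for illustration.
 */
static void
priv_names_publish(const char **newtable, int newcount)
{
    priv_names = newtable;  /* table contents fully initialized first */
    membar_producer();      /* order table contents before the count */
    nprivs = newcount;      /* readers pair this with membar_consumer() */
}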
void
vdsp_vd_task(void *xsc)
{
    struct vdsp_softc *sc = xsc;
    struct vd_desc *vd;

    while (sc->sc_vd_cons != sc->sc_vd_prod) {
        membar_consumer();
        vd = sc->sc_vd_ring[sc->sc_vd_cons++ % sc->sc_num_descriptors];

        DPRINTF(("%s: operation %x\n", sc->sc_dv.dv_xname,
            vd->operation));
        switch (vd->operation) {
        case VD_OP_BREAD:
            vdsp_read_dring(sc, vd);
            break;
        case VD_OP_BWRITE:
            vdsp_write_dring(sc, vd);
            break;
        case VD_OP_FLUSH:
            vdsp_flush_dring(sc, vd);
            break;
        case VD_OP_GET_VTOC:
            vdsp_get_vtoc(sc, vd);
            break;
        case VD_OP_SET_VTOC:
            vdsp_set_vtoc(sc, vd);
            break;
        case VD_OP_GET_DISKGEOM:
            vdsp_get_diskgeom(sc, vd);
            break;
        case VD_OP_GET_WCE:
        case VD_OP_SET_WCE:
        case VD_OP_GET_DEVID:
            /*
             * Solaris issues VD_OP_GET_DEVID despite the
             * fact that we don't advertise it.  It seems
             * to be able to handle failure just fine, so
             * we silently ignore it.
             */
            vdsp_unimp(sc, vd);
            break;
        default:
            printf("%s: unsupported operation 0x%02x\n",
                sc->sc_dv.dv_xname, vd->operation);
            vdsp_unimp(sc, vd);
            break;
        }
    }
}
int
__sys_faulthandler(unsigned vect, u_long va, u_long err, struct intframe *f)
{
    extern lwt_t *lwt_current;

    printf("\nException %u, va = %p, err = %lx\n", vect, (void *)va, err);
    if (lwt_current != NULL &&
        (membar_consumer(), lwt_current->flags & LWTF_XCPT)) {
        framelongjmp(f, &lwt_current->xcptbuf);
        return 0;
    }
    framedump(f);
    printf("\n");
    return 0;
}
/*
 * Since the hashtable itself isn't protected by a lock, obtaining a
 * per-bucket lock proceeds as follows:
 *
 * (a) li->li_htlock protects li->li_hashtable, li->li_htsize, and
 *     li->li_retired.
 *
 * (b) Per-bucket locks (lh_lock) protect the contents of the bucket.
 *
 * (c) Locking order for resizing the hashtable is li_htlock then
 *     lh_lock.
 *
 * To grab the bucket lock we:
 *
 * (1) Stash away the htsize and the pointer to the hashtable to make
 *     sure neither change while we're using them.
 *
 * (2) lgrow() updates the pointer to the hashtable before it updates
 *     the size: the worst case scenario is that we have the wrong size
 *     (but the correct table), so we hash to the wrong bucket, grab the
 *     wrong lock, and then realize that things have changed, rewind and
 *     start again.  If both the size and the table changed since we
 *     loaded them, we'll realize that too and restart.
 *
 * (3) The protocol for growing the hashtable involves holding *all* the
 *     locks in the table, hence the unlocking code (TABLE_LOCK_EXIT())
 *     doesn't need to do any dances, since neither the table nor the
 *     size can change while any bucket lock is held.
 *
 * (4) If the hashtable is growing (by thread t1) while another thread
 *     (t2) is trying to grab a bucket lock, t2 might have a stale
 *     reference to li->li_htsize:
 *
 *     - t1 grabs all locks in lgrow()
 *     - t2 loads li->li_htsize and li->li_hashtable
 *     - t1 changes li->hashtable
 *     - t2 loads from an offset in the "stale" hashtable and tries to
 *       grab the relevant mutex.
 *
 * If t1 had free'd the stale hashtable, t2 would be in trouble.  Hence,
 * stale hashtables are not freed but stored in a list of "retired"
 * hashtables, which is emptied when the filesystem is unmounted.
 */
static void
table_lock_enter(vnode_t *vp, struct loinfo *li)
{
    struct lobucket *chain;
    uint_t htsize;
    uint_t hash;

    for (;;) {
        htsize = li->li_htsize;
        membar_consumer();
        chain = (struct lobucket *)li->li_hashtable;
        hash = ltablehash(vp, htsize);
        mutex_enter(&chain[hash].lh_lock);
        if (li->li_hashtable == chain && li->li_htsize == htsize)
            break;
        mutex_exit(&chain[hash].lh_lock);
    }
}
/*
 * pthread_once: calls given function only once.
 * it synchronizes via mutex in pthread_once_t structure
 */
int
pthread_once(pthread_once_t *once_control, void (*init_routine)(void))
{
    __once_t *once = (__once_t *)once_control;

    if (once == NULL || init_routine == NULL)
        return (EINVAL);

    if (once->once_flag == PTHREAD_ONCE_NOTDONE) {
        (void) mutex_lock(&once->mlock);
        if (once->once_flag == PTHREAD_ONCE_NOTDONE) {
            pthread_cleanup_push(_mutex_unlock_wrap, &once->mlock);
            (*init_routine)();
            pthread_cleanup_pop(0);
            membar_producer();
            once->once_flag = PTHREAD_ONCE_DONE;
        }
        (void) mutex_unlock(&once->mlock);
    }
    membar_consumer();

    return (0);
}
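/*
 * Added usage sketch, not part of the library source: pthread_once()
 * guarantees init_subsystem() runs exactly once, and the membar_producer()/
 * membar_consumer() pair above makes its stores visible to every caller that
 * observes once_flag == PTHREAD_ONCE_DONE.  The names below are illustrative.
 */
#include <pthread.h>

static pthread_once_t subsys_once = PTHREAD_ONCE_INIT;
static int subsys_ready;        /* written only by init_subsystem() */

static void
init_subsystem(void)
{
    subsys_ready = 1;           /* published before once_flag is set */
}

int
subsystem_is_ready(void)
{
    (void) pthread_once(&subsys_once, init_subsystem);
    return (subsys_ready);      /* ordered by the membar_consumer() above */
}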
void
virtio_sync_vq(struct virtqueue *vq)
{
    struct virtio_softc *vsc = vq->vq_owner;

    /* Make sure the avail ring update hit the buffer */
    membar_producer();

    vq->vq_avail->idx = vq->vq_avail_idx;

    /* Make sure the avail idx update hits the buffer */
    membar_producer();

    /* Make sure we see the flags update */
    membar_consumer();

    if (!(vq->vq_used->flags & VRING_USED_F_NO_NOTIFY)) {
        ddi_put16(vsc->sc_ioh,
            /* LINTED E_BAD_PTR_CAST_ALIGN */
            (uint16_t *)(vsc->sc_io_addr +
            VIRTIO_CONFIG_QUEUE_NOTIFY),
            vq->vq_index);
    }
}
void
kern_preprom(void)
{
    for (;;) {
        /*
         * Load the current CPU pointer and examine the mutex_ready bit.
         * It doesn't matter if we are preempted here because we are
         * only trying to determine if we are in the *set* of mutex
         * ready CPUs.  We cannot disable preemption until we confirm
         * that we are running on a CPU in this set, since a call to
         * kpreempt_disable() requires access to curthread.
         */
        processorid_t cpuid = getprocessorid();
        cpu_t *cp = cpu[cpuid];
        cpu_t *prcp;

        if (panicstr)
            return;     /* just return if we are currently panicking */

        if (CPU_IN_SET(cpu_ready_set, cpuid) && cp->cpu_m.mutex_ready) {
            /*
             * Disable preemption, and reload the current CPU.  We
             * can't move from a mutex_ready cpu to a non-ready cpu
             * so we don't need to re-check cp->cpu_m.mutex_ready.
             */
            kpreempt_disable();
            cp = CPU;
            ASSERT(cp->cpu_m.mutex_ready);

            /*
             * Try the lock.  If we don't get the lock, re-enable
             * preemption and see if we should sleep.  If we are
             * already the lock holder, remove the effect of the
             * previous kpreempt_disable() before returning since
             * preemption was disabled by an earlier kern_preprom.
             */
            prcp = atomic_cas_ptr((void *)&prom_cpu, NULL, cp);
            if (prcp == NULL ||
                (prcp == cp && prom_thread == curthread)) {
                if (prcp == cp)
                    kpreempt_enable();
                break;
            }

            kpreempt_enable();

            /*
             * We have to be very careful here since both prom_cpu
             * and prcp->cpu_m.mutex_ready can be changed at any
             * time by a non mutex_ready cpu holding the lock.
             * If the owner is mutex_ready, holding prom_mutex
             * prevents kern_postprom() from completing.  If the
             * owner isn't mutex_ready, we only know it will clear
             * prom_cpu before changing cpu_m.mutex_ready, so we
             * issue a membar after checking mutex_ready and then
             * re-verify that prom_cpu is still held by the same
             * cpu before actually proceeding to cv_wait().
             */
            mutex_enter(&prom_mutex);
            prcp = prom_cpu;
            if (prcp != NULL && prcp->cpu_m.mutex_ready != 0) {
                membar_consumer();
                if (prcp == prom_cpu)
                    cv_wait(&prom_cv, &prom_mutex);
            }
            mutex_exit(&prom_mutex);

        } else {
            /*
             * If we are not yet mutex_ready, just attempt to grab
             * the lock.  If we get it or already hold it, break.
             */
            ASSERT(getpil() == PIL_MAX);
            prcp = atomic_cas_ptr((void *)&prom_cpu, NULL, cp);
            if (prcp == NULL || prcp == cp)
                break;
        }
    }

    /*
     * We now hold the prom_cpu lock.  Increment the hold count by one
     * and assert our current state before returning to the caller.
     */
    atomic_inc_32(&prom_holdcnt);
    ASSERT(prom_holdcnt >= 1);
    prom_thread = curthread;
}
static void
shmif_rcv(void *arg)
{
    struct ifnet *ifp = arg;
    struct shmif_sc *sc = ifp->if_softc;
    struct shmif_mem *busmem;
    struct mbuf *m = NULL;
    struct ether_header *eth;
    uint32_t nextpkt;
    bool wrap, passup;
    int error;
    const int align = ALIGN(sizeof(struct ether_header)) -
        sizeof(struct ether_header);

 reup:
    mutex_enter(&sc->sc_mtx);
    while ((ifp->if_flags & IFF_RUNNING) == 0 && !sc->sc_dying)
        cv_wait(&sc->sc_cv, &sc->sc_mtx);
    mutex_exit(&sc->sc_mtx);

    busmem = sc->sc_busmem;

    while (ifp->if_flags & IFF_RUNNING) {
        struct shmif_pkthdr sp;

        if (m == NULL) {
            m = m_gethdr(M_WAIT, MT_DATA);
            MCLGET(m, M_WAIT);
            m->m_data += align;
        }

        DPRINTF(("waiting %d/%" PRIu64 "\n",
            sc->sc_nextpacket, sc->sc_devgen));
        KASSERT(m->m_flags & M_EXT);

        shmif_lockbus(busmem);
        KASSERT(busmem->shm_magic == SHMIF_MAGIC);
        KASSERT(busmem->shm_gen >= sc->sc_devgen);

        /* need more data? */
        if (sc->sc_devgen == busmem->shm_gen &&
            shmif_nextpktoff(busmem, busmem->shm_last)
             == sc->sc_nextpacket) {
            shmif_unlockbus(busmem);
            error = 0;
            rumpcomp_shmif_watchwait(sc->sc_kq);
            if (__predict_false(error))
                printf("shmif_rcv: wait failed %d\n", error);
            membar_consumer();
            continue;
        }

        if (stillvalid_p(sc)) {
            nextpkt = sc->sc_nextpacket;
        } else {
            KASSERT(busmem->shm_gen > 0);
            nextpkt = busmem->shm_first;
            if (busmem->shm_first > busmem->shm_last)
                sc->sc_devgen = busmem->shm_gen - 1;
            else
                sc->sc_devgen = busmem->shm_gen;
            DPRINTF(("dev %p overrun, new data: %d/%" PRIu64 "\n",
                sc, nextpkt, sc->sc_devgen));
        }

        /*
         * If our read pointer is ahead of the bus's last write, our
         * generation must be one behind.
         */
        KASSERT(!(nextpkt > busmem->shm_last
            && sc->sc_devgen == busmem->shm_gen));

        wrap = false;
        nextpkt = shmif_busread(busmem, &sp,
            nextpkt, sizeof(sp), &wrap);
        KASSERT(sp.sp_len <= ETHERMTU + ETHER_HDR_LEN);
        nextpkt = shmif_busread(busmem, mtod(m, void *),
            nextpkt, sp.sp_len, &wrap);

        DPRINTF(("shmif_rcv: read packet of length %d at %d\n",
            sp.sp_len, nextpkt));

        sc->sc_nextpacket = nextpkt;
        shmif_unlockbus(sc->sc_busmem);

        if (wrap) {
            sc->sc_devgen++;
            DPRINTF(("dev %p generation now %" PRIu64 "\n",
                sc, sc->sc_devgen));
        }

        /*
         * Ignore packets too short to possibly be valid.
         * This is hit at least for the first frame on a new bus.
         */
        if (__predict_false(sp.sp_len < ETHER_HDR_LEN)) {
            DPRINTF(("shmif read packet len %d < ETHER_HDR_LEN\n",
                sp.sp_len));
            continue;
        }

        m->m_len = m->m_pkthdr.len = sp.sp_len;
        m->m_pkthdr.rcvif = ifp;

        /*
         * Test if we want to pass the packet upwards
         */
        eth = mtod(m, struct ether_header *);
        if (memcmp(eth->ether_dhost, CLLADDR(ifp->if_sadl),
            ETHER_ADDR_LEN) == 0) {
            passup = true;
        } else if (ETHER_IS_MULTICAST(eth->ether_dhost)) {
            passup = true;
        } else if (ifp->if_flags & IFF_PROMISC) {
            m->m_flags |= M_PROMISC;
            passup = true;
        } else {
            passup = false;
        }

        if (passup) {
            KERNEL_LOCK(1, NULL);
            bpf_mtap(ifp, m);
            ifp->if_input(ifp, m);
            KERNEL_UNLOCK_ONE(NULL);
            m = NULL;
        }
        /* else: reuse mbuf for a future packet */
    }
    m_freem(m);
    m = NULL;

    if (!sc->sc_dying)
        goto reup;

    kthread_exit(0);
}
/*
 * Interrupt handler for Rx.  Look if there are any pending Rx and
 * put them in mplist.
 */
mblk_t *
vmxnet3s_rx_intr(vmxnet3s_softc_t *dp, vmxnet3s_rxq_t *rxq)
{
    vmxnet3s_compring_t *compring = &rxq->compring;
    vmxnet3s_cmdring_t *cmdring = &rxq->cmdring;
    vmxnet3s_rxqctrl_t *rxqctrl = rxq->sharedctrl;
    vmxnet3s_gendesc_t *compdesc;
    mblk_t *mplist = NULL;
    mblk_t **mplisttail = &mplist;

    ASSERT(mutex_owned(&dp->intrlock));

    compdesc = VMXNET3_GET_DESC(compring, compring->next2comp);
    while (compdesc->rcd.gen == compring->gen) {
        mblk_t *mp = NULL;
        mblk_t **mptail = &mp;
        boolean_t mpvalid = B_TRUE;
        boolean_t eop;

        ASSERT(compdesc->rcd.sop);

        do {
            uint16_t rxdidx = compdesc->rcd.rxdidx;
            vmxnet3s_rxbuf_t *rxbuf = rxq->bufring[rxdidx].rxbuf;
            mblk_t *mblk = rxbuf->mblk;
            vmxnet3s_gendesc_t *rxdesc;

            while (compdesc->rcd.gen != compring->gen) {
                /*
                 * H/W may still be in the middle of
                 * generating this entry, so hold on until
                 * the gen bit is flipped.
                 */
                membar_consumer();
            }
            ASSERT(compdesc->rcd.gen == compring->gen);
            ASSERT(rxbuf);
            ASSERT(mblk);

            /* Some Rx descriptors may have been skipped */
            while (cmdring->next2fill != rxdidx) {
                rxdesc = VMXNET3_GET_DESC(cmdring,
                    cmdring->next2fill);
                rxdesc->rxd.gen = cmdring->gen;
                VMXNET3_INC_RING_IDX(cmdring,
                    cmdring->next2fill);
            }

            eop = compdesc->rcd.eop;

            /*
             * Now we have a piece of the packet in the rxdidx
             * descriptor.  Grab it only if we manage to replace
             * it with a fresh buffer.
             */
            if (vmxnet3s_rx_populate(dp, rxq, rxdidx, B_FALSE)
                == DDI_SUCCESS) {
                /* Success, we can chain the mblk with the mp */
                mblk->b_wptr = mblk->b_rptr + compdesc->rcd.len;
                *mptail = mblk;
                mptail = &mblk->b_cont;
                ASSERT(*mptail == NULL);

                if (eop) {
                    if (!compdesc->rcd.err) {
                        /*
                         * Tag the mp if it was
                         * checksummed by the H/W
                         */
                        vmxnet3s_rx_hwcksum(dp, mp,
                            compdesc);
                    } else {
                        mpvalid = B_FALSE;
                    }
                }
            } else {
                /*
                 * Keep the same buffer, we still need to flip
                 * the gen bit
                 */
                rxdesc = VMXNET3_GET_DESC(cmdring, rxdidx);
                rxdesc->rxd.gen = cmdring->gen;
                mpvalid = B_FALSE;
            }

            VMXNET3_INC_RING_IDX(compring, compring->next2comp);
            VMXNET3_INC_RING_IDX(cmdring, cmdring->next2fill);
            compdesc = VMXNET3_GET_DESC(compring,
                compring->next2comp);
        } while (!eop);

        if (mp) {
            if (mpvalid) {
                *mplisttail = mp;
                mplisttail = &mp->b_next;
                ASSERT(*mplisttail == NULL);
            } else {
                /* This message got holes, drop it */
                freemsg(mp);
            }
        }
    }

    if (rxqctrl->updaterxprod) {
        uint32_t rxprod;

        /*
         * All buffers are actually available, but we can't tell that to
         * the device because it may interpret that as an empty ring.
         * So skip one buffer.
         */
        if (cmdring->next2fill)
            rxprod = cmdring->next2fill - 1;
        else
            rxprod = cmdring->size - 1;

        VMXNET3_BAR0_PUT32(dp, VMXNET3_REG_RXPROD, rxprod);
    }

    return (mplist);
}
/*
 * mutex_vector_enter:
 *
 *    Support routine for mutex_enter() that must handle all cases.  In
 *    the LOCKDEBUG case, mutex_enter() is always aliased here, even if
 *    fast-path stubs are available.  If a mutex_spin_enter() stub is
 *    not available, then it is also aliased directly here.
 */
void
mutex_vector_enter(kmutex_t *mtx)
{
    uintptr_t owner, curthread;
    turnstile_t *ts;
#ifdef MULTIPROCESSOR
    u_int count;
#endif
    LOCKSTAT_COUNTER(spincnt);
    LOCKSTAT_COUNTER(slpcnt);
    LOCKSTAT_TIMER(spintime);
    LOCKSTAT_TIMER(slptime);
    LOCKSTAT_FLAG(lsflag);

    /*
     * Handle spin mutexes.
     */
    if (MUTEX_SPIN_P(mtx)) {
#if defined(LOCKDEBUG) && defined(MULTIPROCESSOR)
        u_int spins = 0;
#endif
        MUTEX_SPIN_SPLRAISE(mtx);
        MUTEX_WANTLOCK(mtx);
#ifdef FULL
        if (MUTEX_SPINBIT_LOCK_TRY(mtx)) {
            MUTEX_LOCKED(mtx);
            return;
        }
#if !defined(MULTIPROCESSOR)
        MUTEX_ABORT(mtx, "locking against myself");
#else /* !MULTIPROCESSOR */

        LOCKSTAT_ENTER(lsflag);
        LOCKSTAT_START_TIMER(lsflag, spintime);
        count = SPINLOCK_BACKOFF_MIN;

        /*
         * Spin testing the lock word and do exponential backoff
         * to reduce cache line ping-ponging between CPUs.
         */
        do {
            if (panicstr != NULL)
                break;
            while (MUTEX_SPINBIT_LOCKED_P(mtx)) {
                SPINLOCK_BACKOFF(count);
#ifdef LOCKDEBUG
                if (SPINLOCK_SPINOUT(spins))
                    MUTEX_ABORT(mtx, "spinout");
#endif /* LOCKDEBUG */
            }
        } while (!MUTEX_SPINBIT_LOCK_TRY(mtx));

        if (count != SPINLOCK_BACKOFF_MIN) {
            LOCKSTAT_STOP_TIMER(lsflag, spintime);
            LOCKSTAT_EVENT(lsflag, mtx,
                LB_SPIN_MUTEX | LB_SPIN, 1, spintime);
        }
        LOCKSTAT_EXIT(lsflag);
#endif /* !MULTIPROCESSOR */
#endif /* FULL */
        MUTEX_LOCKED(mtx);
        return;
    }

    curthread = (uintptr_t)curlwp;

    MUTEX_DASSERT(mtx, MUTEX_ADAPTIVE_P(mtx));
    MUTEX_ASSERT(mtx, curthread != 0);
    MUTEX_WANTLOCK(mtx);

    if (panicstr == NULL) {
        LOCKDEBUG_BARRIER(&kernel_lock, 1);
    }

    LOCKSTAT_ENTER(lsflag);

    /*
     * Adaptive mutex; spin trying to acquire the mutex.  If we
     * determine that the owner is not running on a processor,
     * then we stop spinning, and sleep instead.
     */
    KPREEMPT_DISABLE(curlwp);
    for (owner = mtx->mtx_owner;;) {
        if (!MUTEX_OWNED(owner)) {
            /*
             * Mutex owner clear could mean two things:
             *
             *    * The mutex has been released.
             *    * The owner field hasn't been set yet.
             *
             * Try to acquire it again.  If that fails,
             * we'll just loop again.
             */
            if (MUTEX_ACQUIRE(mtx, curthread))
                break;
            owner = mtx->mtx_owner;
            continue;
        }
        if (__predict_false(panicstr != NULL)) {
            KPREEMPT_ENABLE(curlwp);
            return;
        }
        if (__predict_false(MUTEX_OWNER(owner) == curthread)) {
            MUTEX_ABORT(mtx, "locking against myself");
        }
#ifdef MULTIPROCESSOR
        /*
         * Check to see if the owner is running on a processor.
         * If so, then we should just spin, as the owner will
         * likely release the lock very soon.
         */
        if (mutex_oncpu(owner)) {
            LOCKSTAT_START_TIMER(lsflag, spintime);
            count = SPINLOCK_BACKOFF_MIN;
            do {
                KPREEMPT_ENABLE(curlwp);
                SPINLOCK_BACKOFF(count);
                KPREEMPT_DISABLE(curlwp);
                owner = mtx->mtx_owner;
            } while (mutex_oncpu(owner));
            LOCKSTAT_STOP_TIMER(lsflag, spintime);
            LOCKSTAT_COUNT(spincnt, 1);
            if (!MUTEX_OWNED(owner))
                continue;
        }
#endif

        ts = turnstile_lookup(mtx);

        /*
         * Once we have the turnstile chain interlock, mark the
         * mutex as having waiters.  If that fails, spin again:
         * chances are that the mutex has been released.
         */
        if (!MUTEX_SET_WAITERS(mtx, owner)) {
            turnstile_exit(mtx);
            owner = mtx->mtx_owner;
            continue;
        }

#ifdef MULTIPROCESSOR
        /*
         * mutex_exit() is permitted to release the mutex without
         * any interlocking instructions, and the following can
         * occur as a result:
         *
         *  CPU 1: MUTEX_SET_WAITERS()      CPU2: mutex_exit()
         * ---------------------------- ----------------------------
         *        ..                      acquire cache line
         *        ..                      test for waiters
         *  acquire cache line    <-      lose cache line
         *   lock cache line                     ..
         *  verify mutex is held                 ..
         *      set waiters                      ..
         *   unlock cache line                   ..
         *    lose cache line     ->      acquire cache line
         *        ..                      clear lock word, waiters
         *      return success
         *
         * There is another race that can occur: a third CPU could
         * acquire the mutex as soon as it is released.  Since
         * adaptive mutexes are primarily spin mutexes, this is not
         * something that we need to worry about too much.  What we
         * do need to ensure is that the waiters bit gets set.
         *
         * To allow the unlocked release, we need to make some
         * assumptions here:
         *
         * o Release is the only non-atomic/unlocked operation
         *   that can be performed on the mutex.  (It must still
         *   be atomic on the local CPU, e.g. in case interrupted
         *   or preempted).
         *
         * o At any given time, MUTEX_SET_WAITERS() can only ever
         *   be in progress on one CPU in the system - guaranteed
         *   by the turnstile chain lock.
         *
         * o No other operations other than MUTEX_SET_WAITERS()
         *   and release can modify a mutex with a non-zero
         *   owner field.
         *
         * o The result of a successful MUTEX_SET_WAITERS() call
         *   is an unbuffered write that is immediately visible
         *   to all other processors in the system.
         *
         * o If the holding LWP switches away, it posts a store
         *   fence before changing curlwp, ensuring that any
         *   overwrite of the mutex waiters flag by mutex_exit()
         *   completes before the modification of curlwp becomes
         *   visible to this CPU.
         *
         * o mi_switch() posts a store fence before setting curlwp
         *   and before resuming execution of an LWP.
         *
         * o _kernel_lock() posts a store fence before setting
         *   curcpu()->ci_biglock_wanted, and after clearing it.
         *   This ensures that any overwrite of the mutex waiters
         *   flag by mutex_exit() completes before the modification
         *   of ci_biglock_wanted becomes visible.
         *
         * We now post a read memory barrier (after setting the
         * waiters field) and check the lock holder's status again.
         * Some of the possible outcomes (not an exhaustive list):
         *
         * 1. The on-CPU check returns true: the holding LWP is
         *    running again.  The lock may be released soon and
         *    we should spin.  Importantly, we can't trust the
         *    value of the waiters flag.
         *
         * 2. The on-CPU check returns false: the holding LWP is
         *    not running.  We now have the opportunity to check
         *    if mutex_exit() has blatted the modifications made
         *    by MUTEX_SET_WAITERS().
         *
         * 3. The on-CPU check returns false: the holding LWP may
         *    or may not be running.  It has context switched at
         *    some point during our check.  Again, we have the
         *    chance to see if the waiters bit is still set or
         *    has been overwritten.
         *
         * 4. The on-CPU check returns false: the holding LWP is
         *    running on a CPU, but wants the big lock.  It's OK
         *    to check the waiters field in this case.
         *
         * 5. The has-waiters check fails: the mutex has been
         *    released, the waiters flag cleared and another LWP
         *    now owns the mutex.
         *
         * 6. The has-waiters check fails: the mutex has been
         *    released.
         *
         * If the waiters bit is not set it's unsafe to go to sleep,
         * as we might never be awoken.
         */
        if ((membar_consumer(), mutex_oncpu(owner)) ||
            (membar_consumer(), !MUTEX_HAS_WAITERS(mtx))) {
            turnstile_exit(mtx);
            owner = mtx->mtx_owner;
            continue;
        }
#endif /* MULTIPROCESSOR */

        LOCKSTAT_START_TIMER(lsflag, slptime);

        turnstile_block(ts, TS_WRITER_Q, mtx, &mutex_syncobj);

        LOCKSTAT_STOP_TIMER(lsflag, slptime);
        LOCKSTAT_COUNT(slpcnt, 1);

        owner = mtx->mtx_owner;
    }
    KPREEMPT_ENABLE(curlwp);

    LOCKSTAT_EVENT(lsflag, mtx, LB_ADAPTIVE_MUTEX | LB_SLEEP1,
        slpcnt, slptime);
    LOCKSTAT_EVENT(lsflag, mtx, LB_ADAPTIVE_MUTEX | LB_SPIN,
        spincnt, spintime);
    LOCKSTAT_EXIT(lsflag);

    MUTEX_DASSERT(mtx, MUTEX_OWNER(mtx->mtx_owner) == curthread);
    MUTEX_LOCKED(mtx);
}
static bool_t
rpcgss_calls_init(void)
{
    void *handle;
    bool_t ret = FALSE;

    if (initialized) {
        membar_consumer();
        return (TRUE);
    }
    (void) mutex_lock(&rpcgss_calls_mutex);
    if (initialized) {
        (void) mutex_unlock(&rpcgss_calls_mutex);
        membar_consumer();
        return (TRUE);
    }

    if ((handle = dlopen(RPCSEC, RTLD_LAZY)) == NULL)
        goto done;

    if ((calls.rpc_gss_seccreate = (AUTH *(*)()) dlsym(handle,
        "__rpc_gss_seccreate")) == NULL)
        goto done;
    if ((calls.rpc_gss_set_defaults = (bool_t (*)()) dlsym(handle,
        "__rpc_gss_set_defaults")) == NULL)
        goto done;
    if ((calls.rpc_gss_get_principal_name = (bool_t (*)()) dlsym(handle,
        "__rpc_gss_get_principal_name")) == NULL)
        goto done;
    if ((calls.rpc_gss_get_mechanisms = (char **(*)()) dlsym(handle,
        "__rpc_gss_get_mechanisms")) == NULL)
        goto done;
    if ((calls.rpc_gss_get_mech_info = (char **(*)()) dlsym(handle,
        "__rpc_gss_get_mech_info")) == NULL)
        goto done;
    if ((calls.rpc_gss_get_versions = (bool_t (*)()) dlsym(handle,
        "__rpc_gss_get_versions")) == NULL)
        goto done;
    if ((calls.rpc_gss_is_installed = (bool_t (*)()) dlsym(handle,
        "__rpc_gss_is_installed")) == NULL)
        goto done;
    if ((calls.rpc_gss_set_svc_name = (bool_t (*)()) dlsym(handle,
        "__rpc_gss_set_svc_name")) == NULL)
        goto done;
    if ((calls.rpc_gss_set_callback = (bool_t (*)()) dlsym(handle,
        "__rpc_gss_set_callback")) == NULL)
        goto done;
    if ((calls.rpc_gss_getcred = (bool_t (*)()) dlsym(handle,
        "__rpc_gss_getcred")) == NULL)
        goto done;
    if ((calls.rpc_gss_mech_to_oid = (bool_t (*)()) dlsym(handle,
        "__rpc_gss_mech_to_oid")) == NULL)
        goto done;
    if ((calls.rpc_gss_qop_to_num = (bool_t (*)()) dlsym(handle,
        "__rpc_gss_qop_to_num")) == NULL)
        goto done;
    if ((calls.__svcrpcsec_gss = (enum auth_stat (*)()) dlsym(handle,
        "__svcrpcsec_gss")) == NULL)
        goto done;
    if ((calls.__rpc_gss_wrap = (bool_t (*)()) dlsym(handle,
        "__rpc_gss_wrap")) == NULL)
        goto done;
    if ((calls.__rpc_gss_unwrap = (bool_t (*)()) dlsym(handle,
        "__rpc_gss_unwrap")) == NULL)
        goto done;
    if ((calls.rpc_gss_max_data_length = (int (*)()) dlsym(handle,
        "__rpc_gss_max_data_length")) == NULL)
        goto done;
    if ((calls.rpc_gss_svc_max_data_length = (int (*)()) dlsym(handle,
        "__rpc_gss_svc_max_data_length")) == NULL)
        goto done;
    if ((calls.rpc_gss_get_error = (void (*)()) dlsym(handle,
        "__rpc_gss_get_error")) == NULL)
        goto done;

    ret = TRUE;
done:
    if (!ret) {
        if (handle != NULL)
            (void) dlclose(handle);
    }
    membar_producer();
    initialized = ret;
    (void) mutex_unlock(&rpcgss_calls_mutex);
    return (ret);
}
/*
 * mutex_vector_enter() is called from the assembly mutex_enter() routine
 * if the lock is held or is not of type MUTEX_ADAPTIVE.
 */
void
mutex_vector_enter(mutex_impl_t *lp)
{
    kthread_id_t owner;
    hrtime_t sleep_time = 0;    /* how long we slept */
    uint_t spin_count = 0;      /* how many times we spun */
    cpu_t *cpup, *last_cpu;
    extern cpu_t *cpu_list;
    turnstile_t *ts;
    volatile mutex_impl_t *vlp = (volatile mutex_impl_t *)lp;
    int backoff;                /* current backoff */
    int backctr;                /* ctr for backoff */
    int sleep_count = 0;

    ASSERT_STACK_ALIGNED();

    if (MUTEX_TYPE_SPIN(lp)) {
        lock_set_spl(&lp->m_spin.m_spinlock, lp->m_spin.m_minspl,
            &lp->m_spin.m_oldspl);
        return;
    }

    if (!MUTEX_TYPE_ADAPTIVE(lp)) {
        mutex_panic("mutex_enter: bad mutex", lp);
        return;
    }

    /*
     * Adaptive mutexes must not be acquired from above LOCK_LEVEL.
     * We can migrate after loading CPU but before checking CPU_ON_INTR,
     * so we must verify by disabling preemption and loading CPU again.
     */
    cpup = CPU;
    if (CPU_ON_INTR(cpup) && !panicstr) {
        kpreempt_disable();
        if (CPU_ON_INTR(CPU))
            mutex_panic("mutex_enter: adaptive at high PIL", lp);
        kpreempt_enable();
    }

    CPU_STATS_ADDQ(cpup, sys, mutex_adenters, 1);

    if (&plat_lock_delay) {
        backoff = 0;
    } else {
        backoff = BACKOFF_BASE;
    }

    for (;;) {
spin:
        spin_count++;
        /*
         * Add an exponential backoff delay before trying again
         * to touch the mutex data structure.
         * The spin_count test and call to nulldev are to prevent
         * the compiler optimizer from eliminating the delay loop.
         */
        if (&plat_lock_delay) {
            plat_lock_delay(&backoff);
        } else {
            for (backctr = backoff; backctr; backctr--) {
                if (!spin_count)
                    (void) nulldev();
            }                           /* delay */
            backoff = backoff << 1;     /* double it */
            if (backoff > BACKOFF_CAP) {
                backoff = BACKOFF_CAP;
            }

            SMT_PAUSE();
        }

        if (panicstr)
            return;

        if ((owner = MUTEX_OWNER(vlp)) == NULL) {
            if (mutex_adaptive_tryenter(lp))
                break;
            continue;
        }

        if (owner == curthread)
            mutex_panic("recursive mutex_enter", lp);

        /*
         * If lock is held but owner is not yet set, spin.
         * (Only relevant for platforms that don't have cas.)
         */
        if (owner == MUTEX_NO_OWNER)
            continue;

        /*
         * When searching the other CPUs, start with the one where
         * we last saw the owner thread.  If owner is running, spin.
         *
         * We must disable preemption at this point to guarantee
         * that the list doesn't change while we traverse it
         * without the cpu_lock mutex.  While preemption is
         * disabled, we must revalidate our cached cpu pointer.
         */
        kpreempt_disable();
        if (cpup->cpu_next == NULL)
            cpup = cpu_list;
        last_cpu = cpup;        /* mark end of search */
        do {
            if (cpup->cpu_thread == owner) {
                kpreempt_enable();
                goto spin;
            }
        } while ((cpup = cpup->cpu_next) != last_cpu);
        kpreempt_enable();

        /*
         * The owner appears not to be running, so block.
         * See the Big Theory Statement for memory ordering issues.
         */
        ts = turnstile_lookup(lp);
        MUTEX_SET_WAITERS(lp);
        membar_enter();

        /*
         * Recheck whether owner is running after waiters bit hits
         * global visibility (above).  If owner is running, spin.
         *
         * Since we are at ipl DISP_LEVEL, kernel preemption is
         * disabled, however we still need to revalidate our cached
         * cpu pointer to make sure the cpu hasn't been deleted.
         */
        if (cpup->cpu_next == NULL)
            last_cpu = cpup = cpu_list;
        do {
            if (cpup->cpu_thread == owner) {
                turnstile_exit(lp);
                goto spin;
            }
        } while ((cpup = cpup->cpu_next) != last_cpu);
        membar_consumer();

        /*
         * If owner and waiters bit are unchanged, block.
         */
        if (MUTEX_OWNER(vlp) == owner && MUTEX_HAS_WAITERS(vlp)) {
            sleep_time -= gethrtime();
            (void) turnstile_block(ts, TS_WRITER_Q, lp,
                &mutex_sobj_ops, NULL, NULL);
            sleep_time += gethrtime();
            sleep_count++;
        } else {
            turnstile_exit(lp);
        }
    }

    ASSERT(MUTEX_OWNER(lp) == curthread);

    if (sleep_time != 0) {
        /*
         * Note, sleep time is the sum of all the sleeping we
         * did.
         */
        LOCKSTAT_RECORD(LS_MUTEX_ENTER_BLOCK, lp, sleep_time);
    }

    /*
     * We do not count a sleep as a spin.
     */
    if (spin_count > sleep_count)
        LOCKSTAT_RECORD(LS_MUTEX_ENTER_SPIN, lp,
            spin_count - sleep_count);

    LOCKSTAT_RECORD0(LS_MUTEX_ENTER_ACQUIRE, lp);
}
/*ARGSUSED*/
static void
dtrace_sync_func(uint64_t arg1, uint64_t arg2)
{
    membar_consumer();
}
// Constructor for a full fenced block.
explicit solaris_fenced_block(full_t)
{
    membar_consumer();
}
static int atomic_get(sp_counted_base_atomic_type volatile *pw)
{
    membar_consumer();
    return pw->i;
}
// Constructor.
solaris_fenced_block()
{
    membar_consumer();
}
static timestruc_t
todxen_get(tod_ops_t *top)
{
    todinfo_t tod;
    timestruc_t ts, wcts;
    shared_info_t *si = HYPERVISOR_shared_info;
    uint32_t xen_wc_version;
    hrtime_t now;

    ASSERT(MUTEX_HELD(&tod_lock));

    /*
     * Pick up the wallclock base time
     */
    do {
        xen_wc_version = si->wc_version;

        membar_consumer();

        wcts.tv_sec = si->wc_sec;
        wcts.tv_nsec = si->wc_nsec;

        membar_consumer();

    } while ((si->wc_version & 1) | (xen_wc_version ^ si->wc_version));

    /*
     * Compute the TOD as the wallclock (boot) time plus time-since-boot
     * (/not/ hrtime!) and normalize.
     */
    now = xpv_getsystime() +
        (hrtime_t)wcts.tv_nsec + (hrtime_t)wcts.tv_sec * NANOSEC;
    ts.tv_sec = (time_t)(now / NANOSEC);
    ts.tv_nsec = (long)(now % NANOSEC);

    /*
     * Apply GMT lag correction from /etc/rtc_config to get UTC time
     */
    ts.tv_sec += ggmtl();

    /*
     * Validate the TOD in case of total insanity
     */
    tod = utc_to_tod(ts.tv_sec);
    if (tod.tod_year < 69) {
        static int range_warn = 1;      /* warn only once */

        if (range_warn) {
            /*
             * If we're dom0, go invoke the underlying driver; the
             * routine should complain if it discovers something
             * wrong.
             */
            if (DOMAIN_IS_INITDOMAIN(xen_info))
                (void) TODOP_GET(top->tod_next);

            /*
             * Check the virtual hardware.
             */
            if (tod.tod_year > 38)
                cmn_err(CE_WARN, "hypervisor wall clock is out "
                    "of range -- time needs to be reset");
            range_warn = 0;
        }
        tod.tod_year += 100;
        ts.tv_sec = tod_to_utc(tod);
    }

    return (ts);
}
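/*
 * Added sketch, not from the original source (the real update is performed by
 * the hypervisor, not by guest code): the retry loop above assumes a
 * seqlock-style writer that makes wc_version odd while the fields are in flux
 * and even again once they are consistent, so readers that raced an update
 * loop and try again.  wallclock_update() and its arguments are hypothetical.
 */
static void
wallclock_update(shared_info_t *si, uint32_t sec, uint32_t nsec)
{
    si->wc_version++;           /* now odd: update in progress */
    membar_producer();
    si->wc_sec = sec;
    si->wc_nsec = nsec;
    membar_producer();
    si->wc_version++;           /* even again: readers may use the values */
}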