/*
 * buf points to a user address and the data should be copied out to that
 * address in the current process.
 */
int
kcpc_sample(kcpc_set_t *set, uint64_t *buf, hrtime_t *hrtime, uint64_t *tick)
{
	kcpc_ctx_t	*ctx = set->ks_ctx;
	uint64_t	curtick = KCPC_GET_TICK();

	if (ctx == NULL)
		return (EINVAL);
	else if (ctx->kc_flags & KCPC_CTX_INVALID)
		return (EAGAIN);

	if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0) {
		/*
		 * Kernel preemption must be disabled while reading the
		 * hardware regs, and if this is a CPU-bound context, while
		 * checking the CPU binding of the current thread.
		 */
		kpreempt_disable();

		if (ctx->kc_cpuid != -1) {
			if (curthread->t_bind_cpu != ctx->kc_cpuid) {
				kpreempt_enable();
				return (EAGAIN);
			}
		}

		if (ctx->kc_thread == curthread) {
			ctx->kc_hrtime = gethrtime();
			pcbe_ops->pcbe_sample(ctx);
			ctx->kc_vtick += curtick - ctx->kc_rawtick;
			ctx->kc_rawtick = curtick;
		}

		kpreempt_enable();

		/*
		 * The config may have been invalidated by
		 * the pcbe_sample op.
		 */
		if (ctx->kc_flags & KCPC_CTX_INVALID)
			return (EAGAIN);
	}

	if (copyout(set->ks_data, buf,
	    set->ks_nreqs * sizeof (uint64_t)) == -1)
		return (EFAULT);
	if (copyout(&ctx->kc_hrtime, hrtime, sizeof (uint64_t)) == -1)
		return (EFAULT);
	if (copyout(&ctx->kc_vtick, tick, sizeof (uint64_t)) == -1)
		return (EFAULT);

	return (0);
}
/*
 * Called from trap() when processing the ast posted by the high-level
 * interrupt handler.
 */
int
kcpc_overflow_ast()
{
	kcpc_ctx_t	*ctx = curthread->t_cpc_ctx;
	int		i;
	int		found = 0;
	uint64_t	curtick = KCPC_GET_TICK();

	ASSERT(ctx != NULL);	/* Beware of interrupt skid. */

	/*
	 * An overflow happened: sample the context to ensure that
	 * the overflow is propagated into the upper bits of the
	 * virtualized 64-bit counter(s).
	 */
	kpreempt_disable();
	ctx->kc_hrtime = gethrtime_waitfree();
	pcbe_ops->pcbe_sample(ctx);
	kpreempt_enable();

	ctx->kc_vtick += curtick - ctx->kc_rawtick;

	/*
	 * The interrupt handler has marked any pics with KCPC_PIC_OVERFLOWED
	 * if that pic generated an overflow and if the request it was counting
	 * on behalf of had CPC_OVERFLOW_REQUEST specified. We go through all
	 * pics in the context and clear the KCPC_PIC_OVERFLOWED flags. If we
	 * found any overflowed pics, keep the context frozen and return true
	 * (thus causing a signal to be sent).
	 */
	for (i = 0; i < cpc_ncounters; i++) {
		if (ctx->kc_pics[i].kp_flags & KCPC_PIC_OVERFLOWED) {
			atomic_and_uint(&ctx->kc_pics[i].kp_flags,
			    ~KCPC_PIC_OVERFLOWED);
			found = 1;
		}
	}
	if (found)
		return (1);

	/*
	 * Otherwise, re-enable the counters and continue life as before.
	 */
	kpreempt_disable();
	atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
	pcbe_ops->pcbe_program(ctx);
	kpreempt_enable();
	return (0);
}
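/*
 * Illustrative sketch (not from the original source): the
 * kc_vtick += curtick - kc_rawtick idiom above widens a free-running
 * hardware tick counter into a 64-bit virtual counter. A minimal
 * user-space version of the same accumulation pattern follows; the
 * 32-bit read_hw_ticks() source is a stand-in, not a real API.
 */
#include <stdint.h>

/* Stand-in for a wrapping 32-bit hardware tick counter. */
static uint32_t
read_hw_ticks(void)
{
	static uint32_t fake;

	return (fake += 0x40000001u);	/* large strides force wraparound */
}

static uint64_t vtick;		/* virtualized 64-bit counter */
static uint32_t rawtick;	/* raw value at the last sample */

/*
 * Fold the elapsed raw ticks into the 64-bit accumulator. Unsigned
 * subtraction yields the correct delta even across one wrap, provided
 * samples are taken more often than the counter's full period.
 */
void
vtick_sample(void)
{
	uint32_t cur = read_hw_ticks();

	vtick += (uint32_t)(cur - rawtick);
	rawtick = cur;
}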
/*
 * Level 10 (clock) interrupts from system counter.
 */
int
clockintr_4m(void *cap)
{

	/*
	 * XXX this needs to be fixed in a more general way
	 * problem is that the kernel enables interrupts and THEN
	 * sets up clocks.  In between there's an opportunity to catch
	 * a timer interrupt - if we call hardclock() at that point we'll
	 * panic
	 * so for now just bail when cold
	 *
	 * For MP, we defer calling hardclock() to the schedintr so
	 * that we call it on all cpus.
	 */
	if (cold)
		return 0;

	kpreempt_disable();
	/* read the limit register to clear the interrupt */
	*((volatile int *)&timerreg4m->t_limit);
	tickle_tc();

	/*
	 * We don't have a system-clock per-cpu, and we'd like to keep
	 * the per-cpu timer for the statclock, so, send an IPI to
	 * everyone to call hardclock.
	 */
	handle_hardclock(cap);

	kpreempt_enable();
	return (1);
}
int
kcpc_restart(kcpc_set_t *set)
{
	kcpc_ctx_t	*ctx = set->ks_ctx;
	int		i;

	ASSERT(ctx != NULL);
	ASSERT(ctx->kc_thread == curthread);
	ASSERT(ctx->kc_cpuid == -1);

	kpreempt_disable();

	/*
	 * If the user is doing this on a running set, make sure the counters
	 * are stopped first.
	 */
	if ((ctx->kc_flags & KCPC_CTX_FREEZE) == 0)
		pcbe_ops->pcbe_allstop();

	for (i = 0; i < set->ks_nreqs; i++) {
		*(set->ks_req[i].kr_data) = set->ks_req[i].kr_preset;
		pcbe_ops->pcbe_configure(0, NULL, set->ks_req[i].kr_preset,
		    0, 0, NULL, &set->ks_req[i].kr_config, NULL);
	}

	/*
	 * Ask the backend to program the hardware.
	 */
	ctx->kc_rawtick = KCPC_GET_TICK();
	atomic_and_uint(&ctx->kc_flags, ~KCPC_CTX_FREEZE);
	pcbe_ops->pcbe_program(ctx);
	kpreempt_enable();

	return (0);
}
/*
 * Fill in the specified extra register state area with the lwp's
 * platform-dependent floating-point extra register state information.
 * NOTE:  'lwp' might not correspond to 'curthread' since this is
 * called from code in /proc to get the registers of another lwp.
 */
void
xregs_getfpfiller(klwp_id_t lwp, caddr_t xrp)
{
	prxregset_t *xregs = (prxregset_t *)xrp;
	kfpu_t *fp = lwptofpu(lwp);
	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
	uint64_t gsr;

	/*
	 * fp_fksave() does not flush the GSR register into
	 * the lwp area, so do it now
	 */
	kpreempt_disable();
	if (ttolwp(curthread) == lwp && fpu_exists) {
		fp->fpu_fprs = _fp_read_fprs();
		if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
			_fp_write_fprs(fprs);
			fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
		}
		save_gsr(fp);
	}
	gsr = get_gsr(fp);
	kpreempt_enable();
	PRXREG_GSR(xregs) = gsr;
}
void
cpu_set_curpri(int pri)
{

	kpreempt_disable();
	curcpu()->ci_schedstate.spc_curpriority = pri;
	kpreempt_enable();
}
void
userret(struct lwp *l)
{
#if defined(__PROG32) && defined(ARM_MMU_EXTENDED)
	/*
	 * If our ASID got released, access via TTBR0 will have been disabled.
	 * So if it is disabled, activate the lwp again to get a new ASID.
	 */
#ifdef __HAVE_PREEMPTION
	kpreempt_disable();
#endif
	KASSERT(curcpu()->ci_pmap_cur == l->l_proc->p_vmspace->vm_map.pmap);
	if (__predict_false(armreg_ttbcr_read() & TTBCR_S_PD0)) {
		pmap_activate(l);
	}
	KASSERT(!(armreg_ttbcr_read() & TTBCR_S_PD0));
#ifdef __HAVE_PREEMPTION
	kpreempt_enable();
#endif
#endif

	/* Invoke MI userret code */
	mi_userret(l);

#if defined(__PROG32) && defined(DIAGNOSTIC)
	KASSERT(VALID_R15_PSR(lwp_trapframe(l)->tf_pc,
	    lwp_trapframe(l)->tf_spsr));
#endif
}
uint64_t
txg_hold_open(dsl_pool_t *dp, txg_handle_t *th)
{
	tx_state_t *tx = &dp->dp_tx;
	tx_cpu_t *tc;
	uint64_t txg;

	/*
	 * It appears the processor id is simply used as a "random"
	 * number to index into the array, and there isn't any other
	 * significance to the chosen tx_cpu. Because.. Why not use
	 * the current cpu to index into the array?
	 */
	kpreempt_disable();
	tc = &tx->tx_cpu[CPU_SEQID];
	kpreempt_enable();

	mutex_enter(&tc->tc_lock);
	txg = tx->tx_open_txg;
	tc->tc_count[txg & TXG_MASK]++;

	th->th_cpu = tc;
	th->th_txg = txg;

	return (txg);
}
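/*
 * Illustrative sketch (not from the original source): txg_hold_open()
 * only needs the CPU id as a contention-spreading hash, so it is
 * harmless if the thread migrates right after kpreempt_enable() -- the
 * per-slot lock provides the real correctness. A user-space analogue
 * using sched_getcpu() in place of CPU_SEQID; all names are invented.
 */
#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdint.h>

#define NSLOTS	64			/* assumed CPU bound; illustrative */

struct slot {
	pthread_mutex_t	lock;
	uint64_t	count;
};

static struct slot slots[NSLOTS];

void
slots_init(void)
{
	for (int i = 0; i < NSLOTS; i++)
		pthread_mutex_init(&slots[i].lock, NULL);
}

/*
 * A stale CPU id is fine here, exactly as in txg_hold_open(): the id
 * only picks a slot; the mutex on that slot serializes the update.
 */
void
slot_bump(void)
{
	int cpu = sched_getcpu();
	struct slot *s = &slots[(cpu < 0 ? 0 : cpu) % NSLOTS];

	pthread_mutex_lock(&s->lock);
	s->count++;
	pthread_mutex_unlock(&s->lock);
}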
static void
dr_enable_intr(void)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	cyclic_resume();
	/* Pairs with the kpreempt_disable() in the matching disable path. */
	kpreempt_enable();
}
static void
fletcher_4_benchmark_impl(boolean_t native, char *data, uint64_t data_size)
{
	struct fletcher_4_kstat *fastest_stat =
	    &fletcher_4_stat_data[fletcher_4_supp_impls_cnt];
	hrtime_t start;
	uint64_t run_bw, run_time_ns, best_run = 0;
	zio_cksum_t zc;
	uint32_t i, l, sel_save = IMPL_READ(fletcher_4_impl_chosen);

	fletcher_checksum_func_t *fletcher_4_test = native ?
	    fletcher_4_native : fletcher_4_byteswap;

	for (i = 0; i < fletcher_4_supp_impls_cnt; i++) {
		struct fletcher_4_kstat *stat = &fletcher_4_stat_data[i];
		uint64_t run_count = 0;

		/* temporarily set an implementation */
		fletcher_4_impl_chosen = i;

		kpreempt_disable();
		start = gethrtime();
		do {
			for (l = 0; l < 32; l++, run_count++)
				fletcher_4_test(data, data_size, NULL, &zc);

			run_time_ns = gethrtime() - start;
		} while (run_time_ns < FLETCHER_4_BENCH_NS);
		kpreempt_enable();

		run_bw = data_size * run_count * NANOSEC;
		run_bw /= run_time_ns;	/* B/s */

		if (native)
			stat->native = run_bw;
		else
			stat->byteswap = run_bw;

		if (run_bw > best_run) {
			best_run = run_bw;

			if (native) {
				fastest_stat->native = i;
				FLETCHER_4_FASTEST_FN_COPY(native,
				    fletcher_4_supp_impls[i]);
			} else {
				fastest_stat->byteswap = i;
				FLETCHER_4_FASTEST_FN_COPY(byteswap,
				    fletcher_4_supp_impls[i]);
			}
		}
	}

	/* restore original selection */
	atomic_swap_32(&fletcher_4_impl_chosen, sel_save);
}
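/*
 * Illustrative sketch (not from the original source): the benchmark
 * loop above amortizes clock reads by running the candidate 32 times
 * per gethrtime() sample, then converts the totals to bytes/second. A
 * stripped-down user-space version of the same measurement pattern;
 * the workload and the 100 ms budget are placeholders.
 */
#include <stdint.h>
#include <time.h>

#define BENCH_NS	100000000ULL	/* 100 ms budget; illustrative */
#define NSEC_PER_SEC	1000000000ULL

static uint64_t
now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ((uint64_t)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec);
}

/* Stand-in workload; a real caller would benchmark a checksum here. */
static void
workload(const char *data, uint64_t size)
{
	volatile uint64_t sum = 0;

	for (uint64_t i = 0; i < size; i++)
		sum += (unsigned char)data[i];
}

/*
 * Returns measured throughput in B/s. Note the multiply-then-divide
 * can overflow uint64_t for very large size * runs products.
 */
uint64_t
bench_bw(const char *data, uint64_t size)
{
	uint64_t start = now_ns(), elapsed, runs = 0;

	do {
		/* batch 32 calls per clock read, as the kernel code does */
		for (int l = 0; l < 32; l++, runs++)
			workload(data, size);
		elapsed = now_ns() - start;
	} while (elapsed < BENCH_NS);

	return (size * runs * NSEC_PER_SEC / elapsed);
}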
bool
cpu_intr_p(void)
{
	bool rv;

	kpreempt_disable();
	rv = (curcpu()->ci_idepth != 0);
	kpreempt_enable();

	return rv;
}
bool
cpu_intr_p(void)
{
	int idepth;

	kpreempt_disable();
	idepth = curcpu()->ci_idepth;
	kpreempt_enable();

	return (idepth >= 0);
}
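/*
 * Note on the two cpu_intr_p() variants above: they come from different
 * machine-dependent ports. Where ci_idepth counts interrupt nesting
 * from zero, the predicate is (ci_idepth != 0); where the port biases
 * the field to -1 outside interrupt context, it is (ci_idepth >= 0).
 * In both, kpreempt_disable() merely pins curcpu() for the read; the
 * answer is migration-safe either way, because a preemptible lwp is by
 * definition not running in interrupt context on any CPU.
 */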
/*ARGSUSED*/
static void
s10_amd64_correct_fsreg(klwp_t *l)
{
	if (lwp_getdatamodel(l) == DATAMODEL_NATIVE) {
		kpreempt_disable();
		l->lwp_pcb.pcb_fs = LWPFS_SEL;
		l->lwp_pcb.pcb_rupdate = 1;
		lwptot(l)->t_post_sys = 1;	/* Guarantee update_sregs() */
		kpreempt_enable();
	}
}
/*
 * Called from lwp_exit() and thread_exit()
 */
void
kcpc_passivate(void)
{
	kcpc_ctx_t *ctx = curthread->t_cpc_ctx;
	kcpc_set_t *set = curthread->t_cpc_set;

	if (set == NULL)
		return;

	/*
	 * We're cleaning up after this thread; ensure there are no dangling
	 * CPC pointers left behind. The context and set will be freed by
	 * freectx() in the case of an LWP-bound set, and by kcpc_unbind() in
	 * the case of a CPU-bound set.
	 */
	curthread->t_cpc_ctx = NULL;

	if (ctx == NULL) {
		/*
		 * This thread has a set but no context; it must be a CPU-bound
		 * set. The hardware will be stopped via kcpc_unbind() when the
		 * process exits and closes its file descriptors with
		 * kcpc_close(). Our only job here is to clean up this thread's
		 * state; the set will be freed with the unbind().
		 */
		(void) kcpc_unbind(set);

		/*
		 * Unbinding a set belonging to the current thread should clear
		 * its set pointer.
		 */
		ASSERT(curthread->t_cpc_set == NULL);
		return;
	}

	curthread->t_cpc_set = NULL;

	/*
	 * This thread/LWP is exiting but context switches will continue to
	 * happen for a bit as the exit proceeds. Kernel preemption must be
	 * disabled here to prevent a race between checking or setting the
	 * INVALID_STOPPED flag here and kcpc_restore() setting the flag during
	 * a context switch.
	 */
	kpreempt_disable();
	if ((ctx->kc_flags & KCPC_CTX_INVALID_STOPPED) == 0) {
		pcbe_ops->pcbe_allstop();
		atomic_or_uint(&ctx->kc_flags,
		    KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED);
	}
	kpreempt_enable();
}
static void
gdt_ucode_model(model_t model)
{
	kpreempt_disable();
	if (model == DATAMODEL_NATIVE) {
		gdt_update_usegd(GDT_UCODE, &ucs_on);
		gdt_update_usegd(GDT_U32CODE, &ucs32_off);
	} else {
		gdt_update_usegd(GDT_U32CODE, &ucs32_on);
		gdt_update_usegd(GDT_UCODE, &ucs_off);
	}
	kpreempt_enable();
}
static uint64_t
dmu_object_alloc_impl(objset_t *os, dmu_object_type_t ot, int blocksize,
    int indirect_blockshift, dmu_object_type_t bonustype, int bonuslen,
    int dnodesize, dnode_t **allocated_dnode, void *tag, dmu_tx_t *tx)
{
	uint64_t object;
	uint64_t L1_dnode_count = DNODES_PER_BLOCK <<
	    (DMU_META_DNODE(os)->dn_indblkshift - SPA_BLKPTRSHIFT);
	dnode_t *dn = NULL;
	int dn_slots = dnodesize >> DNODE_SHIFT;
	boolean_t restarted = B_FALSE;
	uint64_t *cpuobj = NULL;
	int dnodes_per_chunk = 1 << dmu_object_alloc_chunk_shift;
	int error;

	kpreempt_disable();
	cpuobj = &os->os_obj_next_percpu[CPU_SEQID %
	    os->os_obj_next_percpu_len];
	kpreempt_enable();

	if (dn_slots == 0) {
		dn_slots = DNODE_MIN_SLOTS;
	} else {
		ASSERT3S(dn_slots, >=, DNODE_MIN_SLOTS);
		ASSERT3S(dn_slots, <=, DNODE_MAX_SLOTS);
	}

	/*
	 * The "chunk" of dnodes that is assigned to a CPU-specific
	 * allocator needs to be at least one block's worth, to avoid
	 * lock contention on the dbuf. It can be at most one L1 block's
	 * worth, so that the "rescan after polishing off an L1's worth"
	 * logic below will be sure to kick in.
	 */
	if (dnodes_per_chunk < DNODES_PER_BLOCK)
		dnodes_per_chunk = DNODES_PER_BLOCK;
	if (dnodes_per_chunk > L1_dnode_count)
		dnodes_per_chunk = L1_dnode_count;

	/*
	 * The caller requested the dnode be returned as a performance
	 * optimization in order to avoid releasing the hold only to
	 * immediately reacquire it. Since the caller is responsible
	 * for releasing the hold, they must provide the tag.
	 */
	if (allocated_dnode != NULL) {
		ASSERT3P(tag, !=, NULL);
	} else {
/*
 * Level 14 (stat clock) interrupts from processor counter.
 */
int
statintr_4m(void *cap)
{
	struct clockframe *frame = cap;
	u_long newint;

	kpreempt_disable();
	/* read the limit register to clear the interrupt */
	*((volatile int *)&counterreg4m->t_limit);

	statclock(frame);

	/*
	 * Compute new randomized interval.
	 */
	newint = new_interval();

	/*
	 * Use the `non-resetting' limit register, so we don't
	 * lose the counter ticks that happened since this
	 * interrupt was raised.
	 */
	counterreg4m->t_limit_nr = tmr_ustolim4m(newint);

	/*
	 * The factor 8 is only valid for stathz==100.
	 * See also clock.c
	 */
	if ((++cpuinfo.ci_schedstate.spc_schedticks & 7) == 0 &&
	    schedhz != 0) {
		if (CLKF_LOPRI(frame, IPL_SCHED)) {
			/* No need to schedule a soft interrupt */
			spllowerschedclock();
			schedintr(cap);
		} else {
			/*
			 * We're interrupting a thread that may have the
			 * scheduler lock; run schedintr() on this CPU later.
			 */
			raise_ipi(&cpuinfo, IPL_SCHED);	/* sched_cookie->pil */
		}
	}
	kpreempt_enable();

	return (1);
}
void
syscall_mstate(int fromms, int toms)
{
	kthread_t *t = curthread;
	zone_t *z = ttozone(t);
	struct mstate *ms;
	hrtime_t *mstimep;
	hrtime_t curtime;
	klwp_t *lwp;
	hrtime_t newtime;
	cpu_t *cpu;
	uint16_t gen;

	if ((lwp = ttolwp(t)) == NULL)
		return;

	ASSERT(fromms < NMSTATES);
	ASSERT(toms < NMSTATES);

	ms = &lwp->lwp_mstate;
	mstimep = &ms->ms_acct[fromms];
	curtime = gethrtime_unscaled();
	newtime = curtime - ms->ms_state_start;
	while (newtime < 0) {
		curtime = gethrtime_unscaled();
		newtime = curtime - ms->ms_state_start;
	}
	*mstimep += newtime;
	if (fromms == LMS_USER)
		atomic_add_64(&z->zone_utime, newtime);
	else if (fromms == LMS_SYSTEM)
		atomic_add_64(&z->zone_stime, newtime);
	t->t_mstate = toms;
	ms->ms_state_start = curtime;
	ms->ms_prev = fromms;
	kpreempt_disable();	/* don't change CPU while changing CPU's state */
	cpu = CPU;
	ASSERT(cpu == t->t_cpu);
	if ((toms != LMS_USER) && (cpu->cpu_mstate != CMS_SYSTEM)) {
		NEW_CPU_MSTATE(CMS_SYSTEM);
	} else if ((toms == LMS_USER) && (cpu->cpu_mstate != CMS_USER)) {
		NEW_CPU_MSTATE(CMS_USER);
	}
	kpreempt_enable();
}
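/*
 * Illustrative sketch (not from the original source): the
 * while (newtime < 0) loop in syscall_mstate() retries when an
 * unscaled timestamp delta momentarily appears negative. The same
 * defensive pattern in portable user-space C; the int64_t accounting
 * fields are invented for illustration.
 */
#include <stdint.h>
#include <time.h>

static int64_t
sample_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ((int64_t)ts.tv_sec * 1000000000LL + ts.tv_nsec);
}

/*
 * Charge the elapsed interval since *state_start to *acct, re-reading
 * the clock until the delta is non-negative, then restart the state
 * timer from the accepted sample.
 */
void
charge_interval(int64_t *acct, int64_t *state_start)
{
	int64_t cur, delta;

	do {
		cur = sample_ns();
		delta = cur - *state_start;
	} while (delta < 0);

	*acct += delta;
	*state_start = cur;
}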
static void
pwrnow_power(cpuset_t set, uint32_t req_state)
{
	/*
	 * If thread is already running on target CPU then just
	 * make the transition request. Otherwise, we'll need to
	 * make a cross-call.
	 */
	kpreempt_disable();
	if (CPU_IN_SET(set, CPU->cpu_id)) {
		pwrnow_pstate_transition(req_state);
		CPUSET_DEL(set, CPU->cpu_id);
	}
	if (!CPUSET_ISNULL(set)) {
		xc_call((xc_arg_t)req_state, NULL, NULL,
		    CPUSET2BV(set), (xc_func_t)pwrnow_pstate_transition);
	}
	kpreempt_enable();
}
/*
 * If this is a process in a branded zone, then we want it to use the brand
 * syscall entry points instead of the standard Solaris entry points. This
 * routine must be called when a new lwp is created within a branded zone
 * or when an existing lwp moves into a branded zone via a zone_enter()
 * operation.
 */
void
lwp_attach_brand_hdlrs(klwp_t *lwp)
{
	kthread_t *t = lwptot(lwp);

	ASSERT(PROC_IS_BRANDED(lwptoproc(lwp)));

	ASSERT(removectx(t, NULL, brand_interpositioning_disable,
	    brand_interpositioning_enable, NULL, NULL,
	    brand_interpositioning_disable, NULL) == 0);
	installctx(t, NULL, brand_interpositioning_disable,
	    brand_interpositioning_enable, NULL, NULL,
	    brand_interpositioning_disable, NULL);

	if (t == curthread) {
		kpreempt_disable();
		brand_interpositioning_enable();
		kpreempt_enable();
	}
}
/*
 * If this is a process in a branded zone, then we want it to disable the
 * brand syscall entry points. This routine must be called when the last
 * lwp in a process is exiting in proc_exit().
 */
void
lwp_detach_brand_hdlrs(klwp_t *lwp)
{
	kthread_t *t = lwptot(lwp);

	ASSERT(PROC_IS_BRANDED(lwptoproc(lwp)));
	if (t == curthread)
		kpreempt_disable();

	/* Remove the original context handlers */
	VERIFY(removectx(t, NULL, brand_interpositioning_disable,
	    brand_interpositioning_enable, NULL, NULL,
	    brand_interpositioning_disable, NULL) != 0);

	if (t == curthread) {
		/* Cleanup our MSR and IDT entries. */
		brand_interpositioning_disable();
		kpreempt_enable();
	}
}
/*
 * set the specified lwp's platform-dependent floating-point
 * extra register state based on the specified input
 */
void
xregs_setfpfiller(klwp_id_t lwp, caddr_t xrp)
{
	prxregset_t *xregs = (prxregset_t *)xrp;
	kfpu_t *fp = lwptofpu(lwp);
	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
	uint64_t gsr = PRXREG_GSR(xregs);

	kpreempt_disable();
	set_gsr(gsr, lwptofpu(lwp));

	if ((lwp == ttolwp(curthread)) && fpu_exists) {
		fp->fpu_fprs = _fp_read_fprs();
		if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
			_fp_write_fprs(fprs);
			fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
		}
		restore_gsr(lwptofpu(lwp));
	}
	kpreempt_enable();
}
void
setfpasrs(klwp_t *lwp, asrset_t asr)
{
	kfpu_t *fp = lwptofpu(lwp);
	uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);

	kpreempt_disable();
	if (ttolwp(curthread) == lwp)
		fp->fpu_fprs = _fp_read_fprs();
	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) {
		set_gsr(asr[ASR_GSR], fp);
		if (fpu_exists && ttolwp(curthread) == lwp) {
			if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
				_fp_write_fprs(fprs);
				fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
			}
			restore_gsr(fp);
		}
	}
	kpreempt_enable();
}
/*
 * Stop the counters on the CPU this context is bound to.
 */
static void
kcpc_stop_hw(kcpc_ctx_t *ctx)
{
	cpu_t *cp;

	ASSERT((ctx->kc_flags & (KCPC_CTX_INVALID | KCPC_CTX_INVALID_STOPPED))
	    == KCPC_CTX_INVALID);

	kpreempt_disable();

	cp = cpu_get(ctx->kc_cpuid);
	ASSERT(cp != NULL);

	if (cp == CPU) {
		pcbe_ops->pcbe_allstop();
		atomic_or_uint(&ctx->kc_flags, KCPC_CTX_INVALID_STOPPED);
	} else
		kcpc_remote_stop(cp);
	kpreempt_enable();
}
void
ast(struct trapframe *tf)
{
	struct lwp * const l = curlwp;

#ifdef acorn26
	/* Enable interrupts if they were enabled before the trap. */
	if ((tf->tf_r15 & R15_IRQ_DISABLE) == 0)
		int_on();
#else
	/* Interrupts were restored by exception_exit. */
#endif

#ifdef __PROG32
	KASSERT(VALID_R15_PSR(tf->tf_pc, tf->tf_spsr));
#endif

#ifdef __HAVE_PREEMPTION
	kpreempt_disable();
#endif
	struct cpu_info * const ci = curcpu();

	ci->ci_data.cpu_ntrap++;

	KDASSERT(ci->ci_cpl == IPL_NONE);
	const int want_resched = ci->ci_want_resched;
#ifdef __HAVE_PREEMPTION
	kpreempt_enable();
#endif

	if (l->l_pflag & LP_OWEUPC) {
		l->l_pflag &= ~LP_OWEUPC;
		ADDUPROF(l);
	}

	/* Allow a forced task switch. */
	if (want_resched)
		preempt();
	userret(l);
}
/*
 * Start Output
 *
 * We don't want to be calling the network stack with sc_intr_lock held
 * so make a note of what is to be sent, and schedule an interrupt to
 * bundle it up and queue it.
 */
static int
btsco_start_output(void *hdl, void *block, int blksize,
    void (*intr)(void *), void *intrarg)
{
	struct btsco_softc *sc = hdl;

	DPRINTFN(5, "%s blksize %d\n", sc->sc_name, blksize);

	if (sc->sc_sco == NULL)
		return ENOTCONN;	/* connection lost */

	sc->sc_tx_block = block;
	sc->sc_tx_pending = 0;
	sc->sc_tx_size = blksize;
	sc->sc_tx_intr = intr;
	sc->sc_tx_intrarg = intrarg;

	kpreempt_disable();
	softint_schedule(sc->sc_intr);
	kpreempt_enable();

	return 0;
}
/*
 * For use by procfs to save the floating point context of the thread.
 * Note that the check (ttolwp(lwp) == curthread) in prstop, which calls
 * this function, ensures that it is safe to read the fprs here.
 */
void
fp_prsave(kfpu_t *fp)
{
	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) {
		kpreempt_disable();
		if (fpu_exists) {
			fp->fpu_fprs = _fp_read_fprs();
			if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
				uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);

				_fp_write_fprs(fprs);
				fp->fpu_fprs = fprs;
#ifdef DEBUG
				if (fpdispr)
					cmn_err(CE_NOTE,
					    "fp_prsave with fp disabled!");
#endif
			}
			fp_fksave(fp);
		}
		kpreempt_enable();
	}
}
/*
 * Drop the prom lock if it is held by the current CPU. If the lock is held
 * recursively, return without clearing prom_cpu. If the hold count is now
 * zero, clear prom_cpu and cv_signal any waiting CPU.
 */
void
kern_postprom(void)
{
	processorid_t cpuid = getprocessorid();
	cpu_t *cp = cpu[cpuid];

	if (panicstr)
		return;	/* do not modify lock further if we have panicked */

	if (prom_cpu != cp)
		panic("kern_postprom: not owner, cp=%p owner=%p",
		    (void *)cp, (void *)prom_cpu);

	if (prom_holdcnt == 0)
		panic("kern_postprom: prom_holdcnt == 0, owner=%p",
		    (void *)prom_cpu);

	if (atomic_dec_32_nv(&prom_holdcnt) != 0)
		return;	/* prom lock is held recursively by this CPU */

	if ((boothowto & RB_DEBUG) && prom_exit_enter_debugger)
		kmdb_enter();

	prom_thread = NULL;
	membar_producer();

	prom_cpu = NULL;
	membar_producer();

	if (CPU_IN_SET(cpu_ready_set, cpuid) && cp->cpu_m.mutex_ready) {
		mutex_enter(&prom_mutex);
		cv_signal(&prom_cv);
		mutex_exit(&prom_mutex);
		kpreempt_enable();
	}
}
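/*
 * Illustrative sketch (not from the original source): kern_postprom()
 * shows the classic recursive-hold release -- an atomic decrement where
 * only the drop to zero clears ownership and wakes waiters. A condensed
 * user-space version with C11 atomics and pthreads; all names invented.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>

static pthread_mutex_t waiters_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t waiters_cv = PTHREAD_COND_INITIALIZER;
static atomic_uintptr_t lock_owner;	/* 0 == unowned */
static atomic_uint holdcnt;

/* Release one hold; only the final release publishes and signals. */
void
hold_release(void)
{
	if (atomic_fetch_sub(&holdcnt, 1) != 1)
		return;	/* still held recursively by this thread */

	/* Clear ownership; release ordering pairs with acquiring threads. */
	atomic_store_explicit(&lock_owner, 0, memory_order_release);

	pthread_mutex_lock(&waiters_mtx);
	pthread_cond_signal(&waiters_cv);
	pthread_mutex_unlock(&waiters_mtx);
}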
/*
 * Top level routine to direct suspend/resume of a domain.
 */
void
xen_suspend_domain(void)
{
	extern void rtcsync(void);
	extern hrtime_t hres_last_tick;
	mfn_t start_info_mfn;
	ulong_t flags;
	pfn_t pfn;
	int i;

	/*
	 * Check that we are happy to suspend on this hypervisor.
	 */
	if (xen_hypervisor_supports_solaris(XEN_SUSPEND_CHECK) == 0) {
		cpr_err(CE_WARN, "Cannot suspend on this hypervisor "
		    "version: v%lu.%lu%s, need at least version v3.0.4 or "
		    "-xvm based hypervisor", XENVER_CURRENT(xv_major),
		    XENVER_CURRENT(xv_minor), XENVER_CURRENT(xv_ver));
		return;
	}

	/*
	 * XXPV - Are we definitely OK to suspend by the time we've connected
	 * the handler?
	 */

	cpr_err(CE_NOTE, "Domain suspending for save/migrate");

	SUSPEND_DEBUG("xen_suspend_domain\n");

	/*
	 * suspend interrupts and devices
	 * XXPV - we use suspend/resume for both save/restore domains (like sun
	 * cpr) and for migration.  Would be nice to know the difference if
	 * possible.  For save/restore where down time may be a long time, we
	 * may want to do more of the things that cpr does. (i.e. notify user
	 * processes, shrink memory footprint for faster restore, etc.)
	 */
	xen_suspend_devices();
	SUSPEND_DEBUG("xenbus_suspend\n");
	xenbus_suspend();

	pfn = hat_getpfnum(kas.a_hat, (caddr_t)xen_info);
	start_info_mfn = pfn_to_mfn(pfn);

	/*
	 * XXPV: cpu hotplug can hold this under a xenbus watch. Are we safe
	 * wrt xenbus being suspended here?
	 */
	mutex_enter(&cpu_lock);

	/*
	 * Suspend must be done on vcpu 0, as no context for other CPUs is
	 * saved.
	 *
	 * XXPV - add to taskq API ?
	 */
	thread_affinity_set(curthread, 0);
	kpreempt_disable();

	SUSPEND_DEBUG("xen_start_migrate\n");
	xen_start_migrate();
	if (ncpus > 1)
		suspend_cpus();

	/*
	 * We can grab the ec_lock as it's a spinlock with a high SPL. Hence
	 * any holder would have dropped it to get through suspend_cpus().
	 */
	mutex_enter(&ec_lock);

	/*
	 * From here on in, we can't take locks.
	 */
	SUSPEND_DEBUG("ec_suspend\n");
	ec_suspend();
	SUSPEND_DEBUG("gnttab_suspend\n");
	gnttab_suspend();

	flags = intr_clear();

	xpv_time_suspend();

	/*
	 * Currently, the hypervisor incorrectly fails to bring back
	 * powered-down VCPUs. Thus we need to record any powered-down VCPUs
	 * to prevent any attempts to operate on them. But we have to do this
	 * *after* the very first time we do ec_suspend().
	 */
	for (i = 1; i < ncpus; i++) {
		if (cpu[i] == NULL)
			continue;

		if (cpu_get_state(cpu[i]) == P_POWEROFF)
			CPUSET_ATOMIC_ADD(cpu_suspend_lost_set, i);
	}

	/*
	 * The dom0 save/migrate code doesn't automatically translate
	 * these into PFNs, but expects them to be, so we do it here.
	 * We don't use mfn_to_pfn() because so many OS services have
	 * been disabled at this point.
	 */
	xen_info->store_mfn = mfn_to_pfn_mapping[xen_info->store_mfn];
	xen_info->console.domU.mfn =
	    mfn_to_pfn_mapping[xen_info->console.domU.mfn];

	if (CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask == 0) {
		prom_printf("xen_suspend_domain(): "
		    "CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask not set\n");
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
	}

	if (HYPERVISOR_update_va_mapping((uintptr_t)HYPERVISOR_shared_info,
	    0, UVMF_INVLPG)) {
		prom_printf("xen_suspend_domain(): "
		    "HYPERVISOR_update_va_mapping() failed\n");
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
	}

	SUSPEND_DEBUG("HYPERVISOR_suspend\n");

	/*
	 * At this point we suspend and sometime later resume.
	 */
	if (HYPERVISOR_suspend(start_info_mfn)) {
		prom_printf("xen_suspend_domain(): "
		    "HYPERVISOR_suspend() failed\n");
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
	}

	/*
	 * Point HYPERVISOR_shared_info to its new value.
	 */
	if (HYPERVISOR_update_va_mapping((uintptr_t)HYPERVISOR_shared_info,
	    xen_info->shared_info | PT_NOCONSIST | PT_VALID | PT_WRITABLE,
	    UVMF_INVLPG))
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);

	if (xen_info->nr_pages != mfn_count) {
		prom_printf("xen_suspend_domain(): number of pages"
		    " changed, was 0x%lx, now 0x%lx\n", mfn_count,
		    xen_info->nr_pages);
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
	}

	xpv_time_resume();

	cached_max_mfn = 0;

	SUSPEND_DEBUG("gnttab_resume\n");
	gnttab_resume();

	/* XXPV: add a note that this must be lockless. */
	SUSPEND_DEBUG("ec_resume\n");
	ec_resume();

	intr_restore(flags);

	if (ncpus > 1)
		resume_cpus();

	mutex_exit(&ec_lock);
	xen_end_migrate();
	mutex_exit(&cpu_lock);

	/*
	 * Now we can take locks again.
	 */

	/*
	 * Force the tick value used for tv_nsec in hres_tick() to be up to
	 * date. rtcsync() will reset the hrestime value appropriately.
	 */
	hres_last_tick = xpv_gethrtime();

	/*
	 * XXPV: we need to have resumed the CPUs since this takes locks, but
	 * can remote CPUs see bad state? Presumably yes. Should probably nest
	 * taking of todlock inside of cpu_lock, or vice versa, then provide an
	 * unlocked version.  Probably need to call clkinitf to reset cpu freq
	 * and re-calibrate if we migrated to a different speed cpu.  Also need
	 * to make a (re)init_cpu_info call to update processor info structs
	 * and device tree info.  That remains to be written at the moment.
	 */
	rtcsync();
	rebuild_mfn_list();

	SUSPEND_DEBUG("xenbus_resume\n");
	xenbus_resume();
	SUSPEND_DEBUG("xenbus_resume_devices\n");
	xen_resume_devices();

	thread_affinity_clear(curthread);
	kpreempt_enable();

	SUSPEND_DEBUG("finished xen_suspend_domain\n");

	/*
	 * We have restarted our suspended domain, update the hypervisor
	 * details. NB: This must be done at the end of this function,
	 * since we need the domain to be completely resumed before
	 * these functions will work correctly.
	 */
	xen_set_version(XENVER_CURRENT_IDX);

	/*
	 * We can check and report a warning, but we don't stop the
	 * process.
	 */
	if (xen_hypervisor_supports_solaris(XEN_SUSPEND_CHECK) == 0)
		cmn_err(CE_WARN, "Found hypervisor version: v%lu.%lu%s "
		    "but need at least version v3.0.4",
		    XENVER_CURRENT(xv_major), XENVER_CURRENT(xv_minor),
		    XENVER_CURRENT(xv_ver));

	cmn_err(CE_NOTE, "domain restore/migrate completed");
}
/*
 * rw_vector_enter:
 *
 *	Acquire a rwlock.
 */
void
rw_vector_enter(krwlock_t *rw, const krw_t op)
{
	uintptr_t owner, incr, need_wait, set_wait, curthread, next;
	turnstile_t *ts;
	int queue;
	lwp_t *l;
	LOCKSTAT_TIMER(slptime);
	LOCKSTAT_TIMER(slpcnt);
	LOCKSTAT_TIMER(spintime);
	LOCKSTAT_COUNTER(spincnt);
	LOCKSTAT_FLAG(lsflag);

	l = curlwp;
	curthread = (uintptr_t)l;

	RW_ASSERT(rw, !cpu_intr_p());
	RW_ASSERT(rw, curthread != 0);
	RW_WANTLOCK(rw, op);

	if (panicstr == NULL) {
		LOCKDEBUG_BARRIER(&kernel_lock, 1);
	}

	/*
	 * We play a slight trick here.  If we're a reader, we want to
	 * increment the read count.  If we're a writer, we want to
	 * set the owner field and the WRITE_LOCKED bit.
	 *
	 * In the latter case, we expect those bits to be zero,
	 * therefore we can use an add operation to set them, which
	 * means an add operation for both cases.
	 */
	if (__predict_true(op == RW_READER)) {
		incr = RW_READ_INCR;
		set_wait = RW_HAS_WAITERS;
		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
		queue = TS_READER_Q;
	} else {
		RW_DASSERT(rw, op == RW_WRITER);
		incr = curthread | RW_WRITE_LOCKED;
		set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
		need_wait = RW_WRITE_LOCKED | RW_THREAD;
		queue = TS_WRITER_Q;
	}

	LOCKSTAT_ENTER(lsflag);

	KPREEMPT_DISABLE(curlwp);
	for (owner = rw->rw_owner; ;) {
		/*
		 * Read the lock owner field.  If the need-to-wait
		 * indicator is clear, then try to acquire the lock.
		 */
		if ((owner & need_wait) == 0) {
			next = rw_cas(rw, owner, (owner + incr) &
			    ~RW_WRITE_WANTED);
			if (__predict_true(next == owner)) {
				/* Got it! */
				membar_enter();
				break;
			}

			/*
			 * Didn't get it -- spin around again (we'll
			 * probably sleep on the next iteration).
			 */
			owner = next;
			continue;
		}

		if (__predict_false(panicstr != NULL)) {
			kpreempt_enable();
			return;
		}
		if (__predict_false(RW_OWNER(rw) == curthread)) {
			rw_abort(rw, __func__, "locking against myself");
		}

		/*
		 * If the lock owner is running on another CPU, and
		 * there are no existing waiters, then spin.
		 */
		if (rw_oncpu(owner)) {
			LOCKSTAT_START_TIMER(lsflag, spintime);
			u_int count = SPINLOCK_BACKOFF_MIN;
			do {
				KPREEMPT_ENABLE(curlwp);
				SPINLOCK_BACKOFF(count);
				KPREEMPT_DISABLE(curlwp);
				owner = rw->rw_owner;
			} while (rw_oncpu(owner));
			LOCKSTAT_STOP_TIMER(lsflag, spintime);
			LOCKSTAT_COUNT(spincnt, 1);
			if ((owner & need_wait) == 0)
				continue;
		}

		/*
		 * Grab the turnstile chain lock.  Once we have that, we
		 * can adjust the waiter bits and sleep queue.
		 */
		ts = turnstile_lookup(rw);

		/*
		 * Mark the rwlock as having waiters.  If the set fails,
		 * then we may not need to sleep and should spin again.
		 * Reload rw_owner because turnstile_lookup() may have
		 * spun on the turnstile chain lock.
		 */
		owner = rw->rw_owner;
		if ((owner & need_wait) == 0 || rw_oncpu(owner)) {
			turnstile_exit(rw);
			continue;
		}
		next = rw_cas(rw, owner, owner | set_wait);
		if (__predict_false(next != owner)) {
			turnstile_exit(rw);
			owner = next;
			continue;
		}

		LOCKSTAT_START_TIMER(lsflag, slptime);
		turnstile_block(ts, queue, rw, &rw_syncobj);
		LOCKSTAT_STOP_TIMER(lsflag, slptime);
		LOCKSTAT_COUNT(slpcnt, 1);

		/*
		 * No need for a memory barrier because of context switch.
		 * If not handed the lock, then spin again.
		 */
		if (op == RW_READER || (rw->rw_owner & RW_THREAD) == curthread)
			break;

		owner = rw->rw_owner;
	}
	KPREEMPT_ENABLE(curlwp);

	LOCKSTAT_EVENT(lsflag, rw, LB_RWLOCK |
	    (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2), slpcnt, slptime);
	LOCKSTAT_EVENT(lsflag, rw, LB_RWLOCK | LB_SPIN, spincnt, spintime);
	LOCKSTAT_EXIT(lsflag);

	RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
	    (op == RW_READER && RW_COUNT(rw) != 0));
	RW_LOCKED(rw, op);
}
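/*
 * Illustrative sketch (not from the original source): the "one add
 * covers both cases" trick in rw_vector_enter() -- a reader's count
 * increment and a writer's owner/flag install are both expressed as
 * owner + incr, so one CAS loop serves both paths. A minimal fast-path
 * sketch with C11 atomics; the word layout and constants are invented,
 * not NetBSD's real encoding.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define WRITE_LOCKED	((uintptr_t)1)	/* low bit: write-locked */
#define READ_INCR	((uintptr_t)2)	/* remaining bits: reader count */

static _Atomic uintptr_t rw_word;

/*
 * Fast-path acquire. Readers add READ_INCR; a writer adds its aligned
 * (hence low-bit-free) thread id plus WRITE_LOCKED. Because the bits a
 * writer sets are known to be zero when the lock is free, addition and
 * bitwise-or coincide, so the same CAS form serves both modes. A real
 * lock would spin or sleep on failure.
 */
bool
rw_tryenter(bool writer, uintptr_t self)
{
	uintptr_t owner = atomic_load(&rw_word);
	uintptr_t incr = writer ? (self | WRITE_LOCKED) : READ_INCR;
	uintptr_t need_wait = writer ? ~(uintptr_t)0 : WRITE_LOCKED;

	if ((owner & need_wait) != 0)
		return (false);		/* conflicting holder */

	return (atomic_compare_exchange_strong(&rw_word, &owner,
	    owner + incr));
}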