/*
 * Common spl raise routine, takes new ipl to set
 * returns the old ipl, will not lower ipl.
 */
int
splr(int newpri)
{
	ulong_t	flag;
	cpu_t	*cpu;
	int	curpri, basepri;

	flag = intr_clear();
	cpu = CPU;	/* ints are disabled, now safe to cache cpu ptr */
	curpri = cpu->cpu_m.mcpu_pri;

	/*
	 * Only do something if new priority is larger
	 */
	if (newpri > curpri) {
		/*
		 * Never set the ipl below cpu_base_spl; clamp the request
		 * up to it first (same rule as do_splx()).
		 */
		basepri = cpu->cpu_base_spl;
		if (newpri < basepri)
			newpri = basepri;
		cpu->cpu_m.mcpu_pri = newpri;
		(*setspl)(newpri);

		/*
		 * See if new priority level allows pending softint delivery
		 */
		if (IS_FAKE_SOFTINT(flag, newpri))
			fakesoftint();
	}
	intr_restore(flag);
	return (curpri);
}
/*
 * do_splx routine, takes new ipl to set
 * returns the old ipl.
 * We are careful not to set priority lower than CPU->cpu_base_pri,
 * even though it seems we're raising the priority, it could be set
 * higher at any time by an interrupt routine, so we must block interrupts
 * and look at CPU->cpu_base_pri
 */
int
do_splx(int newpri)
{
	ulong_t	flag;
	cpu_t	*cpu;
	int	curpri, basepri;

	flag = intr_clear();
	cpu = CPU;	/* ints are disabled, now safe to cache cpu ptr */
	curpri = cpu->cpu_m.mcpu_pri;
	basepri = cpu->cpu_base_spl;

	/* Clamp the requested ipl so we never unmask below the base spl. */
	if (newpri < basepri)
		newpri = basepri;
	cpu->cpu_m.mcpu_pri = newpri;
	(*setspl)(newpri);

	/*
	 * If we are going to reenable interrupts see if new priority level
	 * allows pending softint delivery.
	 */
	if (IS_FAKE_SOFTINT(flag, newpri))
		fakesoftint();

	/* Interrupts must still be blocked until the saved flags go back. */
	ASSERT(!interrupts_enabled());
	intr_restore(flag);
	return (curpri);
}
/*
 * Reprogram the timer.
 *
 * In oneshot mode the argument is the absolute future hrtime at which the
 * interrupt should fire.  In periodic mode the argument would be the
 * interval between interrupts, but periodic reprogramming is not needed
 * here and is therefore unsupported.
 *
 * The deadline must be converted from hrtime to Xen system time before it
 * is handed to the hypervisor (see xpv_timestamp.c).
 */
static void
xen_uppc_timer_reprogram(hrtime_t timer_req)
{
	hrtime_t curtime, deadline, delta, systime;
	ulong_t intrflags;

	intrflags = intr_clear();

	/*
	 * We should be called from high PIL context (CBE_HIGH_PIL),
	 * so kpreempt is disabled.
	 */
	curtime = xpv_gethrtime();
	systime = xpv_getsystime();

	if (timer_req <= curtime) {
		/*
		 * The requested time is already in the past; arrange for
		 * an interrupt as soon as possible instead.
		 */
		delta = XEN_NSEC_PER_TICK;
	} else {
		delta = timer_req - curtime;
	}
	deadline = systime + delta;

	if (HYPERVISOR_set_timer_op(deadline) != 0)
		panic("can't set hypervisor timer?");

	intr_restore(intrflags);
}
/*
 * Zero both turbo-accounting MSRs (MPERF and APERF) so that subsequent
 * reads measure a fresh interval.  Interrupts are blocked so the pair of
 * writes happens back-to-back on this CPU.
 */
static void
reset_turbo_info(void)
{
	ulong_t saved_intr;

	saved_intr = intr_clear();
	wrmsr(IA32_MPERF_MSR, 0);
	wrmsr(IA32_APERF_MSR, 0);
	intr_restore(saved_intr);
}
/*
 * Program the ENERGY_PERF_BIAS MSR: a hint to the hardware derived from
 * the user's power policy.  Only the bits covered by EPB_MSR_MASK are
 * written; interrupts are blocked around the MSR access.
 */
static void
cpupm_iepb_set_policy(uint64_t iepb_policy)
{
	ulong_t saved_intr;

	saved_intr = intr_clear();
	wrmsr(IA32_ENERGY_PERF_BIAS_MSR, iepb_policy & EPB_MSR_MASK);
	intr_restore(saved_intr);
}
/*
 * Sample the MPERF/APERF MSR pair and accumulate the counts into the
 * caller's turbo_info record.  Interrupts are blocked so both counters
 * are read as close together as possible.
 */
static void
get_turbo_info(cpupm_mach_turbo_info_t *turbo_info)
{
	ulong_t saved_intr;

	saved_intr = intr_clear();
	turbo_info->t_mcnt += rdmsr(IA32_MPERF_MSR);
	turbo_info->t_acnt += rdmsr(IA32_APERF_MSR);
	intr_restore(saved_intr);
}
/*
 * write(2) entry point: the caller hands us an array of event channel
 * port numbers; each port that is bound to this minor gets unmasked.
 */
/* ARGSUSED */
static int
evtchndrv_write(dev_t dev, struct uio *uio, cred_t *cr)
{
	int rc, i;
	ssize_t count;
	evtchn_port_t *kbuf;
	struct evtsoftdata *ep;
	ulong_t flags;
	minor_t minor = getminor(dev);
	evtchn_port_t sbuf[32];

	/* Only privileged xVM-control callers may poke event channels. */
	if (secpolicy_xvm_control(cr))
		return (EPERM);

	ep = EVTCHNDRV_INST2SOFTS(EVTCHNDRV_MINOR2INST(minor));

	/* Whole number of ports. */
	count = uio->uio_resid;
	count &= ~(sizeof (evtchn_port_t) - 1);

	if (count == 0)
		return (0);

	/* Bound the transfer; we never copy more than one page at a time. */
	if (count > PAGESIZE)
		count = PAGESIZE;

	/* Small writes fit in the on-stack buffer; larger ones get a page. */
	if (count <= sizeof (sbuf))
		kbuf = sbuf;
	else
		kbuf = kmem_alloc(PAGESIZE, KM_SLEEP);
	if ((rc = uiomove(kbuf, count, UIO_WRITE, uio)) != 0)
		goto out;

	mutex_enter(&port_user_lock);
	for (i = 0; i < (count / sizeof (evtchn_port_t)); i++)
		/* Skip out-of-range ports and ports bound to other minors. */
		if ((kbuf[i] < NR_EVENT_CHANNELS) &&
		    (port_user[kbuf[i]] == ep)) {
			/* ec_unmask_evtchn() requires interrupts blocked. */
			flags = intr_clear();
			ec_unmask_evtchn(kbuf[i]);
			intr_restore(flags);
		}
	mutex_exit(&port_user_lock);

out:
	/* rc is 0 on success (uiomove succeeded), else the uiomove errno. */
	if (kbuf != sbuf)
		kmem_free(kbuf, PAGESIZE);
	return (rc);
}
/*
 * Reach a point at which the CPU can be safely powered-off or
 * suspended.  Nothing can wake this CPU out of the loop.
 */
static void
enter_safe_phase(void)
{
	ulong_t flags = intr_clear();

	/*
	 * setjmp() captures this thread's register state in t_pcb; the
	 * zero return means we arrived here directly (not via a later
	 * longjmp() resume — presumably performed by whoever restarts
	 * this CPU; confirm against the caller).
	 */
	if (setjmp(&curthread->t_pcb) == 0) {
		/* Advertise that we are parked, then spin until released. */
		cpu_phase[CPU->cpu_id] = CPU_PHASE_SAFE;
		while (cpu_phase[CPU->cpu_id] == CPU_PHASE_SAFE)
			SMT_PAUSE();
	}

	/* Interrupts must have stayed blocked for the whole spin. */
	ASSERT(!interrupts_enabled());
	intr_restore(flags);
}
static void evtchn_bind_to_user(struct evtsoftdata *u, int port) { ulong_t flags; /* * save away the PID of the last process to bind to this event channel. * Useful for debugging. */ u->pid = ddi_get_pid(); mutex_enter(&port_user_lock); ASSERT(port_user[port] == NULL); port_user[port] = u; ec_irq_add_evtchn(ec_dev_irq, port); flags = intr_clear(); ec_unmask_evtchn(port); intr_restore(flags); mutex_exit(&port_user_lock); }
/*
 * Wake a secondary CPU with the classic INIT/INIT-deassert/SIPI/SIPI
 * sequence.  `cpun' is the target CPU id; `start' indicates this is an
 * initial startup (in which case the CMOS shutdown code is armed so the
 * BIOS jumps to the warm-reset vector).
 */
static void
apic_cpu_send_SIPI(processorid_t cpun, boolean_t start)
{
	int loop_count;
	uint32_t vector;
	uint_t apicid;
	ulong_t iflag;

	apicid = apic_cpus[cpun].aci_local_id;

	/*
	 * Interrupts on current CPU will be disabled during the
	 * steps in order to avoid unwanted side effects from
	 * executing interrupt handlers on a problematic BIOS.
	 */
	iflag = intr_clear();

	if (start) {
		/* Arm a BIOS warm reset via the CMOS shutdown-status byte. */
		outb(CMOS_ADDR, SSB);
		outb(CMOS_DATA, BIOS_SHUTDOWN);
	}

	/*
	 * According to X2APIC specification in section '2.3.5.1' of
	 * Interrupt Command Register Semantics, the semantics of
	 * programming the Interrupt Command Register to dispatch an interrupt
	 * is simplified. A single MSR write to the 64-bit ICR is required
	 * for dispatching an interrupt. Specifically, with the 64-bit MSR
	 * interface to ICR, system software is not required to check the
	 * status of the delivery status bit prior to writing to the ICR
	 * to send an IPI. With the removal of the Delivery Status bit,
	 * system software no longer has a reason to read the ICR. It remains
	 * readable only to aid in debugging.
	 */
#ifdef	DEBUG
	APIC_AV_PENDING_SET();
#else
	if (apic_mode == LOCAL_APIC) {
		APIC_AV_PENDING_SET();
	}
#endif /* DEBUG */

	/* for integrated - make sure there is one INIT IPI in buffer */
	/* for external - it will wake up the cpu */
	apic_reg_ops->apic_write_int_cmd(apicid, AV_ASSERT | AV_RESET);

	/* If only 1 CPU is installed, PENDING bit will not go low */
	for (loop_count = apic_sipi_max_loop_count; loop_count;
	    loop_count--) {
		if (apic_mode == LOCAL_APIC &&
		    apic_reg_ops->apic_read(APIC_INT_CMD1) & AV_PENDING)
			apic_ret();
		else
			break;
	}

	/* Deassert INIT to complete the INIT-IPI handshake. */
	apic_reg_ops->apic_write_int_cmd(apicid, AV_DEASSERT | AV_RESET);
	drv_usecwait(20000);		/* 20 milli sec */

	if (apic_cpus[cpun].aci_local_ver >= APIC_INTEGRATED_VERS) {
		/* integrated apic */

		/*
		 * SIPI vector: page frame of the real-mode startup code
		 * (rm_platter), masked to the architected vector bits.
		 */
		vector = (rm_platter_pa >> MMU_PAGESHIFT) &
		    (APIC_VECTOR_MASK | APIC_IPL_MASK);

		/* to offset the INIT IPI queue up in the buffer */
		apic_reg_ops->apic_write_int_cmd(apicid,
		    vector | AV_STARTUP);
		drv_usecwait(200);		/* 20 micro sec */

		/*
		 * send the second SIPI (Startup IPI) as recommended by Intel
		 * software development manual.
		 */
		apic_reg_ops->apic_write_int_cmd(apicid,
		    vector | AV_STARTUP);
		drv_usecwait(200);	/* 20 micro sec */
	}
/*
 * Top level routine to direct suspend/resume of a domain.
 */
void
xen_suspend_domain(void)
{
	extern void rtcsync(void);
	extern hrtime_t hres_last_tick;
	mfn_t start_info_mfn;
	ulong_t flags;
	pfn_t pfn;
	int i;

	/*
	 * Check that we are happy to suspend on this hypervisor.
	 */
	if (xen_hypervisor_supports_solaris(XEN_SUSPEND_CHECK) == 0) {
		cpr_err(CE_WARN, "Cannot suspend on this hypervisor "
		    "version: v%lu.%lu%s, need at least version v3.0.4 or "
		    "-xvm based hypervisor", XENVER_CURRENT(xv_major),
		    XENVER_CURRENT(xv_minor), XENVER_CURRENT(xv_ver));
		return;
	}

	/*
	 * XXPV - Are we definitely OK to suspend by the time we've connected
	 * the handler?
	 */

	cpr_err(CE_NOTE, "Domain suspending for save/migrate");

	SUSPEND_DEBUG("xen_suspend_domain\n");

	/*
	 * suspend interrupts and devices
	 * XXPV - we use suspend/resume for both save/restore domains (like
	 * sun cpr) and for migration.  Would be nice to know the difference
	 * if possible.  For save/restore where down time may be a long time,
	 * we may want to do more of the things that cpr does. (i.e. notify
	 * user processes, shrink memory footprint for faster restore, etc.)
	 */
	xen_suspend_devices();
	SUSPEND_DEBUG("xenbus_suspend\n");
	xenbus_suspend();

	/*
	 * Capture the MFN of the start_info page now; it is passed to
	 * HYPERVISOR_suspend() below.
	 */
	pfn = hat_getpfnum(kas.a_hat, (caddr_t)xen_info);
	start_info_mfn = pfn_to_mfn(pfn);

	/*
	 * XXPV: cpu hotplug can hold this under a xenbus watch. Are we safe
	 * wrt xenbus being suspended here?
	 */
	mutex_enter(&cpu_lock);

	/*
	 * Suspend must be done on vcpu 0, as no context for other CPUs is
	 * saved.
	 *
	 * XXPV - add to taskq API ?
	 */
	thread_affinity_set(curthread, 0);
	kpreempt_disable();

	SUSPEND_DEBUG("xen_start_migrate\n");
	xen_start_migrate();
	if (ncpus > 1)
		suspend_cpus();

	/*
	 * We can grab the ec_lock as it's a spinlock with a high SPL. Hence
	 * any holder would have dropped it to get through suspend_cpus().
	 */
	mutex_enter(&ec_lock);

	/*
	 * From here on in, we can't take locks.
	 */
	SUSPEND_DEBUG("ec_suspend\n");
	ec_suspend();
	SUSPEND_DEBUG("gnttab_suspend\n");
	gnttab_suspend();

	flags = intr_clear();

	xpv_time_suspend();

	/*
	 * Currently, the hypervisor incorrectly fails to bring back
	 * powered-down VCPUs. Thus we need to record any powered-down VCPUs
	 * to prevent any attempts to operate on them. But we have to do this
	 * *after* the very first time we do ec_suspend().
	 */
	for (i = 1; i < ncpus; i++) {
		if (cpu[i] == NULL)
			continue;

		if (cpu_get_state(cpu[i]) == P_POWEROFF)
			CPUSET_ATOMIC_ADD(cpu_suspend_lost_set, i);
	}

	/*
	 * The dom0 save/migrate code doesn't automatically translate
	 * these into PFNs, but expects them to be, so we do it here.
	 * We don't use mfn_to_pfn() because so many OS services have
	 * been disabled at this point.
	 */
	xen_info->store_mfn = mfn_to_pfn_mapping[xen_info->store_mfn];
	xen_info->console.domU.mfn =
	    mfn_to_pfn_mapping[xen_info->console.domU.mfn];

	/*
	 * Event upcalls must be masked here (nonzero upcall_mask); a zero
	 * mask at this point is fatal.
	 */
	if (CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask == 0) {
		prom_printf("xen_suspend_domain(): "
		    "CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask not set\n");
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
	}

	/* Tear down the shared_info mapping before suspending. */
	if (HYPERVISOR_update_va_mapping((uintptr_t)HYPERVISOR_shared_info,
	    0, UVMF_INVLPG)) {
		prom_printf("xen_suspend_domain(): "
		    "HYPERVISOR_update_va_mapping() failed\n");
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
	}

	SUSPEND_DEBUG("HYPERVISOR_suspend\n");

	/*
	 * At this point we suspend and sometime later resume.
	 */
	if (HYPERVISOR_suspend(start_info_mfn)) {
		prom_printf("xen_suspend_domain(): "
		    "HYPERVISOR_suspend() failed\n");
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
	}

	/*
	 * Point HYPERVISOR_shared_info to its new value.
	 */
	if (HYPERVISOR_update_va_mapping((uintptr_t)HYPERVISOR_shared_info,
	    xen_info->shared_info | PT_NOCONSIST | PT_VALID | PT_WRITABLE,
	    UVMF_INVLPG))
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);

	/* A change in the page count across suspend/resume is fatal. */
	if (xen_info->nr_pages != mfn_count) {
		prom_printf("xen_suspend_domain(): number of pages"
		    " changed, was 0x%lx, now 0x%lx\n", mfn_count,
		    xen_info->nr_pages);
		(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
	}

	xpv_time_resume();

	cached_max_mfn = 0;

	SUSPEND_DEBUG("gnttab_resume\n");
	gnttab_resume();

	/* XXPV: add a note that this must be lockless. */
	SUSPEND_DEBUG("ec_resume\n");
	ec_resume();

	intr_restore(flags);

	if (ncpus > 1)
		resume_cpus();

	mutex_exit(&ec_lock);
	xen_end_migrate();
	mutex_exit(&cpu_lock);

	/*
	 * Now we can take locks again.
	 */

	/*
	 * Force the tick value used for tv_nsec in hres_tick() to be up to
	 * date. rtcsync() will reset the hrestime value appropriately.
	 */
	hres_last_tick = xpv_gethrtime();

	/*
	 * XXPV: we need to have resumed the CPUs since this takes locks, but
	 * can remote CPUs see bad state? Presumably yes. Should probably nest
	 * taking of todlock inside of cpu_lock, or vice versa, then provide
	 * an unlocked version.  Probably need to call clkinitf to reset cpu
	 * freq and re-calibrate if we migrated to a different speed cpu.
	 * Also need to make a (re)init_cpu_info call to update processor info
	 * structs and device tree info.  That remains to be written at the
	 * moment.
	 */
	rtcsync();
	rebuild_mfn_list();

	SUSPEND_DEBUG("xenbus_resume\n");
	xenbus_resume();
	SUSPEND_DEBUG("xenbus_resume_devices\n");
	xen_resume_devices();

	thread_affinity_clear(curthread);
	kpreempt_enable();

	SUSPEND_DEBUG("finished xen_suspend_domain\n");

	/*
	 * We have restarted our suspended domain, update the hypervisor
	 * details. NB: This must be done at the end of this function,
	 * since we need the domain to be completely resumed before
	 * these functions will work correctly.
	 */
	xen_set_version(XENVER_CURRENT_IDX);

	/*
	 * We can check and report a warning, but we don't stop the
	 * process.
	 */
	if (xen_hypervisor_supports_solaris(XEN_SUSPEND_CHECK) == 0)
		cmn_err(CE_WARN, "Found hypervisor version: v%lu.%lu%s "
		    "but need at least version v3.0.4",
		    XENVER_CURRENT(xv_major), XENVER_CURRENT(xv_minor),
		    XENVER_CURRENT(xv_ver));

	cmn_err(CE_NOTE, "domain restore/migrate completed");
}
/*
 * Top level routine to direct suspend/resume of a domain.
 */
void
xen_suspend_domain(void)
{
	extern void rtcsync(void);
	extern void ec_resume(void);
	extern kmutex_t ec_lock;
	struct xen_add_to_physmap xatp;
	ulong_t flags;
	int err;

	cmn_err(CE_NOTE, "Domain suspending for save/migrate");

	SUSPEND_DEBUG("xen_suspend_domain\n");

	/*
	 * We only want to suspend the PV devices, since the emulated devices
	 * are suspended by saving the emulated device state.  The PV devices
	 * are all children of the xpvd nexus device.  So we search the
	 * device tree for the xpvd node to use as the root of the tree to
	 * be suspended.
	 */
	if (xpvd_dip == NULL)
		ddi_walk_devs(ddi_root_node(), check_xpvd, NULL);

	/*
	 * suspend interrupts and devices
	 */
	if (xpvd_dip != NULL)
		(void) xen_suspend_devices(ddi_get_child(xpvd_dip));
	else
		cmn_err(CE_WARN, "No PV devices found to suspend");
	SUSPEND_DEBUG("xenbus_suspend\n");
	xenbus_suspend();

	mutex_enter(&cpu_lock);

	/*
	 * Suspend on vcpu 0
	 */
	thread_affinity_set(curthread, 0);
	kpreempt_disable();

	if (ncpus > 1)
		pause_cpus(NULL, NULL);
	/*
	 * We can grab the ec_lock as it's a spinlock with a high SPL. Hence
	 * any holder would have dropped it to get through pause_cpus().
	 */
	mutex_enter(&ec_lock);

	/*
	 * From here on in, we can't take locks.
	 */
	flags = intr_clear();

	SUSPEND_DEBUG("HYPERVISOR_suspend\n");

	/*
	 * At this point we suspend and sometime later resume.
	 * Note that this call may return with an indication of a cancelled
	 * suspend; for now no matter what the return is we do a full resume
	 * of all suspended drivers, etc.
	 */
	(void) HYPERVISOR_shutdown(SHUTDOWN_suspend);

	/*
	 * Point HYPERVISOR_shared_info to the proper place.
	 */
	xatp.domid = DOMID_SELF;
	xatp.idx = 0;
	xatp.space = XENMAPSPACE_shared_info;
	xatp.gpfn = xen_shared_info_frame;
	if ((err = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) != 0)
		panic("Could not set shared_info page. error: %d", err);

	SUSPEND_DEBUG("gnttab_resume\n");
	gnttab_resume();

	SUSPEND_DEBUG("ec_resume\n");
	ec_resume();

	intr_restore(flags);

	if (ncpus > 1)
		start_cpus();

	mutex_exit(&ec_lock);
	mutex_exit(&cpu_lock);

	/*
	 * Now we can take locks again.
	 */
	rtcsync();

	SUSPEND_DEBUG("xenbus_resume\n");
	xenbus_resume();
	SUSPEND_DEBUG("xen_resume_devices\n");
	if (xpvd_dip != NULL)
		(void) xen_resume_devices(ddi_get_child(xpvd_dip), 0);

	thread_affinity_clear(curthread);
	kpreempt_enable();

	SUSPEND_DEBUG("finished xen_suspend_domain\n");

	cmn_err(CE_NOTE, "domain restore/migrate completed");
}