/*
 * Save current CPU's FPU state. Must be called at IPL_HIGH.
 */
void
fpusave_cpu(bool save)
{
        struct cpu_info *ci;
        struct pcb *pcb;
        struct lwp *l;

        KASSERT(curcpu()->ci_ilevel == IPL_HIGH);

        ci = curcpu();
        l = ci->ci_fpcurlwp;
        if (l == NULL) {
                return;
        }
        pcb = lwp_getpcb(l);

        if (save) {
                /*
                 * Set ci->ci_fpsaving, so that any pending exception will
                 * be thrown away. It will be caught again if/when the
                 * FPU state is restored.
                 */
                KASSERT(ci->ci_fpsaving == 0);
                clts();
                ci->ci_fpsaving = 1;
                fxsave(&pcb->pcb_savefpu);
                ci->ci_fpsaving = 0;
        }

        stts();
        pcb->pcb_fpcpu = NULL;
        ci->ci_fpcurlwp = NULL;
}
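The functions collected here all pivot on the CR0.TS (Task Switched) bit: clts() clears it so FPU instructions execute normally, and stts() sets it so the next FPU instruction raises a device-not-available (#NM) trap. For reference, below is a minimal sketch of how such helpers are commonly written on x86; the read_cr0()/write_cr0() wrappers and the X86_CR0_TS constant are illustrative assumptions for this sketch, not code taken from any of the sources quoted here.

/*
 * Illustrative sketch only: the CLTS instruction clears CR0.TS directly,
 * while setting TS takes a read-modify-write of CR0.  X86_CR0_TS (bit 3)
 * and the read_cr0()/write_cr0() wrappers are assumptions for this sketch.
 */
#define X86_CR0_TS      0x00000008UL    /* Task Switched bit in CR0 */

static inline void clts(void)
{
        asm volatile("clts");
}

static inline unsigned long read_cr0(void)
{
        unsigned long cr0;

        asm volatile("mov %%cr0, %0" : "=r" (cr0));
        return cr0;
}

static inline void write_cr0(unsigned long cr0)
{
        asm volatile("mov %0, %%cr0" : : "r" (cr0));
}

static inline void stts(void)
{
        write_cr0(read_cr0() | X86_CR0_TS);
}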
/*H:040
 * This is the i386-specific code to setup and run the Guest.  Interrupts
 * are disabled: we own the CPU.
 */
void lguest_arch_run_guest(struct lg_cpu *cpu)
{
        /*
         * Remember the awfully-named TS bit?  If the Guest has asked to set it
         * we set it now, so we can trap and pass that trap to the Guest if it
         * uses the FPU.
         */
        if (cpu->ts && user_has_fpu())
                stts();

        /*
         * SYSENTER is an optimized way of doing system calls.  We can't allow
         * it because it always jumps to privilege level 0.  A normal Guest
         * won't try it because we don't advertise it in CPUID, but a malicious
         * Guest (or malicious Guest userspace program) could, so we tell the
         * CPU to disable it before running the Guest.
         */
        if (boot_cpu_has(X86_FEATURE_SEP))
                wrmsr(MSR_IA32_SYSENTER_CS, 0, 0);

        /*
         * Now we actually run the Guest.  It will return when something
         * interesting happens, and we can examine its registers to see what it
         * was doing.
         */
        run_guest_once(cpu, lguest_pages(raw_smp_processor_id()));

        /*
         * Note that the "regs" structure contains two extra entries which are
         * not really registers: a trap number which says what interrupt or
         * trap made the switcher code come back, and an error code which some
         * traps set.
         */

        /* Restore SYSENTER if it's supposed to be on. */
        if (boot_cpu_has(X86_FEATURE_SEP))
                wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);

        /* Clear the host TS bit if it was set above. */
        if (cpu->ts && user_has_fpu())
                clts();

        /*
         * If the Guest page faulted, then the cr2 register will tell us the
         * bad virtual address.  We have to grab this now, because once we
         * re-enable interrupts an interrupt could fault and thus overwrite
         * cr2, or we could even move off to a different CPU.
         */
        if (cpu->regs->trapnum == 14)
                cpu->arch.last_pagefault = read_cr2();
        /*
         * Similarly, if we took a trap because the Guest used the FPU,
         * we have to restore the FPU it expects to see.
         * math_state_restore() may sleep and we may even move off to
         * a different CPU.  So all the critical stuff should be done
         * before this.
         */
        else if (cpu->regs->trapnum == 7 && !user_has_fpu())
                math_state_restore();
}
/*
 * Init the FPU.
 */
void
fpuinit(struct cpu_info *ci)
{

        clts();
        fninit();
        stts();
}
/*
 * This restores directly out of user space. Exceptions are handled.
 */
static inline int restore_i387(struct _fpstate __user *buf)
{
        struct task_struct *tsk = current;
        int err;

        if (!used_math()) {
                err = init_fpu(tsk);
                if (err)
                        return err;
        }

        if (!(task_thread_info(current)->status & TS_USEDFPU)) {
                clts();
                task_thread_info(current)->status |= TS_USEDFPU;
        }
        err = restore_fpu_checking((__force struct i387_fxsave_struct *)buf);
        if (unlikely(err)) {
                /*
                 * Encountered an error while doing the restore from the
                 * user buffer, clear the fpu state.
                 */
                clear_fpu(tsk);
                clear_used_math();
        }
        return err;
}
/*
 * 'math_state_restore()' saves the current math information in the
 * old math state array, and gets the new ones from the current task
 *
 * Careful.. There are problems with IBM-designed IRQ13 behaviour.
 * Don't touch unless you *really* know how it works.
 *
 * Must be called with kernel preemption disabled (in this case,
 * local interrupts are disabled at the call-site in entry.S).
 */
asmlinkage void math_state_restore(void)
{
        struct thread_info *thread = current_thread_info();
        struct task_struct *tsk = thread->task;

        if (!tsk_used_math(tsk)) {
                local_irq_enable();
                /*
                 * does a slab alloc which can sleep
                 */
                if (init_fpu(tsk)) {
                        /*
                         * ran out of memory!
                         */
                        do_group_exit(SIGKILL);
                        return;
                }
                local_irq_disable();
        }

        clts();                         /* Allow maths ops (or we recurse) */

        /*
         * Paranoid restore. Send a SIGSEGV if we fail to restore the state.
         */
        if (unlikely(restore_fpu_checking(tsk))) {
                stts();
                force_sig(SIGSEGV, tsk);
                return;
        }

        thread->status |= TS_USEDFPU;   /* So we fnsave on switch_to() */
        tsk->fpu_counter++;
}
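The "paranoid restore" above relies on restore_fpu_checking() reporting a fault from a corrupt FPU image instead of oopsing. A minimal sketch of how such a checking restore is commonly built, using the kernel's exception-table fixup mechanism, follows; the function name, the -1 error value, and the exact operand constraints are illustrative assumptions, not the kernel's exact implementation.

/*
 * Illustrative sketch of a "checking" fxrstor: if the saved image faults,
 * the exception table redirects execution to the fixup, which records an
 * error instead of killing the kernel.  _ASM_EXTABLE is the kernel's macro
 * for emitting an exception-table entry; everything else here is a
 * placeholder for this sketch.
 */
static inline int fxrstor_checking_sketch(struct i387_fxsave_struct *fx)
{
        int err = 0;

        asm volatile("1:        fxrstor %[fx]\n"
                     "2:\n"
                     ".section .fixup,\"ax\"\n"
                     "3:        movl $-1, %[err]\n"
                     "          jmp 2b\n"
                     ".previous\n"
                     _ASM_EXTABLE(1b, 3b)
                     : [err] "+r" (err)
                     : [fx] "m" (*fx));

        return err;
}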
/*
 * Initialize the TS bit in CR0 according to the style of context-switches
 * we are using:
 */
static void fpu__init_cpu_ctx_switch(void)
{
        if (!boot_cpu_has(X86_FEATURE_EAGER_FPU))
                stts();
        else
                clts();
}
/*
 * Initialize the TS bit in CR0 according to the style of context-switches
 * we are using:
 */
static void fpu__init_cpu_ctx_switch(void)
{
        if (!cpu_has_eager_fpu)
                stts();
        else
                clts();
}
__notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
        struct thread_struct *prev = &prev_p->thread,
                             *next = &next_p->thread;
        int cpu = smp_processor_id();
        struct tss_struct *tss = &per_cpu(init_tss, cpu);
        bool preload_fpu;

        preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;

        __unlazy_fpu(prev_p);

        if (preload_fpu)
                prefetch(next->xstate);

        load_sp0(tss, next);

        lazy_save_gs(prev->gs);

        load_TLS(next, cpu);

        if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl))
                set_iopl_mask(next->iopl);

        if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV ||
                     task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
                __switch_to_xtra(prev_p, next_p, tss);

        if (preload_fpu)
                clts();

        arch_end_context_switch(next_p);

        if (preload_fpu)
                __math_state_restore();

        if (prev->gs | next->gs)
                lazy_load_gs(next->gs);

        percpu_write(current_task, next_p);

        return prev_p;
}
/*
 * 'math_state_restore()' saves the current math information in the
 * old math state array, and gets the new ones from the current task
 *
 * Careful.. There are problems with IBM-designed IRQ13 behaviour.
 * Don't touch unless you *really* know how it works.
 */
asmlinkage void math_state_restore(void)
{
        struct task_struct *me = current;

        clts();                 /* Allow maths ops (or we recurse) */

        if (!used_math())
                init_fpu(me);
        restore_fpu_checking(&me->thread.i387.fxsave);
        me->thread_info->status |= TS_USEDFPU;
}
/*
 * This overrides the _exit() function in libc.
 * If the serial console (or remote GDB) is being used, it waits
 * until all the data has cleared out of the FIFOs; if the VGA
 * display is being used (normal console), then it waits for a keypress.
 * When it is done, it calls pc_reset() to reboot the computer.
 */
static void
our_exit(int rc)
{
        extern oskit_addr_t return_address;

#if 0
        printf("_exit(%d) called; %s...\r\n",
               rc, return_address ? "returning to netboot" : "rebooting");
#endif

        if (enable_gdb) {
                /* Detach from the remote GDB. */
                gdb_serial_exit(rc);

#ifdef HAVE_DEBUG_REGS
                /* Turn off the debug registers. */
                set_dr7(get_dr7() & ~(DR7_G0 | DR7_G1 | DR7_G2 | DR7_G3));
#endif
        }

        /* flush and wait for `_exit called` message */
        oskit_stream_release(console);

        if (!serial_console) {
                /* This is so that the user has a chance to SEE the output */
                //~ printf("Press a key to reboot");
                //~ printf("hit dat shit yo.");
                //~ getchar();
        }

        if (return_address) {
                /*
                 * The cleanup needs to be done here instead of in the
                 * returned-to code because the return address may not
                 * be accessible with our current paging and segment
                 * state.
                 * The order is important here: paging must be disabled
                 * after we reload the gdt.
                 */
                cli();
                clts();
                phys_mem_va = 0;
                linear_base_va = 0;
                base_gdt_init();
                /* Reload all since we changed linear_base_va. */
                base_cpu_load();
                paging_disable();
                ((void (*)(void))return_address)();
        } else
                pc_reset();
}
/* called on first use of fpu by thread */
status_t
i386_device_not_available(void)
{
        thread_t *thread = thread_get_current_thread();

        /* let thread use fpu */
        clts();         /* clear TS flag */

        /* so... thread tries to use fpu, load its context into fpu. */
        i386_fpu_context_load((fpu_state *)thread->arch.fpu_state);

        /* set flag that thread used fpu */
        thread->arch.fpu_used = true;

        return NO_ERROR;
}
void __stop_this_cpu(void)
{
        ASSERT(!local_irq_is_enabled());

        disable_local_APIC();

        hvm_cpu_down();

        /*
         * Clear FPU, zapping any pending exceptions. Needed for warm reset
         * with some BIOSes.
         */
        clts();
        asm volatile ( "fninit" );

        cpumask_clear_cpu(smp_processor_id(), &cpu_online_map);
}
/*
 * This restores directly out of user space. Exceptions are handled.
 */
int restore_i387_xstate(void __user *buf)
{
        struct task_struct *tsk = current;
        int err = 0;

        if (!buf) {
                if (used_math())
                        goto clear;
                return 0;
        } else if (!access_ok(VERIFY_READ, buf, sig_xstate_size))
                return -EACCES;

        if (!used_math()) {
                err = init_fpu(tsk);
                if (err)
                        return err;
        }

        if (!(task_thread_info(current)->status & TS_USEDFPU)) {
                clts();
                task_thread_info(current)->status |= TS_USEDFPU;
        }
        if (use_xsave())
                err = restore_user_xstate(buf);
        else
                err = fxrstor_checking((__force struct i387_fxsave_struct *)
                                       buf);
        if (unlikely(err)) {
                /*
                 * Encountered an error while doing the restore from the
                 * user buffer, clear the fpu state.
                 */
clear:
                clear_fpu(tsk);
                clear_used_math();
        }
        return err;
}
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
        int ret;

        *dst = *src;
        if (fpu_allocated(&src->thread.fpu)) {
                memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu));
                ret = fpu_alloc(&dst->thread.fpu);
                if (ret)
                        return ret;
                fpu_copy(&dst->thread.fpu, &src->thread.fpu);
        }
#ifdef CONFIG_X86_EARLYMIC
        if (dst->thread.fpu.state) {
                /*
                 * No need to set this flag; it should be inherited from the
                 * parent thread since the threadinfo is copied from the
                 * parent in setup_thread_stack()
                 */
                set_stopped_child_used_math(dst);
                /*
                 * Simulate FPU DNA
                 * Undo the effects of unlazy_fpu in prepare_to_copy()
                 */
                preempt_disable();
                clts();
#ifdef CONFIG_ML1OM
                __math_state_restore();
#else
                restore_mask_regs();
                stts();
#endif
                preempt_enable();
        }
#endif
        return 0;
}
/*
 * 'math_state_restore()' saves the current math information in the
 * old math state array, and gets the new ones from the current task
 *
 * Careful.. There are problems with IBM-designed IRQ13 behaviour.
 * Don't touch unless you *really* know how it works.
 *
 * Must be called with kernel preemption disabled (in this case,
 * local interrupts are disabled at the call-site in entry.S).
 */
asmlinkage void math_state_restore(void)
{
        struct thread_info *thread = current_thread_info();
        struct task_struct *tsk = thread->task;

        if (!tsk_used_math(tsk)) {
                local_irq_enable();
                /*
                 * does a slab alloc which can sleep
                 */
                if (init_fpu(tsk)) {
                        /*
                         * ran out of memory!
                         */
                        do_group_exit(SIGKILL);
                        return;
                }
                local_irq_disable();
        }

        clts();                         /* Allow maths ops (or we recurse) */

        __math_state_restore();
}
/*
 * setup the xstate image representing the init state
 */
static void __init setup_xstate_init(void)
{
        setup_xstate_features();

        /*
         * Setup init_xstate_buf to represent the init state of
         * all the features managed by the xsave
         */
        init_xstate_buf = alloc_bootmem_align(xstate_size,
                                              __alignof__(struct xsave_struct));
        init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT;

        clts();
        /*
         * Init all the features state with header_bv being 0x0
         */
        xrstor_state(init_xstate_buf, -1);
        /*
         * Dump the init state again. This is to identify the init state
         * of any feature which is not represented by all zeros.
         */
        xsave_state(init_xstate_buf, -1);
        stts();
}
static void __init setup_xstate_init(void)
{
        setup_xstate_features();

        init_xstate_buf = alloc_bootmem_align(xstate_size,
                                              __alignof__(struct xsave_struct));
        init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT;

        clts();
        xrstor_state(init_xstate_buf, -1);
        xsave_state(init_xstate_buf, -1);
        stts();
}
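The -1 mask passed to xrstor_state()/xsave_state() in the two variants above requests every state component the CPU supports, with the 64-bit component mask supplied in edx:eax. A minimal sketch of how such wrappers are commonly written follows; the struct layout, the 4096-byte size, and the function names are illustrative assumptions (real kernels size the area from CPUID leaf 0xD), not the implementation behind the code above.

#include <stdint.h>

/*
 * Illustrative layout only: legacy x87/SSE area, XSAVE header, then the
 * extended components all live inside this blob.  The XSAVE architecture
 * requires 64-byte alignment; the size here is a placeholder.
 */
struct xsave_area_sketch {
        uint8_t data[4096];
} __attribute__((aligned(64)));

/* Save the components selected by 'mask' (passed in edx:eax) into 'buf'. */
static inline void xsave_state_sketch(struct xsave_area_sketch *buf,
                                      uint64_t mask)
{
        uint32_t lo = (uint32_t)mask, hi = (uint32_t)(mask >> 32);

        asm volatile("xsave %0" : "+m" (*buf) : "a" (lo), "d" (hi));
}

/* Restore the components selected by 'mask' from 'buf'. */
static inline void xrstor_state_sketch(struct xsave_area_sketch *buf,
                                       uint64_t mask)
{
        uint32_t lo = (uint32_t)mask, hi = (uint32_t)(mask >> 32);

        asm volatile("xrstor %0" : : "m" (*buf), "a" (lo), "d" (hi));
}

Calling the save wrapper with a mask of (uint64_t)-1 right after clts(), as setup_xstate_init() does, captures whatever the CPU considers its init state for all enabled features.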
/*
 * Implement device not available (DNA) exception
 *
 * If we were the last lwp to use the FPU, we can simply return.
 * Otherwise, we save the previous state, if necessary, and restore
 * our last saved state.
 */
void
fpudna(struct cpu_info *ci)
{
        uint16_t cw;
        uint32_t mxcsr;
        struct lwp *l, *fl;
        struct pcb *pcb;
        int s;

        if (ci->ci_fpsaving) {
                /* Recursive trap. */
                x86_enable_intr();
                return;
        }

        /* Lock out IPIs and disable preemption. */
        s = splhigh();
        x86_enable_intr();

        /* Save state on current CPU. */
        l = ci->ci_curlwp;
        pcb = lwp_getpcb(l);
        fl = ci->ci_fpcurlwp;
        if (fl != NULL) {
                /*
                 * It seems we can get here on Xen even if we didn't
                 * switch lwp. In this case do nothing
                 */
                if (fl == l) {
                        KASSERT(pcb->pcb_fpcpu == ci);
                        clts();
                        splx(s);
                        return;
                }
                KASSERT(fl != l);
                fpusave_cpu(true);
                KASSERT(ci->ci_fpcurlwp == NULL);
        }

        /* Save our state if on a remote CPU. */
        if (pcb->pcb_fpcpu != NULL) {
                /* Explicitly disable preemption before dropping spl. */
                KPREEMPT_DISABLE(l);
                splx(s);
                fpusave_lwp(l, true);
                KASSERT(pcb->pcb_fpcpu == NULL);
                s = splhigh();
                KPREEMPT_ENABLE(l);
        }

        /*
         * Restore state on this CPU, or initialize. Ensure that
         * the entire update is atomic with respect to FPU-sync IPIs.
         */
        clts();
        ci->ci_fpcurlwp = l;
        pcb->pcb_fpcpu = ci;
        if ((l->l_md.md_flags & MDL_USEDFPU) == 0) {
                fninit();
                cw = pcb->pcb_savefpu.fp_fxsave.fx_fcw;
                fldcw(&cw);
                mxcsr = pcb->pcb_savefpu.fp_fxsave.fx_mxcsr;
                x86_ldmxcsr(&mxcsr);
                l->l_md.md_flags |= MDL_USEDFPU;
        } else {
                /*
                 * AMD FPUs do not restore FIP, FDP, and FOP on fxrstor,
                 * leaking other processes' execution history. Clear them
                 * manually.
                 */
                static const double zero = 0.0;
                int status;

                /*
                 * Clear the ES bit in the x87 status word if it is currently
                 * set, in order to avoid causing a fault in the upcoming load.
                 */
                fnstsw(&status);
                if (status & 0x80)
                        fnclex();

                /*
                 * Load the dummy variable into the x87 stack. This mangles
                 * the x87 stack, but we don't care since we're about to call
                 * fxrstor() anyway.
                 */
                fldummy(&zero);
                fxrstor(&pcb->pcb_savefpu);
        }

        KASSERT(ci == curcpu());
        splx(s);
}
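fpudna() and fpusave_cpu() above are a production implementation of the classic lazy-FPU protocol. To make the protocol itself easier to see, here is a minimal sketch of the same idea with all the locking and per-CPU bookkeeping stripped away; every name in it (struct task, fpu_owner, lazy_switch_out, handle_device_not_available) is a placeholder invented for this sketch, and clts()/stts()/fxsave()/fxrstor()/fninit() are assumed to be the usual wrappers seen in the snippets above.

/*
 * Illustrative sketch of lazy FPU switching around CR0.TS.
 * Not taken from any of the kernels quoted here.
 */
extern void clts(void);
extern void stts(void);
extern void fninit(void);
extern void fxsave(void *image);
extern void fxrstor(void *image);

struct task {
        int used_fpu;                           /* has touched the FPU before */
        unsigned char fpu_image[512]
                __attribute__((aligned(16)));   /* fxsave/fxrstor area */
};

static struct task *fpu_owner;  /* task whose state is live in the FPU */

/* Context switch: leave the FPU alone, just arm the #NM trap. */
static void lazy_switch_out(void)
{
        stts();
}

/* #NM handler: the new task really wants the FPU, so pay the cost now. */
static void handle_device_not_available(struct task *current_task)
{
        clts();                                 /* FPU instructions work again */

        if (fpu_owner == current_task)
                return;                         /* registers are still ours */

        if (fpu_owner != NULL)
                fxsave(fpu_owner->fpu_image);   /* evict the previous owner */

        if (current_task->used_fpu)
                fxrstor(current_task->fpu_image);
        else
                fninit();                       /* first use: clean state */

        current_task->used_fpu = 1;
        fpu_owner = current_task;
}

The fpu_owner == current_task early return corresponds to the fl == l case in fpudna(), and the save-then-hand-over step corresponds to fpusave_cpu(true).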
PUBLIC void disable_fpu_exception(void)
{
        clts();
}
/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * We fsave/fwait so that an exception goes off at the right time
 * (as a call from the fsave or fwait in effect) rather than to
 * the wrong process. Lazy FP saving no longer makes any sense
 * with modern CPUs, and this simplifies a lot of things (SMP
 * and UP become the same).
 *
 * NOTE! We used to use the x86 hardware context switching. The
 * reason for not using it any more becomes apparent when you
 * try to recover gracefully from saved state that is no longer
 * valid (stale segment register values in particular). With the
 * hardware task-switch, there is no way to fix up bad state in
 * a reasonable manner.
 *
 * The fact that Intel documents the hardware task-switching to
 * be slow is a fairly red herring - this code is not noticeably
 * faster. However, there _is_ some room for improvement here,
 * so the performance issues may eventually be a valid point.
 * More important, however, is the fact that this allows us much
 * more flexibility.
 *
 * The return value (in %ax) will be the "prev" task after
 * the task-switch, and shows up in ret_from_fork in entry.S,
 * for example.
 */
__notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
        struct thread_struct *prev = &prev_p->thread,
                             *next = &next_p->thread;
        int cpu = smp_processor_id();
        struct tss_struct *tss = &per_cpu(init_tss, cpu);
        bool preload_fpu;

        /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */

        /*
         * If the task has used fpu the last 5 timeslices, just do a full
         * restore of the math state immediately to avoid the trap; the
         * chances of needing FPU soon are obviously high now
         */
        preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;

        __unlazy_fpu(prev_p);

        /* we're going to use this soon, after a few expensive things */
        if (preload_fpu)
                prefetch(next->fpu.state);

        /*
         * Reload esp0.
         */
        load_sp0(tss, next);

        /*
         * Save away %gs. No need to save %fs, as it was saved on the
         * stack on entry. No need to save %es and %ds, as those are
         * always kernel segments while inside the kernel. Doing this
         * before setting the new TLS descriptors avoids the situation
         * where we temporarily have non-reloadable segments in %fs
         * and %gs. This could be an issue if the NMI handler ever
         * used %fs or %gs (it does not today), or if the kernel is
         * running inside of a hypervisor layer.
         */
        lazy_save_gs(prev->gs);

        /*
         * Load the per-thread Thread-Local Storage descriptor.
         */
        load_TLS(next, cpu);

        /*
         * Restore IOPL if needed. In normal use, the flags restore
         * in the switch assembly will handle this. But if the kernel
         * is running virtualized at a non-zero CPL, the popf will
         * not restore flags, so it must be done in a separate step.
         */
        if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl))
                set_iopl_mask(next->iopl);

        /*
         * Now maybe handle debug registers and/or IO bitmaps
         */
        if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV ||
                     task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
                __switch_to_xtra(prev_p, next_p, tss);

        /* If we're going to preload the fpu context, make sure clts
           is run while we're batching the cpu state updates. */
        if (preload_fpu)
                clts();

        /*
         * Leave lazy mode, flushing any hypercalls made here.
         * This must be done before restoring TLS segments so
         * the GDT and LDT are properly updated, and must be
         * done before math_state_restore, so the TS bit is up
         * to date.
         */
        arch_end_context_switch(next_p);

        if (preload_fpu)
                __math_state_restore();

        /*
         * Restore %gs if needed (which is common)
         */
        if (prev->gs | next->gs)
                lazy_load_gs(next->gs);

        percpu_write(current_task, next_p);

        return prev_p;
}