PRIVATE void fpu_init(void) { unsigned short cw, sw; fninit(); sw = fnstsw(); fnstcw(&cw); if((sw & 0xff) == 0 && (cw & 0x103f) == 0x3f) { /* We have some sort of FPU, but don't check exact model. * Set CR0_NE and CR0_MP to handle fpu exceptions * in native mode. */ write_cr0(read_cr0() | CR0_MP_NE); fpu_presence = 1; if(_cpufeature(_CPUF_I386_FXSR)) { register struct proc *rp; phys_bytes aligned_fp_area; u32_t cr4 = read_cr4() | CR4_OSFXSR; /* Enable FXSR. */ /* OSXMMEXCPT if supported * FXSR feature can be available without SSE */ if(_cpufeature(_CPUF_I386_SSE)) cr4 |= CR4_OSXMMEXCPT; write_cr4(cr4); osfxsr_feature = 1; for (rp = BEG_PROC_ADDR; rp < END_PROC_ADDR; ++rp) { /* FXSR requires 16-byte alignment of memory * image, but unfortunately some old tools * (probably linker) ignores ".balign 16" * applied to our memory image. * Thus we have to do manual alignment. */ aligned_fp_area = (phys_bytes) &rp->p_fpu_state.fpu_image; if(aligned_fp_area % FPUALIGN) { aligned_fp_area += FPUALIGN - (aligned_fp_area % FPUALIGN); } rp->p_fpu_state.fpu_save_area_p = (void *) aligned_fp_area; } } else { osfxsr_feature = 0; } } else { /* No FPU presents. */ fpu_presence = 0; osfxsr_feature = 0; return; } }
/*
 * Report which of the floating-point exceptions named in `excepts`
 * are currently raised.
 */
int fetestexcept(int excepts) {
#if defined(__aarch64__)
  // On AArch64 the accrued exception flags live in FPSR.
  int raised = mrs_fpsr();
#elif defined(__x86_64__)
  // x87 keeps its flags in the status word, SSE in MXCSR; merge both.
  int raised = fnstsw() | stmxcsr();
#else
#error "Unsupported platform"
#endif
  return raised & excepts;
}
/* * Look for FPU and initialize it. * Called on each CPU. */ void init_fpu() { unsigned short status, control; #ifdef MACH_HYP clear_ts(); #else /* MACH_HYP */ unsigned int native = 0; if (machine_slot[cpu_number()].cpu_type >= CPU_TYPE_I486) native = CR0_NE; /* * Check for FPU by initializing it, * then trying to read the correct bit patterns from * the control and status registers. */ set_cr0((get_cr0() & ~(CR0_EM|CR0_TS)) | native); /* allow use of FPU */ #endif /* MACH_HYP */ fninit(); status = fnstsw(); fnstcw(&control); if ((status & 0xff) == 0 && (control & 0x103f) == 0x3f) { /* * We have a FPU of some sort. * Compare -infinity against +infinity * to check whether we have a 287 or a 387. */ volatile double fp_infinity, fp_one, fp_zero; fp_one = 1.0; fp_zero = 0.0; fp_infinity = fp_one / fp_zero; if (fp_infinity == -fp_infinity) { /* * We have an 80287. */ fp_kind = FP_287; asm volatile(".byte 0xdb; .byte 0xe4"); /* fnsetpm */ } else { /* * We have a 387. */ if (CPU_HAS_FEATURE(CPU_FEATURE_FXSR)) {
/* * Look for FPU and initialize it. * Called on each CPU. */ void init_fpu(void) { unsigned short status, control; /* * Check for FPU by initializing it, * then trying to read the correct bit patterns from * the control and status registers. */ set_cr0(get_cr0() & ~(CR0_EM|CR0_TS)); /* allow use of FPU */ fninit(); status = fnstsw(); fnstcw(&control); if ((status & 0xff) == 0 && (control & 0x103f) == 0x3f) { fp_kind = FP_387; /* assume we have a 387 compatible instruction set */ /* Use FPU save/restore instructions if available */ if (cpuid_features() & CPUID_FEATURE_FXSR) { fp_kind = FP_FXSR; set_cr4(get_cr4() | CR4_FXS); printf("Enabling XMM register save/restore"); /* And allow SIMD instructions if present */ if (cpuid_features() & CPUID_FEATURE_SSE) { printf(" and SSE/SSE2"); set_cr4(get_cr4() | CR4_XMM); } printf(" opcodes\n"); } /* * Trap wait instructions. Turn off FPU for now. */ set_cr0(get_cr0() | CR0_TS | CR0_MP); } else { /* * NO FPU. */ fp_kind = FP_NO; set_cr0(get_cr0() | CR0_EM); } }
PUBLIC void fpu_init(void) { unsigned short cw, sw; fninit(); sw = fnstsw(); fnstcw(&cw); if((sw & 0xff) == 0 && (cw & 0x103f) == 0x3f) { /* We have some sort of FPU, but don't check exact model. * Set CR0_NE and CR0_MP to handle fpu exceptions * in native mode. */ write_cr0(read_cr0() | CR0_MP_NE); get_cpulocal_var(fpu_presence) = 1; if(_cpufeature(_CPUF_I386_FXSR)) { u32_t cr4 = read_cr4() | CR4_OSFXSR; /* Enable FXSR. */ /* OSXMMEXCPT if supported * FXSR feature can be available without SSE */ if(_cpufeature(_CPUF_I386_SSE)) cr4 |= CR4_OSXMMEXCPT; write_cr4(cr4); osfxsr_feature = 1; } else { osfxsr_feature = 0; } } else { /* No FPU presents. */ get_cpulocal_var(fpu_presence) = 0; osfxsr_feature = 0; return; } }
/*
 * Implement device not available (DNA) exception
 *
 * If we were the last lwp to use the FPU, we can simply return.
 * Otherwise, we save the previous state, if necessary, and restore
 * our last saved state.
 *
 * Runs with interrupts initially disabled; raises spl to lock out
 * FPU-sync IPIs while the per-CPU FPU ownership fields are updated.
 */
void fpudna(struct cpu_info *ci)
{
	uint16_t cw;
	uint32_t mxcsr;
	struct lwp *l, *fl;
	struct pcb *pcb;
	int s;

	if (ci->ci_fpsaving) {
		/* Recursive trap: a save is already in progress on this
		 * CPU; just re-enable interrupts and bail out. */
		x86_enable_intr();
		return;
	}

	/* Lock out IPIs and disable preemption. */
	s = splhigh();
	x86_enable_intr();

	/* Save state on current CPU. */
	l = ci->ci_curlwp;
	pcb = lwp_getpcb(l);
	fl = ci->ci_fpcurlwp;
	if (fl != NULL) {
		/*
		 * It seems we can get here on Xen even if we didn't
		 * switch lwp.  In this case do nothing: just clear TS
		 * so the FPU is usable again and return.
		 */
		if (fl == l) {
			KASSERT(pcb->pcb_fpcpu == ci);
			clts();
			splx(s);
			return;
		}
		KASSERT(fl != l);
		/* Another lwp owns this CPU's FPU; flush its state out. */
		fpusave_cpu(true);
		KASSERT(ci->ci_fpcurlwp == NULL);
	}

	/* Save our state if on a remote CPU. */
	if (pcb->pcb_fpcpu != NULL) {
		/* Explicitly disable preemption before dropping spl. */
		KPREEMPT_DISABLE(l);
		splx(s);
		fpusave_lwp(l, true);
		KASSERT(pcb->pcb_fpcpu == NULL);
		s = splhigh();
		KPREEMPT_ENABLE(l);
	}

	/*
	 * Restore state on this CPU, or initialize.  Ensure that
	 * the entire update is atomic with respect to FPU-sync IPIs.
	 */
	clts();
	ci->ci_fpcurlwp = l;
	pcb->pcb_fpcpu = ci;
	if ((l->l_md.md_flags & MDL_USEDFPU) == 0) {
		/* First FPU use by this lwp: start from a freshly
		 * initialized FPU, then load the control word and MXCSR
		 * values stored in the pcb. */
		fninit();
		cw = pcb->pcb_savefpu.fp_fxsave.fx_fcw;
		fldcw(&cw);
		mxcsr = pcb->pcb_savefpu.fp_fxsave.fx_mxcsr;
		x86_ldmxcsr(&mxcsr);
		l->l_md.md_flags |= MDL_USEDFPU;
	} else {
		/*
		 * AMD FPU's do not restore FIP, FDP, and FOP on fxrstor,
		 * leaking other process's execution history.  Clear them
		 * manually.
		 */
		static const double zero = 0.0;
		int status;
		/*
		 * Clear the ES bit in the x87 status word if it is currently
		 * set, in order to avoid causing a fault in the upcoming load.
		 */
		fnstsw(&status);
		if (status & 0x80)
			fnclex();
		/*
		 * Load the dummy variable into the x87 stack.  This mangles
		 * the x87 stack, but we don't care since we're about to call
		 * fxrstor() anyway.
		 */
		fldummy(&zero);
		fxrstor(&pcb->pcb_savefpu);
	}
	KASSERT(ci == curcpu());
	splx(s);
}