PRIVATE void fpu_init(void) { unsigned short cw, sw; fninit(); sw = fnstsw(); fnstcw(&cw); if((sw & 0xff) == 0 && (cw & 0x103f) == 0x3f) { /* We have some sort of FPU, but don't check exact model. * Set CR0_NE and CR0_MP to handle fpu exceptions * in native mode. */ write_cr0(read_cr0() | CR0_MP_NE); fpu_presence = 1; if(_cpufeature(_CPUF_I386_FXSR)) { register struct proc *rp; phys_bytes aligned_fp_area; u32_t cr4 = read_cr4() | CR4_OSFXSR; /* Enable FXSR. */ /* OSXMMEXCPT if supported * FXSR feature can be available without SSE */ if(_cpufeature(_CPUF_I386_SSE)) cr4 |= CR4_OSXMMEXCPT; write_cr4(cr4); osfxsr_feature = 1; for (rp = BEG_PROC_ADDR; rp < END_PROC_ADDR; ++rp) { /* FXSR requires 16-byte alignment of memory * image, but unfortunately some old tools * (probably linker) ignores ".balign 16" * applied to our memory image. * Thus we have to do manual alignment. */ aligned_fp_area = (phys_bytes) &rp->p_fpu_state.fpu_image; if(aligned_fp_area % FPUALIGN) { aligned_fp_area += FPUALIGN - (aligned_fp_area % FPUALIGN); } rp->p_fpu_state.fpu_save_area_p = (void *) aligned_fp_area; } } else { osfxsr_feature = 0; } } else { /* No FPU presents. */ fpu_presence = 0; osfxsr_feature = 0; return; } }
/* * Look for FPU and initialize it. * Called on each CPU. */ void init_fpu() { unsigned short status, control; #ifdef MACH_HYP clear_ts(); #else /* MACH_HYP */ unsigned int native = 0; if (machine_slot[cpu_number()].cpu_type >= CPU_TYPE_I486) native = CR0_NE; /* * Check for FPU by initializing it, * then trying to read the correct bit patterns from * the control and status registers. */ set_cr0((get_cr0() & ~(CR0_EM|CR0_TS)) | native); /* allow use of FPU */ #endif /* MACH_HYP */ fninit(); status = fnstsw(); fnstcw(&control); if ((status & 0xff) == 0 && (control & 0x103f) == 0x3f) { /* * We have a FPU of some sort. * Compare -infinity against +infinity * to check whether we have a 287 or a 387. */ volatile double fp_infinity, fp_one, fp_zero; fp_one = 1.0; fp_zero = 0.0; fp_infinity = fp_one / fp_zero; if (fp_infinity == -fp_infinity) { /* * We have an 80287. */ fp_kind = FP_287; asm volatile(".byte 0xdb; .byte 0xe4"); /* fnsetpm */ } else { /* * We have a 387. */ if (CPU_HAS_FEATURE(CPU_FEATURE_FXSR)) {
/* * Look for FPU and initialize it. * Called on each CPU. */ void init_fpu(void) { unsigned short status, control; /* * Check for FPU by initializing it, * then trying to read the correct bit patterns from * the control and status registers. */ set_cr0(get_cr0() & ~(CR0_EM|CR0_TS)); /* allow use of FPU */ fninit(); status = fnstsw(); fnstcw(&control); if ((status & 0xff) == 0 && (control & 0x103f) == 0x3f) { fp_kind = FP_387; /* assume we have a 387 compatible instruction set */ /* Use FPU save/restore instructions if available */ if (cpuid_features() & CPUID_FEATURE_FXSR) { fp_kind = FP_FXSR; set_cr4(get_cr4() | CR4_FXS); printf("Enabling XMM register save/restore"); /* And allow SIMD instructions if present */ if (cpuid_features() & CPUID_FEATURE_SSE) { printf(" and SSE/SSE2"); set_cr4(get_cr4() | CR4_XMM); } printf(" opcodes\n"); } /* * Trap wait instructions. Turn off FPU for now. */ set_cr0(get_cr0() | CR0_TS | CR0_MP); } else { /* * NO FPU. */ fp_kind = FP_NO; set_cr0(get_cr0() | CR0_EM); } }
PUBLIC void fpu_init(void) { unsigned short cw, sw; fninit(); sw = fnstsw(); fnstcw(&cw); if((sw & 0xff) == 0 && (cw & 0x103f) == 0x3f) { /* We have some sort of FPU, but don't check exact model. * Set CR0_NE and CR0_MP to handle fpu exceptions * in native mode. */ write_cr0(read_cr0() | CR0_MP_NE); get_cpulocal_var(fpu_presence) = 1; if(_cpufeature(_CPUF_I386_FXSR)) { u32_t cr4 = read_cr4() | CR4_OSFXSR; /* Enable FXSR. */ /* OSXMMEXCPT if supported * FXSR feature can be available without SSE */ if(_cpufeature(_CPUF_I386_SSE)) cr4 |= CR4_OSXMMEXCPT; write_cr4(cr4); osfxsr_feature = 1; } else { osfxsr_feature = 0; } } else { /* No FPU presents. */ get_cpulocal_var(fpu_presence) = 0; osfxsr_feature = 0; return; } }
void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) { Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2; Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start; assert_different_registers(tmp, eax, ecx, edx); jmp(start); address static_const_table = (address)_static_const_table; bind(start); subl(rsp, 120); movl(Address(rsp, 64), tmp); lea(tmp, ExternalAddress(static_const_table)); movdqu(xmm0, Address(rsp, 128)); unpcklpd(xmm0, xmm0); movdqu(xmm1, Address(tmp, 64)); // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL movdqu(xmm6, Address(tmp, 48)); // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL movdqu(xmm2, Address(tmp, 80)); // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL movdqu(xmm3, Address(tmp, 96)); // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL pextrw(eax, xmm0, 3); andl(eax, 32767); movl(edx, 16527); subl(edx, eax); subl(eax, 15504); orl(edx, eax); cmpl(edx, INT_MIN); jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); mulpd(xmm1, xmm0); addpd(xmm1, xmm6); movapd(xmm7, xmm1); subpd(xmm1, xmm6); mulpd(xmm2, xmm1); movdqu(xmm4, Address(tmp, 128)); // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL mulpd(xmm3, xmm1); movdqu(xmm5, Address(tmp, 144)); // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL subpd(xmm0, xmm2); movdl(eax, xmm7); movl(ecx, eax); andl(ecx, 63); shll(ecx, 4); sarl(eax, 6); movl(edx, eax); movdqu(xmm6, Address(tmp, 16)); // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL pand(xmm7, xmm6); movdqu(xmm6, Address(tmp, 32)); // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL paddq(xmm7, xmm6); psllq(xmm7, 46); subpd(xmm0, xmm3); movdqu(xmm2, Address(tmp, ecx, Address::times_1, 160)); mulpd(xmm4, xmm0); movapd(xmm6, xmm0); movapd(xmm1, xmm0); mulpd(xmm6, xmm6); mulpd(xmm0, xmm6); addpd(xmm5, xmm4); mulsd(xmm0, xmm6); mulpd(xmm6, Address(tmp, 112)); // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL addsd(xmm1, xmm2); unpckhpd(xmm2, xmm2); mulpd(xmm0, xmm5); addsd(xmm1, xmm0); por(xmm2, xmm7); unpckhpd(xmm0, xmm0); addsd(xmm0, xmm1); addsd(xmm0, xmm6); addl(edx, 894); cmpl(edx, 1916); jcc (Assembler::above, L_2TAG_PACKET_1_0_2); mulsd(xmm0, xmm2); addsd(xmm0, xmm2); jmp(L_2TAG_PACKET_2_0_2); bind(L_2TAG_PACKET_1_0_2); fnstcw(Address(rsp, 24)); movzwl(edx, Address(rsp, 24)); orl(edx, 768); movw(Address(rsp, 28), edx); fldcw(Address(rsp, 28)); movl(edx, eax); sarl(eax, 1); subl(edx, eax); movdqu(xmm6, Address(tmp, 0)); // 0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL pandn(xmm6, xmm2); addl(eax, 1023); movdl(xmm3, eax); psllq(xmm3, 52); por(xmm6, xmm3); addl(edx, 1023); movdl(xmm4, edx); psllq(xmm4, 52); movsd(Address(rsp, 8), xmm0); fld_d(Address(rsp, 8)); movsd(Address(rsp, 16), xmm6); fld_d(Address(rsp, 16)); fmula(1); faddp(1); movsd(Address(rsp, 8), xmm4); fld_d(Address(rsp, 8)); fmulp(1); fstp_d(Address(rsp, 8)); movsd(xmm0,Address(rsp, 8)); fldcw(Address(rsp, 24)); pextrw(ecx, xmm0, 3); andl(ecx, 32752); cmpl(ecx, 32752); jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2); cmpl(ecx, 0); jcc(Assembler::equal, L_2TAG_PACKET_4_0_2); jmp(L_2TAG_PACKET_2_0_2); cmpl(ecx, INT_MIN); jcc(Assembler::less, L_2TAG_PACKET_3_0_2); cmpl(ecx, -1064950997); jcc(Assembler::less, L_2TAG_PACKET_2_0_2); jcc(Assembler::greater, L_2TAG_PACKET_4_0_2); movl(edx, Address(rsp, 128)); cmpl(edx ,-17155601); jcc(Assembler::less, L_2TAG_PACKET_2_0_2); jmp(L_2TAG_PACKET_4_0_2); bind(L_2TAG_PACKET_3_0_2); movl(edx, 14); jmp(L_2TAG_PACKET_5_0_2); bind(L_2TAG_PACKET_4_0_2); movl(edx, 15); bind(L_2TAG_PACKET_5_0_2); movsd(Address(rsp, 0), xmm0); movsd(xmm0, Address(rsp, 128)); fld_d(Address(rsp, 0)); jmp(L_2TAG_PACKET_6_0_2); bind(L_2TAG_PACKET_7_0_2); cmpl(eax, 2146435072); jcc(Assembler::greaterEqual, L_2TAG_PACKET_8_0_2); movl(eax, Address(rsp, 132)); cmpl(eax, INT_MIN); jcc(Assembler::greaterEqual, L_2TAG_PACKET_9_0_2); movsd(xmm0, Address(tmp, 1208)); // 0xffffffffUL, 0x7fefffffUL mulsd(xmm0, xmm0); movl(edx, 14); jmp(L_2TAG_PACKET_5_0_2); bind(L_2TAG_PACKET_9_0_2); movsd(xmm0, Address(tmp, 1216)); mulsd(xmm0, xmm0); movl(edx, 15); jmp(L_2TAG_PACKET_5_0_2); bind(L_2TAG_PACKET_8_0_2); movl(edx, Address(rsp, 128)); cmpl(eax, 2146435072); jcc(Assembler::above, L_2TAG_PACKET_10_0_2); cmpl(edx, 0); jcc(Assembler::notEqual, L_2TAG_PACKET_10_0_2); movl(eax, Address(rsp, 132)); cmpl(eax, 2146435072); jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2); movsd(xmm0, Address(tmp, 1192)); // 0x00000000UL, 0x7ff00000UL jmp(L_2TAG_PACKET_2_0_2); bind(L_2TAG_PACKET_11_0_2); movsd(xmm0, Address(tmp, 1200)); // 0x00000000UL, 0x00000000UL jmp(L_2TAG_PACKET_2_0_2); bind(L_2TAG_PACKET_10_0_2); movsd(xmm0, Address(rsp, 128)); addsd(xmm0, xmm0); jmp(L_2TAG_PACKET_2_0_2); bind(L_2TAG_PACKET_0_0_2); movl(eax, Address(rsp, 132)); andl(eax, 2147483647); cmpl(eax, 1083179008); jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2); movsd(xmm0, Address(rsp, 128)); addsd(xmm0, Address(tmp, 1184)); // 0x00000000UL, 0x3ff00000UL jmp(L_2TAG_PACKET_2_0_2); bind(L_2TAG_PACKET_2_0_2); movsd(Address(rsp, 48), xmm0); fld_d(Address(rsp, 48)); bind(L_2TAG_PACKET_6_0_2); movl(tmp, Address(rsp, 64)); }