double __ieee754_exp10 (double arg) { int32_t lx; double arg_high, arg_low; double exp_high, exp_low; if (!isfinite (arg)) return __ieee754_exp (arg); if (arg < DBL_MIN_10_EXP - DBL_DIG - 10) return DBL_MIN * DBL_MIN; else if (arg > DBL_MAX_10_EXP + 1) return DBL_MAX * DBL_MAX; else if (fabs (arg) < 0x1p-56) return 1.0; GET_LOW_WORD (lx, arg); lx &= 0xf8000000; arg_high = arg; SET_LOW_WORD (arg_high, lx); arg_low = arg - arg_high; exp_high = arg_high * log10_high; exp_low = arg_high * log10_low + arg_low * M_LN10; return __ieee754_exp (exp_high) * __ieee754_exp (exp_low); }
/*
 * Fused multiply-add: compute x * y + z with a single rounding error.
 *
 * A double has more than twice as much precision as a float, so
 * direct double-precision arithmetic suffices, except where double
 * rounding occurs.
 */
DLLEXPORT float
fmaf(float x, float y, float z)
{
	double prod, sum;
	u_int32_t hi_word, lo_word;

	prod = (double)x * y;
	sum = prod + z;
	EXTRACT_WORDS(hi_word, lo_word, sum);

	/*
	 * Common cases in which the double-precision result is fine as is.
	 * The checks are made in this order on purpose: the rounding mode
	 * is only queried when every cheaper test has failed.
	 */
	if ((lo_word & 0x1fffffff) != 0x10000000)	/* not a halfway case */
		return (sum);
	if ((hi_word & 0x7ff00000) == 0x7ff00000)	/* exponent all ones (NaN) */
		return (sum);
	if (sum - prod == z)				/* exact */
		return (sum);
	if (fegetround() != FE_TONEAREST)		/* not round-to-nearest */
		return (sum);

	/*
	 * The result is inexact and lies exactly halfway between two float
	 * values: redo the addition rounding toward zero so that the
	 * low-order bit can be adjusted in the direction of the error.
	 */
	fesetround(FE_TOWARDZERO);
	volatile double vprod = prod;	/* XXX work around gcc CSE bug */
	double adjusted = vprod + z;
	fesetround(FE_TONEAREST);

	if (sum == adjusted)
		SET_LOW_WORD(adjusted, lo_word + 1);
	return (adjusted);
}
/*
 * Tangent kernel.
 *
 * x, y: argument and a second term that is folded into the correction
 *       (NOTE(review): presumably the head and tail of a reduced
 *       argument -- confirm against callers).
 * odd:  when zero the polynomial value x + r is returned; otherwise an
 *       accurately computed -1/(x + r) is returned.
 *
 * For |x| >= 0.6744 the argument is first reflected through
 * pio4 + pio4lo and the result is rebuilt from the reflected value.
 */
double __tan(double x, double y, int odd)
{
	double_t xsq, xquad, r_odd, r_even, xcube, corr, sum, unit, res, recip;
	double sum_hi, recip_hi;
	uint32_t top;
	int is_big, negative;

	GET_HIGH_WORD(top, x);
	is_big = (top & 0x7fffffff) >= 0x3FE59428;	/* |x| >= 0.6744 */
	if (is_big) {
		negative = top >> 31;
		if (negative) {
			x = -x;
			y = -y;
		}
		x = (pio4 - x) + (pio4lo - y);
		y = 0.0;
	}

	xsq = x * x;
	xquad = xsq * xsq;

	/*
	 * Break x^5*(T[1]+x^2*T[2]+...) into
	 *   x^5*(T[1]+x^4*T[3]+...+x^20*T[11]) +
	 *   x^5*(x^2*(T[2]+x^4*T[4]+...+x^22*T[12]))
	 */
	r_odd = T[1] + xquad*(T[3] + xquad*(T[5] + xquad*(T[7] + xquad*(T[9] + xquad*T[11]))));
	r_even = xsq*(T[2] + xquad*(T[4] + xquad*(T[6] + xquad*(T[8] + xquad*(T[10] + xquad*T[12])))));
	xcube = xsq * x;
	corr = y + xsq*(xcube*(r_odd + r_even) + y) + xcube*T[0];
	sum = x + corr;

	if (is_big) {
		/* unit is +1 when odd == 0 and -1 when odd == 1 */
		unit = 1 - 2*odd;
		res = unit - 2.0 * (x + (corr - sum*sum/(sum + unit)));
		return negative ? -res : res;
	}

	if (!odd)
		return sum;

	/* -1.0/(x+r) has up to 2ulp error, so compute it accurately */
	sum_hi = sum;
	SET_LOW_WORD(sum_hi, 0);
	res = corr - (sum_hi - x);	/* sum_hi + res = corr + x */
	recip_hi = recip = -1.0 / sum;
	SET_LOW_WORD(recip_hi, 0);
	return recip_hi + recip*(1.0 + recip_hi*sum_hi + recip_hi*res);
}
double acos(double x) { double z, p, q, r, w, s, c, df; s32_t hx, ix; GET_HIGH_WORD(hx,x); ix = hx & 0x7fffffff; if (ix >= 0x3ff00000) { u32_t lx; GET_LOW_WORD(lx,x); if (((ix - 0x3ff00000) | lx) == 0) { if (hx > 0) return 0.0; else return pi + 2.0 * pio2_lo; } return (x - x) / (x - x); } if (ix < 0x3fe00000) { if (ix <= 0x3c600000) return pio2_hi + pio2_lo; z = x * x; p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * pS5))))); q = one + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4))); r = p / q; return pio2_hi - (x - (pio2_lo - x * r)); } else if (hx < 0) { z = (one + x) * 0.5; p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * pS5))))); q = one + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4))); s = sqrt(z); r = p / q; w = r * s - pio2_lo; return pi - 2.0 * (s + w); } else { z = (one - x) * 0.5; s = sqrt(z); df = s; SET_LOW_WORD(df,0); c = (z - df * df) / (s + df); p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * pS5))))); q = one + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4))); r = p / q; w = r * s + c; return 2.0 * (df + w); } }
double log10(double x) { double f,hfsq,hi,lo,r,val_hi,val_lo,w,y,y2; int32_t i,k,hx; uint32_t lx; EXTRACT_WORDS(hx, lx, x); k = 0; if (hx < 0x00100000) { /* x < 2**-1022 */ if (((hx&0x7fffffff)|lx) == 0) return -two54/0.0; /* log(+-0)=-inf */ if (hx<0) return (x-x)/0.0; /* log(-#) = NaN */ /* subnormal number, scale up x */ k -= 54; x *= two54; GET_HIGH_WORD(hx, x); } if (hx >= 0x7ff00000) return x+x; if (hx == 0x3ff00000 && lx == 0) return 0.0; /* log(1) = +0 */ k += (hx>>20) - 1023; hx &= 0x000fffff; i = (hx+0x95f64)&0x100000; SET_HIGH_WORD(x, hx|(i^0x3ff00000)); /* normalize x or x/2 */ k += i>>20; y = (double)k; f = x - 1.0; hfsq = 0.5*f*f; r = __log1p(f); /* See log2.c for details. */ hi = f - hfsq; SET_LOW_WORD(hi, 0); lo = (f - hi) - hfsq + r; val_hi = hi*ivln10hi; y2 = y*log10_2hi; val_lo = y*log10_2lo + (lo+hi)*ivln10lo + lo*ivln10hi; /* * Extra precision in for adding y*log10_2hi is not strictly needed * since there is no very large cancellation near x = sqrt(2) or * x = 1/sqrt(2), but we do it anyway since it costs little on CPUs * with some parallelism and it reduces the error for many args. */ w = y2 + val_hi; val_lo += (y2 - w) + val_hi; val_hi = w; return val_lo + val_hi; }
double asin(double x) { double t=0.0,w,p,q,c,r,s; int32_t hx,ix; GET_HIGH_WORD(hx, x); ix = hx & 0x7fffffff; if (ix >= 0x3ff00000) { /* |x|>= 1 */ uint32_t lx; GET_LOW_WORD(lx, x); if ((ix-0x3ff00000 | lx) == 0) /* asin(1) = +-pi/2 with inexact */ return x*pio2_hi + x*pio2_lo; return (x-x)/(x-x); /* asin(|x|>1) is NaN */ } else if (ix < 0x3fe00000) { /* |x|<0.5 */ if (ix < 0x3e500000) { /* if |x| < 2**-26 */ if (huge+x > 1.0) return x; /* return x with inexact if x!=0*/ } t = x*x; p = t*(pS0+t*(pS1+t*(pS2+t*(pS3+t*(pS4+t*pS5))))); q = 1.0+t*(qS1+t*(qS2+t*(qS3+t*qS4))); w = p/q; return x + x*w; } /* 1 > |x| >= 0.5 */ w = 1.0 - fabs(x); t = w*0.5; p = t*(pS0+t*(pS1+t*(pS2+t*(pS3+t*(pS4+t*pS5))))); q = 1.0+t*(qS1+t*(qS2+t*(qS3+t*qS4))); s = sqrt(t); if (ix >= 0x3FEF3333) { /* if |x| > 0.975 */ w = p/q; t = pio2_hi-(2.0*(s+s*w)-pio2_lo); } else { w = s; SET_LOW_WORD(w,0); c = (t-w*w)/(s+w); r = p/q; p = 2.0*s*r-(pio2_lo-2.0*c); q = pio4_hi - 2.0*w; t = pio4_hi - (p-q); } if (hx > 0) return t; return -t; }
double __ieee754_acos(double x) { double z, p, q, r, w, s, c, df; int32_t hx, ix; GET_HIGH_WORD(hx, x); ix = hx & 0x7fffffff; if (ix >= 0x3ff00000) { /* |x| >= 1 */ uint32_t lx; GET_LOW_WORD(lx, x); if (((ix - 0x3ff00000) | lx) == 0) { /* |x|==1 */ if (hx>0) return 0.0; /* acos(1) = 0 */ else return pi + 2.0*pio2_lo; /* acos(-1)= pi */ } return (x - x) / (x - x); /* acos(|x|>1) is NaN */ } if (ix<0x3fe00000) { /* |x| < 0.5 */ if (ix <= 0x3c600000) return pio2_hi + pio2_lo;/*if|x|<2**-57*/ z = x*x; p = z*(pS0 + z*(pS1 + z*(pS2 + z*(pS3 + z*(pS4 + z*pS5))))); q = one + z*(qS1 + z*(qS2 + z*(qS3 + z*qS4))); r = p / q; return pio2_hi - (x - (pio2_lo - x*r)); } else if (hx<0) { /* x < -0.5 */ z = (one + x)*0.5; p = z*(pS0 + z*(pS1 + z*(pS2 + z*(pS3 + z*(pS4 + z*pS5))))); q = one + z*(qS1 + z*(qS2 + z*(qS3 + z*qS4))); s = sqrt(z); r = p / q; w = r*s - pio2_lo; return pi - 2.0*(s + w); } else { /* x > 0.5 */ z = (one - x)*0.5; s = sqrt(z); df = s; SET_LOW_WORD(df, 0); c = (z - df*df) / (s + df); p = z*(pS0 + z*(pS1 + z*(pS2 + z*(pS3 + z*(pS4 + z*pS5))))); q = one + z*(qS1 + z*(qS2 + z*(qS3 + z*qS4))); r = p / q; w = r*s + c; return 2.0*(df + w); } }
double __builtin_cbrt(double x) { int hx; double r,s,t=0.0,w; unsigned sign; hx = GET_HI(x); /* high word of x */ sign=hx&0x80000000; /* sign= sign(x) */ hx ^=sign; if(hx>=0x7ff00000) return(x); /* cbrt(NaN,INF) is itself */ if((hx|GET_LO(x))==0) return(x); /* cbrt(0) is itself */ SET_HIGH_WORD(x, hx); /* x <- |x| */ /* rough cbrt to 5 bits */ if(hx<0x00100000) /* subnormal number */ {SET_HIGH_WORD(t, 0x43500000); /* set t= 2**54 */ t*=x; SET_HIGH_WORD(t,GET_HI(t)/3+B2); } else SET_HIGH_WORD(t,hx/3+B1); /* new cbrt to 23 bits, may be implemented in single precision */ r=t*t/x; s=C+r*t; t*=G+F/(s+E+D/s); /* chopped to 20 bits and make it larger than cbrt(x) */ SET_LOW_WORD(t,0); SET_HIGH_WORD(t, GET_HI(t) + 0x00000001); /* one step newton iteration to 53 bits with error less than 0.667 ulps */ s=t*t; /* t*t is exact */ r=x/s; w=t+t; r=(r-t)/(w+r); /* r-s is exact */ t=t+t*r; /* retore the sign bit */ SET_HIGH_WORD(t, GET_HI(t) | sign); return(t); }
double __hide_ieee754_asin(double x) { double t=0.0,w,p,q,c,r,s; int hx,ix; hx = GET_HI(x); ix = hx&0x7fffffff; if(ix>= 0x3ff00000) { /* |x|>= 1 */ if(((ix-0x3ff00000)|GET_LO(x))==0) /* asin(1)=+-pi/2 with inexact */ return x*pio2_hi+x*pio2_lo; return __builtin_nan(""); /* asin(|x|>1) is NaN */ } else if (ix<0x3fe00000) { /* |x|<0.5 */ if(ix<0x3e400000) { /* if |x| < 2**-27 */ if(huge+x>one) return x;/* return x with inexact if x!=0*/ } else t = x*x; p = t*(pS0+t*(pS1+t*(pS2+t*(pS3+t*(pS4+t*pS5))))); q = one+t*(qS1+t*(qS2+t*(qS3+t*qS4))); w = p/q; return x+x*w; } /* 1> |x|>= 0.5 */ w = one-__builtin_fabs(x); t = w*0.5; p = t*(pS0+t*(pS1+t*(pS2+t*(pS3+t*(pS4+t*pS5))))); q = one+t*(qS1+t*(qS2+t*(qS3+t*qS4))); s = __builtin_sqrt(t); if(ix>=0x3FEF3333) { /* if |x| > 0.975 */ w = p/q; t = pio2_hi-(2.0*(s+s*w)-pio2_lo); } else { w = s; SET_LOW_WORD(w, 0); c = (t-w*w)/(s+w); r = p/q; p = 2.0*s*r-(pio2_lo-2.0*c); q = pio4_hi-2.0*w; t = pio4_hi-(p-q); } if(hx>0) return t; else return -t; }
double log2(double x) { double f,hfsq,hi,lo,r,val_hi,val_lo,w,y; int32_t i,k,hx; uint32_t lx; EXTRACT_WORDS(hx, lx, x); k = 0; if (hx < 0x00100000) { /* x < 2**-1022 */ if (((hx&0x7fffffff)|lx) == 0) return -two54/0.0; /* log(+-0)=-inf */ if (hx < 0) return (x-x)/0.0; /* log(-#) = NaN */ /* subnormal number, scale up x */ k -= 54; x *= two54; GET_HIGH_WORD(hx, x); } if (hx >= 0x7ff00000) return x+x; if (hx == 0x3ff00000 && lx == 0) return 0.0; /* log(1) = +0 */ k += (hx>>20) - 1023; hx &= 0x000fffff; i = (hx+0x95f64) & 0x100000; SET_HIGH_WORD(x, hx|(i^0x3ff00000)); /* normalize x or x/2 */ k += i>>20; y = (double)k; f = x - 1.0; hfsq = 0.5*f*f; r = __log1p(f); /* * f-hfsq must (for args near 1) be evaluated in extra precision * to avoid a large cancellation when x is near sqrt(2) or 1/sqrt(2). * This is fairly efficient since f-hfsq only depends on f, so can * be evaluated in parallel with R. Not combining hfsq with R also * keeps R small (though not as small as a true `lo' term would be), * so that extra precision is not needed for terms involving R. * * Compiler bugs involving extra precision used to break Dekker's * theorem for spitting f-hfsq as hi+lo, unless double_t was used * or the multi-precision calculations were avoided when double_t * has extra precision. These problems are now automatically * avoided as a side effect of the optimization of combining the * Dekker splitting step with the clear-low-bits step. * * y must (for args near sqrt(2) and 1/sqrt(2)) be added in extra * precision to avoid a very large cancellation when x is very near * these values. Unlike the above cancellations, this problem is * specific to base 2. It is strange that adding +-1 is so much * harder than adding +-ln2 or +-log10_2. * * This uses Dekker's theorem to normalize y+val_hi, so the * compiler bugs are back in some configurations, sigh. 
And I * don't want to used double_t to avoid them, since that gives a * pessimization and the support for avoiding the pessimization * is not yet available. * * The multi-precision calculations for the multiplications are * routine. */ hi = f - hfsq; SET_LOW_WORD(hi, 0); lo = (f - hi) - hfsq + r; val_hi = hi*ivln2hi; val_lo = (lo+hi)*ivln2lo + lo*ivln2hi; /* spadd(val_hi, val_lo, y), except for not using double_t: */ w = y + val_hi; val_lo += (y - w) + val_hi; val_hi = w; return val_lo + val_hi; }