/*
 * Return the double adjacent to x in the direction of y.
 *
 *  - If either argument is NaN the result is x + y.
 *  - If x == y the result is y.
 *  - If x is zero the result is the smallest subnormal carrying y's sign.
 *
 * The value is stepped by treating the (high, low) 32-bit words of x as one
 * 64-bit magnitude and adding or subtracting 1.  The volatile temporary keeps
 * the x * x products from being optimised away; they are evaluated only to
 * raise the underflow flag.
 */
double npy_nextafter(double x, double y)
{
    volatile double t;
    npy_int32 hx, hy, ix, iy, expo;
    npy_uint32 lx, ly;
    int x_is_nan, y_is_nan, shrink;

    EXTRACT_WORDS(hx, lx, x);
    EXTRACT_WORDS(hy, ly, y);
    ix = hx & 0x7fffffff; /* high word of |x| */
    iy = hy & 0x7fffffff; /* high word of |y| */

    x_is_nan = (ix >= 0x7ff00000) && (((ix - 0x7ff00000) | lx) != 0);
    y_is_nan = (iy >= 0x7ff00000) && (((iy - 0x7ff00000) | ly) != 0);
    if (x_is_nan || y_is_nan) {
        return x + y;
    }

    if (x == y) {
        return y;
    }

    if ((ix | lx) == 0) {
        /* x is +-0: answer is +-minsubnormal, signed like y. */
        INSERT_WORDS(x, hy & 0x80000000, 1);
        t = x * x; /* raise underflow flag */
        return (t == x) ? t : x;
    }

    /* Decide whether the magnitude of x has to shrink or grow by one ulp. */
    if (hx >= 0) {
        /* x > 0: shrink when x > y. */
        shrink = (hx > hy) || ((hx == hy) && (lx > ly));
    }
    else {
        /* x < 0: shrink when x < y. */
        shrink = (hy >= 0) || (hx > hy) || ((hx == hy) && (lx > ly));
    }

    if (shrink) {
        if (lx == 0) {
            hx -= 1; /* borrow from the high word */
        }
        lx -= 1;
    }
    else {
        lx += 1;
        if (lx == 0) {
            hx += 1; /* carry into the high word */
        }
    }

    expo = hx & 0x7ff00000;
    if (expo >= 0x7ff00000) {
        return x + x; /* overflow */
    }
    if (expo < 0x00100000) {
        /* Result is subnormal: raise the underflow flag. */
        t = x * x;
        if (t != x) {
            INSERT_WORDS(y, hx, lx);
            return y;
        }
    }

    INSERT_WORDS(x, hx, lx);
    return x;
}
/*
 * Return the double adjacent to x in the direction of the long double y.
 *
 *  - If either argument is NaN the result is x + y.
 *  - If x (widened to long double) equals y, y is returned.
 *  - If x is zero the result is the smallest subnormal with y's sign.
 *
 * Intel's extended format stores the normally implicit leading 1 explicitly,
 * which is why the NaN test on y masks off the top bit of its high mantissa
 * word.
 */
double __nexttoward(double x, long double y)
{
    int32_t hx, ix, iy;
    u_int32_t lx, hy, ly, esy, expo;
    int x_is_nan, y_is_nan, shrink;

    EXTRACT_WORDS(hx, lx, x);
    GET_LDOUBLE_WORDS(esy, hy, ly, y);
    ix = hx & 0x7fffffff; /* high word of |x| */
    iy = esy & 0x7fff;    /* exponent field of |y| */

    x_is_nan = (ix >= 0x7ff00000) && (((ix - 0x7ff00000) | lx) != 0);
    y_is_nan = (iy >= 0x7fff) && (((hy & 0x7fffffff) | ly) != 0);
    if (x_is_nan || y_is_nan) {
        return x + y;
    }

    if ((long double) x == y) {
        return y;
    }

    if ((ix | lx) == 0) {
        /* x is +-0: answer is +-minsubnormal, signed like y. */
        double u;
        INSERT_WORDS(x, (esy & 0x8000) << 16, 1);
        u = math_opt_barrier (x);
        u = u * u;
        math_force_eval (u); /* raise underflow flag */
        return x;
    }

    /* The magnitude shrinks when x must move towards zero. */
    if (hx >= 0) {
        shrink = (x > y);
    }
    else {
        shrink = (x < y);
    }

    if (shrink) {
        if (lx == 0) {
            hx -= 1; /* borrow from the high word */
        }
        lx -= 1;
    }
    else {
        lx += 1;
        if (lx == 0) {
            hx += 1; /* carry into the high word */
        }
    }

    expo = hx & 0x7ff00000;
    if (expo >= 0x7ff00000) {
        double u = x + x; /* overflow */
        math_force_eval (u);
    }
    if (expo < 0x00100000) {
        double u = x * x; /* underflow */
        math_force_eval (u); /* raise underflow flag */
    }

    INSERT_WORDS(x, hx, lx);
    return x;
}
/*
 * Return the double adjacent to x in the direction of y.
 *
 *  - If either argument is NaN the result is x + y.
 *  - If x == y the result is y.
 *  - If x is zero the result is the smallest subnormal carrying y's sign.
 *  - If the step leaves the finite range, x + x is returned (overflow).
 *
 * NOTE: this variant performs no extra arithmetic to raise the underflow
 * flag when the result is zero-adjacent or subnormal; that code had been
 * disabled and has been removed together with its unused temporary.
 */
double nextafter(double x, double y)
{
    int32_t hx, hy, ix, iy;
    u_int32_t lx, ly;

    EXTRACT_WORDS(hx, lx, x);
    EXTRACT_WORDS(hy, ly, y);
    ix = hx & 0x7fffffff; /* |x| */
    iy = hy & 0x7fffffff; /* |y| */

    if (((ix >= 0x7ff00000) && ((ix - 0x7ff00000) | lx) != 0) || /* x is nan */
        ((iy >= 0x7ff00000) && ((iy - 0x7ff00000) | ly) != 0)) { /* y is nan */
        return x + y;
    }
    if (x == y) {
        return y; /* x == y, return y */
    }
    if ((ix | lx) == 0) { /* x == 0 */
        INSERT_WORDS(x, hy & 0x80000000, 1); /* return +-minsubnormal */
        return x;
    }

    if (hx >= 0) { /* x > 0 */
        if (hx > hy || ((hx == hy) && (lx > ly))) { /* x > y, x -= ulp */
            if (lx == 0) {
                hx -= 1;
            }
            lx -= 1;
        } else { /* x < y, x += ulp */
            lx += 1;
            if (lx == 0) {
                hx += 1;
            }
        }
    } else { /* x < 0 */
        if (hy >= 0 || hx > hy || ((hx == hy) && (lx > ly))) { /* x < y, x -= ulp */
            if (lx == 0) {
                hx -= 1;
            }
            lx -= 1;
        } else { /* x > y, x += ulp */
            lx += 1;
            if (lx == 0) {
                hx += 1;
            }
        }
    }

    hy = hx & 0x7ff00000;
    if (hy >= 0x7ff00000) {
        x = x + x; /* overflow */
        return x;
    }

    INSERT_WORDS(x, hx, lx);
    return x;
}
/*
 * Return x * 2**n, computed without calling pow().
 *
 * The scaling is done in at most three multiplications so that every
 * intermediate scale factor is itself a representable normal double.
 *
 * When scaling down, each pre-scaling step multiplies by 2**-1022 * 2**53
 * (= 2**-969) rather than 2**-1022.  This keeps the exponent applied by the
 * final multiplication below -53, which avoids rounding twice when both an
 * intermediate value and the final result would fall in the subnormal range
 * (e.g. scalbn(0x1.7ffffffffffffp-1, -1073) must give 0x1p-1074).
 */
double scalbn(double x, int n)
{
    union {
        double f;
        uint64_t i;
    } scale;

    if (n > 1023) {
        x *= 0x1p1023;
        n -= 1023;
        if (n > 1023) {
            x *= 0x1p1023;
            n -= 1023;
            if (n > 1023) {
                n = 1023; /* x has already overflowed or is 0/inf/nan */
            }
        }
    } else if (n < -1022) {
        /* make sure final n < -53 to avoid double rounding in the
           subnormal range */
        x *= 0x1p-1022 * 0x1p53;
        n += 1022 - 53;
        if (n < -1022) {
            x *= 0x1p-1022 * 0x1p53;
            n += 1022 - 53;
            if (n < -1022) {
                n = -1022; /* x has already underflowed to 0 */
            }
        }
    }

    /* Build 2**n directly: biased exponent in bits 52..62, zero mantissa. */
    scale.i = (uint64_t)(0x3ff + n) << 52;
    return x * scale.f;
}
/*
 * Kernel cosine: evaluates cos(x + y) for a reduced argument, where y is the
 * low-order tail of x.
 *
 * The polynomial coefficients C1..C6 and the constant `one` are defined
 * elsewhere in the file.
 */
double __kernel_cos(double x, double y)
{
    double head, half_z, xx, poly, quarter;
    int32_t ix;

    GET_HIGH_WORD(ix, x);
    ix &= 0x7fffffff; /* high word of |x| */

    /* |x| < 2**-27: cos(x) rounds to 1; the cast generates inexact. */
    if (ix < 0x3e400000 && ((int)x) == 0) {
        return one;
    }

    xx = x*x;
    poly = xx*(C1+xx*(C2+xx*(C3+xx*(C4+xx*(C5+xx*C6)))));

    if (ix < 0x3FD33333) {
        /* |x| < 0.3 */
        return one - (0.5*xx - (xx*poly - x*y));
    }

    if (ix > 0x3fe90000) {
        /* |x| > 0.78125 */
        quarter = 0.28125;
    } else {
        /* roughly x/4, built by lowering the exponent by two */
        INSERT_WORDS(quarter, ix-0x00200000, 0);
    }
    half_z = 0.5*xx-quarter;
    head = one-quarter;
    return head - (half_z - (xx*poly-x*y));
}
/* Return the least floating-point number greater than X.
   NaN yields X + X, either zero yields DBL_TRUE_MIN, and +Inf is
   returned unchanged.  */
double
__nextup (double x)
{
  int32_t hi, abs_hi;
  uint32_t lo;

  EXTRACT_WORDS (hi, lo, x);
  abs_hi = hi & 0x7fffffff;

  /* X is NaN.  */
  if ((abs_hi >= 0x7ff00000) && (((abs_hi - 0x7ff00000) | lo) != 0))
    return x + x;

  /* X is +-0.  */
  if ((abs_hi | lo) == 0)
    return DBL_TRUE_MIN;

  if (hi < 0)
    {
      /* X < 0: moving up shrinks the magnitude.  */
      if (lo == 0)
        hi -= 1;
      lo -= 1;
    }
  else
    {
      /* X > 0: moving up grows the magnitude, unless already +Inf.  */
      if (isinf (x))
        return x;
      lo += 1;
      if (lo == 0)
        hi += 1;
    }

  INSERT_WORDS (x, hi, lo);
  return x;
}
/*
 * Return a double whose high word is 0x7ff00000 and whose low word is 0,
 * i.e. the IEEE-754 bit pattern of positive infinity.
 */
double infinity()
{
    double inf;

    INSERT_WORDS(inf, 0x7ff00000, 0);
    return inf;
}
/*
 * FIXME: There is a lot of redundancy between _next* and npy_nextafter*;
 * refactor this at some point.
 *
 * Step x by one ulp:
 *   p >= 0: the (high, low) word pair of x is incremented,
 *   p <  0: the (high, low) word pair of x is decremented.
 *
 * NaN is returned unchanged.  For x == 0 the result is the smallest
 * subnormal, positive when p >= 0 and negative otherwise.
 *
 * NOTE(review): the step is applied to the raw words, sign bit included, so
 * for negative x it presumably moves the magnitude rather than the signed
 * value -- confirm against callers.
 */
double _next(double x, int p)
{
    volatile double t;
    npy_int32 hx, ix, expo;
    npy_uint32 lx;

    EXTRACT_WORDS(hx, lx, x);
    ix = hx & 0x7fffffff; /* high word of |x| */

    if ((ix >= 0x7ff00000) && (((ix - 0x7ff00000) | lx) != 0)) {
        return x; /* x is nan */
    }

    if ((ix | lx) == 0) {
        /* x == 0: return +minsubnormal or -minsubnormal */
        INSERT_WORDS(x, (p >= 0) ? 0x0 : 0x80000000, 1);
        t = x * x; /* raise underflow flag */
        return (t == x) ? t : x;
    }

    if (p >= 0) {
        /* x += ulp */
        lx += 1;
        if (lx == 0) {
            hx += 1; /* carry into the high word */
        }
    }
    else {
        /* x -= ulp */
        if (lx == 0) {
            hx -= 1; /* borrow from the high word */
        }
        lx -= 1;
    }

    expo = hx & 0x7ff00000;
    if (expo >= 0x7ff00000) {
        return x + x; /* overflow */
    }
    if (expo < 0x00100000) {
        /* Result is subnormal: raise the underflow flag. */
        t = x * x;
        if (t != x) {
            INSERT_WORDS(x, hx, lx);
            return x;
        }
    }

    INSERT_WORDS(x, hx, lx);
    return x;
}
/*
 * exp(x)/2 for x >= log(DBL_MAX); slightly better than
 * 0.5*exp(x/2)*exp(x/2).
 *
 * k and kln2 are defined elsewhere in the file.
 */
double __expo2(double x)
{
    double scale, reduced;
    uint32_t scale_hi;

    /* note that k is odd and scale*scale overflows */
    scale_hi = (uint32_t)(0x3ff + k/2) << 20;
    INSERT_WORDS(scale, scale_hi, 0);

    /* exp(x - k ln2) * 2**(k-1), applying the scale in two steps */
    reduced = exp(x - kln2);
    return reduced * scale * scale;
}
double nan(const char *unused) { double x; #if __GNUC_PREREQ (3, 3) x = __builtin_nan(""); #else INSERT_WORDS(x,0x7ff80000,0); #endif return x; }
double nexttoward(double x, long double y) { union IEEEl2bits uy; volatile double t; int32_t hx,ix; u_int32_t lx; EXTRACT_WORDS(hx,lx,x); ix = hx&0x7fffffff; /* |x| */ uy.e = y; if(((ix>=0x7ff00000)&&((ix-0x7ff00000)|lx)!=0) || (uy.bits.exp == 0x7fff && ((uy.bits.manh&~LDBL_NBIT)|uy.bits.manl) != 0)) return x+y; /* x or y is nan */ if(x==y) return (double)y; /* x=y, return y */ if(x==0.0) { INSERT_WORDS(x,uy.bits.sign<<31,1); /* return +-minsubnormal */ t = x*x; if(t==x) return t; else return x; /* raise underflow flag */ } if(hx>0.0 ^ x < y) { /* x -= ulp */ if(lx==0) hx -= 1; lx -= 1; } else { /* x += ulp */ lx += 1; if(lx==0) hx += 1; } ix = hx&0x7ff00000; if(ix>=0x7ff00000) return x+x; /* overflow */ if(ix<0x00100000) { /* underflow */ t = x*x; if(t!=x) { /* raise underflow flag */ INSERT_WORDS(x,hx,lx); return x; } } INSERT_WORDS(x,hx,lx); return x; }
double cbrt(double x) { int32_t hx; double r,s,t=0.0,w; u_int32_t sign; u_int32_t high,low; GET_HIGH_WORD(hx,x); sign=hx&0x80000000; /* sign= sign(x) */ hx ^=sign; if(hx>=0x7ff00000) return(x+x); /* cbrt(NaN,INF) is itself */ GET_LOW_WORD(low,x); if((hx|low)==0) return(x); /* cbrt(0) is itself */ SET_HIGH_WORD(x,hx); /* x <- |x| */ /* rough cbrt to 5 bits */ if(hx<0x00100000) /* subnormal number */ {SET_HIGH_WORD(t,0x43500000); /* set t= 2**54 */ t*=x; GET_HIGH_WORD(high,t); SET_HIGH_WORD(t,high/3+B2); } else SET_HIGH_WORD(t,hx/3+B1); /* new cbrt to 23 bits, may be implemented in single precision */ r=t*t/x; s=C+r*t; t*=G+F/(s+E+D/s); /* chopped to 20 bits and make it larger than cbrt(x) */ GET_HIGH_WORD(high,t); INSERT_WORDS(t,high+0x00000001,0); /* one step newton iteration to 53 bits with error less than 0.667 ulps */ s=t*t; /* t*t is exact */ r=x/s; w=t+t; r=(r-t)/(w+r); /* r-s is exact */ t=t+t*r; /* retore the sign bit */ GET_HIGH_WORD(high,t); SET_HIGH_WORD(t,high|sign); return(t); }
double expm1(double x) { double y,hi,lo,c,t,e,hxs,hfx,r1,twopk; int32_t k,xsb; uint32_t hx; GET_HIGH_WORD(hx,x); xsb = hx&0x80000000; /* sign bit of x */ if(xsb==0) y=x; else y= -x; /* y = |x| */ hx &= 0x7fffffff; /* high word of |x| */ /* filter out huge and non-finite argument */ if(hx >= 0x4043687A) { /* if |x|>=56*ln2 */ if(hx >= 0x40862E42) { /* if |x|>=709.78... */ if(hx>=0x7ff00000) { uint32_t low; GET_LOW_WORD(low,x); if(((hx&0xfffff)|low)!=0) return x+x; /* NaN */ else return (xsb==0)? x:-1.0;/* exp(+-inf)={inf,-1} */ } if(x > o_threshold) return huge*huge; /* overflow */ } if(xsb!=0) { /* x < -56*ln2, return -1.0 with inexact */ if(x+tiny<0.0) /* raise inexact */ return tiny-one; /* return -1 */ } } /* argument reduction */ if(hx > 0x3fd62e42) { /* if |x| > 0.5 ln2 */ if(hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */ if(xsb==0) {hi = x - ln2_hi; lo = ln2_lo; k = 1;} else {hi = x + ln2_hi; lo = -ln2_lo; k = -1;} } else { k = invln2*x+((xsb==0)?0.5:-0.5); t = k; hi = x - t*ln2_hi; /* t*ln2_hi is exact here */ lo = t*ln2_lo; } x = hi - lo; c = (hi-x)-lo; } else if(hx < 0x3c900000) { /* when |x|<2**-54, return x */ t = huge+x; /* return x with inexact flags when x!=0 */ return x - (t-(huge+x)); } else k = 0; /* x is now in primary range */ hfx = 0.5*x; hxs = x*hfx; r1 = one+hxs*(Q1+hxs*(Q2+hxs*(Q3+hxs*(Q4+hxs*Q5)))); t = 3.0-r1*hfx; e = hxs*((r1-t)/(6.0 - x*t)); if(k==0) return x - (x*e-hxs); /* c is 0 */ else { INSERT_WORDS(twopk,0x3ff00000+(k<<20),0); /* 2^k */ e = (x*(e-c)-c); e -= hxs; if(k== -1) return 0.5*(x-e)-0.5; if(k==1) { if(x < -0.25) return -2.0*(e-(x+0.5)); else return one+2.0*(x-e); } if (k <= -2 || k>56) { /* suffice to return exp(x)-1 */ y = one-(e-x); if (k == 1024) y = y*2.0*0x1p1023; else y = y*twopk; return y-one; } t = one; if(k<20) { SET_HIGH_WORD(t,0x3ff00000 - (0x200000>>k)); /* t=1-2^-k */ y = t-(e-x); y = y*twopk; } else {
double cbrt(double x) { int32_t hx; union { double value; u_int64_t bits; } u; double r,s,t=0.0,w; u_int32_t sign; u_int32_t high,low; EXTRACT_WORDS(hx,low,x); sign=hx&0x80000000; /* sign= sign(x) */ hx ^=sign; if(hx>=0x7ff00000) return(x+x); /* cbrt(NaN,INF) is itself */ /* * Rough cbrt to 5 bits: * cbrt(2**e*(1+m) ~= 2**(e/3)*(1+(e%3+m)/3) * where e is integral and >= 0, m is real and in [0, 1), and "/" and * "%" are integer division and modulus with rounding towards minus * infinity. The RHS is always >= the LHS and has a maximum relative * error of about 1 in 16. Adding a bias of -0.03306235651 to the * (e%3+m)/3 term reduces the error to about 1 in 32. With the IEEE * floating point representation, for finite positive normal values, * ordinary integer divison of the value in bits magically gives * almost exactly the RHS of the above provided we first subtract the * exponent bias (1023 for doubles) and later add it back. We do the * subtraction virtually to keep e >= 0 so that ordinary integer * division rounds towards minus infinity; this is also efficient. */ if(hx<0x00100000) { /* zero or subnormal? */ if((hx|low)==0) return(x); /* cbrt(0) is itself */ SET_HIGH_WORD(t,0x43500000); /* set t= 2**54 */ t*=x; GET_HIGH_WORD(high,t); INSERT_WORDS(t,sign|((high&0x7fffffff)/3+B2),0); } else INSERT_WORDS(t,sign|(hx/3+B1),0); /* * New cbrt to 23 bits: * cbrt(x) = t*cbrt(x/t**3) ~= t*P(t**3/x) * where P(r) is a polynomial of degree 4 that approximates 1/cbrt(r) * to within 2**-23.5 when |r - 1| < 1/10. The rough approximation * has produced t such than |t/cbrt(x) - 1| ~< 1/32, and cubing this * gives us bounds for r = t**3/x. * * Try to optimize for parallel evaluation as in k_tanf.c. */ r=(t*t)*(t/x); t=t*((P0+r*(P1+r*P2))+((r*r)*r)*(P3+r*P4)); /* * Round t away from zero to 23 bits (sloppily except for ensuring that * the result is larger in magnitude than cbrt(x) but not much more than * 2 23-bit ulps larger). 
With rounding towards zero, the error bound * would be ~5/6 instead of ~4/6. With a maximum error of 2 23-bit ulps * in the rounded t, the infinite-precision error in the Newton * approximation barely affects third digit in the final error * 0.667; the error in the rounded t can be up to about 3 23-bit ulps * before the final error is larger than 0.667 ulps. */ u.value=t; u.bits=(u.bits+0x80000000)&0xffffffffc0000000ULL; t=u.value; /* one step Newton iteration to 53 bits with error < 0.667 ulps */ s=t*t; /* t*t is exact */ r=x/s; /* error <= 0.5 ulps; |r| < |t| */ w=t+t; /* t+t is exact */ r=(r-t)/(w+r); /* r-t is exact; w+r ~= 3*t */ t=t+t*r; /* error <= 0.5 + 0.5/3 + epsilon */ return(t); }
/* Round X to the nearest integer value, with halfway cases going to the
   even neighbour.  Infinities and NaNs yield X + X (which quiets signaling
   NaNs); values that are already integers are returned unchanged.  The
   work is done directly on the high and low 32-bit words of X.  BIAS,
   MANT_DIG and MAX_EXP are defined elsewhere in the file.  */
double
roundeven (double x)
{
  uint32_t hi, lo, abs_hi;

  EXTRACT_WORDS (hi, lo, x);
  abs_hi = hi & 0x7fffffff;
  int exponent = abs_hi >> (MANT_DIG - 1 - 32);

  if (exponent >= BIAS + MANT_DIG - 1)
    {
      /* Integer, infinity or NaN.  Only infinity and NaN have the maximum
         exponent; adding X to itself quiets signaling NaNs.  */
      return (exponent == MAX_EXP) ? x + x : x;
    }

  if (exponent >= BIAS + MANT_DIG - 32)
    {
      /* Not necessarily an integer; integer bit is in low word.
         Locate the bits with exponents 0 and -1.  */
      int int_pos = (BIAS + MANT_DIG - 1) - exponent;
      int half_pos = int_pos - 1;
      uint32_t half_bit = 1U << half_pos;
      uint32_t int_bit = 1U << int_pos;
      if ((lo & (int_bit | (half_bit - 1))) != 0)
        {
          /* Carry into the exponent works correctly.  No need to test
             whether HALF_BIT is set.  */
          lo += half_bit;
          if (lo < half_bit)
            hi += 1;
        }
      lo &= ~(int_bit - 1);
    }
  else if (exponent == BIAS + MANT_DIG - 33)
    {
      /* Not necessarily an integer; integer bit is bottom of high word,
         half bit is top of low word.  */
      if (((hi & 1) | (lo & 0x7fffffff)) != 0)
        {
          lo += 0x80000000;
          if (lo < 0x80000000)
            hi += 1;
        }
      lo = 0;
    }
  else if (exponent >= BIAS)
    {
      /* At least 1; not necessarily an integer, integer bit and half bit
         are in the high word.  Locate the bits with exponents 0 and -1
         (when the unbiased exponent is 0, the bit with exponent 0 is
         implicit, but as the bias is odd it is OK to take it from the low
         bit of the exponent).  */
      int int_pos = (BIAS + MANT_DIG - 33) - exponent;
      int half_pos = int_pos - 1;
      uint32_t half_bit = 1U << half_pos;
      uint32_t int_bit = 1U << int_pos;
      if (((hi & (int_bit | (half_bit - 1))) | lo) != 0)
        hi += half_bit;
      hi &= ~(int_bit - 1);
      lo = 0;
    }
  else if (exponent == BIAS - 1 && (abs_hi > 0x3fe00000 || lo != 0))
    {
      /* Interval (0.5, 1): rounds to +-1.  */
      hi = (hi & 0x80000000) | 0x3ff00000;
      lo = 0;
    }
  else
    {
      /* Rounds to 0, keeping the sign.  */
      hi &= 0x80000000;
      lo = 0;
    }

  INSERT_WORDS (x, hi, lo);
  return x;
}