DLLEXPORT long double cbrtl(long double x) { union IEEEl2bits u, v; long double r, s, t, w; double dr, dt, dx; float ft, fx; u_int32_t hx; u_int16_t expsign; int k; u.e = x; expsign = u.xbits.expsign; k = expsign & 0x7fff; /* * If x = +-Inf, then cbrt(x) = +-Inf. * If x = NaN, then cbrt(x) = NaN. */ if (k == BIAS + LDBL_MAX_EXP) return (x + x); #ifdef __i386__ fp_prec_t oprec; oprec = fpgetprec(); if (oprec != FP_PE) fpsetprec(FP_PE); #endif if (k == 0) { /* If x = +-0, then cbrt(x) = +-0. */ if ((u.bits.manh | u.bits.manl) == 0) { #ifdef __i386__ if (oprec != FP_PE) fpsetprec(oprec); #endif return (x); } /* Adjust subnormal numbers. */ u.e *= 0x1.0p514; k = u.bits.exp; k -= BIAS + 514; } else k -= BIAS; u.xbits.expsign = BIAS; v.e = 1; x = u.e; switch (k % 3) { case 1: case -2: x = 2*x; k--; break; case 2: case -1: x = 4*x; k -= 2; break; } v.xbits.expsign = (expsign & 0x8000) | (BIAS + k / 3); /* * The following is the guts of s_cbrtf, with the handling of * special values removed and extra care for accuracy not taken, * but with most of the extra accuracy not discarded. */ /* ~5-bit estimate: */ fx = x; GET_FLOAT_WORD(hx, fx); SET_FLOAT_WORD(ft, ((hx & 0x7fffffff) / 3 + B1)); /* ~16-bit estimate: */ dx = x; dt = ft; dr = dt * dt * dt; dt = dt * (dx + dx + dr) / (dx + dr + dr); /* ~47-bit estimate: */ dr = dt * dt * dt; dt = dt * (dx + dx + dr) / (dx + dr + dr); #if LDBL_MANT_DIG == 64 /* * dt is cbrtl(x) to ~47 bits (after x has been reduced to 1 <= x < 8). * Round it away from zero to 32 bits (32 so that t*t is exact, and * away from zero for technical reasons). */ volatile double vd2 = 0x1.0p32; volatile double vd1 = 0x1.0p-31; #define vd ((long double)vd2 + vd1) t = dt + vd - 0x1.0p32; #elif LDBL_MANT_DIG == 113 /* * Round dt away from zero to 47 bits. Since we don't trust the 47, * add 2 47-bit ulps instead of 1 to round up. 
Rounding is slow and * might be avoidable in this case, since on most machines dt will * have been evaluated in 53-bit precision and the technical reasons * for rounding up might not apply to either case in cbrtl() since * dt is much more accurate than needed. */ t = dt + 0x2.0p-46 + 0x1.0p60L - 0x1.0p60; #else #error "Unsupported long double format" #endif /* * Final step Newton iteration to 64 or 113 bits with * error < 0.667 ulps */ s=t*t; /* t*t is exact */ r=x/s; /* error <= 0.5 ulps; |r| < |t| */ w=t+t; /* t+t is exact */ r=(r-t)/(w+r); /* r-t is exact; w+r ~= 3*t */ t=t+t*r; /* error <= 0.5 + 0.5/3 + epsilon */ t *= v.e; #ifdef __i386__ if (oprec != FP_PE) fpsetprec(oprec); #endif return (t); }
/*
 * exp2l(x): compute the base 2 exponential of x
 *
 * Accuracy: Peak error < 0.511 ulp.
 *
 * Method: (equally-spaced tables)
 *
 *   Reduce x:
 *     x = k + y, for integer k and |y| <= 1/2.
 *     Thus we have exp2l(x) = 2**k * exp2(y).
 *
 *   Reduce y:
 *     y = i/TBLSIZE + z for integer i near y * TBLSIZE.
 *     Thus we have exp2(y) = exp2(i/TBLSIZE) * exp2(z),
 *     with |z| <= 2**-(TBLBITS+1).
 *
 *   We compute exp2(i/TBLSIZE) via table lookup and exp2(z) via a
 *   degree-6 minimax polynomial with maximum error under 2**-69.
 *   The table entries each have 104 bits of accuracy, encoded as
 *   a pair of double precision values.
 *
 * Special cases handled before the reduction:
 *   x is NaN or +Inf        -> x + x
 *   x is -Inf               -> 0.0
 *   x >= 16384              -> huge * huge (overflow)
 *   x <= -16446             -> twom10000 * twom10000 (underflow)
 *   |x| < 0x1p-66           -> 1.0 + x
 */
long double
exp2l(long double x)
{
    union IEEEl2bits u, v;
    long double r, z;
    long double twopk = 0, twopkp10000 = 0;
    uint32_t hx, ix, i0;
    int k;

    /* Filter out exceptional cases. */
    u.e = x;
    hx = u.xbits.expsign;
    ix = hx & EXPMASK;
    if (ix >= BIAS + 14) {      /* |x| >= 16384 or x is Inf/NaN */
        if (ix == BIAS + LDBL_MAX_EXP) {
            if (u.xbits.man != 1ULL << 63 || (hx & 0x8000) == 0)
                return (x + x); /* x is +Inf or NaN */
            else
                return (0.0);   /* x is -Inf */
        }
        if (x >= 16384)
            return (huge * huge); /* overflow */
        if (x <= -16446)
            return (twom10000 * twom10000); /* underflow */
        /* Otherwise -16446 < x <= -16384: fall through and compute. */
    } else if (ix <= BIAS - 66) {       /* |x| < 0x1p-66 */
        return (1.0 + x);
    }

#ifdef __i386__
    /*
     * The default precision on i386 is 53 bits, so long doubles are
     * broken.  Call exp2() to get an accurate (double precision) result.
     */
    if (fpgetprec() != FP_PE)
        return (exp2(x));
#endif

    /*
     * Reduce x, computing z, i0, and k. The low bits of x + redux
     * contain the 16-bit integer part of the exponent (k) followed by
     * TBLBITS fractional bits (i0). We use bit tricks to extract these
     * as integers, then set z to the remainder.
     *
     * Example: Suppose x is 0xabc.123456p0 and TBLBITS is 8.
     * Then the low-order word of x + redux is 0x000abc12,
     * We split this into k = 0xabc and i0 = 0x12 (adjusted to
     * index into the table), then we compute z = 0x0.003456p0.
     *
     * XXX If the exponent is negative, the computation of k depends on
     * '>>' doing sign extension.
     */
    u.e = x + redux;
    i0 = u.bits.manl + TBLSIZE / 2;
    k = (int)i0 >> TBLBITS;
    /* Doubled because each table entry occupies two slots (hi, lo). */
    i0 = (i0 & (TBLSIZE - 1)) << 1;
    u.e -= redux;
    z = x - u.e;

    /*
     * Build the scale factor directly from its bit pattern: significand
     * 1ULL << 63 and a sign/exponent field derived from k.  When k is
     * below LDBL_MIN_EXP the exponent is raised by 10000 here and the
     * result is multiplied by twom10000 at the end instead.
     * NOTE(review): twom10000 is defined elsewhere; presumably 2**-10000
     * -- confirm.
     */
    v.xbits.man = 1ULL << 63;
    if (k >= LDBL_MIN_EXP) {
        v.xbits.expsign = LDBL_MAX_EXP - 1 + k;
        twopk = v.e;
    } else {
        v.xbits.expsign = LDBL_MAX_EXP - 1 + k + 10000;
        twopkp10000 = v.e;
    }

    /*
     * Compute r = exp2l(y) = (t_hi + t_lo) * p(z), where t_hi = tbl[i0]
     * and t_lo = tbl[i0 + 1] are the two halves of the table entry and
     * p(z) = 1 + z * (P1 + z * (P2 + ... + z * P6)).  The sum below is
     * that product expanded, with t_hi added last.
     */
    long double t_hi = tbl[i0];
    long double t_lo = tbl[i0 + 1];
    /* XXX This gives > 1 ulp errors outside of FE_TONEAREST mode */
    r = t_lo + (t_hi + t_lo) * z * (P1 + z * (P2 + z * (P3 + z * (P4
        + z * (P5 + z * P6))))) + t_hi;

    /* Scale by 2**k. */
    if (k >= LDBL_MIN_EXP) {
        /*
         * twopk is not used when k == LDBL_MAX_EXP; r is scaled by 2.0
         * and then by 0x1p16383L instead (presumably because
         * 2**LDBL_MAX_EXP itself is not representable).
         */
        if (k == LDBL_MAX_EXP)
            return (r * 2.0 * 0x1p16383L);
        return (r * twopk);
    } else {
        return (r * twopkp10000 * twom10000);
    }
}