/* executes single exponent reduction cycle */ static Bit64u remainder_kernel(Bit64u aSig0, Bit64u bSig, int expDiff, Bit64u *zSig0, Bit64u *zSig1) { Bit64u term0, term1; Bit64u aSig1 = 0; shortShift128Left(aSig1, aSig0, expDiff, &aSig1, &aSig0); Bit64u q = estimateDiv128To64(aSig1, aSig0, bSig); mul64To128(bSig, q, &term0, &term1); sub128(aSig1, aSig0, term0, term1, zSig1, zSig0); while ((Bit64s)(*zSig1) < 0) { --q; add128(*zSig1, *zSig0, 0, bSig, zSig1, zSig0); } return q; }
/* reduce trigonometric function argument using 128-bit precision M_PI approximation */ static Bit64u argument_reduction_kernel(Bit64u aSig0, int Exp, Bit64u *zSig0, Bit64u *zSig1) { Bit64u term0, term1, term2; Bit64u aSig1 = 0; shortShift128Left(aSig1, aSig0, Exp, &aSig1, &aSig0); Bit64u q = estimateDiv128To64(aSig1, aSig0, FLOAT_PI_HI); mul128By64To192(FLOAT_PI_HI, FLOAT_PI_LO, q, &term0, &term1, &term2); sub128(aSig1, aSig0, term0, term1, zSig1, zSig0); while ((Bit64s)(*zSig1) < 0) { --q; add192(*zSig1, *zSig0, term2, 0, FLOAT_PI_HI, FLOAT_PI_LO, zSig1, zSig0, &term2); } *zSig1 = term2; return q; }
/* reduce trigonometric function argument using 128-bit precision M_PI approximation */ static UINT64 argument_reduction_kernel(UINT64 aSig0, int Exp, UINT64 *zSig0, UINT64 *zSig1) { UINT64 term0, term1, term2; UINT64 aSig1 = 0; shortShift128Left(aSig1, aSig0, Exp, &aSig1, &aSig0); UINT64 q = estimateDiv128To64(aSig1, aSig0, FLOAT_PI_HI); mul128By64To192(FLOAT_PI_HI, FLOAT_PI_LO, q, &term0, &term1, &term2); sub128(aSig1, aSig0, term0, term1, zSig1, zSig0); while ((INT64)(*zSig1) < 0) { --q; add192(*zSig1, *zSig0, term2, 0, FLOAT_PI_HI, FLOAT_PI_LO, zSig1, zSig0, &term2); } *zSig1 = term2; return q; }
floatx80 fyl2xp1(floatx80 a, floatx80 b) { INT32 aExp, bExp; UINT64 aSig, bSig, zSig0, zSig1, zSig2; int aSign, bSign; aSig = extractFloatx80Frac(a); aExp = extractFloatx80Exp(a); aSign = extractFloatx80Sign(a); bSig = extractFloatx80Frac(b); bExp = extractFloatx80Exp(b); bSign = extractFloatx80Sign(b); int zSign = aSign ^ bSign; if (aExp == 0x7FFF) { if ((UINT64) (aSig<<1) || ((bExp == 0x7FFF) && (UINT64) (bSig<<1))) { return propagateFloatx80NaN(a, b); } if (aSign) { invalid: float_raise(float_flag_invalid); return floatx80_default_nan; } else { if (bExp == 0) { if (bSig == 0) goto invalid; float_raise(float_flag_denormal); } return packFloatx80(bSign, 0x7FFF, U64(0x8000000000000000)); } } if (bExp == 0x7FFF) { if ((UINT64) (bSig<<1)) return propagateFloatx80NaN(a, b); if (aExp == 0) { if (aSig == 0) goto invalid; float_raise(float_flag_denormal); } return packFloatx80(zSign, 0x7FFF, U64(0x8000000000000000)); } if (aExp == 0) { if (aSig == 0) { if (bSig && (bExp == 0)) float_raise(float_flag_denormal); return packFloatx80(zSign, 0, 0); } float_raise(float_flag_denormal); normalizeFloatx80Subnormal(aSig, &aExp, &aSig); } if (bExp == 0) { if (bSig == 0) return packFloatx80(zSign, 0, 0); float_raise(float_flag_denormal); normalizeFloatx80Subnormal(bSig, &bExp, &bSig); } float_raise(float_flag_inexact); if (aSign && aExp >= 0x3FFF) return a; if (aExp >= 0x3FFC) // big argument { return fyl2x(floatx80_add(a, floatx80_one), b); } // handle tiny argument if (aExp < EXP_BIAS-70) { // first order approximation, return (a*b)/ln(2) INT32 zExp = aExp + FLOAT_LN2INV_EXP - 0x3FFE; mul128By64To192(FLOAT_LN2INV_HI, FLOAT_LN2INV_LO, aSig, &zSig0, &zSig1, &zSig2); if (0 < (INT64) zSig0) { shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1); --zExp; } zExp = zExp + bExp - 0x3FFE; mul128By64To192(zSig0, zSig1, bSig, &zSig0, &zSig1, &zSig2); if (0 < (INT64) zSig0) { shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1); --zExp; } return roundAndPackFloatx80(80, aSign ^ bSign, zExp, zSig0, zSig1); } /* ******************************** */ /* using float128 for approximation */ /* ******************************** */ shift128Right(aSig<<1, 0, 16, &zSig0, &zSig1); float128 x = packFloat128(aSign, aExp, zSig0, zSig1); x = poly_l2p1(x); return floatx80_mul(b, float128_to_floatx80(x)); }
floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status) { flag aSign, zSign; int32_t aExp, bExp, expDiff; uint64_t aSig0, aSig1, bSig; uint64_t qTemp, term0, term1; aSig0 = extractFloatx80Frac(a); aExp = extractFloatx80Exp(a); aSign = extractFloatx80Sign(a); bSig = extractFloatx80Frac(b); bExp = extractFloatx80Exp(b); if (aExp == 0x7FFF) { if ((uint64_t) (aSig0 << 1) || ((bExp == 0x7FFF) && (uint64_t) (bSig << 1))) { return propagateFloatx80NaN(a, b, status); } goto invalid; } if (bExp == 0x7FFF) { if ((uint64_t) (bSig << 1)) { return propagateFloatx80NaN(a, b, status); } return a; } if (bExp == 0) { if (bSig == 0) { invalid: float_raise(float_flag_invalid, status); return floatx80_default_nan(status); } normalizeFloatx80Subnormal(bSig, &bExp, &bSig); } if (aExp == 0) { if ((uint64_t) (aSig0 << 1) == 0) { return a; } normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0); } bSig |= LIT64(0x8000000000000000); zSign = aSign; expDiff = aExp - bExp; aSig1 = 0; if (expDiff < 0) { return a; } qTemp = (bSig <= aSig0); if (qTemp) { aSig0 -= bSig; } expDiff -= 64; while (0 < expDiff) { qTemp = estimateDiv128To64(aSig0, aSig1, bSig); qTemp = (2 < qTemp) ? qTemp - 2 : 0; mul64To128(bSig, qTemp, &term0, &term1); sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1); shortShift128Left(aSig0, aSig1, 62, &aSig0, &aSig1); } expDiff += 64; if (0 < expDiff) { qTemp = estimateDiv128To64(aSig0, aSig1, bSig); qTemp = (2 < qTemp) ? qTemp - 2 : 0; qTemp >>= 64 - expDiff; mul64To128(bSig, qTemp << (64 - expDiff), &term0, &term1); sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1); shortShift128Left(0, bSig, 64 - expDiff, &term0, &term1); while (le128(term0, term1, aSig0, aSig1)) { ++qTemp; sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1); } }