/*
 * Executes a single exponent reduction cycle.
 *
 * The 64-bit dividend significand aSig0 is placed in the low word of a
 * 128-bit value (high word zero), shifted left by expDiff bits, and divided
 * by bSig. The quotient comes from estimateDiv128To64(); the loop below
 * lowers it one step at a time while the remainder is negative.
 *
 *   aSig0    dividend significand
 *   bSig     divisor significand
 *   expDiff  left-shift count applied to the dividend before dividing
 *            (presumably must be within the range shortShift128Left()
 *            accepts -- TODO confirm against the helper)
 *   zSig0    out: second word of the 128-bit remainder
 *   zSig1    out: first word of the 128-bit remainder; its sign bit is what
 *            drives the correction loop
 *
 * Returns the corrected quotient.
 */
static Bit64u remainder_kernel(Bit64u aSig0, Bit64u bSig, int expDiff, Bit64u *zSig0, Bit64u *zSig1)
{
    Bit64u dividendHi = 0;
    Bit64u dividendLo = aSig0;
    Bit64u productHi, productLo;
    Bit64u quotient;

    /* Scale the dividend up by the exponent difference. */
    shortShift128Left(dividendHi, dividendLo, expDiff, &dividendHi, &dividendLo);

    /* First guess at the quotient, then remainder = dividend - bSig * guess. */
    quotient = estimateDiv128To64(dividendHi, dividendLo, bSig);
    mul64To128(bSig, quotient, &productHi, &productLo);
    sub128(dividendHi, dividendLo, productHi, productLo, zSig1, zSig0);

    /*
     * A negative remainder means the guess was too large: give one divisor
     * back to the remainder and drop the quotient by one, until it is
     * non-negative.
     */
    for (; (Bit64s)(*zSig1) < 0; --quotient) {
        add128(*zSig1, *zSig0, 0, bSig, zSig1, zSig0);
    }

    return quotient;
}
/*
 * Reduces a trigonometric function argument using the 128-bit precision
 * pi approximation held in FLOAT_PI_HI:FLOAT_PI_LO.
 *
 * aSig0 is placed in the low word of a 128-bit value (high word zero) and
 * shifted left by Exp bits. The quotient is estimated by dividing by
 * FLOAT_PI_HI alone, the full 128-bit pi value is multiplied by that
 * quotient into a 192-bit product, and the product's first two words are
 * subtracted from the shifted argument. While the first word of the
 * difference is negative, the quotient is lowered by one and pi is added
 * back across all three words.
 *
 *   aSig0  argument significand
 *   Exp    left-shift count applied to the argument (presumably must be
 *          within the range shortShift128Left() accepts -- TODO confirm)
 *   zSig0  out: second word of the corrected difference
 *   zSig1  out: holds the first word of the difference while it is being
 *          corrected, then is overwritten with the third word (term2)
 *          just before returning
 *
 * Returns the corrected quotient.
 *
 * NOTE(review): sub128() only covers the first two product words; the third
 * word is carried through unchanged unless the correction loop runs.
 * Confirm against callers that this is the intended result layout.
 */
static Bit64u argument_reduction_kernel(Bit64u aSig0, int Exp, Bit64u *zSig0, Bit64u *zSig1)
{
    Bit64u argHi = 0;
    Bit64u argLo = aSig0;
    Bit64u prodHi, prodMid, prodLo;
    Bit64u quotient;

    /* Scale the argument up by its exponent. */
    shortShift128Left(argHi, argLo, Exp, &argHi, &argLo);

    /* Estimate from the high half of pi only; refine with the full value. */
    quotient = estimateDiv128To64(argHi, argLo, FLOAT_PI_HI);
    mul128By64To192(FLOAT_PI_HI, FLOAT_PI_LO, quotient, &prodHi, &prodMid, &prodLo);
    sub128(argHi, argLo, prodHi, prodMid, zSig1, zSig0);

    /* Estimate too large: add pi back until the first word is non-negative. */
    for (; (Bit64s)(*zSig1) < 0; --quotient) {
        add192(*zSig1, *zSig0, prodLo, 0, FLOAT_PI_HI, FLOAT_PI_LO, zSig1, zSig0, &prodLo);
    }

    /* Hand back the third word in place of the first one. */
    *zSig1 = prodLo;
    return quotient;
}
/*
 * Reduces a trigonometric function argument using the 128-bit precision
 * pi approximation held in FLOAT_PI_HI:FLOAT_PI_LO.
 *
 * Steps, in order:
 *   1. Form the 128-bit value 0:aSig0 and shift it left by Exp bits.
 *   2. Estimate the quotient by dividing that value by FLOAT_PI_HI.
 *   3. Multiply the full 128-bit pi value by the quotient (192-bit product)
 *      and subtract the product's first two words from the shifted argument.
 *   4. While the first word of the difference is negative, decrement the
 *      quotient and add pi back across all three words.
 *
 *   aSig0  argument significand
 *   Exp    left-shift count applied to the argument (presumably must be
 *          within the range shortShift128Left() accepts -- TODO confirm)
 *   zSig0  out: second word of the corrected difference
 *   zSig1  out: used for the first word of the difference during
 *          correction, then overwritten with the third word (term2)
 *          before returning
 *
 * Returns the corrected quotient.
 *
 * NOTE(review): the third product word is never subtracted from anything;
 * it is passed through unchanged unless step 4 executes. Confirm against
 * callers that this is the intended result layout.
 */
static UINT64 argument_reduction_kernel(UINT64 aSig0, int Exp, UINT64 *zSig0, UINT64 *zSig1)
{
    UINT64 shiftedHi = 0;
    UINT64 shiftedLo = aSig0;
    UINT64 word0, word1, word2;
    UINT64 estimate;

    shortShift128Left(shiftedHi, shiftedLo, Exp, &shiftedHi, &shiftedLo);

    estimate = estimateDiv128To64(shiftedHi, shiftedLo, FLOAT_PI_HI);
    mul128By64To192(FLOAT_PI_HI, FLOAT_PI_LO, estimate, &word0, &word1, &word2);
    sub128(shiftedHi, shiftedLo, word0, word1, zSig1, zSig0);

    /* Undo over-estimation one multiple of pi at a time. */
    for (;;) {
        if ((INT64)(*zSig1) >= 0) {
            break;
        }
        --estimate;
        add192(*zSig1, *zSig0, word2, 0, FLOAT_PI_HI, FLOAT_PI_LO, zSig1, zSig0, &word2);
    }

    /* The caller receives the third word through zSig1, not the first. */
    *zSig1 = word2;
    return estimate;
}
} if (aExp == 0) { if (aSig == 0) return packFloat64 (zSign, 0, 0); normalizeFloat64Subnormal (aSig, &aExp, &aSig); } zExp = aExp - bExp + 0x3FD; aSig = (aSig | LIT64 (0x0010000000000000)) << 10; bSig = (bSig | LIT64 (0x0010000000000000)) << 11; if (bSig <= (aSig + aSig)) { aSig >>= 1; ++zExp; } zSig = estimateDiv128To64 (aSig, 0, bSig); if ((zSig & 0x1FF) <= 2) { mul64To128 (bSig, zSig, &term0, &term1); sub128 (aSig, 0, term0, term1, &rem0, &rem1); while ((sbits64) rem0 < 0) { --zSig; add128 (rem0, rem1, 0, bSig, &rem0, &rem1); } zSig |= (rem1 != 0); } return roundAndPackFloat64 (zSign, zExp, zSig); }
floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status) { flag aSign, zSign; int32_t aExp, bExp, expDiff; uint64_t aSig0, aSig1, bSig; uint64_t qTemp, term0, term1; aSig0 = extractFloatx80Frac(a); aExp = extractFloatx80Exp(a); aSign = extractFloatx80Sign(a); bSig = extractFloatx80Frac(b); bExp = extractFloatx80Exp(b); if (aExp == 0x7FFF) { if ((uint64_t) (aSig0 << 1) || ((bExp == 0x7FFF) && (uint64_t) (bSig << 1))) { return propagateFloatx80NaN(a, b, status); } goto invalid; } if (bExp == 0x7FFF) { if ((uint64_t) (bSig << 1)) { return propagateFloatx80NaN(a, b, status); } return a; } if (bExp == 0) { if (bSig == 0) { invalid: float_raise(float_flag_invalid, status); return floatx80_default_nan(status); } normalizeFloatx80Subnormal(bSig, &bExp, &bSig); } if (aExp == 0) { if ((uint64_t) (aSig0 << 1) == 0) { return a; } normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0); } bSig |= LIT64(0x8000000000000000); zSign = aSign; expDiff = aExp - bExp; aSig1 = 0; if (expDiff < 0) { return a; } qTemp = (bSig <= aSig0); if (qTemp) { aSig0 -= bSig; } expDiff -= 64; while (0 < expDiff) { qTemp = estimateDiv128To64(aSig0, aSig1, bSig); qTemp = (2 < qTemp) ? qTemp - 2 : 0; mul64To128(bSig, qTemp, &term0, &term1); sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1); shortShift128Left(aSig0, aSig1, 62, &aSig0, &aSig1); } expDiff += 64; if (0 < expDiff) { qTemp = estimateDiv128To64(aSig0, aSig1, bSig); qTemp = (2 < qTemp) ? qTemp - 2 : 0; qTemp >>= 64 - expDiff; mul64To128(bSig, qTemp << (64 - expDiff), &term0, &term1); sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1); shortShift128Left(0, bSig, 64 - expDiff, &term0, &term1); while (le128(term0, term1, aSig0, aSig1)) { ++qTemp; sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1); } }