static int reduce_trig_arg(int expDiff, int &zSign, Bit64u &aSig0, Bit64u &aSig1) { Bit64u term0, term1, q = 0; if (expDiff < 0) { shift128Right(aSig0, 0, 1, &aSig0, &aSig1); expDiff = 0; } if (expDiff > 0) { q = argument_reduction_kernel(aSig0, expDiff, &aSig0, &aSig1); } else { if (FLOAT_PI_HI <= aSig0) { aSig0 -= FLOAT_PI_HI; q = 1; } } shift128Right(FLOAT_PI_HI, FLOAT_PI_LO, 1, &term0, &term1); if (! lt128(aSig0, aSig1, term0, term1)) { int lt = lt128(term0, term1, aSig0, aSig1); int eq = eq128(aSig0, aSig1, term0, term1); if ((eq && (q & 1)) || lt) { zSign = !zSign; ++q; } if (lt) sub128(FLOAT_PI_HI, FLOAT_PI_LO, aSig0, aSig1, &aSig0, &aSig1); } return (int)(q & 3); }
/*
 * Executes a single exponent reduction cycle.
 *
 * aSig0 is widened to 128 bits, shifted left by expDiff, and an estimated
 * 64-bit quotient by bSig is computed.  quotient * bSig is subtracted from
 * the shifted value and the difference is stored through zSig1 / zSig0.
 * While *zSig1, read as a signed value, is negative the estimate was too
 * large: the quotient is decremented and bSig is added back.
 *
 * Returns the corrected quotient.
 * NOTE(review): *zSig1 appears to hold the most significant word of the
 * remainder (its sign bit is what gets tested) -- confirm against sub128.
 */
static Bit64u remainder_kernel(Bit64u aSig0, Bit64u bSig, int expDiff, Bit64u *zSig0, Bit64u *zSig1)
{
    Bit64u dividendHi = 0;
    Bit64u dividendLo = aSig0;
    Bit64u productHi, productLo;

    shortShift128Left(dividendHi, dividendLo, expDiff, &dividendHi, &dividendLo);

    Bit64u quotient = estimateDiv128To64(dividendHi, dividendLo, bSig);
    mul64To128(bSig, quotient, &productHi, &productLo);
    sub128(dividendHi, dividendLo, productHi, productLo, zSig1, zSig0);

    /* Undo one multiple of bSig per pass until the remainder is non-negative. */
    for (; (Bit64s)(*zSig1) < 0; --quotient) {
        add128(*zSig1, *zSig0, 0, bSig, zSig1, zSig0);
    }

    return quotient;
}
/*
 * Reduces a trigonometric function argument using the 128-bit M_PI
 * approximation FLOAT_PI_HI:FLOAT_PI_LO.
 *
 * aSig0 is widened to 128 bits and shifted left by Exp.  A 64-bit quotient
 * is estimated against FLOAT_PI_HI alone, the full 128-bit constant is
 * multiplied by it into a 192-bit product, and the leading 128 bits of that
 * product are subtracted from the shifted value.  While *zSig1, read as a
 * signed value, is negative the quotient is decremented and the constant is
 * added back across all three words.
 *
 * On return *zSig0 holds the middle word of the working value and *zSig1 is
 * overwritten with its last word.  Returns the corrected quotient.
 */
static Bit64u argument_reduction_kernel(Bit64u aSig0, int Exp, Bit64u *zSig0, Bit64u *zSig1)
{
    Bit64u shiftedHi = 0;
    Bit64u shiftedLo = aSig0;
    Bit64u prod0, prod1, prod2;

    shortShift128Left(shiftedHi, shiftedLo, Exp, &shiftedHi, &shiftedLo);

    Bit64u quotient = estimateDiv128To64(shiftedHi, shiftedLo, FLOAT_PI_HI);
    mul128By64To192(FLOAT_PI_HI, FLOAT_PI_LO, quotient, &prod0, &prod1, &prod2);
    sub128(shiftedHi, shiftedLo, prod0, prod1, zSig1, zSig0);

    /* prod2 doubles as the third word of the running 192-bit value. */
    for (; (Bit64s)(*zSig1) < 0; --quotient) {
        add192(*zSig1, *zSig0, prod2, 0, FLOAT_PI_HI, FLOAT_PI_LO, zSig1, zSig0, &prod2);
    }

    *zSig1 = prod2;
    return quotient;
}
/*
 * Reduce a trigonometric function argument using a 128-bit precision M_PI
 * approximation (FLOAT_PI_HI:FLOAT_PI_LO).
 *
 * The input word is shifted left by Exp inside a 128-bit value, divided by
 * FLOAT_PI_HI to obtain a quotient estimate, and the estimate times the
 * 128-bit constant is subtracted.  Each time the leading result word is
 * negative when read as signed, the estimate is lowered by one and the
 * constant is added back.
 *
 * Outputs: *zSig0 receives the middle word of the working value; *zSig1 is
 * finally replaced by the last word.  The return value is the quotient.
 */
static UINT64 argument_reduction_kernel(UINT64 aSig0, int Exp, UINT64 *zSig0, UINT64 *zSig1)
{
    UINT64 wideHi = 0;
    UINT64 wideLo = aSig0;
    UINT64 mulHi, mulMid, mulLo;
    UINT64 estimate;

    shortShift128Left(wideHi, wideLo, Exp, &wideHi, &wideLo);
    estimate = estimateDiv128To64(wideHi, wideLo, FLOAT_PI_HI);

    mul128By64To192(FLOAT_PI_HI, FLOAT_PI_LO, estimate, &mulHi, &mulMid, &mulLo);
    sub128(wideHi, wideLo, mulHi, mulMid, zSig1, zSig0);

    for (;;) {
        if ((INT64)(*zSig1) >= 0) {
            break;
        }
        /* Estimate overshot: step it down and restore one multiple of the constant. */
        --estimate;
        add192(*zSig1, *zSig0, mulLo, 0, FLOAT_PI_HI, FLOAT_PI_LO, zSig1, zSig0, &mulLo);
    }

    *zSig1 = mulLo;
    return estimate;
}
/*
 * Computes a remainder of a with respect to b in extended precision.
 * NOTE(review): presumably the shared worker behind the x87 FPREM / FPREM1
 * emulation -- confirm against callers.
 *
 *   a, b           dividend and divisor
 *   q              out: quotient of the reduction step that was performed;
 *                  set to all ones when the exponent gap is 64 or more and
 *                  only a partial reduction is done (presumably an
 *                  "incomplete" marker for the caller -- confirm)
 *   rounding_mode  when equal to float_round_nearest_even the remainder is
 *                  folded around bSig/2 and the result sign may flip;
 *                  any other value leaves the remainder as computed
 *   status         receives the invalid / denormal flags raised here
 *
 * Special cases handled before the reduction:
 *   - unsupported encoding in a or b        -> invalid, default NaN
 *   - exponent 0x7FFF with low fraction bits -> propagateFloatx80NaN
 *   - a has exponent 0x7FFF otherwise        -> invalid, default NaN
 *   - b has exponent 0x7FFF otherwise        -> a is returned unchanged
 *   - b is zero                              -> invalid, default NaN
 *   - a has exponent 0 and no bits below the top significand bit -> a
 *   - exponent 0 operands otherwise raise the denormal flag and are
 *     normalized
 *
 * Only block comments are used inside the body so that the code cannot be
 * commented out by accident if line breaks are lost.
 */
static floatx80 do_fprem(floatx80 a, floatx80 b, Bit64u &q, int rounding_mode, float_status_t &status)
{
    Bit32s aExp, bExp, zExp, expDiff;
    Bit64u aSig0, aSig1, bSig;
    int aSign;

    q = 0;

    /* handle unsupported extended double-precision floating encodings */
    if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b)) {
        float_raise(status, float_flag_invalid);
        return floatx80_default_nan;
    }

    aSig0 = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);
    bSig = extractFloatx80Frac(b);
    bExp = extractFloatx80Exp(b);

    if (aExp == 0x7FFF) {
        /* Any bit below the top significand bit marks a NaN operand. */
        if ((Bit64u) (aSig0<<1) || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1))) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* a has the maximum exponent but is not a NaN: invalid operation. */
        goto invalid;
    }
    if (bExp == 0x7FFF) {
        if ((Bit64u) (bSig<<1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        return a;
    }
    if (bExp == 0) {
        if (bSig == 0) {
 invalid:
            float_raise(status, float_flag_invalid);
            return floatx80_default_nan;
        }
        float_raise(status, float_flag_denormal);
        normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
    }
    if (aExp == 0) {
        if ((Bit64u) (aSig0<<1) == 0) {
            return a;
        }
        float_raise(status, float_flag_denormal);
        normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
    }

    expDiff = aExp - bExp;
    aSig1 = 0;

    if (expDiff >= 64) {
        /* Partial reduction: strip between 32 and 63 exponent bits this pass. */
        int n = (expDiff & 0x1f) | 0x20;
        remainder_kernel(aSig0, bSig, n, &aSig0, &aSig1);
        zExp = aExp - n;
        q = (Bit64u) -1;
    }
    else {
        zExp = bExp;

        if (expDiff < 0) {
            if (expDiff < -1) {
                /* a is returned; it is repacked from the unpacked fields
                   when the top bit of its stored fraction is set. */
                return (a.fraction & BX_CONST64(0x8000000000000000)) ?
                        packFloatx80(aSign, aExp, aSig0) : a;
            }
            shift128Right(aSig0, 0, 1, &aSig0, &aSig1);
            expDiff = 0;
        }

        if (expDiff > 0) {
            q = remainder_kernel(aSig0, bSig, expDiff, &aSig0, &aSig1);
        }
        else {
            if (bSig <= aSig0) {
                aSig0 -= bSig;
                q = 1;
            }
        }

        if (rounding_mode == float_round_nearest_even) {
            Bit64u term0, term1;

            /* term0:term1 = bSig / 2 as a 128-bit value. */
            shift128Right(bSig, 0, 1, &term0, &term1);

            if (! lt128(aSig0, aSig1, term0, term1)) {
                int lt = lt128(term0, term1, aSig0, aSig1);
                int eq = eq128(aSig0, aSig1, term0, term1);

                /* Above half always folds; an exact tie folds only for an odd quotient. */
                if ((eq && (q & 1)) || lt) {
                    aSign = !aSign;
                    ++q;
                }
                if (lt) {
                    sub128(bSig, 0, aSig0, aSig1, &aSig0, &aSig1);
                }
            }
        }
    }

    return normalizeRoundAndPackFloatx80(80, aSign, zExp, aSig0, aSig1, status);
}
floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status) { flag aSign, zSign; int32_t aExp, bExp, expDiff; uint64_t aSig0, aSig1, bSig; uint64_t qTemp, term0, term1; aSig0 = extractFloatx80Frac(a); aExp = extractFloatx80Exp(a); aSign = extractFloatx80Sign(a); bSig = extractFloatx80Frac(b); bExp = extractFloatx80Exp(b); if (aExp == 0x7FFF) { if ((uint64_t) (aSig0 << 1) || ((bExp == 0x7FFF) && (uint64_t) (bSig << 1))) { return propagateFloatx80NaN(a, b, status); } goto invalid; } if (bExp == 0x7FFF) { if ((uint64_t) (bSig << 1)) { return propagateFloatx80NaN(a, b, status); } return a; } if (bExp == 0) { if (bSig == 0) { invalid: float_raise(float_flag_invalid, status); return floatx80_default_nan(status); } normalizeFloatx80Subnormal(bSig, &bExp, &bSig); } if (aExp == 0) { if ((uint64_t) (aSig0 << 1) == 0) { return a; } normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0); } bSig |= LIT64(0x8000000000000000); zSign = aSign; expDiff = aExp - bExp; aSig1 = 0; if (expDiff < 0) { return a; } qTemp = (bSig <= aSig0); if (qTemp) { aSig0 -= bSig; } expDiff -= 64; while (0 < expDiff) { qTemp = estimateDiv128To64(aSig0, aSig1, bSig); qTemp = (2 < qTemp) ? qTemp - 2 : 0; mul64To128(bSig, qTemp, &term0, &term1); sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1); shortShift128Left(aSig0, aSig1, 62, &aSig0, &aSig1); } expDiff += 64; if (0 < expDiff) { qTemp = estimateDiv128To64(aSig0, aSig1, bSig); qTemp = (2 < qTemp) ? qTemp - 2 : 0; qTemp >>= 64 - expDiff; mul64To128(bSig, qTemp << (64 - expDiff), &term0, &term1); sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1); shortShift128Left(0, bSig, 64 - expDiff, &term0, &term1); while (le128(term0, term1, aSig0, aSig1)) { ++qTemp; sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1); } }