/* Returns the fraction bits of the double-precision floating-point value `a'. */
INLINE bits64 extractFloat64Frac (float64 a)
{
    return a & LIT64 (0x000FFFFFFFFFFFFF);
}
/*
 * Returns 1 if the double-precision value `a1' is a NaN (quiet or signaling);
 * otherwise returns 0.  Any value with an all-ones exponent and a non-zero
 * fraction is a NaN, hence the comparison against 0xFFE0000000000000 after
 * the sign bit has been shifted out.
 */
int float64_is_nan( float64 a1 )
{
    float64u u;
    uint64_t a;

    u.f = a1;
    a = u.i;
    return ( LIT64( 0xFFE0000000000000 ) < (bits64) ( a<<1 ) );
}
/*
 * Returns 1 if the double-precision value `a1' is a quiet NaN; otherwise
 * returns 0.  This assumes the usual encoding in which the quiet bit is the
 * most-significant fraction bit, so the test must also accept the default
 * NaN 0x7FF8000000000000 (hence `<=' rather than `<').
 */
int float64_is_quiet_nan( float64 a1 )
{
    float64u u;
    uint64_t a;

    u.f = a1;
    a = u.i;
    return ( LIT64( 0xFFF0000000000000 ) <= (uint64_t) ( a<<1 ) );
}
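/*
 * A minimal illustration of the two predicates above, not part of the
 * library: `nan_classification_example' is a hypothetical helper.  It
 * assumes the same float64u union used above and the quiet-bit-is-MSB
 * NaN encoding.
 */
static void nan_classification_example (void)
{
    float64u inf, qnan, snan;

    inf.i  = LIT64 (0x7FF0000000000000);   /* +infinity: not a NaN            */
    qnan.i = LIT64 (0x7FF8000000000000);   /* default quiet NaN               */
    snan.i = LIT64 (0x7FF0000000000001);   /* signaling NaN (quiet bit clear) */

    (void) float64_is_nan (inf.f);          /* expected 0 */
    (void) float64_is_nan (qnan.f);         /* expected 1 */
    (void) float64_is_nan (snan.f);         /* expected 1 */
    (void) float64_is_quiet_nan (qnan.f);   /* expected 1 */
    (void) float64_is_quiet_nan (snan.f);   /* expected 0 */
}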
/*
 * Returns the result of dividing the double-precision value `a' by `b',
 * according to the IEC/IEEE standard for binary floating-point arithmetic.
 */
float64 float64_div (float64 a, float64 b)
{
    flag aSign, bSign, zSign;
    int16 aExp, bExp, zExp;
    bits64 aSig, bSig, zSig;
    bits64 rem0, rem1, term0, term1;

    aSig = extractFloat64Frac (a);
    aExp = extractFloat64Exp (a);
    aSign = extractFloat64Sign (a);
    bSig = extractFloat64Frac (b);
    bExp = extractFloat64Exp (b);
    bSign = extractFloat64Sign (b);
    zSign = aSign ^ bSign;
    if (aExp == 0x7FF) {
        if (aSig) return propagateFloat64NaN (a, b);
        if (bExp == 0x7FF) {
            if (bSig) return propagateFloat64NaN (a, b);
            /* infinity / infinity is invalid */
            float_raise (float_flag_invalid);
            return float64_default_nan;
        }
        return packFloat64 (zSign, 0x7FF, 0);
    }
    if (bExp == 0x7FF) {
        if (bSig) return propagateFloat64NaN (a, b);
        return packFloat64 (zSign, 0, 0);
    }
    if (bExp == 0) {
        if (bSig == 0) {
            if ((aExp | aSig) == 0) {
                /* 0 / 0 is invalid */
                float_raise (float_flag_invalid);
                return float64_default_nan;
            }
            float_raise (float_flag_divbyzero);
            return packFloat64 (zSign, 0x7FF, 0);
        }
        normalizeFloat64Subnormal (bSig, &bExp, &bSig);
    }
    if (aExp == 0) {
        if (aSig == 0) return packFloat64 (zSign, 0, 0);
        normalizeFloat64Subnormal (aSig, &aExp, &aSig);
    }
    zExp = aExp - bExp + 0x3FD;
    aSig = (aSig | LIT64 (0x0010000000000000)) << 10;
    bSig = (bSig | LIT64 (0x0010000000000000)) << 11;
    if (bSig <= (aSig + aSig)) {
        aSig >>= 1;
        ++zExp;
    }
    /* Estimate the quotient, then correct it so the remainder is
       non-negative; a non-zero remainder sets the sticky bit. */
    zSig = estimateDiv128To64 (aSig, 0, bSig);
    if ((zSig & 0x1FF) <= 2) {
        mul64To128 (bSig, zSig, &term0, &term1);
        sub128 (aSig, 0, term0, term1, &rem0, &rem1);
        while ((sbits64) rem0 < 0) {
            --zSig;
            add128 (rem0, rem1, 0, bSig, &rem0, &rem1);
        }
        zSig |= (rem1 != 0);
    }
    return roundAndPackFloat64 (zSign, zExp, zSig);
}
/*
 * Takes an abstract value (sign, exponent, significand with 10 extra bits of
 * precision at the bottom) and returns the properly rounded and packed
 * double-precision result, raising overflow, underflow and inexact as
 * required by the current rounding mode.
 */
static float64 roundAndPackFloat64 (flag zSign, int16 zExp, bits64 zSig)
{
    int8 roundingMode;
    flag roundNearestEven, isTiny;
    int16 roundIncrement, roundBits;

    roundingMode = float_rounding_mode;
    roundNearestEven = (roundingMode == float_round_nearest_even);
    roundIncrement = 0x200;
    if (!roundNearestEven) {
        if (roundingMode == float_round_to_zero) {
            roundIncrement = 0;
        }
        else {
            roundIncrement = 0x3FF;
            if (zSign) {
                if (roundingMode == float_round_up) roundIncrement = 0;
            }
            else {
                if (roundingMode == float_round_down) roundIncrement = 0;
            }
        }
    }
    roundBits = zSig & 0x3FF;
    if (0x7FD <= (bits16) zExp) {
        if ((0x7FD < zExp)
            || ((zExp == 0x7FD)
                && ((sbits64) (zSig + roundIncrement) < 0))) {
            float_raise (float_flag_overflow | float_flag_inexact);
            return packFloat64 (zSign, 0x7FF, 0) - (roundIncrement == 0);
        }
        if (zExp < 0) {
            isTiny = (float_detect_tininess == float_tininess_before_rounding)
                || (zExp < -1)
                || (zSig + roundIncrement < LIT64 (0x8000000000000000));
            shift64RightJamming (zSig, -zExp, &zSig);
            zExp = 0;
            roundBits = zSig & 0x3FF;
            if (isTiny && roundBits) float_raise (float_flag_underflow);
        }
    }
    if (roundBits) float_exception_flags |= float_flag_inexact;
    zSig = (zSig + roundIncrement) >> 10;
    /* On a tie in nearest-even mode, clear the low bit to pick the even value. */
    zSig &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven);
    if (zSig == 0) zExp = 0;
    return packFloat64 (zSign, zExp, zSig);
}
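/*
 * A hypothetical worked example, not part of the library: with 10 round bits,
 * the nearest-even increment 0x200 is half an ULP of the packed result.  The
 * constant below is made up for the example; only the add/shift/mask logic
 * mirrors roundAndPackFloat64 (with nearest-even mode assumed).
 */
static bits64 round_tie_example (void)
{
    bits64 zSig = LIT64 (0x4000000000000200);     /* low 10 bits == 0x200: a tie */
    bits64 roundBits = zSig & 0x3FF;

    zSig = (zSig + 0x200) >> 10;                  /* add half an ULP, drop round bits */
    zSig &= ~(bits64) ((roundBits ^ 0x200) == 0); /* tie: clear bit 0 -> even result  */
    return zSig;                                  /* 0x0010000000000000 */
}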
/*
 * Returns the result of multiplying the double-precision values `a' and `b'.
 * The 53x53-bit significand product is formed with mul64To128 and the low
 * half is folded into a sticky bit before rounding.
 */
float64 float64_mul (float64 a, float64 b)
{
    flag aSign, bSign, zSign;
    int16 aExp, bExp, zExp;
    bits64 aSig, bSig, zSig0, zSig1;

    aSig = extractFloat64Frac (a);
    aExp = extractFloat64Exp (a);
    aSign = extractFloat64Sign (a);
    bSig = extractFloat64Frac (b);
    bExp = extractFloat64Exp (b);
    bSign = extractFloat64Sign (b);
    zSign = aSign ^ bSign;
    if (aExp == 0x7FF) {
        if (aSig || ((bExp == 0x7FF) && bSig)) return propagateFloat64NaN (a, b);
        if ((bExp | bSig) == 0) {
            /* infinity * 0 is invalid */
            float_raise (float_flag_invalid);
            return float64_default_nan;
        }
        return packFloat64 (zSign, 0x7FF, 0);
    }
    if (bExp == 0x7FF) {
        if (bSig) return propagateFloat64NaN (a, b);
        if ((aExp | aSig) == 0) {
            float_raise (float_flag_invalid);
            return float64_default_nan;
        }
        return packFloat64 (zSign, 0x7FF, 0);
    }
    if (aExp == 0) {
        if (aSig == 0) return packFloat64 (zSign, 0, 0);
        normalizeFloat64Subnormal (aSig, &aExp, &aSig);
    }
    if (bExp == 0) {
        if (bSig == 0) return packFloat64 (zSign, 0, 0);
        normalizeFloat64Subnormal (bSig, &bExp, &bSig);
    }
    zExp = aExp + bExp - 0x3FF;
    aSig = (aSig | LIT64 (0x0010000000000000)) << 10;
    bSig = (bSig | LIT64 (0x0010000000000000)) << 11;
    mul64To128 (aSig, bSig, &zSig0, &zSig1);
    zSig0 |= (zSig1 != 0);
    if (0 <= (sbits64) (zSig0 << 1)) {
        zSig0 <<= 1;
        --zExp;
    }
    return roundAndPackFloat64 (zSign, zExp, zSig0);
}
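/*
 * A minimal usage sketch, not part of the library.  It assumes a build in
 * which float64 is a raw 64-bit container (so a value can be made directly
 * from its bit pattern) and uses the global float_exception_flags referenced
 * above; `div_mul_example' is a hypothetical helper.
 */
static void div_mul_example (void)
{
    float64 one   = LIT64 (0x3FF0000000000000);   /* 1.0 */
    float64 three = LIT64 (0x4008000000000000);   /* 3.0 */
    float64 third, product;

    float_exception_flags = 0;
    third = float64_div (one, three);      /* 1.0 / 3.0: raises inexact      */
    product = float64_mul (third, three);  /* close to, but not exactly, 1.0 */
    (void) product;
}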
/*
 * Returns the difference of the absolute values of `a' and `b' under the
 * sign `zSign', which is flipped if the difference is negative; a helper
 * for the addition/subtraction routines when the effective signs differ.
 */
static float64 subFloat64Sigs (float64 a, float64 b, flag zSign)
{
    int16 aExp, bExp, zExp;
    bits64 aSig, bSig, zSig;
    int16 expDiff;

    aSig = extractFloat64Frac (a);
    aExp = extractFloat64Exp (a);
    bSig = extractFloat64Frac (b);
    bExp = extractFloat64Exp (b);
    expDiff = aExp - bExp;
    aSig <<= 10;
    bSig <<= 10;
    if (0 < expDiff) goto aExpBigger;
    if (expDiff < 0) goto bExpBigger;
    if (aExp == 0x7FF) {
        if (aSig | bSig) return propagateFloat64NaN (a, b);
        /* infinity - infinity is invalid */
        float_raise (float_flag_invalid);
        return float64_default_nan;
    }
    if (aExp == 0) {
        aExp = 1;
        bExp = 1;
    }
    if (bSig < aSig) goto aBigger;
    if (aSig < bSig) goto bBigger;
    /* Exact zero result: negative only when rounding down. */
    return packFloat64 (float_rounding_mode == float_round_down, 0, 0);
 bExpBigger:
    if (bExp == 0x7FF) {
        if (bSig) return propagateFloat64NaN (a, b);
        return packFloat64 (zSign ^ 1, 0x7FF, 0);
    }
    if (aExp == 0) ++expDiff;
    else aSig |= LIT64 (0x4000000000000000);
    shift64RightJamming (aSig, -expDiff, &aSig);
    bSig |= LIT64 (0x4000000000000000);
 bBigger:
    zSig = bSig - aSig;
    zExp = bExp;
    zSign ^= 1;
    goto normalizeRoundAndPack;
 aExpBigger:
    if (aExp == 0x7FF) {
        if (aSig) return propagateFloat64NaN (a, b);
        return a;
    }
    if (bExp == 0) --expDiff;
    else bSig |= LIT64 (0x4000000000000000);
    shift64RightJamming (bSig, expDiff, &bSig);
    aSig |= LIT64 (0x4000000000000000);
 aBigger:
    zSig = aSig - bSig;
    zExp = aExp;
 normalizeRoundAndPack:
    --zExp;
    return normalizeRoundAndPackFloat64 (zSign, zExp, zSig);
}
/*
 * Returns the sum of the absolute values of `a' and `b' under the sign
 * `zSign'; a helper for the addition/subtraction routines when the
 * effective signs agree.
 */
static float64 addFloat64Sigs (float64 a, float64 b, flag zSign)
{
    int16 aExp, bExp, zExp;
    bits64 aSig, bSig, zSig;
    int16 expDiff;

    aSig = extractFloat64Frac (a);
    aExp = extractFloat64Exp (a);
    bSig = extractFloat64Frac (b);
    bExp = extractFloat64Exp (b);
    expDiff = aExp - bExp;
    aSig <<= 9;
    bSig <<= 9;
    if (0 < expDiff) {
        if (aExp == 0x7FF) {
            if (aSig) return propagateFloat64NaN (a, b);
            return a;
        }
        if (bExp == 0) --expDiff;
        else bSig |= LIT64 (0x2000000000000000);
        shift64RightJamming (bSig, expDiff, &bSig);
        zExp = aExp;
    }
    else if (expDiff < 0) {
        if (bExp == 0x7FF) {
            if (bSig) return propagateFloat64NaN (a, b);
            return packFloat64 (zSign, 0x7FF, 0);
        }
        if (aExp == 0) ++expDiff;
        else aSig |= LIT64 (0x2000000000000000);
        shift64RightJamming (aSig, -expDiff, &aSig);
        zExp = bExp;
    }
    else {
        if (aExp == 0x7FF) {
            if (aSig | bSig) return propagateFloat64NaN (a, b);
            return a;
        }
        if (aExp == 0) return packFloat64 (zSign, 0, (aSig + bSig) >> 9);
        zSig = LIT64 (0x4000000000000000) + aSig + bSig;
        zExp = aExp;
        goto roundAndPack;
    }
    aSig |= LIT64 (0x2000000000000000);
    zSig = (aSig + bSig) << 1;
    --zExp;
    if ((sbits64) zSig < 0) {
        zSig = aSig + bSig;
        ++zExp;
    }
 roundAndPack:
    return roundAndPackFloat64 (zSign, zExp, zSig);
}
/* Returns 1 if the 64-bit value `a0' is zero; otherwise returns 0. */
int32_t int64_is_zero(bits64 a0)
{
    return (((uint32_t)(a0 >> 32)) == 0)
        && (((uint32_t)(a0 & LIT64(0xFFFFFFFF))) == 0);
}
/*
 * Returns the remainder of the extended double-precision value `a' with
 * respect to the corresponding value `b', with the quotient truncated
 * toward zero rather than rounded to nearest.
 */
floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
{
    flag aSign, zSign;
    int32_t aExp, bExp, expDiff;
    uint64_t aSig0, aSig1, bSig;
    uint64_t qTemp, term0, term1;

    aSig0 = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);
    bSig = extractFloatx80Frac(b);
    bExp = extractFloatx80Exp(b);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig0 << 1)
            || ((bExp == 0x7FFF) && (uint64_t) (bSig << 1))) {
            return propagateFloatx80NaN(a, b, status);
        }
        goto invalid;
    }
    if (bExp == 0x7FFF) {
        if ((uint64_t) (bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        return a;
    }
    if (bExp == 0) {
        if (bSig == 0) {
        invalid:
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
    }
    if (aExp == 0) {
        if ((uint64_t) (aSig0 << 1) == 0) {
            return a;
        }
        normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
    }
    bSig |= LIT64(0x8000000000000000);
    zSign = aSign;
    expDiff = aExp - bExp;
    aSig1 = 0;
    if (expDiff < 0) {
        return a;
    }
    qTemp = (bSig <= aSig0);
    if (qTemp) {
        aSig0 -= bSig;
    }
    /* Reduce the dividend 62 quotient bits at a time. */
    expDiff -= 64;
    while (0 < expDiff) {
        qTemp = estimateDiv128To64(aSig0, aSig1, bSig);
        qTemp = (2 < qTemp) ? qTemp - 2 : 0;
        mul64To128(bSig, qTemp, &term0, &term1);
        sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1);
        shortShift128Left(aSig0, aSig1, 62, &aSig0, &aSig1);
        expDiff -= 62;
    }
    /* Final partial step: correct the quotient estimate upward until the
       remainder is smaller than the divisor. */
    expDiff += 64;
    if (0 < expDiff) {
        qTemp = estimateDiv128To64(aSig0, aSig1, bSig);
        qTemp = (2 < qTemp) ? qTemp - 2 : 0;
        qTemp >>= 64 - expDiff;
        mul64To128(bSig, qTemp << (64 - expDiff), &term0, &term1);
        sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1);
        shortShift128Left(0, bSig, 64 - expDiff, &term0, &term1);
        while (le128(term0, term1, aSig0, aSig1)) {
            ++qTemp;
            sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1);
        }
    }
    return normalizeRoundAndPackFloatx80(80, zSign, bExp + expDiff,
                                         aSig0, aSig1, status);
}