/**
 * Test whether one quadruple-precision value is strictly greater than another.
 * NaN operands receive no special handling.
 *
 * @param a First quadruple-precision operand.
 * @param b Second quadruple-precision operand.
 *
 * @return 1 if a is greater than b, 0 otherwise.
 */
int is_float128_gt(float128 a, float128 b)
{
	uint64_t merged_hi;
	uint64_t merged_lo;

	/*
	 * OR both bit patterns together and mask off the topmost (sign) bit;
	 * an all-zero result means both operands are zero, whatever their signs.
	 */
	or128(a.bin.hi, a.bin.lo, b.bin.hi, b.bin.lo, &merged_hi, &merged_lo);
	and128(merged_hi, merged_lo,
	    0x7FFFFFFFFFFFFFFFll, 0xFFFFFFFFFFFFFFFFll,
	    &merged_hi, &merged_lo);
	if (eq128(merged_hi, merged_lo, 0x0ll, 0x0ll)) {
		/* +0 and -0 compare equal, so neither is greater */
		return 0;
	}

	if (!a.parts.sign || !b.parts.sign) {
		/*
		 * At least one operand is non-negative. Flipping both sign bits
		 * makes every non-negative value carry a set top bit, so a plain
		 * unsigned 128-bit comparison orders the operands correctly.
		 * (a and b are local copies; the caller's values are untouched.)
		 */
		a.parts.sign = !a.parts.sign;
		b.parts.sign = !b.parts.sign;
		return lt128(b.bin.hi, b.bin.lo, a.bin.hi, a.bin.lo);
	}

	/* Both negative: the one with the smaller binary value is the greater. */
	return lt128(a.bin.hi, a.bin.lo, b.bin.hi, b.bin.lo);
}
static int reduce_trig_arg(int expDiff, int &zSign, Bit64u &aSig0, Bit64u &aSig1) { Bit64u term0, term1, q = 0; if (expDiff < 0) { shift128Right(aSig0, 0, 1, &aSig0, &aSig1); expDiff = 0; } if (expDiff > 0) { q = argument_reduction_kernel(aSig0, expDiff, &aSig0, &aSig1); } else { if (FLOAT_PI_HI <= aSig0) { aSig0 -= FLOAT_PI_HI; q = 1; } } shift128Right(FLOAT_PI_HI, FLOAT_PI_LO, 1, &term0, &term1); if (! lt128(aSig0, aSig1, term0, term1)) { int lt = lt128(term0, term1, aSig0, aSig1); int eq = eq128(aSig0, aSig1, term0, term1); if ((eq && (q & 1)) || lt) { zSign = !zSign; ++q; } if (lt) sub128(FLOAT_PI_HI, FLOAT_PI_LO, aSig0, aSig1, &aSig0, &aSig1); } return (int)(q & 3); }
/**
 * Check whether a quadruple-precision value encodes a signalling NaN.
 *
 * @param ld Quadruple-precision float.
 *
 * @return 1 if ld is a signalling NaN, 0 otherwise.
 */
int is_float128_signan(float128 ld)
{
	/*
	 * SigNaN: exp = 0x7fff and fraction = 0xxxxx..x (binary),
	 * where at least one x is nonzero.
	 */
	if (ld.parts.exp != 0x7FFF) {
		return 0;
	}

	if (!ld.parts.frac_hi && !ld.parts.frac_lo) {
		/* all-zero fraction: not a NaN at all */
		return 0;
	}

	/* fraction strictly below 0x800000000000:0 means its top bit is clear */
	return lt128(ld.parts.frac_hi, ld.parts.frac_lo,
	    0x800000000000ll, 0x0ll) ? 1 : 0;
}
/**
 * Convert a single-precision float to quadruple precision.
 *
 * The sign is copied, the fraction is moved to the top of the wider fraction
 * field and the exponent is re-biased. Infinities and NaNs get the maximum
 * float128 exponent; zeroes stay zero; non-zero denormalized inputs are
 * normalized.
 *
 * @param a Single-precision float to convert.
 *
 * @return The float128 built from a.
 */
float128 float32_to_float128(float32 a)
{
	float128 result;
	uint64_t frac_hi, frac_lo;
	uint64_t tmp_hi, tmp_lo;

	result.parts.sign = a.parts.sign;

	/* left-align the narrow fraction inside the wide fraction field */
	result.parts.frac_hi = 0;
	result.parts.frac_lo = a.parts.fraction;
	lshift128(result.parts.frac_hi, result.parts.frac_lo,
	    (FLOAT128_FRACTION_SIZE - FLOAT32_FRACTION_SIZE),
	    &frac_hi, &frac_lo);
	result.parts.frac_hi = frac_hi;
	result.parts.frac_lo = frac_lo;

	if ((is_float32_infinity(a)) || (is_float32_nan(a))) {
		result.parts.exp = FLOAT128_MAX_EXPONENT;
		/* TODO: check if its correct for SigNaNs */
		return result;
	}

	result.parts.exp = a.parts.exp + ((int) FLOAT128_BIAS - FLOAT32_BIAS);
	if (a.parts.exp == 0) {
		/* normalize denormalized numbers */

		if (eq128(result.parts.frac_hi, result.parts.frac_lo,
		    0x0ll, 0x0ll)) {
			/* fix zero */
			result.parts.exp = 0;
			return result;
		}

		frac_hi = result.parts.frac_hi;
		frac_lo = result.parts.frac_lo;

		/*
		 * Shift the fraction left until the bit selected by
		 * FLOAT128_HIDDEN_BIT_MASK_HI/LO becomes set, lowering the
		 * exponent once per shift. The mask test has to be redone
		 * after every shift; testing a value computed only once
		 * before the loop would never let the loop terminate.
		 */
		and128(frac_hi, frac_lo,
		    FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
		    &tmp_hi, &tmp_lo);
		while (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
			lshift128(frac_hi, frac_lo, 1, &frac_hi, &frac_lo);
			--result.parts.exp;
			and128(frac_hi, frac_lo,
			    FLOAT128_HIDDEN_BIT_MASK_HI,
			    FLOAT128_HIDDEN_BIT_MASK_LO,
			    &tmp_hi, &tmp_lo);
		}

		/*
		 * A denormal's stored exponent of 0 stands for the same scale
		 * as a stored exponent of 1, so compensate by one.
		 */
		++result.parts.exp;

		/* storing into the bit-field drops the now-set hidden bit */
		result.parts.frac_hi = frac_hi;
		result.parts.frac_lo = frac_lo;
	}

	return result;
}
// Compute a remainder of a divided by b on extended-precision operands.
//
// a, b          - dividend and divisor.
// q             - output; reset to 0 on entry, then set to the quotient
//                 produced by the reduction step (all ones when only a
//                 partial reduction was carried out, see below).
// rounding_mode - when equal to float_round_nearest_even the remainder is
//                 additionally rounded to the nearest multiple of b;
//                 any other value leaves the reduced remainder as is.
// status        - receives the invalid / denormal exception flags raised here
//                 and is passed on to the NaN-propagation and packing helpers.
//
// Returns the remainder, a propagated NaN, floatx80_default_nan for invalid
// operand combinations, or a itself for several early-out cases.
static floatx80 do_fprem(floatx80 a, floatx80 b, Bit64u &q, int rounding_mode, float_status_t &status)
{
    Bit32s aExp, bExp, zExp, expDiff;
    Bit64u aSig0, aSig1, bSig;
    int aSign;
    q = 0;

    // handle unsupported extended double-precision floating encodings
    if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
    {
        float_raise(status, float_flag_invalid);
        return floatx80_default_nan;
    }

    aSig0 = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);
    bSig = extractFloatx80Frac(b);
    bExp = extractFloatx80Exp(b);

    // a has the maximum exponent: NaN if any significand bit below the top
    // one is set (in a, or in a max-exponent b); otherwise the operation
    // is invalid.
    if (aExp == 0x7FFF) {
        if ((Bit64u) (aSig0<<1) || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1))) {
            return propagateFloatx80NaN(a, b, status);
        }
        goto invalid;
    }
    // b has the maximum exponent: propagate a NaN, otherwise a is returned
    // unchanged.
    if (bExp == 0x7FFF) {
        if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b, status);
        return a;
    }
    // b has a zero exponent: a zero significand is invalid, anything else is
    // flagged as denormal and normalized.
    if (bExp == 0) {
        if (bSig == 0) {
 invalid:
            // shared exit, also reached via the goto above
            float_raise(status, float_flag_invalid);
            return floatx80_default_nan;
        }
        float_raise(status, float_flag_denormal);
        normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
    }
    // a has a zero exponent: returned as is when no significand bit below the
    // top one is set, otherwise flagged as denormal and normalized.
    if (aExp == 0) {
        if ((Bit64u) (aSig0<<1) == 0) return a;
        float_raise(status, float_flag_denormal);
        normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
    }
    expDiff = aExp - bExp;
    aSig1 = 0;

    if (expDiff >= 64) {
        // Exponents too far apart for one pass: reduce by n bits only, where
        // n is in the range 32..63, keep a's exponent lowered by n and report
        // q as all ones.
        // NOTE(review): presumably the caller treats q == ~0 as "reduction
        // incomplete" - confirm against the call sites.
        int n = (expDiff & 0x1f) | 0x20;
        remainder_kernel(aSig0, bSig, n, &aSig0, &aSig1);
        zExp = aExp - n;
        q = (Bit64u) -1;
    }
    else {
        zExp = bExp;

        if (expDiff < 0) {
            // a's exponent is at least two below b's: return a, repacked from
            // the extracted fields when the top bit of a.fraction is set.
            if (expDiff < -1)
               return (a.fraction & BX_CONST64(0x8000000000000000)) ? packFloatx80(aSign, aExp, aSig0) : a;
            // expDiff == -1: align a with b by shifting one bit to the right
            shift128Right(aSig0, 0, 1, &aSig0, &aSig1);
            expDiff = 0;
        }

        if (expDiff > 0) {
            q = remainder_kernel(aSig0, bSig, expDiff, &aSig0, &aSig1);
        }
        else {
            // aligned significands: at most one subtraction of b is needed
            if (bSig <= aSig0) {
               aSig0 -= bSig;
               q = 1;
            }
        }

        if (rounding_mode == float_round_nearest_even)
        {
            // term0:term1 = bSig / 2, the threshold for rounding the quotient
            Bit64u term0, term1;
            shift128Right(bSig, 0, 1, &term0, &term1);

            if (! lt128(aSig0, aSig1, term0, term1))
            {
               int lt = lt128(term0, term1, aSig0, aSig1);
               int eq = eq128(aSig0, aSig1, term0, term1);

               // round the quotient up when the remainder exceeds half of b,
               // or equals it while q is odd; the remainder changes sign
               if ((eq && (q & 1)) || lt) {
                  aSign = !aSign;
                  ++q;
               }
               // strictly above half: remainder becomes b - remainder
               if (lt) sub128(bSig, 0, aSig0, aSig1, &aSig0, &aSig1);
            }
        }
    }

    return normalizeRoundAndPackFloatx80(80, aSign, zExp, aSig0, aSig1, status);
}