Example #1
0
/**
 * Greater-than comparison between two floats. NaNs are not recognized.
 *
 * @a First quadruple-precision operand.
 * @b Second quadruple-precision operand.
 * @return 1 if a is greater than b, 0 otherwise.
 */
int is_float128_gt(float128 a, float128 b)
{
	uint64_t tmp_hi;
	uint64_t tmp_lo;
	
	or128(a.bin.hi, a.bin.lo,
	    b.bin.hi, b.bin.lo, &tmp_hi, &tmp_lo);
	and128(tmp_hi, tmp_lo,
	    0x7FFFFFFFFFFFFFFFll, 0xFFFFFFFFFFFFFFFFll, &tmp_hi, &tmp_lo);
	if (eq128(tmp_hi, tmp_lo, 0x0ll, 0x0ll)) {
		/* zeroes are equal with any sign */
		return 0;
	}
	
	if ((a.parts.sign) && (b.parts.sign)) {
		/* if both are negative, greater is that with smaller binary value */
		return lt128(a.bin.hi, a.bin.lo, b.bin.hi, b.bin.lo);
	}
	
	/*
	 * lets negate signs - now will be positive numbers always
	 * bigger than negative (first bit will be set for unsigned
	 * integer comparison)
	 */
	a.parts.sign = !a.parts.sign;
	b.parts.sign = !b.parts.sign;
	return lt128(b.bin.hi, b.bin.lo, a.bin.hi, a.bin.lo);
}
Example #2
0
static int reduce_trig_arg(int expDiff, int &zSign, Bit64u &aSig0, Bit64u &aSig1)
{
    Bit64u term0, term1, q = 0;

    if (expDiff < 0) {
        shift128Right(aSig0, 0, 1, &aSig0, &aSig1);
        expDiff = 0;
    }
    if (expDiff > 0) {
        q = argument_reduction_kernel(aSig0, expDiff, &aSig0, &aSig1);
    }
    else {
        if (FLOAT_PI_HI <= aSig0) {
            aSig0 -= FLOAT_PI_HI;
            q = 1;
        }
    }

    shift128Right(FLOAT_PI_HI, FLOAT_PI_LO, 1, &term0, &term1);
    if (! lt128(aSig0, aSig1, term0, term1))
    {
        int lt = lt128(term0, term1, aSig0, aSig1);
        int eq = eq128(aSig0, aSig1, term0, term1);
              
        if ((eq && (q & 1)) || lt) {
            zSign = !zSign;
            ++q;
        }
        if (lt) sub128(FLOAT_PI_HI, FLOAT_PI_LO, aSig0, aSig1, &aSig0, &aSig1);
    }

    return (int)(q & 3);
}
Example #3
0
/**
 * Determines whether the given float represents signalling NaN.
 *
 * @param ld Quadruple-precision float.
 * @return 1 if float is signalling NaN, 0 otherwise.
 */
int is_float128_signan(float128 ld)
{
	/* SigNaN : exp = 0x7fff and fraction = 0xxxxx..x (binary),
	 * where at least one x is nonzero */
	return ((ld.parts.exp == 0x7FFF) &&
	    (ld.parts.frac_hi || ld.parts.frac_lo) &&
	    lt128(ld.parts.frac_hi, ld.parts.frac_lo, 0x800000000000ll, 0x0ll));

}
Example #4
0
float128 float32_to_float128(float32 a)
{
	float128 result;
	uint64_t frac_hi, frac_lo;
	uint64_t tmp_hi, tmp_lo;
	
	result.parts.sign = a.parts.sign;
	result.parts.frac_hi = 0;
	result.parts.frac_lo = a.parts.fraction;
	lshift128(result.parts.frac_hi, result.parts.frac_lo,
	    (FLOAT128_FRACTION_SIZE - FLOAT32_FRACTION_SIZE),
	    &frac_hi, &frac_lo);
	result.parts.frac_hi = frac_hi;
	result.parts.frac_lo = frac_lo;
	
	if ((is_float32_infinity(a)) || (is_float32_nan(a))) {
		result.parts.exp = FLOAT128_MAX_EXPONENT;
		// TODO; check if its correct for SigNaNs
		return result;
	}
	
	result.parts.exp = a.parts.exp + ((int) FLOAT128_BIAS - FLOAT32_BIAS);
	if (a.parts.exp == 0) {
		/* normalize denormalized numbers */
		
		if (eq128(result.parts.frac_hi,
		    result.parts.frac_lo, 0x0ll, 0x0ll)) { /* fix zero */
			result.parts.exp = 0;
			return result;
		}
		
		frac_hi = result.parts.frac_hi;
		frac_lo = result.parts.frac_lo;
		
		and128(frac_hi, frac_lo,
		    FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
		    &tmp_hi, &tmp_lo);
		while (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
			lshift128(frac_hi, frac_lo, 1, &frac_hi, &frac_lo);
			--result.parts.exp;
		}
		
		++result.parts.exp;
		result.parts.frac_hi = frac_hi;
		result.parts.frac_lo = frac_lo;
	}
	
	return result;
}
Example #5
0
static floatx80 do_fprem(floatx80 a, floatx80 b, Bit64u &q, int rounding_mode, float_status_t &status)
{
    Bit32s aExp, bExp, zExp, expDiff;
    Bit64u aSig0, aSig1, bSig;
    int aSign;
    q = 0;

    // handle unsupported extended double-precision floating encodings
    if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
    {
        float_raise(status, float_flag_invalid);
        return floatx80_default_nan;
    }

    aSig0 = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);
    bSig = extractFloatx80Frac(b);
    bExp = extractFloatx80Exp(b);

    if (aExp == 0x7FFF) {
        if ((Bit64u) (aSig0<<1)
                || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1)))
        {
            return propagateFloatx80NaN(a, b, status);
        }
        goto invalid;
    }
    if (bExp == 0x7FFF) {
        if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b, status);
        return a;
    }
    if (bExp == 0) {
        if (bSig == 0) {
invalid:
            float_raise(status, float_flag_invalid);
            return floatx80_default_nan;
        }
        float_raise(status, float_flag_denormal);
        normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
    }
    if (aExp == 0) {
        if ((Bit64u) (aSig0<<1) == 0) return a;
        float_raise(status, float_flag_denormal);
        normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
    }
    expDiff = aExp - bExp;
    aSig1 = 0;

    if (expDiff >= 64) {
        int n = (expDiff & 0x1f) | 0x20;
        remainder_kernel(aSig0, bSig, n, &aSig0, &aSig1);
        zExp = aExp - n;
        q = (Bit64u) -1;
    }
    else {
        zExp = bExp;

        if (expDiff < 0) {
            if (expDiff < -1)
                return (a.fraction & BX_CONST64(0x8000000000000000)) ?
                       packFloatx80(aSign, aExp, aSig0) : a;
            shift128Right(aSig0, 0, 1, &aSig0, &aSig1);
            expDiff = 0;
        }

        if (expDiff > 0) {
            q = remainder_kernel(aSig0, bSig, expDiff, &aSig0, &aSig1);
        }
        else {
            if (bSig <= aSig0) {
                aSig0 -= bSig;
                q = 1;
            }
        }

        if (rounding_mode == float_round_nearest_even)
        {
            Bit64u term0, term1;
            shift128Right(bSig, 0, 1, &term0, &term1);

            if (! lt128(aSig0, aSig1, term0, term1))
            {
                int lt = lt128(term0, term1, aSig0, aSig1);
                int eq = eq128(aSig0, aSig1, term0, term1);

                if ((eq && (q & 1)) || lt) {
                    aSign = !aSign;
                    ++q;
                }
                if (lt) sub128(bSig, 0, aSig0, aSig1, &aSig0, &aSig1);
            }
        }
    }

    return normalizeRoundAndPackFloatx80(80, aSign, zExp, aSig0, aSig1, status);
}