/** Convert a single-precision float to quadruple precision.
 *
 * The sign is copied, the fraction is moved to the top of the wider
 * fraction field and the exponent is re-biased. Infinities and NaNs get
 * the maximum float128 exponent, zero stays zero and denormalized inputs
 * are normalized.
 *
 * @param a Single-precision value to convert.
 *
 * @return The converted value in float128 format.
 *
 */
float128 float32_to_float128(float32 a)
{
	float128 result;
	uint64_t frac_hi, frac_lo;
	uint64_t tmp_hi, tmp_lo;

	result.parts.sign = a.parts.sign;

	/* Widen the fraction and align it with the float128 fraction field. */
	result.parts.frac_hi = 0;
	result.parts.frac_lo = a.parts.fraction;
	lshift128(result.parts.frac_hi, result.parts.frac_lo,
	    (FLOAT128_FRACTION_SIZE - FLOAT32_FRACTION_SIZE),
	    &frac_hi, &frac_lo);
	result.parts.frac_hi = frac_hi;
	result.parts.frac_lo = frac_lo;

	if ((is_float32_infinity(a)) || (is_float32_nan(a))) {
		result.parts.exp = FLOAT128_MAX_EXPONENT;
		/* TODO: check if it is correct for SigNaNs */
		return result;
	}

	result.parts.exp = a.parts.exp + ((int) FLOAT128_BIAS - FLOAT32_BIAS);
	if (a.parts.exp == 0) {
		/* normalize denormalized numbers */

		if (eq128(result.parts.frac_hi, result.parts.frac_lo,
		    0x0ll, 0x0ll)) {
			/* fix zero */
			result.parts.exp = 0;
			return result;
		}

		frac_hi = result.parts.frac_hi;
		frac_lo = result.parts.frac_lo;

		/*
		 * Shift the fraction left until the hidden bit becomes set,
		 * lowering the exponent by one for every shift. The masked
		 * value has to be recomputed after each shift: testing a
		 * value computed only once before the loop would never
		 * change and the loop would not terminate. The fraction is
		 * nonzero here, so the loop must stop.
		 */
		and128(frac_hi, frac_lo,
		    FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
		    &tmp_hi, &tmp_lo);
		while (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
			lshift128(frac_hi, frac_lo, 1, &frac_hi, &frac_lo);
			--result.parts.exp;
			and128(frac_hi, frac_lo,
			    FLOAT128_HIDDEN_BIT_MASK_HI,
			    FLOAT128_HIDDEN_BIT_MASK_LO,
			    &tmp_hi, &tmp_lo);
		}

		/* Same exponent correction as in float32_to_float64(). */
		++result.parts.exp;
		result.parts.frac_hi = frac_hi;
		result.parts.frac_lo = frac_lo;
	}

	return result;
}
/** Convert a single-precision float to double precision.
 *
 * The sign is copied, the fraction is shifted up to the top of the wider
 * fraction field and the exponent is re-biased. Infinities and NaNs get
 * the maximum float64 exponent, zero stays zero and denormalized inputs
 * are normalized.
 *
 * @param a Single-precision value to convert.
 *
 * @return The converted value in float64 format.
 *
 */
float64 float32_to_float64(float32 a)
{
	float64 result;
	uint64_t mantissa;

	result.parts.sign = a.parts.sign;

	/* Place the narrow fraction at the top of the wide fraction field. */
	result.parts.fraction = a.parts.fraction;
	result.parts.fraction <<= (FLOAT64_FRACTION_SIZE - FLOAT32_FRACTION_SIZE);

	if (is_float32_infinity(a) || is_float32_nan(a)) {
		/* TODO: check if it is correct for SigNaNs */
		result.parts.exp = FLOAT64_MAX_EXPONENT;
		return result;
	}

	result.parts.exp = a.parts.exp + ((int) FLOAT64_BIAS - FLOAT32_BIAS);

	/* Inputs with a nonzero exponent need no further adjustment. */
	if (a.parts.exp != 0) {
		return result;
	}

	/* Zero exponent and zero fraction: the result is zero as well. */
	if (result.parts.fraction == 0) {
		result.parts.exp = 0;
		return result;
	}

	/*
	 * Denormalized input: shift the fraction left until the hidden bit
	 * is set, lowering the exponent by one for every shift.
	 */
	for (mantissa = result.parts.fraction;
	    (mantissa & FLOAT64_HIDDEN_BIT_MASK) == 0;
	    mantissa <<= 1) {
		--result.parts.exp;
	}

	++result.parts.exp;
	result.parts.fraction = mantissa;

	return result;
}
/** Multiply two single-precision floats. * * @param a First input operand. * @param b Second input operand. * * @return Result of multiplication. * */ float32 mul_float32(float32 a, float32 b) { float32 result; uint64_t frac1, frac2; int32_t exp; result.parts.sign = a.parts.sign ^ b.parts.sign; if (is_float32_nan(a) || is_float32_nan(b)) { /* TODO: fix SigNaNs */ if (is_float32_signan(a)) { result.parts.fraction = a.parts.fraction; result.parts.exp = a.parts.exp; return result; } if (is_float32_signan(b)) { /* TODO: fix SigNaN */ result.parts.fraction = b.parts.fraction; result.parts.exp = b.parts.exp; return result; } /* set NaN as result */ result.bin = FLOAT32_NAN; return result; } if (is_float32_infinity(a)) { if (is_float32_zero(b)) { /* FIXME: zero * infinity */ result.bin = FLOAT32_NAN; return result; } result.parts.fraction = a.parts.fraction; result.parts.exp = a.parts.exp; return result; } if (is_float32_infinity(b)) { if (is_float32_zero(a)) { /* FIXME: zero * infinity */ result.bin = FLOAT32_NAN; return result; } result.parts.fraction = b.parts.fraction; result.parts.exp = b.parts.exp; return result; } /* exp is signed so we can easy detect underflow */ exp = a.parts.exp + b.parts.exp; exp -= FLOAT32_BIAS; if (exp >= FLOAT32_MAX_EXPONENT) { /* FIXME: overflow */ /* set infinity as result */ result.bin = FLOAT32_INF; result.parts.sign = a.parts.sign ^ b.parts.sign; return result; } if (exp < 0) { /* FIXME: underflow */ /* return signed zero */ result.parts.fraction = 0x0; result.parts.exp = 0x0; return result; } frac1 = a.parts.fraction; if (a.parts.exp > 0) { frac1 |= FLOAT32_HIDDEN_BIT_MASK; } else { ++exp; } frac2 = b.parts.fraction; if (b.parts.exp > 0) { frac2 |= FLOAT32_HIDDEN_BIT_MASK; } else { ++exp; } frac1 <<= 1; /* one bit space for rounding */ frac1 = frac1 * frac2; /* round and return */ while ((exp < FLOAT32_MAX_EXPONENT) && (frac1 >= (1 << (FLOAT32_FRACTION_SIZE + 2)))) { /* 23 bits of fraction + one more for hidden bit (all shifted 1 bit 
left) */ ++exp; frac1 >>= 1; } /* rounding */ /* ++frac1; FIXME: not works - without it is ok */ frac1 >>= 1; /* shift off rounding space */ if ((exp < FLOAT32_MAX_EXPONENT) && (frac1 >= (1 << (FLOAT32_FRACTION_SIZE + 1)))) { ++exp; frac1 >>= 1; }
/** Divide two single-precision floats. * * @param a Nominator. * @param b Denominator. * * @return Result of division. * */ float32 div_float32(float32 a, float32 b) { float32 result; int32_t aexp, bexp, cexp; uint64_t afrac, bfrac, cfrac; result.parts.sign = a.parts.sign ^ b.parts.sign; if (is_float32_nan(a)) { if (is_float32_signan(a)) { // FIXME: SigNaN } /* NaN */ return a; } if (is_float32_nan(b)) { if (is_float32_signan(b)) { // FIXME: SigNaN } /* NaN */ return b; } if (is_float32_infinity(a)) { if (is_float32_infinity(b)) { /*FIXME: inf / inf */ result.bin = FLOAT32_NAN; return result; } /* inf / num */ result.parts.exp = a.parts.exp; result.parts.fraction = a.parts.fraction; return result; } if (is_float32_infinity(b)) { if (is_float32_zero(a)) { /* FIXME 0 / inf */ result.parts.exp = 0; result.parts.fraction = 0; return result; } /* FIXME: num / inf*/ result.parts.exp = 0; result.parts.fraction = 0; return result; } if (is_float32_zero(b)) { if (is_float32_zero(a)) { /*FIXME: 0 / 0*/ result.bin = FLOAT32_NAN; return result; } /* FIXME: division by zero */ result.parts.exp = 0; result.parts.fraction = 0; return result; } afrac = a.parts.fraction; aexp = a.parts.exp; bfrac = b.parts.fraction; bexp = b.parts.exp; /* denormalized numbers */ if (aexp == 0) { if (afrac == 0) { result.parts.exp = 0; result.parts.fraction = 0; return result; } /* normalize it*/ afrac <<= 1; /* afrac is nonzero => it must stop */ while (!(afrac & FLOAT32_HIDDEN_BIT_MASK)) { afrac <<= 1; aexp--; } } if (bexp == 0) { bfrac <<= 1; /* bfrac is nonzero => it must stop */ while (!(bfrac & FLOAT32_HIDDEN_BIT_MASK)) { bfrac <<= 1; bexp--; } } afrac = (afrac | FLOAT32_HIDDEN_BIT_MASK) << (32 - FLOAT32_FRACTION_SIZE - 1); bfrac = (bfrac | FLOAT32_HIDDEN_BIT_MASK) << (32 - FLOAT32_FRACTION_SIZE); if (bfrac <= (afrac << 1)) { afrac >>= 1; aexp++; }