/*----------------------------------------------------------------------------
| Converts the 16-bit floating-point value `a' to the 80-bit extended
| floating-point format and returns the result.
|
| The result is assembled as a sign/exponent word plus a 64-bit significand
| whose top bit is the explicit integer bit.  NaNs are routed through the
| common-NaN helpers; infinities and zeros are packed directly; subnormal
| inputs are normalized first.
*----------------------------------------------------------------------------*/
extFloat80_t f16_to_extF80( float16_t a )
{
    union ui16_f16 src;
    union { struct extFloat80M s; extFloat80_t f; } dst;
    uint_fast16_t bits;
    bool isNeg;
    int_fast8_t expField;
    uint_fast16_t fracField;
    uint_fast16_t signExp;
    uint_fast64_t signif;

    /*------------------------------------------------------------------------
    | Unpack the input into sign, exponent field, and fraction field.
    *------------------------------------------------------------------------*/
    src.f = a;
    bits = src.ui;
    isNeg = signF16UI( bits );
    expField = expF16UI( bits );
    fracField = fracF16UI( bits );

    if ( expField == 0x1F ) {
        /*--------------------------------------------------------------------
        | Exponent field all ones: NaN (nonzero fraction) or infinity.
        *--------------------------------------------------------------------*/
        if ( fracField ) {
            struct commonNaN nan;
            struct uint128 nanBits;

            softfloat_f16UIToCommonNaN( bits, &nan );
            nanBits = softfloat_commonNaNToExtF80UI( &nan );
            signExp = nanBits.v64;
            signif = nanBits.v0;
        } else {
            signExp = packToExtF80UI64( isNeg, 0x7FFF );
            signif = UINT64_C( 0x8000000000000000 );
        }
    } else if ( ! expField && ! fracField ) {
        /*--------------------------------------------------------------------
        | Signed zero.
        *--------------------------------------------------------------------*/
        signExp = packToExtF80UI64( isNeg, 0 );
        signif = 0;
    } else {
        /*--------------------------------------------------------------------
        | Finite nonzero value.  A subnormal input is normalized first so that
        | the common packing below can OR in the leading significand bit.
        *--------------------------------------------------------------------*/
        if ( ! expField ) {
            struct exp8_sig16 norm;

            norm = softfloat_normSubnormalF16Sig( fracField );
            expField = norm.exp;
            fracField = norm.sig;
        }
        signExp = packToExtF80UI64( isNeg, expField + 0x3FF0 );
        signif = (uint_fast64_t) (fracField | 0x0400)<<53;
    }

    dst.s.signExp = signExp;
    dst.s.signif = signif;
    return dst.f;
}
/*----------------------------------------------------------------------------
| Converts the 16-bit floating-point value `a' to the 128-bit floating-point
| format and returns the result.
|
| Only the upper 64-bit word of the result carries information for non-NaN
| inputs; the lower word is set to zero.  NaNs are routed through the
| common-NaN helpers.
*----------------------------------------------------------------------------*/
float128_t f16_to_f128( float16_t a )
{
    union ui16_f16 src;
    union ui128_f128 dst;
    uint_fast16_t bits;
    bool isNeg;
    int_fast8_t expField;
    uint_fast16_t fracField;
    struct uint128 result;

    /*------------------------------------------------------------------------
    | Unpack the input into sign, exponent field, and fraction field.
    *------------------------------------------------------------------------*/
    src.f = a;
    bits = src.ui;
    isNeg = signF16UI( bits );
    expField = expF16UI( bits );
    fracField = fracF16UI( bits );

    if ( expField == 0x1F ) {
        /*--------------------------------------------------------------------
        | Exponent field all ones: NaN (nonzero fraction) or infinity.
        *--------------------------------------------------------------------*/
        if ( fracField ) {
            struct commonNaN nan;

            softfloat_f16UIToCommonNaN( bits, &nan );
            result = softfloat_commonNaNToF128UI( &nan );
        } else {
            result.v64 = packToF128UI64( isNeg, 0x7FFF, 0 );
            result.v0 = 0;
        }
    } else if ( ! expField && ! fracField ) {
        /*--------------------------------------------------------------------
        | Signed zero.
        *--------------------------------------------------------------------*/
        result.v64 = packToF128UI64( isNeg, 0, 0 );
        result.v0 = 0;
    } else {
        /*--------------------------------------------------------------------
        | Finite nonzero value.  For a subnormal input the normalized exponent
        | is reduced by one before the common packing step below.
        *--------------------------------------------------------------------*/
        if ( ! expField ) {
            struct exp8_sig16 norm;

            norm = softfloat_normSubnormalF16Sig( fracField );
            expField = norm.exp - 1;
            fracField = norm.sig;
        }
        result.v64 =
            packToF128UI64(
                isNeg, expField + 0x3FF0, (uint_fast64_t) fracField<<38 );
        result.v0 = 0;
    }

    dst.ui = result;
    return dst.f;
}
/*----------------------------------------------------------------------------
| Converts the 16-bit floating-point value `a' to the 32-bit floating-point
| format and returns the result.
|
| NaNs are routed through the common-NaN helpers; infinities and zeros are
| packed directly; subnormal inputs are normalized before packing.
*----------------------------------------------------------------------------*/
float32_t f16_to_f32( float16_t a )
{
    union ui16_f16 src;
    union ui32_f32 dst;
    uint_fast16_t bits;
    bool isNeg;
    int_fast8_t expField;
    uint_fast16_t fracField;
    uint_fast32_t result;

    /*------------------------------------------------------------------------
    | Unpack the input into sign, exponent field, and fraction field.
    *------------------------------------------------------------------------*/
    src.f = a;
    bits = src.ui;
    isNeg = signF16UI( bits );
    expField = expF16UI( bits );
    fracField = fracF16UI( bits );

    if ( expField == 0x1F ) {
        /*--------------------------------------------------------------------
        | Exponent field all ones: NaN (nonzero fraction) or infinity.
        *--------------------------------------------------------------------*/
        if ( fracField ) {
            struct commonNaN nan;

            softfloat_f16UIToCommonNaN( bits, &nan );
            result = softfloat_commonNaNToF32UI( &nan );
        } else {
            result = packToF32UI( isNeg, 0xFF, 0 );
        }
    } else if ( ! expField && ! fracField ) {
        /*--------------------------------------------------------------------
        | Signed zero.
        *--------------------------------------------------------------------*/
        result = packToF32UI( isNeg, 0, 0 );
    } else {
        /*--------------------------------------------------------------------
        | Finite nonzero value.  For a subnormal input the normalized exponent
        | is reduced by one before the common packing step below.
        *--------------------------------------------------------------------*/
        if ( ! expField ) {
            struct exp8_sig16 norm;

            norm = softfloat_normSubnormalF16Sig( fracField );
            expField = norm.exp - 1;
            fracField = norm.sig;
        }
        result =
            packToF32UI(
                isNeg, expField + 0x70, (uint_fast32_t) fracField<<13 );
    }

    dst.ui = result;
    return dst.f;
}
/*----------------------------------------------------------------------------
| Converts the 16-bit floating-point value `a' to a 64-bit signed integer,
| discarding any fractional part (rounding toward zero).
|
| If `exact' is true, the inexact flag is set whenever a nonzero fractional
| part is discarded.  For a NaN or an infinity the invalid flag is raised and
| one of i64_fromNaN, i64_fromNegOverflow, or i64_fromPosOverflow is
| returned.
*----------------------------------------------------------------------------*/
int_fast64_t f16_to_i64_r_minMag( float16_t a, bool exact )
{
    union ui16_f16 src;
    uint_fast16_t bits;
    int_fast8_t expField;
    uint_fast16_t fracField;
    int_fast8_t leftShift;
    bool isNeg;
    int_fast32_t scaled;

    /*------------------------------------------------------------------------
    | Unpack the exponent and fraction fields.
    *------------------------------------------------------------------------*/
    src.f = a;
    bits = src.ui;
    expField = expF16UI( bits );
    fracField = fracF16UI( bits );

    /*------------------------------------------------------------------------
    | Exponent field below 0x0F: the integer part is zero.  The conversion is
    | inexact unless the input is itself a zero.
    *------------------------------------------------------------------------*/
    leftShift = expField - 0x0F;
    if ( leftShift < 0 ) {
        if ( exact && (expField | fracField) ) {
            softfloat_exceptionFlags |= softfloat_flag_inexact;
        }
        return 0;
    }

    /*------------------------------------------------------------------------
    | NaN or infinity.
    *------------------------------------------------------------------------*/
    isNeg = signF16UI( bits );
    if ( expField == 0x1F ) {
        softfloat_raiseFlags( softfloat_flag_invalid );
        return
            fracField ? i64_fromNaN
                : isNeg ? i64_fromNegOverflow : i64_fromPosOverflow;
    }

    /*------------------------------------------------------------------------
    | Align the significand so that its low 10 bits are the fractional part,
    | test them for inexactness, then drop them.
    *------------------------------------------------------------------------*/
    scaled = (int_fast32_t) (fracField | 0x0400)<<leftShift;
    if ( exact && (scaled & 0x3FF) ) {
        softfloat_exceptionFlags |= softfloat_flag_inexact;
    }
    scaled >>= 10;
    return isNeg ? -scaled : scaled;
}
/*----------------------------------------------------------------------------
| Converts the 16-bit floating-point value `a' to a 64-bit signed integer
| according to `roundingMode'.
|
| For a NaN or an infinity the invalid flag is raised and one of i64_fromNaN,
| i64_fromNegOverflow, or i64_fromPosOverflow is returned.  Values with an
| exponent field of at least 0x19 are converted exactly by a left shift; all
| other values are delegated to softfloat_roundPackToI32, which receives
| `exact' unchanged.
*----------------------------------------------------------------------------*/
int_fast64_t f16_to_i64( float16_t a, uint_fast8_t roundingMode, bool exact )
{
    union ui16_f16 src;
    uint_fast16_t bits;
    bool isNeg;
    int_fast8_t expField;
    uint_fast16_t fracField;
    int_fast32_t mag;

    /*------------------------------------------------------------------------
    | Unpack the input into sign, exponent field, and fraction field.
    *------------------------------------------------------------------------*/
    src.f = a;
    bits = src.ui;
    isNeg = signF16UI( bits );
    expField = expF16UI( bits );
    fracField = fracF16UI( bits );

    /*------------------------------------------------------------------------
    | NaN or infinity.
    *------------------------------------------------------------------------*/
    if ( expField == 0x1F ) {
        softfloat_raiseFlags( softfloat_flag_invalid );
        return
            fracField ? i64_fromNaN
                : isNeg ? i64_fromNegOverflow : i64_fromPosOverflow;
    }

    /*------------------------------------------------------------------------
    | Zero and subnormal inputs (exponent field 0) pass the raw fraction to
    | the rounding helper unshifted.
    *------------------------------------------------------------------------*/
    mag = fracField;
    if ( expField ) {
        mag |= 0x0400;
        if ( 0x19 <= expField ) {
            /*----------------------------------------------------------------
            | No fractional bits remain: the value is an exact integer.
            *----------------------------------------------------------------*/
            mag <<= expField - 0x19;
            return isNeg ? -mag : mag;
        }
        if ( 0x0D < expField ) mag <<= expField - 0x0D;
    }
    return
        softfloat_roundPackToI32(
            isNeg, (uint_fast32_t) mag, roundingMode, exact );
}
/*----------------------------------------------------------------------------
| Returns the remainder of the 16-bit floating-point value `a' with respect
| to `b'.  Of the two candidate remainders that straddle zero, the one of
| smaller magnitude is kept; on an exact tie the candidate belonging to the
| even quotient is kept (see the selection between `rem' and `altRem').
|
| Special cases, as coded below:
|   - either operand is a NaN: result of softfloat_propagateNaNF16UI;
|   - `a' is an infinity, or `b' is a zero: the invalid flag is raised and
|     defaultNaNF16UI is returned;
|   - `b' is an infinity, or `a' is a zero: `a' is returned unchanged;
|   - the exponent of `a' is more than one below that of `b': `a' is returned
|     unchanged.
|
| NOTE: `rem', `altRem', and `meanRem' are deliberately exactly 16 bits wide;
| the algorithm depends on their wrap-around and on bit 15 acting as a sign
| bit.
*----------------------------------------------------------------------------*/
float16_t f16_rem( float16_t a, float16_t b )
{
    union ui16_f16 uA;
    uint_fast16_t uiA;
    bool signA;
    int_fast8_t expA;
    uint_fast16_t sigA;
    union ui16_f16 uB;
    uint_fast16_t uiB;
    int_fast8_t expB;
    uint_fast16_t sigB;
    struct exp8_sig16 normExpSig;
    uint16_t rem;
    int_fast8_t expDiff;
    uint_fast16_t q;
    uint32_t recip32, q32;
    uint16_t altRem, meanRem;
    bool signRem;
    uint_fast16_t uiZ;
    union ui16_f16 uZ;

    /*------------------------------------------------------------------------
    | Unpack both operands.  The sign of `b' is never extracted; only the
    | sign of `a' is used for the result.
    *------------------------------------------------------------------------*/
    uA.f = a;
    uiA = uA.ui;
    signA = signF16UI( uiA );
    expA = expF16UI( uiA );
    sigA = fracF16UI( uiA );
    uB.f = b;
    uiB = uB.ui;
    expB = expF16UI( uiB );
    sigB = fracF16UI( uiB );
    /*------------------------------------------------------------------------
    | NaN and infinity operands.
    *------------------------------------------------------------------------*/
    if ( expA == 0x1F ) {
        if ( sigA || ((expB == 0x1F) && sigB) ) goto propagateNaN;
        /* `a' is an infinity and `b' is not a NaN. */
        goto invalid;
    }
    if ( expB == 0x1F ) {
        if ( sigB ) goto propagateNaN;
        /* Finite `a', infinite `b': `a' is returned as is. */
        return a;
    }
    /*------------------------------------------------------------------------
    | Zero and subnormal operands.  A zero `b' is invalid; a zero `a' is
    | returned as is; subnormals are normalized.
    *------------------------------------------------------------------------*/
    if ( ! expB ) {
        if ( ! sigB ) goto invalid;
        normExpSig = softfloat_normSubnormalF16Sig( sigB );
        expB = normExpSig.exp;
        sigB = normExpSig.sig;
    }
    if ( ! expA ) {
        if ( ! sigA ) return a;
        normExpSig = softfloat_normSubnormalF16Sig( sigA );
        expA = normExpSig.exp;
        sigA = normExpSig.sig;
    }
    /*------------------------------------------------------------------------
    | Make the leading significand bits explicit and compute a partial
    | remainder `rem' together with a quotient estimate `q'.
    *------------------------------------------------------------------------*/
    rem = sigA | 0x0400;
    sigB |= 0x0400;
    expDiff = expA - expB;
    if ( expDiff < 1 ) {
        /* Exponent of `a' more than one below `b': return `a' unchanged. */
        if ( expDiff < -1 ) return a;
        sigB <<= 3;
        if ( expDiff ) {
            /* expDiff == -1: `rem' is scaled one bit less than `sigB'. */
            rem <<= 2;
            q = 0;
        } else {
            /* Equal exponents: at most one subtraction of `sigB' is needed. */
            rem <<= 3;
            q = (sigB <= rem);
            if ( q ) rem -= sigB;
        }
    } else {
        /* Quotient bits are estimated by multiplying by an approximate
           reciprocal of `sigB'. */
        recip32 = softfloat_approxRecip32_1( (uint_fast32_t) sigB<<21 );
        /*--------------------------------------------------------------------
        | Changing the shift of `rem' here requires also changing the initial
        | subtraction from `expDiff'.
        *--------------------------------------------------------------------*/
        rem <<= 4;
        expDiff -= 31;
        /*--------------------------------------------------------------------
        | The scale of `sigB' affects how many bits are obtained during each
        | cycle of the loop.  Currently this is 29 bits per loop iteration,
        | which is believed to be the maximum possible.
        *--------------------------------------------------------------------*/
        sigB <<= 3;
        for (;;) {
            q32 = (rem * (uint_fast64_t) recip32)>>16;
            if ( expDiff < 0 ) break;
            /* The product is negated and truncated to 16 bits by the
               assignment to `rem'. */
            rem = -((uint_fast16_t) q32 * sigB);
            expDiff -= 29;
        }
        /*--------------------------------------------------------------------
        | (`expDiff' cannot be less than -30 here.)
        *--------------------------------------------------------------------*/
        q32 >>= ~expDiff & 31;
        q = q32;
        rem = (rem<<(expDiff + 30)) - q * sigB;
    }
    /*------------------------------------------------------------------------
    | Subtract `sigB' until `rem' goes negative (bit 15 set), remembering the
    | last value before each subtraction in `altRem'.  Then keep whichever of
    | `rem' and `altRem' is smaller in magnitude: `meanRem' is their sum, so
    | a negative sum means `altRem' is the smaller.  On an exact tie
    | (`meanRem' zero), `altRem' is kept when `q' is odd.
    *------------------------------------------------------------------------*/
    do {
        altRem = rem;
        ++q;
        rem -= sigB;
    } while ( ! (rem & 0x8000) );
    meanRem = rem + altRem;
    if ( (meanRem & 0x8000) || (! meanRem && (q & 1)) ) rem = altRem;
    /* A negative remainder flips the result sign and is made positive before
       packing. */
    signRem = signA;
    if ( 0x8000 <= rem ) {
        signRem = ! signRem;
        rem = -rem;
    }
    return softfloat_normRoundPackToF16( signRem, expB, rem );
    /*------------------------------------------------------------------------
    | Exceptional results.
    *------------------------------------------------------------------------*/
 propagateNaN:
    uiZ = softfloat_propagateNaNF16UI( uiA, uiB );
    goto uiZ;
 invalid:
    softfloat_raiseFlags( softfloat_flag_invalid );
    uiZ = defaultNaNF16UI;
 uiZ:
    uZ.ui = uiZ;
    return uZ.f;
}
/*----------------------------------------------------------------------------
| Returns the product of the 16-bit floating-point values `a' and `b'.
|
| Special cases, as coded below:
|   - either operand is a NaN: result of softfloat_propagateNaNF16UI;
|   - an infinity times a zero: the invalid flag is raised and
|     defaultNaNF16UI is returned;
|   - an infinity times anything else: an infinity with the product sign;
|   - either operand is a zero: a zero with the product sign.
| All other products are rounded and packed by softfloat_roundPackToF16.
*----------------------------------------------------------------------------*/
float16_t f16_mul( float16_t a, float16_t b )
{
    union ui16_f16 inA, inB, out;
    uint_fast16_t bitsA, bitsB;
    bool negA, negB, negZ;
    int_fast8_t expA, expB, expZ;
    uint_fast16_t sigA, sigB, sigZ;
    bool aIsSpecial, bIsSpecial;
    uint_fast16_t otherMag;
    struct exp8_sig16 norm;
    uint_fast32_t product;

    /*------------------------------------------------------------------------
    | Unpack both operands; the result sign is the XOR of the operand signs.
    *------------------------------------------------------------------------*/
    inA.f = a;
    bitsA = inA.ui;
    negA = signF16UI( bitsA );
    expA = expF16UI( bitsA );
    sigA = fracF16UI( bitsA );
    inB.f = b;
    bitsB = inB.ui;
    negB = signF16UI( bitsB );
    expB = expF16UI( bitsB );
    sigB = fracF16UI( bitsB );
    negZ = negA ^ negB;

    /*------------------------------------------------------------------------
    | NaN and infinity operands.
    *------------------------------------------------------------------------*/
    aIsSpecial = (expA == 0x1F);
    bIsSpecial = (expB == 0x1F);
    if ( aIsSpecial || bIsSpecial ) {
        if ( (aIsSpecial && sigA) || (bIsSpecial && sigB) ) {
            out.ui = softfloat_propagateNaNF16UI( bitsA, bitsB );
        } else {
            /*----------------------------------------------------------------
            | At least one infinity and no NaN.  `otherMag' holds the
            | magnitude bits of the operand opposite the first infinity; it
            | is zero exactly when that operand is a zero.
            *----------------------------------------------------------------*/
            otherMag = aIsSpecial ? (expB | sigB) : (expA | sigA);
            if ( ! otherMag ) {
                softfloat_raiseFlags( softfloat_flag_invalid );
                out.ui = defaultNaNF16UI;
            } else {
                out.ui = packToF16UI( negZ, 0x1F, 0 );
            }
        }
        return out.f;
    }

    /*------------------------------------------------------------------------
    | Zero and subnormal operands.
    *------------------------------------------------------------------------*/
    if ( ! expA ) {
        if ( ! sigA ) goto signedZero;
        norm = softfloat_normSubnormalF16Sig( sigA );
        expA = norm.exp;
        sigA = norm.sig;
    }
    if ( ! expB ) {
        if ( ! sigB ) goto signedZero;
        norm = softfloat_normSubnormalF16Sig( sigB );
        expB = norm.exp;
        sigB = norm.sig;
    }

    /*------------------------------------------------------------------------
    | Multiply the significands (leading bits made explicit), keep the upper
    | 16 bits of the 32-bit product, and fold any nonzero discarded low bits
    | into bit 0 as a sticky bit.  A result below 0x4000 is shifted left one
    | position with the exponent decremented to match.
    *------------------------------------------------------------------------*/
    expZ = expA + expB - 0xF;
    sigA = (sigA | 0x0400)<<4;
    sigB = (sigB | 0x0400)<<5;
    product = (uint_fast32_t) sigA * sigB;
    sigZ = (product>>16) | ((product & 0xFFFF) != 0);
    if ( sigZ < 0x4000 ) {
        --expZ;
        sigZ <<= 1;
    }
    return softfloat_roundPackToF16( negZ, expZ, sigZ );

    /*------------------------------------------------------------------------
    | Zero result carrying the product sign.
    *------------------------------------------------------------------------*/
 signedZero:
    out.ui = packToF16UI( negZ, 0, 0 );
    return out.f;
}
float16_t softfloat_mulAddF16( uint_fast16_t uiA, uint_fast16_t uiB, uint_fast16_t uiC, uint_fast8_t op ) { bool signA; int_fast8_t expA; uint_fast16_t sigA; bool signB; int_fast8_t expB; uint_fast16_t sigB; bool signC; int_fast8_t expC; uint_fast16_t sigC; bool signProd; uint_fast16_t magBits, uiZ; struct exp8_sig16 normExpSig; int_fast8_t expProd; uint_fast32_t sigProd; bool signZ; int_fast8_t expZ; uint_fast16_t sigZ; int_fast8_t expDiff; uint_fast32_t sig32Z, sig32C; int_fast8_t shiftDist; union ui16_f16 uZ; /*------------------------------------------------------------------------ *------------------------------------------------------------------------*/ signA = signF16UI( uiA ); expA = expF16UI( uiA ); sigA = fracF16UI( uiA ); signB = signF16UI( uiB ); expB = expF16UI( uiB ); sigB = fracF16UI( uiB ); signC = signF16UI( uiC ) ^ (op == softfloat_mulAdd_subC); expC = expF16UI( uiC ); sigC = fracF16UI( uiC ); signProd = signA ^ signB ^ (op == softfloat_mulAdd_subProd); /*------------------------------------------------------------------------ *------------------------------------------------------------------------*/ if ( expA == 0x1F ) { if ( sigA || ((expB == 0x1F) && sigB) ) goto propagateNaN_ABC; magBits = expB | sigB; goto infProdArg; } if ( expB == 0x1F ) { if ( sigB ) goto propagateNaN_ABC; magBits = expA | sigA; goto infProdArg; } if ( expC == 0x1F ) { if ( sigC ) { uiZ = 0; goto propagateNaN_ZC; } uiZ = uiC; goto uiZ; } /*------------------------------------------------------------------------ *------------------------------------------------------------------------*/ if ( ! expA ) { if ( ! sigA ) goto zeroProd; normExpSig = softfloat_normSubnormalF16Sig( sigA ); expA = normExpSig.exp; sigA = normExpSig.sig; } if ( ! expB ) { if ( ! 
sigB ) goto zeroProd; normExpSig = softfloat_normSubnormalF16Sig( sigB ); expB = normExpSig.exp; sigB = normExpSig.sig; } /*------------------------------------------------------------------------ *------------------------------------------------------------------------*/ expProd = expA + expB - 0xE; sigA = (sigA | 0x0400)<<4; sigB = (sigB | 0x0400)<<4; sigProd = (uint_fast32_t) sigA * sigB; if ( sigProd < 0x20000000 ) { --expProd; sigProd <<= 1; } signZ = signProd; if ( ! expC ) { if ( ! sigC ) { expZ = expProd - 1; sigZ = sigProd>>15 | ((sigProd & 0x7FFF) != 0); goto roundPack; } normExpSig = softfloat_normSubnormalF16Sig( sigC ); expC = normExpSig.exp; sigC = normExpSig.sig; }