// half_from_float
// ---------------
//
// Branch-free conversion of a 32-bit float bit pattern to a 16-bit half
// bit pattern.  Every candidate result is computed unconditionally and the
// value returned is picked by a chain of sign-bit selects.
//
//   f       : raw bits of the float (sign 0x80000000, exponent 0x7f800000,
//             mantissa 0x007fffff).
//   returns : raw bits of the half (sign in bit 15, exponent mask 0x7c00).
//
// NOTE(review): the _uint32_* helpers are defined outside this block.  The
// comments below assume each one performs the 32-bit operation its name
// suggests, and that _uint32_sels( t, a, b ) yields a when the sign bit of
// t is set and b otherwise -- confirm against their definitions.
uint16_t half_from_float( uint32_t f )
{
  // Float field layout.
  const uint32_t f_s_mask             = _uint32_li( 0x80000000 );
  const uint32_t f_e_mask             = _uint32_li( 0x7f800000 );
  const uint32_t f_m_mask             = _uint32_li( 0x007fffff );
  const uint32_t f_m_hidden_bit       = _uint32_li( 0x00800000 );
  const uint32_t f_m_round_bit        = _uint32_li( 0x00001000 );
  const uint32_t f_m_snan_mask        = _uint32_li( 0x003fffff );
  const uint32_t f_e_pos              = _uint32_li( 0x00000017 );

  // Half field layout and canned results.
  const uint32_t h_e_mask             = _uint32_li( 0x00007c00 );
  const uint32_t h_nan_em_min         = _uint32_li( 0x00007c01 );
  const uint16_t h_snan_mask          = _uint32_li( 0x00007e00 );

  // Float -> half repositioning amounts: sign moves down 16 bits, mantissa
  // moves down 13 bits, and 0x38000000 is 112 in the float exponent field.
  const uint32_t f_h_s_pos_offset     = _uint32_li( 0x00000010 );
  const uint32_t f_h_m_pos_offset     = _uint32_li( 0x0000000d );
  const uint32_t f_h_bias_offset      = _uint32_li( 0x38000000 );
  const uint32_t one                  = _uint32_li( 0x00000001 );

  // Split the input into its three fields (left in place, not shifted).
  const uint32_t f_s                  = _uint32_and( f, f_s_mask );
  const uint32_t f_e                  = _uint32_and( f, f_e_mask );
  const uint32_t f_m                  = _uint32_and( f, f_m_mask );

  // Sign: moved straight to the half sign position.
  const uint32_t h_s                  = _uint32_srl( f_s, f_h_s_pos_offset );

  // Rounding: if the highest discarded mantissa bit (0x1000) is set, add
  // twice that value, i.e. one unit at the lowest kept bit (0x2000).
  const uint32_t f_m_round_mask       = _uint32_and( f_m, f_m_round_bit );
  const uint32_t f_m_round_offset     = _uint32_sll( f_m_round_mask, one );
  const uint32_t f_m_rounded          = _uint32_add( f_m, f_m_round_offset );
  const uint32_t f_m_rounded_overflow = _uint32_and( f_m_rounded, f_m_hidden_bit );
  const uint32_t f_m_with_hidden      = _uint32_or( f_m_rounded, f_m_hidden_bit );

  // Exponent with the bias difference removed, in place and as a count.
  const uint32_t f_e_h_bias           = _uint32_sub( f_e, f_h_bias_offset );
  const uint32_t f_e_h_bias_amount    = _uint32_srl( f_e_h_bias, f_e_pos );

  // Candidate: normal number (exponent and rounded mantissa packed, then
  // shifted into half position together).
  const uint32_t f_em_norm_packed     = _uint32_or( f_e_h_bias, f_m_rounded );
  const uint32_t h_em_norm            = _uint32_srl( f_em_norm_packed, f_h_m_pos_offset );

  // Candidate: mantissa rounding carried out, so the exponent is bumped by
  // one step (0x00800000 is one unit of the float exponent field).
  const uint32_t f_e_overflow         = _uint32_add( f_e_h_bias, f_m_hidden_bit );
  const uint32_t h_em_overflow        = _uint32_srl( f_e_overflow, f_h_m_pos_offset );

  // Candidate: half denormal (hidden bit made explicit, then shifted right
  // by an amount derived from the rebiased exponent).
  // NOTE(review): f_e_h_bias_amount comes from a logical shift of a value
  // that may have wrapped below zero, so f_m_denorm_sa can be far outside
  // 0..31 -- confirm how _uint32_srl treats such shift counts.
  const uint32_t f_m_denorm_sa        = _uint32_sub( one, f_e_h_bias_amount );
  const uint32_t f_m_denorm           = _uint32_srl( f_m_with_hidden, f_m_denorm_sa );
  const uint32_t h_m_denorm           = _uint32_srl( f_m_denorm, f_h_m_pos_offset );

  // Float mantissa truncated to half width; used only to test whether any
  // mantissa bits survive the narrowing.
  const uint32_t h_m_nan              = _uint32_srl( f_m, f_h_m_pos_offset );

  // Predicates, each carried in the sign bit of the named value.
  const uint32_t is_e_eqz_msb         = _uint32_dec( f_e );
  const uint32_t is_underflow_msb     = _uint32_sub( is_e_eqz_msb, f_h_bias_offset );
  const uint32_t is_e_nflagged_msb    = _uint32_sub( f_e, f_e_mask );
  const uint32_t is_m_nez_msb         = _uint32_neg( f_m );
  const uint32_t is_ninf_msb          = _uint32_or( is_e_nflagged_msb, is_m_nez_msb );
  const uint32_t is_h_m_nan_nez_msb   = _uint32_neg( h_m_nan );
  const uint32_t is_nan_nunderflow_msb = _uint32_or( is_h_m_nan_nez_msb, is_e_nflagged_msb );
  const uint32_t is_m_snan_msb        = _uint32_sub( f_m_snan_mask, f_m );
  const uint32_t is_snan_msb          = _uint32_andc( is_m_snan_msb, is_e_nflagged_msb );
  const uint32_t is_overflow_msb      = _uint32_neg( f_m_rounded_overflow );

  // Selection chain; each later select overrides the ones before it.
  const uint32_t h_nan_underflow_result = _uint32_sels( is_nan_nunderflow_msb, h_em_norm, h_nan_em_min );
  const uint32_t h_inf_result           = _uint32_sels( is_ninf_msb, h_nan_underflow_result, h_e_mask );
  const uint32_t h_underflow_result     = _uint32_sels( is_underflow_msb, h_m_denorm, h_inf_result );
  const uint32_t h_overflow_result      = _uint32_sels( is_overflow_msb, h_em_overflow, h_underflow_result );
  const uint32_t h_em_result            = _uint32_sels( is_snan_msb, h_snan_mask, h_overflow_result );

  // Re-attach the sign.
  const uint32_t h_result               = _uint32_or( h_em_result, h_s );

  return (h_result);
}
// Count Leading Zeros static inline uint32 _uint32_cntlz( uint32 x ) { #if NV_CC_GCC /* On PowerPC, this will map to insn: cntlzw */ /* On Pentium, this will map to insn: clz */ uint32 is_x_nez_msb = _uint32_neg( x ); uint32 nlz = __builtin_clz( x ); uint32 result = _uint32_sels( is_x_nez_msb, nlz, 0x00000020 ); return (result); #elif NV_OS_XBOX // Xbox PPC has this as an intrinsic. return _CountLeadingZeros(x); #elif NV_CC_MSVC uint32 is_x_nez_msb = _uint32_neg( x ); uint32 nlz = _uint32_nlz( x ); uint32 result = _uint32_sels( is_x_nez_msb, nlz, 0x00000020 ); return (result); #else const uint32 x0 = _uint32_srl( x, 1 ); const uint32 x1 = _uint32_or( x, x0 ); const uint32 x2 = _uint32_srl( x1, 2 ); const uint32 x3 = _uint32_or( x1, x2 ); const uint32 x4 = _uint32_srl( x3, 4 ); const uint32 x5 = _uint32_or( x3, x4 ); const uint32 x6 = _uint32_srl( x5, 8 ); const uint32 x7 = _uint32_or( x5, x6 ); const uint32 x8 = _uint32_srl( x7, 16 ); const uint32 x9 = _uint32_or( x7, x8 ); const uint32 xA = _uint32_not( x9 ); const uint32 xB = _uint32_srl( xA, 1 ); const uint32 xC = _uint32_and( xB, 0x55555555 ); const uint32 xD = _uint32_sub( xA, xC ); const uint32 xE = _uint32_and( xD, 0x33333333 ); const uint32 xF = _uint32_srl( xD, 2 ); const uint32 x10 = _uint32_and( xF, 0x33333333 ); const uint32 x11 = _uint32_add( xE, x10 ); const uint32 x12 = _uint32_srl( x11, 4 ); const uint32 x13 = _uint32_add( x11, x12 ); const uint32 x14 = _uint32_and( x13, 0x0f0f0f0f ); const uint32 x15 = _uint32_srl( x14, 8 ); const uint32 x16 = _uint32_add( x14, x15 ); const uint32 x17 = _uint32_srl( x16, 16 ); const uint32 x18 = _uint32_add( x16, x17 ); const uint32 x19 = _uint32_and( x18, 0x0000003f ); return ( x19 ); #endif }
// _half_to_float
// --------------
//
// Branch-free conversion of a 16-bit half bit pattern to a 32-bit float
// bit pattern.  Normal, denormal and all-ones-exponent candidates are all
// computed and then combined with sign-bit multiplexing and masking.
//
//   h       : raw bits of the half (sign 0x8000, exponent 0x7c00,
//             mantissa 0x03ff).
//   returns : raw bits of the float.
//
// NOTE(review): the _uint32_* helpers (including _uint32_muxs and
// _uint32_ext) are defined outside this block.  The comments assume each
// performs the 32-bit operation its name suggests, that _uint32_ext
// broadcasts the sign bit to a full mask, and that
// _uint32_muxs( t, a, b ) yields a when the sign bit of t is set and b
// otherwise -- confirm against their definitions.
static inline uint32_t _half_to_float( uint16_t h )
{
  // Half field layout.
  const uint32_t h_s_mask             = _uint32_li( 0x00008000 );
  const uint32_t h_e_mask             = _uint32_li( 0x00007c00 );
  const uint32_t h_m_mask             = _uint32_li( 0x000003ff );

  // Float field layout.
  const uint32_t f_e_mask             = _uint32_li( 0x7f800000 );
  const uint32_t f_m_mask             = _uint32_li( 0x007fffff );
  const uint32_t f_e_pos              = _uint32_li( 0x00000017 );

  // Half -> float repositioning: sign moves up 16 bits, exponent and
  // mantissa move up 13 bits, and 0x1c000 is 112 in the half exponent
  // field (the difference between the two exponent biases).
  const uint32_t h_f_s_pos_offset     = _uint32_li( 0x00000010 );
  const uint32_t h_f_e_pos_offset     = _uint32_li( 0x0000000d );
  const uint32_t h_f_bias_offset      = _uint32_li( 0x0001c000 );

  // Denormal renormalisation constants.
  const uint32_t h_f_e_denorm_bias    = _uint32_li( 0x0000007e );
  const uint32_t h_f_m_denorm_sa_bias = _uint32_li( 0x00000008 );

  // Split the input into its three fields (left in place, not shifted).
  const uint32_t h_s                  = _uint32_and( h, h_s_mask );
  const uint32_t h_e                  = _uint32_and( h, h_e_mask );
  const uint32_t h_m                  = _uint32_and( h, h_m_mask );

  // Candidate: normal number -- rebias the exponent, then move each field
  // to its float position.
  const uint32_t h_e_f_bias           = _uint32_add( h_e, h_f_bias_offset );
  const uint32_t f_s                  = _uint32_sll( h_s, h_f_s_pos_offset );
  const uint32_t f_e                  = _uint32_sll( h_e_f_bias, h_f_e_pos_offset );
  const uint32_t f_m                  = _uint32_sll( h_m, h_f_e_pos_offset );

  // Candidate: half denormal -- the leading-zero count of the mantissa
  // fixes both the normalising shift and the resulting float exponent.
  const uint32_t h_m_nlz              = _uint32_cntlz( h_m );
  const uint32_t h_f_m_sa             = _uint32_sub( h_m_nlz, h_f_m_denorm_sa_bias );
  const uint32_t f_e_denorm_unpacked  = _uint32_sub( h_f_e_denorm_bias, h_f_m_sa );
  const uint32_t h_f_m                = _uint32_sll( h_m, h_f_m_sa );
  const uint32_t f_m_denorm           = _uint32_and( h_f_m, f_m_mask );
  const uint32_t f_e_denorm           = _uint32_sll( f_e_denorm_unpacked, f_e_pos );
  const uint32_t f_em_denorm          = _uint32_or( f_e_denorm, f_m_denorm );

  // Predicates, each carried in the sign bit of the named value.
  const uint32_t is_e_eqz_msb         = _uint32_dec( h_e );
  const uint32_t is_m_nez_msb         = _uint32_neg( h_m );
  // NOTE(review): if _uint32_dec( x ) is x - 1 and _uint32_sub( a, b ) is
  // a - b, this evaluates to 0x7c01 - h_e, whose sign bit is never set for
  // any h_e in 0..0x7c00.  A test of the form 0x7bff - h_e would set the
  // sign bit exactly when h_e == 0x7c00 -- confirm which was intended.
  const uint32_t is_e_flagged_msb     = _uint32_sub( h_e_mask, is_e_eqz_msb );
  const uint32_t is_denorm_msb        = _uint32_and( is_e_eqz_msb, is_m_nez_msb );
  const uint32_t is_ninf_msb          = _uint32_andc( is_m_nez_msb, is_e_flagged_msb );
  const uint32_t is_zero_msb          = _uint32_andc( is_e_eqz_msb, is_m_nez_msb );

  // Full-width masks derived from two of the predicates.
  const uint32_t is_ninf              = _uint32_ext( is_ninf_msb );
  const uint32_t is_zero              = _uint32_ext( is_zero_msb );

  // Combine: pick the exponent, keep the mantissa only under is_ninf,
  // substitute the denormal candidate, then clear everything for zero.
  const uint32_t f_e_flagged_result   = _uint32_muxs( is_e_flagged_msb, f_e_mask, f_e );
  const uint32_t f_m_inf_result       = _uint32_and( f_m, is_ninf );
  const uint32_t f_em_result_partial  = _uint32_or( f_e_flagged_result, f_m_inf_result );
  const uint32_t f_em_denorm_result   = _uint32_muxs( is_denorm_msb, f_em_denorm, f_em_result_partial );
  const uint32_t f_em_result          = _uint32_andc( f_em_denorm_result, is_zero );

  // Re-attach the sign.
  const uint32_t f_result             = _uint32_or( f_s, f_em_result );

  return (f_result);
}
// half_to_float
// -------------
//
// Branch-free conversion of a 16-bit half bit pattern to a 32-bit float
// bit pattern.  Candidates for the normal, zero, denormal, infinity and
// NaN cases are all computed, then one is chosen by a chain of sign-bit
// selects.
//
//   h       : raw bits of the half (sign 0x8000, exponent 0x7c00,
//             mantissa 0x03ff).
//   returns : raw bits of the float.
//
// NOTE(review): the _uint32_* helpers are defined outside this block.  The
// comments assume each performs the 32-bit operation its name suggests,
// that _uint32_ext broadcasts the sign bit to a full mask, and that
// _uint32_sels( t, a, b ) yields a when the sign bit of t is set and b
// otherwise -- confirm against their definitions.
uint32_t half_to_float( uint16_t h )
{
  // Half field layout.
  const uint32_t h_s_mask             = _uint32_li( 0x00008000 );
  const uint32_t h_e_mask             = _uint32_li( 0x00007c00 );
  const uint32_t h_e_mask_minus_one   = _uint32_li( 0x00007bff );
  const uint32_t h_m_mask             = _uint32_li( 0x000003ff );

  // Float field layout.
  const uint32_t f_e_mask             = _uint32_li( 0x7f800000 );
  const uint32_t f_m_mask             = _uint32_li( 0x007fffff );
  const uint32_t f_e_pos              = _uint32_li( 0x00000017 );

  // Half -> float repositioning: sign moves up 16 bits, exponent and
  // mantissa move up 13 bits, and 0x1c000 is 112 in the half exponent
  // field (the difference between the two exponent biases).
  const uint32_t h_f_s_pos_offset     = _uint32_li( 0x00000010 );
  const uint32_t h_f_e_pos_offset     = _uint32_li( 0x0000000d );
  const uint32_t h_f_bias_offset      = _uint32_li( 0x0001c000 );

  // Denormal renormalisation constants.
  const uint32_t h_f_e_denorm_bias    = _uint32_li( 0x0000007e );
  const uint32_t h_f_m_denorm_sa_bias = _uint32_li( 0x00000008 );

  // Split the input into its three fields (left in place, not shifted).
  const uint32_t h_s                  = _uint32_and( h, h_s_mask );
  const uint32_t h_e                  = _uint32_and( h, h_e_mask );
  const uint32_t h_m                  = _uint32_and( h, h_m_mask );

  // Candidate: normal number -- rebias the exponent, then move each field
  // to its float position.
  const uint32_t h_e_f_bias           = _uint32_add( h_e, h_f_bias_offset );
  const uint32_t f_s                  = _uint32_sll( h_s, h_f_s_pos_offset );
  const uint32_t f_e                  = _uint32_sll( h_e_f_bias, h_f_e_pos_offset );
  const uint32_t f_m                  = _uint32_sll( h_m, h_f_e_pos_offset );
  const uint32_t f_em                 = _uint32_or( f_e, f_m );

  // Candidate: half denormal -- the leading-zero count of the mantissa
  // fixes both the normalising shift and the resulting float exponent
  // (e.g. h_m == 1 gives a shift of 23 and an exponent field of 103).
  const uint32_t h_m_nlz              = _uint32_cntlz( h_m );
  const uint32_t h_f_m_sa             = _uint32_sub( h_m_nlz, h_f_m_denorm_sa_bias );
  const uint32_t f_e_denorm_unpacked  = _uint32_sub( h_f_e_denorm_bias, h_f_m_sa );
  const uint32_t h_f_m                = _uint32_sll( h_m, h_f_m_sa );
  const uint32_t f_m_denorm           = _uint32_and( h_f_m, f_m_mask );
  const uint32_t f_e_denorm           = _uint32_sll( f_e_denorm_unpacked, f_e_pos );
  const uint32_t f_em_denorm          = _uint32_or( f_e_denorm, f_m_denorm );

  // Candidate: NaN -- all-ones float exponent with the widened mantissa.
  const uint32_t f_em_nan             = _uint32_or( f_e_mask, f_m );

  // Elementary predicates, each carried in the sign bit.
  //   h_e - 1       is negative only when h_e == 0
  //   0 - h_m       is negative only when h_m != 0 (h_m is at most 0x3ff)
  //   0x7bff - h_e  is negative only when h_e == 0x7c00
  const uint32_t is_e_eqz_msb         = _uint32_dec( h_e );
  const uint32_t is_m_nez_msb         = _uint32_neg( h_m );
  const uint32_t is_e_flagged_msb     = _uint32_sub( h_e_mask_minus_one, h_e );

  // Case predicates built from the elementary ones.
  const uint32_t is_zero_msb          = _uint32_andc( is_e_eqz_msb, is_m_nez_msb );
  const uint32_t is_denorm_msb        = _uint32_and( is_m_nez_msb, is_e_eqz_msb );
  const uint32_t is_inf_msb           = _uint32_andc( is_e_flagged_msb, is_m_nez_msb );
  const uint32_t is_nan_msb           = _uint32_and( is_e_flagged_msb, is_m_nez_msb );
  const uint32_t is_zero              = _uint32_ext( is_zero_msb );

  // Selection chain: start from the normal candidate, clear it for zero,
  // then let denormal, infinity and NaN each override in turn.
  const uint32_t f_zero_result        = _uint32_andc( f_em, is_zero );
  const uint32_t f_denorm_result      = _uint32_sels( is_denorm_msb, f_em_denorm, f_zero_result );
  const uint32_t f_inf_result         = _uint32_sels( is_inf_msb, f_e_mask, f_denorm_result );
  const uint32_t f_nan_result         = _uint32_sels( is_nan_msb, f_em_nan, f_inf_result );

  // Re-attach the sign.
  const uint32_t f_result             = _uint32_or( f_s, f_nan_result );

  return (f_result);
}
// Count Leading Zeros
//
// Returns the number of zero bits above the most significant set bit of x,
// in the range 0..32.  An input of 0 yields 32.
static __inline uint32_t _uint32_cntlz( uint32_t x )
{
#ifdef __GNUC__
  /* NOTE: __builtin_clz is undefined for x == 0, so bit 0 is forced on
   * before the call.  That leaves the count unchanged for every non-zero x
   * and gives 31 for x == 0, which the explicit zero test then bumps to 32.
   * The zero test is a plain comparison so the result does not depend on
   * the sign of -x.
   */
  /* On PowerPC, this will map to insn: cntlzw */
  /* On x86, this will map to insn: bsr or lzcnt */
  const uint32_t nlz     = (uint32_t)__builtin_clz( x | 0x00000001u );
  const uint32_t is_zero = (uint32_t)( x == 0u );

  return ( nlz + is_zero );
#else
  // Generic path, step 1: smear the highest set bit into every lower
  // position, so x9 has ones from that bit downwards.
  const uint32_t x0  = _uint32_srl(  x,  1 );
  const uint32_t x1  = _uint32_or(   x,  x0 );
  const uint32_t x2  = _uint32_srl(  x1, 2 );
  const uint32_t x3  = _uint32_or(   x1, x2 );
  const uint32_t x4  = _uint32_srl(  x3, 4 );
  const uint32_t x5  = _uint32_or(   x3, x4 );
  const uint32_t x6  = _uint32_srl(  x5, 8 );
  const uint32_t x7  = _uint32_or(   x5, x6 );
  const uint32_t x8  = _uint32_srl(  x7, 16 );
  const uint32_t x9  = _uint32_or(   x7, x8 );

  // Step 2: invert, leaving ones exactly in the leading-zero positions.
  const uint32_t xA  = _uint32_not(  x9 );

  // Step 3: count those ones with masked pairwise sums (2, 4, 8, 16 and
  // 32-bit groups), keeping the low 6 bits as the count (0..32).
  const uint32_t xB  = _uint32_srl(  xA, 1 );
  const uint32_t xC  = _uint32_and(  xB, 0x55555555 );
  const uint32_t xD  = _uint32_sub(  xA, xC );
  const uint32_t xE  = _uint32_and(  xD, 0x33333333 );
  const uint32_t xF  = _uint32_srl(  xD, 2 );
  const uint32_t x10 = _uint32_and(  xF, 0x33333333 );
  const uint32_t x11 = _uint32_add(  xE, x10 );
  const uint32_t x12 = _uint32_srl(  x11, 4 );
  const uint32_t x13 = _uint32_add(  x11, x12 );
  const uint32_t x14 = _uint32_and(  x13, 0x0f0f0f0f );
  const uint32_t x15 = _uint32_srl(  x14, 8 );
  const uint32_t x16 = _uint32_add(  x14, x15 );
  const uint32_t x17 = _uint32_srl(  x16, 16 );
  const uint32_t x18 = _uint32_add(  x16, x17 );
  const uint32_t x19 = _uint32_and(  x18, 0x0000003f );

  return ( x19 );
#endif
}
// half_mul
// --------
//
// Branch-free multiply of two 16-bit half bit patterns, returning the
// product as a half bit pattern.
//
// May have 0 or 1 ulp difference from the following result:
// (Round to nearest)
// NOTE: Rounding mode differs between conversion and multiply
//
//     union FLOAT_32
//     {
//       float    f32;
//       uint32_t u32;
//     };
//
//     union FLOAT_32 fx = { .u32 = half_to_float( x ) };
//     union FLOAT_32 fy = { .u32 = half_to_float( y ) };
//     union FLOAT_32 fz = { .f32 = fx.f32 * fy.f32    };
//     uint16_t       z  = half_from_float( fz.u32 );
//
// NOTE(review): the _uint32_* helpers and _uint16_cntlz are defined outside
// this block.  The comments assume each performs the operation its name
// suggests, that _uint32_ext broadcasts the sign bit to a full mask, that
// _uint32_sels( t, a, b ) yields a when the sign bit of t is set and b
// otherwise, and that _uint32_selb( m, a, b ) selects bitwise on the mask
// m -- confirm against their definitions.
uint16_t half_mul( uint16_t x, uint16_t y )
{
  // Shared small constants.
  const uint32_t one                                = _uint32_li( 0x00000001 );

  // Half field layout.
  const uint32_t h_s_mask                           = _uint32_li( 0x00008000 );
  const uint32_t h_e_mask                           = _uint32_li( 0x00007c00 );
  const uint32_t h_m_mask                           = _uint32_li( 0x000003ff );
  const uint32_t h_m_hidden                         = _uint32_li( 0x00000400 );
  const uint32_t h_e_pos                            = _uint32_li( 0x0000000a );
  const uint32_t h_e_bias                           = _uint32_li( 0x0000000f );
  const uint32_t h_m_bit_count                      = _uint32_li( 0x0000000a );
  const uint32_t h_m_bit_half_count                 = _uint32_li( 0x00000005 );

  // Canned results and comparison constants.
  const uint32_t h_nan_min                          = _uint32_li( 0x00007c01 );
  const uint32_t h_e_mask_minus_one                 = _uint32_li( 0x00007bff );
  const uint32_t h_snan                             = _uint32_li( 0x0000fe00 );

  // Bit positions tested on the intermediate product / exponent.
  const uint32_t m_round_overflow_bit               = _uint32_li( 0x00000020 );
  const uint32_t m_hidden_bit                       = _uint32_li( 0x00100000 );

  // Signs.  c_s is the product sign (xor of the input signs).  a_s and b_s
  // are taken from x and y directly, before any operand swap.
  const uint32_t a_s                                = _uint32_and( x, h_s_mask );
  const uint32_t b_s                                = _uint32_and( y, h_s_mask );
  const uint32_t c_s                                = _uint32_xor( a_s, b_s );

  // Operand ordering: when x has a zero exponent field, swap so that the
  // zero-exponent operand ends up in b.
  const uint32_t x_e                                = _uint32_and( x, h_e_mask );
  const uint32_t x_e_eqz_msb                        = _uint32_dec( x_e );
  const uint32_t a                                  = _uint32_sels( x_e_eqz_msb, y, x );
  const uint32_t b                                  = _uint32_sels( x_e_eqz_msb, x, y );

  // Exponent and mantissa fields of the ordered operands.
  const uint32_t a_e                                = _uint32_and( a, h_e_mask );
  const uint32_t b_e                                = _uint32_and( b, h_e_mask );
  const uint32_t a_m                                = _uint32_and( a, h_m_mask );
  const uint32_t b_m                                = _uint32_and( b, h_m_mask );
  const uint32_t a_e_amount                         = _uint32_srl( a_e, h_e_pos );
  const uint32_t b_e_amount                         = _uint32_srl( b_e, h_e_pos );
  const uint32_t a_m_with_hidden                    = _uint32_or( a_m, h_m_hidden );
  const uint32_t b_m_with_hidden                    = _uint32_or( b_m, h_m_hidden );

  // Raw mantissa products: both hidden bits set (normal path), and b
  // without its hidden bit (b-is-denormal path).
  const uint32_t c_m_normal                         = _uint32_mul( a_m_with_hidden, b_m_with_hidden );
  const uint32_t c_m_denorm_biased                  = _uint32_mul( a_m_with_hidden, b_m );

  // Candidate: denormal-operand result.  Round, drop the low 10 product
  // bits, then shift right by (bias - a's exponent).
  const uint32_t c_e_denorm_unbias_e                = _uint32_sub( h_e_bias, a_e_amount );
  const uint32_t c_m_denorm_round_amount            = _uint32_and( c_m_denorm_biased, h_m_mask );
  const uint32_t c_m_denorm_rounded                 = _uint32_add( c_m_denorm_biased, c_m_denorm_round_amount );
  const uint32_t c_m_denorm_inplace                 = _uint32_srl( c_m_denorm_rounded, h_m_bit_count );
  const uint32_t c_m_denorm_unbiased                = _uint32_srl( c_m_denorm_inplace, c_e_denorm_unbias_e );
  const uint32_t c_m_denorm                         = _uint32_and( c_m_denorm_unbiased, h_m_mask );

  // Product exponent: sum of the exponents minus one bias.  When that goes
  // negative the exponent is clamped to zero and the mantissa product is
  // shifted right by twice the deficit instead.
  const uint32_t c_e_amount_biased                  = _uint32_add( a_e_amount, b_e_amount );
  const uint32_t c_e_amount_unbiased                = _uint32_sub( c_e_amount_biased, h_e_bias );
  const uint32_t is_c_e_unbiased_underflow          = _uint32_ext( c_e_amount_unbiased );
  const uint32_t c_e_underflow_half_sa              = _uint32_neg( c_e_amount_unbiased );
  const uint32_t c_e_underflow_sa                   = _uint32_sll( c_e_underflow_half_sa, one );
  const uint32_t c_m_underflow                      = _uint32_srl( c_m_normal, c_e_underflow_sa );
  const uint32_t c_e_underflow_added                = _uint32_andc( c_e_amount_unbiased, is_c_e_unbiased_underflow );
  const uint32_t c_m_underflow_added                = _uint32_selb( is_c_e_unbiased_underflow, c_m_underflow, c_m_normal );

  // Exponent overflow straight out of the multiply: bit 5 (0x20) set.
  const uint32_t is_mul_overflow_test               = _uint32_and( c_e_underflow_added, m_round_overflow_bit );
  const uint32_t is_mul_overflow_msb                = _uint32_neg( is_mul_overflow_test );

  // Radix correction: exponent + 1, mantissa product >> 1.
  const uint32_t c_e_norm_radix_corrected           = _uint32_inc( c_e_underflow_added );
  const uint32_t c_m_norm_radix_corrected           = _uint32_srl( c_m_underflow_added, one );

  // Is the hidden bit (0x00100000) present after the correction?
  const uint32_t c_m_norm_hidden_bit                = _uint32_and( c_m_norm_radix_corrected, m_hidden_bit );
  const uint32_t is_c_m_norm_no_hidden_msb          = _uint32_dec( c_m_norm_hidden_bit );

  // Renormalisation for the no-hidden-bit case, driven by a leading-zero
  // count of the product shifted down by 5 bits.
  const uint32_t c_m_norm_lo                        = _uint32_srl( c_m_norm_radix_corrected, h_m_bit_half_count );
  const uint32_t c_m_norm_lo_nlz                    = _uint16_cntlz( c_m_norm_lo );
  const uint32_t is_c_m_hidden_nunderflow_msb       = _uint32_sub( c_m_norm_lo_nlz, c_e_norm_radix_corrected );
  const uint32_t is_c_m_hidden_underflow_msb        = _uint32_not( is_c_m_hidden_nunderflow_msb );
  const uint32_t is_c_m_hidden_underflow            = _uint32_ext( is_c_m_hidden_underflow_msb );
  const uint32_t c_m_hidden_underflow_normalized_sa = _uint32_srl( c_m_norm_lo_nlz, one );
  const uint32_t c_m_hidden_underflow_normalized    = _uint32_sll( c_m_norm_radix_corrected, c_m_hidden_underflow_normalized_sa );
  const uint32_t c_m_hidden_normalized              = _uint32_sll( c_m_norm_radix_corrected, c_m_norm_lo_nlz );
  const uint32_t c_e_hidden_normalized              = _uint32_sub( c_e_norm_radix_corrected, c_m_norm_lo_nlz );
  const uint32_t c_e_hidden                         = _uint32_andc( c_e_hidden_normalized, is_c_m_hidden_underflow );
  const uint32_t c_m_hidden                         = _uint32_sels( is_c_m_hidden_underflow_msb, c_m_hidden_underflow_normalized, c_m_hidden_normalized );

  // Pick the renormalised or the already-normal mantissa / exponent.
  const uint32_t c_m_normalized                     = _uint32_sels( is_c_m_norm_no_hidden_msb, c_m_hidden, c_m_norm_radix_corrected );
  const uint32_t c_e_normalized                     = _uint32_sels( is_c_m_norm_no_hidden_msb, c_e_hidden, c_e_norm_radix_corrected );

  // Round, test the final exponent for overflow (bit 5 again), and move
  // both fields into their half positions.
  const uint32_t c_m_norm_round_amount              = _uint32_and( c_m_normalized, h_m_mask );
  const uint32_t c_m_norm_rounded                   = _uint32_add( c_m_normalized, c_m_norm_round_amount );
  const uint32_t is_round_overflow_test             = _uint32_and( c_e_normalized, m_round_overflow_bit );
  const uint32_t is_round_overflow_msb              = _uint32_neg( is_round_overflow_test );
  const uint32_t c_m_norm_inplace                   = _uint32_srl( c_m_norm_rounded, h_m_bit_count );
  const uint32_t c_m                                = _uint32_and( c_m_norm_inplace, h_m_mask );
  const uint32_t c_e_norm_inplace                   = _uint32_sll( c_e_normalized, h_e_pos );
  const uint32_t c_e                                = _uint32_and( c_e_norm_inplace, h_e_mask );

  // Fully-formed candidates for the special cases.
  const uint32_t c_em_nan                           = _uint32_or( h_e_mask, a_m );
  const uint32_t c_nan                              = _uint32_or( a_s, c_em_nan );
  const uint32_t c_denorm                           = _uint32_or( c_s, c_m_denorm );
  const uint32_t c_inf                              = _uint32_or( c_s, h_e_mask );
  const uint32_t c_em_norm                          = _uint32_or( c_e, c_m );

  // Elementary operand predicates, each carried in the sign bit.
  const uint32_t is_a_e_flagged_msb                 = _uint32_sub( h_e_mask_minus_one, a_e );
  const uint32_t is_b_e_flagged_msb                 = _uint32_sub( h_e_mask_minus_one, b_e );
  const uint32_t is_a_e_eqz_msb                     = _uint32_dec( a_e );
  const uint32_t is_a_m_eqz_msb                     = _uint32_dec( a_m );
  const uint32_t is_b_e_eqz_msb                     = _uint32_dec( b_e );
  const uint32_t is_b_m_eqz_msb                     = _uint32_dec( b_m );
  const uint32_t is_b_eqz_msb                       = _uint32_and( is_b_e_eqz_msb, is_b_m_eqz_msb );
  const uint32_t is_a_eqz_msb                       = _uint32_and( is_a_e_eqz_msb, is_a_m_eqz_msb );

  // Result-class predicates.
  const uint32_t is_c_nan_via_a_msb                 = _uint32_andc( is_a_e_flagged_msb, is_b_e_flagged_msb );
  const uint32_t is_c_nan_via_b_msb                 = _uint32_andc( is_b_e_flagged_msb, is_b_m_eqz_msb );
  const uint32_t is_c_nan_msb                       = _uint32_or( is_c_nan_via_a_msb, is_c_nan_via_b_msb );
  const uint32_t is_c_denorm_msb                    = _uint32_andc( is_b_e_eqz_msb, is_a_e_flagged_msb );
  const uint32_t is_a_inf_msb                       = _uint32_and( is_a_e_flagged_msb, is_a_m_eqz_msb );
  const uint32_t is_c_snan_msb                      = _uint32_and( is_a_inf_msb, is_b_eqz_msb );
  const uint32_t is_c_nan_min_via_a_msb             = _uint32_and( is_a_e_flagged_msb, is_b_eqz_msb );
  const uint32_t is_c_nan_min_via_b_msb             = _uint32_and( is_b_e_flagged_msb, is_a_eqz_msb );
  const uint32_t is_c_nan_min_msb                   = _uint32_or( is_c_nan_min_via_a_msb, is_c_nan_min_via_b_msb );
  const uint32_t is_c_inf_msb                       = _uint32_or( is_a_e_flagged_msb, is_b_e_flagged_msb );
  const uint32_t is_overflow_msb                    = _uint32_or( is_round_overflow_msb, is_mul_overflow_msb );

  // Selection chain; each later select overrides the ones before it.
  // NOTE(review): is_c_inf_msb is set whenever either exponent field is all
  // ones, which includes every case where is_c_nan_msb or is_c_nan_min_msb
  // is set, so the c_inf select appears to override both NaN selects unless
  // the denorm or snan select fires afterwards -- confirm this is intended.
  const uint32_t c_em_overflow_result               = _uint32_sels( is_overflow_msb, h_e_mask, c_em_norm );
  const uint32_t c_common_result                    = _uint32_or( c_s, c_em_overflow_result );
  const uint32_t c_zero_result                      = _uint32_sels( is_b_eqz_msb, c_s, c_common_result );
  const uint32_t c_nan_result                       = _uint32_sels( is_c_nan_msb, c_nan, c_zero_result );
  const uint32_t c_nan_min_result                   = _uint32_sels( is_c_nan_min_msb, h_nan_min, c_nan_result );
  const uint32_t c_inf_result                       = _uint32_sels( is_c_inf_msb, c_inf, c_nan_min_result );
  const uint32_t c_denorm_result                    = _uint32_sels( is_c_denorm_msb, c_denorm, c_inf_result);
  const uint32_t c_result                           = _uint32_sels( is_c_snan_msb, h_snan, c_denorm_result );

  return (uint16_t)(c_result);
}
// half_from_float
// ---------------
//
// Branch-free conversion of a 32-bit float bit pattern to a 16-bit half
// bit pattern.  Every candidate result is computed unconditionally and the
// value returned is picked by a chain of sign-bit selects.
//
//   f       : raw bits of the float (sign 0x80000000, exponent 0x7f800000,
//             mantissa 0x007fffff).
//   returns : raw bits of the half (sign in bit 15, exponent mask 0x7c00).
//
// NOTE(review): the _uint32_* helpers are defined outside this block.  The
// comments below assume each one performs the 32-bit operation its name
// suggests, and that _uint32_sels( t, a, b ) yields a when the sign bit of
// t is set and b otherwise -- confirm against their definitions.
uint16_t half_from_float( uint32_t f )
{
  const uint32_t one                      = _uint32_li( 0x00000001 );

  // Float field layout.
  const uint32_t f_s_mask                 = _uint32_li( 0x80000000 );
  const uint32_t f_e_mask                 = _uint32_li( 0x7f800000 );
  const uint32_t f_m_mask                 = _uint32_li( 0x007fffff );
  const uint32_t f_m_hidden_bit           = _uint32_li( 0x00800000 );
  const uint32_t f_m_round_bit            = _uint32_li( 0x00001000 );
  const uint32_t f_snan_mask              = _uint32_li( 0x7fc00000 );
  const uint32_t f_e_pos                  = _uint32_li( 0x00000017 );

  // Half field layout and canned results.
  const uint32_t h_e_pos                  = _uint32_li( 0x0000000a );
  const uint32_t h_e_mask                 = _uint32_li( 0x00007c00 );
  const uint32_t h_e_mask_value           = _uint32_li( 0x0000001f );
  const uint32_t h_snan_mask              = _uint32_li( 0x00007e00 );
  const uint32_t h_nan_min                = _uint32_li( 0x00007c01 );

  // Float -> half repositioning: sign moves down 16 bits, mantissa moves
  // down 13 bits, 0x70 (112) is the difference between the exponent
  // biases, and 0x8f (143) is 255 - 112.
  const uint32_t f_h_s_pos_offset         = _uint32_li( 0x00000010 );
  const uint32_t f_h_m_pos_offset         = _uint32_li( 0x0000000d );
  const uint32_t f_h_bias_offset          = _uint32_li( 0x00000070 );
  const uint32_t f_h_e_biased_flag        = _uint32_li( 0x0000008f );

  // Split the input into its fields.
  const uint32_t f_s                      = _uint32_and( f, f_s_mask );
  const uint32_t f_e                      = _uint32_and( f, f_e_mask );
  const uint32_t f_m                      = _uint32_and( f, f_m_mask );
  const uint32_t f_snan                   = _uint32_and( f, f_snan_mask );

  // Sign: moved straight to the half sign position.
  const uint16_t h_s                      = _uint32_srl( f_s, f_h_s_pos_offset );

  // Exponent as a plain count, then with the bias difference removed (the
  // subtraction wraps for float exponents below 112).
  const uint16_t f_e_amount               = _uint32_srl( f_e, f_e_pos );
  const uint32_t f_e_half_bias            = _uint32_sub( f_e_amount, f_h_bias_offset );

  // Rounding: if the highest discarded mantissa bit (0x1000) is set, add
  // twice that value, i.e. one unit at the lowest kept bit (0x2000).
  const uint32_t f_m_round_mask           = _uint32_and( f_m, f_m_round_bit );
  const uint32_t f_m_round_offset         = _uint32_sll( f_m_round_mask, one );
  const uint32_t f_m_rounded              = _uint32_add( f_m, f_m_round_offset );
  const uint32_t f_m_rounded_overflow     = _uint32_and( f_m_rounded, f_m_hidden_bit );
  const uint32_t f_m_with_hidden          = _uint32_or( f_m_rounded, f_m_hidden_bit );

  // Candidate: half denormal (hidden bit made explicit, then shifted right
  // by 1 - rebiased exponent before moving to half position).
  // NOTE(review): for small float exponents f_m_denorm_sa exceeds 31 (e.g.
  // 113 when the exponent field is 0) -- confirm how _uint32_srl treats
  // such shift counts.
  const uint32_t f_m_denorm_sa            = _uint32_sub( one, f_e_half_bias );
  const uint32_t f_m_denorm               = _uint32_srl( f_m_with_hidden, f_m_denorm_sa );
  const uint32_t h_m_denorm               = _uint32_srl( f_m_denorm, f_h_m_pos_offset );

  // Candidate: NaN with the float mantissa truncated to half width.
  const uint32_t m_nan                    = _uint32_srl( f_m, f_h_m_pos_offset );
  const uint32_t h_em_nan                 = _uint32_or( h_e_mask, m_nan );

  // Candidate: normal number, plus the variant whose exponent is bumped by
  // one for the case where mantissa rounding carried out.
  const uint32_t h_e_norm                 = _uint32_sll( f_e_half_bias, h_e_pos );
  const uint32_t h_m_norm                 = _uint32_srl( f_m_rounded, f_h_m_pos_offset );
  const uint32_t h_em_norm                = _uint32_or( h_e_norm, h_m_norm );
  const uint32_t h_e_norm_overflow_offset = _uint32_inc( f_e_half_bias );
  const uint32_t h_e_norm_overflow        = _uint32_sll( h_e_norm_overflow_offset, h_e_pos );

  // Predicates, each carried in the sign bit of the named value.
  const uint32_t is_h_ndenorm_msb         = _uint32_sub( f_h_bias_offset, f_e_amount );
  const uint32_t is_h_denorm_msb          = _uint32_not( is_h_ndenorm_msb );
  // NOTE(review): with plain wrapping arithmetic, 0x8f - f_e_half_bias has
  // its sign bit clear for every 8-bit exponent value, so this test looks
  // like it can never fire -- confirm.
  const uint32_t is_f_e_flagged_msb       = _uint32_sub( f_h_e_biased_flag, f_e_half_bias );
  const uint32_t is_f_m_eqz_msb           = _uint32_dec( f_m );
  const uint32_t is_h_nan_eqz_msb         = _uint32_dec( m_nan );
  const uint32_t is_f_inf_msb             = _uint32_and( is_f_e_flagged_msb, is_f_m_eqz_msb );
  const uint32_t is_f_nan_underflow_msb   = _uint32_and( is_f_e_flagged_msb, is_h_nan_eqz_msb );
  const uint32_t is_e_overflow_msb        = _uint32_sub( h_e_mask_value, f_e_half_bias );
  const uint32_t is_h_inf_msb             = _uint32_or( is_e_overflow_msb, is_f_inf_msb );
  const uint32_t is_f_nsnan_msb           = _uint32_sub( f_snan, f_snan_mask );
  const uint32_t is_f_snan_msb            = _uint32_not( is_f_nsnan_msb );
  const uint32_t is_m_norm_overflow_msb   = _uint32_neg( f_m_rounded_overflow );

  // Selection chain; each later select overrides the ones before it.
  const uint32_t h_em_overflow_result      = _uint32_sels( is_m_norm_overflow_msb, h_e_norm_overflow, h_em_norm );
  const uint32_t h_em_nan_result           = _uint32_sels( is_f_e_flagged_msb, h_em_nan, h_em_overflow_result );
  const uint32_t h_em_nan_underflow_result = _uint32_sels( is_f_nan_underflow_msb, h_nan_min, h_em_nan_result );
  const uint32_t h_em_inf_result           = _uint32_sels( is_h_inf_msb, h_e_mask, h_em_nan_underflow_result );
  const uint32_t h_em_denorm_result        = _uint32_sels( is_h_denorm_msb, h_m_denorm, h_em_inf_result );
  const uint32_t h_em_snan_result          = _uint32_sels( is_f_snan_msb, h_snan_mask, h_em_denorm_result );

  // Re-attach the sign.
  const uint32_t h_result                  = _uint32_or( h_s, h_em_snan_result );

  return (uint16_t)(h_result);
}