Example #1
0
// Count Leading Zeros
//
// Returns the number of leading zero bits in the 16-bit value x
// (0 for 0x8000..0xffff, 15 for 0x0001, 16 for x == 0).
static inline uint16_t _uint16_cntlz( uint16_t x )
{
#ifdef __GNUC__
  /* On PowerPC, this will map to insn: cntlzw */
  /* On Pentium, this will map to insn: clz    */
  /* x is moved into the upper half of a 32-bit word so that the 32-bit
     builtin counts only x's own leading zeros.  Bit 15 is OR-ed in as a
     sentinel: it lies directly below x's 16 bits, so it never alters the
     count for a non-zero x, but it guarantees the builtin never receives 0
     (its result is undefined for 0) and makes x == 0 produce exactly 16,
     without introducing a branch. */
  const uint32_t x32 = ( (uint32_t)x << 16 ) | (uint32_t)0x8000;
  const uint16_t nlz = (uint16_t)__builtin_clz( x32 );
  return (nlz);
#else
  /* Portable path, built from the _uint16_* helpers defined elsewhere in
     this file.  Going by the helper names: x0..x7 OR x with copies of itself
     shifted right by 1, 2, 4 and 8, so every bit below the highest set bit
     becomes 1. */
  const uint16_t x0  = _uint16_srl(  x,  1 );
  const uint16_t x1  = _uint16_or(   x,  x0 );
  const uint16_t x2  = _uint16_srl(  x1, 2 );
  const uint16_t x3  = _uint16_or(   x1, x2 );
  const uint16_t x4  = _uint16_srl(  x3, 4 );
  const uint16_t x5  = _uint16_or(   x3, x4 );
  const uint16_t x6  = _uint16_srl(  x5, 8 );
  const uint16_t x7  = _uint16_or(   x5, x6 );
  /* Inverting leaves 1s only in the leading-zero positions ... */
  const uint16_t x8  = _uint16_not(  x7 );
  /* ... which are then summed in 2-, 4-, 8- and 16-bit groups using the
     0x5555 / 0x3333 / 0x0f0f masks; the final 0x001f mask keeps a count
     in the range 0..16. */
  const uint16_t x9  = _uint16_srlm( x8, 1, 0x5555 );
  const uint16_t xA  = _uint16_sub(  x8, x9 );
  const uint16_t xB  = _uint16_and(  xA, 0x3333 );
  const uint16_t xC  = _uint16_srlm( xA, 2, 0x3333 );
  const uint16_t xD  = _uint16_add(  xB, xC );
  const uint16_t xE  = _uint16_srl(  xD, 4 );
  const uint16_t xF  = _uint16_addm( xD, xE, 0x0f0f );
  const uint16_t x10 = _uint16_srl(  xF, 8 );
  const uint16_t x11 = _uint16_addm( xF, x10, 0x001f );
  return ( x11 );
#endif
}
Example #2
0
// Count Leading Zeros
//
// 16-bit leading-zero count.  With __GNUC__ defined it is derived from the
// 32-bit count of the zero-extended input; otherwise it is computed with
// the _uint16_* helpers only (all helpers are defined outside this block).
static __inline uint16_t _uint16_cntlz( uint16_t x )
{
#ifdef __GNUC__
  /* Zero-extending x to 32 bits adds 16 leading zeros; subtract them. */
  const uint16_t lz_of_widened = (uint16_t)_uint32_cntlz( (uint32_t)x );
  return (uint16_t)_uint32_sub( lz_of_widened, 16 );
#else
  /* Going by the helper names: OR x with copies of itself shifted right by
     1, 2, 4 and 8 so every bit below the highest set bit becomes 1. */
  const uint16_t shifted_1    = _uint16_srl( x, 1 );
  const uint16_t filled_2     = _uint16_or( x, shifted_1 );
  const uint16_t shifted_2    = _uint16_srl( filled_2, 2 );
  const uint16_t filled_4     = _uint16_or( filled_2, shifted_2 );
  const uint16_t shifted_4    = _uint16_srl( filled_4, 4 );
  const uint16_t filled_8     = _uint16_or( filled_4, shifted_4 );
  const uint16_t shifted_8    = _uint16_srl( filled_8, 8 );
  const uint16_t filled_16    = _uint16_or( filled_8, shifted_8 );

  /* Invert, then add the remaining 1 bits up in progressively wider groups. */
  const uint16_t lead_zeros   = _uint16_not( filled_16 );
  const uint16_t odd_bits     = _uint16_srlm( lead_zeros, 1, 0x5555 );
  const uint16_t pair_sums    = _uint16_sub( lead_zeros, odd_bits );
  const uint16_t pair_lo      = _uint16_and( pair_sums, 0x3333 );
  const uint16_t pair_hi      = _uint16_srlm( pair_sums, 2, 0x3333 );
  const uint16_t nibble_sums  = _uint16_add( pair_lo, pair_hi );
  const uint16_t nibble_hi    = _uint16_srl( nibble_sums, 4 );
  const uint16_t byte_sums    = _uint16_addm( nibble_sums, nibble_hi, 0x0f0f );
  const uint16_t byte_hi      = _uint16_srl( byte_sums, 8 );

  return _uint16_addm( byte_sums, byte_hi, 0x001f );
#endif
}
Example #3
0
// Count Leading Zeros
//
// 16-bit leading-zero count.  Three build variants: a 32-bit count minus 16
// via _uint32_cntlz (__GNUC__), the same via _CountLeadingZeros
// (_NV_OS_XBOX_), or a helper-only fallback.  All callees are defined
// outside this block.
static inline uint16 _uint16_cntlz( uint16 x )
{
#ifdef __GNUC__
    /* On PowerPC, this will map to insn: cntlzw */
    /* On Pentium, this will map to insn: clz    */
    /* The widened input carries 16 extra leading zeros; remove them. */
    const uint16 widened_lz = (uint16)_uint32_cntlz( (uint32)x );
    return (uint16)_uint32_sub( widened_lz, 16 );
#elif _NV_OS_XBOX_
    /* Same correction applied to the platform intrinsic's 32-bit count. */
    const uint16 widened_lz = (uint16)_CountLeadingZeros( (uint32)x );
    return _uint32_sub( widened_lz, 16 );
#else
    /* Going by the helper names: propagate the highest set bit into all
       lower positions (shifts of 1, 2, 4, 8 OR-ed back in). */
    const uint16 s1       = _uint16_srl( x, 1 );
    const uint16 spread2  = _uint16_or( x, s1 );
    const uint16 s2       = _uint16_srl( spread2, 2 );
    const uint16 spread4  = _uint16_or( spread2, s2 );
    const uint16 s4       = _uint16_srl( spread4, 4 );
    const uint16 spread8  = _uint16_or( spread4, s4 );
    const uint16 s8       = _uint16_srl( spread8, 8 );
    const uint16 spread16 = _uint16_or( spread8, s8 );

    /* Complement, then total the surviving bits group by group. */
    const uint16 inverted = _uint16_not( spread16 );
    const uint16 halves   = _uint16_srlm( inverted, 1, 0x5555 );
    const uint16 sum2     = _uint16_sub( inverted, halves );
    const uint16 sum2_lo  = _uint16_and( sum2, 0x3333 );
    const uint16 sum2_hi  = _uint16_srlm( sum2, 2, 0x3333 );
    const uint16 sum4     = _uint16_add( sum2_lo, sum2_hi );
    const uint16 sum4_hi  = _uint16_srl( sum4, 4 );
    const uint16 sum8     = _uint16_addm( sum4, sum4_hi, 0x0f0f );
    const uint16 sum8_hi  = _uint16_srl( sum8, 8 );

    return _uint16_addm( sum8, sum8_hi, 0x001f );
#endif
}
Example #4
0
// Select on Sign bit
//
// Branch-free select: builds a mask from `test` with _uint16_ext, keeps the
// bits of `a` under that mask and the bits of `b` outside it (via
// _uint16_andc), and ORs the two parts together.
static inline uint16_t _uint16_sels( uint16_t test, uint16_t a, uint16_t b )
{
  const uint16_t pick_mask = _uint16_ext( test );
  const uint16_t from_a    = _uint16_and( a, pick_mask );
  const uint16_t from_b    = _uint16_andc( b, pick_mask );

  return _uint16_or( from_a, from_b );
}
Example #5
0
// Select on Sign bit
//
// Merges `a` and `b` bitwise without branching: the mask produced by
// _uint16_ext( test ) decides which bits come from `a`; the remaining bits
// are taken from `b` through _uint16_andc.
static inline uint16 _uint16_sels( uint16 test, uint16 a, uint16 b )
{
    const uint16 selector = _uint16_ext( test );
    const uint16 a_part   = _uint16_and( a, selector );
    const uint16 b_part   = _uint16_andc( b, selector );
    const uint16 merged   = _uint16_or( a_part, b_part );

    return merged;
}
Example #6
0
// Add two 16-bit values x and y and return the 16-bit result.
//
// The masks used below (sign 0x8000, exponent 0x7c00, mantissa 0x03ff) match
// the IEEE 754 binary16 layout, so x and y are presumably half-precision bit
// patterns.  The whole computation is straight-line: every decision is made
// by building a mask and selecting with _uint16_sels, never by branching.
// All _uint16_* helpers are defined outside this block; the comments below
// assume each does what its name suggests (TODO confirm against the helper
// definitions).
uint16_t
half_add( uint16_t x, uint16_t y )
{
  /* Constants: shift amounts, field masks and special values. */
  const uint16_t one                       = _uint16_li( 0x0001 );
  const uint16_t msb_to_lsb_sa             = _uint16_li( 0x000f );
  const uint16_t h_s_mask                  = _uint16_li( 0x8000 );
  const uint16_t h_e_mask                  = _uint16_li( 0x7c00 );
  const uint16_t h_m_mask                  = _uint16_li( 0x03ff );
  const uint16_t h_m_msb_mask              = _uint16_li( 0x2000 );
  const uint16_t h_m_msb_sa                = _uint16_li( 0x000d );
  const uint16_t h_m_hidden                = _uint16_li( 0x0400 );
  const uint16_t h_e_pos                   = _uint16_li( 0x000a );
  const uint16_t h_e_bias_minus_one        = _uint16_li( 0x000e );
  const uint16_t h_m_grs_carry             = _uint16_li( 0x4000 );
  const uint16_t h_m_grs_carry_pos         = _uint16_li( 0x000e );
  const uint16_t h_grs_size                = _uint16_li( 0x0003 );
  const uint16_t h_snan                    = _uint16_li( 0xfe00 );
  const uint16_t h_e_mask_minus_one        = _uint16_li( 0x7bff );
  const uint16_t h_grs_round_carry         = _uint16_sll( one, h_grs_size );
  const uint16_t h_grs_round_mask          = _uint16_sub( h_grs_round_carry, one );
  /* Order the operands by exponent field: x_e - y_e wraps to a value with
     the top bit set when y_e is larger, so `a` receives the operand with
     the larger exponent field and `b` the other one. */
  const uint16_t x_e                       = _uint16_and( x, h_e_mask );
  const uint16_t y_e                       = _uint16_and( y, h_e_mask );
  const uint16_t is_y_e_larger_msb         = _uint16_sub( x_e, y_e );
  const uint16_t a                         = _uint16_sels( is_y_e_larger_msb, y, x);
  const uint16_t a_s                       = _uint16_and( a, h_s_mask );
  const uint16_t a_e                       = _uint16_and( a, h_e_mask );
  const uint16_t a_m_no_hidden_bit         = _uint16_and( a, h_m_mask );
  const uint16_t a_em_no_hidden_bit        = _uint16_or( a_e, a_m_no_hidden_bit );
  const uint16_t b                         = _uint16_sels( is_y_e_larger_msb, x, y);
  const uint16_t b_s                       = _uint16_and( b, h_s_mask );
  const uint16_t b_e                       = _uint16_and( b, h_e_mask );
  const uint16_t b_m_no_hidden_bit         = _uint16_and( b, h_m_mask );
  const uint16_t b_em_no_hidden_bit        = _uint16_or( b_e, b_m_no_hidden_bit );
  /* Classification flags; names ending in _msb carry their answer in bit 15.
     0x7bff - (exponent|mantissa) wraps (top bit set) exactly when the
     exponent field is all ones, so the "inf" flags also cover NaN inputs. */
  const uint16_t is_diff_sign_msb          = _uint16_xor( a_s, b_s );
  const uint16_t is_a_inf_msb              = _uint16_sub( h_e_mask_minus_one, a_em_no_hidden_bit );
  const uint16_t is_b_inf_msb              = _uint16_sub( h_e_mask_minus_one, b_em_no_hidden_bit );
  const uint16_t is_undenorm_msb           = _uint16_dec( a_e );
  const uint16_t is_undenorm               = _uint16_ext( is_undenorm_msb );
  const uint16_t is_both_inf_msb           = _uint16_and( is_a_inf_msb, is_b_inf_msb );
  /* NOTE(review): the invalid-operation flag is gated on b's sign bit, not
     on is_diff_sign_msb.  As written, two all-ones-exponent operands yield
     h_snan whenever b is negative (even if a is negative too), and do not
     when only a is negative — confirm this is intended. */
  const uint16_t is_invalid_inf_op_msb     = _uint16_and( is_both_inf_msb, b_s );
  /* Restore the implicit leading mantissa bit (0x0400) for operands whose
     exponent field is non-zero. */
  const uint16_t is_a_e_nez_msb            = _uint16_neg( a_e );
  const uint16_t is_b_e_nez_msb            = _uint16_neg( b_e );
  const uint16_t is_a_e_nez                = _uint16_ext( is_a_e_nez_msb );
  const uint16_t is_b_e_nez                = _uint16_ext( is_b_e_nez_msb );
  const uint16_t a_m_hidden_bit            = _uint16_and( is_a_e_nez, h_m_hidden );
  const uint16_t b_m_hidden_bit            = _uint16_and( is_b_e_nez, h_m_hidden );
  const uint16_t a_m_no_grs                = _uint16_or( a_m_no_hidden_bit, a_m_hidden_bit );
  const uint16_t b_m_no_grs                = _uint16_or( b_m_no_hidden_bit, b_m_hidden_bit );
  /* Align b to a.  Both mantissas are shifted left by h_grs_size (3) to
     leave low-order working bits ("grs" presumably = guard/round/sticky).
     The alignment shift m_sa is normally the exponent-field difference; when
     a has a non-zero exponent field and b does not, it is a_e minus
     h_e_bias_minus_one instead. */
  const uint16_t diff_e                    = _uint16_sub( a_e,        b_e );
  const uint16_t a_e_unbias                = _uint16_sub( a_e,        h_e_bias_minus_one );
  const uint16_t a_m                       = _uint16_sll( a_m_no_grs, h_grs_size );
  const uint16_t a_e_biased                = _uint16_srl( a_e,        h_e_pos );
  const uint16_t m_sa_unbias               = _uint16_srl( a_e_unbias, h_e_pos );
  const uint16_t m_sa_default              = _uint16_srl( diff_e,     h_e_pos );
  const uint16_t m_sa_unbias_mask          = _uint16_andc( is_a_e_nez_msb,   is_b_e_nez_msb );
  const uint16_t m_sa                      = _uint16_sels( m_sa_unbias_mask, m_sa_unbias, m_sa_default );
  const uint16_t b_m_no_sticky             = _uint16_sll( b_m_no_grs,        h_grs_size );
  const uint16_t sh_m                      = _uint16_srl( b_m_no_sticky,     m_sa );
  /* Bits of b that fall below the m_sa boundary are collected with the mask
     (1 << m_sa) - 1 and collapsed into a single bit that is OR-ed into the
     lowest bit of the shifted mantissa. */
  const uint16_t sticky_overflow           = _uint16_sll( one,               m_sa );
  const uint16_t sticky_mask               = _uint16_dec( sticky_overflow );
  const uint16_t sticky_collect            = _uint16_and( b_m_no_sticky, sticky_mask );
  const uint16_t is_sticky_set_msb         = _uint16_neg( sticky_collect );
  const uint16_t sticky                    = _uint16_srl( is_sticky_set_msb, msb_to_lsb_sa);
  const uint16_t b_m                       = _uint16_or( sh_m, sticky );
  /* Compute both the sum and the magnitude difference of the aligned
     mantissas; which one is used is decided further down by
     is_diff_sign_msb.  For differing signs the result sign follows the
     operand with the larger aligned mantissa. */
  const uint16_t is_c_m_ab_pos_msb         = _uint16_sub( b_m, a_m );
  const uint16_t c_inf                     = _uint16_or( a_s, h_e_mask );
  const uint16_t c_m_sum                   = _uint16_add( a_m, b_m );
  const uint16_t c_m_diff_ab               = _uint16_sub( a_m, b_m );
  const uint16_t c_m_diff_ba               = _uint16_sub( b_m, a_m );
  const uint16_t c_m_smag_diff             = _uint16_sels( is_c_m_ab_pos_msb, c_m_diff_ab, c_m_diff_ba );
  const uint16_t c_s_diff                  = _uint16_sels( is_c_m_ab_pos_msb, a_s,         b_s         );
  const uint16_t c_s                       = _uint16_sels( is_diff_sign_msb,  c_s_diff,    a_s         );
  /* Renormalize the difference: shift left by (leading zeros - 1) and lower
     the exponent by the same amount.  If that would take the exponent below
     zero, shift by (a_e_biased - 1) instead and force the exponent to 0. */
  const uint16_t c_m_smag_diff_nlz         = _uint16_cntlz( c_m_smag_diff );
  const uint16_t diff_norm_sa              = _uint16_sub( c_m_smag_diff_nlz, one );
  const uint16_t is_diff_denorm_msb        = _uint16_sub( a_e_biased, diff_norm_sa );
  const uint16_t is_diff_denorm            = _uint16_ext( is_diff_denorm_msb );
  const uint16_t is_a_or_b_norm_msb        = _uint16_neg( a_e_biased );
  const uint16_t diff_denorm_sa            = _uint16_dec( a_e_biased );
  const uint16_t c_m_diff_denorm           = _uint16_sll( c_m_smag_diff, diff_denorm_sa );
  const uint16_t c_m_diff_norm             = _uint16_sll( c_m_smag_diff, diff_norm_sa );
  const uint16_t c_e_diff_norm             = _uint16_sub( a_e_biased,  diff_norm_sa );
  const uint16_t c_m_diff_ab_norm          = _uint16_sels( is_diff_denorm_msb, c_m_diff_denorm, c_m_diff_norm );
  const uint16_t c_e_diff_ab_norm          = _uint16_andc( c_e_diff_norm, is_diff_denorm );
  const uint16_t c_m_diff                  = _uint16_sels( is_a_or_b_norm_msb, c_m_diff_ab_norm, c_m_smag_diff );
  const uint16_t c_e_diff                  = _uint16_sels( is_a_or_b_norm_msb, c_e_diff_ab_norm, a_e_biased    );
  /* Differing signs with a zero difference mantissa is flagged so the final
     result can be cleared to zero. */
  const uint16_t is_diff_eqz_msb           = _uint16_dec( c_m_diff );
  const uint16_t is_diff_exactly_zero_msb  = _uint16_and( is_diff_sign_msb, is_diff_eqz_msb );
  const uint16_t is_diff_exactly_zero      = _uint16_ext( is_diff_exactly_zero_msb );
  /* Pick difference (signs differ) or sum (signs equal), then fold a carry
     out of the mantissa (bit 0x4000) back in: shift the mantissa right by
     one and increment the exponent. */
  const uint16_t c_m_added                 = _uint16_sels( is_diff_sign_msb, c_m_diff, c_m_sum );
  const uint16_t c_e_added                 = _uint16_sels( is_diff_sign_msb, c_e_diff, a_e_biased );
  const uint16_t c_m_carry                 = _uint16_and( c_m_added, h_m_grs_carry );
  const uint16_t is_c_m_carry_msb          = _uint16_neg( c_m_carry );
  const uint16_t c_e_hidden_offset         = _uint16_andsrl( c_m_added, h_m_grs_carry, h_m_grs_carry_pos );
  const uint16_t c_m_sub_hidden            = _uint16_srl( c_m_added, one );
  const uint16_t c_m_no_hidden             = _uint16_sels( is_c_m_carry_msb, c_m_sub_hidden, c_m_added );
  const uint16_t c_e_no_hidden             = _uint16_add( c_e_added,         c_e_hidden_offset  );
  /* When a's exponent field was zero (is_undenorm) and the result mantissa
     has bit 0x2000 set, the exponent is bumped by one. */
  const uint16_t c_m_no_hidden_msb         = _uint16_and( c_m_no_hidden,     h_m_msb_mask       );
  const uint16_t undenorm_m_msb_odd        = _uint16_srl( c_m_no_hidden_msb, h_m_msb_sa         );
  const uint16_t undenorm_fix_e            = _uint16_and( is_undenorm,       undenorm_m_msb_odd );
  const uint16_t c_e_fixed                 = _uint16_add( c_e_no_hidden,     undenorm_fix_e     );
  /* Rounding: the low h_grs_size bits are added back onto the mantissa; a
     carry out of that addition increments the exponent, and the working
     bits are then shifted away. */
  const uint16_t c_m_round_amount          = _uint16_and( c_m_no_hidden,     h_grs_round_mask   );
  const uint16_t c_m_rounded               = _uint16_add( c_m_no_hidden,     c_m_round_amount   );
  const uint16_t c_m_round_overflow        = _uint16_andsrl( c_m_rounded, h_m_grs_carry, h_m_grs_carry_pos );
  const uint16_t c_e_rounded               = _uint16_add( c_e_fixed, c_m_round_overflow );
  const uint16_t c_m_no_grs                = _uint16_srlm( c_m_rounded, h_grs_size,  h_m_mask );
  /* Reassemble sign | exponent | mantissa, then apply the special cases in
     increasing priority: a has an all-ones exponent -> c_inf (a's sign with
     the exponent mask); exact cancellation -> 0; invalid operation ->
     h_snan (0xfe00). */
  const uint16_t c_e                       = _uint16_sll( c_e_rounded, h_e_pos );
  const uint16_t c_em                      = _uint16_or( c_e, c_m_no_grs );
  const uint16_t c_normal                  = _uint16_or( c_s, c_em );
  const uint16_t c_inf_result              = _uint16_sels( is_a_inf_msb, c_inf, c_normal );
  const uint16_t c_zero_result             = _uint16_andc( c_inf_result, is_diff_exactly_zero );
  const uint16_t c_result                  = _uint16_sels( is_invalid_inf_op_msb, h_snan, c_zero_result );

  return (c_result);
}