// Count Leading Zeros
//
// Returns the number of zero bits above the most significant set bit of the
// 16-bit value x (0 when bit 15 is set). The GNUC path returns 16 for x == 0.
static __inline uint16_t _uint16_cntlz( uint16_t x )
{
#ifdef __GNUC__
  /* Place x in the upper half of a 32-bit word so that the 32-bit count is  */
  /* already the 16-bit count, and plant a sentinel in bit 15. For x != 0 a  */
  /* higher bit is set, so the sentinel never changes the result; for x == 0 */
  /* it yields 16 and keeps zero away from __builtin_clz, whose result is    */
  /* undefined for a zero argument. Assumes unsigned int is 32 bits wide.    */
  const uint32_t x32 = ( (uint32_t)x << 16 ) | 0x8000u;
  return ( (uint16_t)__builtin_clz( x32 ) );
#else
  /* Portable straight-line fallback. The notes below assume the _uint16_*   */
  /* helpers (defined outside this block) are plain wrappers for the         */
  /* operation their name suggests: srl = shift right, srlm / addm = shift / */
  /* add followed by AND with the mask -- confirm against their definitions. */

  /* Smear the highest set bit into every lower bit position. */
  const uint16_t x0  = _uint16_srl(  x,  1 );
  const uint16_t x1  = _uint16_or(   x,  x0 );
  const uint16_t x2  = _uint16_srl(  x1, 2 );
  const uint16_t x3  = _uint16_or(   x1, x2 );
  const uint16_t x4  = _uint16_srl(  x3, 4 );
  const uint16_t x5  = _uint16_or(   x3, x4 );
  const uint16_t x6  = _uint16_srl(  x5, 8 );
  const uint16_t x7  = _uint16_or(   x5, x6 );

  /* Invert: only the leading-zero positions remain set. */
  const uint16_t x8  = _uint16_not(  x7 );

  /* Count the set bits of x8 (pairs, nibbles, bytes, then the whole word). */
  const uint16_t x9  = _uint16_srlm( x8, 1, 0x5555 );
  const uint16_t xA  = _uint16_sub(  x8, x9 );
  const uint16_t xB  = _uint16_and(  xA, 0x3333 );
  const uint16_t xC  = _uint16_srlm( xA, 2, 0x3333 );
  const uint16_t xD  = _uint16_add(  xB, xC );
  const uint16_t xE  = _uint16_srl(  xD, 4 );
  const uint16_t xF  = _uint16_addm( xD, xE, 0x0f0f );
  const uint16_t x10 = _uint16_srl(  xF, 8 );
  const uint16_t x11 = _uint16_addm( xF, x10, 0x001f );

  return ( x11 );
#endif
}
// Count Leading Zeros
//
// Returns the number of zero bits above the most significant set bit of the
// 16-bit value x (0 when bit 15 is set). The GNUC path returns 16 for x == 0.
static inline uint16_t _uint16_cntlz( uint16_t x )
{
#ifdef __GNUC__
  /* On PowerPC, this will map to insn: cntlzw          */
  /* On x86, this will map to insn: bsr (or lzcnt)      */

  /* Place x in the upper half of a 32-bit word so that the 32-bit count is  */
  /* already the 16-bit count, and plant a sentinel in bit 15. For x != 0 a  */
  /* higher bit is set, so the sentinel never changes the result; for x == 0 */
  /* it yields 16 and keeps zero away from __builtin_clz, whose result is    */
  /* undefined for a zero argument. Assumes unsigned int is 32 bits wide.    */
  const uint32_t x32 = ( (uint32_t)x << 16 ) | 0x8000u;
  const uint16_t nlz = (uint16_t)__builtin_clz( x32 );

  return (nlz);
#else
  /* Portable straight-line fallback. The notes below assume the _uint16_*   */
  /* helpers (defined outside this block) are plain wrappers for the         */
  /* operation their name suggests: srl = shift right, srlm / addm = shift / */
  /* add followed by AND with the mask -- confirm against their definitions. */

  /* Smear the highest set bit into every lower bit position. */
  const uint16_t x0  = _uint16_srl(  x,  1 );
  const uint16_t x1  = _uint16_or(   x,  x0 );
  const uint16_t x2  = _uint16_srl(  x1, 2 );
  const uint16_t x3  = _uint16_or(   x1, x2 );
  const uint16_t x4  = _uint16_srl(  x3, 4 );
  const uint16_t x5  = _uint16_or(   x3, x4 );
  const uint16_t x6  = _uint16_srl(  x5, 8 );
  const uint16_t x7  = _uint16_or(   x5, x6 );

  /* Invert: only the leading-zero positions remain set. */
  const uint16_t x8  = _uint16_not(  x7 );

  /* Count the set bits of x8 (pairs, nibbles, bytes, then the whole word). */
  const uint16_t x9  = _uint16_srlm( x8, 1, 0x5555 );
  const uint16_t xA  = _uint16_sub(  x8, x9 );
  const uint16_t xB  = _uint16_and(  xA, 0x3333 );
  const uint16_t xC  = _uint16_srlm( xA, 2, 0x3333 );
  const uint16_t xD  = _uint16_add(  xB, xC );
  const uint16_t xE  = _uint16_srl(  xD, 4 );
  const uint16_t xF  = _uint16_addm( xD, xE, 0x0f0f );
  const uint16_t x10 = _uint16_srl(  xF, 8 );
  const uint16_t x11 = _uint16_addm( xF, x10, 0x001f );

  return ( x11 );
#endif
}
// Count Leading Zeros static inline uint16 _uint16_cntlz( uint16 x ) { #ifdef __GNUC__ /* On PowerPC, this will map to insn: cntlzw */ /* On Pentium, this will map to insn: clz */ uint16 nlz32 = (uint16)_uint32_cntlz( (uint32)x ); uint32 nlz = _uint32_sub( nlz32, 16 ); return (nlz); #elif _NV_OS_XBOX_ uint16 nlz32 = (uint16)_CountLeadingZeros( (uint32)x ); return _uint32_sub( nlz32, 16); #else const uint16 x0 = _uint16_srl( x, 1 ); const uint16 x1 = _uint16_or( x, x0 ); const uint16 x2 = _uint16_srl( x1, 2 ); const uint16 x3 = _uint16_or( x1, x2 ); const uint16 x4 = _uint16_srl( x3, 4 ); const uint16 x5 = _uint16_or( x3, x4 ); const uint16 x6 = _uint16_srl( x5, 8 ); const uint16 x7 = _uint16_or( x5, x6 ); const uint16 x8 = _uint16_not( x7 ); const uint16 x9 = _uint16_srlm( x8, 1, 0x5555 ); const uint16 xA = _uint16_sub( x8, x9 ); const uint16 xB = _uint16_and( xA, 0x3333 ); const uint16 xC = _uint16_srlm( xA, 2, 0x3333 ); const uint16 xD = _uint16_add( xB, xC ); const uint16 xE = _uint16_srl( xD, 4 ); const uint16 xF = _uint16_addm( xD, xE, 0x0f0f ); const uint16 x10 = _uint16_srl( xF, 8 ); const uint16 x11 = _uint16_addm( xF, x10, 0x001f ); return ( x11 ); #endif }