constexpr T _pdep(T src, T mask) {
  // Parallel bits deposit, dispatched on the bit width reported for T:
  //   - up to the width of unsigned int           -> _pdep_u32
  //   - up to the width of unsigned long long int -> _pdep_u64
  //   - anything wider                            -> the overload that takes an
  //     extra argument (selected here by passing std::ignore)
  // The assertion requires binary_digits<T>::value to be non-zero.
  static_assert(binary_digits<T>::value, "");
  constexpr T width = binary_digits<T>::value;

  if (width <= std::numeric_limits<unsigned int>::digits) {
    return _pdep_u32(src, mask);
  }
  if (width <= std::numeric_limits<unsigned long long int>::digits) {
    return _pdep_u64(src, mask);
  }
  return _pdep(src, mask, std::ignore);
}
unsigned long long test_pdep_u64(unsigned long long __X, unsigned long long __Y) { // CHECK: @llvm.x86.bmi.pdep.64 return _pdep_u64(__X, __Y); }
constexpr T _lzcnt(T src, X...);

// Every operation below is declared as a pair of overloads:
//   * the first has a defaulted, unnamed template parameter whose
//     decltype(...) expression must be well-formed for T; when it is not, the
//     overload drops out of overload resolution;
//   * the second accepts (and does not name) any extra trailing arguments.
// Each section comment sits on its own line so that it cannot comment out the
// declarations that follow it.

// Trailing zeros count
template <class T, class = decltype(__builtin_ctzll(T()))>
constexpr T _tzcnt(T src);
template <class T, class... X>
constexpr T _tzcnt(T src, X...);

// Bit field extraction
template <class T, class = decltype(__builtin_ia32_bextr_u64(T(), T(), T()))>
constexpr T _bextr(T src, T start, T len);
template <class T, class... X>
constexpr T _bextr(T src, T start, T len, X...);

// Parallel bits deposit
// NOTE(review): the detection expression passes a single argument to
// _pdep_u64, whereas the intrinsic is a two-operand (source, mask) function.
// As written the expression looks ill-formed for every T, which would leave
// only the variadic overload viable. Confirm whether that is intentional
// before changing it.
template <class T, class = decltype(_pdep_u64(T()))>
constexpr T _pdep(T src, T mask);
template <class T, class... X>
constexpr T _pdep(T src, T mask, X...);

// Parallel bits extract
// NOTE(review): same single-argument detection pattern as for _pdep; confirm.
template <class T, class = decltype(_pext_u64(T()))>
constexpr T _pext(T src, T mask);
template <class T, class... X>
constexpr T _pext(T src, T mask, X...);

// Byte swap
template <class T, class = decltype(__builtin_bswap64(T()))>
constexpr T _byteswap(T src);
template <class T, class... X>
constexpr T _byteswap(T src, X...);
// Interleaves the bits of x and y into one 64-bit value (a Morton / Z-order
// style index): bit i of x lands in bit 2*i of the result and bit i of y lands
// in bit 2*i + 1.
//
// _pdep_u64 scatters the low-order bits of its first operand into the
// positions where the mask has a 1. 0x5555... selects the even bit positions
// and 0xAAAA... the odd ones, so the two deposits never overlap.
uint64_t linearize(uint32_t x, uint32_t y) {
  const uint64_t even_bits = _pdep_u64(x, 0x5555555555555555);
  const uint64_t odd_bits = _pdep_u64(y, 0xAAAAAAAAAAAAAAAA);
  return even_bits ^ odd_bits;
}