static void init_bmi2(uint16_t table[], uint16_t *attacks[], Bitboard masks[], Bitboard masks2[], int deltas[], Fn index) { Bitboard edges, b; for (int s = 0; s < 64; s++) { attacks[s] = table; // Board edges are not considered in the relevant occupancies edges = ((Rank1BB | Rank8BB) & ~rank_bb_s(s)) | ((FileABB | FileHBB) & ~file_bb_s(s)); masks2[s] = sliding_attack(deltas, s, 0); masks[s] = masks2[s] & ~edges; // Use Carry-Rippler trick to enumerate all subsets of masks[s] and // fill the attacks table. b = 0; do { attacks[s][index(s, b)] = _pext_u64(sliding_attack(deltas, s, b), masks2[s]); b = (b - masks[s]) & masks[s]; table++; } while (b); } }
// Clang codegen regression test: verifies that the _pext_u64 intrinsic lowers
// to a call of the llvm.x86.bmi.pext.64 IR intrinsic. FileCheck scans the
// emitted IR for the CHECK pattern below, so that directive and the exact
// shape of this function must not be altered.
unsigned long long test_pext_u64(unsigned long long __X, unsigned long long __Y) {
  // CHECK: @llvm.x86.bmi.pext.64
  return _pext_u64(__X, __Y);
}
// NOTE(review): the template header of this _tzcnt fallback overload precedes
// this chunk and is not visible here; declaration kept as-is.
constexpr T _tzcnt(T src, X...);

// Bit field extraction: the first overload is SFINAE-gated on the
// __builtin_ia32_bextr_u64 builtin accepting T; the variadic overload is the
// generic fallback selected when the probe fails substitution.
// NOTE(review): the builtin is probed with three arguments here while
// gcc/clang declare it with two (src, ctrl) — confirm the intended arity.
template <class T, class = decltype(__builtin_ia32_bextr_u64(T(), T(), T()))>
constexpr T _bextr(T src, T start, T len);
template <class T, class... X>
constexpr T _bextr(T src, T start, T len, X...);

// Parallel bits deposit (PDEP): hardware-probed overload plus generic fallback.
// NOTE(review): _pdep_u64 takes two operands; the one-argument probe
// _pdep_u64(T()) would always fail substitution, leaving only the fallback
// selectable — confirm whether _pdep_u64(T(), T()) was intended.
template <class T, class = decltype(_pdep_u64(T()))>
constexpr T _pdep(T src, T mask);
template <class T, class... X>
constexpr T _pdep(T src, T mask, X...);

// Parallel bits extract (PEXT): hardware-probed overload plus generic fallback.
// NOTE(review): same one-argument probe concern as _pdep above.
template <class T, class = decltype(_pext_u64(T()))>
constexpr T _pext(T src, T mask);
template <class T, class... X>
constexpr T _pext(T src, T mask, X...);

// Byte swap: enabled when __builtin_bswap64 accepts T, with generic fallback.
template <class T, class = decltype(__builtin_bswap64(T()))>
constexpr T _byteswap(T src);
template <class T, class... X>
constexpr T _byteswap(T src, X...);

// Bit swap (full bit-order reversal) — single generic overload, no builtin probe.
template <class T>
constexpr T _bitswap(T src);

/* ************************************************************************** */