/*
 * Invokes the byte-wise "compare equal, produce mask" intrinsics at 128-,
 * 256- and 512-bit vector widths: first the plain forms, then the forms that
 * take a leading mask argument (the constant 3 here).
 *
 * Every call compares a vector with itself and stores the resulting mask in
 * m16 / m32 / m64.  Those variables and x128 / x256 / x512 are declared
 * outside this block.
 */
extern void
avx512bw_test (void)
{
  /* Plain compares. */
  m16 = _mm_cmpeq_epi8_mask (x128, x128);
  m32 = _mm256_cmpeq_epi8_mask (x256, x256);
  m64 = _mm512_cmpeq_epi8_mask (x512, x512);

  /* Compares that take a mask as their first argument. */
  m16 = _mm_mask_cmpeq_epi8_mask (3, x128, x128);
  m32 = _mm256_mask_cmpeq_epi8_mask (3, x256, x256);
  m64 = _mm512_mask_cmpeq_epi8_mask (3, x512, x512);
}
// Searches the `n`-byte buffer `string` for `needle`, examining 64 candidate
// start offsets per loop iteration.
//
// A candidate survives when its first byte equals needle[0] and the byte
// `k - 1` positions later equals needle[k - 1]; survivors are then confirmed
// by calling memeq_fun(candidate + 1, needle + 1), whose truthy result is
// treated as a match.
//
// Returns the offset of the first confirmed candidate relative to `string`,
// or size_t(-1) when no candidate is confirmed.
//
// NOTE(review): `k` and `MEMCMP` are not declared inside this block --
// presumably they come from an enclosing template header (k = needle length);
// confirm against the full file.
// NOTE(review): each iteration loads a full 64 bytes at `string + offset` and
// at `string + offset + k - 1` regardless of `n`, and the returned offset is
// not checked against `n` here -- callers presumably guarantee readable
// padding and validate the result; confirm.
size_t avx512bw_strstr_v3_memcmp(const char* string, size_t n, const char* needle, MEMCMP memeq_fun) {

    assert(n > 0);
    assert(k > 0);

    // Every lane holds the needle's first (resp. last) byte.
    const __m512i needle_head = _mm512_set1_epi8(needle[0]);
    const __m512i needle_tail = _mm512_set1_epi8(needle[k - 1]);

    for (size_t offset = 0; offset < n; offset += 64) {
        const char* const chunk = string + offset;

        const __m512i   head_bytes = _mm512_loadu_si512(chunk);
        const __mmask64 head_hits  = _mm512_cmpeq_epi8_mask(head_bytes, needle_head);

        // The second load is only issued when at least one first byte matched.
        if (head_hits != 0) {
            const __m512i tail_bytes = _mm512_loadu_si512(chunk + k - 1);

            // Restrict the last-byte comparison to lanes whose first byte
            // already matched, then visit the surviving bits one at a time.
            for (uint64_t candidates = _mm512_mask_cmpeq_epi8_mask(head_hits, tail_bytes, needle_tail);
                 candidates != 0;
                 candidates = bits::clear_leftmost_set(candidates)) {

                const uint64_t bitpos = bits::get_first_bit_set(candidates);

                if (memeq_fun(chunk + bitpos + 1, needle + 1)) {
                    return offset + bitpos;
                }
            }
        }
    }

    return size_t(-1);
}
// Codegen test wrapper: forwards its three arguments unchanged to
// _mm512_mask_cmpeq_epi8_mask and returns the resulting mask.
//
// The two directive comments inside the body are consumed by the FileCheck
// tool and must each stay on their own line; when they share a physical line
// with the code, the first `//` comments out the second directive, the return
// statement and the closing brace.
__mmask64 test_mm512_mask_cmpeq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
  // CHECK-LABEL: @test_mm512_mask_cmpeq_epi8_mask
  // CHECK: @llvm.x86.avx512.mask.pcmpeq.b.512
  return (__mmask64)_mm512_mask_cmpeq_epi8_mask(__u, __a, __b);
}