Beispiel #1
0
/*
 * Invokes the byte-wise equality compare-to-mask intrinsics at 128-, 256-
 * and 512-bit width, first unmasked and then with a literal mask of 3.
 *
 * Every call compares a vector with itself (x128, x256, x512) and stores
 * the resulting mask in m16, m32 or m64.  None of these six names is
 * declared inside this function; they must come from the surrounding
 * file.  NOTE(review): this looks like a compile-only intrinsic test in
 * which the statement list itself is the payload -- confirm before
 * restructuring it.
 */
void extern
avx512bw_test (void)
{
  /* Unmasked compares: one result bit per byte lane. */
  m16 = _mm_cmpeq_epi8_mask (x128, x128);
  m32 = _mm256_cmpeq_epi8_mask (x256, x256);
  m64 = _mm512_cmpeq_epi8_mask (x512, x512);
  /* Masked compares: the first argument (3) is the input mask.  Each
     assignment overwrites the value stored by the unmasked call above. */
  m16 = _mm_mask_cmpeq_epi8_mask (3, x128, x128);
  m32 = _mm256_mask_cmpeq_epi8_mask (3, x256, x256);
  m64 = _mm512_mask_cmpeq_epi8_mask (3, x512, x512);
}
/// @brief Finds the first occurrence of a fixed-length needle in a buffer
///        using AVX-512BW byte compares.
///
/// For every block of up to 64 candidate start offsets, the first and the
/// last needle byte are compared against the haystack in parallel; only
/// offsets where both bytes match are handed to @p memeq_fun.
///
/// Only start offsets in `[0, n - k]` are examined, and the tail block is
/// read through masked loads, so no byte outside `[string, string + n)` is
/// loaded by this function and no match that would extend past the end of
/// the buffer is ever reported.
///
/// @tparam k          Needle length in bytes (compile-time constant, > 0).
/// @tparam MEMCMP     Callable invoked as `memeq_fun(const char*, const char*)`;
///                    a truthy result is treated as "equal".
/// @param  string     Start of the haystack.
/// @param  n          Number of readable bytes in @p string (asserted > 0).
/// @param  needle     Pointer to @p k needle bytes.
/// @param  memeq_fun  Comparator called with pointers to the *second* byte of
///                    the candidate and of the needle.  Presumably it compares
///                    the k - 2 interior bytes -- confirm against the
///                    comparator that is actually passed in.
/// @return Offset of the first match, or `size_t(-1)` if there is none
///         (including when `n < k`).
template <size_t k, typename MEMCMP>
size_t avx512bw_strstr_v3_memcmp(const char* string, size_t n, const char* needle, MEMCMP memeq_fun) {

    static_assert(k > 0, "needle length k must be positive");
    assert(n > 0);

    // A needle longer than the haystack cannot match anywhere.
    if (n < k) {
        return size_t(-1);
    }

    // Number of start offsets at which a full k-byte window fits.
    const size_t positions = n - k + 1;

    const __m512i first = _mm512_set1_epi8(needle[0]);
    const __m512i last  = _mm512_set1_epi8(needle[k - 1]);

    for (size_t offset = 0; offset < positions; offset += 64) {

        // One bit per candidate offset in this block; partial for the tail.
        const size_t remaining = positions - offset;
        const __mmask64 valid  = (remaining >= 64)
                               ? ~__mmask64(0)
                               : ((__mmask64(1) << remaining) - 1);

        const char* block = string + offset;

        // Masked loads do not touch (and cannot fault on) masked-out bytes.
        const __m512i block_first = _mm512_maskz_loadu_epi8(valid, block);

        // The compare is masked as well: masked-out lanes were zero-filled
        // and would otherwise match a needle that starts with '\0'.
        const __mmask64 first_eq = _mm512_mask_cmpeq_epi8_mask(valid, block_first, first);

        if (first_eq == 0)
            continue;

        // Lane i holds the byte at offset + i + k - 1, i.e. the last byte of
        // the window starting at lane i; valid lanes keep this inside n.
        const __m512i block_last = _mm512_maskz_loadu_epi8(valid, block + k - 1);
        uint64_t mask = _mm512_mask_cmpeq_epi8_mask(first_eq, block_last, last);

        // Visit the surviving candidates one set bit at a time.  The bits::
        // helpers are defined elsewhere; presumably they return the index of
        // the lowest set bit and clear that same bit, respectively.
        while (mask != 0) {

            const uint64_t bitpos = bits::get_first_bit_set(mask);

            if (memeq_fun(block + bitpos + 1, needle + 1)) {
                return offset + bitpos;
            }

            mask = bits::clear_leftmost_set(mask);
        }
    }

    return size_t(-1);
}
// Thin wrapper that forwards its three arguments to the masked 512-bit
// byte-equality intrinsic and returns the resulting 64-bit lane mask.
// The two directive comments inside the body are kept verbatim because
// they appear to be consumed by an external checker tool.
__mmask64 test_mm512_mask_cmpeq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
  // CHECK-LABEL: @test_mm512_mask_cmpeq_epi8_mask
  // CHECK: @llvm.x86.avx512.mask.pcmpeq.b.512
  const __mmask64 equal_lanes = _mm512_mask_cmpeq_epi8_mask(__u, __a, __b);
  return equal_lanes;
}