bool sse42_is_xdigit(const char* s) { if (s == nullptr) { return false; } // 3 ranges const __m128i ranges = _mm_setr_epi8( '0', '9', 'a', 'f', 'A', 'F', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ); __m128i* mem = reinterpret_cast<__m128i*>(const_cast<char*>(s)); const uint8_t mode = _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES | _SIDD_MASKED_NEGATIVE_POLARITY | _SIDD_LEAST_SIGNIFICANT | _SIDD_BIT_MASK; for (/**/; /**/; mem++) { const __m128i chunk = _mm_loadu_si128(mem); if (_mm_cmpistrc(ranges, chunk, mode)) { // there are some characters outside the given ranges in a chunk return false; } else if (_mm_cmpistrz(ranges, chunk, mode)) { // there is zero byte in a chunk if (*s == 0) { // empty string return false; } else { return true; } } } assert(false && "impossible happend"); return false; }
/* Compiler diagnostic test: every statement below passes the variable k4 as
   the immediate operand of an SSE4.2 / AVX / AES / PCLMUL / F16C intrinsic.
   The trailing dg-error annotation on each statement names the diagnostic the
   compiler is expected to issue for it ("the third/fifth/last argument must be
   an 8-bit immediate").  The operands (i1..i3, k1..k4, a1..a3, b1..b3, d1..d3,
   e1..e3, f1, l1..l3) are declared outside this function.
   NOTE(review): the annotations are tied to the statement they follow -- keep
   each statement and its annotation together when editing.  */
void test8bit (void) { i1 = _mm_cmpistrm (i2, i3, k4); /* { dg-error "the third argument must be an 8-bit immediate" } */ k1 = _mm_cmpistri (i2, i3, k4); /* { dg-error "the third argument must be an 8-bit immediate" } */ k1 = _mm_cmpistra (i2, i3, k4); /* { dg-error "the third argument must be an 8-bit immediate" } */ k1 = _mm_cmpistrc (i2, i3, k4); /* { dg-error "the third argument must be an 8-bit immediate" } */ k1 = _mm_cmpistro (i2, i3, k4); /* { dg-error "the third argument must be an 8-bit immediate" } */ k1 = _mm_cmpistrs (i2, i3, k4); /* { dg-error "the third argument must be an 8-bit immediate" } */ k1 = _mm_cmpistrz (i2, i3, k4); /* { dg-error "the third argument must be an 8-bit immediate" } */ i1 = _mm_cmpestrm (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */ k1 = _mm_cmpestri (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */ k1 = _mm_cmpestra (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */ k1 = _mm_cmpestrc (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */ k1 = _mm_cmpestro (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */ k1 = _mm_cmpestrs (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */ k1 = _mm_cmpestrz (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */ b1 = _mm256_blend_ps (b2, b3, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */ k1 = _cvtss_sh (f1, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */ i1 = _mm256_cvtps_ph (b2, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */ b1 = _mm256_dp_ps (b2, b3, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */ e1 = _mm256_permute2f128_pd (e2, e3, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */ b1 = _mm256_permute2f128_ps (b2, b3, 
k4);/* { dg-error "the last argument must be an 8-bit immediate" } */ l1 = _mm256_permute2f128_si256 (l2, l3, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */ b1 = _mm256_permute_ps (b2, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */ i1 = _mm_aeskeygenassist_si128 (i2, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */ i1 = _mm_blend_epi16 (i2, i3, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */ i1 = _mm_clmulepi64_si128 (i2, i3, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */ i1 = _mm_cvtps_ph (a1, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */ d1 = _mm_dp_pd (d2, d3, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */ a1 = _mm_dp_ps (a2, a3, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */ a1 = _mm_insert_ps (a2, a3, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */ i1 = _mm_mpsadbw_epu8 (i2, i3, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */ a1 = _mm_permute_ps (a2, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */ i1 = _mm_slli_si128 (i2, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */ i1 = _mm_srli_si128 (i2, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */ }
/* SSE4.2 implementation of strspn: returns the length of the initial segment
   of S that consists only of characters found in the accept set A.

   Phase 1 builds MASK, a vector holding the characters of A.  When A is not
   16-byte aligned the enclosing aligned block is loaded and passed to
   __m128i_shift_right (presumably a byte-wise right shift by OFFSET --
   confirm against its definition).  _mm_cmpistri (x, x, 0x3a) is used to
   locate the NUL terminator; per the PCMPISTRI control-byte encoding 0x3a is
   signed bytes, "equal each", masked negative polarity, least significant
   index.  If A turns out to be longer than 16 characters the work is handed
   to __strspn_sse2.

   Phase 2 handles a misaligned S: the first aligned block is loaded, shifted,
   and compared against MASK with control 0x12 (signed bytes, "equal any",
   negative polarity, least significant index), i.e. the index of the first
   byte of the chunk that is not in MASK.

   Phase 3 loops over S in aligned 16-byte blocks with the same 0x12
   comparison and returns the distance from S to the first rejected byte once
   the carry flag reports one.

   Returns 0 immediately when A is the empty string.  */
__strspn_sse42 (const char *s, const char *a) { if (*a == 0) return 0; const char *aligned; __m128i mask; int offset = (int) ((size_t) a & 15); if (offset != 0) { /* Load masks. */ aligned = (const char *) ((size_t) a & -16L); __m128i mask0 = _mm_load_si128 ((__m128i *) aligned); mask = __m128i_shift_right (mask0, offset); /* Find where the NULL terminator is. */ int length = _mm_cmpistri (mask, mask, 0x3a); if (length == 16 - offset) { /* There is no NULL terminator. */ __m128i mask1 = _mm_load_si128 ((__m128i *) (aligned + 16)); int index = _mm_cmpistri (mask1, mask1, 0x3a); length += index; /* Don't use SSE4.2 if the length of A > 16. */ if (length > 16) return __strspn_sse2 (s, a); if (index != 0) { /* Combine mask0 and mask1. We could play games with palignr, but frankly this data should be in L1 now so do the merge via an unaligned load. */ mask = _mm_loadu_si128 ((__m128i *) a); } } } else { /* A is aligned. */ mask = _mm_load_si128 ((__m128i *) a); /* Find where the NULL terminator is. */ int length = _mm_cmpistri (mask, mask, 0x3a); if (length == 16) { /* There is no NULL terminator. Don't use SSE4.2 if the length of A > 16. */ if (a[16] != 0) return __strspn_sse2 (s, a); } } offset = (int) ((size_t) s & 15); if (offset != 0) { /* Check partial string. */ aligned = (const char *) ((size_t) s & -16L); __m128i value = _mm_load_si128 ((__m128i *) aligned); value = __m128i_shift_right (value, offset); int length = _mm_cmpistri (mask, value, 0x12); /* No need to check CFlag since it is always 1. */ if (length < 16 - offset) return length; /* Find where the NULL terminator is. */ int index = _mm_cmpistri (value, value, 0x3a); if (index < 16 - offset) return length; aligned += 16; } else aligned = s; while (1) { __m128i value = _mm_load_si128 ((__m128i *) aligned); int index = _mm_cmpistri (mask, value, 0x12); int cflag = _mm_cmpistrc (mask, value, 0x12); if (cflag) return (size_t) (aligned + index - s); aligned += 16; } }
// wcsstr - find the first occurrence of the wide string wcs2 inside wcs1.
//
// Parameters:
//   wcs1 - NUL-terminated wide string to search in.
//   wcs2 - NUL-terminated wide string to search for.
//
// Returns a pointer to the first occurrence of wcs2 within wcs1, wcs1 itself
// when wcs2 is empty, or NULL when wcs2 does not occur in wcs1.
//
// Three implementations are selected at run time through __isa_available:
//   * above __ISA_AVAILABLE_SSE2: PCMPISTR* based search driven by f_srch_sub
//     (presumably the 16-bit "equal ordered" control word - confirm against
//     its definition);
//   * exactly __ISA_AVAILABLE_SSE2: first-character scan with PCMPEQW;
//   * otherwise: plain character-by-character search.
//
// Vector loads are only issued when XMM_PAGE_SAFE(p) holds (presumably "a
// 16-byte read at p does not cross a page boundary" - confirm against the
// macro); in every other case the code falls back to single characters.
wchar_t * __cdecl wcsstr (
        const wchar_t * wcs1,
        const wchar_t * wcs2
        )
{
    const wchar_t *stmp1, *stmp2;
    __m128i zero, pattern, characters1, characters2;

    // An empty search string matches everything.
    if (0 == *wcs2)
        return (wchar_t *)wcs1;

    if (__isa_available > __ISA_AVAILABLE_SSE2)
    {
        wchar_t c;
        unsigned i;

        // Load XMM with first characters of wcs2.
        if (XMM_PAGE_SAFE(wcs2))
        {
            pattern = _mm_loadu_si128((const __m128i *)wcs2);
        }
        else
        {
            // Build the pattern one character at a time: each round shifts
            // the vector down by one character and inserts the next one at
            // the top, so after XMM_CHARS rounds the first character sits in
            // the lowest lane. Once the terminator has been read it keeps
            // being inserted, which zero-pads the rest.
            // Start from an explicit zero vector rather than xor-ing an
            // uninitialized variable with itself.
            pattern = _mm_setzero_si128();
            c = *(stmp2 = wcs2);
            for (i = 0; i < XMM_CHARS; ++i)
            {
                pattern = _mm_srli_si128(pattern, sizeof(wchar_t));
                pattern = _mm_insert_epi16(pattern, c, (XMM_CHARS-1));
                if (0 != c)
                    c = *++stmp2;
            }
        }

        for (;;)
        {
            // Check for partial match, if none step forward and continue.
            if (XMM_PAGE_SAFE(wcs1))
            {
                characters1 = _mm_loadu_si128((const __m128i *)wcs1);

                // If no potential match or end found, try next XMMWORD.
                if (_mm_cmpistra(pattern, characters1, f_srch_sub))
                {
                    wcs1 += XMM_CHARS;
                    continue;
                }
                // If end found there was no match.
                else if (!_mm_cmpistrc(pattern, characters1, f_srch_sub))
                {
                    return NULL;
                }

                // Get position of potential match.
                wcs1 += _mm_cmpistri(pattern, characters1, f_srch_sub);
            }
            else
            {
                // If end of string found there was no match.
                if (0 == *wcs1)
                {
                    return NULL;
                }

                // If current character doesn't match first character
                // of search string try next character.
                if (*wcs1 != *wcs2)
                {
                    ++wcs1;
                    continue;
                }
            }

            // Potential match, compare to check for full match.
            stmp1 = wcs1;
            stmp2 = wcs2;
            for (;;)
            {
                // If next XMMWORD is page-safe for each string
                // do a XMMWORD comparison.
                if (XMM_PAGE_SAFE(stmp1) && XMM_PAGE_SAFE(stmp2))
                {
                    characters1 = _mm_loadu_si128((const __m128i *)stmp1);
                    characters2 = _mm_loadu_si128((const __m128i *)stmp2);

                    // If unequal then no match found.
                    if (!_mm_cmpistro(characters2, characters1, f_srch_sub))
                    {
                        break;
                    }
                    // If end of search string then match found.
                    else if (_mm_cmpistrs(characters2, characters1, f_srch_sub))
                    {
                        return (wchar_t *)wcs1;
                    }

                    stmp1 += XMM_CHARS;
                    stmp2 += XMM_CHARS;
                    continue;
                }
                // Compare next character.
                else
                {
                    // If end of search string then match found.
                    if (0 == *stmp2)
                    {
                        return (wchar_t *)wcs1;
                    }

                    // If unequal then no match found.
                    if (*stmp1 != *stmp2)
                    {
                        break;
                    }

                    // Character matched - try next character.
                    ++stmp1;
                    ++stmp2;
                }
            }

            // Match not found at current position, try next.
            ++wcs1;
        }
    }
    else if (__isa_available == __ISA_AVAILABLE_SSE2)
    {
        unsigned offset, mask;

        // Build search pattern and zero pattern. Search pattern is
        // XMMWORD with the initial character of the search string
        // in every position. Zero pattern has a zero termination
        // character in every position.
        pattern = _mm_cvtsi32_si128(wcs2[0]);
        pattern = _mm_shufflelo_epi16(pattern, 0);
        pattern = _mm_shuffle_epi32(pattern, 0);
        zero = _mm_setzero_si128();

        // Main loop for searching wcs1.
        for (;;)
        {
            // If XMM check is safe advance wcs1 to the next
            // possible match or end.
            if (XMM_PAGE_SAFE(wcs1))
            {
                characters1 = _mm_loadu_si128((const __m128i *)wcs1);
                characters2 = _mm_cmpeq_epi16(characters1, zero);
                characters1 = _mm_cmpeq_epi16(characters1, pattern);
                characters1 = _mm_or_si128(characters1, characters2);
                mask = _mm_movemask_epi8(characters1);

                // If no character match or end found try next XMMWORD.
                if (0 == mask)
                {
                    wcs1 += XMM_CHARS;
                    continue;
                }

                // Advance wcs1 pointer to next possible match or end.
                // The mask carries one bit per byte, hence the division.
                _BitScanForward(&offset, mask);
                wcs1 += (offset/sizeof(wchar_t));
            }

            // If at the end of wcs1, then no match found.
            if (0 == wcs1[0])
                return NULL;

            // If a first-character match is found compare
            // strings to look for match.
            if (wcs2[0] == wcs1[0])
            {
                stmp1 = wcs1;
                stmp2 = wcs2;
                for (;;)
                {
                    // If both reads are page-safe advance to next
                    // possible difference or wcs2 end.
                    if (XMM_PAGE_SAFE(stmp2) && XMM_PAGE_SAFE(stmp1))
                    {
                        characters1 = _mm_loadu_si128((const __m128i *)stmp1);
                        characters2 = _mm_loadu_si128((const __m128i *)stmp2);
                        // Lanes that differ become all-ones after the second
                        // compare against zero; lanes where wcs2 ends are
                        // or-ed in on top.
                        characters1 = _mm_cmpeq_epi16(characters1, characters2);
                        characters2 = _mm_cmpeq_epi16(characters2, zero);
                        characters1 = _mm_cmpeq_epi16(characters1, zero);
                        characters1 = _mm_or_si128(characters1, characters2);
                        mask = _mm_movemask_epi8(characters1);

                        // If mask is zero there is no difference and
                        // wcs2 does not end in this XMMWORD. Continue
                        // with next XMMWORD.
                        if (0 == mask)
                        {
                            stmp1 += XMM_CHARS;
                            stmp2 += XMM_CHARS;
                            continue;
                        }

                        // Advance string pointers to next significant
                        // character.
                        _BitScanForward(&offset, mask);
                        stmp1 += (offset/sizeof(wchar_t));
                        stmp2 += (offset/sizeof(wchar_t));
                    }

                    // If we've reached the end of wcs2 then a match
                    // has been found.
                    if (0 == stmp2[0])
                        return (wchar_t *)wcs1;

                    // If we've reached a difference then no match
                    // was found.
                    if (stmp1[0] != stmp2[0])
                        break;

                    // Otherwise advance to next character and try
                    // again.
                    ++stmp1;
                    ++stmp2;
                }
            }

            // Current character wasn't a match, try next character.
            ++wcs1;
        }
    }
    else
    {
        // Portable fallback: try every start position in turn.
        const wchar_t *cp = wcs1;
        const wchar_t *s1, *s2;

        while (*cp)
        {
            s1 = cp;
            s2 = wcs2;

            while ( *s1 && *s2 && *s1 == *s2 )
                s1++, s2++;

            // The whole of wcs2 was consumed: match at cp.
            if (!*s2)
                return (wchar_t *) cp;

            cp++;
        }

        return NULL;
    }
}
// Code-generation test for _mm_cmpistrc: returns the intrinsic's result for
// A and B with the immediate control value 7. The inline check directives
// expect the emitted IR to call @llvm.x86.sse42.pcmpistric128 with two
// <16 x i8> operands and the immediate passed through as i8 7.
int test_mm_cmpistrc(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_cmpistrc // CHECK: call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 7) return _mm_cmpistrc(A, B, 7); }
// Code-generation test for _mm_cmpistrc: returns the intrinsic's result for
// A and B with the immediate control value 7. The inline check directives
// only require that the emitted IR references @llvm.x86.sse42.pcmpistric128;
// the operand types and the immediate are not checked here.
int test_mm_cmpistrc(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_cmpistrc // CHECK: @llvm.x86.sse42.pcmpistric128 return _mm_cmpistrc(A, B, 7); }