Exemplo n.º 1
0
bool sse42_is_xdigit(const char* s) {

    if (s == nullptr) {
        return false;
    }

    // 3 ranges 
    const __m128i ranges = _mm_setr_epi8(
        '0', '9', 'a', 'f', 'A', 'F', 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0
    );

    __m128i* mem = reinterpret_cast<__m128i*>(const_cast<char*>(s));

    const uint8_t mode =
        _SIDD_UBYTE_OPS |
        _SIDD_CMP_RANGES |
        _SIDD_MASKED_NEGATIVE_POLARITY |
        _SIDD_LEAST_SIGNIFICANT |
        _SIDD_BIT_MASK;

    for (/**/; /**/; mem++) {

        const __m128i chunk = _mm_loadu_si128(mem);

        if (_mm_cmpistrc(ranges, chunk, mode)) {
            // there are some characters outside the given ranges in a chunk
            return false;
        } else if (_mm_cmpistrz(ranges, chunk, mode)) {
            // there is zero byte in a chunk
            if (*s == 0) {
                // empty string
                return false;
            } else {
                return true;
            }
        }
    }

    assert(false && "impossible happend");
    return false;
}
Exemplo n.º 2
0
void
test8bit (void)
{
  i1 = _mm_cmpistrm (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistri (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistra (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistrc (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistro (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistrs (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistrz (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
  i1 = _mm_cmpestrm (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestri (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestra (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestrc (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestro (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestrs (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestrz (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  b1 = _mm256_blend_ps (b2, b3, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
  k1 = _cvtss_sh (f1, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm256_cvtps_ph (b2, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
  b1 = _mm256_dp_ps (b2, b3, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
  e1 = _mm256_permute2f128_pd (e2, e3, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */
  b1 = _mm256_permute2f128_ps (b2, b3, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */
  l1 = _mm256_permute2f128_si256 (l2, l3, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */
  b1 = _mm256_permute_ps (b2, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_aeskeygenassist_si128 (i2, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_blend_epi16 (i2, i3, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_clmulepi64_si128 (i2, i3, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_cvtps_ph (a1, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
  d1 = _mm_dp_pd (d2, d3, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
  a1 = _mm_dp_ps (a2, a3, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
  a1 = _mm_insert_ps (a2, a3, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_mpsadbw_epu8 (i2, i3, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
  a1 = _mm_permute_ps (a2, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_slli_si128 (i2, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_srli_si128 (i2, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
}
Exemplo n.º 3
0
__strspn_sse42 (const char *s, const char *a)
{
  if (*a == 0)
    return 0;

  const char *aligned;
  __m128i mask;
  int offset = (int) ((size_t) a & 15);
  if (offset != 0)
    {
      /* Load masks.  */
      aligned = (const char *) ((size_t) a & -16L);
      __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);

      mask = __m128i_shift_right (mask0, offset);

      /* Find where the NULL terminator is.  */
      int length = _mm_cmpistri (mask, mask, 0x3a);
      if (length == 16 - offset)
	{
	  /* There is no NULL terminator.  */
	  __m128i mask1 = _mm_load_si128 ((__m128i *) (aligned + 16));
	  int index = _mm_cmpistri (mask1, mask1, 0x3a);
	  length += index;

	  /* Don't use SSE4.2 if the length of A > 16.  */
	  if (length > 16)
	    return __strspn_sse2 (s, a);

	  if (index != 0)
	    {
	      /* Combine mask0 and mask1.  We could play games with
		 palignr, but frankly this data should be in L1 now
		 so do the merge via an unaligned load.  */
	      mask = _mm_loadu_si128 ((__m128i *) a);
	    }
	}
    }
  else
    {
      /* A is aligned.  */
      mask = _mm_load_si128 ((__m128i *) a);

      /* Find where the NULL terminator is.  */
      int length = _mm_cmpistri (mask, mask, 0x3a);
      if (length == 16)
	{
	  /* There is no NULL terminator.  Don't use SSE4.2 if the length
	     of A > 16.  */
	  if (a[16] != 0)
	    return __strspn_sse2 (s, a);
	}
    }

  offset = (int) ((size_t) s & 15);
  if (offset != 0)
    {
      /* Check partial string.  */
      aligned = (const char *) ((size_t) s & -16L);
      __m128i value = _mm_load_si128 ((__m128i *) aligned);

      value = __m128i_shift_right (value, offset);

      int length = _mm_cmpistri (mask, value, 0x12);
      /* No need to check CFlag since it is always 1.  */
      if (length < 16 - offset)
	return length;
      /* Find where the NULL terminator is.  */
      int index = _mm_cmpistri (value, value, 0x3a);
      if (index < 16 - offset)
	return length;
      aligned += 16;
    }
  else
    aligned = s;

  while (1)
    {
      __m128i value = _mm_load_si128 ((__m128i *) aligned);
      int index = _mm_cmpistri (mask, value, 0x12);
      int cflag = _mm_cmpistrc (mask, value, 0x12);
      if (cflag)
	return (size_t) (aligned + index - s);
      aligned += 16;
    }
}
Exemplo n.º 4
0
wchar_t * __cdecl wcsstr (
        const wchar_t * wcs1,
        const wchar_t * wcs2
        )
{
    const wchar_t *stmp1, *stmp2;
    __m128i zero, pattern, characters1, characters2;

    // An empty search string matches everything.
    if (0 == *wcs2)
        return (wchar_t *)wcs1;

    if (__isa_available > __ISA_AVAILABLE_SSE2)
    {
        wchar_t c;
        unsigned i;

        // Load XMM with first characters of wcs2.
        if (XMM_PAGE_SAFE(wcs2))
        {
            pattern = _mm_loadu_si128((__m128i*)wcs2);
        }
        else
        {
            pattern = _mm_xor_si128(pattern, pattern);
            c = *(stmp2 = wcs2);
            for (i = 0; i < XMM_CHARS; ++i)
            {
                pattern = _mm_srli_si128(pattern, sizeof(wchar_t));
                pattern = _mm_insert_epi16(pattern, c, (XMM_CHARS-1));
                if (0 != c) c = *++stmp2;
            }
        }

        for(;;)
        {
            // Check for partial match, if none step forward and continue.
            if (XMM_PAGE_SAFE(wcs1))
            {
                characters1 = _mm_loadu_si128((__m128i*)wcs1);
                // If no potential match or end found, try next XMMWORD.
                if (_mm_cmpistra(pattern, characters1, f_srch_sub))
                {
                    wcs1 += XMM_CHARS;
                    continue;
                }
                // If end found there was no match.
                else if (!_mm_cmpistrc(pattern, characters1, f_srch_sub))
                {
                    return NULL;
                }

                // Get position of potential match.
                wcs1 += _mm_cmpistri(pattern, characters1, f_srch_sub);
            }
            else
            {
              // If end of string found there was no match.
              if (0 == *wcs1)
              {
                  return NULL;
              }

              // If current character doesn't match first character
              // of search string try next character.
              if (*wcs1 != *wcs2)
              {
                  ++wcs1;
                  continue;
              }
            }

            // Potential match, compare to check for full match.
            stmp1 = wcs1;
            stmp2 = wcs2;
            for (;;)
            {
                // If next XMMWORD is page-safe for each string
                // do a XMMWORD comparison.
                if (XMM_PAGE_SAFE(stmp1) && XMM_PAGE_SAFE(stmp2))
                {
                    characters1 = _mm_loadu_si128((__m128i*)stmp1);
                    characters2 = _mm_loadu_si128((__m128i*)stmp2);

                    // If unequal then no match found.
                    if (!_mm_cmpistro(characters2, characters1, f_srch_sub))
                    {
                        break;
                    }

                    // If end of search string then match found.
                    else if (_mm_cmpistrs(characters2, characters1, f_srch_sub))
                    {
                        return (wchar_t *)wcs1;
                    }

                    stmp1 += XMM_CHARS;
                    stmp2 += XMM_CHARS;
                    continue;
                }

                // Compare next character.
                else
                {
                    // If end of search string then match found.
                    if (0 == *stmp2)
                    {
                        return (wchar_t *)wcs1;
                    }

                    // If unequal then no match found.
                    if (*stmp1 != *stmp2)
                    {
                        break;
                    }

                    // Character matched - try next character.
                    ++stmp1;
                    ++stmp2;
                }
            }

            // Match not found at current position, try next.
            ++wcs1;
        }
    }
    else if (__isa_available == __ISA_AVAILABLE_SSE2)
    {
        unsigned offset, mask;

        // Build search pattern and zero pattern. Search pattern is
        // XMMWORD with the initial character of the search string
        // in every position. Zero pattern has a zero termination
        // character in every position.

        pattern = _mm_cvtsi32_si128(wcs2[0]);
        pattern = _mm_shufflelo_epi16(pattern, 0);
        pattern = _mm_shuffle_epi32(pattern, 0);
        zero = _mm_xor_si128(zero, zero);

        // Main loop for searching wcs1.

        for (;;)
        {
            // If XMM check is safe advance wcs1 to the next
            // possible match or end.

            if (XMM_PAGE_SAFE(wcs1))
            {
                characters1 = _mm_loadu_si128((__m128i*)wcs1);
                characters2 = _mm_cmpeq_epi16(characters1, zero);
                characters1 = _mm_cmpeq_epi16(characters1, pattern);
                characters1 = _mm_or_si128(characters1, characters2);
                mask = _mm_movemask_epi8(characters1);

                // If no character match or end found try next XMMWORD.

                if (0 == mask)
                {
                    wcs1 += XMM_CHARS;
                    continue;
                }

                // Advance wcs1 pointer to next possible match or end.

                _BitScanForward(&offset, mask);
                wcs1 += (offset/sizeof(wchar_t));
            }

            // If at the end of wcs1, then no match found.

            if (0 == wcs1[0]) return NULL;

            // If a first-character match is found compare
            // strings to look for match.

            if (wcs2[0] == wcs1[0])
            {
                stmp1 = wcs1;
                stmp2 = wcs2;
                for (;;)
                {
                    // If aligned as specified advance to next
                    // possible difference or wcs2 end.

                    if (XMM_PAGE_SAFE(stmp2) && XMM_PAGE_SAFE(stmp1))
                    {
                        characters1 = _mm_loadu_si128((__m128i*)stmp1);
                        characters2 = _mm_loadu_si128((__m128i*)stmp2);
                        characters1 = _mm_cmpeq_epi16(characters1, characters2);
                        characters2 = _mm_cmpeq_epi16(characters2, zero);
                        characters1 = _mm_cmpeq_epi16(characters1, zero);
                        characters1 = _mm_or_si128(characters1, characters2);
                        mask = _mm_movemask_epi8(characters1);

                        // If mask is zero there is no difference and
                        // wcs2 does not end in this XMMWORD. Continue
                        // with next XMMWORD.

                        if (0 == mask)
                        {
                            stmp1 += XMM_CHARS;
                            stmp2 += XMM_CHARS;
                            continue;
                        }

                        // Advance string pointers to next significant
                        // character.

                        _BitScanForward(&offset, mask);
                        stmp1 += (offset/sizeof(wchar_t));
                        stmp2 += (offset/sizeof(wchar_t));
                    }

                    // If we've reached the end of wcs2 then a match
                    // has been found.

                    if (0 == stmp2[0]) return (wchar_t *)wcs1;

                    // If we've reached a difference then no match
                    // was found.

                    if (stmp1[0] != stmp2[0]) break;

                    // Otherwise advance to next character and try
                    // again.

                    ++stmp1;
                    ++stmp2;
                }
            }

            // Current character wasn't a match, try next character.

            ++wcs1;
        }
    }
    else
    {
        const wchar_t *cp = wcs1;
        const wchar_t *s1, *s2;

        while (*cp)
        {
            s1 = cp;
            s2 = wcs2;

            while ( *s1 && *s2 && !(*s1-*s2) )
                s1++, s2++;

            if (!*s2)
                return (wchar_t *) cp;

            cp++;
        }

        return NULL;
    }
}
Exemplo n.º 5
0
int test_mm_cmpistrc(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpistrc
  // CHECK: call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 7)
  return _mm_cmpistrc(A, B, 7);
}
Exemplo n.º 6
0
int test_mm_cmpistrc(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpistrc
  // CHECK: @llvm.x86.sse42.pcmpistric128
  return _mm_cmpistrc(A, B, 7);
}