コード例 #1
0
ファイル: strcmpsse.cpp プロジェクト: TrygveS/sweet.hpp
int sse_strcmp(const char *p1, const char *p2)  {
	const int mode = _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH | _SIDD_BIT_MASK | _SIDD_NEGATIVE_POLARITY;
	__m128i smm1 = _mm_loadu_si128 ((__m128i *) p1);
	__m128i smm2 = _mm_loadu_si128 ((__m128i *) p2);
	int ResultIndex;
	while (1) {
		ResultIndex = _mm_cmpistri (smm1, smm2, mode );
		if (ResultIndex != 16) { break; }
		p1 = p1+16;
		p2 = p2+16;
		smm1 = _mm_loadu_si128 ((__m128i *)p1);
		smm2 = _mm_loadu_si128 ((__m128i *)p2);
	}
	p1 = (char *) & smm1;
	p2 = (char *) & smm2;
	if(p1[ResultIndex] < p2[ResultIndex]) return -1;
	if(p1[ResultIndex] > p2[ResultIndex]) return 1;
	return 0;
}
コード例 #2
0
ファイル: testimm-3.c プロジェクト: 0day-ci/gcc
void
test8bit (void)
{
  i1 = _mm_cmpistrm (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistri (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistra (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistrc (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistro (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistrs (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistrz (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
  i1 = _mm_cmpestrm (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestri (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestra (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestrc (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestro (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestrs (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestrz (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  b1 = _mm256_blend_ps (b2, b3, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
  k1 = _cvtss_sh (f1, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm256_cvtps_ph (b2, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
  b1 = _mm256_dp_ps (b2, b3, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
  e1 = _mm256_permute2f128_pd (e2, e3, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */
  b1 = _mm256_permute2f128_ps (b2, b3, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */
  l1 = _mm256_permute2f128_si256 (l2, l3, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */
  b1 = _mm256_permute_ps (b2, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_aeskeygenassist_si128 (i2, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_blend_epi16 (i2, i3, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_clmulepi64_si128 (i2, i3, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_cvtps_ph (a1, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
  d1 = _mm_dp_pd (d2, d3, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
  a1 = _mm_dp_ps (a2, a3, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
  a1 = _mm_insert_ps (a2, a3, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_mpsadbw_epu8 (i2, i3, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
  a1 = _mm_permute_ps (a2, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_slli_si128 (i2, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_srli_si128 (i2, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
}
コード例 #3
0
ファイル: strspn-c.c プロジェクト: mbref/eglibc-microblaze
__strspn_sse42 (const char *s, const char *a)
{
  if (*a == 0)
    return 0;

  const char *aligned;
  __m128i mask;
  int offset = (int) ((size_t) a & 15);
  if (offset != 0)
    {
      /* Load masks.  */
      aligned = (const char *) ((size_t) a & -16L);
      __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);

      mask = __m128i_shift_right (mask0, offset);

      /* Find where the NULL terminator is.  */
      int length = _mm_cmpistri (mask, mask, 0x3a);
      if (length == 16 - offset)
	{
	  /* There is no NULL terminator.  */
	  __m128i mask1 = _mm_load_si128 ((__m128i *) (aligned + 16));
	  int index = _mm_cmpistri (mask1, mask1, 0x3a);
	  length += index;

	  /* Don't use SSE4.2 if the length of A > 16.  */
	  if (length > 16)
	    return __strspn_sse2 (s, a);

	  if (index != 0)
	    {
	      /* Combine mask0 and mask1.  We could play games with
		 palignr, but frankly this data should be in L1 now
		 so do the merge via an unaligned load.  */
	      mask = _mm_loadu_si128 ((__m128i *) a);
	    }
	}
    }
  else
    {
      /* A is aligned.  */
      mask = _mm_load_si128 ((__m128i *) a);

      /* Find where the NULL terminator is.  */
      int length = _mm_cmpistri (mask, mask, 0x3a);
      if (length == 16)
	{
	  /* There is no NULL terminator.  Don't use SSE4.2 if the length
	     of A > 16.  */
	  if (a[16] != 0)
	    return __strspn_sse2 (s, a);
	}
    }

  offset = (int) ((size_t) s & 15);
  if (offset != 0)
    {
      /* Check partial string.  */
      aligned = (const char *) ((size_t) s & -16L);
      __m128i value = _mm_load_si128 ((__m128i *) aligned);

      value = __m128i_shift_right (value, offset);

      int length = _mm_cmpistri (mask, value, 0x12);
      /* No need to check CFlag since it is always 1.  */
      if (length < 16 - offset)
	return length;
      /* Find where the NULL terminator is.  */
      int index = _mm_cmpistri (value, value, 0x3a);
      if (index < 16 - offset)
	return length;
      aligned += 16;
    }
  else
    aligned = s;

  while (1)
    {
      __m128i value = _mm_load_si128 ((__m128i *) aligned);
      int index = _mm_cmpistri (mask, value, 0x12);
      int cflag = _mm_cmpistrc (mask, value, 0x12);
      if (cflag)
	return (size_t) (aligned + index - s);
      aligned += 16;
    }
}
コード例 #4
0
wchar_t * __cdecl wcsstr (
        const wchar_t * wcs1,
        const wchar_t * wcs2
        )
{
    const wchar_t *stmp1, *stmp2;
    __m128i zero, pattern, characters1, characters2;

    // An empty search string matches everything.
    if (0 == *wcs2)
        return (wchar_t *)wcs1;

    if (__isa_available > __ISA_AVAILABLE_SSE2)
    {
        wchar_t c;
        unsigned i;

        // Load XMM with first characters of wcs2.
        if (XMM_PAGE_SAFE(wcs2))
        {
            pattern = _mm_loadu_si128((__m128i*)wcs2);
        }
        else
        {
            pattern = _mm_xor_si128(pattern, pattern);
            c = *(stmp2 = wcs2);
            for (i = 0; i < XMM_CHARS; ++i)
            {
                pattern = _mm_srli_si128(pattern, sizeof(wchar_t));
                pattern = _mm_insert_epi16(pattern, c, (XMM_CHARS-1));
                if (0 != c) c = *++stmp2;
            }
        }

        for(;;)
        {
            // Check for partial match, if none step forward and continue.
            if (XMM_PAGE_SAFE(wcs1))
            {
                characters1 = _mm_loadu_si128((__m128i*)wcs1);
                // If no potential match or end found, try next XMMWORD.
                if (_mm_cmpistra(pattern, characters1, f_srch_sub))
                {
                    wcs1 += XMM_CHARS;
                    continue;
                }
                // If end found there was no match.
                else if (!_mm_cmpistrc(pattern, characters1, f_srch_sub))
                {
                    return NULL;
                }

                // Get position of potential match.
                wcs1 += _mm_cmpistri(pattern, characters1, f_srch_sub);
            }
            else
            {
              // If end of string found there was no match.
              if (0 == *wcs1)
              {
                  return NULL;
              }

              // If current character doesn't match first character
              // of search string try next character.
              if (*wcs1 != *wcs2)
              {
                  ++wcs1;
                  continue;
              }
            }

            // Potential match, compare to check for full match.
            stmp1 = wcs1;
            stmp2 = wcs2;
            for (;;)
            {
                // If next XMMWORD is page-safe for each string
                // do a XMMWORD comparison.
                if (XMM_PAGE_SAFE(stmp1) && XMM_PAGE_SAFE(stmp2))
                {
                    characters1 = _mm_loadu_si128((__m128i*)stmp1);
                    characters2 = _mm_loadu_si128((__m128i*)stmp2);

                    // If unequal then no match found.
                    if (!_mm_cmpistro(characters2, characters1, f_srch_sub))
                    {
                        break;
                    }

                    // If end of search string then match found.
                    else if (_mm_cmpistrs(characters2, characters1, f_srch_sub))
                    {
                        return (wchar_t *)wcs1;
                    }

                    stmp1 += XMM_CHARS;
                    stmp2 += XMM_CHARS;
                    continue;
                }

                // Compare next character.
                else
                {
                    // If end of search string then match found.
                    if (0 == *stmp2)
                    {
                        return (wchar_t *)wcs1;
                    }

                    // If unequal then no match found.
                    if (*stmp1 != *stmp2)
                    {
                        break;
                    }

                    // Character matched - try next character.
                    ++stmp1;
                    ++stmp2;
                }
            }

            // Match not found at current position, try next.
            ++wcs1;
        }
    }
    else if (__isa_available == __ISA_AVAILABLE_SSE2)
    {
        unsigned offset, mask;

        // Build search pattern and zero pattern. Search pattern is
        // XMMWORD with the initial character of the search string
        // in every position. Zero pattern has a zero termination
        // character in every position.

        pattern = _mm_cvtsi32_si128(wcs2[0]);
        pattern = _mm_shufflelo_epi16(pattern, 0);
        pattern = _mm_shuffle_epi32(pattern, 0);
        zero = _mm_xor_si128(zero, zero);

        // Main loop for searching wcs1.

        for (;;)
        {
            // If XMM check is safe advance wcs1 to the next
            // possible match or end.

            if (XMM_PAGE_SAFE(wcs1))
            {
                characters1 = _mm_loadu_si128((__m128i*)wcs1);
                characters2 = _mm_cmpeq_epi16(characters1, zero);
                characters1 = _mm_cmpeq_epi16(characters1, pattern);
                characters1 = _mm_or_si128(characters1, characters2);
                mask = _mm_movemask_epi8(characters1);

                // If no character match or end found try next XMMWORD.

                if (0 == mask)
                {
                    wcs1 += XMM_CHARS;
                    continue;
                }

                // Advance wcs1 pointer to next possible match or end.

                _BitScanForward(&offset, mask);
                wcs1 += (offset/sizeof(wchar_t));
            }

            // If at the end of wcs1, then no match found.

            if (0 == wcs1[0]) return NULL;

            // If a first-character match is found compare
            // strings to look for match.

            if (wcs2[0] == wcs1[0])
            {
                stmp1 = wcs1;
                stmp2 = wcs2;
                for (;;)
                {
                    // If aligned as specified advance to next
                    // possible difference or wcs2 end.

                    if (XMM_PAGE_SAFE(stmp2) && XMM_PAGE_SAFE(stmp1))
                    {
                        characters1 = _mm_loadu_si128((__m128i*)stmp1);
                        characters2 = _mm_loadu_si128((__m128i*)stmp2);
                        characters1 = _mm_cmpeq_epi16(characters1, characters2);
                        characters2 = _mm_cmpeq_epi16(characters2, zero);
                        characters1 = _mm_cmpeq_epi16(characters1, zero);
                        characters1 = _mm_or_si128(characters1, characters2);
                        mask = _mm_movemask_epi8(characters1);

                        // If mask is zero there is no difference and
                        // wcs2 does not end in this XMMWORD. Continue
                        // with next XMMWORD.

                        if (0 == mask)
                        {
                            stmp1 += XMM_CHARS;
                            stmp2 += XMM_CHARS;
                            continue;
                        }

                        // Advance string pointers to next significant
                        // character.

                        _BitScanForward(&offset, mask);
                        stmp1 += (offset/sizeof(wchar_t));
                        stmp2 += (offset/sizeof(wchar_t));
                    }

                    // If we've reached the end of wcs2 then a match
                    // has been found.

                    if (0 == stmp2[0]) return (wchar_t *)wcs1;

                    // If we've reached a difference then no match
                    // was found.

                    if (stmp1[0] != stmp2[0]) break;

                    // Otherwise advance to next character and try
                    // again.

                    ++stmp1;
                    ++stmp2;
                }
            }

            // Current character wasn't a match, try next character.

            ++wcs1;
        }
    }
    else
    {
        const wchar_t *cp = wcs1;
        const wchar_t *s1, *s2;

        while (*cp)
        {
            s1 = cp;
            s2 = wcs2;

            while ( *s1 && *s2 && !(*s1-*s2) )
                s1++, s2++;

            if (!*s2)
                return (wchar_t *) cp;

            cp++;
        }

        return NULL;
    }
}
コード例 #5
0
ファイル: strcmp.cpp プロジェクト: sanjosh/smallprogs
int substr( const char* s1, const char * s2 )
{
        __m128i s1chars = _mm_loadu_si128( (const __m128i *) s2 );
        __m128i s2chars = _mm_loadu_si128( (const __m128i *) (s1));
        return _mm_cmpistri( s1chars, s2chars, _SIDD_CMP_EQUAL_ANY );
}
コード例 #6
0
ファイル: sse42-builtins.c プロジェクト: CSI-LLVM/clang
int test_mm_cmpistri(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpistri
  // CHECK: call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 7)
  return _mm_cmpistri(A, B, 7);
}
コード例 #7
0
ファイル: sse42-builtins.c プロジェクト: AlexDenisov/clang
int test_mm_cmpistri(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_cmpistri
  // CHECK: @llvm.x86.sse42.pcmpistri128
  return _mm_cmpistri(A, B, 7);
}