/* Cross-check _mm_cmpestri against the cmp_ei reference routine.

   Both operand arrays are filled with random bytes.  Each of the NUM
   vector pairs is then compared once, using random explicit lengths and
   one of the four IMM_VAL* control bytes chosen at random.  The test
   aborts on the first index that disagrees with the reference.  */
static void
sse4_2_test (void)
{
  union
    {
      __m128i x[NUM];
      char c[NUM *16];
    } src1, src2;
  int got, expected, len1, len2;
  int n;

  /* The control byte must be a compile-time constant, so the pair of
     calls is stamped out once per IMM_VAL* rather than being driven by a
     variable.  */
#define CHECK_CMPESTRI(imm)						\
  do									\
    {									\
      got = _mm_cmpestri (src1.x[n], len1, src2.x[n], len2, imm);	\
      expected = cmp_ei (&src1.x[n], len1, &src2.x[n], len2, imm,	\
			 NULL);						\
    }									\
  while (0)

  /* Random input bytes for both operands.  */
  for (n = 0; n < NUM *16; n++)
    {
      src1.c[n] = rand ();
      src2.c[n] = rand ();
    }

  for (n = 0; n < NUM; n++)
    {
      /* Explicit lengths are drawn from 0..17.  */
      len1 = rand () % 18;
      len2 = rand () % 18;

      switch ((rand () % 4))
	{
	case 0:
	  CHECK_CMPESTRI (IMM_VAL0);
	  break;

	case 1:
	  CHECK_CMPESTRI (IMM_VAL1);
	  break;

	case 2:
	  CHECK_CMPESTRI (IMM_VAL2);
	  break;

	default:
	  CHECK_CMPESTRI (IMM_VAL3);
	  break;
	}

      if (got != expected)
	abort ();
    }

#undef CHECK_CMPESTRI
}
Beispiel #2
0
/*
 * Scan [buf, buf_end) in 16-byte steps for the first byte that lies inside one
 * of the byte ranges stored at `ranges` (`ranges_size` bytes of it are used).
 *
 * On a hit, *found is set to 1 and the address of the matching byte is
 * returned. Otherwise *found stays 0 and the return value is the position
 * where scanning stopped; a trailing remainder shorter than 16 bytes is not
 * examined here. Without SSE4.2 the function returns buf unchanged with
 * *found == 0.
 */
static const char *findchar_fast(const char *buf, const char *buf_end, const char *ranges, size_t ranges_size, int *found)
{
    *found = 0;
#if __SSE4_2__
    if (likely(buf_end - buf >= 16)) {
        const __m128i range_vec = _mm_loadu_si128((const __m128i *)ranges);

        /* number of bytes covered by whole 16-byte chunks; at least 16 here */
        size_t remaining = (buf_end - buf) & ~15;

        while (likely(remaining != 0)) {
            const __m128i chunk = _mm_loadu_si128((const __m128i *)buf);
            const int hit =
                _mm_cmpestri(range_vec, ranges_size, chunk, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
            if (unlikely(hit != 16)) {
                /* an index below 16 is the offset of the first matching byte */
                *found = 1;
                buf += hit;
                break;
            }
            remaining -= 16;
            buf += 16;
        }
    }
#else
    /* suppress unused parameter warning */
    (void)ranges_size;
    (void)ranges;
    (void)buf_end;
#endif
    return buf;
}
Beispiel #3
0
size_t scanHaystackBlock(const StringPieceLite haystack,
                         const StringPieceLite needles,
                         uint64_t blockStartIdx) {
  DCHECK_GT(needles.size(), 16u); // should handled by *needles16() method
  DCHECK(blockStartIdx + 16 <= haystack.size() ||
         (page_for(haystack.data() + blockStartIdx) ==
          page_for(haystack.data() + blockStartIdx + 15)));

  __m128i arr1;
  if (HAYSTACK_ALIGNED) {
    arr1 = _mm_load_si128(
        reinterpret_cast<const __m128i*>(haystack.data() + blockStartIdx));
  } else {
    arr1 = _mm_loadu_si128(
        reinterpret_cast<const __m128i*>(haystack.data() + blockStartIdx));
  }

  // This load is safe because needles.size() >= 16
  auto arr2 = _mm_loadu_si128(
      reinterpret_cast<const __m128i*>(needles.data()));
  size_t b =
      _mm_cmpestri(arr2, 16, arr1, int(haystack.size() - blockStartIdx), 0);

  size_t j = nextAlignedIndex(needles.data());
  for (; j < needles.size(); j += 16) {
    arr2 = _mm_load_si128(
        reinterpret_cast<const __m128i*>(needles.data() + j));

    auto index = _mm_cmpestri(
        arr2,
        int(needles.size() - j),
        arr1,
        int(haystack.size() - blockStartIdx),
        0);
    b = std::min<size_t>(index, b);
  }

  if (b < 16) {
    return blockStartIdx + b;
  }
  return std::string::npos;
}
Beispiel #4
0
// Scans XML character data starting at yycur with the SSE4.2 string
// instructions and hands each accepted run to
// saxProcessor->reportCharDataContent().
//
// The function returns immediately when the first byte is '<', '&' or ']'.
// Otherwise it repeatedly:
//   1. handles a leading line break (0x0A, or 0x0D optionally followed by
//      0x0A), notifying saxProcessor->newLine();
//   2. advances over up to 16 bytes at a time until one of the five
//      nonCharData bytes (0x0A, 0x0D, '&', '<', ']') is found or a byte fails
//      both the asciiCharData range test and recogUnicodeRange();
//   3. reports the bytes between yycur and the stop position.
// It loops again only while at least STTNISTRLENLIMIT bytes remain and the
// stop position is a line break.
//
// NOTE(review): the 16-byte load below is issued even when fewer than 16
// bytes remain; presumably the input buffer is padded -- confirm.
void scanCharDataContentwithSTTNI(SAX2Processor* saxProcessor) {
    unsigned int length = yylim - yycur;
    unsigned char* data = (unsigned char*)yycur;
    if( *data == '<' || *data == '&' || *data == ']') return;
    unsigned int dataLen = 0;
    // initialize the one byte encoding rule and nonCharaData rule
    // (_mm_set_epi8 lists bytes from the highest lane down, so the used
    // entries are the rightmost ones: 10 bytes = 5 range pairs for
    // asciiCharData, 5 single bytes for nonCharData)
    const __m128i asciiCharData = _mm_set_epi8(0,0,0,0,0,0,0x7F,0x5E,0x5C,0x3D, 0x3B,0x27,0x25,0x20,0,0);
    const __m128i nonCharData = _mm_set_epi8(0,0,0,0,0,0,0,0,0,0,0,0x5D,0x3C,0x26,0x0D,0x0A);

    do {
        // special new line processing for 0x0A (LF) and 0x0D (CR)
        if( *data == '\x0A' ) {
            saxProcessor->newLine((char*)data);
            data++; length--;
        } else if( *data == '\x0D' ) {
            saxProcessor->newLine((char*)data);
            if( *(data+1) == '\x0A' ) {
                // CR LF pair: step over both bytes and move yycur past the CR
                // so the reported data starts at the LF
                data += 2; length -= 2; yycur++;
            } else {
                // lone CR: rewrite it in place as LF
                *data = '\x0A'; data++; length--;
            }
        }

        while( length > 0 ) {
            if( length >= 16 ) dataLen = 16;
            else dataLen = length;
            const __m128i mData = _mm_loadu_si128((__m128i*)data);
            // locate the Character Data part with the nonCharaData characters
            int index = _mm_cmpestri(nonCharData, 5, mData, dataLen, _SIDD_CMP_EQUAL_ANY);
            if( index == 0 ) break;
            // no terminator in the valid bytes: accept the whole chunk
            if( index > dataLen ) index = dataLen;
            bool shouldBreak = index < dataLen ? true : false;
            // check the one byte encoding rule(ASCII)
            unsigned int mask = _mm_cvtsi128_si32(_mm_cmpestrm(asciiCharData, 10,
                mData, index, _SIDD_CMP_RANGES|_SIDD_MASKED_NEGATIVE_POLARITY));
            // if not all hit ASCII, continue to check other Unicode rules
            if( mask == 0 || recogUnicodeRange(mData, index, ~mask)) {
                data += index;
                length -= index;
                if( shouldBreak ) break;
            } else {
                break;
            }
        }

        unsigned int passLen = (char*)data - yycur;
        if( passLen == 0 ) break;
        // report Character Data to user
        saxProcessor->reportCharDataContent(yycur, passLen);
        yycur += passLen;
        YYSWITCHBUFFER;
    } while( length >= STTNISTRLENLIMIT && (*data == '\x0A' || *data == '\x0D') );
}
Beispiel #5
0
// Helper for the case where needles.size() <= 16.
//
// Returns the index of the first byte of `haystack` that equals any byte of
// `needles`, or std::string::npos if there is none. Both inputs must be
// non-empty. Falls back to detail::qfind_first_byte_of_nosse() when the SSE
// path is not taken (see the conditions below).
size_t qfind_first_byte_of_needles16(const StringPieceLite haystack,
                                     const StringPieceLite needles) {
  DCHECK_GT(haystack.size(), 0u);
  DCHECK_GT(needles.size(), 0u);
  DCHECK_LE(needles.size(), 16u);

  if (
      // one or two needles against a haystack of 256+ bytes
      (needles.size() <= 2 && haystack.size() >= 256) ||
      // must bail if we can't even SSE-load a single segment of haystack
      (haystack.size() < 16 &&
       page_for(haystack.end() - 1) != page_for(haystack.data() + 15)) ||
      // can't load needles into SSE register if it could cross page boundary
      page_for(needles.end() - 1) != page_for(needles.data() + 15)) {
    return detail::qfind_first_byte_of_nosse(haystack, needles);
  }

  const int needleCount = int(needles.size());
  const auto needleVec =
      _mm_loadu_si128(reinterpret_cast<const __m128i*>(needles.data()));

  // The first haystack block is read with an unaligned load.
  auto block =
      _mm_loadu_si128(reinterpret_cast<const __m128i*>(haystack.data()));
  auto hit =
      _mm_cmpestri(needleVec, needleCount, block, int(haystack.size()), 0);
  if (hit < 16) {
    return hit;
  }

  // From the next aligned index onwards, aligned loads are used.
  size_t pos = nextAlignedIndex(haystack.data());
  while (pos < haystack.size()) {
    block = _mm_load_si128(
        reinterpret_cast<const __m128i*>(haystack.data() + pos));
    hit = _mm_cmpestri(
        needleVec, needleCount, block, int(haystack.size() - pos), 0);
    if (hit < 16) {
      return pos + hit;
    }
    pos += 16;
  }
  return std::string::npos;
}
Beispiel #6
0
/* Compile-only negative test.  Every intrinsic call below passes the
   variable k4 in the position where the expected-diagnostic annotation on
   the same line says an 8-bit immediate is required.  The operands used
   here (i1, k1, b1, ...) are not declared in this function.  */
void
test8bit (void)
{
  i1 = _mm_cmpistrm (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistri (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistra (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistrc (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistro (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistrs (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistrz (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
  i1 = _mm_cmpestrm (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestri (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestra (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestrc (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestro (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestrs (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestrz (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  b1 = _mm256_blend_ps (b2, b3, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
  k1 = _cvtss_sh (f1, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm256_cvtps_ph (b2, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
  b1 = _mm256_dp_ps (b2, b3, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
  e1 = _mm256_permute2f128_pd (e2, e3, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */
  b1 = _mm256_permute2f128_ps (b2, b3, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */
  l1 = _mm256_permute2f128_si256 (l2, l3, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */
  b1 = _mm256_permute_ps (b2, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_aeskeygenassist_si128 (i2, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_blend_epi16 (i2, i3, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_clmulepi64_si128 (i2, i3, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_cvtps_ph (a1, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
  d1 = _mm_dp_pd (d2, d3, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
  a1 = _mm_dp_ps (a2, a3, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
  a1 = _mm_insert_ps (a2, a3, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_mpsadbw_epu8 (i2, i3, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
  a1 = _mm_permute_ps (a2, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_slli_si128 (i2, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_srli_si128 (i2, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
}
/*
 * Write the HTML-escaped form of input[0 .. input_size) to dst, followed by a
 * terminating NUL.
 *
 * Replacements:
 *   &  -> &amp;     >  -> &gt;      <  -> &lt;      "  -> &quot;
 *   '  -> &#39;     `  -> &#96;     {  -> &#123;    }  -> &#125;
 * Every other byte is copied unchanged.
 *
 * dst is not bounds-checked. The longest replacement is 6 bytes, so a buffer
 * of 6 * input_size + 1 bytes is always large enough.
 *
 * The SSE4.2 path never reads or scans beyond input + input_size: a final
 * chunk shorter than 16 bytes is staged in a local buffer and its real length
 * is passed to the comparison, and an empty input produces just the NUL.
 */
static inline void _phe_escape_html(char *dst, const char *input, size_t input_size) {
#if __SSE4_2__
    const __m128i ranges = _mm_loadu_si128((const __m128i*) RANGES);

    while (input_size > 0) {
        const int chunk_len = input_size < 16 ? (int) input_size : 16;
        __m128i v;

        if (chunk_len == 16) {
            v = _mm_loadu_si128((const __m128i*) input);
        } else {
            /* Short tail: copy it into a zeroed buffer so the 16-byte load
             * does not touch memory past the end of the input. */
            char tail[16] = {0};
            memcpy(tail, input, (size_t) chunk_len);
            v = _mm_loadu_si128((const __m128i*) tail);
        }

        /* Passing chunk_len as the explicit length keeps bytes after the end
         * of the input from being reported as matches.
         * NOTE(review): assumes CMPESTRI_FLAG selects the least significant
         * matching index, as the original "cursor != 16" test implies. */
        const int cursor = _mm_cmpestri(ranges, RANGE_SIZE, v, chunk_len, CMPESTRI_FLAG);

        if (cursor >= chunk_len) {
            /* Nothing to escape in this chunk. */
            memcpy(dst, input, (size_t) chunk_len);
            dst += chunk_len;
            input += chunk_len;
            input_size -= (size_t) chunk_len;
            continue;
        }

        /* Copy the clean prefix, then emit the replacement for input[cursor]. */
        memcpy(dst, input, (size_t) cursor);
        dst += cursor;

        const char c = input[cursor];
        switch (c) {
            case '&':
                memcpy(dst, "&amp;", 5);
                dst += 5;
                break;
            case '>':
                memcpy(dst, "&gt;", 4);
                dst += 4;
                break;
            case '<':
                memcpy(dst, "&lt;", 4);
                dst += 4;
                break;
            case '"':
                memcpy(dst, "&quot;", 6);
                dst += 6;
                break;
            case '\'':
                memcpy(dst, "&#39;", 5);
                dst += 5;
                break;
            case '`':
                // For IE. IE interprets back-quote as valid quoting characters
                // ref: https://rt.cpan.org/Public/Bug/Display.html?id=84971
                memcpy(dst, "&#96;", 5);
                dst += 5;
                break;
            case '{':
                // For javascript templates (e.g. AngularJS and such javascript frameworks)
                // ref: https://github.com/angular/angular.js/issues/5601
                memcpy(dst, "&#123;", 6);
                dst += 6;
                break;
            case '}':
                // For javascript templates (e.g. AngularJS and such javascript frameworks)
                // ref: https://github.com/angular/angular.js/issues/5601
                memcpy(dst, "&#125;", 6);
                dst += 6;
                break;
            default:
                /* Matched by the range table but has no replacement: copy as is. */
                *dst++ = c;
                break;
        }

        input += cursor + 1;
        input_size -= (size_t) (cursor + 1);
    }

    *dst = '\0';
#else
    /* pp maps a byte to an index into dd/dl; 0 means "copy unchanged". */
    static const int pp[UCHAR_MAX+1] = {
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
        0,0,4,0,0,0,1,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
        6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,8,0,0
        /* following zero(s) */
    };
    static const char *const dd[] = {NULL, "&amp;", "&gt;", "&lt;", "&quot;", "&#39;", "&#96;", "&#123;", "&#125;"};
    static const int dl[] = {0, 5, 4, 4, 6, 5, 5, 6, 6};

    const char *ptr = input;
    const char *const end = input + input_size;

    while (ptr < end) {
        const unsigned char c = (unsigned char) *ptr++;
        const int i = pp[c];
        if (i == 0) {
            *dst++ = (char) c;
        } else {
            memcpy(dst, dd[i], (size_t) dl[i]);
            dst += dl[i];
        }
    }
    *dst = '\0';
#endif
}
Beispiel #8
0
/* Cross-check _mm_cmpestri and the flag-returning _mm_cmpestr{c,z,s,o,a}
   intrinsics against the cmp_ei reference routine.

   Both operand arrays are filled with random bytes.  Each of the NUM
   vector pairs is compared once, using random explicit lengths and one
   of the four IMM_VAL* control bytes chosen at random.  The test aborts
   if the index differs from the reference, if the C/Z/S/O results do not
   reproduce the reference flag word, or if the "a" result is not the
   negation of (C or Z).  */
static void
TEST (void)
{
  union
    {
      __m128i x[NUM];
      char c[NUM *16];
    } src1, src2;
  int got, expected, expected_flags, len1, len2;
  int got_flags, cf, zf, sf, of, af;
  int n;

  /* The control byte must be a compile-time constant, so the whole group
     of calls is stamped out once per IMM_VAL*.  */
#define RUN_ALL(imm)							\
  do									\
    {									\
      got = _mm_cmpestri (src1.x[n], len1, src2.x[n], len2, imm);	\
      cf = _mm_cmpestrc (src1.x[n], len1, src2.x[n], len2, imm);	\
      zf = _mm_cmpestrz (src1.x[n], len1, src2.x[n], len2, imm);	\
      sf = _mm_cmpestrs (src1.x[n], len1, src2.x[n], len2, imm);	\
      of = _mm_cmpestro (src1.x[n], len1, src2.x[n], len2, imm);	\
      af = _mm_cmpestra (src1.x[n], len1, src2.x[n], len2, imm);	\
      expected = cmp_ei (&src1.x[n], len1, &src2.x[n], len2, imm,	\
			 &expected_flags);				\
    }									\
  while (0)

  /* Random input bytes for both operands.  */
  for (n = 0; n < NUM *16; n++)
    {
      src1.c[n] = rand ();
      src2.c[n] = rand ();
    }

  for (n = 0; n < NUM; n++)
    {
      /* Explicit lengths are drawn from 0..17.  */
      len1 = rand () % 18;
      len2 = rand () % 18;

      switch ((rand () % 4))
	{
	case 0:
	  RUN_ALL (IMM_VAL0);
	  break;

	case 1:
	  RUN_ALL (IMM_VAL1);
	  break;

	case 2:
	  RUN_ALL (IMM_VAL2);
	  break;

	default:
	  RUN_ALL (IMM_VAL3);
	  break;
	}

      if (got != expected)
	abort ();

      /* Rebuild the flag word from the individual flag intrinsics.  */
      got_flags = (cf ? CFLAG : 0)
		  | (zf ? ZFLAG : 0)
		  | (sf ? SFLAG : 0)
		  | (of ? OFLAG : 0);

      /* The "a" result has to be set exactly when neither C nor Z is.  */
      if (got_flags != expected_flags
	  || !!af != !(cf || zf))
	abort ();
    }

#undef RUN_ALL
}
Beispiel #9
0
// Codegen test: wraps _mm_cmpestri with the constant control byte 7. The
// directives inside the body pin the LLVM intrinsic call that is expected,
// including the i8 7 immediate operand.
int test_mm_cmpestri(__m128i A, int LA, __m128i B, int LB) {
  // CHECK-LABEL: test_mm_cmpestri
  // CHECK: call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %{{.*}}, i32 %{{.*}}, <16 x i8> %{{.*}}, i32 %{{.*}}, i8 7)
  return _mm_cmpestri(A, LA, B, LB, 7);
}
Beispiel #10
0
// Codegen test: wraps _mm_cmpestri with the constant control byte 7. The
// directives inside the body only require that the pcmpestri128 LLVM
// intrinsic appears in the output for this function.
int test_mm_cmpestri(__m128i A, int LA, __m128i B, int LB) {
  // CHECK-LABEL: test_mm_cmpestri
  // CHECK: @llvm.x86.sse42.pcmpestri128
  return _mm_cmpestri(A, LA, B, LB, 7);
}