bool recogUnicodeRange(const __m128i data, int& dataLength, unsigned int mask) { //first check whether in the 2 bytes encoding range const __m128i Unicode_80_BE = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '\xBE','\x80'); unsigned int mask_80_BE = _mm_cvtsi128_si32(_mm_cmpestrm(Unicode_80_BE, 2, data, dataLength, _SIDD_CMP_RANGES)); const __m128i Unicode_C2_DF = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '\xDF', '\xC2'); unsigned int mask_C2_DF = _mm_cvtsi128_si32(_mm_cmpestrm(Unicode_C2_DF, 2, data, dataLength, _SIDD_CMP_RANGES)); if( mask_C2_DF > 0 ) { checkIncompleteBytes(mask_C2_DF, mask, dataLength, 1); if( mask_C2_DF > 0 ) { unsigned int mask_C2_DF_2 = mask_C2_DF << 1; if( (mask_C2_DF_2 & mask_80_BE) != mask_C2_DF_2 ) { const __m128i Unicode_80_BF = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '\xBF', '\x80'); unsigned int mask_80_BF = _mm_cvtsi128_si32(_mm_cmpestrm(Unicode_80_BF, 2, data, dataLength, _SIDD_CMP_RANGES)); if( (mask_C2_DF_2 & mask_80_BF) != mask_C2_DF_2 ) { return false; } } mask |= mask_C2_DF; mask |= mask_C2_DF_2; if( mask == 0xFFFFFFFF ) { return true; } } else { if( dataLength <= 0 ) return false; if( mask == 0xFFFFFFFF ) return true; } } //then check whether in the 3 bytes encoding range const __m128i Unicode_E1_EC_EE_EF = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '\xEF', '\xEF', '\xEE', '\xEE', '\xEC', '\xE1'); unsigned int mask_E1_EC_EE_EF = _mm_cvtsi128_si32(_mm_cmpestrm(Unicode_E1_EC_EE_EF, 6, data, dataLength, _SIDD_CMP_RANGES)); if( mask_E1_EC_EE_EF > 0 ) { checkIncompleteBytes(mask_E1_EC_EE_EF, mask, dataLength, 2); if( mask_E1_EC_EE_EF > 0 ) { unsigned int mask_E1_EC_EE_EF_2 = mask_E1_EC_EE_EF << 1; unsigned int mask_E1_EC_EE_EF_3 = mask_E1_EC_EE_EF << 2; if( (mask_E1_EC_EE_EF_2 & mask_80_BE) == mask_E1_EC_EE_EF_2 ) { if( (mask_E1_EC_EE_EF_3 & mask_80_BE) != mask_E1_EC_EE_EF_3 ) { const __m128i Unicode_80_BF = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '\xBF', '\x80'); unsigned int 
mask_80_BF = _mm_cvtsi128_si32(_mm_cmpestrm(Unicode_80_BF, 2, data, dataLength, _SIDD_CMP_RANGES)); if( (mask_E1_EC_EE_EF_3 & mask_80_BF) != mask_E1_EC_EE_EF_3 ) { return false; } } mask |= mask_E1_EC_EE_EF; mask |= mask_E1_EC_EE_EF_2; mask |= mask_E1_EC_EE_EF_3; if( mask == 0xFFFFFFFF ) { return true; } } else { return false; } } else { if( dataLength <= 0 ) return false; if( mask == 0xFFFFFFFF ) return true; } } return false; }
/* Randomised comparison of _mm_cmpestrm against cmp_em.

   NUM pairs of 16-byte operands are filled with rand () output.  For each
   pair two explicit lengths in 0..17 are drawn, one of the four immediates
   IMM_VAL0..IMM_VAL3 is picked at random, and the intrinsic's result must be
   byte-identical to what cmp_em returns for the same arguments; any
   mismatch aborts.  cmp_em, NUM and the IMM_VALn immediates are defined
   outside this function (cmp_em is presumably the scalar reference model -
   confirm against its definition).  */
static void
sse4_2_test (void)
{
  union
    {
      __m128i x[NUM];
      char c[NUM *16];
    } lhs, rhs;
  __m128i got, want;
  int len_a, len_b;
  int selector;
  int n;

  /* Fill both pools, alternating rand () calls between them.  */
  n = 0;
  while (n < NUM *16)
    {
      lhs.c[n] = rand ();
      rhs.c[n] = rand ();
      n++;
    }

  for (n = 0; n < NUM; n++)
    {
      __m128i *a = &lhs.x[n];
      __m128i *b = &rhs.x[n];

      len_a = rand () % 18;
      len_b = rand () % 18;
      selector = rand () % 4;

      /* The immediate must be a compile-time constant, so every choice
         needs its own call site.  */
      if (selector == 0)
        {
          got = _mm_cmpestrm (*a, len_a, *b, len_b, IMM_VAL0);
          want = cmp_em (a, len_a, b, len_b, IMM_VAL0, NULL);
        }
      else if (selector == 1)
        {
          got = _mm_cmpestrm (*a, len_a, *b, len_b, IMM_VAL1);
          want = cmp_em (a, len_a, b, len_b, IMM_VAL1, NULL);
        }
      else if (selector == 2)
        {
          got = _mm_cmpestrm (*a, len_a, *b, len_b, IMM_VAL2);
          want = cmp_em (a, len_a, b, len_b, IMM_VAL2, NULL);
        }
      else
        {
          got = _mm_cmpestrm (*a, len_a, *b, len_b, IMM_VAL3);
          want = cmp_em (a, len_a, b, len_b, IMM_VAL3, NULL);
        }

      if (memcmp (&want, &got, sizeof (got)) != 0)
        abort ();
    }
}
// Minimal probe that exercises _mm_cmpestrm once: an all-zero operand is
// compared against an operand holding 42 in every 32-bit lane, both with an
// explicit length of 16 and immediate 0. The operands and the result are
// volatile and the result is only discarded; the program always returns 0.
int main(int, char**)
{
    volatile __m128i zeroOperand = _mm_setzero_si128();
    volatile __m128i filledOperand = _mm_set1_epi32(42);

    volatile __m128i compareMask = _mm_cmpestrm(zeroOperand, 16, filledOperand, 16, 0);
    (void)compareMask;

    return 0;
}
/*
 * Scan character data starting at yycur in 16-byte windows and report each
 * accepted run through saxProcessor->reportCharDataContent.
 *
 * saxProcessor - receives newLine() notifications for line ends and
 *                reportCharDataContent() for each accepted run.
 *
 * The function returns immediately when the first byte is '<', '&' or ']'.
 * Otherwise each pass of the outer loop first handles one line end
 * (LF, CR LF or a lone CR, which is rewritten in place to LF), then
 * advances over bytes until one of LF, CR, '&', '<', ']' is found or a
 * window fails validation, and finally reports the bytes between yycur and
 * the scan position.
 *
 * NOTE(review): every window is loaded with a full 16-byte unaligned load,
 * and the CR case reads data[1]; this assumes the buffer stays readable
 * past yylim - confirm against the buffer management behind
 * YYSWITCHBUFFER.
 */
void scanCharDataContentwithSTTNI(SAX2Processor* saxProcessor)
{
    unsigned int length = yylim - yycur;
    unsigned char* data = (unsigned char*)yycur;

    if( *data == '<' || *data == '&' || *data == ']')
        return;

    unsigned int dataLen = 0;

    // initialize the one byte encoding rule and nonCharaData rule
    // asciiCharData holds five inclusive (low, high) ranges, lowest byte first:
    // 0x00..0x00, 0x20..0x25, 0x27..0x3B, 0x3D..0x5C, 0x5E..0x7F.
    const __m128i asciiCharData = _mm_set_epi8(0,0,0,0,0,0,0x7F,0x5E,0x5C,0x3D,
                                               0x3B,0x27,0x25,0x20,0,0);
    // nonCharData holds the five bytes that end a run: LF, CR, '&', '<', ']'.
    const __m128i nonCharData = _mm_set_epi8(0,0,0,0,0,0,0,0,0,0,0,0x5D,0x3C,0x26,0x0D,0x0A);

    do
    {
        // special new line processing for 0x0A (LF) and 0x0D (CR)
        if( *data == '\x0A' )
        {
            saxProcessor->newLine((char*)data);
            data++;
            length--;
        }
        else if( *data == '\x0D' )
        {
            saxProcessor->newLine((char*)data);
            if( *(data+1) == '\x0A' )
            {
                // CR LF: step over both and advance yycur past the CR so the
                // reported run starts at the LF.
                data += 2;
                length -= 2;
                yycur++;
            }
            else
            {
                // Lone CR: rewrite it to LF in place.
                *data = '\x0A';
                data++;
                length--;
            }
        }

        while( length > 0 )
        {
            if( length >= 16 )
                dataLen = 16;
            else
                dataLen = length;

            const __m128i mData = _mm_loadu_si128((__m128i*)data);

            // locate the Character Data part with the nonCharaData characters
            int index = _mm_cmpestri(nonCharData, 5, mData, dataLen, _SIDD_CMP_EQUAL_ANY);
            if( index == 0 )
                break;
            // No terminator found yields 16; clamp to the valid length.
            if( index > dataLen )
                index = dataLen;
            // A terminator inside the window ends the run after this window.
            bool shouldBreak = index < dataLen ? true : false;

            // check the one byte encoding rule(ASCII)
            // Negative polarity: a set bit marks a byte outside the ASCII ranges.
            unsigned int mask = _mm_cvtsi128_si32(_mm_cmpestrm(asciiCharData, 10, mData, index, _SIDD_CMP_RANGES|_SIDD_MASKED_NEGATIVE_POLARITY));

            // if not all hit ASCII, continue to check other Unicode rules
            // index is passed by reference, so recogUnicodeRange may adjust it.
            if( mask == 0 || recogUnicodeRange(mData, index, ~mask))
            {
                data += index;
                length -= index;
                if( shouldBreak )
                    break;
            }
            else
            {
                break;
            }
        }

        unsigned int passLen = (char*)data - yycur;
        if( passLen == 0 )
            break;

        // report Character Data to user
        saxProcessor->reportCharDataContent(yycur, passLen);
        yycur += passLen;
        YYSWITCHBUFFER;
    } while( length >= STTNISTRLENLIMIT && (*data == '\x0A' || *data == '\x0D') );
}
/*
 * Accumulate a match count over the first `position` bytes of `vector`,
 * 16 bytes at a time, using _mm_cmpestrm against a pattern made of 16
 * copies of `searchedByte`.
 *
 * vector       - start of the byte sequence (typed uint * by the caller).
 * searchedByte - byte value replicated into the comparison pattern.
 * position     - number of leading bytes to examine.
 *
 * Returns the sum of the set bits of every block's result mask; the last,
 * partial block is corrected by `+ r - 16`.
 *
 * NOTE(review): `mode` is defined elsewhere. The `+ r - 16` correction only
 * yields "occurrences of searchedByte" if `mode` selects an equal-each byte
 * comparison with a bit-mask result (positions past both lengths then
 * compare as matches) - confirm. `byte` is assumed to be an 8-bit type.
 */
uint seqRank(uint *vector, byte searchedByte, uint position)
{
    const byte *text = (const byte *)vector;
    const uint d = position >> 4;  /* number of complete 16-byte blocks */
    const uint r = position & 0xf; /* bytes left over after those blocks */
    const __m128i patt = _mm_set1_epi8((char)searchedByte);
    __m128i window, returnValue;
    byte tail[16] = {0};
    uint i, cont = 0;

    for (i = 0; i < d; i++) {
        window = _mm_loadu_si128((const __m128i *)text);
        returnValue = _mm_cmpestrm(patt, 16, window, 16, mode);
        cont += _mm_popcnt_u32(_mm_extract_epi32(returnValue, 0));
        text += 16;
    }

    /* Copy only the r remaining bytes so the load never touches memory past
       `position`; bytes beyond the explicit length r do not affect the
       result of the comparison. */
    for (i = 0; i < r; i++) {
        tail[i] = text[i];
    }
    window = _mm_loadu_si128((const __m128i *)tail);
    returnValue = _mm_cmpestrm(patt, r, window, r, mode);
    cont += _mm_popcnt_u32(_mm_extract_epi32(returnValue, 0)) + r - 16;

    return cont;
}
/* Negative compile test for intrinsics whose final operand must be an
   8-bit immediate.  Every statement passes k4 in that position, and the
   DejaGnu directive at the end of the same line names the diagnostic the
   compiler is expected to issue: "third argument" for the implicit-length
   string compares, "fifth argument" for the explicit-length ones and
   "last argument" for the rest.  All operands (i1, k1, b1, k4, ...) are
   declared outside this function; k4 is presumably not a compile-time
   constant - confirm against its declaration.  Keep each directive on the
   line of the statement it belongs to.  */
void test8bit (void)
{
  i1 = _mm_cmpistrm (i2, i3, k4); /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistri (i2, i3, k4); /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistra (i2, i3, k4); /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistrc (i2, i3, k4); /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistro (i2, i3, k4); /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistrs (i2, i3, k4); /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistrz (i2, i3, k4); /* { dg-error "the third argument must be an 8-bit immediate" } */
  i1 = _mm_cmpestrm (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestri (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestra (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestrc (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestro (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestrs (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestrz (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  b1 = _mm256_blend_ps (b2, b3, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */
  k1 = _cvtss_sh (f1, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm256_cvtps_ph (b2, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */
  b1 = _mm256_dp_ps (b2, b3, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */
  e1 = _mm256_permute2f128_pd (e2, e3, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */
  b1 = _mm256_permute2f128_ps (b2, b3, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */
  l1 = _mm256_permute2f128_si256 (l2, l3, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */
  b1 = _mm256_permute_ps (b2, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_aeskeygenassist_si128 (i2, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_blend_epi16 (i2, i3, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_clmulepi64_si128 (i2, i3, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_cvtps_ph (a1, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */
  d1 = _mm_dp_pd (d2, d3, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */
  a1 = _mm_dp_ps (a2, a3, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */
  a1 = _mm_insert_ps (a2, a3, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_mpsadbw_epu8 (i2, i3, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */
  a1 = _mm_permute_ps (a2, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_slli_si128 (i2, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_srli_si128 (i2, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */
}
static void sse4_2_test (void) { union { __m128i x[NUM]; char c[NUM *16]; } src1, src2; __m128i res, correct; int correct_flags, l1, l2; int flags, cf, zf, sf, of, af; int i; for (i = 0; i < NUM *16; i++) { src1.c[i] = rand (); src2.c[i] = rand (); } for (i = 0; i < NUM; i++) { l1 = rand () % 18; l2 = rand () % 18; switch ((rand () % 4)) { case 0: res = _mm_cmpestrm (src1.x[i], l1, src2.x[i], l2, IMM_VAL0); cf = _mm_cmpestrc (src1.x[i], l1, src2.x[i], l2, IMM_VAL0); zf = _mm_cmpestrz (src1.x[i], l1, src2.x[i], l2, IMM_VAL0); sf = _mm_cmpestrs (src1.x[i], l1, src2.x[i], l2, IMM_VAL0); of = _mm_cmpestro (src1.x[i], l1, src2.x[i], l2, IMM_VAL0); af = _mm_cmpestra (src1.x[i], l1, src2.x[i], l2, IMM_VAL0); correct = cmp_em (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL0, &correct_flags); break; case 1: res = _mm_cmpestrm (src1.x[i], l1, src2.x[i], l2, IMM_VAL1); cf = _mm_cmpestrc (src1.x[i], l1, src2.x[i], l2, IMM_VAL1); zf = _mm_cmpestrz (src1.x[i], l1, src2.x[i], l2, IMM_VAL1); sf = _mm_cmpestrs (src1.x[i], l1, src2.x[i], l2, IMM_VAL1); of = _mm_cmpestro (src1.x[i], l1, src2.x[i], l2, IMM_VAL1); af = _mm_cmpestra (src1.x[i], l1, src2.x[i], l2, IMM_VAL1); correct = cmp_em (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL1, &correct_flags); break; case 2: res = _mm_cmpestrm (src1.x[i], l1, src2.x[i], l2, IMM_VAL2); cf = _mm_cmpestrc (src1.x[i], l1, src2.x[i], l2, IMM_VAL2); zf = _mm_cmpestrz (src1.x[i], l1, src2.x[i], l2, IMM_VAL2); sf = _mm_cmpestrs (src1.x[i], l1, src2.x[i], l2, IMM_VAL2); of = _mm_cmpestro (src1.x[i], l1, src2.x[i], l2, IMM_VAL2); af = _mm_cmpestra (src1.x[i], l1, src2.x[i], l2, IMM_VAL2); correct = cmp_em (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL2, &correct_flags); break; default: res = _mm_cmpestrm (src1.x[i], l1, src2.x[i], l2, IMM_VAL3); cf = _mm_cmpestrc (src1.x[i], l1, src2.x[i], l2, IMM_VAL3); zf = _mm_cmpestrz (src1.x[i], l1, src2.x[i], l2, IMM_VAL3); sf = _mm_cmpestrs (src1.x[i], l1, src2.x[i], l2, IMM_VAL3); of = _mm_cmpestro (src1.x[i], l1, src2.x[i], l2, 
IMM_VAL3); af = _mm_cmpestra (src1.x[i], l1, src2.x[i], l2, IMM_VAL3); correct = cmp_em (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL3, &correct_flags); break; } if (memcmp (&correct, &res, sizeof (res))) abort (); flags = 0; if (cf) flags |= CFLAG; if (zf) flags |= ZFLAG; if (sf) flags |= SFLAG; if (of) flags |= OFLAG; if (flags != correct_flags || (af && (cf || zf)) || (!af && !(cf || zf))) abort (); } }
/* Code-generation test wrapper around _mm_cmpestrm with immediate 7
   (signed 16-bit elements, "ranges" comparison, bit-mask result).
   A and B are the two operands, LA and LB their explicit lengths in
   elements; the intrinsic's mask is returned unchanged.  The FileCheck
   directives inside the body must each stay on their own line, ahead of
   the return statement.  */
__m128i test_mm_cmpestrm(__m128i A, int LA, __m128i B, int LB) {
  // CHECK-LABEL: test_mm_cmpestrm
  // CHECK: call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %{{.*}}, i32 %{{.*}}, <16 x i8> %{{.*}}, i32 %{{.*}}, i8 7)
  return _mm_cmpestrm(A, LA, B, LB, 7);
}
/* Code-generation test wrapper around _mm_cmpestrm with immediate 7
   (signed 16-bit elements, "ranges" comparison, bit-mask result).
   A and B are the two operands, LA and LB their explicit lengths in
   elements; the intrinsic's mask is returned unchanged.  The FileCheck
   directives inside the body must each stay on their own line, ahead of
   the return statement.  */
__m128i test_mm_cmpestrm(__m128i A, int LA, __m128i B, int LB) {
  // CHECK-LABEL: test_mm_cmpestrm
  // CHECK: @llvm.x86.sse42.pcmpestrm128
  return _mm_cmpestrm(A, LA, B, LB, 7);
}