Example #1
0
void
test8bit (void)
{
  i1 = _mm_cmpistrm (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistri (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistra (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistrc (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistro (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistrs (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
  k1 = _mm_cmpistrz (i2, i3, k4);	  /* { dg-error "the third argument must be an 8-bit immediate" } */
  i1 = _mm_cmpestrm (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestri (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestra (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestrc (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestro (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestrs (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  k1 = _mm_cmpestrz (i2, k2, i3, k3, k4); /* { dg-error "the fifth argument must be an 8-bit immediate" } */
  b1 = _mm256_blend_ps (b2, b3, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
  k1 = _cvtss_sh (f1, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm256_cvtps_ph (b2, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
  b1 = _mm256_dp_ps (b2, b3, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
  e1 = _mm256_permute2f128_pd (e2, e3, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */
  b1 = _mm256_permute2f128_ps (b2, b3, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */
  l1 = _mm256_permute2f128_si256 (l2, l3, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */
  b1 = _mm256_permute_ps (b2, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_aeskeygenassist_si128 (i2, k4);/* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_blend_epi16 (i2, i3, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_clmulepi64_si128 (i2, i3, k4); /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_cvtps_ph (a1, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
  d1 = _mm_dp_pd (d2, d3, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
  a1 = _mm_dp_ps (a2, a3, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
  a1 = _mm_insert_ps (a2, a3, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_mpsadbw_epu8 (i2, i3, k4);	  /* { dg-error "the last argument must be an 8-bit immediate" } */
  a1 = _mm_permute_ps (a2, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_slli_si128 (i2, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
  i1 = _mm_srli_si128 (i2, k4);		  /* { dg-error "the last argument must be an 8-bit immediate" } */
}
Example #2
0
static void
sse4_2_test (void)
{
    union
    {
        __m128i x[NUM];
        char c[NUM *16];
    } src1, src2;
    __m128i res, correct;
    int correct_flags, l1, l2;
    int flags, cf, zf, sf, of, af;
    int i;

    for (i = 0; i < NUM *16; i++)
    {
        src1.c[i] = rand ();
        src2.c[i] = rand ();
    }

    for (i = 0; i < NUM; i++)
    {
        l1 = rand () % 18;
        l2 = rand () % 18;

        switch ((rand () % 4))
        {
        case 0:
            res = _mm_cmpestrm (src1.x[i], l1, src2.x[i], l2, IMM_VAL0);
            cf = _mm_cmpestrc (src1.x[i], l1, src2.x[i], l2, IMM_VAL0);
            zf = _mm_cmpestrz (src1.x[i], l1, src2.x[i], l2, IMM_VAL0);
            sf = _mm_cmpestrs (src1.x[i], l1, src2.x[i], l2, IMM_VAL0);
            of = _mm_cmpestro (src1.x[i], l1, src2.x[i], l2, IMM_VAL0);
            af = _mm_cmpestra (src1.x[i], l1, src2.x[i], l2, IMM_VAL0);
            correct = cmp_em (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL0,
                              &correct_flags);
            break;

        case 1:
            res = _mm_cmpestrm (src1.x[i], l1, src2.x[i], l2, IMM_VAL1);
            cf = _mm_cmpestrc (src1.x[i], l1, src2.x[i], l2, IMM_VAL1);
            zf = _mm_cmpestrz (src1.x[i], l1, src2.x[i], l2, IMM_VAL1);
            sf = _mm_cmpestrs (src1.x[i], l1, src2.x[i], l2, IMM_VAL1);
            of = _mm_cmpestro (src1.x[i], l1, src2.x[i], l2, IMM_VAL1);
            af = _mm_cmpestra (src1.x[i], l1, src2.x[i], l2, IMM_VAL1);
            correct = cmp_em (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL1,
                              &correct_flags);
            break;

        case 2:
            res = _mm_cmpestrm (src1.x[i], l1, src2.x[i], l2, IMM_VAL2);
            cf = _mm_cmpestrc (src1.x[i], l1, src2.x[i], l2, IMM_VAL2);
            zf = _mm_cmpestrz (src1.x[i], l1, src2.x[i], l2, IMM_VAL2);
            sf = _mm_cmpestrs (src1.x[i], l1, src2.x[i], l2, IMM_VAL2);
            of = _mm_cmpestro (src1.x[i], l1, src2.x[i], l2, IMM_VAL2);
            af = _mm_cmpestra (src1.x[i], l1, src2.x[i], l2, IMM_VAL2);
            correct = cmp_em (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL2,
                              &correct_flags);
            break;

        default:
            res = _mm_cmpestrm (src1.x[i], l1, src2.x[i], l2, IMM_VAL3);
            cf = _mm_cmpestrc (src1.x[i], l1, src2.x[i], l2, IMM_VAL3);
            zf = _mm_cmpestrz (src1.x[i], l1, src2.x[i], l2, IMM_VAL3);
            sf = _mm_cmpestrs (src1.x[i], l1, src2.x[i], l2, IMM_VAL3);
            of = _mm_cmpestro (src1.x[i], l1, src2.x[i], l2, IMM_VAL3);
            af = _mm_cmpestra (src1.x[i], l1, src2.x[i], l2, IMM_VAL3);
            correct = cmp_em (&src1.x[i], l1, &src2.x[i], l2, IMM_VAL3,
                              &correct_flags);
            break;
        }

        if (memcmp (&correct, &res, sizeof (res)))
            abort ();

        flags = 0;
        if (cf)
            flags |= CFLAG;
        if (zf)
            flags |= ZFLAG;
        if (sf)
            flags |= SFLAG;
        if (of)
            flags |= OFLAG;

        if (flags != correct_flags
                || (af && (cf || zf))
                || (!af && !(cf || zf)))
            abort ();
    }
}
Example #3
0
int test_mm_cmpestrc(__m128i A, int LA, __m128i B, int LB) {
  // CHECK-LABEL: test_mm_cmpestrc
  // CHECK: call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %{{.*}}, i32 %{{.*}}, <16 x i8> %{{.*}}, i32 %{{.*}}, i8 7)
  return _mm_cmpestrc(A, LA, B, LB, 7);
}
Example #4
0
int test_mm_cmpestrc(__m128i A, int LA, __m128i B, int LB) {
  // CHECK-LABEL: test_mm_cmpestrc
  // CHECK: @llvm.x86.sse42.pcmpestric128
  return _mm_cmpestrc(A, LA, B, LB, 7);
}
}bool validate_utf8_sse(const char *src, size_t len) {
  const char *end = src + len;
  while (src + 16 < end) {
    __m128i chunk = _mm_loadu_si128((const __m128i *)(src));

    int asciiMask = _mm_movemask_epi8(chunk);
    if (!asciiMask) {
      src += 16;
      continue;
    }

    __m128i chunk_signed = _mm_add_epi8(chunk, _mm_set1_epi8(0x80));
    __m128i cond2 =
        _mm_cmplt_epi8(_mm_set1_epi8(0xc2 - 1 - 0x80), chunk_signed);
    __m128i state = _mm_set1_epi8((char)(0x0 | 0x80));
    state = _mm_blendv_epi8(state, _mm_set1_epi8((char)(0x2 | 0xc0)), cond2);

    __m128i cond3 =
        _mm_cmplt_epi8(_mm_set1_epi8(0xe0 - 1 - 0x80), chunk_signed);

    state = _mm_blendv_epi8(state, _mm_set1_epi8((char)(0x3 | 0xe0)), cond3);
    __m128i mask3 = _mm_slli_si128(cond3, 1);

    __m128i cond4 =
        _mm_cmplt_epi8(_mm_set1_epi8(0xf0 - 1 - 0x80), chunk_signed);

    // Fall back to the scalar processing
    if (_mm_movemask_epi8(cond4)) {
      break;
    }

    __m128i count = _mm_and_si128(state, _mm_set1_epi8(0x7));

    __m128i count_sub1 = _mm_subs_epu8(count, _mm_set1_epi8(0x1));

    __m128i counts = _mm_add_epi8(count, _mm_slli_si128(count_sub1, 1));

    __m128i shifts = count_sub1;
    shifts = _mm_add_epi8(shifts, _mm_slli_si128(shifts, 1));
    counts = _mm_add_epi8(
        counts, _mm_slli_si128(_mm_subs_epu8(counts, _mm_set1_epi8(0x2)), 2));
    shifts = _mm_add_epi8(shifts, _mm_slli_si128(shifts, 2));

    if (asciiMask ^ _mm_movemask_epi8(_mm_cmpgt_epi8(counts, _mm_set1_epi8(0))))
      return false; // error
    shifts = _mm_add_epi8(shifts, _mm_slli_si128(shifts, 4));

    if (_mm_movemask_epi8(_mm_cmpgt_epi8(
            _mm_sub_epi8(_mm_slli_si128(counts, 1), counts), _mm_set1_epi8(1))))
      return false; // error

    shifts = _mm_add_epi8(shifts, _mm_slli_si128(shifts, 8));

    __m128i mask = _mm_and_si128(state, _mm_set1_epi8(0xf8));
    shifts =
        _mm_and_si128(shifts, _mm_cmplt_epi8(counts, _mm_set1_epi8(2))); // <=1

    chunk =
        _mm_andnot_si128(mask, chunk); // from now on, we only have usefull bits

    shifts = _mm_blendv_epi8(shifts, _mm_srli_si128(shifts, 1),
                             _mm_srli_si128(_mm_slli_epi16(shifts, 7), 1));

    __m128i chunk_right = _mm_slli_si128(chunk, 1);

    __m128i chunk_low = _mm_blendv_epi8(
        chunk,
        _mm_or_si128(chunk, _mm_and_si128(_mm_slli_epi16(chunk_right, 6),
                                          _mm_set1_epi8(0xc0))),
        _mm_cmpeq_epi8(counts, _mm_set1_epi8(1)));

    __m128i chunk_high =
        _mm_and_si128(chunk, _mm_cmpeq_epi8(counts, _mm_set1_epi8(2)));

    shifts = _mm_blendv_epi8(shifts, _mm_srli_si128(shifts, 2),
                             _mm_srli_si128(_mm_slli_epi16(shifts, 6), 2));
    chunk_high = _mm_srli_epi32(chunk_high, 2);

    shifts = _mm_blendv_epi8(shifts, _mm_srli_si128(shifts, 4),
                             _mm_srli_si128(_mm_slli_epi16(shifts, 5), 4));
    chunk_high = _mm_or_si128(
        chunk_high, _mm_and_si128(_mm_and_si128(_mm_slli_epi32(chunk_right, 4),
                                                _mm_set1_epi8(0xf0)),
                                  mask3));
    int c = _mm_extract_epi16(counts, 7);
    int source_advance = !(c & 0x0200) ? 16 : !(c & 0x02) ? 15 : 14;

    __m128i high_bits = _mm_and_si128(chunk_high, _mm_set1_epi8(0xf8));
    if (!_mm_testz_si128(
            mask3,
            _mm_or_si128(_mm_cmpeq_epi8(high_bits, _mm_set1_epi8(0x00)),
                         _mm_cmpeq_epi8(high_bits, _mm_set1_epi8(0xd8)))))
      return false;

    shifts = _mm_blendv_epi8(shifts, _mm_srli_si128(shifts, 8),
                             _mm_srli_si128(_mm_slli_epi16(shifts, 4), 8));

    chunk_high = _mm_slli_si128(chunk_high, 1);

    __m128i shuf =
        _mm_add_epi8(shifts, _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5,
                                          4, 3, 2, 1, 0));

    chunk_low = _mm_shuffle_epi8(chunk_low, shuf);
    chunk_high = _mm_shuffle_epi8(chunk_high, shuf);
    __m128i utf16_low = _mm_unpacklo_epi8(chunk_low, chunk_high);
    __m128i utf16_high = _mm_unpackhi_epi8(chunk_low, chunk_high);

    if (_mm_cmpestrc(_mm_cvtsi64_si128(0xfdeffdd0fffffffe), 4, utf16_high, 8,
                     _SIDD_UWORD_OPS | _SIDD_CMP_RANGES) |
        _mm_cmpestrc(_mm_cvtsi64_si128(0xfdeffdd0fffffffe), 4, utf16_low, 8,
                     _SIDD_UWORD_OPS | _SIDD_CMP_RANGES)) {
      return false;
    }

    src += source_advance;
  }
  return validate_utf8(src, end - src);
}