/* Check _mm_min_epi8 against a scalar reference.

   Two NUM-byte buffers are filled with values whose sign flips on every
   element, the packed byte minimum is computed 16 lanes at a time, and
   each lane of the result is compared with a minimum computed one byte
   at a time.  Calls abort () on the first mismatch.

   The byte view is declared `signed char' rather than plain `char':
   _mm_min_epi8 compares its lanes as signed bytes, while the signedness
   of plain `char' is implementation-defined (for instance it changes
   under -funsigned-char).  With an unsigned `char' the scalar reference
   would compute an unsigned minimum and the test would fail spuriously.  */
static void
sse4_1_test (void)
{
  union
    {
      __m128i x[NUM / 16];
      signed char i[NUM];
    } dst, src1, src2;
  int i, sign = 1;
  signed char min;

  /* Fill both inputs; the int expressions are converted to a byte on
     store, and the sign alternates from one element to the next.  */
  for (i = 0; i < NUM; i++)
    {
      src1.i[i] = i * i * sign;
      src2.i[i] = (i + 20) * sign;
      sign = -sign;
    }

  /* Vector path: one _mm_min_epi8 per 16-byte chunk.  */
  for (i = 0; i < NUM; i += 16)
    dst.x[i / 16] = _mm_min_epi8 (src1.x[i / 16], src2.x[i / 16]);

  /* Scalar path: recompute each signed minimum and compare.  */
  for (i = 0; i < NUM; i++)
    {
      min = src1.i[i] >= src2.i[i] ? src2.i[i] : src1.i[i];
      if (min != dst.i[i])
	abort ();
    }
}
Exemplo n.º 2
0
/* Process each of the TEST_SIZE words of buf in place.
 *
 * For every word, a vector is built whose low 64-bit lane holds the word
 * and whose high lane holds the word shifted right by 4; both are ANDed
 * with g_mask (presumably a per-byte 0x0F mask, so that each byte holds
 * one nibble -- confirm against its definition).  The vector is then run
 * through S() with a fixed sequence of g_shuffles indices, followed by a
 * last min/max step whose two halves are merged back into one 64-bit
 * word and written over the input.
 *
 * NOTE(review): judging by the name and the min/max pattern this is a
 * sorting network over the 16 nibbles of each word; the tables that
 * would confirm it are not visible here. */
void nibble_sort_tom(unsigned long *buf) {
  /* g_shuffles indices passed to S(), in application order. */
  static const int stage_order[] = {0, 1, 0, 2, 3, 0, 4, 5, 3};
  enum { STAGE_COUNT = sizeof stage_order / sizeof stage_order[0] };

  for (int word = 0; word < TEST_SIZE; ++word) {
    unsigned long *slot = buf + word;

    __m128i v = _mm_set_epi64x(*slot >> 4, *slot);
    v = _mm_and_si128(v, g_mask);

    for (int s = 0; s < STAGE_COUNT; ++s) {
      v = S(v, stage_order[s]);
    }

    /* The last step reuses the index-0 input shuffles but skips the
     * output shuffles: the min/max results are combined directly, with
     * the max half shifted left by 4 bits before the OR. */
    const __m128i lhs = _mm_shuffle_epi8(v, g_shuffles[0][0]);
    const __m128i rhs = _mm_shuffle_epi8(v, g_shuffles[0][1]);
    const __m128i lo = _mm_min_epi8(lhs, rhs);
    const __m128i hi = _mm_max_epi8(lhs, rhs);

    /* Only the low 64 bits of the merged vector are stored. */
    _mm_storel_epi64((__m128i *)slot,
                     _mm_or_si128(lo, _mm_slli_epi64(hi, 4)));
  }
}
Exemplo n.º 3
0
/* Apply the step described by g_shuffles[i] to x and return the result.
 *
 * x is byte-shuffled twice (entries 0 and 1 of the row) to form two
 * operand vectors; their per-byte signed minimum and maximum are taken;
 * the minimum is shuffled with entry 2, the maximum with entry 3, and
 * the two are ORed together.
 *
 * NOTE(review): the OR only recombines cleanly if entries 2 and 3 leave
 * disjoint bytes zeroed -- verify against the g_shuffles tables. */
static __m128i S(__m128i x, int i) {
  const __m128i lhs = _mm_shuffle_epi8(x, g_shuffles[i][0]);
  const __m128i rhs = _mm_shuffle_epi8(x, g_shuffles[i][1]);

  const __m128i placed_min =
      _mm_shuffle_epi8(_mm_min_epi8(lhs, rhs), g_shuffles[i][2]);
  const __m128i placed_max =
      _mm_shuffle_epi8(_mm_max_epi8(lhs, rhs), g_shuffles[i][3]);

  return _mm_or_si128(placed_min, placed_max);
}
Exemplo n.º 4
0
// Codegen test for _mm_min_epi8. The function only forwards x and y to the
// intrinsic and returns its result; what is verified is the compiler output.
// The FileCheck directives in the body expect the IR to contain a call to
// @llvm.x86.sse41.pminsb returning <16 x i8>, and the assembly-output
// directive expects a pminsb instruction operating on two xmm registers.
__m128i test_mm_min_epi8(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_min_epi8
  // CHECK: call <16 x i8> @llvm.x86.sse41.pminsb
  // CHECK-ASM: pminsb %xmm{{.*}}, %xmm{{.*}}
  return _mm_min_epi8(x, y);
}
Exemplo n.º 5
0
// Codegen test for _mm_min_epi8. The function only forwards x and y to the
// intrinsic and returns its result; what is verified is the emitted IR.
// The FileCheck directives in the body expect a signed less-than comparison
// (icmp slt) of the two <16 x i8> operands, immediately followed by a select
// that picks the first operand where the comparison holds and the second
// operand otherwise.
__m128i test_mm_min_epi8(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_min_epi8
  // CHECK:       [[CMP:%.*]] = icmp slt <16 x i8> [[X:%.*]], [[Y:%.*]]
  // CHECK-NEXT:  select <16 x i1> [[CMP]], <16 x i8> [[X]], <16 x i8> [[Y]]
  return _mm_min_epi8(x, y);
}