示例#1
0
void
TEST (void)
{
  int i, sign;
  UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3;
  UNION_TYPE (128, i_w) src;
  MASK_TYPE mask = SIZE | 123;
  short res_ref[SIZE];

  sign = -1;
  for (i = 0; i < 8; i++)
    {
      src.a[i] = 1 + 3 * i * sign;
      sign = sign * -1;
    }
  for (i = 0; i < SIZE; i++)
    res2.a[i] = DEFAULT_VALUE;

  CALC (res_ref, src.a);

  if (AVX512F_LEN == 512)
    {
      res1.x = INTRINSIC (_broadcastw_epi16) (src.x);
      if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
	abort ();
    }

  res2.x = INTRINSIC (_mask_broadcastw_epi16) (res2.x, mask, src.x);
  res3.x = INTRINSIC (_maskz_broadcastw_epi16) (mask, src.x);

  MASK_MERGE (i_w) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
    abort ();

  MASK_ZERO (i_w) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
    abort ();

  CALC (res_ref, src.a);

  if (AVX512F_LEN == 512)
    {
      res1.x = INTRINSIC (_set1_epi16) (src.a[0]);
      if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
	abort ();
    }

  res2.x = INTRINSIC (_mask_set1_epi16) (res2.x, mask, src.a[0]);
  res3.x = INTRINSIC (_maskz_set1_epi16) (mask, src.a[0]);

  MASK_MERGE (i_w) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
    abort ();

  MASK_ZERO (i_w) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
    abort ();
}
示例#2
0
void
TEST (void)
{
  int i, sign;
  UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3, src1;
  MASK_TYPE mask = MASK_VALUE;
  int res_ref[SIZE];

  sign = -1;
  for (i = 0; i < SIZE; i++)
    {
      src1.a[i] = 2 + sign * 7 * i % 291;
      sign = sign * -1;
    }

  for (i = 0; i < SIZE; i++)
    res2.a[i] = DEFAULT_VALUE;

  res1.x = INTRINSIC (_srai_epi32) (src1.x, 3);
  res2.x =
    INTRINSIC (_mask_srai_epi32) (res2.x, mask, src1.x, 3);
  res3.x = INTRINSIC (_maskz_srai_epi32) (mask, src1.x, 3);

  CALC (res_ref, src1.a, 3);

  if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
    abort ();

  MASK_MERGE (i_d) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
    abort ();

  MASK_ZERO (i_d) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
    abort ();

  for (i = 0; i < SIZE; i++)
    res2.a[i] = DEFAULT_VALUE;

  res1.x = INTRINSIC (_srai_epi32) (src1.x, 33);
  res2.x =
    INTRINSIC (_mask_srai_epi32) (res2.x, mask, src1.x, 33);
  res3.x = INTRINSIC (_maskz_srai_epi32) (mask, src1.x, 33);

  CALC (res_ref, src1.a, 33);

  if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
    abort ();

  MASK_MERGE (i_d) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
    abort ();

  MASK_ZERO (i_d) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
    abort ();
}
示例#3
0
void
TEST (void)
{
  int i;
  UNION_TYPE (AVX512F_LEN, i_uq) res1, res2, res3, src1;
  MASK_TYPE mask = MASK_VALUE;
  unsigned long long res_ref[SIZE];

  for (i = 0; i < SIZE; i++)
    {
      src1.a[i] = 2 + 7 * i % 291;
    }

  for (i = 0; i < SIZE; i++)
    res2.a[i] = DEFAULT_VALUE;

  res1.x = INTRINSIC (_srli_epi64) (src1.x, 3);
  res2.x =
    INTRINSIC (_mask_srli_epi64) (res2.x, mask, src1.x, 3);
  res3.x = INTRINSIC (_maskz_srli_epi64) (mask, src1.x, 3);

  CALC (res_ref, src1.a, 3);

  if (UNION_CHECK (AVX512F_LEN, i_uq) (res1, res_ref))
    abort ();

  MASK_MERGE (i_uq) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_uq) (res2, res_ref))
    abort ();

  MASK_ZERO (i_uq) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_uq) (res3, res_ref))
    abort ();


  for (i = 0; i < SIZE; i++)
    res2.a[i] = DEFAULT_VALUE;

  res1.x = INTRINSIC (_srli_epi64) (src1.x, 65);
  res2.x =
    INTRINSIC (_mask_srli_epi64) (res2.x, mask, src1.x, 65);
  res3.x = INTRINSIC (_maskz_srli_epi64) (mask, src1.x, 65);

  CALC (res_ref, src1.a, 65);

  if (UNION_CHECK (AVX512F_LEN, i_uq) (res1, res_ref))
    abort ();

  MASK_MERGE (i_uq) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_uq) (res2, res_ref))
    abort ();

  MASK_ZERO (i_uq) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_uq) (res3, res_ref))
    abort ();
}
示例#4
0
void static
TEST (void)
{
  int i, sign;
  UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3;
  UNION_TYPE (128, i_q) src;
  MASK_TYPE mask = MASK_VALUE;
  long long res_ref[SIZE];

  sign = -1;
  for (i = 0; i < 2; i++)
    {
      src.a[i] = 1 + 34 * i * sign;
      sign = sign * -1;
    }
  for (i = 0; i < SIZE; i++)
    res2.a[i] = DEFAULT_VALUE;

  res1.x = INTRINSIC (_broadcastq_epi64) (src.x);
  res2.x = INTRINSIC (_mask_broadcastq_epi64) (res2.x, mask, src.x);
  res3.x = INTRINSIC (_maskz_broadcastq_epi64) (mask, src.x);

  CALC (res_ref, src.a);

  if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
    abort ();

  MASK_MERGE (i_q) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
    abort ();

  MASK_ZERO (i_q) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
    abort ();

  res1.x = INTRINSIC (_set1_epi64) (src.a[0]);
  res2.x = INTRINSIC (_mask_set1_epi64) (res2.x, mask, src.a[0]);
  res3.x = INTRINSIC (_maskz_set1_epi64) (mask, src.a[0]);

  CALC (res_ref, src.a);

  if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
    abort ();

  MASK_MERGE (i_q) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
    abort ();

  MASK_ZERO (i_q) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
    abort ();
}
示例#5
0
void
avx512f_test (void)
{
  int i, sign;
  union128 res1, res2, res3, res4, res5, src1, src2;
  MASK_TYPE mask = MASK_VALUE;
  float res_ref[SIZE];

  sign = -1;
  for (i = 0; i < SIZE; i++)
    {
      src1.a[i] = 1.5 + 34.67 * i * sign;
      src2.a[i] = -22.17 * i * sign + 1.0;
      sign = sign * -1;
    }
  for (i = 0; i < SIZE; i++)
    {
      res1.a[i] = DEFAULT_VALUE;
      res4.a[i] = DEFAULT_VALUE;
    }

  res1.x = _mm_mask_add_ss (res1.x, mask, src1.x, src2.x);
  res2.x = _mm_maskz_add_ss (mask, src1.x, src2.x);
  res3.x = _mm_add_round_ss (src1.x, src2.x, _MM_FROUND_NO_EXC);
  res4.x = _mm_mask_add_round_ss (res4.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
  res5.x = _mm_maskz_add_round_ss (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);

  calc_add (res_ref, src1.a, src2.a);

  MASK_MERGE () (res_ref, mask, 1);
  if (check_union128 (res1, res_ref))
    abort ();

  MASK_ZERO () (res_ref, mask, 1);
  if (check_union128 (res2, res_ref))
    abort ();

  calc_add (res_ref, src1.a, src2.a);

  if (check_union128 (res3, res_ref))
    abort();
  
  MASK_MERGE () (res_ref, mask, 1);
  if (check_union128 (res4, res_ref))
    abort ();

  MASK_ZERO () (res_ref, mask, 1);
  if (check_union128 (res5, res_ref))
    abort ();
}
示例#6
0
/* Check the _unpackhi_epi8 intrinsic and its merge-/zero-masked variants
   against the reference produced by CALC from both source arrays.  Any
   mismatch aborts.  */
void
TEST (void)
{
  int i, sign;
  UNION_TYPE (AVX512F_LEN, i_b) res1, res2, res3, src1, src2;
  MASK_TYPE mask = MASK_VALUE;
  char res_ref[SIZE];

  sign = -1;
  for (i = 0; i < SIZE; i++)
    {
      src1.a[i] = 34 * i * sign;
      /* Fill the second operand.  This line used to assign src1 a second
	 time, which left src2 uninitialized while still being passed to the
	 intrinsics and to CALC.  */
      src2.a[i] = 179 * i;
      sign = sign * -1;
    }

  /* Pass-through operand for the merge-masked call.  */
  for (i = 0; i < SIZE; i++)
    res2.a[i] = DEFAULT_VALUE;

  res1.x = INTRINSIC (_unpackhi_epi8) (src1.x, src2.x);
  res2.x = INTRINSIC (_mask_unpackhi_epi8) (res2.x, mask, src1.x, src2.x);
  res3.x = INTRINSIC (_maskz_unpackhi_epi8) (mask, src1.x, src2.x);

  CALC (res_ref, src1.a, src2.a);

  if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref))
    abort ();

  MASK_MERGE (i_b) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref))
    abort ();

  MASK_ZERO (i_b) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref))
    abort ();
}
示例#7
0
/* Check _mm512_exp2a23_ps and its merge-/zero-masked variants on sixteen
   floats.  Results are compared with the reference from compute_vexp2ps
   using check_rough_union512 with a tolerance argument of 0.0001; any
   mismatch aborts.  */
void static
avx512er_test (void)
{
  union512 input, plain, merged, zeroed;
  __mmask16 k = MASK_VALUE;
  float expected[16];
  int idx = 0;

  /* Fill the source and the merge pass-through operand together.  */
  while (idx < 16)
    {
      input.a[idx] = 179.345 - 6.5645 * idx;
      merged.a[idx] = DEFAULT_VALUE;
      idx++;
    }

  plain.x = _mm512_exp2a23_ps (input.x);
  merged.x = _mm512_mask_exp2a23_ps (merged.x, k, input.x);
  zeroed.x = _mm512_maskz_exp2a23_ps (k, input.x);

  compute_vexp2ps (input.a, expected);

  if (check_rough_union512 (plain, expected, 0.0001))
    abort ();

  MASK_MERGE ()(expected, k, 16);
  if (check_rough_union512 (merged, expected, 0.0001))
    abort ();

  MASK_ZERO ()(expected, k, 16);
  if (check_rough_union512 (zeroed, expected, 0.0001))
    abort ();
}
示例#8
0
/* Check the _min_epi8 intrinsic and its merge-/zero-masked variants against
   the reference produced by CALC.  Any mismatch aborts.  */
void
TEST (void)
{
  /* `sign' must start from a defined value: it is read on the first loop
     iteration.  It was previously left uninitialized, so the source data
     (and formally the whole test) depended on an indeterminate value.  */
  int i, sign = 1;
  UNION_TYPE (AVX512F_LEN, i_b) src1, src2, res1, res2, res3;
  MASK_TYPE mask = MASK_VALUE;
  char res_ref[SIZE];

  for (i = 0; i < SIZE; i++)
    {
      /* Alternate the sign so both operands mix negative and non-negative
	 bytes.  */
      src1.a[i] =  i * sign;
      src2.a[i] = (i + 20) * sign;
      sign = -sign;
      /* Pass-through operand for the merge-masked call.  */
      res2.a[i] = DEFAULT_VALUE;
    }

  res1.x = INTRINSIC (_min_epi8) (src1.x, src2.x);
  res2.x = INTRINSIC (_mask_min_epi8) (res2.x, mask, src1.x, src2.x);
  res3.x = INTRINSIC (_maskz_min_epi8) (mask, src1.x, src2.x);

  CALC (src1.a, src2.a, res_ref);

  if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref))
    abort ();

  MASK_MERGE (i_b) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref))
    abort ();

  MASK_ZERO (i_b) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref))
    abort ();
}
示例#9
0
/* Check the _mul_epu32 intrinsic and its merge-/zero-masked variants.  The
   sources hold SRC_SIZE 32-bit elements and the results DST_SIZE 64-bit
   elements; the reference is produced by CALC.  Any mismatch aborts.  */
static void
TEST (void)
{
  UNION_TYPE (AVX512F_LEN, i_d) s1, s2;
  UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3;
  MASK_TYPE mask = MASK_VALUE;
  unsigned long long res_ref[DST_SIZE];
  /* The unused local `sign' was dropped: the inputs here are all
     non-negative and nothing in this function read it.  */
  int i;

  for (i = 0; i < SRC_SIZE; i++)
    {
      s1.a[i] = i * 20;
      s2.a[i] = i + 20;
    }

  /* Pass-through operand for the merge-masked call.  */
  for (i = 0; i < DST_SIZE; i++)
    res2.a[i] = DEFAULT_VALUE;

  CALC (s1.a, s2.a, res_ref);

  res1.x = INTRINSIC (_mul_epu32) (s1.x, s2.x);
  res2.x = INTRINSIC (_mask_mul_epu32) (res2.x, mask, s1.x, s2.x);
  res3.x = INTRINSIC (_maskz_mul_epu32) (mask, s1.x, s2.x);

  if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
    abort ();

  MASK_MERGE (i_q) (res_ref, mask, DST_SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
    abort ();

  MASK_ZERO (i_q) (res_ref, mask, DST_SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
    abort ();
}
示例#10
0
void
TEST (void)
{
  int i;
  UNION_TYPE (AVX512F_LEN, i_w) src1, src2, res1, res2, res3;
  MASK_TYPE mask = MASK_VALUE;
  unsigned short res_ref[SIZE];

  for (i = 0; i < SIZE; i++)
    {
      src1.a[i] = i * i;
      src2.a[i] = i + 20;
      res2.a[i] = DEFAULT_VALUE;
    }

  res1.x = INTRINSIC (_min_epu16) (src1.x, src2.x);
  res2.x = INTRINSIC (_mask_min_epu16) (res2.x, mask, src1.x, src2.x);
  res3.x = INTRINSIC (_maskz_min_epu16) (mask, src1.x, src2.x);

  CALC (src1.a, src2.a, res_ref);

  if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
    abort ();

  MASK_MERGE (i_w) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
    abort ();

  MASK_ZERO (i_w) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
    abort ();
}
示例#11
0
void static
TEST (void)
{
  UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3, s1, s2;
  MASK_TYPE mask = MASK_VALUE;
  long long res_ref[SIZE];
  int i;

  for (i = 0; i < SIZE; i++)
    {
      s1.a[i] = 2 * i;
      s2.a[i] = i;
      res2.a[i] = DEFAULT_VALUE;
    }

  res1.x = INTRINSIC (_alignr_epi64) (s1.x, s2.x, N);
  res2.x = INTRINSIC (_mask_alignr_epi64) (res2.x, mask, s1.x, s2.x, N);
  res3.x = INTRINSIC (_maskz_alignr_epi64) (mask, s1.x, s2.x, N);

  CALC (s1.a, s2.a, res_ref);

  if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
      abort ();

  MASK_MERGE (i_q) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
      abort ();

  MASK_ZERO (i_q) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
      abort ();
}
示例#12
0
void
TEST (void)
{
  union128i_w s;
  UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3;
  MASK_TYPE mask = MASK_VALUE;
  long long int res_ref[SIZE];
  int i, sign = 1;

  for (i = 0; i < SIZE; i++)
    {
      s.a[i] = 2000 * i * sign;
      res2.a[i] = DEFAULT_VALUE;
      sign = -sign;
    }

  res1.x = INTRINSIC (_cvtepi16_epi64) (s.x);
  res2.x = INTRINSIC (_mask_cvtepi16_epi64) (res2.x, mask, s.x);
  res3.x = INTRINSIC (_maskz_cvtepi16_epi64) (mask, s.x);

  CALC (s.a, res_ref);

  if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
    abort ();

  MASK_MERGE (i_q) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
    abort ();

  MASK_ZERO (i_q) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
    abort ();
}
示例#13
0
void
TEST (void)
{
  int i, sign;
  UNION_TYPE (AVX512F_LEN, d) res1, res2, res3;
  UNION_TYPE (128, d) src;
  MASK_TYPE mask = MASK_VALUE;
  double res_ref[SIZE];

  sign = -1;
  for (i = 0; i < 2; i++)
    {
      src.a[i] = 1.5 + 34.67 * i * sign;
      sign = sign * -1;
    }
  for (i = 0; i < SIZE; i++)
    res2.a[i] = DEFAULT_VALUE;

  res1.x = INTRINSIC (_broadcastsd_pd) (src.x);
  res2.x = INTRINSIC (_mask_broadcastsd_pd) (res2.x, mask, src.x);
  res3.x = INTRINSIC (_maskz_broadcastsd_pd) (mask, src.x);

  CALC (res_ref, src.a);

  if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
    abort ();

  MASK_MERGE (d) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref))
    abort ();

  MASK_ZERO (d) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref))
    abort ();
}
示例#14
0
static void
TEST (void)
{
  int ck[SIZE];
  int i;
  UNION_TYPE (AVX512F_LEN, i_d) s, d, dm, dz;
  MASK_TYPE mask = MASK_VALUE;

  for (i = 0; i < SIZE; i++)
    {
      s.a[i] = i * 7 + (i << 15) + 356;
      d.a[i] = DEFAULT_VALUE;
      dm.a[i] = DEFAULT_VALUE;
      dz.a[i] = DEFAULT_VALUE;
    }

  CALC (s.a, ck);

  d.x = INTRINSIC (_abs_epi32) (s.x);
  dz.x = INTRINSIC (_maskz_abs_epi32) (mask, s.x);
  dm.x = INTRINSIC (_mask_abs_epi32) (dm.x, mask, s.x);

  if (UNION_CHECK (AVX512F_LEN, i_d) (d, ck))
    abort ();

  MASK_MERGE (i_d) (ck, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_d) (dm, ck))
    abort ();

  MASK_ZERO (i_d) (ck, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_d) (dz, ck))
    abort ();
}
示例#15
0
void
TEST (void)
{
  UNION_TYPE (AVX512F_LEN, d) s;
  UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3;
  MASK_TYPE mask = MASK_VALUE;
  unsigned long long res_ref[SIZE];
  int i;

  for (i = 0; i < SIZE; i++)
    {
      s.a[i] = 123.456 * (i + 2000);
      res2.a[i] = DEFAULT_VALUE;
    }

  res1.x = INTRINSIC (_cvttpd_epu64) (s.x);
  res2.x = INTRINSIC (_mask_cvttpd_epu64) (res2.x, mask, s.x);
  res3.x = INTRINSIC (_maskz_cvttpd_epu64) (mask, s.x);

  CALC (s.a, res_ref);

  if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
    abort ();

  MASK_MERGE (i_q) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
    abort ();

  MASK_ZERO (i_q) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
    abort ();
}
示例#16
0
void
TEST (void)
{
  int i, sign;
  UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3;
  UNION_TYPE (256, i_d) src;
  MASK_TYPE mask = SIZE | 123;
  int res_ref[SIZE];

  sign = -1;
  for (i = 0; i < 8; i++)
    {
      src.a[i] = 34 * i * sign;
      sign = sign * -1;
    }
  for (i = 0; i < SIZE; i++)
    res2.a[i] = DEFAULT_VALUE;

  res1.x = INTRINSIC (_broadcast_i32x8) (src.x);
  res2.x = INTRINSIC (_mask_broadcast_i32x8) (res2.x, mask, src.x);
  res3.x = INTRINSIC (_maskz_broadcast_i32x8) (mask, src.x);

  CALC (res_ref, src.a);

  if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
    abort ();

  MASK_MERGE (i_d) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
    abort ();

  MASK_ZERO (i_d) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
    abort ();
}
/* Check the _popcnt_epi32 intrinsic and its merge-/zero-masked variants.
   Every byte of the source is 0x3D; the reference is CALC applied to each
   source element.  Any mismatch aborts.  */
void
TEST (void)
{
  UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3, src, src0;
  MASK_TYPE mask = MASK_VALUE;
  TYPE res_ref[SIZE];
  src.x = INTRINSIC (_set1_epi8) (0x3D);
  int i;

  for (i = 0; i < SIZE; i++)
  {
    res_ref[i] = CALC (src.a[i]);
    /* src0 is the pass-through operand for the merge-masked call.  */
    src0.a[i] = DEFAULT_VALUE;
  }

  res1.x = INTRINSIC (_popcnt_epi32)       (src.x);
  /* Operand order is (pass-through, mask, data).  The call previously passed
     (src, mask, src0), i.e. it counted the bits of the DEFAULT_VALUE filler
     and merged with the real data, which does not match the reference built
     from `src' above nor the DEFAULT_VALUE pass-through convention the
     merge check relies on.  */
  res2.x = INTRINSIC (_mask_popcnt_epi32)  (src0.x, mask, src.x);
  res3.x = INTRINSIC (_maskz_popcnt_epi32) (mask, src.x);

  if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
    abort ();

  MASK_MERGE (i_d) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
    abort ();

  MASK_ZERO (i_d) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
    abort ();
}
示例#18
0
void static
TEST (void)
{
  int i;
  UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3, src1, src2;
  MASK_TYPE mask = MASK_VALUE;
  unsigned int res_ref[SIZE];

  for (i = 0; i < SIZE; i++)
    {
      src1.a[i] = 2 + 7 * i % 291;
      src2.a[i] = 1 + 17 * i % 71;
    }

  for (i = 0; i < SIZE; i++)
    res2.a[i] = DEFAULT_VALUE;

  res1.x = INTRINSIC (_sllv_epi32) (src1.x, src2.x);
  res2.x = INTRINSIC (_mask_sllv_epi32) (res2.x, mask, src1.x, src2.x);
  res3.x = INTRINSIC (_maskz_sllv_epi32) (mask, src1.x, src2.x);

  CALC (res_ref, src1.a, src2.a);

  if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
    abort ();

  MASK_MERGE (i_d) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
    abort ();

  MASK_ZERO (i_d) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
    abort ();
}
示例#19
0
/* Check the _shuffle_epi32 intrinsic (immediate 0xec) and its merge-/
   zero-masked variants against the reference produced by CALC.  Any
   mismatch aborts.  */
void static
TEST (void)
{
  UNION_TYPE (AVX512F_LEN, i_d) s1, res1, res2, res3;
  int res_ref[SIZE];
  int j, sign = 1;
  MASK_TYPE mask = MASK_VALUE;

  for (j = 0; j < SIZE; j++)
    {
      /* Distinct source values with alternating sign.  The expression used
	 to multiply by a second local `i' that was never initialized, so
	 the data depended on an indeterminate value; that factor and the
	 variable have been removed.  */
      s1.a[j] = j * sign;
      res1.a[j] = DEFAULT_VALUE;
      res2.a[j] = DEFAULT_VALUE;
      res3.a[j] = DEFAULT_VALUE;
      sign = -sign;
    }

  res1.x = INTRINSIC (_shuffle_epi32) (s1.x, 0xec);
  res2.x = INTRINSIC (_mask_shuffle_epi32) (res2.x, mask, s1.x, 0xec);
  res3.x = INTRINSIC (_maskz_shuffle_epi32) (mask, s1.x, 0xec);

  CALC (s1.a, 0xec, res_ref);

  if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
    abort ();

  MASK_MERGE (i_d) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
    abort ();

  MASK_ZERO (i_d) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
    abort ();
}
示例#20
0
void
TEST (void)
{
  UNION_TYPE (AVX512F_LEN, d) s1, s2, res1, res2, res3;
  MASK_TYPE mask = MASK_VALUE;
  double res_ref[SIZE];
  int i;

  for (i = 0; i < SIZE; i++)
    {
      s1.a[i] = i * 123.2 + 32.6;
      s2.a[i] = i + 2.5;
      res2.a[i] = DEFAULT_VALUE;
    }

  res1.x = INTRINSIC (_unpacklo_pd) (s1.x, s2.x);
  res2.x = INTRINSIC (_mask_unpacklo_pd) (res2.x, mask, s1.x, s2.x);
  res3.x = INTRINSIC (_maskz_unpacklo_pd) (mask, s1.x, s2.x);

  CALC (s1.a, s2.a, res_ref);

  if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
    abort ();

  MASK_MERGE (d) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref))
    abort ();

  MASK_ZERO (d) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref))
    abort ();
}
示例#21
0
void
TEST (void)
{
  UNION_TYPE (AVX512F_LEN, i_w) s1, s2, res1, res2, res3;
  MASK_TYPE mask = MASK_VALUE;
  short res_ref[SIZE];
  int i, sign = 1;

  for (i = 0; i < SIZE; i++)
    {
      s1.a[i] = i * sign;
      s2.a[i] = i >> 2;
      res2.a[i] = DEFAULT_VALUE;
      sign = -sign;
    }

  res1.x = INTRINSIC (_srav_epi16) (s1.x, s2.x);
  res2.x = INTRINSIC (_mask_srav_epi16) (res2.x, mask, s1.x, s2.x);
  res3.x = INTRINSIC (_maskz_srav_epi16) (mask, s1.x, s2.x);

  CALC (s1.a, s2.a, res_ref);

  if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
    abort ();

  MASK_MERGE (i_w) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
    abort ();

  MASK_ZERO (i_w) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
    abort ();
}
示例#22
0
void
TEST (void)
{
  UNION_TYPE (AVX512F_LEN, i_b) s1, s2, res1, res2 ,res3;
  MASK_TYPE mask = MASK_VALUE;
  char res_ref[SIZE];
  int i;

  for (i = 0; i < SIZE; i++)
    {
      s1.a[i] = i + 15;
      s2.a[i] = i + 14;
      res2.a[i] = DEFAULT_VALUE;
    }

  res1.x = INTRINSIC (_avg_epu8) (s1.x, s2.x);
  res2.x = INTRINSIC (_mask_avg_epu8) (res2.x, mask, s1.x, s2.x);
  res3.x = INTRINSIC (_maskz_avg_epu8) (mask, s1.x, s2.x);

  CALC (s1.a, s2.a, res_ref);

  if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref))
    abort ();

  MASK_MERGE (i_b) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref))
    abort ();

  MASK_ZERO (i_b) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref))
    abort ();
}
示例#23
0
void static
TEST (void)
{
  int i, sign;
  UNION_TYPE (AVX512F_LEN, d) res1, res2, res3, src1, src2;
  MASK_TYPE mask = MASK_VALUE;
  double res_ref[SIZE];

  sign = -1;
  for (i = 0; i < SIZE; i++)
    {
      src1.a[i] = 1.5 + 34.67 * i * sign;
      src2.a[i] = -22.17 * i * sign;
      sign = sign * -1;
    }
  for (i = 0; i < SIZE; i++)
    res2.a[i] = DEFAULT_VALUE;

  res1.x = INTRINSIC (_sub_pd) (src1.x, src2.x);
  res2.x = INTRINSIC (_mask_sub_pd) (res2.x, mask, src1.x, src2.x);
  res3.x = INTRINSIC (_maskz_sub_pd) (mask, src1.x, src2.x);

  CALC (res_ref, src1.a, src2.a);

  if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
    abort ();

  MASK_MERGE (d) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref))
    abort ();

  MASK_ZERO (d) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref))
    abort ();
}
示例#24
0
static void
TEST (void)
{
  UNION_TYPE (AVX512F_LEN, i_q) s, res1, res2, res3;
  long long res_ref[SIZE];
  MASK_TYPE mask = MASK_VALUE;
  int i;

  for (i = 0; i < SIZE; i++)
    {
      s.a[i] = 12345678 * (i % 5);
      res1.a[i] = DEFAULT_VALUE;
      res2.a[i] = DEFAULT_VALUE;
      res3.a[i] = DEFAULT_VALUE;
    }

  res1.x = INTRINSIC (_conflict_epi64) (s.x);
  res2.x = INTRINSIC (_mask_conflict_epi64) (res2.x, mask, s.x);
  res3.x = INTRINSIC (_maskz_conflict_epi64) (mask, s.x);

  CALC (s.a, res_ref);

  if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
    abort ();

  MASK_MERGE (i_q) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
    abort ();

  MASK_ZERO (i_q) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
    abort ();
}
示例#25
0
static void
TEST (void)
{
  UNION_TYPE (AVX512F_LEN, d) s;
  UNION_TYPE (AVX512F_LEN_HALF, i_d) res1, res2, res3;
  MASK_TYPE mask = MASK_VALUE;
  unsigned res_ref[DST_SIZE] = { 0 };
  int i;

  for (i = 0; i < SRC_SIZE; i++)
    {
      s.a[i] = 123.456 * (i + 2000);
    }

  for (i = 0; i < DST_SIZE; i++)
    res2.a[i] = DEFAULT_VALUE;

  res1.x = INTRINSIC (_cvtpd_epu32) (s.x);
  res2.x = INTRINSIC (_mask_cvtpd_epu32) (res2.x, mask, s.x);
  res3.x = INTRINSIC (_maskz_cvtpd_epu32) (mask, s.x);

  CALC (s.a, res_ref);

  if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res1, res_ref))
    abort ();

  MASK_MERGE (i_d) (res_ref, mask, SRC_SIZE);
  if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res2, res_ref))
    abort ();

  MASK_ZERO (i_d) (res_ref, mask, SRC_SIZE);
  if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res3, res_ref))
    abort ();
}
示例#26
0
void
TEST (void)
{
  UNION_TYPE (AVX512F_LEN, d) s1, s2, res1, res2, res3;
  MASK_TYPE mask = MASK_VALUE;
  double res_ref[SIZE];
  int i, sign = 1;

  for (i = 0; i < SIZE; i++)
    {
      s1.a[i] = 234.567 * i * sign;
      s2.a[i] = 100 * (i + 1);
      res2.a[i] = DEFAULT_VALUE;
      sign = -sign;
    }

  res1.x = INTRINSIC (_range_pd) (s1.x, s2.x, IMM);
  res2.x = INTRINSIC (_mask_range_pd) (res2.x, mask, s1.x, s2.x, IMM);
  res3.x = INTRINSIC (_maskz_range_pd) (mask, s1.x, s2.x, IMM);

  CALC (s1.a, s2.a, res_ref);

  if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
    abort ();

  MASK_MERGE (d) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref))
    abort ();

  MASK_ZERO (d) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref))
    abort ();
}
示例#27
0
void static
TEST (void)
{
  UNION_TYPE (AVX512F_LEN, i_q) s1, s2, res1, res2, res3;
  MASK_TYPE mask = MASK_VALUE;
  long long res_ref[SIZE];
  int i, sign = 1;

  for (i = 0; i < SIZE; i++)
    {
      s1.a[i] = i * sign;
      s2.a[i] = (i + 20) * sign;
      sign = -sign;
      res2.a[i] = DEFAULT_VALUE;
    }

  res1.x = INTRINSIC (_andnot_epi64) (s1.x, s2.x);
  res2.x = INTRINSIC (_mask_andnot_epi64) (res2.x, mask, s1.x, s2.x);
  res3.x = INTRINSIC (_maskz_andnot_epi64) (mask, s1.x, s2.x);

  CALC (s1.a, s2.a, res_ref);

  if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
      abort ();

  MASK_MERGE (i_q) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
    abort ();

  MASK_ZERO (i_q) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
    abort ();
}
示例#28
0
void
TEST (void)
{
  int i;
  UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3, src1, src2;
  MASK_TYPE mask = MASK_VALUE;
  unsigned short res_ref[SIZE];

  for (i = 0; i < SIZE; i++)
    {
      src1.a[i] = 2 + 7 * i % 291;
      src2.a[i] = 3 + 11 * (i % 377) * i;
    }
  for (i = 0; i < SIZE; i++)
    res2.a[i] = DEFAULT_VALUE;

  res1.x = INTRINSIC (_subs_epu16) (src1.x, src2.x);
  res2.x = INTRINSIC (_mask_subs_epu16) (res2.x, mask, src1.x, src2.x);
  res3.x = INTRINSIC (_maskz_subs_epu16) (mask, src1.x, src2.x);

  CALC (res_ref, src1.a, src2.a);

  if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
    abort ();

  MASK_MERGE (i_w) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
    abort ();

  MASK_ZERO (i_w) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
    abort ();
}
示例#29
0
void static
TEST (void)
{
  int i, sign;
  UNION_TYPE (128, i_b) res1, res2, res3;
  UNION_TYPE (AVX512F_LEN, i_d) src;
  MASK_TYPE mask = MASK_VALUE;
  char res_ref[16];

  sign = -1;
  for (i = 0; i < SIZE; i++)
    {
      src.a[i] = 1 + 34 * i * sign;
      sign = sign * -1;
      res2.a[i] = DEFAULT_VALUE;
    }

  res1.x = INTRINSIC (_cvtepi32_epi8) (src.x);
  res2.x = INTRINSIC (_mask_cvtepi32_epi8) (res2.x, mask, src.x);
  res3.x = INTRINSIC (_maskz_cvtepi32_epi8) (mask, src.x);

  CALC (res_ref, src.a);

  if (UNION_CHECK (128, i_b) (res1, res_ref))
    abort ();

  MASK_MERGE (i_b) (res_ref, mask, SIZE);
  if (UNION_CHECK (128, i_b) (res2, res_ref))
    abort ();

  MASK_ZERO (i_b) (res_ref, mask, SIZE);
  if (UNION_CHECK (128, i_b) (res3, res_ref))
    abort ();
}
示例#30
0
static void
TEST (void)
{
  UNION_TYPE (AVX512F_LEN, d) s, res1, res2, res3;
  MASK_TYPE mask = MASK_VALUE;
  double res_ref[SIZE];
  int i, sign = 1;

  for (i = 0; i < SIZE; i++)
    {
      s.a[i] = 123.456 * (i + 2000) * sign;
      res2.a[i] = DEFAULT_VALUE;
      sign = -sign;
    }

  res1.x = INTRINSIC (_rcp14_pd) (s.x);
  res2.x = INTRINSIC (_mask_rcp14_pd) (res2.x, mask, s.x);
  res3.x = INTRINSIC (_maskz_rcp14_pd) (mask, s.x);

  CALC (s.a, res_ref);

  if (UNION_ROUGH_CHECK (AVX512F_LEN, d) (res1, res_ref, 0.0001))
    abort ();

  MASK_MERGE (d) (res_ref, mask, SIZE);
  if (UNION_ROUGH_CHECK (AVX512F_LEN, d) (res2, res_ref, 0.0001))
    abort ();

  MASK_ZERO (d) (res_ref, mask, SIZE);
  if (UNION_ROUGH_CHECK (AVX512F_LEN, d) (res3, res_ref, 0.0001))
    abort ();
}