static void TEST (void) { UNION_TYPE (AVX512F_LEN, i_d) src1, src2, res1, res2, res3; MASK_TYPE mask = MASK_VALUE; int dst_ref[SIZE]; int i; for (i = 0; i < SIZE; i++) { src1.a[i] = i + 50; src2.a[i] = i + 100; } for (i = 0; i < SIZE; i++) res2.a[i] = DEFAULT_VALUE; res1.x = INTRINSIC (_mullo_epi32) (src1.x, src2.x); res2.x = INTRINSIC (_mask_mullo_epi32) (res2.x, mask, src1.x, src2.x); res3.x = INTRINSIC (_maskz_mullo_epi32) (mask, src1.x, src2.x); CALC (src1.a, src2.a, dst_ref); if (UNION_CHECK (AVX512F_LEN, i_d) (res1, dst_ref)) abort (); MASK_MERGE (i_d) (dst_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, i_d) (res2, dst_ref)) abort (); MASK_ZERO (i_d) (dst_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, i_d) (res3, dst_ref)) abort (); }
void TEST (void) { long long ck[SIZE]; int i; UNION_TYPE (AVX512F_LEN, i_q) s, d, dm, dz; MASK_TYPE mask = MASK_VALUE; for (i = 0; i < SIZE; i++) { s.a[i] = i * 7 + (i << 15) + 356; d.a[i] = DEFAULT_VALUE; dm.a[i] = DEFAULT_VALUE; dz.a[i] = DEFAULT_VALUE; } CALC (s.a, ck); d.x = INTRINSIC (_abs_epi64) (s.x); dz.x = INTRINSIC (_maskz_abs_epi64) (mask, s.x); dm.x = INTRINSIC (_mask_abs_epi64) (dm.x, mask, s.x); if (UNION_CHECK (AVX512F_LEN, i_q) (d, ck)) abort (); MASK_MERGE (i_q) (ck, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, i_q) (dm, ck)) abort (); MASK_ZERO (i_q) (ck, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, i_q) (dz, ck)) abort (); }
void TEST (void) { UNION_TYPE (AVX512F_LEN_HALF, i_d) s; UNION_TYPE (AVX512F_LEN, d) res1, res2, res3; MASK_TYPE mask = MASK_VALUE; double res_ref[DST_SIZE]; int i, sign = 1; for (i = 0; i < SRC_SIZE; i++) { s.a[i] = 123456 * (i + 2000) * sign; sign = -sign; } for (i = 0; i < DST_SIZE; i++) res2.a[i] = DEFAULT_VALUE; res1.x = INTRINSIC (_cvtepi32_pd) (s.x); res2.x = INTRINSIC (_mask_cvtepi32_pd) (res2.x, mask, s.x); res3.x = INTRINSIC (_maskz_cvtepi32_pd) (mask, s.x); CALC (s.a, res_ref); if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref)) abort (); MASK_MERGE (d) (res_ref, mask, DST_SIZE); if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref)) abort (); MASK_ZERO (d) (res_ref, mask, DST_SIZE); if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref)) abort (); }
void TEST (void) { UNION_TYPE (AVX512F_LEN, d) s1, s2, res1, res2, res3; MASK_TYPE mask = MASK_VALUE; double res_ref[SIZE]; int i, sign = 1; for (i = 0; i < SIZE; i++) { s1.a[i] = 234.567 * i * sign; s2.a[i] = 100 * (i + 1); res2.a[i] = DEFAULT_VALUE; sign = -sign; } res1.x = INTRINSIC (_range_pd) (s1.x, s2.x, IMM); res2.x = INTRINSIC (_mask_range_pd) (res2.x, mask, s1.x, s2.x, IMM); res3.x = INTRINSIC (_maskz_range_pd) (mask, s1.x, s2.x, IMM); CALC (s1.a, s2.a, res_ref); if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref)) abort (); MASK_MERGE (d) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref)) abort (); MASK_ZERO (d) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref)) abort (); }
static void TEST (void) { UNION_TYPE (AVX512F_LEN_HALF, i_w) s; UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3; MASK_TYPE mask = MASK_VALUE; int res_ref[SIZE]; int i; for (i = 0; i < SIZE; i++) { s.a[i] = 2000 * i; res2.a[i] = DEFAULT_VALUE; } res1.x = INTRINSIC (_cvtepu16_epi32) (s.x); res2.x = INTRINSIC (_mask_cvtepu16_epi32) (res2.x, mask, s.x); res3.x = INTRINSIC (_maskz_cvtepu16_epi32) (mask, s.x); CALC (s.a, res_ref); if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref)) abort (); MASK_MERGE (i_d) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref)) abort (); MASK_ZERO (i_d) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref)) abort (); }
void static TEST (void) { UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3, s1, s2; MASK_TYPE mask = MASK_VALUE; long long res_ref[SIZE]; int i; for (i = 0; i < SIZE; i++) { s1.a[i] = 2 * i; s2.a[i] = i; res2.a[i] = DEFAULT_VALUE; } res1.x = INTRINSIC (_alignr_epi64) (s1.x, s2.x, N); res2.x = INTRINSIC (_mask_alignr_epi64) (res2.x, mask, s1.x, s2.x, N); res3.x = INTRINSIC (_maskz_alignr_epi64) (mask, s1.x, s2.x, N); CALC (s1.a, s2.a, res_ref); if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref)) abort (); MASK_MERGE (i_q) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref)) abort (); MASK_ZERO (i_q) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref)) abort (); }
void TEST (void) { int i; UNION_TYPE (AVX512F_LEN, i_b) res1, res2, res3, src1, src2; MASK_TYPE mask = MASK_VALUE; unsigned char res_ref[SIZE]; for (i = 0; i < SIZE; i++) { src1.a[i] = 2 + 7 * i % 291; src2.a[i] = 3 + 11 * (i % 377) * i; } for (i = 0; i < SIZE; i++) res2.a[i] = DEFAULT_VALUE; res1.x = INTRINSIC (_subs_epu8) (src1.x, src2.x); res2.x = INTRINSIC (_mask_subs_epu8) (res2.x, mask, src1.x, src2.x); res3.x = INTRINSIC (_maskz_subs_epu8) (mask, src1.x, src2.x); CALC (res_ref, src1.a, src2.a); if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref)) abort (); MASK_MERGE (i_b) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref)) abort (); MASK_ZERO (i_b) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref)) abort (); }
void TEST (void) { int i, sign; UNION_TYPE (AVX512F_LEN, d) res1, res2, res3, src1, src2; MASK_TYPE mask = MASK_VALUE; double res_ref[SIZE]; sign = -1; for (i = 0; i < SIZE; i++) { src1.a[i] = 1.5 + 34.67 * i * sign; src2.a[i] = -22.17 * i * sign; sign = sign * -1; } for (i = 0; i < SIZE; i++) res2.a[i] = DEFAULT_VALUE; res1.x = INTRINSIC (_add_pd) (src1.x, src2.x); res2.x = INTRINSIC (_mask_add_pd) (res2.x, mask, src1.x, src2.x); res3.x = INTRINSIC (_maskz_add_pd) (mask, src1.x, src2.x); CALC (res_ref, src1.a, src2.a); if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref)) abort (); MASK_MERGE (d) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref)) abort (); MASK_ZERO (d) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref)) abort (); }
void TEST (void) { int i, sign; UNION_TYPE (AVX512F_LEN_HALF, i_b) res1, res2, res3; UNION_TYPE (AVX512F_LEN, i_w) src; MASK_TYPE mask = MASK_VALUE; char res_ref[32]; sign = -1; for (i = 0; i < SIZE; i++) { src.a[i] = 1 + 34 * i * sign; sign = sign * -1; res2.a[i] = DEFAULT_VALUE; } res1.x = INTRINSIC (_cvtepi16_epi8) (src.x); res2.x = INTRINSIC (_mask_cvtepi16_epi8) (res2.x, mask, src.x); res3.x = INTRINSIC (_maskz_cvtepi16_epi8) (mask, src.x); CALC (res_ref, src.a); if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res1, res_ref)) abort (); MASK_MERGE (i_b) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res2, res_ref)) abort (); MASK_ZERO (i_b) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res3, res_ref)) abort (); }
void TEST (void) { int i, sign; UNION_TYPE (AVX512F_LEN, d) res1, res2, res3; UNION_TYPE (128, d) src; MASK_TYPE mask = SIZE | 123; double res_ref[SIZE]; sign = -1; for (i = 0; i < 2; i++) { src.a[i] = 34.67 * i * sign; sign = sign * -1; } for (i = 0; i < SIZE; i++) res2.a[i] = DEFAULT_VALUE; res1.x = INTRINSIC (_broadcast_f64x2) (src.x); res2.x = INTRINSIC (_mask_broadcast_f64x2) (res2.x, mask, src.x); res3.x = INTRINSIC (_maskz_broadcast_f64x2) (mask, src.x); CALC (res_ref, src.a); if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref)) abort (); MASK_MERGE (d) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref)) abort (); MASK_ZERO (d) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref)) abort (); }
void TEST (void) { UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3, src, src0; MASK_TYPE mask = MASK_VALUE; TYPE res_ref[SIZE]; src.x = INTRINSIC (_set1_epi8) (0x3D); int i; for (i = 0; i < SIZE; i++) { res_ref[i] = CALC (src.a[i]); src0.a[i] = DEFAULT_VALUE; } res1.x = INTRINSIC (_popcnt_epi32) (src.x); res2.x = INTRINSIC (_mask_popcnt_epi32) (src.x, mask, src0.x); res3.x = INTRINSIC (_maskz_popcnt_epi32) (mask, src.x); if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref)) abort (); MASK_MERGE (i_d) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref)) abort (); MASK_ZERO (i_d) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref)) abort (); }
void TEST (void) { UNION_TYPE (AVX512F_LEN, i_q) s; UNION_TYPE (AVX512F_LEN, d) res1, res2, res3; MASK_TYPE mask = MASK_VALUE; double res_ref[SIZE]; int i; for (i = 0; i < SIZE; i++) { s.a[i] = 123 * (i + 2000); res2.a[i] = DEFAULT_VALUE; } res1.x = INTRINSIC (_cvtepu64_pd) (s.x); res2.x = INTRINSIC (_mask_cvtepu64_pd) (res2.x, mask, s.x); res3.x = INTRINSIC (_maskz_cvtepu64_pd) (mask, s.x); CALC (s.a, res_ref); if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref)) abort (); MASK_MERGE (d) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref)) abort (); MASK_ZERO (d) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref)) abort (); }
void TEST (void) { UNION_TYPE (AVX512F_LEN, i_w) s1, s2, res1, res2, res3; short res_ref[SIZE]; MASK_TYPE mask = MASK_VALUE; int i; int fail = 0; for (i = 0; i < SIZE; i++) { s1.a[i] = i * 17 + i; s2.a[i] = i * -17 + i * 2; res2.a[i] = DEFAULT_VALUE; } res1.x = INTRINSIC (_maddubs_epi16) (s1.x, s2.x); res2.x = INTRINSIC (_mask_maddubs_epi16) (res2.x, mask, s1.x, s2.x); res3.x = INTRINSIC (_maskz_maddubs_epi16) (mask, s1.x, s2.x); CALC(s1.a, s2.a, res_ref); if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref)) abort (); MASK_MERGE (i_w) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref)) abort (); MASK_ZERO (i_w) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref)) abort (); }
void TEST (void) { UNION_TYPE (AVX512F_LEN,d) s1, s2, res1, res2, res3; MASK_TYPE mask = MASK_VALUE; double dst_ref[SIZE]; int i; for (i = 0; i < SIZE; i++) { s1.a[i] = 132.45 * i; s2.a[i] = 43.6 - i * 4.4; res2.a[i] = DEFAULT_VALUE; } res1.x = INTRINSIC (_or_pd) (s1.x, s2.x); res2.x = INTRINSIC (_mask_or_pd) (res2.x, mask, s1.x, s2.x); res3.x = INTRINSIC (_maskz_or_pd) (mask, s1.x, s2.x); CALC (s1.a, s2.a, dst_ref); if (UNION_CHECK (AVX512F_LEN,d) (res1, dst_ref)) abort (); MASK_MERGE (d) (dst_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN,d) (res2, dst_ref)) abort (); MASK_ZERO (d) (dst_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN,d) (res3, dst_ref)) abort (); }
void TEST (void) { UNION_TYPE (AVX512F_LEN, i_d) s, res1, res2, res3; int res_ref[SIZE]; MASK_TYPE mask = MASK_VALUE; int i; for (i = 0; i < SIZE; i++) { s.a[i] = 12345678 * (i % 5); res1.a[i] = DEFAULT_VALUE; res2.a[i] = DEFAULT_VALUE; res3.a[i] = DEFAULT_VALUE; } res1.x = INTRINSIC (_lzcnt_epi32) (s.x); res2.x = INTRINSIC (_mask_lzcnt_epi32) (res2.x, mask, s.x); res3.x = INTRINSIC (_maskz_lzcnt_epi32) (mask, s.x); CALC (s.a, res_ref); if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref)) abort (); MASK_MERGE (i_d) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref)) abort (); MASK_ZERO (i_d) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref)) abort (); }
void TEST (void) { int i, sign; UNION_TYPE (AVX512F_LEN, i_b) src1, src2, res1, res2, res3; MASK_TYPE mask = MASK_VALUE; char res_ref[SIZE]; for (i = 0; i < SIZE; i++) { src1.a[i] = i * sign; src2.a[i] = (i + 20) * sign; sign = -sign; res2.a[i] = DEFAULT_VALUE; } res1.x = INTRINSIC (_min_epi8) (src1.x, src2.x); res2.x = INTRINSIC (_mask_min_epi8) (res2.x, mask, src1.x, src2.x); res3.x = INTRINSIC (_maskz_min_epi8) (mask, src1.x, src2.x); CALC (src1.a, src2.a, res_ref); if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref)) abort (); MASK_MERGE (i_b) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref)) abort (); MASK_ZERO (i_b) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref)) abort (); }
void TEST (void) { UNION_TYPE (AVX512F_LEN, d) s; UNION_TYPE (AVX512F_LEN_HALF, i_ud) res1, res2, res3; MASK_TYPE mask = MASK_VALUE; unsigned res_ref[DST_SIZE] = { 0 }; int i; for (i = 0; i < SRC_SIZE; i++) { s.a[i] = 123.456 * (i + 2000); } for (i = 0; i < DST_SIZE; i++) res2.a[i] = DEFAULT_VALUE; res1.x = INTRINSIC (_cvtpd_epu32) (s.x); res2.x = INTRINSIC (_mask_cvtpd_epu32) (res2.x, mask, s.x); res3.x = INTRINSIC (_maskz_cvtpd_epu32) (mask, s.x); CALC (s.a, res_ref); if (UNION_CHECK (AVX512F_LEN_HALF, i_ud) (res1, res_ref)) abort (); MASK_MERGE (i_ud) (res_ref, mask, SRC_SIZE); if (UNION_CHECK (AVX512F_LEN_HALF, i_ud) (res2, res_ref)) abort (); MASK_ZERO (i_ud) (res_ref, mask, SRC_SIZE); if (UNION_CHECK (AVX512F_LEN_HALF, i_ud) (res3, res_ref)) abort (); }
void TEST (void) { UNION_TYPE (AVX512F_LEN, i_w) s1, s2, res1, res2, res3; MASK_TYPE mask = MASK_VALUE; short res_ref[SIZE]; int i, sign = 1; for (i = 0; i < SIZE; i++) { s1.a[i] = i * sign; s2.a[i] = i >> 2; res2.a[i] = DEFAULT_VALUE; sign = -sign; } res1.x = INTRINSIC (_srlv_epi16) (s1.x, s2.x); res2.x = INTRINSIC (_mask_srlv_epi16) (res2.x, mask, s1.x, s2.x); res3.x = INTRINSIC (_maskz_srlv_epi16) (mask, s1.x, s2.x); CALC (s1.a, s2.a, res_ref); if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref)) abort (); MASK_MERGE (i_w) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref)) abort (); MASK_ZERO (i_w) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref)) abort (); }
void TEST (void) { int i, sign; UNION_TYPE (128, i_b) res1, res2, res3; char res4[16]; UNION_TYPE (AVX512F_LEN, i_d) src; MASK_TYPE mask = MASK_VALUE; char res_ref[16]; char res_ref2[16]; sign = -1; for (i = 0; i < SIZE; i++) { src.a[i] = 1 + 34 * i * sign; sign = sign * -1; res2.a[i] = DEFAULT_VALUE; res4[i] = DEFAULT_VALUE; } for (i = SIZE; i < 16; i++) { res4[i] = DEFAULT_VALUE * 2; res_ref2[i] = DEFAULT_VALUE * 2; } res1.x = INTRINSIC (_cvtepi32_epi8) (src.x); res2.x = INTRINSIC (_mask_cvtepi32_epi8) (res2.x, mask, src.x); res3.x = INTRINSIC (_maskz_cvtepi32_epi8) (mask, src.x); CALC (res_ref, src.a, 0); if (UNION_CHECK (128, i_b) (res1, res_ref)) abort (); MASK_MERGE (i_b) (res_ref, mask, SIZE); if (UNION_CHECK (128, i_b) (res2, res_ref)) abort (); MASK_ZERO (i_b) (res_ref, mask, SIZE); if (UNION_CHECK (128, i_b) (res3, res_ref)) abort (); INTRINSIC (_mask_cvtepi32_storeu_epi8) (res4, mask, src.x); CALC (res_ref2, src.a, 1); MASK_MERGE (i_b) (res_ref2, mask, SIZE); if (checkVc (res4, res_ref2, 16)) abort (); }
void TEST (void) { int i; UNION_TYPE (128, i_uw) res1, res2, res3; unsigned short res4[8]; UNION_TYPE (AVX512F_LEN, i_uq) src; MASK_TYPE mask = MASK_VALUE; unsigned short res_ref[8]; unsigned short res_ref2[8]; for (i = 0; i < SIZE; i++) { src.a[i] = 1 + 34 * i; res2.a[i] = DEFAULT_VALUE; res4[i] = DEFAULT_VALUE; } for (i = SIZE; i < 8; i++) { res_ref2[i] = DEFAULT_VALUE * 2; res4[i] = DEFAULT_VALUE * 2; } res1.x = INTRINSIC (_cvtusepi64_epi16) (src.x); res2.x = INTRINSIC (_mask_cvtusepi64_epi16) (res2.x, mask, src.x); res3.x = INTRINSIC (_maskz_cvtusepi64_epi16) (mask, src.x); CALC (res_ref, src.a, 0); if (UNION_CHECK (128, i_uw) (res1, res_ref)) abort (); MASK_MERGE (i_uw) (res_ref, mask, SIZE); if (UNION_CHECK (128, i_uw) (res2, res_ref)) abort (); MASK_ZERO (i_uw) (res_ref, mask, SIZE); if (UNION_CHECK (128, i_uw) (res3, res_ref)) abort (); INTRINSIC (_mask_cvtusepi64_storeu_epi16) (res4, mask, src.x); CALC (res_ref2, src.a, 1); MASK_MERGE (i_w) (res_ref2, mask, SIZE); if (checkVs (res4, res_ref2, 8)) abort (); }
void TEST (void) { int i, j; UNION_TYPE (AVX512F_LEN, i_w) s1, s2, res, ind; short res_ref[SIZE]; MASK_TYPE mask = MASK_VALUE; for (i = 0; i < NUM; i++) { for (j = 0; j < SIZE; j++) { ind.a[j] = DEFAULT_VALUE; s1.a[j] = i * 2 * j + 1; s2.a[j] = i * 2 * j; res.a[j] = DEFAULT_VALUE; } CALC (res_ref, s1.a, ind.a, s2.a); res.x = INTRINSIC (_mask2_permutex2var_epi16) (s1.x, ind.x, mask, s2.x); MASK_MERGE (i_w) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, i_w) (res, res_ref)) abort (); } }
void TEST (void) { int i; UNION_TYPE (AVX512F_LEN, i_q) s1, s2, res, ind; long long res_ref[SIZE]; MASK_TYPE mask = MASK_VALUE; for (i = 0; i < SIZE; i++) { ind.a[i] = DEFAULT_VALUE; s1.a[i] = 34 * i + 1; s2.a[i] = 34 * i; res.a[i] = DEFAULT_VALUE; } CALC (res_ref, s1.a, ind.a, s2.a); res.x = INTRINSIC (_mask2_permutex2var_epi64) (s1.x, ind.x, mask, s2.x); MASK_MERGE (i_q) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, i_q) (res, res_ref)) abort (); }
void TEST (void) { UNION_TYPE (AVX512F_LEN, d) s1, res2, res3, res4, res5; MASK_TYPE mask = MASK_VALUE; double s2[SIZE]; double res_ref1[SIZE]; double res_ref2[SIZE]; double res_ref3[SIZE]; int i, sign = 1; for (i = 0; i < SIZE; i++) { s1.a[i] = 123.456 * (i + 200) * sign; s2[i] = 789.012 * (i + 300) * sign; res2.a[i] = DEFAULT_VALUE; res4.a[i] = DEFAULT_VALUE; sign = -sign; } res2.x = INTRINSIC (_mask_expand_pd) (res2.x, mask, s1.x); res3.x = INTRINSIC (_maskz_expand_pd) (mask, s1.x); res4.x = INTRINSIC (_mask_expandloadu_pd) (res4.x, mask, s2); res5.x = INTRINSIC (_maskz_expandloadu_pd) (mask, s2); /* no mask is the same as all ones mask. */ CALC (s1.a, res_ref1, MASK_ALL_ONES); CALC (s1.a, res_ref2, mask); CALC (s2, res_ref3, mask); MASK_MERGE (d) (res_ref2, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref2)) abort (); MASK_ZERO (d) (res_ref2, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref2)) abort (); MASK_MERGE (d) (res_ref3, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, d) (res4, res_ref3)) abort (); MASK_ZERO (d) (res_ref3, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, d) (res5, res_ref3)) abort (); }
void TEST (void) { int i, sign; UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3, src1; UNION_TYPE (128, i_q) src2; MASK_TYPE mask = MASK_VALUE; short res_ref[SIZE]; long long imm; sign = -1; for (i = 0; i < SIZE; i++) { src1.a[i] = 2 + sign * 7 * i % 291; sign = sign * -1; } for (i = 0; i < 2; i++) { src2.a[i] = 0; } for (imm = 1; imm <= 17; imm++) { src2.a[0] = imm; for (i = 0; i < SIZE; i++) res2.a[i] = DEFAULT_VALUE; res1.x = INTRINSIC (_sll_epi16) (src1.x, src2.x); res2.x = INTRINSIC (_mask_sll_epi16) (res2.x, mask, src1.x, src2.x); res3.x = INTRINSIC (_maskz_sll_epi16) (mask, src1.x, src2.x); CALC (res_ref, src1.a, src2.a); if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref)) abort (); MASK_MERGE (i_w) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref)) abort (); MASK_ZERO (i_w) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref)) abort (); } }
void TEST (void) { int i, j; UNION_TYPE (AVX512F_LEN, d) res1, res2, res3, s1; UNION_TYPE (AVX512F_LEN, i_q) s2; double res_ref[SIZE]; float vals[2] = { -10, 10 }; int controls[8] = {0x11111111, 0x77777777, 0x77777777, 0x88888888, 0x99999999, 0xaaaaaaaa, 0xbbbbbbbb, 0xcccccccc}; MASK_TYPE mask = MASK_VALUE; for (i = 0; i < 2; i++) { for (j = 0; j < SIZE; j++) { s1.a[j] = vals[i]; s2.a[j] = controls[j]; res1.a[j] = DEFAULT_VALUE; res2.a[j] = DEFAULT_VALUE; res3.a[j] = DEFAULT_VALUE; CALC (&res_ref[j], s1.a[j], s2.a[j]); } res1.x = INTRINSIC (_fixupimm_pd) (res1.x, s1.x, s2.x, 0); res2.x = INTRINSIC (_mask_fixupimm_pd) (res2.x, mask, s1.x, s2.x, 0); res3.x = INTRINSIC (_maskz_fixupimm_pd) (mask, res3.x, s1.x, s2.x, 0); if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref)) abort (); MASK_MERGE(d) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref)) abort (); MASK_ZERO(d) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref)) abort (); } }
static void TEST (void) { UNION_TYPE (AVX512F_LEN, i_d) s1, res1, res2, res3, res4; MASK_TYPE mask = MASK_VALUE; int s2[SIZE]; int res_ref1[SIZE]; int res_ref2[SIZE]; int i, sign = 1; for (i = 0; i < SIZE; i++) { s1.a[i] = 12345 * (i + 200) * sign; s2[i] = 67890 * (i + 300) * sign; res1.a[i] = DEFAULT_VALUE; res3.a[i] = DEFAULT_VALUE; sign = -sign; } res1.x = INTRINSIC (_mask_expand_epi32) (res1.x, mask, s1.x); res2.x = INTRINSIC (_maskz_expand_epi32) (mask, s1.x); res3.x = INTRINSIC (_mask_expandloadu_epi32) (res3.x, mask, s2); res4.x = INTRINSIC (_maskz_expandloadu_epi32) (mask, s2); CALC (s1.a, res_ref1, mask); CALC (s2, res_ref2, mask); MASK_MERGE (i_d) (res_ref1, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref1)) abort (); MASK_ZERO (i_d) (res_ref1, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref1)) abort (); MASK_MERGE (i_d) (res_ref2, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref2)) abort (); MASK_ZERO (i_d) (res_ref2, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, i_d) (res4, res_ref2)) abort (); }
void TEST (void) { int i, sign; UNION_TYPE (AVX512F_LEN_HALF, i_d) res1, res2, res3; int res4[SIZE_HALF]; UNION_TYPE (AVX512F_LEN, i_q) src; MASK_TYPE mask = MASK_VALUE; int res_ref[SIZE_HALF]; sign = -1; for (i = 0; i < SIZE; i++) { src.a[i] = 1 + 34 * i * sign; sign = sign * -1; res2.a[i] = DEFAULT_VALUE; res4[i] = DEFAULT_VALUE; } res1.x = INTRINSIC (_cvtepi64_epi32) (src.x); res2.x = INTRINSIC (_mask_cvtepi64_epi32) (res2.x, mask, src.x); res3.x = INTRINSIC (_maskz_cvtepi64_epi32) (mask, src.x); INTRINSIC (_mask_cvtepi64_storeu_epi32) (res4, mask, src.x); CALC (res_ref, src.a); if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res1, res_ref)) abort (); MASK_MERGE (i_d) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res2, res_ref)) abort (); if (checkVi (res4, res_ref, SIZE_HALF)) abort (); MASK_ZERO (i_d) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN_HALF, i_d) (res3, res_ref)) abort (); }
void TEST (void) { UNION_TYPE (AVX512F_LEN, i_d) s1, s2, res1, res2, res3, res4; MASK_TYPE mask = MASK_VALUE; int res_ref[SIZE]; int i, sign = 1; for (i = 0; i < SIZE; i++) { s1.a[i] = i * sign; s2.a[i] = (i + 20) * sign; sign = -sign; res3.a[i] = DEFAULT_VALUE; } #if AVX512F_LEN == 512 res1.x = INTRINSIC (_or_si512) (s1.x, s2.x); res2.x = INTRINSIC (_or_epi32) (s1.x, s2.x); #endif res3.x = INTRINSIC (_mask_or_epi32) (res3.x, mask, s1.x, s2.x); res4.x = INTRINSIC (_maskz_or_epi32) (mask, s1.x, s2.x); CALC (s1.a, s2.a, res_ref); #if AVX512F_LEN == 512 if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref)) abort (); if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref)) abort (); #endif MASK_MERGE (i_d) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref)) abort (); MASK_ZERO (i_d) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, i_d) (res4, res_ref)) abort (); }
void TEST (void) { int i; UNION_TYPE (AVX512F_LEN, i_w) src1, src2, src3, src4; UNION_TYPE (AVX512F_LEN, i_d) src5, dst, res1, res2, res3; UNION_TYPE (128, i_w) mult; MASK_TYPE mask = MASK_VALUE; int res_ref[SIZE]; for (i = 0; i < SIZE * 2; i++) { src1.a[i] = 2 + 7 * i % 291; src2.a[i] = 3 + 11 * (i % 377) * i; src3.a[i] = src1.a[i] * src1.a[i]; src4.a[i] = src2.a[i] * src2.a[i]; } for (i = 0; i < 8; i++) mult.a[i] = 3 + i * 2; for (i = 0; i < SIZE; i++) src5.a[i] = DEFAULT_VALUE; CALC (src1.a, src2.a, src3.a, src4.a, src5.a, mult.a, res_ref); res1.x = INTRINSIC (_4dpwssds_epi32) ( src5.x, src1.x, src2.x, src3.x, src4.x, &mult.x); res2.x = INTRINSIC (_mask_4dpwssds_epi32) (src5.x, mask, src1.x, src2.x, src3.x, src4.x, &mult.x); res3.x = INTRINSIC (_maskz_4dpwssds_epi32) (mask, src5.x, src1.x, src2.x, src3.x, src4.x, &mult.x); if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref)) abort (); MASK_MERGE (i_d) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref)) abort (); MASK_ZERO (i_d) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref)) abort (); }
void static TEST (void) { int i; UNION_TYPE (AVX512F_LEN, d) s1, s2, res1, res2, res3; UNION_TYPE (AVX512F_LEN, i_q) ind; double res_ref[SIZE]; MASK_TYPE mask = MASK_VALUE; for (i = 0; i < SIZE; i++) { ind.a[i] = 17 * (i << 1); s1.a[i] = DEFAULT_VALUE; s2.a[i] = 22.5 * i; res1.a[i] = DEFAULT_VALUE; res2.a[i] = DEFAULT_VALUE; res3.a[i] = DEFAULT_VALUE; } CALC (res_ref, s1.a, ind.a, s2.a); res1.x = INTRINSIC (_permutex2var_pd) (s1.x, ind.x, s2.x); res2.x = INTRINSIC (_mask_permutex2var_pd) (s1.x, mask, ind.x, s2.x); res3.x = INTRINSIC (_maskz_permutex2var_pd) (mask, s1.x, ind.x, s2.x); if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref)) abort (); MASK_MERGE (d) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, d) (res2, res_ref)) abort (); MASK_ZERO (d) (res_ref, mask, SIZE); if (UNION_CHECK (AVX512F_LEN, d) (res3, res_ref)) abort (); }