/* Exercise _mm_i32gather_pd: gather two doubles from a small source
   array and compare the vector against the reference produced by
   compute_i32gatherpd ().  Aborts on mismatch.  */
static void
avx2_test (void)
{
  double src[2];
  double expected[2] = { 0 };
  union128i_d index;
  union128d gathered;
  int k;

  for (k = 0; k < 2; ++k)
    {
      /* Arbitrary, distinct source values.  */
      src[k] = 2.718281828459045 * (k + 1) * (k + 2);

      /* Indices 4 and 0: reverse-order byte offsets (8, 0) halved, so
	 that the scale factor of 2 used below restores them.  */
      index.a[k] = (16 - (k + 1) * 8) >> 1;
    }

  gathered.x = _mm_i32gather_pd (src, index.x, 2);
  compute_i32gatherpd (src, index.a, 2, expected);

  if (check_union128d (gathered, expected) != 0)
    abort ();
}
/* Feed a packed-single vector through test () and compare the resulting
   packed-double vector with the two low floats converted by plain C
   casts.  With DEBUG set a mismatch is reported on stdout; otherwise
   the test aborts.  */
static void
TEST (void)
{
  union128 src;
  union128d got;
  double want[2];

  src.x = _mm_set_ps (2.78, 7777768.82, 2.331, 3.456);
  got.x = test (src.x);

  /* Reference: widen the two low single-precision elements.  */
  want[0] = (double) src.a[0];
  want[1] = (double) src.a[1];

  if (check_union128d (got, want))
#if DEBUG
    {
      printf ("sse2_test_cvtps2pd_1; check_union128d failed\n");
      printf ("\t cvt\t [%f,%f,%f,%f] -> [%f,%f]\n",
	      src.a[0], src.a[1], src.a[2], src.a[3],
	      got.a[0], got.a[1]);
      printf ("\t expect\t [%f,%f]\n", want[0], want[1]);
    }
#else
    abort ();
#endif
}
void static avx512f_test (void) { union128d res1, res2, res3, res4; union128d s1, s2; double res_ref[SIZE]; MASK_TYPE mask = MASK_VALUE; int i; for (i = 0; i < SIZE; i++) { s1.a[i] = 11.5 * (i + 1); s2.a[i] = 10.5 * (i + 1); res_ref[i] = 9.5 * (i + 1); res1.a[i] = DEFAULT_VALUE; res2.a[i] = DEFAULT_VALUE; res3.a[i] = DEFAULT_VALUE; res4.a[i] = DEFAULT_VALUE; } res1.x = _mm_scalef_sd (s1.x, s2.x); res2.x = _mm_scalef_round_sd (s1.x, s2.x, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); res3.x = _mm_mask_scalef_round_sd (s1.x, mask, s1.x, s2.x, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); res4.x = _mm_maskz_scalef_round_sd (mask, s1.x, s2.x, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); compute_scalefsd (s1.a, s2.a, res_ref); if (check_union128d (res1, res_ref)) abort (); if (check_union128d (res2, res_ref)) abort (); MASK_MERGE (d) (res_ref, mask, 1); if (check_union128d (res3, res_ref)) abort (); MASK_ZERO (d) (res_ref, mask, 1); if (check_union128d (res4, res_ref)) abort (); }
void avx512f_test (void) { int i, sign; union128d res1, res2, res3, res4, src1, src2; MASK_TYPE mask = 0; double res_ref[SIZE]; sign = -1; for (i = 0; i < SIZE; i++) { src1.a[i] = 1.5 + 34.67 * i * sign; src2.a[i] = -22.17 * i * sign + 1.0; res1.a[i] = DEFAULT_VALUE; res3.a[i] = DEFAULT_VALUE; sign = sign * -1; } res1.x = _mm_mask_min_sd (res1.x, mask, src1.x, src2.x); res2.x = _mm_maskz_min_sd (mask, src1.x, src2.x); res3.x = _mm_mask_min_round_sd (res3.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC); res4.x = _mm_maskz_min_round_sd (mask, src1.x, src2.x, _MM_FROUND_NO_EXC); calc_min (res_ref, src1.a, src2.a); MASK_MERGE (d) (res_ref, mask, 1); if (check_union128d (res1, res_ref)) abort (); MASK_ZERO (d) (res_ref, mask, 1); if (check_union128d (res2, res_ref)) abort (); calc_min (res_ref, src1.a, src2.a); MASK_MERGE (d) (res_ref, mask, 1); if (check_union128d (res3, res_ref)) abort (); MASK_ZERO (d) (res_ref, mask, 1); if (check_union128d (res4, res_ref)) abort (); }
/* Pass a two-element double array to test () and require the returned
   vector to hold exactly those two values.  Aborts on mismatch.
   test () is defined elsewhere; presumably it performs a vector load.  */
static void
TEST (void)
{
  double values[2] = { 2134.3343, 1234.635654 };
  union128d loaded;

  loaded.x = test (values);

  if (check_union128d (loaded, values))
    abort ();
}
/* Same check as a plain array round-trip, but the source array is
   explicitly given 8-byte alignment: test () must return a vector
   holding exactly the two array values.  Aborts on mismatch.  */
static void
TEST (void)
{
  double values[2] __attribute__ ((aligned (8)))
    = { 2134.3343, 1234.635654 };
  union128d loaded;

  loaded.x = test (values);

  if (check_union128d (loaded, values))
    abort ();
}
/* Hand a vector and a 16-byte-aligned destination array to test () and
   require the array to end up equal to the vector.  Aborts on mismatch.
   The array is deliberately left uninitialised: test () is expected to
   fill both elements (presumably via a vector store -- defined
   elsewhere).  */
static void
TEST (void)
{
  double stored[2] __attribute__ ((aligned (16)));
  union128d vec;

  vec.x = _mm_set_pd (2134.3343, 1234.635654);
  test (stored, vec.x);

  if (check_union128d (vec, stored))
    abort ();
}
/* Hand a vector and a zero-initialised destination array to test () and
   require the array to end up equal to the vector.  Aborts on
   mismatch.  */
static void
TEST (void)
{
  double stored[2] = { 0.0 };
  union128d vec;

  vec.x = _mm_set_pd (2134.3343, 1234.635654);
  test (stored, vec.x);

  if (check_union128d (vec, stored))
    abort ();
}
/* Verify _mm_frcz_sd (__A, __B): the low element must equal the
   fractional part of __B's low element (computed here by subtracting
   the value truncated to int) and the high element must be copied from
   __A.  Aborts on mismatch.  */
void
check_mm_vmfrcz_sd (__m128d __A, __m128d __B)
{
  union128d upper_src, frac_src, got;
  double want[2];

  upper_src.x = __A;
  frac_src.x = __B;
  got.x = _mm_frcz_sd (__A, __B);

  want[0] = frac_src.a[0] - (int) frac_src.a[0];
  want[1] = upper_src.a[1];

  if (check_union128d (got, want))
    abort ();
}
/* Pass a double vector and a 64-bit integer to test () and require the
   result to hold the integer converted to double in the low element
   and the input vector's high element unchanged.  Aborts on
   mismatch.  */
static void
TEST (void)
{
  long long ival = 42949672951333LL;
  union128d src, got;
  double want[2];

  src.x = _mm_set_pd (123.321, 456.987);
  got.x = test (src.x, ival);

  want[0] = (double) ival;
  want[1] = src.a[1];

  if (check_union128d (got, want))
    abort ();
}
/* Hand a vector and a destination array to test (), then require the
   array's low element to match the vector.  Aborts on mismatch.  */
static void
TEST (void)
{
  double stored[2];
  union128d vec;

  vec.x = _mm_set_pd (41124.234, 2344.2354);
  test (stored, vec.x);

  /* The high slot is filled in by hand after the call, so only the low
     element is effectively being checked (presumably test () stores
     just the low double -- it is defined elsewhere).  */
  stored[1] = vec.a[1];

  if (check_union128d (vec, stored))
    abort ();
}
/* Pass two double vectors to test () and require each result element
   to be the larger of the corresponding inputs.  Aborts on mismatch.  */
static void
TEST (void)
{
  union128d lhs, rhs, got;
  double want[2];
  int k;

  lhs.x = _mm_set_pd (2134.3343, 1234.635654);
  rhs.x = _mm_set_pd (41124.234, 2344.2354);
  got.x = test (lhs.x, rhs.x);

  /* Element-wise maximum; the second operand wins unless the first is
     strictly greater.  */
  for (k = 0; k < 2; k++)
    {
      if (lhs.a[k] > rhs.a[k])
	want[k] = lhs.a[k];
      else
	want[k] = rhs.a[k];
    }

  if (check_union128d (got, want))
    abort ();
}
/* Exercise _mm_cvtu64_sd with a large unsigned 64-bit value and compare
   the result with the reference from compute_vcvtusi2sd ().  Aborts on
   mismatch.  */
static void
avx512f_test (void)
{
  unsigned long long uval = 0xFEDCBA9876543210;
  union128d src, got;
  double expected[4];

  src.x = _mm_set_pd (-24.43, -43.35);

  got.x = _mm_cvtu64_sd (src.x, uval);
  compute_vcvtusi2sd (src.a, uval, expected);

  if (check_union128d (got, expected))
    abort ();
}
/* Verify _mm_fnmsub_pd (__A, __B, __C): each element must equal
   -(A * B) - C as computed element by element in plain C.  Aborts on
   mismatch.  */
void
check_mm_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C)
{
  union128d mul1, mul2, sub, got;
  double want[2];
  int k;

  mul1.x = __A;
  mul2.x = __B;
  sub.x = __C;

  got.x = _mm_fnmsub_pd (__A, __B, __C);

  for (k = 0; k < 2; k++)
    want[k] = -mul1.a[k] * mul2.a[k] - sub.a[k];

  if (check_union128d (got, want))
    abort ();
}
/* Pass a double vector and a float vector to test () and require the
   result to hold the float vector's low element widened to double in
   the low slot and the double vector's high element in the high slot.
   Aborts on mismatch.  */
static void
TEST (void)
{
  union128d dsrc, got;
  union128 fsrc;
  double want[2];

  dsrc.x = _mm_set_pd (123.321, 456.987);
  fsrc.x = _mm_set_ps (123.321, 456.987, 666.45, 231.987);

  got.x = test (dsrc.x, fsrc.x);

  want[0] = (double) fsrc.a[0];
  want[1] = dsrc.a[1];

  if (check_union128d (got, want))
    abort ();
}
/* Check _mm_round_pd against _mm_round_sd: rounding both elements at
   once must give the same values as rounding each element on its own
   with the scalar intrinsic, using the same iRoundMode.  Aborts on
   mismatch.  */
static void
TEST (void)
{
  union128d src, got;
  double want[2] = { 0.0 };
  __m128d lane;
  int k;

  src.x = _mm_set_pd (1.1234, -2.3478);
  got.x = _mm_round_pd (src.x, iRoundMode);

  /* Build the reference one element at a time.  */
  for (k = 0; k < 2; k++)
    {
      lane = _mm_load_sd (&src.a[k]);
      lane = _mm_round_sd (lane, lane, iRoundMode);
      _mm_store_sd (&want[k], lane);
    }

  if (check_union128d (got, want))
    abort ();
}
/* Exercise the unmasked _mm_scalef_sd intrinsic and compare its result
   with the reference from compute_scalefsd ().  Aborts on mismatch.  */
static void
avx512f_test (void)
{
  union128d lhs, rhs, got;
  double expected[SIZE];
  int n;

  for (n = 0; n < SIZE; n++)
    {
      lhs.a[n] = 11.5 * (n + 1);
      rhs.a[n] = 10.5 * (n + 1);
    }

  got.x = _mm_scalef_sd (lhs.x, rhs.x);
  compute_scalefsd (lhs.a, rhs.a, expected);

  if (check_union128d (got, expected))
    abort ();
}
/* Pass a double vector to test () and require each result element to
   equal the scalar square root (_mm_sqrt_sd) of the corresponding
   input element.  Aborts on mismatch.  */
static void
TEST (void)
{
  union128d src, got;
  double want[2];
  __m128d lane;
  int k;

  src.x = _mm_set_pd (2134.3343, 1234.635654);
  got.x = test (src.x);

  /* Build the reference one element at a time with the scalar form.  */
  for (k = 0; k < 2; k++)
    {
      lane = _mm_load_sd (&src.a[k]);
      lane = _mm_sqrt_sd (lane, lane);
      _mm_store_sd (&want[k], lane);
    }

  if (check_union128d (got, want))
    abort ();
}