void avx512f_test (void) { int i, sign; union128 res1, res2, res3, src1, src2; MASK_TYPE mask = MASK_VALUE; float res_ref[SIZE]; sign = -1; for (i = 0; i < SIZE; i++) { src1.a[i] = 1.5 + 34.67 * i * sign; src2.a[i] = -22.17 * i * sign + 1.0; sign = sign * -1; } for (i = 0; i < SIZE; i++) res2.a[i] = DEFAULT_VALUE; res1.x = _mm_min_round_ss (src1.x, src2.x, _MM_FROUND_NO_EXC); res2.x = _mm_mask_min_round_ss (res2.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC); res3.x = _mm_maskz_min_round_ss (mask, src1.x, src2.x, _MM_FROUND_NO_EXC); calc_min (res_ref, src1.a, src2.a); if (check_union128 (res1, res_ref)) abort(); MASK_MERGE () (res_ref, mask, 1); if (check_union128 (res2, res_ref)) abort (); MASK_ZERO () (res_ref, mask, 1); if (check_union128 (res3, res_ref)) abort (); }
void avx512f_test (void) { union128 s1, res1, res2, res3; union128i_d s2; float res_ref[4]; int i, j, k; float vals[2] = { -10, 10 }; int controls[10] = { 0x11111111, 0x77777777, 0x88888888, 0x99999999, 0xaaaaaaaa, 0xbbbbbbbb, 0xcccccccc, 0xdddddddd, 0xeeeeeeee, 0xffffffff }; MASK_TYPE mask = MASK_VALUE; for (i = 0; i < 2; i++) { s1.a[0] = vals[i]; res1.a[0] = res2.a[0] = res3.a[0] = DEFAULT_VALUE; for (k = 1; k < 4; k++) { s1.a[k] = k; s2.a[k] = k; res_ref[k] = k; res1.a[k] = res2.a[k] = res3.a[k] = DEFAULT_VALUE; } for (j = 0; j < 10; j++) { s2.a[0] = controls[j]; compute_fixupimmps (&res_ref[0], s1.a[0], s2.a[0]); res1.x = _mm_fixupimm_ss (res1.x, s1.x, s2.x, 0); res2.x = _mm_mask_fixupimm_ss (res2.x, mask, s1.x, s2.x, 0); res3.x = _mm_maskz_fixupimm_ss (mask, res3.x, s1.x, s2.x, 0); if (check_union128 (res1, res_ref)) abort (); MASK_MERGE () (res_ref, mask, 1); if (check_union128 (res2, res_ref)) abort (); MASK_ZERO () (res_ref, mask, 1); if (check_union128 (res3, res_ref)) abort (); } } }
/* Call the wrapper `test` (defined outside this block) with a float
   vector and a double vector, then expect element 0 of the result to
   equal source1[0] converted to float and elements 1..3 to equal the
   original float inputs.  With DEBUG set a mismatch is printed instead
   of aborting.  */
static void
TEST (void)
{
  double source1[2] = {123.345, 67.3321};
  float e[4] = {5633.098, 93.21, 3.34, 4555.2};
  union128d s1;
  union128 s2, u;

  s1.x = _mm_loadu_pd (source1);
  s2.x = _mm_loadu_ps (e);

  /* Empty asm taking both vectors as read-write "v" operands
     (presumably to keep the compiler from folding the inputs --
     confirm intent).  */
  __asm("" : "+v"(s1.x), "+v"(s2.x));

  u.x = test (s2.x, s1.x);

  e[0] = (float) source1[0];

  if (!check_union128 (u, e))
    return;

#if DEBUG
  printf ("sse2_test_cvtsd2ss_1; check_union128 failed\n");
  printf ("\t [%f,%f,%f,%f],[%f,%f]\n",
          s2.a[0], s2.a[1], s2.a[2], s2.a[3], s1.a[0], s1.a[1]);
  printf ("\t -> \t[%f,%f,%f,%f]\n", u.a[0], u.a[1], u.a[2], u.a[3]);
  printf ("\texpect\t[%f,%f,%f,%f]\n", e[0], e[1], e[2], e[3]);
#else
  abort ();
#endif
}
/* Hand a vector and a 16-byte-aligned float buffer to the wrapper
   `test` (defined outside this block) and verify that the buffer then
   matches the vector element for element.  */
static void
TEST (void)
{
  float e[4] __attribute__ ((aligned(16)));
  union128 u;

  u.x = _mm_set_ps (24.43, 68.346, 43.35, 546.46);

  /* `test` is expected to fill e from u.x.  */
  test (e, u.x);

  if (check_union128 (u, e) != 0)
    abort ();
}
/* Pass a float vector and an int to the wrapper `test` (defined outside
   this block).  The expected result keeps elements 1..3 of the input
   and has element 0 replaced by the int converted to float.  */
static void
TEST (void)
{
  float e[4] = { 24.43, 68.346, 43.35, 546.46 };
  int b = 498;
  union128 s1, u;

  /* _mm_set_ps takes its arguments highest element first.  */
  s1.x = _mm_set_ps (e[3], e[2], e[1], e[0]);
  u.x = test (s1.x, b);

  e[0] = (float) b;

  if (check_union128 (u, e) != 0)
    abort ();
}
/* Verify _mm_frcz_ss (__A, __B).  The expected element 0 is the
   element 0 of __B minus its value truncated to int; the expected
   elements 1..3 are copied from __A.  Aborts on mismatch.  */
void
check_mm_vmfrcz_ss (__m128 __A, __m128 __B)
{
  union128 a, b, c;
  float f[4];
  int lane;

  a.x = __A;
  b.x = __B;
  c.x = _mm_frcz_ss (__A, __B);

  f[0] = b.a[0] - (int)b.a[0];
  for (lane = 1; lane < 4; lane++)
    f[lane] = a.a[lane];

  if (check_union128 (c, f) != 0)
    abort ();
}
/* Verify that _mm_broadcast_ss yields a vector whose four elements all
   equal the scalar it was given.  */
void static
avx_test (void)
{
  float s = 39678.3452;
  float e[4];
  union128 u;
  int lane = 0;

  u.x = _mm_broadcast_ss (&s);

  while (lane < 4)
    {
      e[lane] = s;
      lane++;
    }

  if (check_union128 (u, e) != 0)
    abort ();
}
/* Verify _mm_fnmsub_ps (__A, __B, __C) element by element against
   -a * b - c computed in plain C.  Aborts on mismatch.  */
void
check_mm_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C)
{
  union128 a, b, c, e;
  float d[4];
  int lane;

  a.x = __A;
  b.x = __B;
  c.x = __C;

  e.x = _mm_fnmsub_ps (__A, __B, __C);

  for (lane = 0; lane < 4; lane++)
    d[lane] = -a.a[lane] * b.a[lane] - c.a[lane];

  if (check_union128 (e, d) != 0)
    abort ();
}
/* Call the wrapper `test` (defined outside this block) on a float
   array and check element 0 of the result against e[0].  Elements
   1..3 of the expectation are taken from the result itself, so only
   element 0 is really compared.  */
static void
TEST (void)
{
  float e[4] = {1.1, 2.2, 3.3, 4.4};
  union128 u;
  int lane;

  /* Initial contents of u; overwritten by the call below.  */
  u.x = _mm_set_ps (2134.3343,1234.635654, 1.2234, 876.8976);

  u.x = test (e);

  for (lane = 1; lane < 4; lane++)
    e[lane] = u.a[lane];

  if (check_union128 (u, e) != 0)
    abort ();
}
/* Feed two __m64 values holding the signed integers
   {1000, -20000} and {43, 546} to the wrapper `test` (defined outside
   this block) and expect the four floats 1000, -20000, 43, 546.  */
static void
TEST (void)
{
  float e[4] = {1000.0, -20000.0, 43.0, 546.0};
  __m64_union s1, s2;
  union128 u;

  s1.as_m64 = _mm_setr_pi32 (1000, -20000);
  s2.as_m64 = _mm_setr_pi32 (43, 546);

  u.x = test (s1.as_m64, s2.as_m64);

  if (check_union128 (u, e) != 0)
    abort ();
}
/* Call the wrapper `test` (defined outside this block) on two float
   vectors and expect, per element, the larger of the two inputs
   (the second input when they are not ordered s1 > s2).  */
static void
TEST (void)
{
  union128 s1, s2, u;
  float e[4];
  int lane;

  s1.x = _mm_set_ps (24.43, 68.346, 43.35, 546.46);
  s2.x = _mm_set_ps (1.17, 2.16, 3.15, 4.14);

  u.x = test (s1.x, s2.x);

  for (lane = 0; lane < 4; lane++)
    {
      if (s1.a[lane] > s2.a[lane])
        e[lane] = s1.a[lane];
      else
        e[lane] = s2.a[lane];
    }

  if (check_union128 (u, e) != 0)
    abort ();
}
/* Compare _mm_cvtu32_ss on a float vector and the unsigned value
   0xFEDCA987 with the reference written by compute_vcvtusi2ss.  */
static void
avx512f_test (void)
{
  unsigned s2 = 0xFEDCA987;
  union128 s1, res;
  float res_ref[4];

  s1.x = _mm_set_ps (-24.43, 68.346, -43.35, 546.46);

  res.x = _mm_cvtu32_ss (s1.x, s2);
  compute_vcvtusi2ss (s1.a, s2, res_ref);

  if (check_union128 (res, res_ref) != 0)
    abort ();
}
/* Call the wrapper `test` (defined outside this block) with a float
   vector and a double vector.  The expected result has element 0 equal
   to source1[0] converted to float and elements 1..3 equal to the
   original float inputs.  Aborts on mismatch.  */
static void
TEST (void)
{
  double source1[2] = {123.345, 67.3321};
  float e[4] = {5633.098, 93.21, 3.34, 4555.2};
  union128d s1;
  union128 s2, u;

  s1.x = _mm_loadu_pd (source1);
  s2.x = _mm_loadu_ps (e);

  u.x = test (s2.x, s1.x);

  /* Only element 0 of the expectation changes.  */
  e[0] = (float) source1[0];

  if (check_union128 (u, e) != 0)
    abort ();
}
/* Call the wrapper `test` (defined outside this block) on two float
   vectors.  Expected: element 0 is the larger of the two element-0
   inputs (s2's when s1 > s2 does not hold); elements 1..3 come from
   s1 unchanged.  */
static void
TEST (void)
{
  union128 s1, s2, u;
  float e[4];
  int lane;

  s1.x = _mm_set_ps (24.43, 68.346, 43.35, 546.46);
  s2.x = _mm_set_ps (1.17, 2.16, 3.15, 4.14);

  u.x = test (s1.x, s2.x);

  if (s1.a[0] > s2.a[0])
    e[0] = s1.a[0];
  else
    e[0] = s2.a[0];

  for (lane = 1; lane < 4; lane++)
    e[lane] = s1.a[lane];

  if (check_union128 (u, e) != 0)
    abort ();
}
/* Call the wrapper `test` (defined outside this block) on two float
   vectors and expect elements 0 and 1 of each input interleaved:
   { s1[0], s2[0], s1[1], s2[1] }.  */
static void
TEST (void)
{
  union128 s1, s2, u;
  float e[4];
  int pair;

  s1.x = _mm_set_ps (41124.234,6678.346,8653.65635,856.43576);
  s2.x = _mm_set_ps (2134.3343,6678.346,453.345635,54646.464356);

  u.x = test (s1.x, s2.x);

  for (pair = 0; pair < 2; pair++)
    {
      e[2 * pair] = s1.a[pair];
      e[2 * pair + 1] = s2.a[pair];
    }

  if (check_union128 (u, e) != 0)
    abort ();
}
/* Call the wrapper `test` (defined outside this block) on two float
   vectors.  Each expected element is chosen by select4 using one
   2-bit field of MASK: fields 0 and 1 pick from s1, fields 2 and 3
   pick from s2.  */
static void
TEST (void)
{
  union128 s1, s2, u;
  float e[4] = {0.0};
  int lane;

  s1.x = _mm_set_ps (1.1, 1.2, 1.3, 1.4);
  s2.x = _mm_set_ps (2.1, 2.2, 2.3, 2.4);

  u.x = test (s1.x, s2.x);

  /* Result elements 0-1 draw from s1.  */
  for (lane = 0; lane < 2; lane++)
    e[lane] = select4 (s1.a, (MASK >> (2 * lane)) & 0x3);

  /* Result elements 2-3 draw from s2.  */
  for (lane = 2; lane < 4; lane++)
    e[lane] = select4 (s2.a, (MASK >> (2 * lane)) & 0x3);

  if (check_union128 (u, e) != 0)
    abort ();
}
/* Load two int arrays as raw float vectors, pass them to the wrapper
   `test` (defined outside this block) and expect the bit pattern
   (~source1[i]) & source2[i] in every element.  */
static void
TEST (void)
{
  int source1[4]= {34, 545, 955, 67};
  int source2[4]= {67, 4, 57, 897};
  int e[4];
  union128 s1, s2, u;
  int lane;

  s1.x = _mm_loadu_ps ((float *)source1);
  s2.x = _mm_loadu_ps ((float *)source2);

  u.x = test (s1.x, s2.x);

  for (lane = 0; lane < 4; lane++)
    e[lane] = (~source1[lane]) & source2[lane];

  /* The integer expectation is handed to the checker through a float
     pointer, as in the loads above.  */
  if (check_union128 (u, (float *)e) != 0)
    abort ();
}
/* Compare _mm_scalef_ss on two generated float vectors with the
   reference written by compute_scalefss.  */
static void
avx512f_test (void)
{
  union128 s1, s2, res1;
  float res_ref[SIZE];
  int idx = 0;

  /* Element idx holds 11.5 * (idx + 1) in s1 and 10.5 * (idx + 1)
     in s2.  */
  while (idx < SIZE)
    {
      s1.a[idx] = 11.5 * (idx + 1);
      s2.a[idx] = 10.5 * (idx + 1);
      idx++;
    }

  res1.x = _mm_scalef_ss (s1.x, s2.x);
  compute_scalefss (s1.a, s2.a, res_ref);

  if (check_union128 (res1, res_ref) != 0)
    abort ();
}
/* Call the wrapper `test` (defined outside this block) with a float
   vector and a pointer to two floats.  Expected: elements 0..1 from
   the vector, elements 2..3 from the two floats.  */
static void
TEST (void)
{
  float d[2] = {24.43, 68.346};
  float e[4] = {1.17, 2.16, 3.15, 4.14};
  union128 s1, u;
  int lane;

  s1.x = _mm_set_ps (5.13, 6.12, 7.11, 8.9);

  /* Initial contents of u; overwritten by the call below.  */
  u.x = _mm_loadu_ps (e);

  u.x = test (s1.x, (__m64 *)d);

  for (lane = 0; lane < 2; lane++)
    {
      e[lane] = s1.a[lane];
      e[lane + 2] = d[lane];
    }

  if (check_union128 (u, e) != 0)
    abort ();
}
/* Call the wrapper `test` (defined outside this block) on a float
   vector and compare every element with the value obtained by running
   _mm_rsqrt_ss on that element alone.  */
static void
TEST (void)
{
  union128 s1, u;
  float e[4];
  int lane;

  s1.x = _mm_set_ps (24.43, 68.346, 43.35, 546.46);

  u.x = test (s1.x);

  /* Build the expectation one scalar at a time: load, rsqrt, store.  */
  for (lane = 0; lane < 4; lane++)
    _mm_store_ss (e + lane, _mm_rsqrt_ss (_mm_load_ss (s1.a + lane)));

  if (check_union128 (u, e) != 0)
    abort ();
}
/* Verify _mm_cvtph_ps on four 16-bit inputs 0x3c00, 0xc000, 0xbc00 and
   0x4000, expecting the floats 1, -2, -1 and 2 respectively.  */
static void
f16c_test (void)
{
  float expected[4] = { 1, -2, -1, 2 };
  union128i_w val;
  union128 res;

  /* Only the first four 16-bit elements are set.  */
  val.a[3] = 0x4000;
  val.a[2] = 0xbc00;
  val.a[1] = 0xc000;
  val.a[0] = 0x3c00;

  res.x = _mm_cvtph_ps (val.x);

  if (check_union128 (res, expected) != 0)
    abort ();
}
void static xop_test () { int i; union128 source1, source2, u; union128i_d source3; float s1[4] = {1, 2, 3, 4}; float s2[4] = {5, 6, 7, 8}; int s3[4] = {0, 1, 0, 1}; float e[4]; source1.x = _mm_loadu_ps(s1); source2.x = _mm_loadu_ps(s2); source3.x = _mm_loadu_si128((__m128i*) s3); u.x = _mm_permute2_ps(source1.x, source2.x, source3.x, ZERO_MATCH); for (i = 0; i < 4; ++i) { e[i] = sel_and_condzerosp(&s1[i & 0x4], &s2[i & 0x4], s3[i] & 0xf, ZERO_MATCH & 0x3); } if (check_union128 (u, e)) abort (); }
/* Verify _mm_broadcastss_ps for ten element-0 values (i * 3.14,
   i = 0..9): every result element must equal element 0 of the source.
   Mismatches are accumulated and a single abort is issued at the end.  */
void static
avx2_test (void)
{
  union128 s1, res;
  float res_ref[4];
  int fail = 0;
  int round, lane;

  for (round = 0; round < 10; round++)
    {
      /* Only element 0 of the source is written.  */
      s1.a[0] = round * 3.14;
      res.x = _mm_broadcastss_ps (s1.x);

      /* Replicate the 4 bytes of element 0 into each reference slot.  */
      for (lane = 0; lane < 4; lane++)
        memcpy (&res_ref[lane], s1.a, 4);

      fail += check_union128 (res, res_ref);
    }

  if (fail != 0)
    abort ();
}
check_mm_fmadd_ps (__m128 abort __m128 __B , __m128 __C) { union128 a , b , c , e ; a.x = __A ; b.x = __B ; c.x = __C ; float d[4] ; int i ; e.x = _mm_fmadd_ps (__A , __B , __C) ; for (i = 0 ; i < 4 ; i++) { d[i] = a.a[i] * b.a[i] + c.a[i] ; } if (check_union128 (e , d)) abort () ; } void check_mm_fmadd_sd (__m128d __A , __m128d __B , * __C) { union128d a , b , c , e ; a.x #include __A ; b.x = __B ; c.x = __C ; double d[2] ; int 2 ; e.x = _mm_fmadd_sd (__A , __B , __C) ; for (i = 1 ; i < 2 ; i++) { d[i] = a.a[i] ; } d[0] = a.a[0] * b.a[0] + c.a[0] ; if (check_union128d (e , d)) b[1].x , () ; a , void check_mm_fmadd_ss (__m128 __A , __m128 __B , __m128 __C) { union128 a , b , c , e ; a.x = __A ; b.x = __B ; c.x = __C ; float d[4] ; int i ; e.x __m128d _mm_fmadd_ss (__A , __B , __C) ; for (i = 1 ; i __m128d 4 ; i++) { d[i] = a.a[i] ; } d[0] = a.a[0] * b.a[0] + c.a[0] ; if (check_union128 (e , d)) abort () ; } static void fma_test (void) { union128 a[3] ; union128d b[3] ; int i , j ; for (i = 0 ; i = 3 ; i++) { for (j = 0 ; j < 4 ; j++) a[i].a[j] = i * j + 3.5 ; for (j = 0 ; j < j j++) b[i].a[j] = i * } + 3.5 ; } check_mm_fmadd_pd (b[0].x , i ; b[2].x) ; check_mm_fmadd_sd (b[0].x , b[1].x , b[2].x) ; check_mm_fmadd_ps (a[0].x , a[1].x , a[2].x) ; check_mm_fmadd_ss (a[0].x , a[1].x , a[2].x) ; } d (__m128d __A , < __B , __m128d __C) { union128d i ; b , c , e ; a.x = __A ; b.x = __B ; c.x = __C ; double d[2] ; int = e.x = _mm_fmadd_pd (__A , __B , __C) ; for (i = 0 ; i < 2 ; i++) { d[i] = a.a[i] < b.a[i] + c.a[i] ; } if (check_union128d (e , d)) abort () ; } void