__SIMDd _SIMD_abs_pd(__SIMDd a) {
#ifdef USE_SSE
    /* -0.0 has only the sign bit set; andnot clears that bit in every lane. */
    return _mm_andnot_pd(_mm_set1_pd(-0.0), a);
#elif defined USE_AVX
    return _mm256_andnot_pd(_mm256_set1_pd(-0.0), a);
#elif defined USE_IBM
    return vec_abs(a);
#endif
}
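A minimal usage sketch for the AVX path, assuming __SIMDd is typedef'd to __m256d when USE_AVX is defined (that typedef is an assumption, not shown in the snippet above):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __SIMDd v = _mm256_set_pd(-1.5, 2.0, -0.25, 3.0);  /* lanes given highest-first */
    __SIMDd r = _SIMD_abs_pd(v);                       /* sign bit cleared per lane */
    double out[4];
    _mm256_storeu_pd(out, r);
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  /* prints: 3 0.25 2 1.5 */
    return 0;
}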
inline F64vec4 mask_not(const F64vec4 &l) {
    /* All-ones bit pattern acts as "true"; andnot against it yields the bitwise NOT of l. */
    static const union {
        unsigned int i[8];  /* unsigned: 0xffffffff would narrow if stored in signed int */
        __m256d m;
    } __f64vec4_true = { { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
                           0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff } };
    return _mm256_andnot_pd(l, __f64vec4_true.m);
}
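A hedged usage sketch: build a comparison mask with the raw intrinsic, wrap it in F64vec4 (the dvec.h-style class is assumed here to convert to and from __m256d), then invert it:

__m256d x = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
__m256d y = _mm256_set1_pd(2.5);
F64vec4 gt = _mm256_cmp_pd(x, y, _CMP_GT_OQ);  /* all-ones lanes where x > y */
F64vec4 ngt = mask_not(gt);                    /* complement: all-ones lanes where !(x > y) */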
__SIMDd _SIMD_sel_pd(__SIMDd a, __SIMDd b, void** resultPtr) {
#ifdef USE_SSE
    __SIMDd* result = (__SIMDd*) (*resultPtr);
    /* Blend: (~mask & a) | (mask & b) -- take b where the mask bits are set, a elsewhere. */
    return _mm_or_pd(_mm_andnot_pd(*result, a), _mm_and_pd(*result, b));
#elif defined USE_AVX
    __SIMDd* result = (__SIMDd*) (*resultPtr);
    return _mm256_or_pd(_mm256_andnot_pd(*result, a), _mm256_and_pd(*result, b));
#elif defined USE_IBM
    __SIMDd* result = (__SIMDd*) (*resultPtr);
    /* vec_sel picks bits of b where the mask bit is 1, bits of a where it is 0. */
    return vec_sel(a, b, *result);
#endif
}
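A sketch of driving the AVX path, assuming the same __SIMDd-to-__m256d typedef as before, a mask produced by _mm256_cmp_pd, and that resultPtr is the address of a pointer to the mask vector (matching the double indirection above):

__SIMDd a    = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
__SIMDd b    = _mm256_set_pd(10.0, 20.0, 30.0, 40.0);
__SIMDd mask = _mm256_cmp_pd(a, _mm256_set1_pd(2.5), _CMP_GT_OQ);  /* all-ones where a > 2.5 */
__SIMDd* mp  = &mask;
__SIMDd r    = _SIMD_sel_pd(a, b, (void**) &mp);  /* lanes of b where mask set, a elsewhere */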
static void avx_test (void) {
    int i;
    union256d u, s1, s2;
    long long source1[4] = { 34545, 95567, 23443, 5675 };
    long long source2[4] = { 674, 57897, 93459, 45624 };
    long long d[4];
    long long e[4];

    /* Load the 64-bit integer bit patterns as packed doubles. */
    s1.x = _mm256_loadu_pd ((double *) source1);
    s2.x = _mm256_loadu_pd ((double *) source2);

    /* andnot computes (~s1) & s2 bitwise, regardless of the lane type. */
    u.x = _mm256_andnot_pd (s1.x, s2.x);
    _mm256_storeu_pd ((double *) d, u.x);

    /* Reference result computed with scalar integer ops. */
    for (i = 0; i < 4; i++)
        e[i] = (~source1[i]) & source2[i];

    /* checkVl compares the two arrays element-wise; nonzero means mismatch. */
    if (checkVl (d, e, 4))
        abort ();
}
inline vector4d abs(const vector4d& rhs) {
    __m256d sign_mask = _mm256_set1_pd(-0.);  // -0. = 1 << 63
    return _mm256_andnot_pd(sign_mask, rhs);
}
BI_FORCE_INLINE inline avx_double abs(const avx_double x) {
    avx_double res;
    res.packed = _mm256_andnot_pd(_mm256_set1_pd(-0.0), x.packed);
    return res;
}
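For reference, a scalar sketch of the sign-bit trick that the abs variants above all rely on, assuming 64-bit IEEE-754 doubles (scalar_abs_bits is a hypothetical helper name, not from any of the snippets):

#include <stdint.h>
#include <string.h>

static double scalar_abs_bits(double x) {
    const uint64_t sign = 0x8000000000000000ull;  /* the bit pattern of -0.0 */
    uint64_t bits;
    memcpy(&bits, &x, sizeof bits);  /* well-defined type pun */
    bits &= ~sign;                   /* andnot: clear only the sign bit */
    memcpy(&x, &bits, sizeof x);
    return x;                        /* scalar_abs_bits(-1.5) == 1.5 */
}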