예제 #1
0
/*
 * Lane-wise absolute value of a packed-double vector.
 *
 * On the SSE and AVX paths the result is (~mask) & a, where every lane of
 * mask holds -0.0 (only the sign bit set), so the sign bit of each lane of
 * `a` is cleared and all other bits pass through.  The IBM path delegates
 * to vec_abs.
 *
 * Exactly one of USE_SSE, USE_AVX or USE_IBM is expected to be defined;
 * when none is, the function body is empty and nothing is returned.
 */
__SIMDd _SIMD_abs_pd(__SIMDd a)
{
#if defined(USE_SSE)
  const __m128d sign_bit = _mm_set1_pd(-0.0);
  return _mm_andnot_pd(sign_bit, a);
#elif defined(USE_AVX)
  const __m256d sign_bit = _mm256_set1_pd(-0.0);
  return _mm256_andnot_pd(sign_bit, a);
#elif defined(USE_IBM)
  return vec_abs(a);
#endif
}
예제 #2
0
파일: arch.hpp 프로젝트: huoyao/Hydro
/**
 * Bitwise complement of a 256-bit packed-double value.
 *
 * Computes (~l) & all_ones with _mm256_andnot_pd, which flips every bit
 * of `l`.
 *
 * @param l  value whose bits are inverted
 * @return   `l` with all 256 bits flipped
 */
inline F64vec4 mask_not(const F64vec4 &l)
{
    // Constant with all 256 bits set, built from eight int lanes and read
    // back through the __m256d member.  Each lane is written as -1 rather
    // than 0xffffffff: the hexadecimal literal has type unsigned int and
    // does not fit in int, which makes the brace initialisation a narrowing
    // conversion (ill-formed since C++11).  -1 yields the same bit pattern.
    static const union
    {
        int i[8];
        __m256d m;
    } all_ones = { { -1, -1, -1, -1, -1, -1, -1, -1 } };

    return _mm256_andnot_pd(l, all_ones.m);
}
예제 #3
0
/*
 * Bitwise select between two packed-double vectors.
 *
 * On the SSE and AVX paths a mask vector is read through resultPtr and the
 * function returns (~mask & a) | (mask & b): bits set in the mask take the
 * corresponding bits of `b`, clear bits take those of `a`.
 *
 * NOTE(review): the two x86 paths treat resultPtr differently -- SSE
 * dereferences it once and casts the stored pointer, AVX casts resultPtr
 * itself.  Confirm which convention callers actually use.
 * NOTE(review): the USE_IBM path passes `c`, which is not declared anywhere
 * in this function.
 * NOTE(review): there is no #else; if none of the USE_* macros is defined
 * the function returns nothing.
 */
__SIMDd _SIMD_sel_pd(__SIMDd a, __SIMDd b, void** resultPtr)
{
#ifdef  USE_SSE
  /* Mask lives at the address stored in *resultPtr. */
  __SIMDd* result = (__SIMDd*) (*resultPtr);
  return _mm_or_pd(_mm_andnot_pd(*result,a),_mm_and_pd(*result,b));
#elif defined USE_AVX
  /* Here resultPtr itself is reinterpreted as the mask address. */
  __SIMDd* result = (__SIMDd*) resultPtr;
  return _mm256_or_pd(_mm256_andnot_pd(*result,a),_mm256_and_pd(*result,b));
#elif defined USE_IBM
  /* `c` is undeclared in this scope -- see NOTE(review) above. */
  return vec_sel(a,b,c);
#endif
}
예제 #4
0
/*
 * Checks _mm256_andnot_pd against a scalar reference.
 *
 * Two arrays of four 64-bit integers are loaded as packed doubles, combined
 * with _mm256_andnot_pd, and stored back as integers.  The stored lanes must
 * match (~source1[i]) & source2[i] computed with plain integer arithmetic;
 * abort() is called when checkVl reports a non-zero result.
 */
static void
avx_test (void)
{
  long long source1[4] = {34545, 95567, 23443, 5675};
  long long source2[4] = {674, 57897, 93459, 45624};
  long long d[4];
  long long e[4];
  union256d u, s1, s2;
  int i;

  /* Scalar reference result. */
  for (i = 0; i < 4; i++)
    e[i] = source2[i] & ~source1[i];

  /* Vector result: the integer bit patterns are moved through the
     packed-double load/store intrinsics unchanged. */
  s1.x = _mm256_loadu_pd ((double *) source1);
  s2.x = _mm256_loadu_pd ((double *) source2);
  u.x = _mm256_andnot_pd (s1.x, s2.x);
  _mm256_storeu_pd ((double *) d, u.x);

  if (checkVl (d, e, 4) != 0)
    abort ();
}
예제 #5
0
 // Lane-wise absolute value: and-not with a vector of -0.0 (only bit 63 set
 // in each lane) clears the sign bit of every lane of rhs.
 inline vector4d abs(const vector4d& rhs)
 {
     return _mm256_andnot_pd(_mm256_set1_pd(-0.0), rhs);
 }
예제 #6
0
// Lane-wise absolute value of x: the sign bit of each packed double is
// cleared by and-not with a vector whose lanes are all -0.0.
BI_FORCE_INLINE inline avx_double abs(const avx_double x) {
  const __m256d sign_bit = _mm256_set1_pd(-0.0);
  avx_double magnitude;
  magnitude.packed = _mm256_andnot_pd(sign_bit, x.packed);
  return magnitude;
}