Пример #1
0
    div(avx_simd_complex_double<T> lhs, avx_simd_complex_double<T> rhs) {
        //lhs = [x1.real, x1.img, x2.real, x2.img]
        //rhs = [y1.real, y1.img, y2.real, y2.img]

        //ymm0 = [y1.real, y1.real, y2.real, y2.real]
        __m256d ymm0 = _mm256_movedup_pd(rhs.value);

        //ymm1 = [y1.imag, y1.imag, y2.imag, y2.imag]
        __m256d ymm1 = _mm256_permute_pd(rhs.value, 0b1111);

        //ymm2 = [x1.img, x1.real, x2.img, x2.real]
        __m256d ymm2 = _mm256_permute_pd(lhs.value, 0b0101);

        //ymm4 = [x.img * y.img, x.real * y.img]
        __m256d ymm4 = _mm256_mul_pd(ymm2, ymm1);

        //ymm5 = subadd((lhs * ymm0), ymm4)

#ifdef __FMA__
        __m256d ymm5 = _mm256_fmsubadd_pd(lhs.value, ymm0, ymm4);
#else
        __m256d t1   = _mm256_mul_pd(lhs.value, ymm0);
        __m256d t2   = _mm256_sub_pd(_mm256_set1_pd(0.0), ymm4);
        __m256d ymm5 = _mm256_addsub_pd(t1, t2);
#endif

        //ymm3 = [y.imag^2, y.imag^2]
        __m256d ymm3 = _mm256_mul_pd(ymm1, ymm1);

        //ymm0 = (ymm0 * ymm0 + ymm3)

#ifdef __FMA__
        ymm0 = _mm256_fmadd_pd(ymm0, ymm0, ymm3);
#else
        __m256d t3   = _mm256_mul_pd(ymm0, ymm0);
        ymm0         = _mm256_add_pd(t3, ymm3);
#endif

        //result = ymm5 / ymm0
        return _mm256_div_pd(ymm5, ymm0);
    }
Пример #2
0
__m256d test_mm256_fmsubadd_pd(__m256d a, __m256d b, __m256d c) {
  // CHECK-LABEL: test_mm256_fmsubadd_pd
  // CHECK: [[NEG:%.+]] = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %{{.+}}
  // CHECK: @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.+}}, <4 x double> %{{.+}}, <4 x double> [[NEG]])
  return _mm256_fmsubadd_pd(a, b, c);
}
Пример #3
0
__m256d test_mm256_fmsubadd_pd(__m256d a, __m256d b, __m256d c) {
  // CHECK: @llvm.x86.fma.vfmsubadd.pd.256
  return _mm256_fmsubadd_pd(a, b, c);
}
Пример #4
0
__m256d __attribute__((__target__("fma"))) mm256_fmsubadd_pd_wrap(__m256d a, __m256d b, __m256d c) {
  return _mm256_fmsubadd_pd(a, b, c);
}
Пример #5
0
__m256d
check_mm256_fmsubadd_pd (__m256d a, __m256d b, __m256d c)
{
  return _mm256_fmsubadd_pd (a, b, c);
}