/*
 * Codegen test for _mm_sqrt_sd: both operands are forwarded unchanged to the
 * intrinsic.  The check directives inside the body name the expected
 * llvm.x86.sse2.sqrt.sd call and the expected sqrtsd instruction; they are
 * presumably consumed by the test harness and must stay verbatim.
 */
__m128d test_mm_sqrt_sd(__m128d A, __m128d B) {
  // DAG-LABEL: test_mm_sqrt_sd
  // DAG: call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %{{.*}})
  //
  // ASM-LABEL: test_mm_sqrt_sd
  // ASM: sqrtsd
  return _mm_sqrt_sd(A, B);
}
/*
 * Length of the 3-component vector (vec1[0], vec1[1], vec2[0]).
 *
 * vec1: both lanes are used as components.
 * vec2: only the low lane is used as a component; its high lane never
 *       reaches the result.
 *
 * Returns a register whose low lane is
 *     sqrt(vec1[0]^2 + vec2[0]^2 + vec1[1]^2)
 * and whose high lane is vec1[1], copied through by _mm_sqrt_sd.
 *
 * The function is static: under C99/C11 inline semantics a plain __inline
 * definition provides no external definition, so any call the compiler
 * decides not to inline (e.g. at -O0) would fail to link.
 */
static __inline __m128d Length(__m128d vec1, __m128d vec2)
{
    /* (vec1[0]^2, vec1[1]^2) */
    __m128d sq1 = _mm_mul_pd(vec1, vec1);
    /* low lane: vec2[0]^2 */
    __m128d sq2 = _mm_mul_sd(vec2, vec2);
    /* lanes of sq1 swapped, so vec1[1]^2 sits in the low lane */
    __m128d sq1_swapped = _mm_shuffle_pd(sq1, sq1, 1);

    /* Accumulate in the low lane only; the summation order is kept as
     * (vec1[0]^2 + vec2[0]^2) + vec1[1]^2 so rounding is unchanged. */
    __m128d sum = _mm_add_sd(sq1, sq2);
    sum = _mm_add_sd(sum, sq1_swapped);

    /* Scalar square root of the low lane; high lane taken from vec1. */
    return _mm_sqrt_sd(vec1, sum);
}
/*
 * Square root of val.
 *
 * When USE_SSE2 is defined the value is computed with the scalar SSE2
 * square-root intrinsic; otherwise the call is forwarded to core_sqrt().
 */
F64 root(F64 val)
{
#ifdef USE_SSE2
    __m128d i = _mm_set_sd(val);

    /* _mm_sqrt_sd copies the upper lane from its first operand.  The same
     * initialized register is passed for both operands: only the low lane
     * is stored below, and feeding a never-initialized __m128d here would
     * be a read of an indeterminate value. */
    i = _mm_sqrt_sd(i, i);

    _mm_store_sd(&val, i);
    return val;
#else
    return core_sqrt(val);
#endif
}
/*
 * Compares the value returned by test() for a fixed two-lane input against
 * a reference built one lane at a time with the scalar square-root
 * intrinsic.  Calls abort() when check_union128d() reports a non-zero result.
 */
static void
TEST (void)
{
  union128d actual, input;
  double expected[2];
  int lane;

  input.x = _mm_set_pd (2134.3343,1234.635654);
  actual.x = test (input.x);

  /* Reference: load each input lane as a scalar, take its square root and
     store the low lane of the result. */
  for (lane = 0; lane < 2; lane++)
    {
      __m128d scalar = _mm_load_sd (&input.a[lane]);
      __m128d scalar_root = _mm_sqrt_sd (scalar, scalar);

      _mm_store_sd (&expected[lane], scalar_root);
    }

  if (check_union128d (actual, expected))
    abort ();
}
/*SSE2 contains an instruction SQRTSD. This instruction Computes the square root of the low-order double-precision floating-point value in an XMM register or in a 64-bit memory location and writes the result in the low-order quadword of another XMM register. The corresponding intrinsic is _mm_sqrt_sd()*/ double FN_PROTOTYPE(sqrt)(double x) { __m128d X128; double result; UT64 uresult; if(x < 0.0) { uresult.u64 = 0xfff8000000000000; __amd_handle_error(DOMAIN, EDOM, "sqrt", x, 0.0 , uresult.f64); return uresult.f64; } /*Load x into an XMM register*/ X128 = _mm_load_sd(&x); /*Calculate sqrt using SQRTSD instrunction*/ X128 = _mm_sqrt_sd(X128, X128); /*Store back the result into a double precision floating point number*/ _mm_store_sd(&result, X128); return result; }
// Square root of x, computed with the scalar SSE2 square-root intrinsic.
// The argument is placed in the low lane of a register, the intrinsic is
// applied with that register as both operands, and the low lane of the
// result is returned.
extern "C" YEP_PRIVATE_SYMBOL double sqrt(double x) {
	const __m128d operand = _mm_set_sd(x);
	const __m128d rooted = _mm_sqrt_sd(operand, operand);
	return _mm_cvtsd_f64(rooted);
}
/*
 * Wrapper around _mm_sqrt_sd, compiled with the "sse2" target attribute.
 *
 * a: supplies the upper lane of the result.
 * b: its low lane is the value whose square root is taken.
 *
 * Returns whatever _mm_sqrt_sd(a, b) returns: sqrt of the low lane of b in
 * the low lane, upper lane copied from a.
 */
__m128d __attribute__((__target__("sse2")))
mm_sqrt_sd_wrap(__m128d a, __m128d b)
{
    __m128d result = _mm_sqrt_sd(a, b);

    return result;
}