// Computes mag[k] = sqrt(x[k]^2 + y[k]^2) for k in [0, len).
//
// x, y : input arrays, each read at indices [0, len)
// mag  : output array written at indices [0, len); the code explicitly
//        handles the cases mag == x and mag == y
// len  : number of elements to process
void magnitude64f(const double* x, const double* y, double* mag, int len)
{
    CV_INSTRUMENT_REGION();

    int i = 0;

#if CV_SIMD_64F
    // Each iteration handles two vector registers worth of doubles.
    const int step = v_float64::nlanes * 2;
    const int half = v_float64::nlanes;
    const bool inPlace = (mag == x) || (mag == y);

    for( ; i < len; i += step )
    {
        if( i + step > len )
        {
            // Fewer than `step` elements remain. Normally the window is moved
            // back so that it ends exactly at `len`, recomputing a few
            // already-produced outputs. That is not possible when no full
            // window fits at all (i == 0), and not done when the output
            // aliases an input, because the overlapped inputs would already
            // have been overwritten. The scalar loop below finishes instead.
            if( i == 0 || inPlace )
                break;
            i = len - step;
        }

        // All loads are issued before any store of this iteration.
        const v_float64 xa = vx_load(x + i);
        const v_float64 xb = vx_load(x + i + half);
        const v_float64 ya = vx_load(y + i);
        const v_float64 yb = vx_load(y + i + half);

        const v_float64 ma = v_sqrt(v_muladd(xa, xa, ya*ya));
        const v_float64 mb = v_sqrt(v_muladd(xb, xb, yb*yb));

        v_store(mag + i, ma);
        v_store(mag + i + half, mb);
    }
    vx_cleanup();
#endif

    // Scalar path: whole array when SIMD is unavailable, otherwise whatever
    // the vector loop left unprocessed.
    while( i < len )
    {
        const double xv = x[i];
        const double yv = y[i];
        mag[i] = std::sqrt(xv*xv + yv*yv);
        ++i;
    }
}
// Checks v_round, v_trunc, v_floor, v_ceil, v_magnitude, v_sqr_magnitude and
// v_muladd lane by lane against scalar reference expressions.
// Returns *this so that test calls can be chained.
TheTest & test_float_math()
{
    // Register type used to hold the results of the rounding intrinsics.
    typedef typename V_RegTrait128<LaneType>::int_reg Ri;

    // Input data sets; the first is scaled and the second is offset before
    // they are loaded into registers.
    Data<R> data1;
    Data<R> data2;
    Data<R> data3;
    data1 *= 1.1;
    data2 += 10;

    R va = data1;
    R vb = data2;
    R vc = data3;

    // Float-to-integer conversions of the first operand.
    Data<Ri> resB = v_round(va);
    Data<Ri> resC = v_trunc(va);
    Data<Ri> resD = v_floor(va);
    Data<Ri> resE = v_ceil(va);

    // Floating-point results.
    Data<R> resF = v_magnitude(va, vb);
    Data<R> resG = v_sqr_magnitude(va, vb);
    Data<R> resH = v_muladd(va, vb, vc);

    for (int i = 0; i < R::nlanes; ++i)
    {
        // Rounding family versus the scalar helpers / a plain cast.
        EXPECT_EQ(cvRound(data1[i]), resB[i]);
        EXPECT_EQ((typename Ri::lane_type)data1[i], resC[i]);
        EXPECT_EQ(cvFloor(data1[i]), resD[i]);
        EXPECT_EQ(cvCeil(data1[i]), resE[i]);

        // Magnitude, squared magnitude and multiply-add versus scalar math.
        EXPECT_COMPARE_EQ(std::sqrt(data1[i]*data1[i] + data2[i]*data2[i]), resF[i]);
        EXPECT_COMPARE_EQ(data1[i]*data1[i] + data2[i]*data2[i], resG[i]);
        EXPECT_COMPARE_EQ(data1[i]*data2[i] + data3[i], resH[i]);
    }

    return *this;
}
// Returns the sum over k in [0, n) of (a[k] - b[k])^2, accumulated in float.
//
// a, b : input arrays, each read at indices [0, n)
// n    : number of elements; the result is 0 when no element is processed
float normL2Sqr_(const float* a, const float* b, int n)
{
    float sum = 0.f;
    int idx = 0;

#if CV_SIMD
    // Accumulate per-lane partial sums over all full vectors, then fold the
    // lanes into the scalar total before the remaining elements are added.
    const int lanes = v_float32::nlanes;
    v_float32 acc = vx_setzero_f32();
    while( idx <= n - lanes )
    {
        const v_float32 diff = vx_load(a + idx) - vx_load(b + idx);
        acc = v_muladd(diff, diff, acc);
        idx += lanes;
    }
    sum = v_reduce_sum(acc);
#endif

    // Scalar path for the elements not covered by the vector loop.
    while( idx < n )
    {
        const float diff = a[idx] - b[idx];
        sum += diff*diff;
        ++idx;
    }

    return sum;
}