template<typename _Ts, typename _Td> inline void cvtabs_32f( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, Size size, float a, float b ) { #if CV_SIMD v_float32 va = vx_setall_f32(a), vb = vx_setall_f32(b); const int VECSZ = v_float32::nlanes*2; #endif sstep /= sizeof(src[0]); dstep /= sizeof(dst[0]); for( int i = 0; i < size.height; i++, src += sstep, dst += dstep ) { int j = 0; #if CV_SIMD for( ; j < size.width; j += VECSZ ) { if( j > size.width - VECSZ ) { if( j == 0 || src == (_Ts*)dst ) break; j = size.width - VECSZ; } v_float32 v0, v1; vx_load_pair_as(src + j, v0, v1); v0 = v_fma(v0, va, vb); v1 = v_fma(v1, va, vb); v_store_pair_as(dst + j, v_abs(v0), v_abs(v1)); } #endif for( ; j < size.width; j++ ) dst[j] = saturate_cast<_Td>(std::abs(src[j]*a + b)); } }
/*
 * Accumulates v_abs() of the element-wise difference between the m_list
 * entries of `visitor` and `local` over the first NUMPTOS entries and
 * returns the sum.
 */
double match_hexadecagon(hexadecagon_t* local,hexadecagon_t* visitor)
{
    double total = 0;
    int k = 0;

    while (k < NUMPTOS) {
        total += v_abs(visitor->m_list[k] - local->m_list[k]);
        ++k;
    }
    return total;
}
/* * Retrieve constants of motion (as if) in 2-body problem * (of first two particles) reduced to 1-body-problem. * Returns |j|, |e|, a, T in fields provided. */ void get_reduced(struct particle parts[], int pos, double *red_e, double *red_a, double *red_t, double *red_j) { _enter_function(_UL_KEPLER, _UL_KEPLER_GET_REDUCED); int i; double r_[3], v_[3], j[3], e[3], a, omega; for(i = 0; i < 3; i++) { r_[i] = parts[pos].xp[i] - parts[0].xp[i]; v_[i] = parts[pos].vp[i] - parts[0].vp[i]; } get_constants(r_, v_, parts[0].m//+parts[pos].m , j, e, &a, &omega); *red_e = v_abs(e); *red_a = a; *red_t = 2*M_PI/omega; *red_j = v_abs(j); _exit_function(); }
// Checks v_sqrt, v_invsqrt and v_abs lane by lane against scalar references.
// v_abs is fed the negated copy of the default data, so its result is
// expected to match the absolute value of the non-negated data.
TheTest & test_sqrt_abs()
{
    Data<R> dataA;
    Data<R> dataNeg;
    dataNeg *= -1.0;

    R v_src = dataA;
    R v_neg = dataNeg;

    Data<R> resB = v_sqrt(v_src);
    Data<R> resC = v_invsqrt(v_src);
    Data<R> resE = v_abs(v_neg);

    for (int i = 0; i < R::nlanes; ++i)
    {
        EXPECT_COMPARE_EQ((float)std::sqrt(dataA[i]), (float)resB[i]);
        EXPECT_COMPARE_EQ(1/(float)std::sqrt(dataA[i]), (float)resC[i]);
        EXPECT_COMPARE_EQ((float)abs(dataA[i]), (float)resE[i]);
    }

    return *this;
}
// Checks v_abs on the difference of two vectors: the result is stored in the
// unsigned register type associated with LaneType and compared lane by lane
// with std::abs of the scalar difference.
TheTest & test_abs()
{
    typedef typename V_RegTrait128<LaneType>::u_reg Ru;
    typedef typename Ru::lane_type u_type;

    Data<R> dataA;
    Data<R> dataB(10);

    R v_lhs = dataA;
    R v_rhs = dataB;
    R v_diff = v_lhs - v_rhs;

    Data<Ru> resC = v_abs(v_diff);

    for (int i = 0; i < Ru::nlanes; ++i)
    {
        EXPECT_EQ((u_type)std::abs(dataA[i] - dataB[i]), resC[i]);
    }

    return *this;
}
// Processes one magnitude frame and returns the sum of m_mag over the first
// (m_lastPerceivedBin + 1) bins after the vector pipeline below has run.
//
// NOTE(review): the v_* helpers are defined elsewhere; from their names and
// argument order this presumably computes
//   sum_i sqrt(|m_mag[i] - mag[i]^2|)
// and then stores the squared input in m_mag for the next call - confirm
// against the helper definitions.
// `increment` is not used by this implementation.
double
SpectralDifferenceAudioCurve::processDouble(const double *mag, int increment)
{
    const int binCount = m_lastPerceivedBin + 1;

    v_convert(m_tmpbuf, mag, binCount);
    v_square(m_tmpbuf, binCount);
    v_subtract(m_mag, m_tmpbuf, binCount);
    v_abs(m_mag, binCount);
    v_sqrt(m_mag, binCount);

    double total = 0.0;
    int bin = 0;
    while (bin < binCount) {
        total += m_mag[bin];
        ++bin;
    }

    v_copy(m_mag, m_tmpbuf, binCount);

    return total;
}
int check_fast_approaches( struct particle *parts, struct particle *p, struct particle *pk/*, double r_2*/) { _enter_function(_UL_TIMESTEP, _UL_TIMESTEP_CHECK_FAST_APPROACHES); int i, collision=0; double temp, r_close_2=-1., r_temp_2, t_close, dt=.0, dt2=.0, dt3=.0, dt4=.0, dt5=.0; double *px=p->x, *pv=p->v, *pkx, *pkv, r_2; double x[3], v[3], a[3], a_[3]; assert(p != pk); add_over(1, &count_approach_checks, &count_approach_checks_over); if(pk->active) { pkx = pk->x; pkv = pk->v; } else // have to use predicted x, v and corrected derivatives { #ifndef USE_GRAPE pkx = pk->xp; #else pkx = pk->x; #endif pkv = pk->v; dt = p->t - pk->t; dt2 = .5 * dt * dt; dt3 = dt * dt2 * _1_3; dt4 = .25 * dt * dt3; dt5 = .2 * dt * dt4; } for(i = 0; i < 3; i++) { if( 1 #ifdef USE_GRAPE && pk->active #endif ) { x[i] = px[i] - pkx[i]; } else { x[i] = px[i] - (pkx[i] + dt * pk->v[i] + dt2 * (pk->a[i] + pk->ha[i]) + dt3 * (pk->a_[i] + pk->ha_[i]) + dt4 * (pk->a_2[i] + pk->ha_2[i]) #ifndef USE_GRAPE + dt5 * (pk->a_3[i] + pk->ha_3[i]) #endif ); } if(pk->active) { v[i] = pv[i] - pkv[i]; a[i] = p->ha[i] + p->a[i] - pk->ha[i] - pk->a[i]; a_[i] = p->ha_[i] + p->a_[i] - pk->ha_[i] - pk->a_[i]; } else { v[i] = pv[i] - (pkv[i] + dt * (pk->a[i] + pk->ha[i]) + dt2 * (pk->a_[i] + pk->ha_[i]) + dt3 * (pk->a_2[i] + pk->ha_2[i]) #ifndef USE_GRAPE + dt4 * (pk->a_3[i] + pk->ha_3[i]) #endif ); a[i] = p->ha[i] + p->a[i] - (pk->a[i] + pk->ha[i] + dt * (pk->a_[i] + pk->ha_[i]) + dt2 * (pk->a_2[i] + pk->ha_2[i]) #ifndef USE_GRAPE + dt3 * (pk->a_3[i] + pk->ha_3[i]) #endif ); a_[i] = p->ha_[i] + p->a_[i] - (pk->a_[i] + pk->ha_[i] + dt * (pk->a_2[i] + pk->ha_2[i]) #ifndef USE_GRAPE + dt2* (pk->a_3[i] + pk->ha_3[i]) #endif ); } } r_2 = scal_prod(x, x); // linear approximation of time of closest encounter //t_close = -scal_prod(x, v) / scal_prod(v, v); // 2nd order approximation of time of closest encounter double xv, xa, v2, va, a2, _p, _q, _p3, _q2, _d, _u, _v, dy, t2, t3, _1_a2; xv = scal_prod(x, v); xa = 
scal_prod(x, a); v2 = scal_prod(v, v); va = scal_prod(v, a); a2 = scal_prod(a, a); _1_a2 = 1. / a2; dy = - va * _1_a2; _p = (a2 * 2.* (v2 + xa) - 3. * va * va) * (_1_a2 * _1_a2); _p3 = _p * _p * _p; _q = (2. * va * va * va - va * 2.*(v2 + xa) * a2 + 2. * xv * a2 * a2) * (_1_a2 * _1_a2 * _1_a2); _q2 = _q * _q; _d = 4. * _p3 + 27. * _q2; if(_d > 0) { _u = -.5 * _q; _v = sqrt(.25 * _q2 + _p3 * _1_27); t_close = cbrt(_u + _v) + cbrt(_u - _v) + dy; } else if(_d == 0) { t_close = cbrt(.5 * _q) + dy; t3 = cbrt(-4. * _q) + dy; if(t3 > 0 && (t3 < t_close || t_close <= 0)) { t_close = t3; } } else // _d < 0 { _u = sqrt(-4. *_1_3 * _p); _v = acos(-.5 * _q * sqrt(-27. / _p3)) * _1_3; t_close = _u * _v + dy; t2 = -_u * (_v + M_PI * _1_3) + dy; t3 = -_u * (_v - M_PI * _1_3) + dy; if(t2 > 0 && (t2 < t_close || t_close <= 0)) t_close = t2; if(t3 > 0 && (t3 < t_close || t_close <= 0)) t_close = t3; } while(1) { // check distance after next step r_temp_2 = .0; for(i = 0; i < DIMENSIONS; i++) { temp = x[i] + p->dt * (v[i] + p->dt * .5 * (a[i] /*+ p->dt / 3. * a_[i]*/)); r_temp_2 += temp * temp; } if(r_2 > r_temp_2 * MAX_APPROACH_FACTOR_2 || r_2 * MAX_APPROACH_FACTOR_2 < r_temp_2) { add_over(1, &count_approach_reduce_t, &count_approach_reduce_t_over); #ifdef DEBUG_ALL fprintf(get_file(FILE_DEBUG), "\t# halving dt: approach m%d - m%d: \tr(t=%1.6e)=%1.2e\t\tr(t=%1.6e)=%1.2e\n", pk->name, p->name, t_total(p->t), convert_length(sqrt(r_2), 0), t_total(p->t+p->dt), convert_length(sqrt(r_temp_2), 0)); fflush(get_file(FILE_DEBUG)); #endif p->dt *= .5; #ifdef SYNCHRONIZE_APPROACHING_TIMESTEPS if(!pk->active) { while(pk->htlast + .5 * pk->dt > p->t + p->dt + DT_TOLERANCE) { pk->dt *= .5; #ifdef DEBUG_ALL fprintf(get_file(FILE_DEBUG), "#### [t=%1.12e] shrinking timestep for m%d as of close encounter with m%d to %e ####\n", t_total(p->t), pk->name, p->name, t_total(pk->dt)); fflush(get_file(FILE_DEBUG)); #endif } } #endif continue; } if(r_2 < 9. 
* C_2G_C2 * C_2G_C2 * (pk->m + p->m) * (pk->m + p->m)) { // collision in 3 Schwarzschild-radii collision = 1; fprintf(get_file(FILE_WARNING), "#### [t=%1.12e] COLLISION of m%d and m%d at %1.12e: %e (r_S = %e) ####\n", t_total(p->t), p->name, pk->name, t_total(p->t + t_close), convert_length(sqrt(r_2), 0), convert_length(C_2G_C2 * (pk->m + p->m), 0)); fflush(get_file(FILE_WARNING)); } if(t_close > .0 && t_close < p->dt) { // close encounter will happen _during_ next step, now calculate distance if(r_close_2 <.0) { r_close_2 = .0; for(i = 0; i < DIMENSIONS; i++) { temp = (x[i] + t_close * (v[i] + .5 * t_close * a[i])); r_close_2 += temp * temp; } } if(r_close_2 < square(3. * C_2G_C2 * (pk->m + p->m))) { // collision in 3 Schwarzschild-radii collision = 1; fprintf(get_file(FILE_WARNING), "#### [t=%1.12e] COLLISION of m%d and m%d at %1.12e: %e (r_S = %e) ####\n", t_total(p->t), p->name, pk->name, t_total(p->t + t_close), convert_length(sqrt(r_close_2), 0), convert_length(C_2G_C2 * (pk->m + p->m), 0)); fflush(get_file(FILE_WARNING)); } // approach to small multiple of impact parameter: // r'_12 < warn_fact * b = warn_fact * 2 * r_1 * m / M #ifdef WARN_CLOSEENC if(r_close_2 * parts->m * parts->m < square(WARN_APPROACH_FACT * 2 * pk->m) * scal_prod(p->xp, p->xp) && (N_MAX_DETAIL < -1 || p->name <= N_MAX_DETAIL || pk->name <= N_MAX_DETAIL) ) { fprintf(get_file(FILE_WARNING), "\t# predicted close encounter m%d - m%d: \tr(t=%1.6e)=%1.2e\t\tr(t=%1.6e)=%1.2e=%1.2fb\tp.dt=%1.2e\tpk->dt=%1.2e (%1.2e el.) [%d:%d]\n", pk->name, p->name, t_total(p->t), convert_length(sqrt(r_2), 0), t_total(p->t + t_close), convert_length(sqrt(r_close_2), 0), sqrt(r_close_2) / (2. 
* v_abs(p->xp) * pk->m) * parts->m, convert_time(p->dt, 0), convert_time(pk->dt, 0), convert_time(p->t - pk->t, 0), pk->nearestneighbour, p->nearestneighbour); fprintf(get_file(FILE_WARNING), " PCE %1.12e\t%d\t%e\t%1.10e\t%1.10e\t%1.10e\t%1.10e\t%1.10e\t%1.10e\t%d\t%e\t%1.10e\t%1.10e\t%1.10e\t%1.10e\t%1.10e\t%1.10e\n", t_total(p->t), pk->name, convert_mass(pk->m, 0), convert_length(pkx[0], 0), convert_length(pkx[1], 0), convert_length(pkx[2], 0), convert_length(convert_time(pkv[0], 1), 0), convert_length(convert_time(pkv[1], 1), 0), convert_length(convert_time(pkv[2], 1), 0), p->name, convert_mass(p->m, 0), convert_length(px[0], 0), convert_length(px[1], 0), convert_length(px[2], 0), convert_length(convert_time(pv[0], 1), 0), convert_length(convert_time(pv[1], 1), 0), convert_length(convert_time(pv[2], 1), 0) ); fflush(get_file(FILE_WARNING)); } #endif if(r_2 > MAX_APPROACH_FACTOR_2 * r_close_2) { add_over(1, &count_approach_reduce_t, &count_approach_reduce_t_over); #ifdef DEBUG_ALL fprintf(get_file(FILE_DEBUG), "\t# halving dt: encounter m%d - m%d: \tr(t=%1.6e)=%1.2e\t\tr(t=%1.6e)=%1.2e\tstep: t=%1.6e\n", pk->name, p->name, t_total(p->t), convert_length(sqrt(r_2), 0), t_total(p->t + t_close), convert_length(sqrt(r_close_2), 0), t_total(p->t + p->dt)); fflush(get_file(FILE_DEBUG)); #endif p->dt *= .5; #ifdef SYNCHRONIZE_APPROACHING_TIMESTEPS if(!pk->active) while(pk->htlast + .5 * pk->dt > p->t + p->dt + DT_TOLERANCE) { pk->dt *= .5; #ifdef DEBUG_ALL fprintf(get_file(FILE_DEBUG), "#### shrinking. timestep for m%d as of close encounter with m%d to %e ####\n", t_total(p->t), pk->name, p->name, t_total(pk->dt)); #endif } #endif continue; } } break; } _exit_function(); return collision; }
/*
 * Calculate Kepler position and velocity for given timestep _dt_
 * for particle no. _pos_, relative to particle 0 (parts[0]).
 * _xp_ and _vp_ of that particle will be updated.
 *
 * parts          : particle array; parts[0] is used as the central mass.
 * pcount         : not used in this function.
 * pos            : index of the particle to advance.
 * dt             : time to advance along the orbit.
 * out_a .. out_a_3 : optional outputs (may be NULL). out_a receives
 *                  -m_c * r / |r|^3 at the new position; out_a_, out_a_2,
 *                  out_a_3 are presumably its 1st..3rd time derivatives
 *                  (TODO confirm). Each is only written if all preceding
 *                  ones are non-NULL as well.
 * curr_a, curr_e : optional outputs (may be NULL) for the semi-major axis
 *                  and the eccentricity reported by get_constants().
 *
 * Side effects: toggles p1->is_elliptical and writes to FILE_WARNING when
 * the orbit type changes (or is exactly parabolic).
 */
void step_kepler_1(struct particle parts[], int pcount, int pos, double dt,
                   double *out_a, double *out_a_, double *out_a_2, double *out_a_3,
                   double *curr_a, double *curr_e)
{
  _enter_function(_UL_KEPLER, _UL_KEPLER_STEP_KEPLER_1);
  int i;
  struct particle *p0 = parts, *p1 = parts + pos;
  double r_[3], v_[3], j_[3], ecc_[3], a_[3], b_[3], _1_r2, afact, v_r_, v_v_, r_a_, v_a_, r_a__;
  double ecc, a, r, v, b, omega, e, mean, cosp, sinp;
  // central mass only; adding p1->m is disabled
  double m_c=p0->m, _cosp_ecc, e2, _1_ecc, _cosp_1, de_dt;//+p1->m;

  // get relative position / motion
  for(i = 0; i < 3; i++)
  {
    r_[i] = p1->xp[i] - p0->xp[i];
    v_[i] = p1->vp[i] - p0->vp[i];
  }

  // calculate ellipse constants
  get_constants(r_, v_, m_c, j_, ecc_, &a, &omega);
  //printf("# [%d]:\t%e\t%e\t%e\n", pos, v_abs(ecc_), a, omega);
  ecc = v_abs(ecc_);
  // b_ = a * sqrt(|1-e^2|) * (j_ x e_) / |j_ x e_|
  vec_prod(j_, ecc_, b_);
  b = a * sqrt(fabs(1-ecc*ecc)) / v_abs(b_);
  // NOTE(review): ecc == 0 (circular orbit) divides by zero here, and
  // presumably also in v_abs(b_) above - confirm how callers avoid this.
  for(i = 0; i < 3; i++)
  {
    a_[i] = a*ecc_[i]/ecc;  // semi major vector
    b_[i] *= b;             // semi minor vector
  }

  if(curr_a != NULL) *curr_a = a;
  if(curr_e != NULL) *curr_e = ecc;

  if(ecc < 1)
  // elliptical orbit
  {
    if(!p1->is_elliptical)
    {
      fprintf(get_file(FILE_WARNING), "#### [t=%1.12e] Particle #%d captured onto elliptical orbit with e=%e ####\n", t_total(p1->t), pos, ecc);
      p1->is_elliptical = 1;
    }
    // eccentric anomaly e at the current position: cos(e) = (a - |r|) / (ecc * a),
    // clamped to [-1, 1] before acos
    e = (a - v_abs(r_)) / (ecc * a);
    if(e >= 1.0) e = .0;
    else if(e <= -1.0) e = M_PI;
    else e = acos(e);
    // pick the half of the orbit from the sign of r_ . b_
    if(scal_prod(r_, b_) < 0)
      e = 2*M_PI - e;
    // mean anomaly advanced by dt, reduced below 2*pi
    mean = (e - ecc*sin(e)) + dt * omega;
    while(mean >= 2. * M_PI)
      mean -= 2. * M_PI;

    // eccentric anomaly at t+dt (solve_kepler presumably inverts Kepler's equation)
    e = solve_kepler(mean, ecc);
    cosp = cos(e);
    sinp = sin(e);
    _cosp_ecc = cosp - ecc;
    de_dt = omega / (1. - ecc * cosp);
    if(ecc > .99)
    {
      // Near the e = 0 (mod 2*pi) end of a highly eccentric orbit, cosp - ecc
      // and 1 - ecc*cosp are rebuilt from 1 - ecc (taken from j^2) and a
      // series for cos(e) - 1 instead of subtracting nearly equal numbers.
      e2 = (e > 2. * M_PI - 1e-3) ? e - 2. * M_PI : e;
      if(e2 < 1e-3)
      {
        e2 *= e2;
        _1_ecc = scal_prod(j_, j_)/(p0->m*a*(1+ecc));
        _cosp_1 = - .5 * e2 * (1 - e2 / 12. * (1 - e2 / 30.));
        _cosp_ecc = _1_ecc + _cosp_1;
        de_dt = omega / (_1_ecc - ecc * _cosp_1);
      }
    }
    for(i = 0; i < DIMENSIONS; i++)
    {
      r_[i] = a_[i] * _cosp_ecc + b_[i] * sinp ;       // new location
      // derivative of the location w.r.t. e, times de/dt; this value is
      // used as the velocity as-is (the rescaling further down is disabled)
      v_[i] = (-a_[i] * sinp + b_[i] * cosp) * de_dt;
    }
  }
  else
  // hyperbolic orbit // parabolic?
  {
    if(p1->is_elliptical)
    {
      fprintf(get_file(FILE_WARNING), "#### [t=%1.12e+%1.12e] Particle #%d thrown onto hyperbolic orbit with e=%e (E=%e, a=%e) ####\n", t_total(p1->t), convert_time(dt, 0), pos, ecc, p1->energy, convert_length(a, 0));
      p1->is_elliptical = 0;
    }
    if(ecc == 1)
      fprintf(get_file(FILE_WARNING), "# # # %e\tParabolic orbit of m%d treated as hyperbolic: e=%e\t(x=%e)\n", t_total(p1->t), pos, ecc, convert_length(v_abs(p1->xp), 0));
    // hyperbolic anomaly e at the current position: cosh(e) = (a + |r|) / (ecc * a);
    // the sign is taken from r_ . v_
    e = (a + v_abs(r_)) / (ecc * a);
    if(e < 1.0) e = .0;
    else if(scal_prod(r_, v_) < 0) e = -acosh(e);
    else e = acosh(e);

    // advance by dt (kepler() presumably inverts the hyperbolic Kepler equation)
    e = kepler(ecc, ecc * sinh(e) - e + dt * omega);
    cosp = cosh(e);
    sinp = sinh(e);
    de_dt = omega / (ecc * cosp - 1.);
    for(i = 0; i < DIMENSIONS; i++)
    {
      r_[i] = a_[i] * (ecc - cosp) + b_[i] * sinp;     // new location
      v_[i] = (-a_[i] * sinp + b_[i] * cosp) * de_dt;  // see elliptical branch
    }
  }

  // get |v_| from j_ = r_ x v_
  // NOTE(review): v and r computed here are not used afterwards, because
  // the rescaling of v_ in the loop below is commented out.
  v = v_abs(v_);
  r = v_abs(r_);
  v = v_abs(j_) / (r * v * sin(acos(scal_prod(r_, v_)/ (r * v))));

  for(i = 0; i < DIMENSIONS; i++)
  {
    //v_[i] *= v;
    // total motion relative to fix central mass
    p1->xp[i] = p0->xp[i] + r_[i];
    p1->vp[i] = p0->vp[i] + v_[i];
  }

  if(out_a != NULL)
  {
    _1_r2 = 1. / scal_prod(r_, r_);
    afact = - m_c * _1_r2 * sqrt(_1_r2);   // -m_c / |r|^3
    //printf("4 %e %e %e\n", *(out_a), *(out_a+1), *(out_a+2));
    for(i = 0; i < DIMENSIONS; i++)
      out_a[i] = afact * r_[i];
    if(out_a_ != NULL)
    {
      v_r_ = scal_prod(v_, r_);
      for(i = 0; i < DIMENSIONS; i++)
        out_a_[i] = afact * (v_[i] - 3 * _1_r2 * v_r_ * r_[i]);
      if(out_a_2 != NULL)
      {
        v_v_ = scal_prod(v_, v_);
        r_a_ = scal_prod(r_, out_a);
        for(i = 0; i < DIMENSIONS; i++)
          out_a_2[i] = afact * (out_a[i] - 3. * _1_r2 * (v_r_ * (2. * v_[i] - 5. * v_r_ * r_[i] * _1_r2) + (v_v_ + r_a_) * r_[i]));
        if(out_a_3 != NULL)
        {
          v_a_ = scal_prod(v_, out_a);
          r_a__ = scal_prod(r_, out_a_);
          for(i = 0; i < DIMENSIONS; i++)
            out_a_3[i] = afact * (out_a_[i] - 3. * _1_r2 * (3. * v_r_ * out_a[i] + 3. * (v_v_ + r_a_) * (v_[i] - 5. * v_r_ * _1_r2 * r_[i]) + (3. * v_a_ + r_a__) * r_[i] + v_r_ * v_r_ * _1_r2 * (-15. * v_[i] + 35. * v_r_ * _1_r2 * r_[i])));
        }
      }
    }
  }
  _exit_function();
}
void operator()(const Range &boundaries) const { CV_TRACE_FUNCTION(); Mat dx, dy; AutoBuffer<short> dxMax(0), dyMax(0); std::deque<uchar*> stack, borderPeaksLocal; const int rowStart = max(0, boundaries.start - 1), rowEnd = min(src.rows, boundaries.end + 1); int *_mag_p, *_mag_a, *_mag_n; short *_dx, *_dy, *_dx_a = NULL, *_dy_a = NULL, *_dx_n = NULL, *_dy_n = NULL; uchar *_pmap; double scale = 1.0; CV_TRACE_REGION("gradient") if(needGradient) { if (aperture_size == 7) { scale = 1 / 16.0; } Sobel(src.rowRange(rowStart, rowEnd), dx, CV_16S, 1, 0, aperture_size, scale, 0, BORDER_REPLICATE); Sobel(src.rowRange(rowStart, rowEnd), dy, CV_16S, 0, 1, aperture_size, scale, 0, BORDER_REPLICATE); } else { dx = src.rowRange(rowStart, rowEnd); dy = src2.rowRange(rowStart, rowEnd); } CV_TRACE_REGION_NEXT("magnitude"); if(cn > 1) { dxMax.allocate(2 * dx.cols); dyMax.allocate(2 * dy.cols); _dx_a = (short*)dxMax; _dx_n = _dx_a + dx.cols; _dy_a = (short*)dyMax; _dy_n = _dy_a + dy.cols; } // _mag_p: previous row, _mag_a: actual row, _mag_n: next row #if CV_SIMD128 AutoBuffer<int> buffer(3 * (mapstep * cn + CV_MALLOC_SIMD128)); _mag_p = alignPtr((int*)buffer + 1, CV_MALLOC_SIMD128); _mag_a = alignPtr(_mag_p + mapstep * cn, CV_MALLOC_SIMD128); _mag_n = alignPtr(_mag_a + mapstep * cn, CV_MALLOC_SIMD128); #else AutoBuffer<int> buffer(3 * (mapstep * cn)); _mag_p = (int*)buffer + 1; _mag_a = _mag_p + mapstep * cn; _mag_n = _mag_a + mapstep * cn; #endif // For the first time when just 2 rows are filled and for left and right borders if(rowStart == boundaries.start) memset(_mag_n - 1, 0, mapstep * sizeof(int)); else _mag_n[src.cols] = _mag_n[-1] = 0; _mag_a[src.cols] = _mag_a[-1] = _mag_p[src.cols] = _mag_p[-1] = 0; // calculate magnitude and angle of gradient, perform non-maxima suppression. 
// fill the map with one of the following values: // 0 - the pixel might belong to an edge // 1 - the pixel can not belong to an edge // 2 - the pixel does belong to an edge for (int i = rowStart; i <= boundaries.end; ++i) { // Scroll the ring buffer std::swap(_mag_n, _mag_a); std::swap(_mag_n, _mag_p); if(i < rowEnd) { // Next row calculation _dx = dx.ptr<short>(i - rowStart); _dy = dy.ptr<short>(i - rowStart); if (L2gradient) { int j = 0, width = src.cols * cn; #if CV_SIMD128 if (haveSIMD) { for ( ; j <= width - 8; j += 8) { v_int16x8 v_dx = v_load((const short*)(_dx + j)); v_int16x8 v_dy = v_load((const short*)(_dy + j)); v_int32x4 v_dxp_low, v_dxp_high; v_int32x4 v_dyp_low, v_dyp_high; v_expand(v_dx, v_dxp_low, v_dxp_high); v_expand(v_dy, v_dyp_low, v_dyp_high); v_store_aligned((int *)(_mag_n + j), v_dxp_low*v_dxp_low+v_dyp_low*v_dyp_low); v_store_aligned((int *)(_mag_n + j + 4), v_dxp_high*v_dxp_high+v_dyp_high*v_dyp_high); } } #endif for ( ; j < width; ++j) _mag_n[j] = int(_dx[j])*_dx[j] + int(_dy[j])*_dy[j]; } else { int j = 0, width = src.cols * cn; #if CV_SIMD128 if (haveSIMD) { for(; j <= width - 8; j += 8) { v_int16x8 v_dx = v_load((const short *)(_dx + j)); v_int16x8 v_dy = v_load((const short *)(_dy + j)); v_dx = v_reinterpret_as_s16(v_abs(v_dx)); v_dy = v_reinterpret_as_s16(v_abs(v_dy)); v_int32x4 v_dx_ml, v_dy_ml, v_dx_mh, v_dy_mh; v_expand(v_dx, v_dx_ml, v_dx_mh); v_expand(v_dy, v_dy_ml, v_dy_mh); v_store_aligned((int *)(_mag_n + j), v_dx_ml + v_dy_ml); v_store_aligned((int *)(_mag_n + j + 4), v_dx_mh + v_dy_mh); } } #endif for ( ; j < width; ++j) _mag_n[j] = std::abs(int(_dx[j])) + std::abs(int(_dy[j])); } if(cn > 1) { std::swap(_dx_n, _dx_a); std::swap(_dy_n, _dy_a); for(int j = 0, jn = 0; j < src.cols; ++j, jn += cn) { int maxIdx = jn; for(int k = 1; k < cn; ++k) if(_mag_n[jn + k] > _mag_n[maxIdx]) maxIdx = jn + k; _mag_n[j] = _mag_n[maxIdx]; _dx_n[j] = _dx[maxIdx]; _dy_n[j] = _dy[maxIdx]; } _mag_n[src.cols] = 0; } // at the very beginning we do 
not have a complete ring // buffer of 3 magnitude rows for non-maxima suppression if (i <= boundaries.start) continue; } else { memset(_mag_n - 1, 0, mapstep * sizeof(int)); if(cn > 1) { std::swap(_dx_n, _dx_a); std::swap(_dy_n, _dy_a); } } // From here actual src row is (i - 1) // Set left and right border to 1 #if CV_SIMD128 if(haveSIMD) _pmap = map.ptr<uchar>(i) + CV_MALLOC_SIMD128; else #endif _pmap = map.ptr<uchar>(i) + 1; _pmap[src.cols] =_pmap[-1] = 1; if(cn == 1) { _dx = dx.ptr<short>(i - rowStart - 1); _dy = dy.ptr<short>(i - rowStart - 1); } else { _dx = _dx_a; _dy = _dy_a; } const int TG22 = 13573; int j = 0; #if CV_SIMD128 if (haveSIMD) { const v_int32x4 v_low = v_setall_s32(low); const v_int8x16 v_one = v_setall_s8(1); for (; j <= src.cols - 32; j += 32) { v_int32x4 v_m1 = v_load_aligned((const int*)(_mag_a + j)); v_int32x4 v_m2 = v_load_aligned((const int*)(_mag_a + j + 4)); v_int32x4 v_m3 = v_load_aligned((const int*)(_mag_a + j + 8)); v_int32x4 v_m4 = v_load_aligned((const int*)(_mag_a + j + 12)); v_int32x4 v_cmp1 = v_m1 > v_low; v_int32x4 v_cmp2 = v_m2 > v_low; v_int32x4 v_cmp3 = v_m3 > v_low; v_int32x4 v_cmp4 = v_m4 > v_low; v_m1 = v_load_aligned((const int*)(_mag_a + j + 16)); v_m2 = v_load_aligned((const int*)(_mag_a + j + 20)); v_m3 = v_load_aligned((const int*)(_mag_a + j + 24)); v_m4 = v_load_aligned((const int*)(_mag_a + j + 28)); v_store_aligned((signed char*)(_pmap + j), v_one); v_store_aligned((signed char*)(_pmap + j + 16), v_one); v_int16x8 v_cmp80 = v_pack(v_cmp1, v_cmp2); v_int16x8 v_cmp81 = v_pack(v_cmp3, v_cmp4); v_cmp1 = v_m1 > v_low; v_cmp2 = v_m2 > v_low; v_cmp3 = v_m3 > v_low; v_cmp4 = v_m4 > v_low; v_int8x16 v_cmp = v_pack(v_cmp80, v_cmp81); v_cmp80 = v_pack(v_cmp1, v_cmp2); v_cmp81 = v_pack(v_cmp3, v_cmp4); unsigned int mask = v_signmask(v_cmp); v_cmp = v_pack(v_cmp80, v_cmp81); mask |= v_signmask(v_cmp) << 16; if (mask) { int k = j; do { int l = trailingZeros32(mask); k += l; mask >>= l; int m = _mag_a[k]; short xs = _dx[k]; 
short ys = _dy[k]; int x = (int)std::abs(xs); int y = (int)std::abs(ys) << 15; int tg22x = x * TG22; if (y < tg22x) { if (m > _mag_a[k - 1] && m >= _mag_a[k + 1]) { CANNY_CHECK_SIMD(m, high, (_pmap+k), stack); } } else { int tg67x = tg22x + (x << 16); if (y > tg67x) { if (m > _mag_p[k] && m >= _mag_n[k]) { CANNY_CHECK_SIMD(m, high, (_pmap+k), stack); } } else { int s = (xs ^ ys) < 0 ? -1 : 1; if(m > _mag_p[k - s] && m > _mag_n[k + s]) { CANNY_CHECK_SIMD(m, high, (_pmap+k), stack); } } } ++k; } while((mask >>= 1)); } } if (j <= src.cols - 16) { v_int32x4 v_m1 = v_load_aligned((const int*)(_mag_a + j)); v_int32x4 v_m2 = v_load_aligned((const int*)(_mag_a + j + 4)); v_int32x4 v_m3 = v_load_aligned((const int*)(_mag_a + j + 8)); v_int32x4 v_m4 = v_load_aligned((const int*)(_mag_a + j + 12)); v_store_aligned((signed char*)(_pmap + j), v_one); v_int32x4 v_cmp1 = v_m1 > v_low; v_int32x4 v_cmp2 = v_m2 > v_low; v_int32x4 v_cmp3 = v_m3 > v_low; v_int32x4 v_cmp4 = v_m4 > v_low; v_int16x8 v_cmp80 = v_pack(v_cmp1, v_cmp2); v_int16x8 v_cmp81 = v_pack(v_cmp3, v_cmp4); v_int8x16 v_cmp = v_pack(v_cmp80, v_cmp81); unsigned int mask = v_signmask(v_cmp); if (mask) { int k = j; do { int l = trailingZeros32(mask); k += l; mask >>= l; int m = _mag_a[k]; short xs = _dx[k]; short ys = _dy[k]; int x = (int)std::abs(xs); int y = (int)std::abs(ys) << 15; int tg22x = x * TG22; if (y < tg22x) { if (m > _mag_a[k - 1] && m >= _mag_a[k + 1]) { CANNY_CHECK_SIMD(m, high, (_pmap+k), stack); } } else { int tg67x = tg22x + (x << 16); if (y > tg67x) { if (m > _mag_p[k] && m >= _mag_n[k]) { CANNY_CHECK_SIMD(m, high, (_pmap+k), stack); } } else { int s = (xs ^ ys) < 0 ? 
-1 : 1; if(m > _mag_p[k - s] && m > _mag_n[k + s]) { CANNY_CHECK_SIMD(m, high, (_pmap+k), stack); } } } ++k; } while((mask >>= 1)); } j += 16; } } #endif for (; j < src.cols; j++) { int m = _mag_a[j]; if (m > low) { short xs = _dx[j]; short ys = _dy[j]; int x = (int)std::abs(xs); int y = (int)std::abs(ys) << 15; int tg22x = x * TG22; if (y < tg22x) { if (m > _mag_a[j - 1] && m >= _mag_a[j + 1]) { CANNY_CHECK(m, high, (_pmap+j), stack); } } else { int tg67x = tg22x + (x << 16); if (y > tg67x) { if (m > _mag_p[j] && m >= _mag_n[j]) { CANNY_CHECK(m, high, (_pmap+j), stack); } } else { int s = (xs ^ ys) < 0 ? -1 : 1; if(m > _mag_p[j - s] && m > _mag_n[j + s]) { CANNY_CHECK(m, high, (_pmap+j), stack); } } } } _pmap[j] = 1; } }