/**
 * Build an oriented box that encloses a single triangle.
 *
 * The first axis (X) follows the longest triangle edge, the second axis (Y)
 * is a second edge with its X component removed, and the third axis (Z) is
 * their cross product. The three vertices are transformed by the resulting
 * matrix and bounded component-wise; the transformed triangle centre, clamped
 * to those bounds, is handed to the AABox as its third argument.
 *
 * \param t  Triangle to enclose.
 * \return   OBox made of the rotation and the box in the rotated frame.
 */
OBox OBox::box_triangle(const Triangle& t)
{
    // Edge vectors in the order v0->v1, v1->v2, v2->v0.
    const Vec3f edge01 = t.get_v1() - t.get_v0();
    const Vec3f edge12 = t.get_v2() - t.get_v1();
    const Vec3f edge20 = t.get_v0() - t.get_v2();

    const float len01 = sqr_length(edge01);
    const float len12 = sqr_length(edge12);
    const float len20 = sqr_length(edge20);

    // Default choice (edge20 as principal, edge01 as secondary) covers both
    // cases in which neither edge01 nor edge12 is strictly the longest.
    const Vec3f* principal = &edge20;
    const Vec3f* secondary = &edge01;
    if (len01 > len12)
    {
        if (len01 > len20)
        {
            principal = &edge01;
            secondary = &edge12;
        }
    }
    else if (len12 > len20)
    {
        principal = &edge12;
        secondary = &edge20;
    }

    // Gram-Schmidt step: strip the X component off the secondary edge.
    const Vec3f X = normalize(*principal);
    const Vec3f Y = normalize(*secondary - X * dot(X, *secondary));
    const Vec3f Z = cross(X, Y);
    const Mat3x3f Rot(X, Y, Z);

    // Vertices expressed in the box frame.
    const Vec3f r0 = Rot * t.get_v0();
    const Vec3f r1 = Rot * t.get_v1();
    const Vec3f r2 = Rot * t.get_v2();

    const Vec3f lo = v_min(r0, v_min(r1, r2));
    const Vec3f hi = v_max(r0, v_max(r1, r2));

    // Rotated triangle centre, clamped into [lo, hi].
    const Vec3f centre_close = v_max(lo, v_min(hi, Rot * t.get_centre()));

    return OBox(Rot, AABox(lo, hi, centre_close));
}
/**
 * Slab test between the line p + t*dir and this axis aligned box.
 *
 * For every axis the parameter interval in which the line lies between
 * pmin and pmax is computed; the box is hit when the intersection of the
 * three intervals is non-empty (with a tolerance of CGLA::TINY on each end).
 * The sign of t is not examined, so hits behind p are reported as well.
 *
 * Axes along which dir is exactly zero are handled without dividing: the
 * line is parallel to that slab, so it either misses the box outright
 * (p outside the slab) or the axis puts no constraint on t. This avoids the
 * 0/0 = NaN that a plain division yields when p lies exactly on a slab plane.
 *
 * \param p    Origin of the line.
 * \param dir  Direction of the line (need not be normalized).
 * \return     true if the line passes through the box.
 */
bool AABox::intersect(const CGLA::Vec3f& p, const CGLA::Vec3f& dir) const
{
    bool have_interval = false;
    float tmin = 0.0f;
    float tmax = 0.0f;

    for(int i=0;i<3;++i)
    {
        if(dir[i] == 0.0f)
        {
            // Parallel to this slab: reject if outside, otherwise unconstrained.
            if(p[i] < pmin[i] || p[i] > pmax[i])
                return false;
            continue;
        }

        const float ta = (pmin[i]-p[i])/dir[i];
        const float tb = (pmax[i]-p[i])/dir[i];
        const float t_enter = ta < tb ? ta : tb;
        const float t_leave = ta < tb ? tb : ta;

        if(!have_interval)
        {
            tmin = t_enter;
            tmax = t_leave;
            have_interval = true;
        }
        else
        {
            if(t_enter > tmin) tmin = t_enter;
            if(t_leave < tmax) tmax = t_leave;
        }
    }

    // dir is the zero vector and p passed all three containment checks.
    if(!have_interval)
        return true;

    return ( (tmin-CGLA::TINY) < (tmax+CGLA::TINY));
}
// Verifies v_min() and v_max() lane by lane against std::min()/std::max().
// The second operand is a default-constructed data set that has been
// reversed, the first one is left as constructed.
TheTest & test_min_max()
{
    Data<R> lhsData;
    Data<R> rhsData;
    rhsData.reverse();

    R lhs = lhsData;
    R rhs = rhsData;

    Data<R> minResult = v_min(lhs, rhs);
    Data<R> maxResult = v_max(lhs, rhs);

    for (int lane = 0; lane < R::nlanes; ++lane)
    {
        EXPECT_EQ(std::min(lhsData[lane], rhsData[lane]), minResult[lane]);
        EXPECT_EQ(std::max(lhsData[lane], rhsData[lane]), maxResult[lane]);
    }

    return *this;
}
// Enumerates the states reachable from this node in one step.
//
// For every container d two candidate states are built: one with container d
// set to 0 and one with container d set to capacities[d]; each is kept only
// if it differs from the current volume of d.
//
// For every ordered pair (d1, d2) with d1 != d2 a transfer state is built:
// d2 receives capacities[d2] and d1 keeps volumes[d1] + volumes[d2] -
// capacities[d2]; when that remainder is negative, d1 becomes 0 and d2 holds
// the combined amount instead. The state is kept only when both containers
// actually change.
//
// capacities is indexed with the same indices as volumes, so it must hold at
// least volumes.size() entries.
// NOTE(review): the `vd1 < 0` test only works if Volume is a signed type -
// confirm against the Volume typedef.
list<vector<Volume> > Node::GetActions(const vector<Volume>& capacities) const
{
    const size_t dimension = volumes.size();
    list<vector<Volume> > result;

    // Single-container actions: set to zero / set to capacity.
    for (size_t d = 0; d < dimension; d++)
    {
        vector<Volume> v_zero(volumes);
        vector<Volume> v_max(volumes);
        v_zero[d] = 0;
        v_max[d] = capacities[d];
        if (volumes[d] != v_zero[d])
            result.push_back(v_zero);
        if (volumes[d] != v_max[d])
            result.push_back(v_max);
    }

    // Two-container actions: move content of d1 into d2.
    for (size_t d1 = 0; d1 < dimension; d1++)
    {
        for (size_t d2 = 0; d2 < dimension; d2++)
        {
            if (d1 == d2)
                continue;

            auto vd1 = volumes[d1] + volumes[d2] - capacities[d2];
            auto vd2 = capacities[d2];
            if (vd1 < 0)
            {
                // Not enough to reach capacity: d2 takes everything.
                vd2 += vd1;
                vd1 = 0;
            }

            if (volumes[d1] != vd1 && volumes[d2] != vd2)
            {
                vector<Volume> v_swap(volumes);
                v_swap[d1] = vd1;
                v_swap[d2] = vd2;
                result.push_back(v_swap);
            }
        }
    }

    return result;
}
/*!
  Send to the controller a velocity.

  \param frame : Control frame type. Only articular (vpRobot::ARTICULAR_FRAME)
  and camera frame (vpRobot::CAMERA_FRAME) are implemented.

  \param v : Velocity to apply to the robot.

  - In the camera frame, this velocity is represented by a vector of dimension 6
  \f$ {\bf v} = [{\bf t}, {\bf \theta u }]^t \f$ where \f$ \bf t \f$ is a
  translation vector and \f$ {\bf \theta u} \f$ is a rotation vector (see
  vpThetaUVector): \f$ {\bf v} = [t_x, t_y, t_z, {\theta u}_x, {\theta u}_y,
  {\theta u}_z] \f$ (see vpTranslationVector and vpThetaUVector).

  - In articular, this velocity is represented by a 6 dimension vector
  \f$ \dot{{\bf q}} = [{\bf t}, {\bf \theta u}]^t \f$ where \f$ \bf t \f$ is a
  translation vector and \f$ {\bf \theta u} \f$ is a rotation vector (see
  vpThetaUVector): \f$ \dot{{\bf q}} = [t_x, t_y, t_z, {\theta u}_x,
  {\theta u}_y, {\theta u}_z] \f$ (see vpTranslationVector and vpThetaUVector).
  The robot jacobian \f$ {^e}{\bf J}_e\f$ expressed in the end-effector frame
  is here set to identity.

  The velocity is first saturated with vpRobot::saturateVelocities() using
  getMaxTranslationVelocity() for the three first components and
  getMaxRotationVelocity() for the three last ones.

  We use the exponential map (vpExponentialMap) to update the camera location.
  Sampling time can be set using setSamplingTime().

  If the robot is not yet in vpRobot::STATE_VELOCITY_CONTROL, it is switched
  to that state before the velocity is applied.

  \exception vpRobotException::wrongStateError : If \e frame is
  vpRobot::REFERENCE_FRAME or vpRobot::MIXT_FRAME.

  \sa setSamplingTime()
*/
void
vpSimulatorCamera::setVelocity(const vpRobot::vpControlFrameType frame,
                               const vpColVector &v)
{
  if (vpRobot::STATE_VELOCITY_CONTROL != getRobotState ()) {
    setRobotState(vpRobot::STATE_VELOCITY_CONTROL);
  }

  switch (frame)
  {
  case vpRobot::ARTICULAR_FRAME:
  case vpRobot::CAMERA_FRAME: {
    // Per-component velocity limits: translations first, then rotations.
    vpColVector v_max(6);

    for (unsigned int i=0; i<3; i++)
      v_max[i] = getMaxTranslationVelocity();
    for (unsigned int i=3; i<6; i++)
      v_max[i] = getMaxRotationVelocity();

    vpColVector v_sat = vpRobot::saturateVelocities(v, v_max, true);

    // Integrate the saturated velocity over one sampling period.
    wMc_ = wMc_ * vpExponentialMap::direct(v_sat, delta_t_);
    setRobotFrame(frame);
    break ;
  }
  case vpRobot::REFERENCE_FRAME:
    vpERROR_TRACE ("Cannot set a velocity in the reference frame: "
                   "functionality not implemented");
    throw vpRobotException (vpRobotException::wrongStateError,
                            "Cannot set a velocity in the reference frame: "
                            "functionality not implemented");
  case vpRobot::MIXT_FRAME:
    vpERROR_TRACE ("Cannot set a velocity in the mixt frame: "
                   "functionality not implemented");
    throw vpRobotException (vpRobotException::wrongStateError,
                            "Cannot set a velocity in the mixt frame: "
                            "functionality not implemented");
  }
}
/*! Send to the controller a velocity. \param frame : Control frame type. Only vpRobot::ARTICULAR_FRAME is implemented. \param v : Velocity vector \f$(v_x, w_z)\f$ to apply to the robot. Depending on the velocity specified as input, the robot position is updated using the sampling time that can be modified using setSamplingTime(). \sa setSamplingTime() */ void vpSimulatorPioneer::setVelocity(const vpRobot::vpControlFrameType frame, const vpColVector &v) { switch (frame) { case vpRobot::ARTICULAR_FRAME: { if (vpRobot::STATE_VELOCITY_CONTROL != getRobotState ()) { setRobotState(vpRobot::STATE_VELOCITY_CONTROL); } setRobotFrame(frame); // v is a 2 dimension vector that contains v,w if (v.size() != 2) { vpERROR_TRACE ("Bad dimension of the control vector"); throw vpRobotException (vpRobotException::dimensionError, "Bad dimension of the control vector"); } vpColVector v_max(2); v_max[0] = getMaxTranslationVelocity(); v_max[1] = getMaxRotationVelocity(); vpColVector v_sat = vpRobot::saturateVelocities(v, v_max, true); xm_ += delta_t_ * v_sat[0] * cos(theta_); ym_ += delta_t_ * v_sat[0] * sin(theta_); theta_ += delta_t_ * v_sat[1]; vpRotationMatrix wRe(0, 0, theta_); vpTranslationVector wte(xm_, ym_, 0); wMe_.buildFrom(wte, wRe); wMc_ = wMe_ * cMe_.inverse(); break ; } break ; case vpRobot::CAMERA_FRAME: vpERROR_TRACE ("Cannot set a velocity in the camera frame: " "functionality not implemented"); throw vpRobotException (vpRobotException::wrongStateError, "Cannot set a velocity in the camera frame:" "functionality not implemented"); break ; case vpRobot::REFERENCE_FRAME: vpERROR_TRACE ("Cannot set a velocity in the reference frame: " "functionality not implemented"); throw vpRobotException (vpRobotException::wrongStateError, "Cannot set a velocity in the articular frame:" "functionality not implemented"); case vpRobot::MIXT_FRAME: vpERROR_TRACE ("Cannot set a velocity in the mixt frame: " "functionality not implemented"); throw vpRobotException 
(vpRobotException::wrongStateError, "Cannot set a velocity in the mixt frame:" "functionality not implemented"); break ; } }
void exp64f( const double *_x, double *y, int n ) { CV_INSTRUMENT_REGION(); const double* const expTab = cv::details::getExpTab64f(); const double A5 = .99999999999999999998285227504999 / EXPPOLY_32F_A0, A4 = .69314718055994546743029643825322 / EXPPOLY_32F_A0, A3 = .24022650695886477918181338054308 / EXPPOLY_32F_A0, A2 = .55504108793649567998466049042729e-1 / EXPPOLY_32F_A0, A1 = .96180973140732918010002372686186e-2 / EXPPOLY_32F_A0, A0 = .13369713757180123244806654839424e-2 / EXPPOLY_32F_A0; int i = 0; const Cv64suf* x = (const Cv64suf*)_x; double minval = (-exp_max_val/exp_prescale); double maxval = (exp_max_val/exp_prescale); #if CV_SIMD_64F const int VECSZ = v_float64::nlanes; const v_float64 vprescale = vx_setall_f64(exp_prescale); const v_float64 vpostscale = vx_setall_f64(exp_postscale); const v_float64 vminval = vx_setall_f64(minval); const v_float64 vmaxval = vx_setall_f64(maxval); const v_float64 vA1 = vx_setall_f64(A1); const v_float64 vA2 = vx_setall_f64(A2); const v_float64 vA3 = vx_setall_f64(A3); const v_float64 vA4 = vx_setall_f64(A4); const v_float64 vA5 = vx_setall_f64(A5); const v_int32 vidxmask = vx_setall_s32(EXPTAB_MASK); bool y_aligned = (size_t)(void*)y % 32 == 0; for( ; i < n; i += VECSZ*2 ) { if( i + VECSZ*2 > n ) { if( i == 0 || _x == y ) break; i = n - VECSZ*2; y_aligned = false; } v_float64 xf0 = vx_load(&x[i].f), xf1 = vx_load(&x[i + VECSZ].f); xf0 = v_min(v_max(xf0, vminval), vmaxval); xf1 = v_min(v_max(xf1, vminval), vmaxval); xf0 *= vprescale; xf1 *= vprescale; v_int32 xi0 = v_round(xf0); v_int32 xi1 = v_round(xf1); xf0 = (xf0 - v_cvt_f64(xi0))*vpostscale; xf1 = (xf1 - v_cvt_f64(xi1))*vpostscale; v_float64 yf0 = v_lut(expTab, xi0 & vidxmask); v_float64 yf1 = v_lut(expTab, xi1 & vidxmask); v_int32 v0 = vx_setzero_s32(), v1023 = vx_setall_s32(1023), v2047 = vx_setall_s32(2047); xi0 = v_min(v_max(v_shr<EXPTAB_SCALE>(xi0) + v1023, v0), v2047); xi1 = v_min(v_max(v_shr<EXPTAB_SCALE>(xi1) + v1023, v0), v2047); v_int64 xq0, xq1, dummy; 
v_expand(xi0, xq0, dummy); v_expand(xi1, xq1, dummy); yf0 *= v_reinterpret_as_f64(v_shl<52>(xq0)); yf1 *= v_reinterpret_as_f64(v_shl<52>(xq1)); v_float64 zf0 = xf0 + vA1; v_float64 zf1 = xf1 + vA1; zf0 = v_fma(zf0, xf0, vA2); zf1 = v_fma(zf1, xf1, vA2); zf0 = v_fma(zf0, xf0, vA3); zf1 = v_fma(zf1, xf1, vA3); zf0 = v_fma(zf0, xf0, vA4); zf1 = v_fma(zf1, xf1, vA4); zf0 = v_fma(zf0, xf0, vA5); zf1 = v_fma(zf1, xf1, vA5); zf0 *= yf0; zf1 *= yf1; if( y_aligned ) { v_store_aligned(y + i, zf0); v_store_aligned(y + i + VECSZ, zf1); } else { v_store(y + i, zf0); v_store(y + i + VECSZ, zf1); } } vx_cleanup(); #endif for( ; i < n; i++ ) { double x0 = x[i].f; x0 = std::min(std::max(x0, minval), maxval); x0 *= exp_prescale; Cv64suf buf; int xi = saturate_cast<int>(x0); x0 = (x0 - xi)*exp_postscale; int t = (xi >> EXPTAB_SCALE) + 1023; t = !(t & ~2047) ? t : t < 0 ? 0 : 2047; buf.i = (int64)t << 52; y[i] = buf.f * expTab[xi & EXPTAB_MASK] * (((((A0*x0 + A1)*x0 + A2)*x0 + A3)*x0 + A4)*x0 + A5); } }
void exp32f( const float *_x, float *y, int n ) { CV_INSTRUMENT_REGION(); const float* const expTab_f = cv::details::getExpTab32f(); const float A4 = (float)(1.000000000000002438532970795181890933776 / EXPPOLY_32F_A0), A3 = (float)(.6931471805521448196800669615864773144641 / EXPPOLY_32F_A0), A2 = (float)(.2402265109513301490103372422686535526573 / EXPPOLY_32F_A0), A1 = (float)(.5550339366753125211915322047004666939128e-1 / EXPPOLY_32F_A0); int i = 0; const Cv32suf* x = (const Cv32suf*)_x; float minval = (float)(-exp_max_val/exp_prescale); float maxval = (float)(exp_max_val/exp_prescale); float postscale = (float)exp_postscale; #if CV_SIMD const int VECSZ = v_float32::nlanes; const v_float32 vprescale = vx_setall_f32((float)exp_prescale); const v_float32 vpostscale = vx_setall_f32((float)exp_postscale); const v_float32 vminval = vx_setall_f32(minval); const v_float32 vmaxval = vx_setall_f32(maxval); const v_float32 vA1 = vx_setall_f32((float)A1); const v_float32 vA2 = vx_setall_f32((float)A2); const v_float32 vA3 = vx_setall_f32((float)A3); const v_float32 vA4 = vx_setall_f32((float)A4); const v_int32 vidxmask = vx_setall_s32(EXPTAB_MASK); bool y_aligned = (size_t)(void*)y % 32 == 0; for( ; i < n; i += VECSZ*2 ) { if( i + VECSZ*2 > n ) { if( i == 0 || _x == y ) break; i = n - VECSZ*2; y_aligned = false; } v_float32 xf0 = vx_load(&x[i].f), xf1 = vx_load(&x[i + VECSZ].f); xf0 = v_min(v_max(xf0, vminval), vmaxval); xf1 = v_min(v_max(xf1, vminval), vmaxval); xf0 *= vprescale; xf1 *= vprescale; v_int32 xi0 = v_round(xf0); v_int32 xi1 = v_round(xf1); xf0 = (xf0 - v_cvt_f32(xi0))*vpostscale; xf1 = (xf1 - v_cvt_f32(xi1))*vpostscale; v_float32 yf0 = v_lut(expTab_f, xi0 & vidxmask); v_float32 yf1 = v_lut(expTab_f, xi1 & vidxmask); v_int32 v0 = vx_setzero_s32(), v127 = vx_setall_s32(127), v255 = vx_setall_s32(255); xi0 = v_min(v_max(v_shr<EXPTAB_SCALE>(xi0) + v127, v0), v255); xi1 = v_min(v_max(v_shr<EXPTAB_SCALE>(xi1) + v127, v0), v255); yf0 *= 
v_reinterpret_as_f32(v_shl<23>(xi0)); yf1 *= v_reinterpret_as_f32(v_shl<23>(xi1)); v_float32 zf0 = xf0 + vA1; v_float32 zf1 = xf1 + vA1; zf0 = v_fma(zf0, xf0, vA2); zf1 = v_fma(zf1, xf1, vA2); zf0 = v_fma(zf0, xf0, vA3); zf1 = v_fma(zf1, xf1, vA3); zf0 = v_fma(zf0, xf0, vA4); zf1 = v_fma(zf1, xf1, vA4); zf0 *= yf0; zf1 *= yf1; if( y_aligned ) { v_store_aligned(y + i, zf0); v_store_aligned(y + i + VECSZ, zf1); } else { v_store(y + i, zf0); v_store(y + i + VECSZ, zf1); } } vx_cleanup(); #endif for( ; i < n; i++ ) { float x0 = x[i].f; x0 = std::min(std::max(x0, minval), maxval); x0 *= (float)exp_prescale; Cv32suf buf; int xi = saturate_cast<int>(x0); x0 = (x0 - xi)*postscale; int t = (xi >> EXPTAB_SCALE) + 127; t = !(t & ~255) ? t : t < 0 ? 0 : 255; buf.i = t << 23; y[i] = buf.f * expTab_f[xi & EXPTAB_MASK] * ((((x0 + A1)*x0 + A2)*x0 + A3)*x0 + A4); } }
/**
 * Compute the axis aligned bounding box of a set of triangles and split the
 * set in two.
 *
 * The box extents are the component-wise min/max of the triangles'
 * get_pmin()/get_pmax(). The triangle vertex nearest to the box centre is
 * passed to the returned AABox as its third argument.
 *
 * Triangles are distributed along the axis with the largest extent: those
 * whose centre lies above the midpoint of that axis are appended to rvec,
 * the others to lvec. If one side ends up empty, both vectors are cleared
 * and the input is cut in half by index instead.
 *
 * \param invec  Triangles to bound; must contain at least two triangles
 *               (invec[0] is read unconditionally and both halves are
 *               asserted to be non-empty).
 * \param lvec   Receives the "left" triangles (appended).
 * \param rvec   Receives the "right" triangles (appended).
 * \return       Bounding box of invec.
 */
AABox AABox::box_and_split(const std::vector<Triangle>& invec,
                           std::vector<Triangle>& lvec,
                           std::vector<Triangle>& rvec)
{
    const size_t N = invec.size();

    Vec3f tri_pmin(FLT_MAX), tri_pmax(-FLT_MAX);
    for(size_t i=0;i<N;++i)
    {
        tri_pmin = v_min(invec[i].get_pmin(), tri_pmin);
        tri_pmax = v_max(invec[i].get_pmax(), tri_pmax);
    }
    Vec3f diff = tri_pmax - tri_pmin;

    // Find the point closest to the centre. The centre is half way between
    // the extents (tri_pmin + diff would be tri_pmax, i.e. a corner).
    Vec3f centre = tri_pmin + 0.5f*diff;
    Vec3f centre_close = invec[0].get_v0();
    float min_dist = FLT_MAX;
    for(size_t i=0;i<N;++i)
    {
        Vec3f v0 = invec[i].get_v0();
        Vec3f v1 = invec[i].get_v1();
        Vec3f v2 = invec[i].get_v2();

        float sl0 = sqr_length(centre-v0);
        if(sl0 < min_dist)
        {
            min_dist = sl0;
            centre_close = v0;
        }
        float sl1 = sqr_length(centre-v1);
        if(sl1 < min_dist)
        {
            min_dist = sl1;
            centre_close = v1;
        }
        float sl2 = sqr_length(centre-v2);
        if(sl2 < min_dist)
        {
            min_dist = sl2;
            centre_close = v2;
        }
    }

    // Axis with the largest extent; ties go to the higher axis index.
    int k;
    if(diff[0]>diff[1])
    {
        if(diff[0]>diff[2])
            k = 0;
        else
            k = 2;
    }
    else
    {
        if(diff[1]>diff[2])
            k = 1;
        else
            k = 2;
    }

    // Partition the triangles about the midpoint of axis k.
    float thresh = diff[k]/2.0f + tri_pmin[k];
    for(size_t i=0;i<N;++i)
    {
        if(invec[i].get_centre()[k] > thresh)
            rvec.push_back(invec[i]);
        else
            lvec.push_back(invec[i]);
    }

    // If all triangles landed in one box, split them naively.
    if(lvec.empty() || rvec.empty())
    {
        lvec.clear();
        lvec.insert(lvec.end(), invec.begin(), invec.begin()+N/2);
        rvec.clear();
        rvec.insert(rvec.end(), invec.begin()+N/2, invec.end());
    }

    assert(!lvec.empty());
    assert(!rvec.empty());
    assert(lvec.size()+rvec.size() == invec.size());

    return AABox(tri_pmin, tri_pmax, centre_close);
}
/**
 * Compute an oriented bounding box for a set of triangles and split the set
 * in two.
 *
 * All vertices are transformed by the matrix from compute_rotation() and
 * bounded component-wise in that frame. The transformed vertex nearest to
 * the middle of those bounds is passed to the inner AABox as its third
 * argument.
 *
 * Triangles whose transformed centre has a first coordinate above the middle
 * of the box go to rvec, all others to lvec (both are appended to). If one
 * side stays empty, both vectors are cleared and the input is cut in half by
 * index instead.
 *
 * \param invec  Triangles to bound.
 * \param lvec   Receives the "left" triangles.
 * \param rvec   Receives the "right" triangles.
 * \return       OBox made of the rotation and the box in the rotated frame.
 */
OBox OBox::box_and_split(const std::vector<Triangle>& invec,
                         std::vector<Triangle>& lvec,
                         std::vector<Triangle>& rvec)
{
    // Orientation of the box.
    const Mat3x3f Rot = compute_rotation(invec);

    const int tri_count = invec.size();
    const int pt_count = 3*tri_count;

    // Transform every vertex and accumulate the extents in the box frame.
    vector<Vec3f> rotated(pt_count);
    Vec3f lo(FLT_MAX), hi(-FLT_MAX);
    for(int t=0; t<tri_count; ++t)
    {
        const Triangle& tri = invec[t];
        const int base = 3*t;

        rotated[base  ] = Rot*tri.get_v0();
        rotated[base+1] = Rot*tri.get_v1();
        rotated[base+2] = Rot*tri.get_v2();

        for(int c=0; c<3; ++c)
        {
            lo = v_min(rotated[base+c], lo);
            hi = v_max(rotated[base+c], hi);
        }
    }

    // Transformed vertex nearest to the middle of the box.
    const Vec3f centre = lo + 0.5f*(hi-lo);
    Vec3f centre_close;
    float best = FLT_MAX;
    for(int n=0; n<pt_count; ++n)
    {
        const Vec3f candidate = rotated[n];
        const float d2 = sqr_length(centre-candidate);
        if(d2 < best)
        {
            best = d2;
            centre_close = candidate;
        }
    }

    // Distribute the triangles about the middle of the first box axis.
    const float thresh = centre[0];
    for(int t=0; t<tri_count; ++t)
    {
        const Vec3f c = Rot * invec[t].get_centre();
        if(c[0] > thresh)
            rvec.push_back(invec[t]);
        else
            lvec.push_back(invec[t]);
    }

    // Everything on one side: fall back to halving the input by index.
    if(lvec.empty() || rvec.empty())
    {
        const int half = tri_count/2;
        lvec.clear();
        lvec.insert(lvec.end(), invec.begin(), invec.begin()+half);
        rvec.clear();
        rvec.insert(rvec.end(), invec.begin()+half, invec.end());
    }

    return OBox(Rot, AABox(lo, hi, centre_close));
}
void FAST_t(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, bool nonmax_suppression) { Mat img = _img.getMat(); const int K = patternSize/2, N = patternSize + K + 1; int i, j, k, pixel[25]; makeOffsets(pixel, (int)img.step, patternSize); #if CV_SIMD128 const int quarterPatternSize = patternSize/4; v_uint8x16 delta = v_setall_u8(0x80), t = v_setall_u8((char)threshold), K16 = v_setall_u8((char)K); bool hasSimd = hasSIMD128(); #if CV_TRY_AVX2 Ptr<opt_AVX2::FAST_t_patternSize16_AVX2> fast_t_impl_avx2; if(CV_CPU_HAS_SUPPORT_AVX2) fast_t_impl_avx2 = opt_AVX2::FAST_t_patternSize16_AVX2::getImpl(img.cols, threshold, nonmax_suppression, pixel); #endif #endif keypoints.clear(); threshold = std::min(std::max(threshold, 0), 255); uchar threshold_tab[512]; for( i = -255; i <= 255; i++ ) threshold_tab[i+255] = (uchar)(i < -threshold ? 1 : i > threshold ? 2 : 0); AutoBuffer<uchar> _buf((img.cols+16)*3*(sizeof(int) + sizeof(uchar)) + 128); uchar* buf[3]; buf[0] = _buf.data(); buf[1] = buf[0] + img.cols; buf[2] = buf[1] + img.cols; int* cpbuf[3]; cpbuf[0] = (int*)alignPtr(buf[2] + img.cols, sizeof(int)) + 1; cpbuf[1] = cpbuf[0] + img.cols + 1; cpbuf[2] = cpbuf[1] + img.cols + 1; memset(buf[0], 0, img.cols*3); for(i = 3; i < img.rows-2; i++) { const uchar* ptr = img.ptr<uchar>(i) + 3; uchar* curr = buf[(i - 3)%3]; int* cornerpos = cpbuf[(i - 3)%3]; memset(curr, 0, img.cols); int ncorners = 0; if( i < img.rows - 3 ) { j = 3; #if CV_SIMD128 if( hasSimd ) { if( patternSize == 16 ) { #if CV_TRY_AVX2 if (fast_t_impl_avx2) fast_t_impl_avx2->process(j, ptr, curr, cornerpos, ncorners); #endif //vz if (j <= (img.cols - 27)) //it doesn't make sense using vectors for less than 8 elements { for (; j < img.cols - 16 - 3; j += 16, ptr += 16) { v_uint8x16 v = v_load(ptr); v_int8x16 v0 = v_reinterpret_as_s8((v + t) ^ delta); v_int8x16 v1 = v_reinterpret_as_s8((v - t) ^ delta); v_int8x16 x0 = v_reinterpret_as_s8(v_sub_wrap(v_load(ptr + pixel[0]), delta)); v_int8x16 x1 = 
v_reinterpret_as_s8(v_sub_wrap(v_load(ptr + pixel[quarterPatternSize]), delta)); v_int8x16 x2 = v_reinterpret_as_s8(v_sub_wrap(v_load(ptr + pixel[2*quarterPatternSize]), delta)); v_int8x16 x3 = v_reinterpret_as_s8(v_sub_wrap(v_load(ptr + pixel[3*quarterPatternSize]), delta)); v_int8x16 m0, m1; m0 = (v0 < x0) & (v0 < x1); m1 = (x0 < v1) & (x1 < v1); m0 = m0 | ((v0 < x1) & (v0 < x2)); m1 = m1 | ((x1 < v1) & (x2 < v1)); m0 = m0 | ((v0 < x2) & (v0 < x3)); m1 = m1 | ((x2 < v1) & (x3 < v1)); m0 = m0 | ((v0 < x3) & (v0 < x0)); m1 = m1 | ((x3 < v1) & (x0 < v1)); m0 = m0 | m1; int mask = v_signmask(m0); if( mask == 0 ) continue; if( (mask & 255) == 0 ) { j -= 8; ptr -= 8; continue; } v_int8x16 c0 = v_setzero_s8(); v_int8x16 c1 = v_setzero_s8(); v_uint8x16 max0 = v_setzero_u8(); v_uint8x16 max1 = v_setzero_u8(); for( k = 0; k < N; k++ ) { v_int8x16 x = v_reinterpret_as_s8(v_load((ptr + pixel[k])) ^ delta); m0 = v0 < x; m1 = x < v1; c0 = v_sub_wrap(c0, m0) & m0; c1 = v_sub_wrap(c1, m1) & m1; max0 = v_max(max0, v_reinterpret_as_u8(c0)); max1 = v_max(max1, v_reinterpret_as_u8(c1)); } max0 = v_max(max0, max1); int m = v_signmask(K16 < max0); for( k = 0; m > 0 && k < 16; k++, m >>= 1 ) { if(m & 1) { cornerpos[ncorners++] = j+k; if(nonmax_suppression) curr[j+k] = (uchar)cornerScore<patternSize>(ptr+k, pixel, threshold); } } } } } } #endif for( ; j < img.cols - 3; j++, ptr++ ) { int v = ptr[0]; const uchar* tab = &threshold_tab[0] - v + 255; int d = tab[ptr[pixel[0]]] | tab[ptr[pixel[8]]]; if( d == 0 ) continue; d &= tab[ptr[pixel[2]]] | tab[ptr[pixel[10]]]; d &= tab[ptr[pixel[4]]] | tab[ptr[pixel[12]]]; d &= tab[ptr[pixel[6]]] | tab[ptr[pixel[14]]]; if( d == 0 ) continue; d &= tab[ptr[pixel[1]]] | tab[ptr[pixel[9]]]; d &= tab[ptr[pixel[3]]] | tab[ptr[pixel[11]]]; d &= tab[ptr[pixel[5]]] | tab[ptr[pixel[13]]]; d &= tab[ptr[pixel[7]]] | tab[ptr[pixel[15]]]; if( d & 1 ) { int vt = v - threshold, count = 0; for( k = 0; k < N; k++ ) { int x = ptr[pixel[k]]; if(x < vt) { if( ++count > K 
) { cornerpos[ncorners++] = j; if(nonmax_suppression) curr[j] = (uchar)cornerScore<patternSize>(ptr, pixel, threshold); break; } } else count = 0; } } if( d & 2 ) { int vt = v + threshold, count = 0; for( k = 0; k < N; k++ ) { int x = ptr[pixel[k]]; if(x > vt) { if( ++count > K ) { cornerpos[ncorners++] = j; if(nonmax_suppression) curr[j] = (uchar)cornerScore<patternSize>(ptr, pixel, threshold); break; } } else count = 0; } } } } cornerpos[-1] = ncorners; if( i == 3 ) continue; const uchar* prev = buf[(i - 4 + 3)%3]; const uchar* pprev = buf[(i - 5 + 3)%3]; cornerpos = cpbuf[(i - 4 + 3)%3]; ncorners = cornerpos[-1]; for( k = 0; k < ncorners; k++ ) { j = cornerpos[k]; int score = prev[j]; if( !nonmax_suppression || (score > prev[j+1] && score > prev[j-1] && score > pprev[j-1] && score > pprev[j] && score > pprev[j+1] && score > curr[j-1] && score > curr[j] && score > curr[j+1]) ) { keypoints.push_back(KeyPoint((float)j, (float)(i-1), 7.f, -1, (float)score)); } } }